Example #1
0
def least_squares_fit(x, y):
    """Compute the least-squares coefficients alpha and beta for training data.

    beta is ``correlation(x, y) * sd(y) / sd(x)`` and alpha is chosen so that
    ``mean(y) == alpha + beta * mean(x)``. Returns the tuple ``(alpha, beta)``.
    """
    r = stats.correlation(x, y)
    sd_y = stats.standard_deviation(y)
    sd_x = stats.standard_deviation(x)
    beta = r * sd_y / sd_x

    mean_y = stats.mean(y)
    mean_x = stats.mean(x)
    alpha = mean_y - beta * mean_x
    return alpha, beta
def correlation(ticker1, ticker2, start, end):
	"""Return stats.correlation of two tickers' series between start and end.

	Historical prices for both tickers are fetched with
	yf.get_historical_prices, each result is converted with
	get_open_as_float, and the two converted series are correlated.
	"""
	# Fetch both histories first, then convert, matching the original order.
	histories = [
		yf.get_historical_prices(ticker, start, end)
		for ticker in (ticker1, ticker2)
	]
	first_opens, second_opens = (get_open_as_float(h) for h in histories)
	return stats.correlation(first_opens, second_opens)
Example #3
0
def least_squares_fit(x, y):
    """Return ``(alpha, beta)``, the least-squares coefficients for x and y.

    The slope-like term beta is the correlation of x and y scaled by
    ``sd(y) / sd(x)``; alpha is ``mean(y) - beta * mean(x)``.
    """
    corr_xy = stats.correlation(x, y)
    spread_y = stats.standard_deviation(y)
    spread_x = stats.standard_deviation(x)
    beta = corr_xy * spread_y / spread_x

    centre_y = stats.mean(y)
    centre_x = stats.mean(x)
    alpha = centre_y - beta * centre_x
    return alpha, beta
def least_squares_fit(xs: Vector, ys: Vector) -> Tuple[float, float]:
    """
    Fit ``y_i = alpha * x_i + beta`` to the dataset (xs, ys) by least squares.

    In this function alpha is the multiplier of x_i and beta is the additive
    term; the result is returned in the order ``(alpha, beta)``.
    """
    r = correlation(xs, ys)
    sd_ys = standard_deviation(ys)
    sd_xs = standard_deviation(xs)
    slope = r * sd_ys / sd_xs

    mean_ys = mean(ys)
    mean_xs = mean(xs)
    intercept = mean_ys - slope * mean_xs
    return slope, intercept
Example #5
0
    def test_correlation(self):
        """Check st.correlation against NumPy's correlation coefficient.

        The reference value is the off-diagonal entry of ``np.corrcoef(A, B)``
        for a fixed pair of 20-element samples.
        """
        A = [
            0, -2, -5, 7, 7, 5, 2, -2, -4, 5, -5, 12, 3, 6, 10, 0, 6, 5, 7, 12
        ]
        B = [
            -2, -6, -12, 2, 5, 8, 2, -6, -10, 8, -7, 1, 4, 10, 18, -2, 6, 8,
            12, 22
        ]

        my_value = st.correlation(A, B)
        numpy_value = np.corrcoef(A, B)[0, 1]

        # assertEquals is a deprecated alias that was removed in Python 3.12,
        # and two independently computed floats should not be compared for
        # exact equality; assertAlmostEqual tolerates rounding differences.
        self.assertAlmostEqual(my_value, numpy_value)
# Descriptive statistics for the two samples list1 and list2, computed with
# the project's stats module.
total1 = stats.total(list1)
total2 = stats.total(list2)
mean1 = stats.mean(list1)
mean2 = stats.mean(list2)
mode1 = stats.mode(list1)
mode2 = stats.mode(list2)
median1 = stats.median(list1)
median2 = stats.median(list2)
variance1 = stats.variance(list1)
variance2 = stats.variance(list2)
standard_deviation1 = stats.SD(list1)
standard_deviation2 = stats.SD(list2)
# NOTE(review): the extra True argument presumably selects the sample
# (rather than population) estimate — confirm against the stats module.
covariance_pop = stats.covariance(list1, list2)
covariance_sample = stats.covariance(list1, list2, True)
correlation = stats.correlation(list1, list2)
skewness_pop1 = stats.skewness(list1)
skewness_pop2 = stats.skewness(list2)
skewness_sample1 = stats.skewness(list1, True)
skewness_sample2 = stats.skewness(list2, True)
kurtosis_pop1 = stats.kurtosis(list1)
kurtosis_pop2 = stats.kurtosis(list2)
kurtosis_sample1 = stats.kurtosis(list1, True)
kurtosis_sample2 = stats.kurtosis(list2, True)

print("Total1:", total1)
print("Total2:", total2)
print("Mean1:", mean1)
# The "Mean2" label was missing the colon used by every other label.
print("Mean2:", mean2)
print("Mode1:", mode1)
print("Mode2:", mode2)
Example #7
0
 def test_correlation_with_zero_std(self):
     """stats.correlation must equal 0 when the second series is all zeros."""
     varying = [1, 2, 3, 4, 5]
     constant = [0, 0, 0, 0, 0]
     result = stats.correlation(varying, constant)
     self.assertEqual(result, 0)
import matplotlib
import matplotlib.pyplot as plt
import rsg
import stats
import time

# Benchmark stats.correlation against stats.correlation_array over growing
# input sizes and plot the elapsed times.
HARMONICS = 8
FREQUENCY = 1200
# Input sizes to benchmark: 2**1 through 2**19.
Ns = [2**num for num in range(1, 20)]

listtimes = []
arraytimes = []
for N in Ns:
    # Generate the inputs before starting the clock so that only the
    # correlation call itself is timed, not the signal generation.
    first = rsg.generate(HARMONICS, FREQUENCY, N)
    second = rsg.generate(HARMONICS, FREQUENCY, N)
    # perf_counter is the monotonic, high-resolution clock intended for
    # measuring short durations; time.time() can jump and is coarser.
    start = time.perf_counter()
    stats.correlation(first, second)
    listtimes.append(time.perf_counter() - start)

    first = rsg.generate(HARMONICS, FREQUENCY, N)
    second = rsg.generate(HARMONICS, FREQUENCY, N)
    start = time.perf_counter()
    stats.correlation_array(first, second)
    arraytimes.append(time.perf_counter() - start)

fig, ax = plt.subplots()

ax.plot(Ns, listtimes, c="g", label="list")
ax.plot(Ns, arraytimes, c="r", label="array")
# The label= arguments above are only displayed once a legend is drawn.
ax.legend()

fig.savefig("example-listvsarray.png")
plt.show()
Example #9
0
# Print a series of descriptive statistics for the sample A using the
# project's st module; each value is kept in a module-level name.
median = st.median(A)
print("A's median = ", median)

# The name `quantile` is reused: after this section it holds the 0.9 result.
quantile = st.quantile(A, 0.2)
print("A's 20% quantile = ", quantile)

quantile = st.quantile(A, 0.9)
print("A's 90% quantile = ", quantile)

mode = st.mode(A)
print("A's mode = ", mode)

data_range = st.data_range(A)
print("A's range = ", data_range)

variance = st.variance(A)
print("A's variance = ", variance)

standard_deviation = st.standard_deviation(A)
print("A's standard deviation = ", standard_deviation)

interquartile_range = st.interquartile_range(A)
print("A's interquartile range of 25% ~ 75% = ", interquartile_range)

# Here y is the absolute value of x, so y depends on x but not linearly.
# NOTE(review): presumably meant to show a correlation of 0 despite the
# dependence — confirm against st.correlation's definition.
x = [-2, -1, 0, 1, 2]
y = [2, 1, 0, 1, 2]

correlation = st.correlation(x, y)
print("correlation = ", correlation)
 def matrix_entry(i, j):
     """Return the correlation between columns i and j of data."""
     column_i = get_column(data, i)
     column_j = get_column(data, j)
     return correlation(column_i, column_j)
Example #11
0
 def matrix_entry(i, j):
     """Return the correlation between columns i and j of data."""
     left = get_col(data, i)
     right = get_col(data, j)
     return correlation(left, right)
Example #12
0
 def matrix_entry(i, j):
     """Return stats.correlation of columns i and j of data."""
     column_i = algebra.get_column(data, i)
     column_j = algebra.get_column(data, j)
     return stats.correlation(column_i, column_j)
    for _ in range(num_components):
        component = first_principal_component(X)
        components.append(component)
        X = remove_projection(X, component)

    return components

def transform_vector(v, components):
    """Return the list of ``dot(v, component)`` values, one per component."""
    return [dot(v, component) for component in components]

def transform(X, components):
    """Apply transform_vector to every element of X and return the results."""
    return [transform_vector(row, components) for row in X]

if __name__ == "__main__":

    print("correlation(xs, ys1)", correlation(xs, ys1))
    print("correlation(xs, ys2)", correlation(xs, ys2))

    # Safe parsing: read the CSV through parse_rows_with, giving one parser
    # per column (None for the middle column).
    data = []

    with open("comma_delimited_stock_prices.csv", "r", encoding='utf8', newline='') as f:
        reader = csv.reader(f)
        data.extend(
            parse_rows_with(reader, [dateutil.parser.parse, None, float])
        )

    # Show every parsed row that contains a None field.
    # NOTE(review): presumably None marks a field that failed to parse —
    # confirm against parse_rows_with.
    for row in data:
        if not all(x is not None for x in row):
            print(row)
Example #14
0
def least_squares_fit(x, y):
    """Given training data x and y, find the least-squares alpha and beta.

    Returns the tuple ``(alpha, beta)`` where beta is
    ``correlation(x, y) * sd(y) / sd(x)`` and alpha is
    ``mean(y) - beta * mean(x)``.
    """
    r = correlation(x, y)
    sd_y = standard_deviation(y)
    sd_x = standard_deviation(x)
    beta = r * sd_y / sd_x

    mean_y = mean(y)
    mean_x = mean(x)
    alpha = mean_y - beta * mean_x
    return alpha, beta
Example #15
0
 def test_correlation(self):
     """Two identical series must correlate to 1, within a 1e-5 tolerance."""
     xs = [1, 2, 3, 4, 5]
     ys = [1, 2, 3, 4, 5]
     result = stats.correlation(xs, ys)
     self.assertAlmostEqual(result, 1, delta=0.00001)
Example #16
0
 def correlation_ij(i: int, j: int) -> float:
     """Return the correlation of data[i] with data[j].

     Inner function intended as the entry generator for make_matrix; it
     reads data from the enclosing scope rather than taking it as a
     parameter.
     """
     series_i = data[i]
     series_j = data[j]
     return correlation(series_i, series_j)
# Compare ys1 and ys2: first their individual histograms, then their joint
# behaviour with xs, and finally their correlations with xs.

# Snap every value to a multiple of bucket_size by multiplying the floor of
# p / bucket_size back by bucket_size, then count the values in each bucket.
# NOTE(review): floor and Counter are presumably math.floor and
# collections.Counter — their imports are not visible here.
bucket_size = 0.1
buckets1 = [bucket_size * (floor(p / bucket_size)) for p in ys1]
buckets2 = [bucket_size * (floor(p / bucket_size)) for p in ys2]
h1 = Counter(buckets1)
h2 = Counter(buckets2)

# Side-by-side bar charts of the bucket counts, one bar per bucket.
fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2)
ax1.bar(h1.keys(), h1.values(), width=bucket_size)
ax1.set_xlabel('ys1')
ax1.set_ylabel('counts')
ax1.set_title('similar to ys2 counts...')

ax2.bar(h2.keys(), h2.values(), width=bucket_size)
ax2.set_xlabel('ys2')
ax2.set_title('similar to ys1 counts')

plt.tight_layout()
plt.show()

# but each has a very different joint distribution with xs
plt.scatter(xs, ys1, marker='.', color='black', label='ys1')
plt.scatter(xs, ys2, marker='.', color='gray', label='ys2')
plt.xlabel('xs')
plt.ylabel('ys')
# loc=9 is matplotlib's numeric code for the 'upper center' legend position.
plt.legend(loc=9)
plt.title("Very Different Joint Distributions With xs")
plt.show()

# Print the correlation of xs with each series.
from stats import correlation
print(correlation(xs, ys1))
print(correlation(xs, ys2))
Example #18
0
 def matrix_entry(i, j):
     """Return stats.correlation of columns i and j of data."""
     first_column = algebra.get_column(data, i)
     second_column = algebra.get_column(data, j)
     return stats.correlation(first_column, second_column)
Example #19
0
	def matrix_entry(i, j):
		"""Return stats.correlation of columns i and j of data."""
		# Both columns are fetched through lin_alg.get_col; the original used
		# a bare get_col for the second column, which only works if that name
		# was separately imported into this module.
		return stats.correlation(lin_alg.get_col(data, i),
		                         lin_alg.get_col(data, j))
		spy_short = spy_long[-short_biz_days:]
		
		print str(len(earnings_list)) + " companies reporting on " + date_string + ":"
		print
		
		short_list = []
		long_list = []
		i = 0
		for earnings_info in earnings_list[:]:
			ticker = earnings_info[1]
			comp_hist = yf.get_historical_prices(ticker, long_back, earnings_date_string)
			if (len(comp_hist) != 0):
				company_long = yfutils.get_open_as_float(comp_hist)
				company_short = company_long[-short_biz_days:]
			
				long_corr = stats.correlation(company_long, spy_long)
				short_corr = stats.correlation(company_short, spy_short)
			
				long_list.append(long_corr)
				short_list.append(short_corr)
			
				print ticker + " - Processed " + str((i / float(len(earnings_list))) * 100) + '%'
			else:
				print "Warning: could not get historical prices for ticker: " + ticker
				earnings_list.remove(earnings_info)
			
			i += 1	
		
		corr_change = stats.minus(short_list, long_list)
		# corr_change = [math.fabs(a) for a in corr_change]