def least_squares_fit(x, y):
    """Given training values for x and y, return the least-squares (alpha, beta)
    for the model y ~= alpha + beta * x."""
    slope = (stats.correlation(x, y)
             * stats.standard_deviation(y) / stats.standard_deviation(x))
    intercept = stats.mean(y) - slope * stats.mean(x)
    # (alpha, beta) == (intercept, slope)
    return intercept, slope
def correlation(ticker1, ticker2, start, end):
    """Correlation between the opening prices of two tickers over [start, end]."""
    hist1 = yf.get_historical_prices(ticker1, start, end)
    hist2 = yf.get_historical_prices(ticker2, start, end)
    opens1 = get_open_as_float(hist1)
    opens2 = get_open_as_float(hist2)
    return stats.correlation(opens1, opens2)
def least_squares_fit(xs: Vector, ys: Vector) -> Tuple[float, float]:
    """
    Given a dataset represented by xs and ys, return the (alpha, beta) that
    minimize the squared error for the model y_i = alpha * x_i + beta.
    """
    slope = correlation(xs, ys) * standard_deviation(ys) / standard_deviation(xs)
    intercept = mean(ys) - slope * mean(xs)
    # Here alpha is the slope and beta the intercept, matching the docstring.
    return slope, intercept
def test_correlation(self):
    """Our correlation should agree with numpy.corrcoef on a fixed dataset."""
    A = [0, -2, -5, 7, 7, 5, 2, -2, -4, 5, -5, 12, 3, 6, 10, 0, 6, 5, 7, 12]
    B = [-2, -6, -12, 2, 5, 8, 2, -6, -10, 8, -7, 1, 4, 10, 18, -2, 6, 8, 12, 22]
    my_value = st.correlation(A, B)
    # corrcoef returns the full 2x2 correlation matrix; [0, 1] is corr(A, B).
    numpy_value = np.corrcoef(A, B)[0, 1]
    # Two independent float implementations can differ in the last bits, so
    # exact equality is fragile; `assertEquals` is also a deprecated alias.
    self.assertAlmostEqual(my_value, numpy_value, places=10)
# Summary statistics for two datasets (list1/list2 are defined outside
# this view); the *_pop variants are population statistics, the *_sample
# variants (trailing True flag) are sample statistics.
total1 = stats.total(list1)
total2 = stats.total(list2)
mean1 = stats.mean(list1)
mean2 = stats.mean(list2)
mode1 = stats.mode(list1)
mode2 = stats.mode(list2)
median1 = stats.median(list1)
median2 = stats.median(list2)
variance1 = stats.variance(list1)
variance2 = stats.variance(list2)
standard_deviation1 = stats.SD(list1)
standard_deviation2 = stats.SD(list2)
covariance_pop = stats.covariance(list1, list2)
covariance_sample = stats.covariance(list1, list2, True)
correlation = stats.correlation(list1, list2)
skewness_pop1 = stats.skewness(list1)
skewness_pop2 = stats.skewness(list2)
skewness_sample1 = stats.skewness(list1, True)
skewness_sample2 = stats.skewness(list2, True)
kurtosis_pop1 = stats.kurtosis(list1)
kurtosis_pop2 = stats.kurtosis(list2)
kurtosis_sample1 = stats.kurtosis(list1, True)
kurtosis_sample2 = stats.kurtosis(list2, True)
print("Total1:", total1)
print("Total2:", total2)
print("Mean1:", mean1)
# Fixed label: was "Mean2" (missing colon), inconsistent with every other
# label printed in this report.
print("Mean2:", mean2)
print("Mode1:", mode1)
print("Mode2:", mode2)
def test_correlation_with_zero_std(self):
    """A constant series has zero standard deviation; correlation must be 0."""
    xs = [1, 2, 3, 4, 5]
    zeros = [0, 0, 0, 0, 0]
    self.assertEqual(stats.correlation(xs, zeros), 0)
# Benchmark: list-based vs array-based correlation over signals of
# increasing length, then plot both timing curves.
import time

import matplotlib
import matplotlib.pyplot as plt

import rsg
import stats

HARMONICS = 8
FREQUENCY = 1200

# Signal lengths: powers of two from 2**1 through 2**19.
Ns = [2 ** exp for exp in range(1, 20)]

listtimes = []
arraytimes = []
for N in Ns:
    start = time.time()
    stats.correlation(rsg.generate(HARMONICS, FREQUENCY, N),
                      rsg.generate(HARMONICS, FREQUENCY, N))
    listtimes.append(time.time() - start)

    start = time.time()
    stats.correlation_array(rsg.generate(HARMONICS, FREQUENCY, N),
                            rsg.generate(HARMONICS, FREQUENCY, N))
    arraytimes.append(time.time() - start)

fig, ax = plt.subplots()
ax.plot(Ns, listtimes, c="g", label="list")
ax.plot(Ns, arraytimes, c="r", label="array")
# The labels above were never displayed: legend() was missing.
ax.legend()
fig.savefig("example-listvsarray.png")
plt.show()
# Print a battery of descriptive statistics for dataset A
# (A and the `st` module are defined outside this view).
median = st.median(A)
print("A's median = ", median)
quantile = st.quantile(A, 0.2)
print("A's 20% quantile = ", quantile)
quantile = st.quantile(A, 0.9)
print("A's 90% quantile = ", quantile)
mode = st.mode(A)
print("A's mode = ", mode)
data_range = st.data_range(A)
print("A's range = ", data_range)
variance = st.variance(A)
print("A's variance = ", variance)
standard_deviation = st.standard_deviation(A)
print("A's standard deviation = ", standard_deviation)
interquartile_range = st.interquartile_range(A)
print("A's interquartile range of 25% ~ 75% = ", interquartile_range)

# A symmetric V-shape: y depends only on |x|, so the linear correlation is
# exactly 0 — correlation captures only linear association.
x = [-2, -1, 0, 1, 2]
y = [2, 1, 0, 1, 2]
correlation = st.correlation(x, y)
print("correlation = ", correlation)
def matrix_entry(i, j):
    """Entry (i, j) of the correlation matrix: corr of columns i and j of data."""
    col_i = get_column(data, i)
    col_j = get_column(data, j)
    return correlation(col_i, col_j)
def matrix_entry(i, j):
    """Correlation between columns i and j of `data` (one matrix cell)."""
    first = get_col(data, i)
    second = get_col(data, j)
    return correlation(first, second)
def matrix_entry(i, j):
    """Cell (i, j) of the correlation matrix over the columns of `data`."""
    left = algebra.get_column(data, i)
    right = algebra.get_column(data, j)
    return stats.correlation(left, right)
# NOTE(review): this chunk begins mid-function — the loop and `return` below
# are the tail of an enclosing PCA routine whose `def` line is outside this
# view (it greedily extracts num_components principal components).
for _ in range(num_components):
    component = first_principal_component(X)
    components.append(component)
    # Deflate X so the next iteration finds the next orthogonal component.
    X = remove_projection(X, component)
return components


def transform_vector(v, components):
    """Project vector v onto each component (one dot product per component)."""
    return [dot(v, w) for w in components]


def transform(X, components):
    """Apply transform_vector to every row of X."""
    return [transform_vector(x_i, components) for x_i in X]


if __name__ == "__main__":
    print("correlation(xs, ys1)", correlation(xs, ys1))
    print("correlation(xs, ys2)", correlation(xs, ys2))

    # safe parsing
    data = []
    with open("comma_delimited_stock_prices.csv", "r", encoding='utf8', newline='') as f:
        reader = csv.reader(f)
        for line in parse_rows_with(reader, [dateutil.parser.parse, None, float]):
            data.append(line)

    # Report every row where a field failed to parse (parser returned None).
    for row in data:
        if any(x is None for x in row):
            print(row)
def least_squares_fit(x, y):
    """Given training data x and y, return the least-squares values of
    alpha (intercept) and beta (slope)."""
    slope = correlation(x, y) * standard_deviation(y) / standard_deviation(x)
    intercept = mean(y) - slope * mean(x)
    return intercept, slope
def test_correlation(self):
    """A series correlated with an identical series has correlation 1."""
    xs = [1, 2, 3, 4, 5]
    ys = [1, 2, 3, 4, 5]
    self.assertAlmostEqual(stats.correlation(xs, ys), 1, delta=0.00001)
def correlation_ij(i: int, j: int) -> float:
    """Data generator for make_matrix: correlation of data[i] and data[j].

    Note: this closes over the outer `data` rather than taking it as an
    argument.
    """
    series_i = data[i]
    series_j = data[j]
    return correlation(series_i, series_j)
# Histogram ys1 and ys2 into 0.1-wide buckets by flooring each value to a
# bucket boundary; the two marginal distributions come out looking alike.
bucket_size = 0.1
buckets1 = [bucket_size * (floor(p / bucket_size)) for p in ys1]
buckets2 = [bucket_size * (floor(p / bucket_size)) for p in ys2]
h1 = Counter(buckets1)
h2 = Counter(buckets2)

# Side-by-side bar charts of the two (similar) marginal distributions.
fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2)
ax1.bar(h1.keys(), h1.values(), width=bucket_size)
ax1.set_xlabel('ys1')
ax1.set_ylabel('counts')
ax1.set_title('similar to ys2 counts...')
ax2.bar(h2.keys(), h2.values(), width=bucket_size)
ax2.set_xlabel('ys2')
ax2.set_title('similar to ys1 counts')
plt.tight_layout()
plt.show()

# but each has a very different joint distribution with xs
plt.scatter(xs, ys1, marker='.', color='black', label='ys1')
plt.scatter(xs, ys2, marker='.', color='gray', label='ys2')
plt.xlabel('xs')
plt.ylabel('ys')
plt.legend(loc=9)
plt.title("Very Different Joint Distributions With xs")
plt.show()

# ...and the correlation coefficient makes that difference quantitative.
from stats import correlation
print(correlation(xs, ys1))
print(correlation(xs, ys2))
def matrix_entry(i, j):
    """Entry (i, j) of the correlation matrix over the columns of `data`."""
    # Consistency fix: the original qualified the first call as
    # `lin_alg.get_col` but left the second as a bare `get_col`; both now go
    # through the lin_alg module so the function does not depend on a second,
    # separately-imported name.
    return stats.correlation(lin_alg.get_col(data, i),
                             lin_alg.get_col(data, j))
# Python 2 script fragment: compare each reporting company's correlation with
# SPY over a long window vs a short window (spy_long, earnings_list, dates
# are defined outside this view).
spy_short = spy_long[-short_biz_days:]
print str(len(earnings_list)) + " companies reporting on " + date_string + ":"
print
short_list = []
long_list = []
i = 0
# Iterate over a copy (earnings_list[:]) so entries can be removed from the
# original list without skipping elements mid-iteration.
for earnings_info in earnings_list[:]:
    ticker = earnings_info[1]
    comp_hist = yf.get_historical_prices(ticker, long_back, earnings_date_string)
    if (len(comp_hist) != 0):
        company_long = yfutils.get_open_as_float(comp_hist)
        company_short = company_long[-short_biz_days:]
        long_corr = stats.correlation(company_long, spy_long)
        short_corr = stats.correlation(company_short, spy_short)
        long_list.append(long_corr)
        short_list.append(short_corr)
        # NOTE(review): len(earnings_list) shrinks as bad tickers are
        # removed, so this progress percentage uses a moving denominator.
        print ticker + " - Processed " + str((i / float(len(earnings_list))) * 100) + '%'
    else:
        # No price history: drop the company from further processing.
        print "Warning: could not get historical prices for ticker: " + ticker
        earnings_list.remove(earnings_info)
    i += 1
# Element-wise difference: change in correlation from long to short window.
corr_change = stats.minus(short_list, long_list)
# corr_change = [math.fabs(a) for a in corr_change]