def satisfies_preconditions(cls, historical_data, proxy_value):
    """Decide whether this method may be applied to the given proxy value.

    The historical proxy sizes and actual sizes are first trimmed to equal
    length; the checks below are then evaluated in order and the first one
    that fails rejects the method.

    Arguments:
        historical_data(HistoricalData): The historical estimation data
        proxy_value(float): The proxy size estimate

    Returns:
        bool: True if this method can be used, False otherwise.
    """
    xs, ys = trim_to_equal_length(
        historical_data.proxy_sizes, historical_data.actual_sizes
    )

    # At least three data points are required.
    if len(ys) < 3:
        return False

    fit = cls(historical_data).get_regression()
    projected = fit.estimate(proxy_value)

    # The intercept must not exceed a quarter of the projected size.
    if fit.beta0 > 0.25 * projected:
        return False

    # The slope must lie within [0.5, 2.0].
    if fit.beta1 < 0.5 or fit.beta1 > 2.0:
        return False

    # R^2 must reach at least 0.5.
    r_squared = statistics.correlation(xs, ys) ** 2
    if r_squared < 0.5:
        return False

    # The significance value must not exceed 0.05.
    return not (statistics.significance(xs, ys) > 0.05)
def satisfies_preconditions(cls, historical_data, proxy_value):
    """Indicates whether or not the historical data allows this method to be
    used for the given proxy value.

    The historical proxy sizes and actual times are trimmed to equal length
    and then checked, in order, for: enough data points, a small enough
    intercept, a slope close to the historical productivity, sufficient
    correlation and sufficient significance.

    Arguments:
        historical_data(HistoricalData): The historical estimation data
        proxy_value(float): The proxy size estimate

    Returns:
        bool: True if this method can be used, False otherwise.
    """
    planned_sizes, actual_times = trim_to_equal_length(
        historical_data.proxy_sizes, historical_data.actual_times
    )

    # Too few data points
    if len(planned_sizes) < 3:
        return False

    regression = cls(historical_data).get_regression()
    expected_time = regression.estimate(proxy_value)

    # Beta0 is not close to zero
    if regression.beta0 > 0.25 * expected_time:
        return False

    total_size = sum(planned_sizes)
    total_time = sum(actual_times)
    # Without a non-zero total on both sides no productivity ratio can be
    # computed (the division would raise), so the method is not applicable.
    if not total_size or not total_time:
        return False

    # Historical time per unit of planned size. float() keeps the inner
    # division from truncating when both totals are integers on Python 2.
    productivity = 1.0 / (float(total_size) / total_time)
    beta1_range = 0.5 * productivity

    # Beta1 is not within 50% of the historical productivity.
    # (The upper bound previously read the non-existent `regression.beta`.)
    if (regression.beta1 < (productivity - beta1_range)
            or regression.beta1 > (productivity + beta1_range)):
        return False

    # Weakly correlated
    if statistics.correlation(planned_sizes, actual_times) ** 2 < 0.5:
        return False

    # Weak statistical significance
    if statistics.significance(planned_sizes, actual_times) > 0.05:
        return False

    return True
def get_correlation(self): """Returns the correlation between estimation data values. Returns: float: The correlation (R^2) value. """ return statistics.correlation(self.x_values, self.y_values) ** 2
def execute(self):
    """Run the program.

    Reads the CSV file named on the command line, asks which columns to use
    as X and Y, and prints the correlation, t-value and significance of the
    two columns. Exits with status 1 if the CSV file yields no data.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('CSVFILE', help='path to csv file with data.')
    args = parser.parse_args()

    csv_data = io.read_csv_file(args.CSVFILE)
    if not csv_data:
        print('ERROR: Invalid csv data file.')
        sys.exit(1)

    # list() gives the chooser a real list even where keys() returns a view.
    columns = list(csv_data[0].keys())
    x_column = io.choose_from_list('X Column:', columns)
    y_column = io.choose_from_list('Y Column:', columns)

    # Keep only rows where BOTH cells are non-empty. Filtering Y on the X
    # cell (as before) let an empty Y value reach float() and crash.
    pairs = [
        (float(each[x_column]), float(each[y_column]))
        for each in csv_data
        if each[x_column] and each[y_column]
    ]
    x_data = [x for x, _ in pairs]
    y_data = [y for _, y in pairs]

    # Single-argument print(...) produces the same text on Python 2 and 3.
    print('R: {}'.format(statistics.correlation(x_data, y_data)))
    print('T: {}'.format(statistics.t_value(x_data, y_data)))
    print('Significance: {}'.format(statistics.significance(x_data, y_data)))
def matrix_entry(i, j):
    """Return the correlation between columns i and j of `data`."""
    column_i = get_column(data, i)
    column_j = get_column(data, j)
    return correlation(column_i, column_j)
# NOTE(review): the loop and `return` below are the tail of a function whose
# `def` line lies outside this view; the code is reproduced unchanged.
    # Repeat num_components times: compute a component from the current X,
    # record it, then replace X with the result of remove_projection.
    for _ in range(num_components):
        component = first_principal_component(X)
        components.append(component)
        X = remove_projection(X, component)

    return components


def transform_vector(v, components):
    """Return a list holding dot(v, w) for each w in components."""
    return [dot(v, w) for w in components]


def transform(X, components):
    """Return a list with transform_vector applied to each element of X."""
    return [transform_vector(x_i, components) for x_i in X]


if __name__ == "__main__":
    print("correlation(xs, ys1)", correlation(xs, ys1))
    print("correlation(xs, ys2)", correlation(xs, ys2))

    # safe parsing
    data = []
    with open("data\\comma_delimited_stock_prices.csv", "r", encoding='utf8', newline='') as f:
        reader = csv.reader(f)
        # One parser per column; None is passed for the second column.
        # NOTE(review): presumably parse_rows_with leaves None in a field that
        # fails to parse -- confirm against its definition.
        for line in parse_rows_with(reader, [dateutil.parser.parse, None, float]):
            data.append(line)

    # Print every row that contains at least one None field.
    for row in data:
        if any(x is None for x in row):
            print(row)
def least_squares_fit(x, y):
    """Given training values x and y, find the least-squares values of
    alpha and beta.

    Returns:
        tuple: (alpha, beta), where beta is correlation(x, y) scaled by the
        ratio of the standard deviations of y and x, and alpha is
        mean(y) - beta * mean(x).
    """
    r = correlation(x, y)
    spread_y = standard_deviation(y)
    spread_x = standard_deviation(x)
    slope = r * spread_y / spread_x
    intercept = mean(y) - slope * mean(x)
    return intercept, slope
def least_squares_fit(x, y):
    """Compute the least-squares alpha and beta for training values x and y.

    beta is the correlation of x and y multiplied by standard_deviation(y)
    and divided by standard_deviation(x); alpha is mean(y) - beta * mean(x).

    Returns:
        tuple: The pair (alpha, beta).
    """
    slope = correlation(x, y) * standard_deviation(y) / standard_deviation(x)
    return mean(y) - slope * mean(x), slope
def test_should_correctly_compute_correlation(self):
    # The correlation of the fixture data must match the reference value
    # to the default precision of assertAlmostEqual.
    r = statistics.correlation(self.x_data, self.y_data)
    self.assertAlmostEqual(r, 0.9543158)
    # Its square is compared to four decimal places only.
    self.assertAlmostEqual(r ** 2, 0.9107, places=4)