def fit(self, data:[[float]], y:[float]) -> ([{}], [float], [float]):
    """Fit the model to the predictor series ``data`` and the target ``y``.

    The inputs are validated first, then the following attributes are
    populated on the instance, in this order:

    * ``__variables__``   - number of predictor series (``len(data)``).
    * ``__corr_mat__``    - ``Statistics.correlation_matrix`` of ``[y, *data]``.
    * ``__partial_mat__`` - ``Statistics.partial_correlation_matrix`` of the
      same merged series.
    * ``__stats__``       - per-series statistics (target first).
    * ``__params__``      - fitted parameters derived from the regression
      matrices.
    * ``__ycap__``        - ``self.predicts(data)`` using the new parameters.
    * ``__model_stats__`` - statistics of ``y`` against ``__ycap__``.

    :param data: predictor series, one inner list per variable.
    :param y: target series.
    :return: the tuple ``(__stats__, __params__, __ycap__)``.
    """
    logging.info("log: Linear Regression Model Fit Invoked.")
    self.__validate(data, y)
    self.__variables__ = len(data)

    # Row 0 is the target and rows 1..n are the predictors in their given
    # order.  Built directly instead of filling a throwaway None matrix.
    merge_data = [y, *data]
    self.__corr_mat__ = Statistics.correlation_matrix(merge_data)
    self.__partial_mat__ = Statistics.partial_correlation_matrix(merge_data)

    # __stats reads self.__partial_mat__, so it must run after the line above.
    self.__stats__ = self.__stats(data, y)

    m1, m2 = self.__regression_matrix(data, y)
    inverse_transposed = Matrix.transpose(Matrix.inverse(m1))
    m2_column = Matrix.transpose([m2])
    product = Matrix.multiply(inverse_transposed, m2_column)
    # The product is a single column; transpose it back and take the one row.
    self.__params__ = Matrix.transpose(product)[0]

    # predicts() relies on the parameters stored just above.
    self.__ycap__ = self.predicts(data)
    self.__model_stats__ = self.__model_stats(y, self.__ycap__)
    return (self.__stats__, self.__params__, self.__ycap__)
def test_var(self):
    """Check Statistics.variance against the stdlib variance and its root."""
    data_1 = [1, 2, 3, 4, 5, 6, 7, 8, 9]

    stdlib_variance = statistics.variance(data_1)
    reference = (stdlib_variance, math.sqrt(stdlib_variance))
    computed = Statistics.variance(data_1)
    self.assertEqual(computed, reference)

    # Passing a precomputed mean must yield the same pair.
    computed_with_mean = Statistics.variance(data_1, Statistics.mean(data_1))
    self.assertEqual(computed, computed_with_mean)
def pcf_data(ip, n):
    """Return ``[lag, value]`` pairs for every lag from 1 to ``n``.

    For each lag, ``ip`` is shifted with ``shift(ip, lag)``; both the original
    and the shifted series are trimmed of their first ``lag`` entries and
    passed to ``Statistics.covariance``.  The second element of that result
    is recorded next to the lag.

    :param ip: input series.
    :param n: highest lag to evaluate (inclusive).
    :return: list of ``[lag, value]`` lists ordered by lag.
    """
    def second_covariance_item(lag):
        shifted = shift(ip, lag)
        return list(Statistics.covariance(ip[lag:], shifted[lag:]))[1]

    return [[lag, second_covariance_item(lag)] for lag in range(1, n + 1)]
def test_partial_correlation2(self):
    """Partial correlation of y and x2 given [x1] rounds to 0.335."""
    y = [64, 71, 53, 67, 55, 58, 77, 57, 56, 51, 76, 68]
    x1 = [57, 59, 49, 62, 51, 50, 55, 48, 52, 42, 61, 57]
    x2 = [8, 10, 6, 11, 8, 7, 10, 9, 10, 6, 12, 9]

    rounded = round(Statistics.partial_correlation(y, x2, [x1]), 3)

    self.assertEqual(rounded, 0.335)
def test_partial_correlation1(self):
    """Partial correlation of y and x1 given [x2] rounds to 0.533."""
    y = [64, 71, 53, 67, 55, 58, 77, 57, 56, 51, 76, 68]
    x1 = [57, 59, 49, 62, 51, 50, 55, 48, 52, 42, 61, 57]
    x2 = [8, 10, 6, 11, 8, 7, 10, 9, 10, 6, 12, 9]

    rounded = round(Statistics.partial_correlation(y, x1, [x2]), 3)

    self.assertEqual(rounded, 0.533)
def test_mean(self):
    """Statistics.mean equals sum/len and rejects string input with TypeError."""
    integers = [1, 2, 3, 4, 5, 6, 7, 8, 9]
    reference = sum(integers) / len(integers)
    self.assertEqual(Statistics.mean(integers), reference)

    self.assertRaises(TypeError, Statistics.mean, ["a", "b"])

    floats = [
        10.5, 2.1, 3.2, 0.4, 5.0000123, 6.78, 7.5234524, 8.2654672,
        9.1645164,
    ]
    reference = sum(floats) / len(floats)
    self.assertEqual(Statistics.mean(floats), reference)
def __stats(self, data: [[float]], y: [float]) -> []:
    """Collect descriptive statistics for the target and every predictor.

    Each entry is the result of ``Statistics.describe`` extended with three
    keys: ``"covariance"``, ``"r"`` and ``"pr"``.  Entry 0 belongs to ``y``;
    entry ``k`` (k >= 1) belongs to ``data[k - 1]``.

    Reads ``self.__partial_mat__``, so that attribute must already be set.

    :param data: predictor series, one inner list per variable.
    :param y: target series.
    :return: list of statistics mappings, target first.
    """
    target = Statistics.describe(y)
    # Only the first element of the returned pair is stored for the target.
    target_variance, _ = Statistics.variance(y, target["mean"])
    target["covariance"] = target_variance
    target["r"] = 1.0
    target["pr"] = 1.0

    collected = [target]
    for position, series in enumerate(data, start=1):
        described = Statistics.describe(series)
        cov, corr = Statistics.covariance(
            series, y, described["mean"], target["mean"]
        )
        described["covariance"] = cov
        described["r"] = corr
        # Row 0 of the partial matrix is the target row; column `position`
        # is this predictor.
        described["pr"] = self.__partial_mat__[0][position]
        collected.append(described)
    return collected
def test_partial_correlation_4data(self):
    """Partial correlation of x1 and x2 given [x3, x4] rounds to -0.961."""
    x1 = [75, 83, 85, 85, 92, 97, 99]
    x2 = [1.85, 1.25, 1.5, 1.75, 1.15, 1.75, 1.6]
    x3 = [16, 20, 25, 27, 32, 48, 48]
    x4 = [14, 21, 29, 16, 21, 47, 47]

    rounded = round(Statistics.partial_correlation(x1, x2, [x3, x4]), 3)

    self.assertEqual(rounded, -0.961)
def test_describe(self):
    """Statistics.describe reports count, mean, variance, std, min and max."""
    summary = Statistics.describe([1, 2, 3, 4, 5, 6, 7, 8, 9])

    # Checked in this order; the first mismatch fails the test.
    expectations = (
        ("count", 9),
        ("mean", 5.0),
        ("variance", 7.5),
        ("std", 2.7386127875258306),
        ("min", 1),
        ("max", 9),
    )
    for key, value in expectations:
        self.assertEqual(summary[key], value)
def test_partial_correlation_coefficient_greater_than_r(self):
    """Partial correlation may be larger in magnitude than the plain r.

    For this data set the plain correlation of ``hl`` and ``disp`` rounds to
    -0.23787 while their partial correlation given ``deg`` and ``bc`` rounds
    to -0.67209, both from the direct call and from the partial matrix.
    """
    #https://stackoverflow.com/questions/51236206/partial-correlation-values-are-larger-than-normal-correlation-in-r
    hl = [7, 15, 19, 15, 21, 22, 57, 15, 20, 18]
    disp = [
        0.000, 0.964, 0.000, 0.000, 0.921,
        0.000, 0.000, 1.006, 0.000, 1.011,
    ]
    deg = [9, 2, 3, 4, 1, 3, 1, 3, 6, 1]
    bc = [
        1.78e-02, 1.05e-06, 1.37e-05, 7.18e-03, 0.00e+00,
        0.00e+00, 0.00e+00, 4.48e-03, 2.10e-06, 0.00e+00,
    ]

    _, plain_r = Statistics.covariance(hl, disp)
    self.assertEqual(round(plain_r, 5), -0.23787)

    partial_r = Statistics.partial_correlation(hl, disp, [deg, bc])
    self.assertEqual(round(partial_r, 5), -0.67209)

    matrix = Statistics.partial_correlation_matrix([hl, disp, deg, bc])
    self.assertEqual(round(matrix[0][1], 5), -0.67209)
def test_correlation_matrix_4data(self):
    """Statistics.correlation_matrix of four series equals the known matrix."""
    x1 = [75, 83, 85, 85, 92, 97, 99]
    x2 = [1.85, 1.25, 1.5, 1.75, 1.15, 1.75, 1.6]
    x3 = [16, 20, 25, 27, 32, 48, 48]
    x4 = [14, 21, 29, 16, 21, 47, 47]

    # Pairwise coefficients; the expected matrix is symmetric with a unit
    # diagonal, so each value appears twice.
    r12 = -0.15534261150561485
    r13 = 0.9627972334240521
    r14 = 0.8520652907262152
    r23 = 0.10551846545168846
    r24 = 0.13506331370887886
    r34 = 0.9088795720039011
    reference = [
        [1.0, r12, r13, r14],
        [r12, 1.0, r23, r24],
        [r13, r23, 1.0, r34],
        [r14, r24, r34, 1.0],
    ]

    computed = Statistics.correlation_matrix([x1, x2, x3, x4])
    self.assertEqual(computed, reference)
def test_partial_correlation_matrix_4data(self):
    """Statistics.partial_correlation_matrix equals the known 4x4 matrix."""
    x1 = [75, 83, 85, 85, 92, 97, 99]
    x2 = [1.85, 1.25, 1.5, 1.75, 1.15, 1.75, 1.6]
    x3 = [16, 20, 25, 27, 32, 48, 48]
    x4 = [14, 21, 29, 16, 21, 47, 47]

    # Literals are kept exactly as recorded: the mirrored entries
    # [0][1]/[1][0] and [1][3]/[3][1] differ in their last digit.
    reference = [
        [1.0, -0.9613273119340023, 0.9866069072448754, -0.3903841491779091],
        [-0.9613273119340022, 1.0, 0.9465782218637125, -0.3513457030175387],
        [0.9866069072448754, 0.9465782218637125, 1.0, 0.5215083165976273],
        [-0.3903841491779091, -0.35134570301753876, 0.5215083165976273, 1.0],
    ]

    computed = Statistics.partial_correlation_matrix([x1, x2, x3, x4])
    self.assertEqual(computed, reference)
def test_covariance(self):
    """Statistics.covariance of a series with itself returns (7.5, 1)."""
    xs = [1, 2, 3, 4, 5, 6, 7, 8, 9]
    ys = [1, 2, 3, 4, 5, 6, 7, 8, 9]
    reference = (7.5, 1)

    self.assertEqual(Statistics.covariance(xs, ys), reference)

    # Series of different lengths are rejected.
    self.assertRaises(ValueError, Statistics.covariance, [1, 2, 3], [1, 2])

    # Supplying precomputed means must not change the result.
    computed = Statistics.covariance(
        xs, ys, Statistics.mean(xs), Statistics.mean(ys)
    )
    self.assertEqual(computed, reference)
def __model_stats(self, y:[float], ycap:[float]) -> {}:
    """Describe the element-wise differences ``y[i] - ycap[i]``.

    :param y: observed values.
    :param ycap: predicted values, indexed in step with ``y``.
    :return: the result of ``Statistics.describe`` on the differences.
    """
    # Index into ycap (rather than zip) so a too-short ycap still raises.
    residuals = [observed - ycap[i] for i, observed in enumerate(y)]
    return Statistics.describe(residuals)
def test_yule_walker2(self):
    """yule_walker_eq(0.774, 0.819, 0.802) returns 0.3418386394082465."""
    computed = Statistics.yule_walker_eq(0.774, 0.819, 0.802)
    reference = 0.3418386394082465
    self.assertEqual(computed, reference)
def test_yule_walker1(self):
    """yule_walker_eq(0.819, 0.774, 0.802) returns 0.5241753109234627."""
    computed = Statistics.yule_walker_eq(0.819, 0.774, 0.802)
    reference = 0.5241753109234627
    self.assertEqual(computed, reference)
def test_correlation_coefficient(self):
    """Statistics.correlation_coefficient(7.5, 7.5, 7.5) equals 1."""
    # The two data lists previously declared here were never used; the call
    # only takes the three numeric arguments below.
    expected = 1
    computed = Statistics.correlation_coefficient(7.5, 7.5, 7.5)
    self.assertEqual(computed, expected)
def test_partial_correlation_none_data(self):
    """Statistics.partial_correlation raises ValueError for invalid input.

    Covers ``None`` arguments, empty series and series of differing lengths.
    """
    invalid_argument_sets = (
        (None, [1], [[2]]),
        ([1], None, [[2]]),
        ([1], [1], None),
        ([], [1], [[1]]),
        ([1], [], [[1]]),
        ([1], [1], [[]]),
        ([1], [1], [[1, 2]]),
        ([1], [1, 2], [[1, 2]]),
    )
    for arguments in invalid_argument_sets:
        with self.assertRaises(ValueError):
            Statistics.partial_correlation(*arguments)
def test_yule_walker_none_data(self):
    """Statistics.yule_walker_eq raises ValueError for invalid input.

    Each argument position is tried with ``None`` and with a value outside
    the interval [-1, 1].
    """
    invalid_argument_sets = (
        (None, 0.774, 0.802),
        (1, None, 0.802),
        (1, 0.774, None),
        (1.2, 0.774, 0.802),
        (1, 1.2, 0.802),
        (1, 0.774, -1.2),
    )
    for arguments in invalid_argument_sets:
        with self.assertRaises(ValueError):
            Statistics.yule_walker_eq(*arguments)
def test_none_data(self):
    """mean, variance and covariance raise ValueError for invalid input.

    Covers ``None`` and empty series for all three, plus series of differing
    lengths for covariance.
    """
    invalid_calls = (
        (Statistics.mean, (None,)),
        (Statistics.mean, ([],)),
        (Statistics.variance, (None,)),
        (Statistics.variance, ([],)),
        (Statistics.covariance, (None, [1])),
        (Statistics.covariance, ([], [1])),
        (Statistics.covariance, ([1], None)),
        (Statistics.covariance, ([1], [])),
        (Statistics.covariance, ([1], [1, 2])),
    )
    for function, arguments in invalid_calls:
        with self.assertRaises(ValueError):
            function(*arguments)