def pyfora_linear_regression_test(self):
    """Check linear-regression coefficients across several evaluation paths.

    Seeds the RNG with 42 and generates 100 rows following
    ``y = 5 * x1 + 2 * x2 - 8 + noise``. The coefficients computed locally
    on PurePython data frames are then compared against:

    * the same closure run through ``self.evaluateWithExecutor``,
    * ``LinearRegression.linearRegression`` applied to pandas DataFrames,
    * reference values recorded from scikit-learn.
    """
    random.seed(42)
    row_count = 100

    # Each row draws x1, x2 and noise in that order; the order must not
    # change or the seeded data set (and the hard-coded reference values
    # below) would no longer match.
    samples = [
        (random.uniform(-10, 10), random.uniform(-10, 10), random.uniform(-1, 1))
        for _ in range(row_count)
        ]

    x_col_1 = [sample[0] for sample in samples]
    x_col_2 = [sample[1] for sample in samples]
    y_col = [
        sample[0] * 5 + sample[1] * 2 - 8 + sample[2]
        for sample in samples
        ]

    def computeCoefficients():
        # Closure over the generated columns so that the identical code can
        # be evaluated both locally and via the executor.
        features = PurePandas.PurePythonDataFrame(
            [x_col_1, x_col_2], ["x1", "x2"]
            )
        targets = PurePandas.PurePythonDataFrame([y_col], ["y"])
        return LinearRegression.linearRegression(features, targets)

    local_result = computeCoefficients()

    # Same closure, evaluated through the executor.
    executor_result = self.evaluateWithExecutor(computeCoefficients)
    self.assertArraysAreAlmostEqual(local_result, executor_result)

    # Same regression, fed with pandas DataFrames instead.
    pandas_features = pandas.DataFrame({
        'x1': x_col_1,
        'x2': x_col_2
        })
    pandas_targets = pandas.DataFrame({
        'y': y_col
        })
    pandas_result = LinearRegression.linearRegression(
        pandas_features, pandas_targets
        )
    self.assertArraysAreAlmostEqual(local_result, pandas_result)

    # verified using sklearn.linear_model.LinearRegression, on nRows = 100
    expected_from_scikit = numpy.array(
        [[4.96925412, 2.00279298, -7.98208391]]
        )
    self.assertArraysAreAlmostEqual(local_result, expected_from_scikit)
def computeCoefficients():
    # Builds the predictor frame (columns "x1", "x2") and the response frame
    # (column "y") from the enclosing scope's x_col_1, x_col_2 and y_col,
    # then returns whatever LinearRegression.linearRegression produces.
    predictor_columns = [x_col_1, x_col_2]
    predictor_names = ["x1", "x2"]
    features = PurePandas.PurePythonDataFrame(predictor_columns, predictor_names)

    response_columns = [y_col]
    response_names = ["y"]
    targets = PurePandas.PurePythonDataFrame(response_columns, response_names)

    return LinearRegression.linearRegression(features, targets)