def main():
    """
    Linear regression example with plot.

    Fits a LinearRegression on twelve hard-coded (x, y) samples wrapped in
    ds-arrays, prints the collected coefficient and intercept, and draws the
    samples together with the fitted line evaluated on 1000 points.
    """
    # Example data
    samples = np.array([
        1000, 4000, 5000, 4500, 3000, 4000, 9000, 11000, 15000, 12000,
        7000, 3000
    ])
    targets = np.array([
        9914, 40487, 54324, 50044, 34719, 42551, 94871, 118914, 158484,
        131348, 78504, 36284
    ])

    # Column vectors, each wrapped in a ds-array built with (4, 1)
    x_ds = ds.array(samples[:, np.newaxis], (4, 1))
    y_ds = ds.array(targets[:, np.newaxis], (4, 1))

    reg = LinearRegression()
    reg.fit(x_ds, y_ds)
    coef = reg.coef_.collect()
    intercept = reg.intercept_.collect()
    print(coef, intercept)

    # plot_result:
    scatter(samples, targets, marker='x')
    x_mesh = np.linspace(min(samples), max(samples), 1000)
    fitted = []
    for point in x_mesh:
        fitted.append(coef * point + intercept)
    plot(x_mesh, fitted)
    show()
def test_fit_and_predict(self):
    """Tests LinearRegression's fit() and predict().

    The training points are expected to yield y = 0.6 * x + 0.3; the
    prediction is then checked for x = 3 and x = 5.
    """
    block_shape = (2, 2)
    features = np.array([1, 2, 3, 4, 5]).reshape(-1, 1)
    targets = np.array([2, 1, 1, 2, 4.5]).reshape(-1, 1)

    x = ds.array(x=features, block_size=block_shape)
    y = ds.array(x=targets, block_size=block_shape)

    reg = LinearRegression()
    reg.fit(x, y)

    # Replace the fitted attributes with the values returned by
    # compss_wait_on before comparing them (y = 0.6 * x + 0.3).
    reg.coef_ = compss_wait_on(reg.coef_)
    reg.intercept_ = compss_wait_on(reg.intercept_)
    self.assertTrue(np.allclose(reg.coef_, 0.6))
    self.assertTrue(np.allclose(reg.intercept_, 0.3))

    queries = np.array([3, 5]).reshape(-1, 1)
    test_data = ds.array(x=queries, block_size=block_shape)
    pred = reg.predict(test_data).collect()
    self.assertTrue(np.allclose(pred, [2.1, 3.3]))
def test_linear_regression(self):
    """
    Tests linear regression fit() and predict() on ds-arrays that were
    made persistent under the hecuba_dislib keyspace.
    """
    # Truncate hecuba.istorage and drop the hecuba_dislib keyspace first.
    for statement in ("TRUNCATE TABLE hecuba.istorage",
                      "DROP KEYSPACE IF EXISTS hecuba_dislib"):
        config.session.execute(statement)

    x_data = np.array([1, 2, 3, 4, 5]).reshape(-1, 1)
    y_data = np.array([2, 1, 1, 2, 4.5]).reshape(-1, 1)

    # Blocks hold a third of the rows (integer division) and every column.
    n_rows, n_cols = x_data.shape
    block_size = (n_rows // 3, n_cols)

    x = ds.array(x=x_data, block_size=block_size)
    x.make_persistent(name="hecuba_dislib.test_array_x")
    y = ds.array(x=y_data, block_size=block_size)
    y.make_persistent(name="hecuba_dislib.test_array_y")

    reg = LinearRegression()
    reg.fit(x, y)

    # Expected model: y = 0.6 * x + 0.3
    reg.coef_ = compss_wait_on(reg.coef_)
    reg.intercept_ = compss_wait_on(reg.intercept_)
    self.assertTrue(np.allclose(reg.coef_, 0.6))
    self.assertTrue(np.allclose(reg.intercept_, 0.3))

    queries = np.array([3, 5]).reshape(-1, 1)
    test_data = ds.array(x=queries, block_size=block_size)
    test_data.make_persistent(name="hecuba_dislib.test_array_test")
    pred = reg.predict(test_data).collect()
    self.assertTrue(np.allclose(pred, [2.1, 3.3]))
def test_sparse(self):
    """Tests LR raises NotImplementedError for sparse data.

    Both fit() and predict() are checked; predict() is called only after
    the model has been fitted on a dense array.
    """
    np.random.seed(0)
    sparse_matrix = sp_random(10, 1, density=0.5)
    sparse_arr = ds.array(x=sparse_matrix, block_size=(5, 1))
    reg = LinearRegression()

    # fit() with sparse input
    self.assertRaises(NotImplementedError, reg.fit, sparse_arr, sparse_arr)

    # Fit on dense data, then predict() with sparse input
    dense_arr = random_array((10, 1), (5, 1))
    reg.fit(dense_arr, dense_arr)
    self.assertRaises(NotImplementedError, reg.predict, sparse_arr)
def main():
    """
    Measure LinearRegression.fit on the KDD99 training file.

    Loads the CSV with blocks of shape (11482, 122), uses column 121 as the
    target and columns 0 to 120 as the features, and hands the fit call to
    performance.measure under the labels "LR" and "KDD99".
    """
    dataset = ds.load_txt_file(
        "/gpfs/projects/bsc19/COMPSs_DATASETS/dislib/kdd99/train.csv",
        block_size=(11482, 122))

    # Take the target column first, then rebind the name to the features.
    labels = dataset[:, 121:122]
    dataset = dataset[:, :121]

    estimator = LinearRegression(arity=48)
    performance.measure("LR", "KDD99", estimator.fit, dataset, labels)
def test_multivariate_no_intercept(self):
    """Tests fit() and predict(), multivariate, fit_intercept=False."""
    n_block, m_block = 2, 2
    features = np.array([[1, 2], [2, 0], [3, 1], [4, 4], [5, 3]])
    targets = np.array([2, 1, 1, 2, 4.5])
    x = ds.array(x=features, block_size=(n_block, m_block))
    y = ds.array(x=targets, block_size=(n_block, 1))

    reg = LinearRegression(fit_intercept=False)
    reg.fit(x, y)

    fitted_coef = reg.coef_.collect()
    self.assertTrue(np.allclose(fitted_coef, [0.48305085, 0.30367232]))
    fitted_intercept = reg.intercept_.collect()
    self.assertTrue(np.allclose(fitted_intercept, 0))

    # (samples, block size, expected prediction):
    # one sample first, then multiple samples.
    cases = (
        (np.array([3, 2]), (1, m_block), [2.05649718]),
        (np.array([[3, 2], [4, 4], [1, 3]]), (n_block, m_block),
         [2.05649718, 3.14689266, 1.3940678]),
    )
    for samples, block_size, expected in cases:
        test_data = ds.array(x=samples, block_size=block_size)
        pred = reg.predict(test_data).collect()
        self.assertTrue(np.allclose(pred, expected))
def test_multivariate(self):
    """Tests fit() and predict(), multivariate."""
    n_block, m_block = 2, 2
    features = np.array([[1, 2], [2, 0], [3, 1], [4, 4], [5, 3]])
    targets = np.array([2, 1, 1, 2, 4.5])
    x = ds.array(x=features, block_size=(n_block, m_block))
    y = ds.array(x=targets, block_size=(n_block, 1))

    reg = LinearRegression()
    reg.fit(x, y)

    fitted_coef = reg.coef_.collect()
    self.assertTrue(np.allclose(fitted_coef, [0.421875, 0.296875]))
    fitted_intercept = reg.intercept_.collect()
    self.assertTrue(np.allclose(fitted_intercept, 0.240625))

    # (samples, block size, expected prediction):
    # one sample first, then multiple samples.
    cases = (
        (np.array([3, 2]), (1, m_block), 2.1),
        (np.array([[3, 2], [4, 4], [1, 3]]), (n_block, m_block),
         [2.1, 3.115625, 1.553125]),
    )
    for samples, block_size, expected in cases:
        test_data = ds.array(x=samples, block_size=block_size)
        pred = reg.predict(test_data).collect()
        self.assertTrue(np.allclose(pred, expected))
def test_univariate_no_intercept(self):
    """Tests fit() and predict(), univariate, fit_intercept=False."""
    n_block, m_block = 2, 1
    features = np.array([1, 2, 3, 4, 5])
    targets = np.array([2, 1, 1, 2, 4.5])
    x = ds.array(x=features, block_size=(n_block, m_block))
    y = ds.array(x=targets, block_size=(n_block, m_block))

    reg = LinearRegression(fit_intercept=False)
    reg.fit(x, y)

    fitted_coef = reg.coef_.collect()
    self.assertTrue(np.allclose(fitted_coef, 0.68181818))
    fitted_intercept = reg.intercept_.collect()
    self.assertTrue(np.allclose(fitted_intercept, 0))

    # (samples, block size, expected prediction):
    # one sample first, then multiple samples.
    cases = (
        (np.array([3]), (1, 1), 2.04545455),
        (np.array([3, 5, 6]), (n_block, m_block),
         [2.04545455, 3.4090909, 4.0909091]),
    )
    for samples, block_size, expected in cases:
        test_data = ds.array(x=samples, block_size=block_size)
        pred = reg.predict(test_data).collect()
        self.assertTrue(np.allclose(pred, expected))
def test_univariate(self):
    """Tests fit() and predict(), univariate."""
    n_block, m_block = 2, 1
    features = np.array([1, 2, 3, 4, 5])
    targets = np.array([2, 1, 1, 2, 4.5])
    x = ds.array(x=features, block_size=(n_block, m_block))
    y = ds.array(x=targets, block_size=(n_block, m_block))

    reg = LinearRegression()
    reg.fit(x, y)

    fitted_coef = reg.coef_.collect()
    self.assertTrue(np.allclose(fitted_coef, 0.6))
    fitted_intercept = reg.intercept_.collect()
    self.assertTrue(np.allclose(fitted_intercept, 0.3))

    # (samples, block size, expected prediction):
    # one sample first, then multiple samples.
    cases = (
        (np.array([3]), (1, 1), 2.1),
        (np.array([3, 5, 6]), (n_block, m_block), [2.1, 3.3, 3.9]),
    )
    for samples, block_size, expected in cases:
        test_data = ds.array(x=samples, block_size=block_size)
        pred = reg.predict(test_data).collect()
        self.assertTrue(np.allclose(pred, expected))
def main():
    """
    Linear regression example with plot.

    Loads twelve hard-coded (x, y) samples through load_data with
    subset_size=4, fits a LinearRegression on the result, prints the
    coefficient and intercept returned by compss_wait_on, and draws the
    samples together with the fitted line evaluated on 1000 points.
    """
    # Example data
    samples = np.array([
        1000, 4000, 5000, 4500, 3000, 4000, 9000, 11000, 15000, 12000,
        7000, 3000
    ])
    targets = np.array([
        9914, 40487, 54324, 50044, 34719, 42551, 94871, 118914, 158484,
        131348, 78504, 36284
    ])

    dataset = load_data(x=samples[:, np.newaxis], y=targets, subset_size=4)

    reg = LinearRegression()
    reg.fit(dataset)

    # Replace the fitted attributes with the values from compss_wait_on.
    reg.coef_ = compss_wait_on(reg.coef_)
    reg.intercept_ = compss_wait_on(reg.intercept_)
    print(reg.coef_, reg.intercept_)

    # plot_result:
    scatter(samples, targets, marker='x')
    x_mesh = np.linspace(min(samples), max(samples), 1000)
    fitted = []
    for point in x_mesh:
        fitted.append(reg.coef_ * point + reg.intercept_)
    plot(x_mesh, fitted)
    show()
def test_multivariate_multiobjective(self):
    """Tests fit() and predict(), multivariate, multiobjective.

    Predictions are checked first (one sample, then several), and the
    fitted coef_ and intercept_ values are checked afterwards.
    """
    n_block, m_block = 2, 2
    features = np.array([[1, 2, 3], [2, 0, 4], [3, 1, 8],
                         [4, 4, 2], [5, 3, 1], [2, 7, 1]])
    targets = np.array([[2, 0, 3], [1, 5, 2], [1, 3, 4],
                        [2, 7, 9], [4.5, -1, 4], [0, 0, 0]])
    x = ds.array(x=features, block_size=(n_block, m_block))
    y = ds.array(x=targets, block_size=(n_block, m_block))

    reg = LinearRegression()
    reg.fit(x, y)

    # (samples, block size, expected prediction):
    # one sample first, then multiple samples.
    cases = (
        (np.array([3, 2, 1]), (1, m_block),
         [3.0318415, 1.97164872, 3.85410906]),
        (np.array([[3, 2, 1], [4, 3, 3], [1, 1, 1]]), (n_block, m_block),
         [[3.0318415, 1.97164872, 3.85410906],
          [2.5033157, 2.65809327, 5.05310495],
          [2.145797, 1.4840121, 1.5739791]]),
    )
    for samples, block_size, expected in cases:
        test_data = ds.array(x=samples, block_size=block_size)
        pred = reg.predict(test_data).collect()
        self.assertTrue(np.allclose(pred, expected))

    # Check attributes values
    expected_coef = [[0.65034768, 0.34673933, 1.22176283],
                     [-0.41465084, -0.20584208, -0.16339571],
                     [-0.38211131, 0.27277365, 0.07031439]]
    expected_intercept = [2.29221145, 1.07034124, 0.44529761]
    self.assertTrue(np.allclose(reg.coef_.collect(), expected_coef))
    self.assertTrue(
        np.allclose(reg.intercept_.collect(), expected_intercept))