예제 #1
0
def main():
    """
    Fit a linear regression on a small hard-coded sample and plot it.

    Twelve (x, y) pairs are turned into single-column ds-arrays, a
    LinearRegression is fitted on them, the collected coefficient and
    intercept are printed, and the samples are finally shown as a scatter
    plot together with the fitted line.
    """

    # Example data
    x = np.array([
        1000, 4000, 5000, 4500, 3000, 4000, 9000, 11000, 15000, 12000, 7000,
        3000
    ])
    y = np.array([
        9914, 40487, 54324, 50044, 34719, 42551, 94871, 118914, 158484, 131348,
        78504, 36284
    ])

    # Each vector becomes one column; (4, 1) is passed as the second
    # argument of ds.array (presumably the block size -- confirm).
    x_ds = ds.array(x.reshape(-1, 1), (4, 1))
    y_ds = ds.array(y.reshape(-1, 1), (4, 1))

    model = LinearRegression()
    model.fit(x_ds, y_ds)
    coef = model.coef_.collect()
    intercept = model.intercept_.collect()
    print(coef, intercept)

    # plot_result:
    scatter(x, y, marker='x')
    x_mesh = np.linspace(x.min(), x.max(), 1000)
    # Evaluate the fitted line point by point over the mesh.
    fitted = [coef * x_val + intercept for x_val in x_mesh]
    plot(x_mesh, fitted)
    show()
예제 #2
0
    def test_fit_and_predict(self):
        """Check fit() and predict() on a five-sample univariate problem."""
        block_shape = (2, 2)

        samples = np.array([1, 2, 3, 4, 5]).reshape(-1, 1)
        targets = np.array([2, 1, 1, 2, 4.5]).reshape(-1, 1)

        x = ds.array(x=samples, block_size=block_shape)
        y = ds.array(x=targets, block_size=block_shape)

        reg = LinearRegression()
        reg.fit(x, y)

        # Expected model: y = 0.6 * x + 0.3
        # The fitted attributes are replaced on the estimator by whatever
        # compss_wait_on returns, so predict() below uses those values.
        reg.coef_ = compss_wait_on(reg.coef_)
        reg.intercept_ = compss_wait_on(reg.intercept_)

        coef_ok = np.allclose(reg.coef_, 0.6)
        self.assertTrue(coef_ok)
        intercept_ok = np.allclose(reg.intercept_, 0.3)
        self.assertTrue(intercept_ok)

        new_samples = np.array([3, 5]).reshape(-1, 1)
        test_data = ds.array(x=new_samples, block_size=block_shape)
        pred = reg.predict(test_data).collect()

        expected = [2.1, 3.3]
        self.assertTrue(np.allclose(pred, expected))
예제 #3
0
    def test_linear_regression(self):
        """Check fit() and predict() when the ds-arrays are made persistent.

        The storage tables are reset first, then the training and test
        arrays are made persistent under the ``hecuba_dislib`` names before
        being handed to the estimator. Expected values are those of the
        line y = 0.6 * x + 0.3.
        """
        # Start from a clean storage state.
        for statement in ("TRUNCATE TABLE hecuba.istorage",
                          "DROP KEYSPACE IF EXISTS hecuba_dislib"):
            config.session.execute(statement)

        x_data = np.array([1, 2, 3, 4, 5]).reshape(-1, 1)
        y_data = np.array([2, 1, 1, 2, 4.5]).reshape(-1, 1)

        # A third of the rows (integer division) and every column per block.
        n_rows, n_cols = x_data.shape
        block_size = (n_rows // 3, n_cols)

        x = ds.array(x=x_data, block_size=block_size)
        x.make_persistent(name="hecuba_dislib.test_array_x")
        y = ds.array(x=y_data, block_size=block_size)
        y.make_persistent(name="hecuba_dislib.test_array_y")

        reg = LinearRegression()
        reg.fit(x, y)

        # y = 0.6 * x + 0.3
        reg.coef_ = compss_wait_on(reg.coef_)
        reg.intercept_ = compss_wait_on(reg.intercept_)
        coef_ok = np.allclose(reg.coef_, 0.6)
        self.assertTrue(coef_ok)
        intercept_ok = np.allclose(reg.intercept_, 0.3)
        self.assertTrue(intercept_ok)

        x_test = np.array([3, 5]).reshape(-1, 1)
        test_data = ds.array(x=x_test, block_size=block_size)
        test_data.make_persistent(name="hecuba_dislib.test_array_test")
        pred = reg.predict(test_data).collect()

        expected = [2.1, 3.3]
        self.assertTrue(np.allclose(pred, expected))
예제 #4
0
 def test_sparse(self):
     """fit() and predict() must raise NotImplementedError on sparse input."""
     np.random.seed(0)
     sparse_data = sp_random(10, 1, density=0.5)
     sparse_arr = ds.array(x=sparse_data, block_size=(5, 1))
     model = LinearRegression()

     # Fitting on sparse data is rejected.
     with self.assertRaises(NotImplementedError):
         model.fit(sparse_arr, sparse_arr)

     # Fit on dense data first, then check that predicting on sparse
     # data is rejected as well.
     dense_arr = random_array((10, 1), (5, 1))
     model.fit(dense_arr, dense_arr)
     with self.assertRaises(NotImplementedError):
         model.predict(sparse_arr)
예제 #5
0
def main():
    """
    Measure LinearRegression.fit on the KDD99 training file.

    The CSV is loaded as one ds-array with blocks of (11482, 122). Column
    121 is used as the target and columns 0-120 as the features, and the
    fit call is handed to performance.measure under the labels "LR" and
    "KDD99".
    """
    n_features = 121

    kdd = ds.load_txt_file(
        "/gpfs/projects/bsc19/COMPSs_DATASETS/dislib/kdd99/train.csv",
        block_size=(11482, 122))

    # Split the target column off the feature columns.
    y_kdd = kdd[:, n_features:n_features + 1]
    x_kdd = kdd[:, :n_features]

    regression = LinearRegression(arity=48)

    performance.measure("LR", "KDD99", regression.fit, x_kdd, y_kdd)
예제 #6
0
    def test_multivariate_no_intercept(self):
        """Check fit() and predict() with two features and no intercept."""
        rows_per_block, cols_per_block = 2, 2

        features = np.array([[1, 2], [2, 0], [3, 1], [4, 4], [5, 3]])
        targets = np.array([2, 1, 1, 2, 4.5])

        x = ds.array(x=features,
                     block_size=(rows_per_block, cols_per_block))
        y = ds.array(x=targets, block_size=(rows_per_block, 1))

        reg = LinearRegression(fit_intercept=False)
        reg.fit(x, y)

        expected_coef = [0.48305085, 0.30367232]
        self.assertTrue(np.allclose(reg.coef_.collect(), expected_coef))
        # With fit_intercept=False the intercept is expected to be zero.
        self.assertTrue(np.allclose(reg.intercept_.collect(), 0))

        # Predict one sample
        single = np.array([3, 2])
        test_data = ds.array(x=single, block_size=(1, cols_per_block))
        pred = reg.predict(test_data).collect()
        self.assertTrue(np.allclose(pred, [2.05649718]))

        # Predict multiple samples
        several = np.array([[3, 2], [4, 4], [1, 3]])
        test_data = ds.array(x=several,
                             block_size=(rows_per_block, cols_per_block))
        pred = reg.predict(test_data).collect()
        expected_pred = [2.05649718, 3.14689266, 1.3940678]
        self.assertTrue(np.allclose(pred, expected_pred))
예제 #7
0
    def test_multivariate(self):
        """Check fit() and predict() with two features and an intercept."""
        rows_per_block, cols_per_block = 2, 2

        features = np.array([[1, 2], [2, 0], [3, 1], [4, 4], [5, 3]])
        targets = np.array([2, 1, 1, 2, 4.5])

        x = ds.array(x=features,
                     block_size=(rows_per_block, cols_per_block))
        y = ds.array(x=targets, block_size=(rows_per_block, 1))

        reg = LinearRegression()
        reg.fit(x, y)

        expected_coef = [0.421875, 0.296875]
        self.assertTrue(np.allclose(reg.coef_.collect(), expected_coef))
        self.assertTrue(np.allclose(reg.intercept_.collect(), 0.240625))

        # Predict one sample
        single = np.array([3, 2])
        test_data = ds.array(x=single, block_size=(1, cols_per_block))
        pred = reg.predict(test_data).collect()
        self.assertTrue(np.allclose(pred, 2.1))

        # Predict multiple samples
        several = np.array([[3, 2], [4, 4], [1, 3]])
        test_data = ds.array(x=several,
                             block_size=(rows_per_block, cols_per_block))
        pred = reg.predict(test_data).collect()
        expected_pred = [2.1, 3.115625, 1.553125]
        self.assertTrue(np.allclose(pred, expected_pred))
예제 #8
0
    def test_univariate_no_intercept(self):
        """Check fit() and predict() with one feature and no intercept."""
        block_shape = (2, 1)

        samples = np.array([1, 2, 3, 4, 5])
        targets = np.array([2, 1, 1, 2, 4.5])

        x = ds.array(x=samples, block_size=block_shape)
        y = ds.array(x=targets, block_size=block_shape)

        reg = LinearRegression(fit_intercept=False)
        reg.fit(x, y)

        self.assertTrue(np.allclose(reg.coef_.collect(), 0.68181818))
        # With fit_intercept=False the intercept is expected to be zero.
        self.assertTrue(np.allclose(reg.intercept_.collect(), 0))

        # Predict one sample
        single = np.array([3])
        test_data = ds.array(x=single, block_size=(1, 1))
        pred = reg.predict(test_data).collect()
        self.assertTrue(np.allclose(pred, 2.04545455))

        # Predict multiple samples
        several = np.array([3, 5, 6])
        test_data = ds.array(x=several, block_size=block_shape)
        pred = reg.predict(test_data).collect()
        expected_pred = [2.04545455, 3.4090909, 4.0909091]
        self.assertTrue(np.allclose(pred, expected_pred))
예제 #9
0
    def test_univariate(self):
        """Check fit() and predict() with one feature and an intercept."""
        block_shape = (2, 1)

        samples = np.array([1, 2, 3, 4, 5])
        targets = np.array([2, 1, 1, 2, 4.5])

        x = ds.array(x=samples, block_size=block_shape)
        y = ds.array(x=targets, block_size=block_shape)

        reg = LinearRegression()
        reg.fit(x, y)

        # The assertions below correspond to the line y = 0.6 * x + 0.3.
        self.assertTrue(np.allclose(reg.coef_.collect(), 0.6))
        self.assertTrue(np.allclose(reg.intercept_.collect(), 0.3))

        # Predict one sample
        single = np.array([3])
        test_data = ds.array(x=single, block_size=(1, 1))
        pred = reg.predict(test_data).collect()
        self.assertTrue(np.allclose(pred, 2.1))

        # Predict multiple samples
        several = np.array([3, 5, 6])
        test_data = ds.array(x=several, block_size=block_shape)
        pred = reg.predict(test_data).collect()
        expected_pred = [2.1, 3.3, 3.9]
        self.assertTrue(np.allclose(pred, expected_pred))
예제 #10
0
def main():
    """
    Fit a linear regression on a small hard-coded sample and plot it.

    The twelve (x, y) pairs are loaded with load_data using a subset size
    of 4, a LinearRegression is fitted on the resulting dataset, the
    coefficient and intercept are printed, and the samples are shown as a
    scatter plot together with the fitted line.
    """

    # Example data
    x = np.array([
        1000, 4000, 5000, 4500, 3000, 4000, 9000, 11000, 15000, 12000, 7000,
        3000
    ])
    y = np.array([
        9914, 40487, 54324, 50044, 34719, 42551, 94871, 118914, 158484, 131348,
        78504, 36284
    ])

    # The samples are passed as a single column; the labels stay 1-D.
    dataset = load_data(x=x.reshape(-1, 1), y=y, subset_size=4)

    reg = LinearRegression()
    reg.fit(dataset)
    # Replace the fitted attributes with the values compss_wait_on returns
    # before printing and plotting them.
    reg.coef_ = compss_wait_on(reg.coef_)
    reg.intercept_ = compss_wait_on(reg.intercept_)
    print(reg.coef_, reg.intercept_)

    # plot_result:
    scatter(x, y, marker='x')
    x_mesh = np.linspace(x.min(), x.max(), 1000)
    # Evaluate the fitted line point by point over the mesh.
    fitted = [reg.coef_ * x_val + reg.intercept_ for x_val in x_mesh]
    plot(x_mesh, fitted)
    show()
예제 #11
0
    def test_multivariate_multiobjective(self):
        """Check fit() and predict() with three features and three targets."""
        rows_per_block, cols_per_block = 2, 2
        block_shape = (rows_per_block, cols_per_block)

        features = np.array([[1, 2, 3], [2, 0, 4], [3, 1, 8], [4, 4, 2],
                             [5, 3, 1], [2, 7, 1]])
        targets = np.array([[2, 0, 3], [1, 5, 2], [1, 3, 4], [2, 7, 9],
                            [4.5, -1, 4], [0, 0, 0]])

        x = ds.array(x=features, block_size=block_shape)
        y = ds.array(x=targets, block_size=block_shape)

        reg = LinearRegression()
        reg.fit(x, y)

        # Predict one sample
        single = np.array([3, 2, 1])
        test_data = ds.array(x=single, block_size=(1, cols_per_block))
        pred = reg.predict(test_data).collect()
        expected_single = [3.0318415, 1.97164872, 3.85410906]
        self.assertTrue(np.allclose(pred, expected_single))

        # Predict multiple samples
        several = np.array([[3, 2, 1], [4, 3, 3], [1, 1, 1]])
        test_data = ds.array(x=several, block_size=block_shape)
        pred = reg.predict(test_data).collect()
        expected_several = [[3.0318415, 1.97164872, 3.85410906],
                            [2.5033157, 2.65809327, 5.05310495],
                            [2.145797, 1.4840121, 1.5739791]]
        self.assertTrue(np.allclose(pred, expected_several))

        # Check attributes values
        expected_coef = [[0.65034768, 0.34673933, 1.22176283],
                         [-0.41465084, -0.20584208, -0.16339571],
                         [-0.38211131, 0.27277365, 0.07031439]]
        self.assertTrue(np.allclose(reg.coef_.collect(), expected_coef))

        expected_intercept = [2.29221145, 1.07034124, 0.44529761]
        self.assertTrue(
            np.allclose(reg.intercept_.collect(), expected_intercept))