Esempio n. 1
0
    def test_same_prediction(self):
        """Check that local, distributed and converted models predict alike.

        Fits a ``LinearRegression`` on ``(X, y)`` and a
        ``SparkLinearRegression`` on ``Z``, then compares the local
        predictions with the distributed predictions and with the
        predictions of the object returned by ``to_scikit()``.
        """
        # NOTE(review): the meaning of the (1, 100000) arguments is defined
        # by make_regression, which is not visible from this method.
        X, y, Z = self.make_regression(1, 100000)

        baseline = LinearRegression()
        spark_model = SparkLinearRegression()

        # Predict through whatever fit() returns, as a chained call would.
        fitted_baseline = baseline.fit(X, y)
        expected = fitted_baseline.predict(X)

        fitted_spark = spark_model.fit(Z)
        feature_column = Z[:, 'X']
        distributed = fitted_spark.predict(feature_column)

        exported = spark_model.to_scikit()
        exported_prediction = exported.predict(X)

        dtype_ok = check_rdd_dtype(distributed, (np.ndarray,))
        assert_true(dtype_ok)
        assert_array_almost_equal(expected, distributed.toarray())
        assert_array_almost_equal(expected, exported_prediction)
Esempio n. 2
0
    def test_same_coefs(self):
        """Check that fitted coefficients and intercepts agree.

        Fits a ``LinearRegression`` on ``(X, y)`` and a
        ``SparkLinearRegression`` on ``Z``, then compares ``coef_`` and
        ``intercept_`` of the local model with those of the distributed
        model and of the object returned by ``to_scikit()``.
        """
        # NOTE(review): the meaning of the (1, 100000) arguments is defined
        # by make_regression, which is not visible from this method.
        X, y, Z = self.make_regression(1, 100000)

        baseline = LinearRegression()
        spark_model = SparkLinearRegression()

        baseline.fit(X, y)
        spark_model.fit(Z)
        exported = spark_model.to_scikit()

        # Distributed model first, then its converted counterpart.
        for candidate in (spark_model, exported):
            assert_array_almost_equal(baseline.coef_, candidate.coef_)
            assert_array_almost_equal(
                baseline.intercept_, candidate.intercept_
            )