def test_same_coefs(self):
        """Compare fitted coefficients of the local and Spark models.

        Both estimators are built with the same ``tol`` and ``C``, fitted on
        the data returned by ``self.generate_dataset(2, 10000)``, and their
        ``coef_`` attributes are compared with ``decimal=1``.
        """
        features, labels, distributed = self.generate_dataset(2, 10000)

        # One shared settings dict keeps the two estimators configured alike.
        settings = {'tol': 1e-4, 'C': 10}
        reference = LogisticRegression(**settings)
        candidate = SparkLogisticRegression(**settings)

        reference.fit(features, labels)
        # The distributed fit is handed the set of labels explicitly.
        label_set = np.unique(labels)
        candidate.fit(distributed, classes=label_set)

        assert_array_almost_equal(reference.coef_, candidate.coef_, decimal=1)
Ejemplo n.º 2
0
    def test_same_coefs(self):
        """Check that local and Spark logistic regression learn like weights.

        The data comes from ``self.make_classification(2, 10000)``. Both
        models use ``tol=1e-4`` and ``C=10``; their ``coef_`` arrays are
        compared with ``decimal=1``.
        """
        samples, targets, spark_data = self.make_classification(2, 10000)

        hyperparams = dict(tol=1e-4, C=10)
        sk_model = LogisticRegression(**hyperparams)
        spark_model = SparkLogisticRegression(**hyperparams)

        sk_model.fit(samples, targets)
        # The Spark estimator receives the distinct labels up front.
        spark_model.fit(spark_data, classes=np.unique(targets))

        expected_coefs = sk_model.coef_
        actual_coefs = spark_model.coef_
        assert_array_almost_equal(expected_coefs, actual_coefs, decimal=1)
Ejemplo n.º 3
0
    def test_same_prediction(self):
        """Check that Spark predictions mostly agree with the local model.

        Three prediction vectors are produced for the data returned by
        ``self.make_classification(2, 100000)``: one from the local model,
        one from the Spark model on the ``'X'`` column of the distributed
        data, and one from the model returned by ``dist.to_scikit()``. The
        latter two must each differ from the local predictions in fewer
        than 1% of the entries.
        """
        samples, targets, spark_data = self.make_classification(2, 100000)

        hyperparams = {'tol': 1e-4, 'C': 10}
        sk_model = LogisticRegression(**hyperparams)
        spark_model = SparkLogisticRegression(**hyperparams)

        fitted_sk = sk_model.fit(samples, targets)
        expected = fitted_sk.predict(samples)

        # Reuse the classes discovered by the local fit for the Spark fit.
        fitted_spark = spark_model.fit(spark_data, classes=sk_model.classes_)
        spark_pred = fitted_spark.predict(spark_data[:, 'X'])
        converted_pred = spark_model.to_scikit().predict(samples)

        spark_mismatches = sum(expected != spark_pred.toarray())
        assert spark_mismatches < len(expected) / 100.

        converted_mismatches = sum(expected != converted_pred)
        assert converted_mismatches < len(expected) / 100.
Ejemplo n.º 4
0
    def test_same_coefs(self):
        """Compare local coefficients with the Spark and converted models.

        After fitting both estimators (``tol=1e-4``, ``C=10``) on the data
        from ``self.make_classification(2, 10000)``, the local ``coef_`` is
        compared with ``decimal=1`` against the Spark model's ``coef_`` and
        against that of the object returned by ``dist.to_scikit()``.
        """
        samples, targets, spark_data = self.make_classification(2, 10000)

        hyperparams = {'tol': 1e-4, 'C': 10}
        sk_model = LogisticRegression(**hyperparams)
        spark_model = SparkLogisticRegression(**hyperparams)

        sk_model.fit(samples, targets)
        spark_model.fit(spark_data, classes=np.unique(targets))
        sk_converted = spark_model.to_scikit()

        # Same check for the distributed model and its converted form.
        for fitted in (spark_model, sk_converted):
            assert_array_almost_equal(sk_model.coef_, fitted.coef_, decimal=1)
Ejemplo n.º 5
0
    def test_same_prediction(self):
        """Verify local, Spark and converted models predict nearly alike.

        Uses the data from ``self.make_classification(2, 100000)``. The
        Spark model's predictions (on the ``'X'`` column of the distributed
        data) and those of the ``to_scikit()`` result must each disagree
        with the local model on fewer than 1% of the entries.
        """
        features, labels, distributed = self.make_classification(2, 100000)

        reference = LogisticRegression(tol=1e-4, C=10)
        candidate = SparkLogisticRegression(tol=1e-4, C=10)

        baseline = reference.fit(features, labels).predict(features)

        # The Spark fit is given the classes found by the local estimator.
        candidate_fit = candidate.fit(distributed, classes=reference.classes_)
        distributed_pred = candidate_fit.predict(distributed[:, 'X'])

        roundtrip_pred = candidate.to_scikit().predict(features)

        n_diff_distributed = sum(baseline != distributed_pred.toarray())
        assert n_diff_distributed < len(baseline) / 100.

        n_diff_roundtrip = sum(baseline != roundtrip_pred)
        assert n_diff_roundtrip < len(baseline) / 100.