Ejemplo n.º 1
0
    def test_polynomial_features_slow(self):
        """Compare ``ExtendedFeatures(kind='poly-slow')`` with scikit-learn.

        Two hand-built cases are checked: a single column expanded up to
        degree 3 and a two-column matrix expanded up to degree 2. For each
        case ``PolynomialFeatures`` must reproduce the expected matrix, and
        ``ExtendedFeatures`` must return the same feature names, the same
        shape and the same values.

        NOTE(review): ``assertEqual`` is called on numpy arrays, which
        assumes the enclosing test class supports array comparison -- confirm.
        """
        # Case 1: one feature, powers 0 to 3.
        single = numpy.arange(6)[:, numpy.newaxis]
        expected_single = numpy.hstack(
            [numpy.ones_like(single), single, single**2, single**3])

        # Case 2: two features, every monomial of total degree <= 2,
        # listed as (exponent of first column, exponent of second column).
        pair = numpy.arange(6).reshape((3, 2))
        first = pair[:, :1]
        second = pair[:, 1:]
        exponents = [(0, 0), (1, 0), (0, 1), (2, 0), (1, 1), (0, 2)]
        expected_pair = numpy.hstack(
            [first**i * second**j for i, j in exponents])

        cases = [(3, single, expected_single), (2, pair, expected_pair)]
        for degree, data, expected in cases:
            sk_poly = PolynomialFeatures(degree, include_bias=True)
            sk_out = sk_poly.fit_transform(data)
            self.assertEqual(sk_out, expected)
            sk_names = sk_poly.get_feature_names()

            slow = ExtendedFeatures(kind='poly-slow', poly_degree=degree)
            slow_out = slow.fit_transform(data)
            slow_names = slow.get_feature_names()
            self.assertEqual(len(sk_names), len(slow_names))
            self.assertEqual(sk_names, slow_names)

            # Same check as above, repeated after the ExtendedFeatures calls.
            self.assertEqual(sk_out, expected)
            self.assertEqual(sk_out.shape, slow_out.shape)
            self.assertEqual(sk_out, slow_out)
Ejemplo n.º 2
0
def fcts_model(X, y):
    """Fit four SGD-based models on ``(X, y)`` and return their updaters.

    The models are, in order:

    1. a bare ``SGDClassifier`` fitted on ``PolynomialFeatures`` output;
    2. a pipeline ``PolynomialFeatures`` -> ``SGDClassifier``;
    3. a pipeline ``ExtendedFeatures(kind='poly')`` -> ``SGDClassifier``;
    4. a pipeline ``ExtendedFeatures(kind='poly-slow')`` -> ``SGDClassifier``.

    Returns a 4-tuple of callables ``f(X, y)``. The first forwards ``X``
    unchanged to ``partial_fit``; the other three first run ``X`` through
    the pipeline's fitted first step and then call ``partial_fit`` on its
    second step.
    """
    sgd_only = SGDClassifier()
    sk_pipeline = make_pipeline(PolynomialFeatures(), SGDClassifier())
    ext_pipeline = make_pipeline(
        ExtendedFeatures(kind='poly'), SGDClassifier())
    ext_slow_pipeline = make_pipeline(
        ExtendedFeatures(kind='poly-slow'), SGDClassifier())

    # The bare classifier never sees raw X: it is trained on expanded features.
    sgd_only.fit(PolynomialFeatures().fit_transform(X), y)
    sk_pipeline.fit(X, y)
    ext_pipeline.fit(X, y)
    ext_slow_pipeline.fit(X, y)

    def _expand_then_partial_fit(pipeline, X, y):
        # Step 0 is the feature expansion, step 1 the classifier.
        expanded = pipeline.steps[0][1].transform(X)
        classifier = pipeline.steps[1][1]
        return classifier.partial_fit(expanded, y)

    def partial_fit_model1(X, y, model=sgd_only):
        return model.partial_fit(X, y)

    def partial_fit_model2(X, y, model=sk_pipeline):
        return _expand_then_partial_fit(model, X, y)

    def partial_fit_model3(X, y, model=ext_pipeline):
        return _expand_then_partial_fit(model, X, y)

    def partial_fit_model4(X, y, model=ext_slow_pipeline):
        return _expand_then_partial_fit(model, X, y)

    return (partial_fit_model1, partial_fit_model2, partial_fit_model3,
            partial_fit_model4)
Ejemplo n.º 3
0
    def test_polynomial_features_bigger_ionly(self):
        """Check interaction-only expansion on a 5x6 matrix, degrees 1 to 5.

        For every degree, ``ExtendedFeatures`` (bias included, interactions
        only) must give the same feature names as ``PolynomialFeatures``,
        both with explicit input names ``x0..x5`` and with the default
        names, and must produce an output of the same shape and values.

        NOTE(review): ``assertEqual`` is called on numpy arrays, which
        assumes the enclosing test class supports array comparison -- confirm.
        """
        data = numpy.arange(30).reshape((5, 6))
        input_names = ["x%d" % col for col in range(data.shape[1])]

        for degree in range(1, 6):
            sk_poly = PolynomialFeatures(
                degree, include_bias=True, interaction_only=True)
            sk_out = sk_poly.fit_transform(data)
            sk_names = sk_poly.get_feature_names()

            ext = ExtendedFeatures(
                poly_degree=degree,
                poly_include_bias=True,
                poly_interaction_only=True)
            ext_out = ext.fit_transform(data)

            # Names are compared twice: once with explicit input names,
            # once with whatever defaults ExtendedFeatures generates.
            explicit_names = ext.get_feature_names(input_names)
            self.assertEqual(len(sk_names), len(explicit_names))
            self.assertEqual(sk_names, explicit_names)

            default_names = ext.get_feature_names()
            self.assertEqual(len(sk_names), len(default_names))
            self.assertEqual(sk_names, default_names)

            self.assertEqual(sk_out.shape, ext_out.shape)
            self.assertEqual(sk_out, ext_out)
Ejemplo n.º 4
0
 def __init__(self, dim=None, **opts):
     """Build and fit the four benchmarked models.

     ``dim`` is mandatory despite its ``None`` default; it is passed as the
     second argument of ``random_binary_classification`` (presumably the
     number of features -- confirm). Remaining keyword arguments are
     forwarded to ``BenchPerfTest.__init__``.
     """
     # Models are fitted here: everything that must not be measured
     # should take place here.
     assert dim is not None
     BenchPerfTest.__init__(self, **opts)

     sk_pipeline = make_pipeline(PolynomialFeatures(), SGDClassifier())
     ext_pipeline = make_pipeline(
         ExtendedFeatures(kind='poly'), SGDClassifier())
     ext_slow_pipeline = make_pipeline(
         ExtendedFeatures(kind='poly-slow'), SGDClassifier())

     self.model1 = SGDClassifier()
     self.model2 = sk_pipeline
     self.model3 = ext_pipeline
     self.model4 = ext_slow_pipeline

     X, y = random_binary_classification(10000, dim)

     # model1 is a bare classifier, so it is trained on features that
     # were already expanded; the pipelines expand X themselves.
     expanded = PolynomialFeatures().fit_transform(X)
     self.model1.fit(expanded, y)
     for pipeline in (self.model2, self.model3, self.model4):
         pipeline.fit(X, y)
Ejemplo n.º 5
0
    def test_polynomial_features_nobias_ionly_slow(self):
        """Check ``poly-slow`` without bias and with interactions only.

        The full polynomial matrices are built by hand, then only the
        columns expected with ``include_bias=False`` and
        ``interaction_only=True`` are kept: column 1 for the single-feature
        degree-3 case, columns 1, 2 and 4 for the two-feature degree-2 case.
        ``PolynomialFeatures`` must match that selection, and
        ``ExtendedFeatures(kind="poly-slow")`` must match
        ``PolynomialFeatures`` in names, shape and values.

        NOTE(review): ``assertEqual`` is called on numpy arrays, which
        assumes the enclosing test class supports array comparison -- confirm.
        """
        single = numpy.arange(6)[:, numpy.newaxis]
        full_single = numpy.hstack(
            [numpy.ones_like(single), single, single**2, single**3])

        pair = numpy.arange(6).reshape((3, 2))
        first = pair[:, :1]
        second = pair[:, 1:]
        # (exponent of first column, exponent of second column)
        exponents = [(0, 0), (1, 0), (0, 1), (2, 0), (1, 1), (0, 2)]
        full_pair = numpy.hstack(
            [first**i * second**j for i, j in exponents])

        # (degree, input, full expansion, columns kept)
        cases = [
            (3, single, full_single, [1]),
            (2, pair, full_pair, [1, 2, 4]),
        ]
        for degree, data, full, kept in cases:
            expected = full[:, kept]

            sk_poly = PolynomialFeatures(
                degree, include_bias=False, interaction_only=True)
            sk_out = sk_poly.fit_transform(data)
            sk_names = sk_poly.get_feature_names()
            self.assertEqual(sk_out, expected)

            slow = ExtendedFeatures(
                kind="poly-slow",
                poly_degree=degree,
                poly_include_bias=False,
                poly_interaction_only=True)
            slow_out = slow.fit_transform(data)
            slow_names = slow.get_feature_names()
            self.assertEqual(len(sk_names), len(slow_names))
            self.assertEqual(sk_names, slow_names)

            # Same check as above, repeated after the ExtendedFeatures calls.
            self.assertEqual(sk_out, expected)
            self.assertEqual(sk_out.shape, slow_out.shape)
            self.assertEqual(sk_out, slow_out)
Ejemplo n.º 6
0
    def test_polynomial_features_sparse(self):
        """Check that sparse and dense inputs give the same degree-2 output.

        A random 100x2 matrix of 0/1 values is expanded both as a CSR
        matrix and as a dense array, first with ``PolynomialFeatures`` and
        then with ``ExtendedFeatures``. In each case the sparse result must
        be a sparse matrix with the same dtype and the same values as the
        dense result. ``PolynomialFeatures`` may return CSC or CSR, whereas
        ``ExtendedFeatures`` is required to return CSC.

        NOTE(review): ``assertEqual`` is called on numpy arrays, which
        assumes the enclosing test class supports array comparison -- confirm.
        """
        dtype = numpy.float64
        rng = numpy.random.RandomState(0)  # pylint: disable=E1101
        X = rng.randint(0, 2, (100, 2))
        X_sparse = sparse.csr_matrix(X)

        est = PolynomialFeatures(2)
        Xt_sparse = est.fit_transform(X_sparse.astype(dtype))
        Xt_dense = est.fit_transform(X.astype(dtype))

        self.assertIsInstance(Xt_sparse,
                              (sparse.csc_matrix, sparse.csr_matrix))
        self.assertEqual(Xt_sparse.dtype, Xt_dense.dtype)
        # ``toarray()`` replaces the deprecated ``.A`` shorthand; both
        # return the same dense ndarray.
        self.assertEqual(Xt_sparse.toarray(), Xt_dense)

        est = ExtendedFeatures(poly_degree=2)
        Xt_sparse = est.fit_transform(X_sparse.astype(dtype))
        Xt_dense = est.fit_transform(X.astype(dtype))

        self.assertIsInstance(Xt_sparse, sparse.csc_matrix)
        self.assertEqual(Xt_sparse.dtype, Xt_dense.dtype)
        self.assertEqual(Xt_sparse.toarray(), Xt_dense)
Ejemplo n.º 7
0
 def polynomial_features_csr_X_zero_row(self, zero_row_index, deg,
                                        interaction_only):
     """Compare both transformers on a matrix containing an all-zero row.

     A fully dense random 3x10 matrix is generated, row ``zero_row_index``
     is set to zero, and the result is converted to a dense array.
     ``ExtendedFeatures`` and ``PolynomialFeatures`` (no bias, degree
     ``deg``, ``interaction_only`` as given) must then agree on feature
     names and on the transformed values.

     NOTE(review): ``assertEqual`` is called on numpy arrays, which
     assumes the enclosing test class supports array comparison -- confirm.
     """
     sparse_input = sparse_random(3, 10, 1.0, random_state=0).tocsr()
     sparse_input[zero_row_index, :] = 0.0
     # Both estimators are exercised on the dense form of the matrix.
     dense_input = sparse_input.toarray()

     ext = ExtendedFeatures(
         poly_degree=deg,
         poly_include_bias=False,
         poly_interaction_only=interaction_only)
     ext.fit(dense_input)

     sk_poly = PolynomialFeatures(
         degree=deg,
         include_bias=False,
         interaction_only=interaction_only)
     sk_poly.fit(dense_input)

     sk_names = sk_poly.get_feature_names()
     ext_names = ext.get_feature_names()
     self.assertEqual(sk_names, ext_names)

     ext_out = ext.fit_transform(dense_input)
     sk_out = sk_poly.fit_transform(dense_input)
     self.assertEqual(ext_out, sk_out)