Example #1
0
    def test_Pipegraph__example_1_no_connections(self):
        """Check the connections of a graph created without any.

        A two-step graph (scaler, linear model) is built from ``steps`` only.
        Both connection attributes must be ``None`` right after construction
        and, after fitting and predicting, equal to the scaler -> linear_model
        wiring asserted at the end of the test.
        """
        import numpy as np
        from sklearn.preprocessing import MinMaxScaler
        from sklearn.linear_model import LinearRegression
        from pipegraph import PipeGraphRegressor

        X = np.random.rand(100, 1)
        y = 4 * X + 0.5 * np.random.randn(100, 1)

        scaler = MinMaxScaler()
        linear_model = LinearRegression()
        steps = [('scaler', scaler), ('linear_model', linear_model)]

        pgraph = PipeGraphRegressor(steps=steps)
        # No connections were passed, so none are set before fitting.
        self.assertIsNone(pgraph._pipegraph.fit_connections)
        self.assertIsNone(pgraph._pipegraph.predict_connections)

        pgraph.fit(X, y)
        y_pred = pgraph.predict(X)
        self.assertEqual(y_pred.shape[0], y.shape[0])

        # The same wiring is expected for both the fit and predict graphs.
        expected_connections = {
            'scaler': {'X': 'X'},
            'linear_model': {'X': ('scaler', 'predict'), 'y': 'y'},
        }
        self.assertEqual(pgraph._pipegraph.fit_connections,
                         expected_connections)
        self.assertEqual(pgraph._pipegraph.predict_connections,
                         expected_connections)
Example #2
0
    def setUp(self):
        """Create random data and two regressor graphs sharing their steps.

        ``self.model`` chains ``scaler`` into ``model``. ``self.model_custom``
        reuses the same scaler and linear-model instances and appends a
        ``neutral`` step whose ``X`` input comes from ``model``.
        """
        self.size = 100
        self.X = pd.DataFrame({'X': np.random.rand(self.size, )})
        self.y = pd.DataFrame({'y': np.random.rand(self.size, )})

        self.sc = MinMaxScaler()
        self.lm = LinearRegression()
        neutral = NeutralRegressor()

        # Plain two-step graph.
        basic_steps = [('scaler', self.sc), ('model', self.lm)]
        basic_connections = {
            'scaler': {'X': 'X'},
            'model': {'X': ('scaler', 'predict'), 'y': 'y'},
        }
        self.model = PipeGraphRegressor(basic_steps, basic_connections)

        # Same graph with an extra 'neutral' step after the model. The
        # connections are spelled out again so the two graphs do not share
        # any dict objects.
        custom_steps = basic_steps + [('neutral', neutral)]
        custom_connections = {
            'scaler': {'X': 'X'},
            'model': {'X': ('scaler', 'predict'), 'y': 'y'},
            'neutral': {'X': 'model'},
        }
        self.model_custom = PipeGraphRegressor(custom_steps,
                                               custom_connections)
Example #3
0
    def setUp(self):
        """Fit a four-step graph on three concatenated linear clusters.

        Each cluster holds 1000 points: ``X`` is uniform noise shifted by
        0, 3 and 6, and ``y`` is ``slope * X`` plus Gaussian noise scaled by
        0.5, with slopes 4, -4 and 2. The fitted graph is stored in
        ``self.pgraph``.
        """
        x_parts = []
        y_parts = []
        # (shift added to X, slope of y) per cluster; X is drawn before y
        # within each cluster.
        for shift, slope in ((0, 4), (3, -4), (6, 2)):
            x_part = pd.Series(np.random.rand(1000, ) + shift)
            y_part = pd.Series(slope * x_part + 0.5 * np.random.randn(1000, ))
            x_parts.append(x_part)
            y_parts.append(y_part)

        self.X = pd.concat(x_parts, axis=0).to_frame()
        self.y = pd.concat(y_parts, axis=0).to_frame()

        steps = [
            ('scaler', MinMaxScaler()),
            ('classifier', GaussianMixture(n_components=3)),
            ('models', RegressorsWithDataDependentNumberOfReplicas(
                steps=[('regressor', LinearRegression())])),
            ('neutral', NeutralRegressor()),
        ]

        # The classifier output is routed to the 'selection' input of the
        # 'models' step; 'neutral' receives the output of 'models'.
        connections = {
            'scaler': {'X': 'X'},
            'classifier': {'X': 'scaler'},
            'models': {
                'X': 'scaler',
                'y': 'y',
                'selection': 'classifier',
            },
            'neutral': {'X': 'models'},
        }

        self.pgraph = PipeGraphRegressor(
            steps=steps, fit_connections=connections)
        self.pgraph.fit(self.X, self.y)
# Regressor bundle configured with three replicas of a LinearRegression
# prototype and no extra model parameters.
models = RegressorsWithParametrizedNumberOfReplicas(
    model_prototype=LinearRegression(),
    model_parameters={},
    number_of_replicas=3,
)

steps = [
    ('scaler', scaler), ('classifier', gaussian_mixture), ('models', models)
]

# The classifier output is routed to the 'selection' input of 'models'.
connections = dict(
    scaler={'X': 'X'},
    classifier={'X': 'scaler'},
    models={'X': 'scaler', 'y': 'y', 'selection': 'classifier'},
)

pgraph = PipeGraphRegressor(steps=steps, fit_connections=connections)
pgraph.fit(X, y)
y_pred = pgraph.predict(X)

# Overlay the predictions on the original data.
plt.scatter(X, y)
plt.scatter(X, y_pred)
Example #5
0
    def test_Pipegraph__ex_3_inject(self):
        """Check the connections produced by a chain of ``inject`` calls.

        The graph is created from ``steps`` only, so both connection
        attributes start as ``None``. It is then wired with ``inject``,
        fitted, and its fit and predict connections are compared against the
        expected mapping.
        """
        import numpy as np
        import pandas as pd
        from sklearn.preprocessing import MinMaxScaler
        from sklearn.preprocessing import PolynomialFeatures
        from sklearn.linear_model import LinearRegression
        from pipegraph.base import PipeGraphRegressor
        from pipegraph.demo_blocks import CustomPower

        X = pd.DataFrame(
            dict(X=np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]),
                 sample_weight=np.array([
                     0.01, 0.95, 0.10, 0.95, 0.95, 0.10, 0.10, 0.95, 0.95,
                     0.95, 0.01
                 ])))
        y = np.array([10, 4, 20, 16, 25, -60, 85, 64, 81, 100, 150])

        scaler = MinMaxScaler()
        polynomial_features = PolynomialFeatures()
        linear_model = LinearRegression()
        custom_power = CustomPower()
        # ColumnSelector is not imported inside this method, so it has to be
        # available from the enclosing module.
        selector = ColumnSelector(mapping={
            'X': slice(0, 1),
            'sample_weight': slice(1, 2)
        })

        steps = [('selector', selector), ('custom_power', custom_power),
                 ('scaler', scaler),
                 ('polynomial_features', polynomial_features),
                 ('linear_model', linear_model)]

        pgraph = PipeGraphRegressor(steps=steps)

        self.assertIsNone(pgraph._pipegraph.fit_connections)
        self.assertIsNone(pgraph._pipegraph.predict_connections)

        # Each call is chained on the value returned by the previous one.
        (pgraph
         .inject(sink='selector', sink_var='X',
                 source='_External', source_var='X')
         .inject('custom_power', 'X', 'selector', 'sample_weight')
         .inject('scaler', 'X', 'selector', 'X')
         .inject('polynomial_features', 'X', 'scaler')
         .inject('linear_model', 'X', 'polynomial_features')
         .inject('linear_model', 'y', source_var='y')
         .inject('linear_model', 'sample_weight', 'custom_power'))

        self.assertIsNotNone(pgraph._pipegraph.fit_connections)
        self.assertIsNotNone(pgraph._pipegraph.predict_connections)
        pgraph.fit(X, y)

        # Injections made without a source variable are expected to appear
        # with 'predict', and the one made without a source with '_External'.
        expected_connections = {
            'selector': {'X': ('_External', 'X')},
            'custom_power': {'X': ('selector', 'sample_weight')},
            'scaler': {'X': ('selector', 'X')},
            'polynomial_features': {'X': ('scaler', 'predict')},
            'linear_model': {
                'X': ('polynomial_features', 'predict'),
                'y': ('_External', 'y'),
                'sample_weight': ('custom_power', 'predict'),
            },
        }
        self.assertEqual(pgraph._pipegraph.fit_connections,
                         expected_connections)
        self.assertEqual(pgraph._pipegraph.predict_connections,
                         expected_connections)
Example #6
0
        'X': 'X'
    },
    'classifier': {
        'X': 'scaler'
    },
    'models': {
        'X': 'scaler',
        'y': 'y',
        'selection': 'classifier'
    },
    'neutral': {
        'X': 'models'
    }
}

# Build the regressor graph from ``steps`` and ``connections``.
pgraph = PipeGraphRegressor(steps=steps, fit_connections=connections)

##############################################################################################################
# Using GridSearchCV to find the best number of clusters and the best regressors
#
from sklearn.model_selection import GridSearchCV

# Explore ``n_components`` of the ``classifier`` step over the values 2..9.
param_grid = {'classifier__n_components': range(2, 10)}
gs = GridSearchCV(estimator=pgraph, param_grid=param_grid, refit=True)
gs.fit(X_train, y_train)
y_pred = gs.predict(X_train)
# Overlay the predictions for the training inputs on the training data.
plt.scatter(X_train, y_train)
plt.scatter(X_train, y_pred)
# The score is computed on the held-out split, not on the training data.
print("Score:", gs.score(X_test, y_test))
# Report the ``n_components`` value of the selected best estimator.
print("classifier__n_components:",
      gs.best_estimator_.get_params()['classifier__n_components'])
Example #7
0
###############################################################################
# Secondly, we define the steps and a ``param_grid`` dictionary as specified by :class:`GridSearchCV`.
# In this case we just want to explore a few possibilities, varying the degree of the polynomials and whether or not to use an intercept in the linear model.

steps = [
    ('scaler', scaler),
    ('polynomial_features', polynomial_features),
    ('linear_model', linear_model),
]

# Degrees 1..10 combined with fitting / not fitting an intercept.
param_grid = dict(
    polynomial_features__degree=range(1, 11),
    linear_model__fit_intercept=[True, False],
)

###############################################################################
# Now, we use ``PipeGraphRegressor`` as estimator for :class:`GridSearchCV` and perform the ``fit`` and ``predict`` operations.

pgraph = PipeGraphRegressor(steps=steps)
grid_search_regressor = GridSearchCV(
    estimator=pgraph, param_grid=param_grid, refit=True)
grid_search_regressor.fit(X, y)
y_pred = grid_search_regressor.predict(X)

plt.scatter(X, y)
plt.scatter(X, y_pred)
plt.show()

# Read the fitted coefficients and the selected degree from the steps of the
# best estimator.
coef = (grid_search_regressor.best_estimator_
        .get_params()['linear_model'].coef_)
degree = (grid_search_regressor.best_estimator_
          .get_params()['polynomial_features'].degree)

print(
Example #8
0
class TestModelsWithDataDependentNumberOfReplicas(unittest.TestCase):
    """Tests for a graph using RegressorsWithDataDependentNumberOfReplicas.

    The fixture is a scaler -> classifier -> models -> neutral graph fitted
    on three concatenated linear clusters.
    """

    def setUp(self):
        """Generate the three-cluster data set and fit ``self.pgraph``."""
        X_first = pd.Series(np.random.rand(1000, ))
        y_first = pd.Series(4 * X_first + 0.5 * np.random.randn(1000, ))

        X_second = pd.Series(np.random.rand(1000, ) + 3)
        y_second = pd.Series(-4 * X_second + 0.5 * np.random.randn(1000, ))

        X_third = pd.Series(np.random.rand(1000, ) + 6)
        y_third = pd.Series(2 * X_third + 0.5 * np.random.randn(1000, ))

        self.X = pd.concat([X_first, X_second, X_third], axis=0).to_frame()
        self.y = pd.concat([y_first, y_second, y_third], axis=0).to_frame()
        scaler = MinMaxScaler()
        gaussian_mixture = GaussianMixture(n_components=3)
        models = RegressorsWithDataDependentNumberOfReplicas(
            steps=[('regressor', LinearRegression())])
        neutral_regressor = NeutralRegressor()

        steps = [('scaler', scaler), ('classifier', gaussian_mixture),
                 ('models', models), ('neutral', neutral_regressor)]

        # The classifier output is routed to the 'selection' input of the
        # 'models' step; 'neutral' receives the output of 'models'.
        connections = {
            'scaler': {
                'X': 'X'
            },
            'classifier': {
                'X': 'scaler'
            },
            'models': {
                'X': 'scaler',
                'y': 'y',
                'selection': 'classifier'
            },
            'neutral': {
                'X': 'models'
            },
        }

        self.pgraph = PipeGraphRegressor(steps=steps,
                                         fit_connections=connections)
        self.pgraph.fit(self.X, self.y)

    def test_ModelsWithDataDependentNumberOfReplicas__connections(self):
        """Check the 'models' step type, its inner connections and step names."""
        X = self.X
        y = self.y
        pgraph = self.pgraph

        # setUp has already fitted the graph once; it is fitted again here.
        pgraph.fit(X, y)
        y_pred = pgraph.predict(X)

        self.assertIsInstance(pgraph.named_steps['models'],
                              RegressorsWithDataDependentNumberOfReplicas)
        result_connections = pgraph.named_steps[
            'models']._pipegraph.fit_connections
        expected_connections = {
            'regressorsBundle': {
                'X': 'X',
                'selection': 'selection',
                'y': 'y'
            }
        }
        self.assertEqual(result_connections, expected_connections)
        result_steps = sorted(pgraph.named_steps.keys())
        expected_steps = sorted(['scaler', 'classifier', 'models', 'neutral'])
        self.assertEqual(result_steps, expected_steps)
        self.assertEqual(y_pred.shape[0], y.shape[0])

    def test_ModelsWithDataDependentNumberOfReplicas__predict(self):
        """Check that ``predict`` returns one row per input row."""
        X = self.X
        y = self.y
        pgraph = self.pgraph

        pgraph.fit(X, y)
        y_pred = pgraph.predict(X)
        self.assertEqual(y_pred.shape[0], y.shape[0])

    def test_ModelsWithDataDependentNumberOfReplicas__score(self):
        """Check that ``score`` runs and its result is above -42."""
        X = self.X
        y = self.y
        pgraph = self.pgraph

        pgraph.fit(X, y)
        result = pgraph.score(X, y)
        # Loose lower bound kept from the original test.
        self.assertGreater(result, -42)

    def test_ModelsWithDataDependentNumberOfReplicas__GridSearchCV(self):
        """Check that the graph can be used as a ``GridSearchCV`` estimator."""
        X = self.X
        y = self.y

        X_train, X_test, y_train, y_test = train_test_split(X, y)

        pgraph = self.pgraph
        # Explore ``n_components`` of the 'classifier' step over 2..9.
        param_grid = {'classifier__n_components': range(2, 10)}
        gs = GridSearchCV(estimator=pgraph, param_grid=param_grid, refit=True)
        gs.fit(X_train, y_train)
        result = gs.score(X_test, y_test)
        self.assertGreater(result, -42)
# Next we define the steps and we use :class:`PipeGraphRegressor` as estimator for :class:`GridSearchCV`.

scaler = MinMaxScaler()
polynomial_features = PolynomialFeatures()
linear_model = LinearRegression()
custom_power = CustomPower()
selector = ColumnSelector(
    mapping={'X': slice(0, 1), 'sample_weight': slice(1, 2)})

steps = [
    ('selector', selector),
    ('custom_power', custom_power),
    ('scaler', scaler),
    ('polynomial_features', polynomial_features),
    ('linear_model', linear_model),
]

pgraph = PipeGraphRegressor(steps=steps)

# Wire the graph one connection at a time; each ``inject`` call is chained on
# the value returned by the previous one.
(pgraph
 .inject(sink='selector', sink_var='X', source='_External', source_var='X')
 .inject('custom_power', 'X', 'selector', 'sample_weight')
 .inject('scaler', 'X', 'selector', 'X')
 .inject('polynomial_features', 'X', 'scaler')
 .inject('linear_model', 'X', 'polynomial_features')
 .inject('linear_model', 'y', source_var='y')
 .inject('linear_model', 'sample_weight', 'custom_power'))

###############################################################################