def setUp(self):
    """Build the shared fixture: a 10-blob synthetic dataset and a
    two-step PipeGraph (KMeans -> LinearDiscriminantAnalysis) whose
    classifier is trained on the cluster labels."""
    # Synthetic data: 10000 samples, 5 features, 10 centers.
    self.X, self.y = datasets.make_blobs(n_samples=10000, n_features=5, centers=10)

    graph = PipeGraph(steps=[
        ('clustering', KMeans(n_clusters=10)),
        ('classification', LinearDiscriminantAnalysis()),
    ])
    # Both steps read X from the external input; the classifier's y
    # comes from the clustering step's predictions.
    graph.inject(sink='clustering', sink_var='X', source='_External', source_var='X')
    graph.inject(sink='classification', sink_var='X', source='_External', source_var='X')
    graph.inject(sink='classification', sink_var='y', source='clustering', source_var='predict')
    self.pgraph = graph
    # Continuation of a ColumnSelector(mapping={...}) call opened above —
    # presumably column 0 feeds 'X' and column 1 feeds 'sample_weight';
    # TODO confirm against the opening line (matches the pattern used in
    # the test suite's ex_3_inject test).
    'X': slice(0, 1),
    'sample_weight': slice(1, 2) })
steps = [('selector', selector), ('custom_power', custom_power),
         ('scaler', scaler), ('polynomial_features', polynomial_features),
         ('linear_model', linear_model)]
pgraph = PipeGraph(steps=steps)
# Wire the graph with inject(); the fluent chain works because each
# inject() call returns the PipeGraph itself. Omitted source defaults to
# '_External' and omitted source_var defaults to 'predict' (as the
# shorter calls below rely on).
(pgraph.inject(
    sink='selector', sink_var='X', source='_External', source_var='X').inject(
    'custom_power', 'X', 'selector', 'sample_weight').inject(
    'scaler', 'X', 'selector', 'X').inject(
    'polynomial_features', 'X', 'scaler').inject(
    'linear_model', 'X', 'polynomial_features').inject(
    'linear_model', 'y', source_var='y').inject(
    'linear_model', 'sample_weight', 'custom_power'))

###############################################################################
# Then we define ``param_grid`` as expected by :class:`GridSearchCV` exploring a few possibilities
# of varying parameters.

param_grid = {
    'polynomial_features__degree': range(1, 3),
    'linear_model__fit_intercept': [True, False],
    'custom_power__power': [1, 5, 10, 20, 30]
}
mlp = MLPClassifier()
concatenator = Concatenator()
steps = [('scaler', scaler),
         ('gaussian_nb', gaussian_nb),
         ('svc', svc),
         ('concat', concatenator),
         ('mlp', mlp)]

###############################################################################
# In this example we use a :class:`PipeGraphClassifier` because the result is a classification and we want to take advantage of Scikit-Learn default scoring method for classifiers.

pgraph = PipeGraph(steps=steps)

# Wire the graph one connection per statement (inject() also supports
# fluent chaining; the statement form is used here for readability).
pgraph.inject(sink='scaler', sink_var='X', source='_External', source_var='X')
pgraph.inject('gaussian_nb', 'X', 'scaler')
pgraph.inject('gaussian_nb', 'y', source_var='y')
pgraph.inject('svc', 'X', 'scaler')
pgraph.inject('svc', 'y', source_var='y')
# The concatenator merges the scaled features with both classifiers' outputs.
pgraph.inject('concat', 'X1', 'scaler')
pgraph.inject('concat', 'X2', 'gaussian_nb')
pgraph.inject('concat', 'X3', 'svc')
pgraph.inject('mlp', 'X', 'concat')
pgraph.inject('mlp', 'y', source_var='y')

# Grid of hyperparameters for the SVC regularization strength and the
# MLP architecture / iteration budget.
param_grid = {
    'svc__C': [0.1, 0.5, 1.0],
    'mlp__hidden_layer_sizes': [(3,), (6,), (9,)],
    'mlp__max_iter': [5000, 10000],
}
def test_Pipegraph__ex_3_inject(self):
    """inject() wiring example: connections start as None, the fluent
    inject() chain populates them, and after fit() both fit_connections
    and predict_connections equal the expected wiring dict."""
    import numpy as np
    import pandas as pd
    from sklearn.preprocessing import MinMaxScaler
    from sklearn.preprocessing import PolynomialFeatures
    from sklearn.linear_model import LinearRegression
    from pipegraph.base import PipeGraph
    from pipegraph.demo_blocks import CustomPower

    # Eleven samples; column 0 is the feature, column 1 a per-sample weight.
    X = pd.DataFrame(
        dict(X=np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]),
             sample_weight=np.array([0.01, 0.95, 0.10, 0.95, 0.95, 0.10,
                                     0.10, 0.95, 0.95, 0.95, 0.01])))
    y = np.array([10, 4, 20, 16, 25, -60, 85, 64, 81, 100, 150])

    scaler = MinMaxScaler()
    polynomial_features = PolynomialFeatures()
    linear_model = LinearRegression()
    custom_power = CustomPower()
    # NOTE(review): unlike every other class above, ColumnSelector has no
    # local import here — presumably it is imported at module scope;
    # verify against the file header.
    selector = ColumnSelector(mapping={
        'X': slice(0, 1),
        'sample_weight': slice(1, 2)
    })

    steps = [('selector', selector),
             ('custom_power', custom_power),
             ('scaler', scaler),
             ('polynomial_features', polynomial_features),
             ('linear_model', linear_model)]
    pgraph = PipeGraph(steps=steps)

    # Before any inject() call the connection dictionaries are unset.
    self.assertTrue(pgraph.fit_connections is None)
    self.assertTrue(pgraph.predict_connections is None)

    # inject() is fluent (returns the graph); omitted source defaults to
    # '_External' and omitted source_var to 'predict'.
    (pgraph.inject(sink='selector', sink_var='X',
                   source='_External', source_var='X')
        .inject('custom_power', 'X', 'selector', 'sample_weight')
        .inject('scaler', 'X', 'selector', 'X')
        .inject('polynomial_features', 'X', 'scaler')
        .inject('linear_model', 'X', 'polynomial_features')
        .inject('linear_model', 'y', source_var='y')
        .inject('linear_model', 'sample_weight', 'custom_power'))

    self.assertTrue(pgraph.fit_connections is not None)
    self.assertTrue(pgraph.predict_connections is not None)
    pgraph.fit(X, y)

    # Both connection maps must reflect the wiring above, including the
    # 'predict' defaults filled in for the short inject() calls.
    expected_connections = {
        'selector': {'X': ('_External', 'X')},
        'custom_power': {'X': ('selector', 'sample_weight')},
        'scaler': {'X': ('selector', 'X')},
        'polynomial_features': {'X': ('scaler', 'predict')},
        'linear_model': {'X': ('polynomial_features', 'predict'),
                         'y': ('_External', 'y'),
                         'sample_weight': ('custom_power', 'predict')}
    }
    self.assertEqual(pgraph.fit_connections, expected_connections)
    self.assertEqual(pgraph.predict_connections, expected_connections)