def test_pipeline_savefig_poof(self):
    """
    Check that poof with an outdir passes each visual step its own PDF path
    """
    steps = [
        ("a", mock.MagicMock(MockTransformer())),
        ("b", mock.MagicMock(MockVisualTransformer())),
        ("c", mock.MagicMock(MockTransformer())),
        ("d", mock.MagicMock(MockVisualTransformer())),
        ("e", mock.MagicMock(MockVisualEstimator())),
    ]
    pipeline = VisualPipeline(steps)

    # Build the directory with os.path.join so the expected paths use the
    # platform separator (needed for Windows compatibility).
    outdir = os.path.join("tmp", "figures")
    pipeline.poof(outdir=outdir)

    # (position in pipeline.steps, file name expected for that step)
    for index, filename in ((1, "b.pdf"), (3, "d.pdf"), (4, "e.pdf")):
        expected = os.path.join(outdir, filename)
        pipeline.steps[index][1].poof.assert_called_once_with(outpath=expected)
def test_fit_transform_poof_and_draw_calls(self):
    """
    Call fit, transform, draw and poof on the pipeline and check each step
    """
    pipeline = VisualPipeline(
        [
            ("a", MockTransformer()),
            ("b", MockVisualTransformer()),
            ("c", MockTransformer()),
            ("d", MockVisualTransformer()),
            ("e", MockEstimator()),
        ]
    )
    # Steps that are not checked for draw/poof calls.
    skipped = {"a", "c", "e"}

    # NOTE(review): y holds five labels while X has three rows; presumably
    # harmless because the steps are mocks -- confirm.
    X = [[1, 1, 1, 1, 1], [2, 2, 2, 2, 2], [3, 3, 3, 3, 3]]
    y = [1, 2, 3, 4, 5]

    pipeline.fit(X, y)
    for step in pipeline.named_steps.values():
        step.fit.assert_called_once_with(X, y)

    pipeline.transform(X)
    for step in pipeline.named_steps.values():
        step.transform.assert_called_once_with(X)

    pipeline.draw()
    for name, step in pipeline.named_steps.items():
        if name not in skipped:
            step.draw.assert_called_once_with()

    pipeline.poof()
    for name, step in pipeline.named_steps.items():
        if name not in skipped:
            step.poof.assert_called_once_with()
def test_validate_steps(self):
    """
    Visual transformers must be accepted as pipeline steps, bad steps rejected
    """
    # Pipeline objects have a _validate_steps method that raises a TypeError
    # when the steps do not follow the transformers --> estimator layout.
    # Every scenario is run against the plain Pipeline first and then the
    # VisualPipeline.
    pipeline_classes = (Pipeline, VisualPipeline)

    # A bad intermediate transformer is rejected.
    for make_pipeline in pipeline_classes:
        with pytest.raises(TypeError):
            make_pipeline(
                [
                    ("real", MockTransformer()),
                    ("bad", Thing()),
                    ("model", MockEstimator()),
                ]
            )

    # A bad final estimator is rejected.
    for make_pipeline in pipeline_classes:
        with pytest.raises(TypeError):
            make_pipeline([("real", MockTransformer()), ("bad", Thing())])

    # A visual transformer in the middle of the pipeline is accepted.
    accepted_cases = (
        (Pipeline, "could not add a visual transformer to a Pipeline!"),
        (
            VisualPipeline,
            "could not add a visual transformer to a VisualPipeline!",
        ),
    )
    for make_pipeline, failure_message in accepted_cases:
        try:
            make_pipeline(
                [
                    ("real", MockTransformer()),
                    ("visual", MockVisualTransformer()),
                    ("model", MockEstimator()),
                ]
            )
        except TypeError:
            self.fail(failure_message)
def test_fit_transform_show_and_draw_calls(self):
    """
    Call fit, transform and show on the pipeline and check the step calls
    """
    steps = [
        ("a", mock.MagicMock(MockTransformer())),
        ("b", mock.MagicMock(MockVisualTransformer())),
        ("c", mock.MagicMock(MockTransformer())),
        ("d", mock.MagicMock(MockVisualTransformer())),
        ("e", mock.MagicMock(MockEstimator())),
    ]
    pipeline = VisualPipeline(steps)

    # NOTE(review): y holds five labels while X has three rows; presumably
    # harmless because the steps are mocks -- confirm.
    X = [[1, 1, 1, 1, 1], [2, 2, 2, 2, 2], [3, 3, 3, 3, 3]]
    y = [1, 2, 3, 4, 5]

    pipeline.fit(X, y)
    for step in pipeline.named_steps.values():
        step.fit.assert_called_once_with(X, y)

    # The final step "e" is not checked for a transform call.
    pipeline.transform(X)
    for name, step in pipeline.named_steps.items():
        if name != "e":
            step.transform.assert_called_once_with(X)

    # Only the visual steps "b" and "d" are checked for a show call.
    pipeline.show()
    for name, step in pipeline.named_steps.items():
        if name not in {"a", "c", "e"}:
            step.show.assert_called_once_with(outpath=None)
def test_pipeline_show(self):
    """
    Check that VisualPipeline.show calls show on the visual steps
    """
    pipeline = VisualPipeline(
        [
            ("a", mock.MagicMock(MockTransformer())),
            ("b", mock.MagicMock(MockVisualTransformer())),
            ("c", mock.MagicMock(MockTransformer())),
            ("d", mock.MagicMock(MockVisualTransformer())),
            ("e", mock.MagicMock(MockEstimator())),
        ]
    )

    pipeline.show()

    # Positions 1 and 3 hold the mocked visual transformers "b" and "d".
    for index in (1, 3):
        _, visualizer = pipeline.steps[index]
        visualizer.show.assert_called_once_with(outpath=None)
def test_pipeline_savefig_poof(self):
    """
    Test the poof call with an outdir to save all the figures

    The expected output paths are built with os.path.join rather than
    hard-coded "/" separators, so the assertions use the platform separator
    (Windows compatibility).
    """
    # Local import keeps this block self-contained.
    import os

    pipeline = VisualPipeline(
        [
            ("a", mock.MagicMock(MockTransformer())),
            ("b", mock.MagicMock(MockVisualTransformer())),
            ("c", mock.MagicMock(MockTransformer())),
            ("d", mock.MagicMock(MockVisualTransformer())),
            ("e", mock.MagicMock(MockVisualEstimator())),
        ]
    )

    outdir = "/tmp/figures"
    pipeline.poof(outdir=outdir)

    # On POSIX these are exactly "/tmp/figures/b.pdf" etc.
    # NOTE(review): assumes VisualPipeline.poof joins outdir and the step
    # name with os.path.join -- confirm against the implementation.
    pipeline.steps[1][1].poof.assert_called_once_with(
        outpath=os.path.join(outdir, "b.pdf")
    )
    pipeline.steps[3][1].poof.assert_called_once_with(
        outpath=os.path.join(outdir, "d.pdf")
    )
    pipeline.steps[4][1].poof.assert_called_once_with(
        outpath=os.path.join(outdir, "e.pdf")
    )
def test_pipeline_poof(self):
    """
    Check that VisualPipeline.poof calls poof on the visual steps
    """
    steps = [
        ("a", mock.MagicMock(MockTransformer())),
        ("b", mock.MagicMock(MockVisualTransformer())),
        ("c", mock.MagicMock(MockTransformer())),
        ("d", mock.MagicMock(MockVisualTransformer())),
        ("e", mock.MagicMock(MockEstimator())),
    ]
    pipeline = VisualPipeline(steps)

    pipeline.poof()

    # Positions 1 and 3 hold the mocked visual transformers "b" and "d";
    # without an outdir each one is expected to receive outpath=None.
    for index in (1, 3):
        visualizer = pipeline.steps[index][1]
        visualizer.poof.assert_called_once_with(outpath=None)
def test_fit_transform_poof_and_draw_calls(self):
    """
    Call fit, transform and poof on the pipeline and check the step calls
    """
    pipeline = VisualPipeline(
        [
            ("a", mock.MagicMock(MockTransformer())),
            ("b", mock.MagicMock(MockVisualTransformer())),
            ("c", mock.MagicMock(MockTransformer())),
            ("d", mock.MagicMock(MockVisualTransformer())),
            ("e", mock.MagicMock(MockEstimator())),
        ]
    )

    # NOTE(review): y holds five labels while X has three rows; presumably
    # harmless because the steps are mocks -- confirm.
    X = [[1, 1, 1, 1, 1], [2, 2, 2, 2, 2], [3, 3, 3, 3, 3]]
    y = [1, 2, 3, 4, 5]

    pipeline.fit(X, y)
    for fitted_step in pipeline.named_steps.values():
        fitted_step.fit.assert_called_once_with(X, y)

    # The final step "e" is not checked for a transform call.
    pipeline.transform(X)
    transformers = [
        step for name, step in pipeline.named_steps.items() if name != "e"
    ]
    for transformer in transformers:
        transformer.transform.assert_called_once_with(X)

    # Only the visual steps "b" and "d" are checked for a poof call.
    pipeline.poof()
    visualizers = [
        step
        for name, step in pipeline.named_steps.items()
        if name not in {"a", "c", "e"}
    ]
    for visualizer in visualizers:
        visualizer.poof.assert_called_once_with(outpath=None)
def test_pipeline_savefig_poof(self):
    """
    Check the output paths given to each visual step when poof gets an outdir
    """
    pipeline = VisualPipeline(
        [
            ("a", mock.MagicMock(MockTransformer())),
            ("b", mock.MagicMock(MockVisualTransformer())),
            ("c", mock.MagicMock(MockTransformer())),
            ("d", mock.MagicMock(MockVisualTransformer())),
            ("e", mock.MagicMock(MockVisualEstimator())),
        ]
    )

    # Paths are joined with os.path.join for Windows compatibility.
    figures_dir = os.path.join("tmp", "figures")
    pipeline.poof(outdir=figures_dir)

    # Maps a position in pipeline.steps to the file name expected there.
    expected_files = {1: "b.pdf", 3: "d.pdf", 4: "e.pdf"}
    for position, filename in expected_files.items():
        _, visualizer = pipeline.steps[position]
        visualizer.poof.assert_called_once_with(
            outpath=os.path.join(figures_dir, filename)
        )
def selectDiscr():
    """
    Fit a DiscriminationThreshold visualizer on labeled_data.csv and show it.

    The CSV is read from the working directory. Its 'label' column is the
    target and every other column is passed to the preprocessor. The
    visualizer is fitted on the training part of an 80/20 split and then
    displayed with poof().
    """
    frame = pd.read_csv("labeled_data.csv")

    # Numeric columns: median imputation followed by standard scaling.
    impute_and_scale = Pipeline(steps=[
        ('imputer', SimpleImputer(strategy='median')),
        ('scaler', StandardScaler()),
    ])

    # Categorical columns: fill gaps with the constant 'missing', then
    # one-hot encode, ignoring categories not seen during fit.
    impute_and_encode = Pipeline(steps=[
        ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
        ('onehot', OneHotEncoder(handle_unknown='ignore')),
    ])

    column_prep = ColumnTransformer(transformers=[
        ('num', impute_and_scale, ['count_reviews', 'rating']),
        ('cat', impute_and_encode, ['product_category']),
    ])

    # The visualizer wraps the LogisticRegression estimator and is the
    # final step of the pipeline.
    threshold_viz = DiscriminationThreshold(LogisticRegression())
    visual_clf = VisualPipeline(steps=[
        ('preprocessor', column_prep),
        ('viz', threshold_viz),
    ])

    features = frame.drop('label', axis=1)
    target = frame['label']

    # Only the training part of the split is used here.
    train_X, _, train_y, _ = train_test_split(features, target, test_size=0.2)

    fitted = visual_clf.fit(train_X, train_y)
    fitted.poof()
def test_visual_steps_property(self):
    """
    Check that the visual transformer steps appear in visual_steps
    """
    steps = [
        ("a", MockTransformer()),
        ("b", VisualTransformerSpec()),
        ("c", MockTransformer()),
        ("d", VisualTransformerSpec()),
        ("e", MockEstimator()),
    ]
    pipeline = VisualPipeline(steps)

    # "b" and "d" are the steps built from VisualTransformerSpec.
    for visual_name in ("b", "d"):
        self.assertIn(visual_name, pipeline.visual_steps)
def test_visual_steps_property(self):
    """
    Check that visual_steps contains only the visual transformer steps
    """
    pipeline = VisualPipeline(
        [
            ("a", MockTransformer()),
            ("b", MockVisualTransformer()),
            ("c", MockTransformer()),
            ("d", MockVisualTransformer()),
            ("e", MockEstimator()),
        ]
    )

    # Step name -> whether it is expected to be listed in visual_steps.
    expected_membership = {
        "a": False,
        "b": True,
        "c": False,
        "d": True,
        "e": False,
    }
    for name, is_visual in expected_membership.items():
        assert (name in pipeline.visual_steps) == is_visual
def modelSelection():
    """
    Fit a LogisticRegression ClassificationReport pipeline and persist it.

    Steps:
      1. Read ``labeled_data.csv`` from the working directory; the 'label'
         column is the target, every other column is a feature.
      2. Build a preprocessor (median imputation + scaling for the numeric
         columns, constant imputation + one-hot encoding for the
         categorical column) followed by a ClassificationReport visualizer
         wrapping LogisticRegression.
      3. Fit on the training part of an 80/20 split, print the score and
         accuracy on the held-out part, and display the report with poof().
      4. Dump the object returned by ``fit`` to ``model_products.sav`` with
         joblib.
    """
    data_path = "labeled_data.csv"
    data = pd.read_csv(data_path)

    # Preprocessing pipelines for both numeric and categorical data.
    numeric_features = ['count_reviews', 'rating']
    numeric_transformer = Pipeline(steps=[
        ('imputer', SimpleImputer(strategy='median')),
        ('scaler', StandardScaler()),
    ])

    categorical_features = ['product_category']
    categorical_transformer = Pipeline(steps=[
        ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
        ('onehot', OneHotEncoder(handle_unknown='ignore')),
    ])

    preprocessor = ColumnTransformer(transformers=[
        ('num', numeric_transformer, numeric_features),
        ('cat', categorical_transformer, categorical_features),
    ])

    X = data.drop('label', axis=1)
    y = data['label']

    # A figure with a single axes is created before the visualizer is built.
    # NOTE(review): `ax` is never passed to the visualizer; presumably it
    # draws on the current axes -- confirm.
    fig = plt.figure()
    ax = fig.add_subplot()

    # Other estimators (e.g. RidgeClassifier, SGDClassifier) can be swapped
    # in for LogisticRegression here.
    viz_logistic = ClassificationReport(
        LogisticRegression(),
        classes=['not recommended', 'recommended'],
        support=True)

    # VisualPipeline is used so the report can be shown from the pipeline.
    clf_logistic = VisualPipeline(steps=[
        ('preprocessor', preprocessor),
        ('viz', viz_logistic),
    ])

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

    model_logistic = clf_logistic.fit(X_train, y_train)
    preds_logistic = clf_logistic.predict(X_test)

    print("LogisticRegression model score: %.3f" %
          clf_logistic.score(X_test, y_test))

    clf_logistic.poof()

    # Evaluate accuracy
    print("LogisticRegression accuracy: ",
          accuracy_score(y_test, preds_logistic))

    # Work on a copy: assigning columns to X_test itself would mutate the
    # held-out features and can trigger pandas' SettingWithCopyWarning.
    # The frame is not used further in this function; it is kept for
    # inspection of targets next to predictions.
    final_predictions = X_test.copy()
    final_predictions['target'] = y_test
    final_predictions['prediction'] = preds_logistic

    filename = 'model_products.sav'
    joblib.dump(model_logistic, filename)