def test_fit_transform_show_and_draw_calls(self):
        """
        Check that fit, transform, and show on the pipeline reach its steps
        """
        # Mocks built from two plain transformers ("a", "c"), two visual
        # transformers ("b", "d") and a final estimator ("e").
        mocked_steps = [
            ("a", mock.MagicMock(MockTransformer())),
            ("b", mock.MagicMock(MockVisualTransformer())),
            ("c", mock.MagicMock(MockTransformer())),
            ("d", mock.MagicMock(MockVisualTransformer())),
            ("e", mock.MagicMock(MockEstimator())),
        ]
        pipeline = VisualPipeline(mocked_steps)

        features = [[value] * 5 for value in (1, 2, 3)]
        targets = [1, 2, 3, 4, 5]

        # Every step is expected to be fitted exactly once with the raw input.
        pipeline.fit(features, targets)
        for step in pipeline.named_steps.values():
            step.fit.assert_called_once_with(features, targets)

        # The final estimator "e" is excluded from the transform assertion.
        pipeline.transform(features)
        for name, step in pipeline.named_steps.items():
            if name != "e":
                step.transform.assert_called_once_with(features)

        # Only the steps other than "a", "c" and "e" are checked for show.
        pipeline.show()
        for name, step in pipeline.named_steps.items():
            if name not in {"a", "c", "e"}:
                step.show.assert_called_once_with(outpath=None)
Exemple #2
0
    def test_fit_transform_poof_and_draw_calls(self):
        """
        Check that fit, transform, and poof on the pipeline reach its steps
        """
        # Mocks built from two plain transformers ("a", "c"), two visual
        # transformers ("b", "d") and a final estimator ("e").
        pipeline = VisualPipeline([
            ("a", mock.MagicMock(MockTransformer())),
            ("b", mock.MagicMock(MockVisualTransformer())),
            ("c", mock.MagicMock(MockTransformer())),
            ("d", mock.MagicMock(MockVisualTransformer())),
            ("e", mock.MagicMock(MockEstimator())),
        ])

        features = [[value] * 5 for value in (1, 2, 3)]
        targets = [1, 2, 3, 4, 5]

        # Every step is expected to be fitted exactly once with the raw input.
        pipeline.fit(features, targets)
        for step in pipeline.named_steps.values():
            step.fit.assert_called_once_with(features, targets)

        # The final estimator "e" is excluded from the transform assertion.
        pipeline.transform(features)
        for name, step in pipeline.named_steps.items():
            if name != "e":
                step.transform.assert_called_once_with(features)

        # Only the steps other than "a", "c" and "e" are checked for poof.
        pipeline.poof()
        for name, step in pipeline.named_steps.items():
            if name not in {"a", "c", "e"}:
                step.poof.assert_called_once_with(outpath=None)
    def test_fit_transform_poof_and_draw_calls(self):
        """
        Exercise fit, transform, and poof on a pipeline of mocked steps
        """
        # Step names whose mocks are skipped by the later assertions.
        estimator_name = "e"
        skipped_for_poof = {"a", "c", "e"}

        mocked_steps = [
            ("a", mock.MagicMock(MockTransformer())),
            ("b", mock.MagicMock(MockVisualTransformer())),
            ("c", mock.MagicMock(MockTransformer())),
            ("d", mock.MagicMock(MockVisualTransformer())),
            ("e", mock.MagicMock(MockEstimator())),
        ]
        pipeline = VisualPipeline(mocked_steps)

        X = [
            [1, 1, 1, 1, 1],
            [2, 2, 2, 2, 2],
            [3, 3, 3, 3, 3],
        ]
        y = [1, 2, 3, 4, 5]

        # fit: asserted on every step of the pipeline.
        pipeline.fit(X, y)
        for fitted in pipeline.named_steps.values():
            fitted.fit.assert_called_once_with(X, y)

        # transform: asserted on every step except the final estimator.
        pipeline.transform(X)
        transformed = [
            step
            for name, step in pipeline.named_steps.items()
            if name != estimator_name
        ]
        for step in transformed:
            step.transform.assert_called_once_with(X)

        # poof: asserted only on the steps not listed in skipped_for_poof.
        pipeline.poof()
        poofed = [
            step
            for name, step in pipeline.named_steps.items()
            if name not in skipped_for_poof
        ]
        for step in poofed:
            step.poof.assert_called_once_with(outpath=None)
def selectDiscr():
    """
    Fit a DiscriminationThreshold visualizer on the labeled data and draw it.

    Reads ``labeled_data.csv``, preprocesses the numeric and categorical
    feature columns, fits a ``VisualPipeline`` ending in a
    ``DiscriminationThreshold`` around ``LogisticRegression`` on a training
    split, and finally calls ``poof()`` on the fitted pipeline.
    """
    frame = pd.read_csv("labeled_data.csv")

    # Numeric columns: fill gaps with the median, then standardize.
    numeric_steps = [
        ("imputer", SimpleImputer(strategy="median")),
        ("scaler", StandardScaler()),
    ]

    # Categorical columns: fill gaps with the constant 'missing', then
    # one-hot encode with unknown categories ignored.
    categorical_steps = [
        ("imputer", SimpleImputer(strategy="constant", fill_value="missing")),
        ("onehot", OneHotEncoder(handle_unknown="ignore")),
    ]

    column_transformers = [
        ("num", Pipeline(steps=numeric_steps), ["count_reviews", "rating"]),
        ("cat", Pipeline(steps=categorical_steps), ["product_category"]),
    ]
    preprocessor = ColumnTransformer(transformers=column_transformers)

    # The visualizer is the last step of the pipeline and wraps the classifier.
    pipeline_steps = [
        ("preprocessor", preprocessor),
        ("viz", DiscriminationThreshold(LogisticRegression())),
    ]
    clf = VisualPipeline(steps=pipeline_steps)

    features = frame.drop("label", axis=1)
    labels = frame["label"]

    # Only the training part of the split is used; the test part is discarded.
    train_features, _, train_labels, _ = train_test_split(
        features, labels, test_size=0.2
    )

    clf.fit(train_features, train_labels).poof()
def modelSelection():
    """
    Train, evaluate, visualize and persist the logistic-regression model.

    The function:

    1. Reads ``labeled_data.csv`` and separates the ``label`` column from the
       feature columns.
    2. Builds a preprocessing ``ColumnTransformer`` (median imputation and
       scaling for the numeric columns; constant imputation and one-hot
       encoding for the categorical column).
    3. Fits a ``VisualPipeline`` whose final step is a ``ClassificationReport``
       around ``LogisticRegression`` on the training part of a split made
       with ``test_size=0.2``.
    4. Prints the pipeline score and the accuracy on the test part, and
       calls ``poof()`` on the pipeline.
    5. Dumps the fitted pipeline to ``model_products.sav`` with joblib.

    Returns nothing; results are reported through stdout, the figure and the
    dumped model file.
    """
    data_path = "labeled_data.csv"
    data = pd.read_csv(data_path)

    # Preprocessing pipelines for both numeric and categorical data.
    numeric_features = ['count_reviews', 'rating']
    numeric_transformer = Pipeline(steps=[
        ('imputer', SimpleImputer(strategy='median')),
        ('scaler', StandardScaler()),
    ])

    categorical_features = ['product_category']
    categorical_transformer = Pipeline(steps=[
        ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
        ('onehot', OneHotEncoder(handle_unknown='ignore')),
    ])

    preprocessor = ColumnTransformer(transformers=[
        ('num', numeric_transformer, numeric_features),
        ('cat', categorical_transformer, categorical_features),
    ])

    X = data.drop('label', axis=1)
    y = data['label']

    # Hand the freshly created axes to the visualizer explicitly instead of
    # leaving them as an unused local.
    ax = plt.figure().add_subplot()

    # Other estimators (e.g. RidgeClassifier, SGDClassifier) can be
    # substituted for LogisticRegression here.
    viz_logistic = ClassificationReport(
        LogisticRegression(),
        ax=ax,
        classes=['not recommended', 'recommended'],
        support=True)

    # VisualPipeline is used so the report visualizer acts as the final step.
    clf_logistic = VisualPipeline(steps=[
        ('preprocessor', preprocessor),
        ('viz', viz_logistic),
    ])

    # NOTE: no random_state is passed, so the split (and therefore the
    # printed metrics) may differ between runs.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

    model_logistic = clf_logistic.fit(X_train, y_train)
    preds_logistic = clf_logistic.predict(X_test)

    print("LogisticRegression model score: %.3f" %
          clf_logistic.score(X_test, y_test))
    clf_logistic.poof()

    # Evaluate accuracy
    print("LogisticRegression accuracy: ",
          accuracy_score(y_test, preds_logistic))

    # Build the inspection frame with assign() so X_test itself is not
    # mutated (adding columns to the split output in place can raise pandas'
    # SettingWithCopyWarning). The frame is only kept for ad-hoc inspection.
    final_predictions = X_test.assign(
        target=y_test,
        prediction=preds_logistic,
    )

    filename = 'model_products.sav'
    joblib.dump(model_logistic, filename)