Example No. 1
def generateClassificationReport(clf, class_names):
    try:
        visualizer = ClassificationReport(clf, classes=class_names, support=True)
        visualizer.score(data_test, target_test)
        visualizerPath = os.path.join(tree_evaluations_out, str(classifierName) + '_classificationReport.png')
        g = visualizer.poof(outpath=visualizerPath, clear_figure=True)
    except KeyError:
        print('Warning, not enough data for classification report: ' + str(classifierName))
Example No. 2
def naiveBayesClassifierTest(X_train, X_test, y_train, y_test, X_1_df, Y_1_df):
    path = Path(__file__).parent.absolute()
    # Creates a new directory under output/GaussianNB if it doesn't exist
    Path("output/GaussianNB/").mkdir(parents=True, exist_ok=True)

    gnb = GaussianNB()
    print('-----------------------------')    
    print('Naive Bayes Classifier Test was Called. Wait...')
    # capture the start time
    start = time.time()
    y_pred = gnb.fit(X_train, np.ravel(y_train)).predict(X_test)
    # capture the end time of calculation
    end = time.time()

    print("Time taken to train model and prediction :", end-start , "seconds")

    print("Number of mislabeled points out of a total %d points : %d" % (X_test.shape[0], (np.ravel(y_test)!= y_pred).sum()))
    
    # comparing actual response values (y_test) with predicted response values (y_pred)
    print("Gaussian Naive Bayes model accuracy(in %):", metrics.accuracy_score(y_test, y_pred)*100)

    # Printing the metrics / generating visualizations
    print("Classification report, class prediction error, test accuracy, and running time for Naive Bayes are generated in the output folder")
    # Printing the classification report
    visualizer = ClassificationReport(GaussianNB(), classes=[0, 1, 2, 3, 4, 5])
    visualizer.fit(X_train, y_train.values.ravel())
    visualizer.score(X_test, y_test)
    strFile = str(path) + "/output/GaussianNB" + "/Classification Report.png"
    if os.path.isfile(strFile):
        os.remove(strFile)
    visualizer.show(strFile)

    plt.clf()
Example No. 3
def eva_model(c, n, X, y, X_test, y_test, class_names, outdir):
    model = svm.LinearSVC(class_weight='balanced', dual=False, max_iter=10000, C=c)
    rfe = RFE(model, n_features_to_select=n)

    ## learning curve
    plt.clf()
    viz_LC = LearningCurve(
        rfe, scoring='f1_weighted', n_jobs=4
    )
    viz_LC.fit(X, y)
    viz_LC.show(outpath=outdir + '/LC.png')

    ## classification report
    plt.clf()
    viz_CR = ClassificationReport(rfe, classes=class_names, support=True)
    viz_CR.fit(X, y)
    viz_CR.score(X_test, y_test)
    viz_CR.show(outpath=outdir + '/CR.png')

    ## confusion matrix
    plt.clf()
    viz_CM = ConfusionMatrix(rfe, classes=class_names)
    viz_CM.fit(X, y)
    viz_CM.score(X_test, y_test)
    viz_CM.show(outpath=outdir + '/CM.png')

    ## precision recall curve
    plt.clf()
    viz_PRC = PrecisionRecallCurve(rfe, per_class=True, iso_f1_curves=True,
                                   fill_area=False, micro=False, classes=class_names,
                                   size=(1080, 720))
    viz_PRC.fit(X, y)
    viz_PRC.score(X_test, y_test)
    viz_PRC.show(outpath=outdir + '/PRC.png')

    ## class prediction error
    plt.clf()
    viz_CPE = ClassPredictionError(
        rfe, classes=class_names
    )
    viz_CPE.fit(X, y)
    viz_CPE.score(X_test, y_test)
    viz_CPE.show(outpath=outdir + '/CPE.png')

    ## ROCAUC
    plt.clf()
    viz_RA = ROCAUC(rfe, classes=class_names, size=(1080, 720))
    viz_RA.fit(X, y)
    viz_RA.score(X_test, y_test)
    viz_RA.show(outpath=outdir + '/RA.png')

    fit = rfe.fit(X, y)
    y_predict = fit.predict(X_test)
    f1 = f1_score(y_test, y_predict, average='weighted')

    features_retained_RFE = X.columns[rfe.get_support()].values
    feature_df = pd.DataFrame(features_retained_RFE.tolist())
    feature_df.to_csv(outdir + '/features.csv', sep='\t', index=False)

    return f1
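
One way this evaluation helper might be driven is a small grid over the LinearSVC regularisation strength C and the number of RFE features n, keeping the best weighted F1. The loop below is only a sketch and is not part of the original snippet; the grid values, output directories, and the X_train/y_train/X_test/y_test/class_names variables are assumptions.

import os

# Hypothetical driver for eva_model (grid values and paths are assumed)
results = {}
for c in (0.01, 0.1, 1.0):
    for n in (10, 20, 40):
        outdir = 'eval_C{}_n{}'.format(c, n)
        os.makedirs(outdir, exist_ok=True)
        results[(c, n)] = eva_model(c, n, X_train, y_train, X_test, y_test, class_names, outdir)

best_c, best_n = max(results, key=results.get)
print('best weighted F1 = %.3f at C=%s, n=%s' % (results[(best_c, best_n)], best_c, best_n))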
Example No. 4
def visualizeClassificationReport(classifier, features_train, labels_train,
                                  features_test, labels_test):

    visualizer = ClassificationReport(classifier)

    visualizer.fit(features_train, labels_train)
    visualizer.score(features_test, labels_test)
    visualizer.poof()
Example No. 5
def create_classification_report_chart(classifier, X_train, X_test, y_train,
                                       y_test):
    """Create classification report chart.

    Tip:
        Check Sklearn-Neptune integration
        `documentation <https://docs-beta.neptune.ai/essentials/integrations/machine-learning-frameworks/sklearn>`_
        for the full example.

    Args:
        classifier (:obj:`classifier`):
            | Fitted sklearn classifier object
        X_train (:obj:`ndarray`):
            | Training data matrix
        X_test (:obj:`ndarray`):
            | Testing data matrix
        y_train (:obj:`ndarray`):
            | The classification target for training
        y_test (:obj:`ndarray`):
            | The classification target for testing

    Returns:
        ``neptune.types.File`` object that you can assign to run's ``base_namespace``.

    Examples:
        .. code:: python3

            import neptune.new.integrations.sklearn as npt_utils

            rfc = RandomForestClassifier()
            rfc.fit(X_train, y_train)

            run = neptune.init(project='my_workspace/my_project')
            run['visuals/classification_report'] = \
                npt_utils.create_classification_report_chart(rfc, X_train, X_test, y_train, y_test)
    """
    assert is_classifier(
        classifier), 'classifier should be sklearn classifier.'

    chart = None

    try:
        fig, ax = plt.subplots()
        visualizer = ClassificationReport(classifier,
                                          support=True,
                                          is_fitted=True,
                                          ax=ax)
        visualizer.fit(X_train, y_train)
        visualizer.score(X_test, y_test)
        visualizer.finalize()
        chart = neptune.types.File.as_image(fig)
        plt.close(fig)
    except Exception as e:
        print('Did not log Classification Report chart. Error: {}'.format(e))

    return chart
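Example No. 6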
def classification_report(X, y, test_size=0.10, random_state=42):
    models = [
        GaussianNB(),
        KNeighborsClassifier(),
        SGDClassifier(),
        BaggingClassifier(KNeighborsClassifier()),
        DecisionTreeClassifier(),
        LinearSVC(penalty="l1", dual=False)
    ]

    classes = ["not_passed", "passed"]

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state)

    for model in models:
        model.fit(X_train, y_train)

        visualizer = ClassificationReport(model, classes=classes)
        visualizer.fit(X_train, y_train)  # Fit the visualizer and the model
        visualizer.score(X_test, y_test)  # Evaluate the model on the test data

        # For classifiers, model.score reports mean accuracy
        print("Mean accuracy: %0.6f" %
              model.score(X_test, y_test))
        g = visualizer.poof()

        print('')
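Example No. 7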
def visualize_model(X, y, estimator, **kwargs):
    y = LabelEncoder().fit_transform(y)
    
    model = Pipeline([('One_Hot_Encoder', OneHotEncoder()), 
                      ('estimator', estimator)
                     ])
    
    visualizer = ClassificationReport(model, classes=['edible', 'poisonous'], cmap='YlOrRd', support='count')
    visualizer.fit(X, y)
    visualizer.score(X, y)
    visualizer.show()
Example No. 8
    def store_experiment_data(self, X_test, y_test):
        class_report = ClassificationReport(self.model)
        score = class_report.score(X_test, y_test)
        class_report.poof(
            'metrics/classification_report.png', clear_figure=True)
        self.ex.add_artifact('metrics/classification_report.png')

        conf_matrix = ConfusionMatrix(self.model)
        conf_matrix.score(X_test, y_test)
        conf_matrix.poof(
            'metrics/confusion_matrix.png', clear_figure=True)
        self.ex.add_artifact('metrics/confusion_matrix.png')

        cpd = ClassPredictionError(self.model)
        cpd.score(X_test, y_test)
        cpd.poof('metrics/class_prediction_error.png', clear_figure=True)
        self.ex.add_artifact('metrics/class_prediction_error.png')

        print('score=', score)
        self.ex.log_scalar('score', score)
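Example No. 9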
def visual_model_selection(X, y, estimator):
    """
    Function to plot a classification report
    :param X: test set features
    :param y: test set target
    :param estimator: model whose performance is analyzed
    :return: plot of precision, recall, and F1 score per class
    """
    
    visualizer = ClassificationReport(estimator, classes=['Low', 'Medium', 'High'], cmap='PRGn')
    visualizer.fit(X, y)  
    visualizer.score(X, y)
    visualizer.poof()
Example No. 10
def make_gb_report(path="images/classification_report.png"):
    X_train, X_test, y_train, y_test = make_dataset()

    _, ax = plt.subplots()

    bayes = GaussianNB()
    viz = ClassificationReport(bayes,
                               ax=ax,
                               classes=['unoccupied', 'occupied'])

    viz.fit(X_train, y_train)
    viz.score(X_test, y_test)

    viz.poof(outpath=path)
Example No. 11
def log_classification_report_chart(classifier, X_train, X_test, y_train, y_test, experiment=None):
    """Log classification report chart.

    Make sure you created an experiment by using ``neptune.create_experiment()`` before you use this method.

    Tip:
        Check `Neptune documentation <https://docs.neptune.ai/integrations/scikit_learn.html>`_ for the full example.

    Args:
        classifier (:obj:`classifier`):
            | Fitted sklearn classifier object
        X_train (:obj:`ndarray`):
            | Training data matrix
        X_test (:obj:`ndarray`):
            | Testing data matrix
        y_train (:obj:`ndarray`):
            | The classification target for training
        y_test (:obj:`ndarray`):
            | The classification target for testing
        experiment (:obj:`neptune.experiments.Experiment`, optional, default is ``None``):
            | Neptune ``Experiment`` object to control to which experiment you log the data.
            | If ``None``, log to currently active, and most recent experiment.

    Returns:
        ``None``

    Examples:
        .. code:: python3

            rfc = RandomForestClassifier()
            rfc.fit(X_train, y_train)

            neptune.init('my_workspace/my_project')
            exp = neptune.create_experiment()

            log_classification_report_chart(rfc, X_train, X_test, y_train, y_test, experiment=exp)
    """
    assert is_classifier(classifier), 'classifier should be sklearn classifier.'
    exp = _validate_experiment(experiment)

    try:
        fig, ax = plt.subplots()
        visualizer = ClassificationReport(classifier, support=True, is_fitted=True, ax=ax)
        visualizer.fit(X_train, y_train)
        visualizer.score(X_test, y_test)
        visualizer.finalize()
        exp.log_image('charts_sklearn', fig, image_name='Classification Report')
        plt.close(fig)
    except Exception as e:
        print('Did not log Classification Report chart. Error: {}'.format(e))
Example No. 12
def yb_classification_report(note, tree_clf, X_test, y_test):
    print(note)

    visualizer = ClassificationReport(tree_clf)

    visualizer.score(X_test, y_test)
    visualizer.show()
Example No. 13
def classreport():
    X, y = load_occupancy()
    X_train, X_test, y_train, y_test = tts(X, y, test_size=0.2)
    oz = ClassificationReport(GaussianNB(), support=True, ax=newfig())
    oz.fit(X_train, y_train)
    oz.score(X_test, y_test)
    savefig(oz, "classification_report")
Example No. 14
def classification_report(model, classes, X_train, Y_train, X_test, Y_test):

    from yellowbrick.classifier import ClassificationReport

    # Instantiate the classification model and visualizer

    visualizer = ClassificationReport(model, classes=classes, support=True)

    visualizer.fit(X_train, Y_train)  # Fit the visualizer and the model
    visualizer.score(X_test, Y_test)  # Evaluate the model on the test data
    g = visualizer.poof()  # Draw/show/poof the data
Example No. 15
    def classification_report(self) -> None:
        """Show precision, recall and F1 score by class
        """
        visualizer = ClassificationReport(self.trained_model,
                                          cmap="YlGn",
                                          size=(600, 360))
        visualizer.fit(self.X_train, self.y_train)
        visualizer.score(self.X_test, self.y_test)
        save_dir = f"{self.plots_dir}/classification_report_{self.model_id}.png"
        visualizer.show(outpath=save_dir)
        if not LOCAL:
            upload_to_s3(save_dir,
                         f'plots/classification_report_{self.model_id}.png',
                         bucket=S3_BUCKET_NAME)
        plt.clf()
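Example No. 16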
def make_gb_report(path="images/classification_report.png"):
    X_train, X_test, y_train, y_test = make_dataset()

    _, ax = plt.subplots()

    bayes = GaussianNB()
    viz = ClassificationReport(bayes, ax=ax, classes=['unoccupied', 'occupied'])

    viz.fit(X_train, y_train)
    viz.score(X_test, y_test)

    viz.poof(outpath=path)
Example No. 17
def model_selection_and_performance():
    st.title('Model Selection and Performance')
    selected_sampling_type = st.sidebar.selectbox('Select data sampling type',
                                                  ['No sampling', 'SMOTEENN'])
    df_prep = load_data_prep()
    X, X_test, y_test = preprocess(df_prep)
    del X

    if selected_sampling_type == 'No sampling':
        selected_model = st.sidebar.selectbox('Select Model', [
            'Logistic Regression', 'Random Forest', 'SVC', 'XGB',
            'Naive bayes', 'All models comparison'
        ])
        if selected_model == 'All models comparison':
            st.info('ROC Curves comparison')
            roc_all = cv2.imread('images/base_models_comparison.jpg')
            st.image(roc_all, use_column_width=True)
            del roc_all
        else:
            model = load_base_models(selected_model)

        gc.collect()

    elif selected_sampling_type == 'SMOTEENN':
        selected_model = st.sidebar.selectbox('Select Model', [
            'Logistic Regression SM', 'Random Forest SM', 'SVC SM', 'XGB SM',
            'Naive bayes SM', 'All models comparison'
        ])
        if selected_model == 'All models comparison':
            st.info('ROC Curves comparison')
            roc_all = cv2.imread('images/sm_models_comparison.jpg')
            st.image(roc_all, use_column_width=True)
            del roc_all
        else:
            model = load_sm_models(selected_model)

        gc.collect()

    if selected_model != 'All models comparison':
        fig, ax = plt.subplots()
        visualizer = ClassificationReport(model,
                                          classes=['non-churn', 'churn'],
                                          support=True,
                                          ax=ax)
        visualizer.score(X_test, y_test)
        visualizer.show()
        st.pyplot(fig)
        st.info('Confusion Matrix')
        fig1, ax1 = plt.subplots()
        plot_confusion_matrix(model, X_test, y_test, ax=ax1)
        st.pyplot(fig1)
        st.info('ROC Curve')
        fig2, ax2 = plt.subplots()
        plot_roc_curve(model, X_test, y_test, ax=ax2)
        st.pyplot(fig2)
        del X_test, y_test, fig, ax, fig1, ax1, fig2, ax2, model
        gc.collect()
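Example No. 18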
def yellowbrick_visualizations(model, classes, X_tr, y_tr, X_te, y_te):
    visualizer = ConfusionMatrix(model, classes=classes)
    visualizer.fit(X_tr, y_tr)
    visualizer.score(X_te, y_te)
    visualizer.show()

    visualizer = ClassificationReport(model, classes=classes, support=True)
    visualizer.fit(X_tr, y_tr)
    visualizer.score(X_te, y_te)
    visualizer.show()

    visualizer = ROCAUC(model, classes=classes)
    visualizer.fit(X_tr, y_tr)
    visualizer.score(X_te, y_te)
    visualizer.show()
Example No. 19
def visual_model_selection(X, y, estimator, path):
    """
    Test various estimators.
    """
    model = Pipeline([('label_encoding', EncodeCategorical(X.keys())),
                      ('one_hot_encoder', OneHotEncoder()),
                      ('estimator', estimator)])

    _, ax = plt.subplots()

    # Instantiate the classification model and visualizer
    visualizer = ClassificationReport(model,
                                      ax=ax,
                                      classes=['edible', 'poisonous'])
    visualizer.fit(X, y)
    visualizer.score(X, y)
    visualizer.poof(outpath=path)
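Example No. 20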
def visualize_model(X, y, estimator, **kwargs):
    """
    Test various estimators.
    """
    y = LabelEncoder().fit_transform(y)
    model = Pipeline([('one_hot_encoder', OneHotEncoder()),
                      ('estimator', estimator)])

    # Instantiate the classification model and visualizer
    visualizer = ClassificationReport(model,
                                      classes=['edible', 'poisonous'],
                                      cmap="YlGn",
                                      size=(600, 360),
                                      **kwargs)
    visualizer.fit(X, y)
    visualizer.score(X, y)
    visualizer.poof()
Example No. 21
    def plot_precision_recall_f1(self, classes=['Won', 'Loss'], display=False):
        """ Plot Precision Recall F1

        # Arguments:
            - classes: A list of all labels
            - display: boolean value for showing plot or not; default is False
        """

        self.train()
        # Instantiate the classification model and visualizer
        visualizer = ClassificationReport(self.svc_model, classes=classes)
        visualizer.fit(
            self.data_train,
            self.label_train)  # Fit the training data to the visualizer
        visualizer.score(
            self.data_test,
            self.label_test)  # Evaluate the model on the test data
        visualizer.poof(outpath=self.cfg['plot_path'] +
                        "linear-svc-report.png")  # save the data
        if display:
            g = visualizer.poof()  # show the data
Example No. 22
    def plot_classifier_metrics(self):

        fig, axes = plt.subplots(2, 2, figsize=(12, 8))

        visualgrid = [
            ConfusionMatrix(self.clf, ax=axes[0][0]),
            ClassificationReport(self.clf, ax=axes[0][1]),
            ROCAUC(self.clf, ax=axes[1][0]),
        ]
        fig.delaxes(axes[1, 1])
        for viz in visualgrid:
            viz.fit(self.X_train, self.y_train)
            viz.score(self.X_test, self.y_test)
            viz.finalize()
        plt.savefig('../docs/metrics_classifier.png')
        plt.show()
Example No. 23
def classificationreport(clf, classes, X_train, y_train, X_test, y_test):
    #classes = ['increase','little change', 'decrease']
    img = io.BytesIO()

    #plt.switch_backend('Agg')

    #plt.style.use('ggplot')

    visualizer = ClassificationReport(clf, classes=classes, support=True)

    visualizer.fit(X_train, y_train)  # Fit the visualizer and the model
    visualizer.score(X_test, y_test)  # Evaluate the model on the test data
    visualizer.show(outpath=img)  # Finalize and show the figure
    plt.figure(figsize=(8, 8))

    img.seek(0)
    graph_url = base64.b64encode(img.getvalue()).decode()
    return 'data:image/png;base64,{}'.format(graph_url)
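
The function returns a base64-encoded PNG as a data URI, which suggests the chart is meant to be embedded in an HTML page. A hypothetical caller, assuming a Flask app and that clf, classes and the train/test splits already exist, might look like this (none of these names come from the original snippet):

from flask import Flask, render_template_string

app = Flask(__name__)

@app.route('/report')
def report():
    # classificationreport() is the function above; clf, classes and the splits are assumed to exist
    chart_uri = classificationreport(clf, classes, X_train, y_train, X_test, y_test)
    return render_template_string('<img src="{{ chart }}" alt="classification report"/>', chart=chart_uri)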
Example No. 24
def visual_model_selection(X_train, X_test, y_train, y_test, estimator, show_plot=True):
    """
    Takes train and test data sets for both features and target plus an estimator and 
    returns a visual classification report.
    """ 
    from sklearn.pipeline import Pipeline 
    from yellowbrick.classifier import ClassificationReport
    #y_train = preprocessing.LabelEncoder().fit_transform(y_train.values.ravel())
    #y_test = preprocessing.LabelEncoder().fit_transform(y_test.values.ravel())
        
    model = Pipeline([('estimator', estimator)])

    # Instantiate the classification model and visualizer
    visualizer = ClassificationReport(model, classes=['on-time', 'delayed'])
    visualizer.fit(X_train, y_train)
    visualizer.score(X_test, y_test)
    visualizer.poof()
    return visualizer.scores
Example No. 25
def DTC(X_train, y_train, X_test, y_test):
    dtc = DecisionTreeClassifier(random_state=2)
    dtc.fit(X_train, y_train)
    print("DecisionTreeClassifier:train set")
    y_pred = dtc.predict(X_train)
    pred = dtc.predict_proba(X_test)
    print("DecisionTreeClassifier:Confusion Matrix: ",
          confusion_matrix(y_train, y_pred))
    print("DecisionTreeClassifier:Accuracy : ",
          accuracy_score(y_train, y_pred) * 100)
    print("DecisionTreeClassifier:Test set")
    y_pred = dtc.predict(X_test)
    print("DecisionTreeClassifier:Confusion Matrix: ",
          confusion_matrix(y_test, y_pred))
    print("DecisionTreeClassifier:Accuracy : ",
          accuracy_score(y_test, y_pred) * 100)
    #Confusion Matrix

    matrix = confusion_matrix(y_test, y_pred)
    class_names = [0, 1]
    fig, ax = plt.subplots()
    tick_marks = np.arange(len(class_names))
    plt.xticks(tick_marks, class_names)
    plt.yticks(tick_marks, class_names)
    sns.heatmap(pd.DataFrame(matrix), annot=True, cmap="YlGnBu", fmt='g')
    ax.xaxis.set_label_position("top")
    plt.tight_layout()
    plt.title('Confusion matrix', y=1.1)
    plt.ylabel('Actual label')
    plt.xlabel('Predicted label')
    plt.show()
    #ROC_AUC curve
    probs = dtc.predict_proba(X_test)
    probs = probs[:, 1]
    auc = roc_auc_score(y_test, probs)
    print('AUC: %.2f' % auc)
    le = preprocessing.LabelEncoder()
    y_test1 = le.fit_transform(y_test)
    fpr, tpr, thresholds = roc_curve(y_test1, probs)
    plot_roc_curve(fpr, tpr)
    #Classification Report
    target_names = ['Yes', 'No']
    prediction = dtc.predict(X_test)
    print(classification_report(y_test, prediction, target_names=target_names))
    classes = ["Yes", "No"]
    visualizer = ClassificationReport(dtc, classes=classes, support=True)
    visualizer.fit(X_train, y_train)
    visualizer.score(X_test, y_test)
    g = visualizer.poof()
Example No. 26
    def RF_Model(X,Y,X1,Y1):
        global acc1
        print("___________________________Random Forest__________________________________________") 
        model1=RandomForestClassifier()
        model1.fit(X,Y)
        y_pred1 = model1.predict(X1)
        print("_____________Report___________________")
        acc1=cal_accuracy(Y1, y_pred1)
        # print("_____________user input ___________________")
        
        # Confusion matrix
        matrix = confusion_matrix(Y1, y_pred1)
        class_names = [0, 1]
        fig, ax = plt.subplots()
        tick_marks = np.arange(len(class_names))
        plt.xticks(tick_marks, class_names)
        plt.yticks(tick_marks, class_names)
        sns.heatmap(pd.DataFrame(matrix), annot=True, cmap="YlGnBu", fmt='g')
        ax.xaxis.set_label_position("top")
        plt.tight_layout()
        plt.title('Confusion matrix', y=1.1)
        plt.ylabel('Actual label')
        plt.xlabel('Predicted label')
        fig.canvas.set_window_title('RF')
        plt.show()

        #ROC_AUC curve
        probs = model1.predict_proba(X1) 
        probs = probs[:, 1]  
        auc = roc_auc_score(Y1, probs)  
        print('AUC: %.2f' % auc)
        le = preprocessing.LabelEncoder()
        y_test1=le.fit_transform(Y1)
        fpr1, tpr1, thresholds = roc_curve(y_test1, probs)
        #fig.canvas.set_window_title('XGBoost')
        plot_roc_curve(fpr1, tpr1)


        #Classification Report
        target_names = ['Yes', 'No']
        prediction=model1.predict(X1)
        #print(classification_report(Y1, prediction, target_names=target_names))
        classes = ["Yes", "No"]
        visualizer1 = ClassificationReport(model1, classes=classes, support=True)
        visualizer1.fit(X, Y)  
        visualizer1.score(X1, Y1)
        #fig.canvas.set_window_title('XGBoost')  
        g = visualizer1.poof()
Example No. 27
    def __init__(self,
                 X_train,
                 X_test,
                 y_train,
                 y_test,
                 labels,
                 model,
                 viz_selection,
                 upsampled=False):
        """
        Class for yellowbrick classifier visualizer

        Args:
            X_train: numpy ndarray of model features training data values
            X_test: numpy ndarray of model features test data values
            y_train: numpy ndarray of model target variable training data values
            y_test: numpy ndarray of model target variable test data values
            labels: list of class labels for binary classification
            model: sklearn estimator for classification
            viz_selection: string value used to reference yellowbrick classification visualizer
            upsampled: binary value to determine to which subdirectory output image should be saved

        """

        self.labels = labels
        self.model = model
        self.viz_selection = viz_selection
        self.upsampled = upsampled
        self.X_train, self.X_test, self.y_train, self.y_test = X_train, X_test, y_train, y_test

        if self.viz_selection == 'ClassificationReport':
            self.visualizer = ClassificationReport(self.model,
                                                   classes=self.labels,
                                                   support=True)
        elif self.viz_selection == 'ROCAUC':
            self.visualizer = ROCAUC(self.model,
                                     classes=self.labels)
        elif self.viz_selection == 'PrecisionRecallCurve':
            self.visualizer = PrecisionRecallCurve(self.model)
        elif self.viz_selection == 'ConfusionMatrix':
            self.visualizer = ConfusionMatrix(model, classes=self.labels)
        else:
            return print(
                "Error: viz_selection does not match accepted values. View Visualizer Class for accepted values."
            )
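
Since the snippet only shows the constructor, here is a minimal, self-contained sketch of what the 'ClassificationReport' branch of viz_selection builds, driven directly through yellowbrick; the toy dataset and estimator are assumptions, not part of the original class.

from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from yellowbrick.classifier import ClassificationReport

# Toy binary data standing in for the X_train/X_test arrays the class expects
X, y = make_classification(n_samples=500, n_features=10, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

# Same construction as the 'ClassificationReport' branch above
viz = ClassificationReport(LogisticRegression(max_iter=1000),
                           classes=['negative', 'positive'], support=True)
viz.fit(X_train, y_train)
viz.score(X_test, y_test)
viz.show()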
Example No. 28
def classifier_report(classifier, X_test, y_test):
    classes = np.unique(y_test)
    cm = ConfusionMatrix(classifier, classes=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
    cm.fit(X_test, y_test)
    cm.score(X_test, y_test)
    filename = classifier.__class__.__name__ + '_confusion_matrix.png'
    cm.poof(outpath=filename,
            clear_figure=True,
            transparent=False, dpi=80, bbox_inches='tight')
    ex.add_artifact(filename)
    visualizer = ClassificationReport(classifier,
                                      classes=classes,
                                      support=True)
    visualizer.fit(X_test, y_test)
    visualizer.score(X_test, y_test)
    visualizer.poof(outpath="classification_report.png",
                    clear_figure=True,
                    kwargs=dict(transparent=False, dpi=80, inches='tight'))
    ex.add_artifact('classification_report.png')
Example No. 29
def visualize_model(X, y, estimator, path, **kwargs):
    """
    Test various estimators.
    """
    y = LabelEncoder().fit_transform(y)
    model = Pipeline([("one_hot_encoder", OneHotEncoder()),
                      ("estimator", estimator)])

    _, ax = plt.subplots()

    # Instantiate the classification model and visualizer
    visualizer = ClassificationReport(model,
                                      classes=["edible", "poisonous"],
                                      cmap="YlGn",
                                      size=(600, 360),
                                      ax=ax,
                                      **kwargs)
    visualizer.fit(X, y)
    visualizer.score(X, y)
    visualizer.poof(outpath=path)
Example No. 30
def ClassReport_Graph(Classif, Data_train, Target_train, Data_test, Target_test, Class, ModelName='Classifier', Accur=False, Predict=None):
    """ Function imports method to report and analyse predictions from different scikit-learn model implementations
    INPUT: training examples' features, training examples' outputs, testing examples' features, testing examples' outputs
            and list with the names of the classes """
    try:
        from yellowbrick.classifier import ClassificationReport
        
        if Accur:
            print((ModelName + " accuracy: %0.4f") % (metrics.accuracy_score(Target_test, Predict, normalize=True)))
        
        view_graph = ClassificationReport(Classif, classes=Class, size=(900, 720)) #Object for classification model and visualization
        view_graph.fit(Data_train, Target_train) # Fit the training data to the visualizer
        view_graph.score(Data_test, Target_test) # Evaluate the model on the test data
        graph = view_graph.poof() # Draw/show/poof the data
        return graph
    except Exception:
        print("CLASSIFICATION-REPORT_ERROR\n")
Example No. 31
def visual_model_selection(X, y, estimator, path):
    """
    Test various estimators.
    """
    model = Pipeline([
         ('label_encoding', EncodeCategorical(X.keys())),
         ('one_hot_encoder', OneHotEncoder()),
         ('estimator', estimator)
    ])

    _, ax = plt.subplots()

    # Instantiate the classification model and visualizer
    visualizer = ClassificationReport(model, ax=ax, classes=['edible', 'poisonous'])
    visualizer.fit(X, y)
    visualizer.score(X, y)
    visualizer.poof(outpath=path)
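Example No. 32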
def visualize_model(X, y, estimators, pred=False, disc=False, conf=False, bal=False, **kwargs):
    """
    Visualize models using the yellowbrick plotting library.
    """

    # Instantiate the classification model and visualizer
    visualizer = ClassificationReport(
        estimators, classes=['Reach, 1 Reach, or L/R Reach', 'Null, Multiple Reaches, Or Multiple Arms'],
        cmap="YlGn", size=(600, 360), **kwargs
    )
    visualizer.fit(X, y)
    visualizer.score(X, y)
    visualizer.show()
    if pred:
        class_prediction_errors(X, y, estimators, **kwargs)
    if disc:
        discrimination_thresholding(X, y, estimators, **kwargs)
    if conf:
        confusion_matrix(X, y, estimators, **kwargs)
    if bal:
        plot_class_balance(y, **kwargs)
Example No. 33
def classification(fname="classification.png"):

    # Create side-by-side axes grid
    _, axes = plt.subplots(ncols=2, figsize=(18,6))

    # Add ClassificationReport to the left
    data = load_spam(split=True)
    oz = ClassificationReport(MultinomialNB(), classes=["ham", "spam"], ax=axes[0])
    oz.fit(data.X.train, data.y.train)
    oz.score(data.X.test, data.y.test)
    oz.finalize()

    # Add DiscriminationThreshold to the right
    data = load_spam(split=False)
    oz = DiscriminationThreshold(LogisticRegression(), ax=axes[1])
    oz.fit(data.X, data.y)
    oz.finalize()

    # Save figure
    path = os.path.join(FIGURES, fname)
    plt.tight_layout()
    plt.savefig(path)
Example No. 34
        files=files,
        data=data,
        target=target,
    )


# Load the data and create document vectors
corpus = load_corpus('hobbies')
tfidf  = TfidfVectorizer()

docs   = tfidf.fit_transform(corpus.data)
labels = corpus.target

X_train, X_test, y_train, y_test = train_test_split(docs.toarray(), labels, test_size=0.2, random_state=42)

visualizer = ClassificationReport(GaussianNB(), classes=corpus.categories)
visualizer.fit(X_train, y_train)  # Fit the training data to the visualizer
visualizer.score(X_test, y_test)  # Evaluate the model on the test data
visualizer.poof()

visualizer = ClassificationReport(SGDClassifier(), classes=corpus.categories)
visualizer.fit(X_train, y_train)  # Fit the training data to the visualizer
visualizer.score(X_test, y_test)  # Evaluate the model on the test data
visualizer.poof()

visualizer = ConfusionMatrix(LogisticRegression(), classes=corpus.categories)
visualizer.fit(X_train, y_train)  # Fit the training data to the visualizer
visualizer.score(X_test, y_test)  # Evaluate the model on the test data
visualizer.poof()

visualizer = ConfusionMatrix(MultinomialNB(), classes=corpus.categories)