def pred_error(X, y, test_size=0.10, random_state=42):
    models = [
        GaussianNB(),
        KNeighborsClassifier(),
        SGDClassifier(),
        BaggingClassifier(KNeighborsClassifier()),
        DecisionTreeClassifier(),
        LinearSVC(penalty="l1", dual=False)
    ]

    classes = ["not_passed", "passed"]

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state)

    for model in models:
        model.fit(X_train, y_train)

        visualizer = ClassPredictionError(model, classes=classes)
        visualizer.fit(X_train, y_train)  # Fit the visualizer and the model
        visualizer.score(X_test, y_test)  # Evaluate the model on the test data

        # For classifiers, score() returns mean accuracy, not a coefficient of determination
        print("Mean accuracy: %0.6f" % model.score(X_test, y_test))
        visualizer.poof()

        print('')
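
# Usage sketch for pred_error above (illustrative; not part of the original source).
# It assumes the names used inside pred_error are importable as below and builds a
# small synthetic binary dataset so the call is self-contained.
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import SGDClassifier
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import LinearSVC
from yellowbrick.classifier import ClassPredictionError

X_demo, y_demo = make_classification(n_samples=500, n_classes=2,
                                     n_informative=4, random_state=42)
pred_error(X_demo, y_demo, test_size=0.2)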
def eva_model(c, n, X, y, X_test, y_test, class_names, outdir):
    model = svm.LinearSVC(class_weight='balanced', dual=False, max_iter=10000, C=c)
    rfe = RFE(model, n_features_to_select=n)

    ## learning curve
    plt.clf()
    viz_LC = LearningCurve(
        rfe, scoring='f1_weighted', n_jobs=4
    )
    viz_LC.fit(X, y)
    viz_LC.show(outpath=outdir + '/LC.png')

    ## classification report
    plt.clf()
    viz_CR = ClassificationReport(rfe, classes=class_names, support=True)
    viz_CR.fit(X, y)
    viz_CR.score(X_test, y_test)
    viz_CR.show(outpath=outdir + '/CR.png')

    ## confusion matrix
    plt.clf()
    viz_CM = ConfusionMatrix(rfe, classes=class_names)
    viz_CM.fit(X, y)
    viz_CM.score(X_test, y_test)
    viz_CM.show(outpath=outdir + '/CM.png')

    ## precision recall curve
    plt.clf()
    viz_PRC = PrecisionRecallCurve(rfe, per_class=True, iso_f1_curves=True,
                                   fill_area=False, micro=False, classes=class_names,
                                   size=(1080, 720))
    viz_PRC.fit(X, y)
    viz_PRC.score(X_test, y_test)
    viz_PRC.show(outpath=outdir + '/PRC.png')

    ## class prediction error
    plt.clf()
    viz_CPE = ClassPredictionError(
        rfe, classes=class_names
    )
    viz_CPE.fit(X, y)
    viz_CPE.score(X_test, y_test)
    viz_CPE.show(outpath=outdir + '/CPE.png')

    ## ROCAUC
    plt.clf()
    viz_RA = ROCAUC(rfe, classes=class_names, size=(1080,720))
    viz_RA.fit(X, y)
    viz_RA.score(X_test, y_test)
    viz_RA.show(outpath=outdir + '/RA.png')

    fit = rfe.fit(X, y)
    y_predict = fit.predict(X_test)
    f1 = f1_score(y_test, y_predict, average='weighted')

    features_retained_RFE = X.columns[rfe.get_support()].values
    feature_df = pd.DataFrame(features_retained_RFE.tolist())
    feature_df.to_csv(outdir + '/features.csv', sep='\t', index=False)

    return f1
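
# Usage sketch for eva_model above (illustrative; not part of the original source).
# eva_model expects X as a pandas DataFrame because it reads X.columns to report the
# features retained by RFE; it also assumes svm, RFE, the yellowbrick visualizers,
# plt, f1_score and pd are already imported at module level.
import pandas as pd
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

X_arr, y_arr = make_classification(n_samples=400, n_features=20,
                                   n_informative=5, random_state=0)
X_df = pd.DataFrame(X_arr, columns=[f"f{i}" for i in range(X_arr.shape[1])])
X_tr, X_te, y_tr, y_te = train_test_split(X_df, y_arr, test_size=0.2, random_state=0)
f1 = eva_model(c=1.0, n=10, X=X_tr, y=y_tr, X_test=X_te, y_test=y_te,
               class_names=["class_0", "class_1"], outdir=".")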
def log_class_prediction_error_chart(classifier,
                                     X_train,
                                     X_test,
                                     y_train,
                                     y_test,
                                     experiment=None):
    """Log class prediction error chart.

    Make sure you created an experiment by using ``neptune.create_experiment()`` before you use this method.

    Tip:
        Check `Neptune documentation <https://docs.neptune.ai/integrations/scikit_learn.html>`_ for the full example.

    Args:
        classifier (:obj:`classifier`):
            | Fitted sklearn classifier object
        X_train (:obj:`ndarray`):
            | Training data matrix
        X_test (:obj:`ndarray`):
            | Testing data matrix
        y_train (:obj:`ndarray`):
            | The classification target for training
        y_test (:obj:`ndarray`):
            | The classification target for testing
        experiment (:obj:`neptune.experiments.Experiment`, optional, default is ``None``):
            | Neptune ``Experiment`` object to control to which experiment you log the data.
            | If ``None``, log to currently active, and most recent experiment.

    Returns:
        ``None``

    Examples:
        .. code:: python3

            rfc = RandomForestClassifier()
            rfc.fit(X_train, y_train)

            neptune.init('my_workspace/my_project')
            exp = neptune.create_experiment()

            log_class_prediction_error_chart(rfc, X_train, X_test, y_train, y_test, experiment=exp)
    """
    assert is_classifier(
        classifier), 'classifier should be sklearn classifier.'
    exp = _validate_experiment(experiment)

    try:
        fig, ax = plt.subplots()
        visualizer = ClassPredictionError(classifier, is_fitted=True, ax=ax)
        visualizer.fit(X_train, y_train)
        visualizer.score(X_test, y_test)
        visualizer.finalize()
        exp.log_image('charts_sklearn',
                      fig,
                      image_name='Class Prediction Error')
        plt.close(fig)
    except Exception as e:
        print('Did not log Class Prediction Error chart. Error {}'.format(e))
def create_class_prediction_error_chart(classifier, X_train, X_test, y_train,
                                        y_test):
    """Create class prediction error chart.

    Tip:
        Check Sklearn-Neptune integration
        `documentation <https://docs-beta.neptune.ai/essentials/integrations/machine-learning-frameworks/sklearn>`_
        for the full example.

    Args:
        classifier (:obj:`classifier`):
            | Fitted sklearn classifier object
        X_train (:obj:`ndarray`):
            | Training data matrix
        X_test (:obj:`ndarray`):
            | Testing data matrix
        y_train (:obj:`ndarray`):
            | The classification target for training
        y_test (:obj:`ndarray`):
            | The classification target for testing

    Returns:
        ``neptune.types.File`` object that you can assign to run's ``base_namespace``.

    Examples:
        .. code:: python3

            import neptune.new.integrations.sklearn as npt_utils

            rfc = RandomForestClassifier()
            rfc.fit(X_train, y_train)

            run = neptune.init(project='my_workspace/my_project')
            run['visuals/class_prediction_error'] = \
                npt_utils.create_class_prediction_error_chart(rfc, X_train, X_test, y_train, y_test)
    """
    assert is_classifier(
        classifier), 'classifier should be sklearn classifier.'

    chart = None

    try:
        fig, ax = plt.subplots()
        visualizer = ClassPredictionError(classifier, is_fitted=True, ax=ax)
        visualizer.fit(X_train, y_train)
        visualizer.score(X_test, y_test)
        visualizer.finalize()
        chart = neptune.types.File.as_image(fig)
        plt.close(fig)
    except Exception as e:
        print('Did not log Class Prediction Error chart. Error {}'.format(e))

    return chart
Example #5
def class_predict_error(model, classes, X_train, Y_train, X_test, Y_test):
    from yellowbrick.classifier import ClassPredictionError

    # Instantiate the classification model and visualizer
    visualizer = ClassPredictionError(model, classes=classes)

    # Fit the training data to the visualizer
    visualizer.fit(X_train, Y_train)

    # Evaluate the model on the test data
    visualizer.score(X_test, Y_test)

    # Draw visualization
    g = visualizer.poof()
def classprede():
    X, y = make_classification(n_samples=1000,
                               n_classes=5,
                               n_informative=3,
                               n_clusters_per_class=1)

    classes = ["apple", "kiwi", "pear", "banana", "orange"]

    # Perform 80/20 training/test split
    X_train, X_test, y_train, y_test = tts(X, y, test_size=0.20)
    oz = ClassPredictionError(RandomForestClassifier(),
                              classes=classes,
                              ax=newfig())
    oz.fit(X_train, y_train)
    oz.score(X_test, y_test)
    savefig(oz, "class_prediction_error")
def make_cb_pred_error(dataset="fruit", path=None, clf=None):
    clf = clf or RandomForestClassifier()

    loader = {
        'fruit': make_fruit_dataset,
        'credit': load_credit_dataset,
    }[dataset]

    (X_train, X_test, y_train, y_test), classes = loader()

    _, ax = plt.subplots()
    viz = ClassPredictionError(clf, ax=ax, classes=classes)
    viz.fit(X_train, y_train)
    viz.score(X_test, y_test)

    return viz.poof(outpath=path)
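
# Usage sketch for make_cb_pred_error above (illustrative; not part of the original source).
# The 'fruit'/'credit' loaders are helpers assumed by the function and are not defined here,
# so this only shows the intended call with the default RandomForestClassifier.
# make_cb_pred_error(dataset="credit", path="images/credit_class_prediction_error.png")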
    def class_prediction_error(self) -> None:
        """Plot the support (number of training samples) for each class in the fitted
        classification model as a stacked bar chart. Each bar is segmented to show the
        proportion of predictions (including false negatives and false positives, like a
        Confusion Matrix) for each class. You can use a ClassPredictionError to visualize
        which classes your classifier is having a particularly difficult time with, and
        more importantly, what incorrect answers it is giving on a per-class basis.
        """
        visualizer = ClassPredictionError(self.trained_model)
        visualizer.fit(self.X_train, self.y_train)
        visualizer.score(self.X_test, self.y_test)
        save_dir = f"{self.plots_dir}/class_prediction_error_{self.model_id}.png"
        visualizer.show(outpath=save_dir)
        if not LOCAL:
            upload_to_s3(save_dir,
                         f'plots/class_prediction_error_{self.model_id}.png',
                         bucket=S3_BUCKET_NAME)
        plt.clf()
def draw_plots():
    classifier = MultinomialNB(alpha=0.01)

    for technique in ["base", "SMOTE", "ADASYN", "text-aug"]:
        X_train, X_test, y_train, y_test = get_baseline_split(representation="bow")
        if technique == "base":
            X_plot_train, X_plot_test, y_plot_train, y_plot_test = X_train, X_test, y_train, y_test
        elif technique == "SMOTE":
            X_plot_train, y_plot_train = smote.run(X_train, y_train)
            X_plot_test, y_plot_test = X_test, y_test
        elif technique == "ADASYN":
            X_plot_train, y_plot_train = adasyn.run(X_train, y_train)
            X_plot_test, y_plot_test = X_test, y_test
        elif technique == "text-aug":
            X_plot_train, X_plot_test, y_plot_train, y_plot_test = text_augmentation.run(
                books_df=get_fully_processed_books_df(),
                representation="bow")
        else:
            raise ValueError(f"Unknown technique: {technique}")

        # ROC micro average
        viz_roc = ROCAUC(classifier, classes=get_selected_genres(), micro=True, per_class=False)
        viz_roc.fit(X_plot_train, y_plot_train)  # Fit the training data to the viz_roc
        viz_roc.score(X_plot_test, y_plot_test)  # Evaluate the model on the test data
        viz_roc.show()  # Finalize and show the figure

        # ROC - Per Class
        viz_roc = ROCAUC(classifier, classes=get_selected_genres(), micro=True, per_class=True)
        viz_roc.fit(X_plot_train, y_plot_train)  # Fit the training data to the viz_roc
        viz_roc.score(X_plot_test, y_plot_test)  # Evaluate the model on the test data
        viz_roc.show()  # Finalize and show the figure

        # Class Prediction Error
        viz_pred_err = ClassPredictionError(classifier, classes=get_selected_genres())
        viz_pred_err.fit(X_plot_train, y_plot_train)
        viz_pred_err.score(X_plot_test, y_plot_test)
        viz_pred_err.show()

        # The ConfusionMatrix
        cm = ConfusionMatrix(classifier, classes=[0, 1, 2, 3, 4, 5, 6, 7, 8])
        cm.fit(X_plot_train, y_plot_train)
        cm.score(X_plot_test, y_plot_test)
        cm.show()
def train(experiment_id, run_name, xtrain, xtest, ytrain, ytest):
    
    np.random.seed(100)

    with mlflow.start_run(experiment_id=experiment_id, run_name=run_name) as run:
        
        tfid_vect = TfidfVectorizer(analyzer='word', tokenizer=nltk.tokenize.word_tokenize,
                                    stop_words='english', min_df=5)

        my_pipeline = Pipeline(steps=[('vectorizer', tfid_vect),
                                      ('lr', LogisticRegression(random_state=42))])

        my_pipeline.fit(xtrain, ytrain)
        predictions = my_pipeline.predict(xtest)
                                      
        joblib.dump(my_pipeline, 'pipeline_lr.pkl')
        
        accuracy = accuracy_score(ytest, predictions)
        
        f1score = f1_score(ytest, predictions)
        
        auc_score = roc_auc_score(ytest, predictions)
        
        class_report = classification_report(ytest, predictions)
        
        print(f'Accuracy : {round(accuracy, 2)}')
        print(f'f1_score : {round(f1score, 2)}')
        print(f'auc_score : {round(auc_score, 2)}')
        print(f'class_report : \n {class_report}')
        
        mlflow.log_metric('Accuracy', round(accuracy, 2))
        mlflow.log_metric('f1_score', round(f1score, 2))
        mlflow.log_metric('auc_score', round(auc_score, 2))
        
        fig, (ax1, ax2, ax3, ax4) = plt.subplots(nrows=4)
        
        visualizer = ClassificationReport(my_pipeline, ax=ax1, classes=[0,1])
        visualizer.fit(xtrain, ytrain)
        visualizer.score(xtest, ytest)
        a=visualizer.poof(outpath="image/classification_report.png")
        print(' ')
        
        mlflow.log_artifact("image/classification_report.png")
        
        # The ConfusionMatrix visualizer takes a model
        cm = ConfusionMatrix(my_pipeline, ax=ax2, classes=[0,1])
        cm.fit(xtrain, ytrain)
        cm.score(xtest, ytest) 
        b=cm.poof(outpath="image/confusionmatrix.png")
        
        mlflow.log_artifact("image/confusionmatrix.png")
        print(' ')
        
        vis = ROCAUC(my_pipeline, ax=ax3, classes=[0,1])
        vis.fit(xtrain, ytrain)  # Fit the training data to the visualizer
        vis.score(xtest, ytest)  # Evaluate the model on the test data
        c = vis.poof(outpath="image/rocauc.png")             # Draw/show/poof the data
        print(' ')
        mlflow.log_artifact("image/rocauc.png")
        
        visual = ClassPredictionError(my_pipeline, ax=ax4, classes=[0,1])
        visual.fit(xtrain, ytrain)
        visual.score(xtest, ytest)
        g = visual.poof(outpath="image/ClassificationError.png")
        print(' ')
        mlflow.log_artifact("image/ClassificationError.png")
        
        
        return run.info.run_uuid
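
# Usage sketch for train() above (illustrative; not part of the original source).
# xtrain/xtest are iterables of raw text documents and ytrain/ytest are binary labels;
# the experiment name is hypothetical.
# import mlflow
# exp_id = mlflow.create_experiment('tfidf-logreg-demo')
# run_id = train(exp_id, 'baseline-run', xtrain, xtest, ytrain, ytest)
# print('Finished MLflow run:', run_id)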
Example #11
def get_plots():
    all_plots = []
    # FEATURE Visualization

    # Instantiate the visualizer
    plt.figure(figsize=(3.5, 3.5))
    viz = Manifold(manifold="tsne")
    # Fit the data to the visualizer
    viz.fit_transform(X_train, y_train)
    # save to html
    fig = plt.gcf()
    some_htmL = mpld3.fig_to_html(fig)
    all_plots.append("<h4 align='center'>Manifold Visualization</h4>" +
                     some_htmL)
    # clear plot
    plt.clf()

    if ML_ALG_nr == 1:
        # classification

        # Check if we can get the classes
        classes = None
        try:
            classes = list(Enc.inverse_transform(model_def.classes_))
        except ValueError as e:
            app.logger.info(e)

        if classes is not None:
            # Instantiate the classification model and visualizer
            visualizer = ClassPredictionError(DecisionTreeClassifier(),
                                              classes=classes)
            # Fit the training data to the visualizer
            visualizer.fit(X_train, y_train)
            # Evaluate the model on the test data
            visualizer.score(X_test, y_test)
            # save to html
            fig = plt.gcf()
            some_htmL = mpld3.fig_to_html(fig)
            all_plots.append("<h4 align='center'>Class Prediction Error</h4>" +
                             some_htmL)
            # clear plot
            plt.clf()
            # The ConfusionMatrix visualizer takes a model
            cm = ConfusionMatrix(model_def, classes=classes)
            # Fit fits the passed model. This is unnecessary if you pass the visualizer a pre-fitted model
            cm.fit(X_train, y_train)
            # To create the ConfusionMatrix, we need some test data. Score runs predict() on the data
            # and then creates the confusion_matrix from scikit-learn.
            cm.score(X_test, y_test)
            # save to html
            fig = plt.gcf()
            some_htmL = mpld3.fig_to_html(fig)
            all_plots.append("<h4 align='center'>Confusion Matrix</h4>" +
                             some_htmL)
            # clear plot
            plt.clf()

        return all_plots

    elif ML_ALG_nr == 0:
        # regression

        # Instantiate the linear model and visualizer
        visualizer = PredictionError(model_def, identity=True)
        visualizer.fit(X_train,
                       y_train)  # Fit the training data to the visualizer
        visualizer.score(X_test, y_test)  # Evaluate the model on the test data
        # save to html
        fig = plt.gcf()
        some_htmL = mpld3.fig_to_html(fig)
        all_plots.append("<h4 align='center'>Prediction Error Plot</h4>" +
                         some_htmL)
        # clear plot
        plt.clf()

        # Instantiate the model and visualizer
        visualizer = ResidualsPlot(model_def)
        visualizer.fit(X_train,
                       y_train)  # Fit the training data to the visualizer
        visualizer.score(X_test, y_test)
        # save to html
        fig = plt.gcf()
        some_htmL = mpld3.fig_to_html(fig)
        all_plots.append("<h4 align='center'>Residuals Plot</h4>" + some_htmL)
        # clear plot
        plt.clf()

        return all_plots
def class_prediction_errors(xx, yy, estimatorss, **kwargs):
    vz2 = ClassPredictionError(estimatorss,
                               classes=['Reach, 1 Reach, or L/R Reach',
                                        'Null, Multiple Reaches, Or Multiple Arms'],
                               cmap="YlGn", size=(600, 360), **kwargs)
    vz2.fit(xx, yy)
    vz2.score(xx, yy)
    vz2.show()
Example #13
print(confusion_matrix(y_test, y_pred))


# In[34]:


from yellowbrick.classifier import ClassPredictionError


# In[35]:


classes = ['Exited', 'Not Exited']
clf = RandomForestClassifier(n_estimators=200, random_state=200)
visualizer = ClassPredictionError(clf, classes=classes)
visualizer.fit(X_train, y_train)
visualizer.score(X_test, y_test)
visualizer.show()


# In[36]:


svclassifier = SVC(kernel='rbf')
visualizer = ClassPredictionError(svclassifier, classes=classes)
visualizer.fit(X_train, y_train)
visualizer.score(X_test, y_test)
visualizer.show()


# In[10]:
Example #14
roc = ROCAUC(rf, classes=cancer.target_names)
roc.fit(X_train, y_train)
roc.score(X_test, y_test)
roc.poof()

### Confusion Matrix

from yellowbrick.classifier import ConfusionMatrix

classes = cancer.target_names

conf_matrix = ConfusionMatrix(rf,
                              classes=classes,
                              label_encoder={
                                  0: 'benign',
                                  1: 'malignant'
                              })
conf_matrix.fit(X_train, y_train)
conf_matrix.score(X_test, y_test)
conf_matrix.poof()

### Class Prediction Error

from yellowbrick.classifier import ClassPredictionError

visualizer = ClassPredictionError(rf, classes=classes)

visualizer.fit(X_train, y_train)
visualizer.score(X_test, y_test)
visualizer.poof()
Example #15
# plot no skill
plt.plot([0, 1], [0, 1], linestyle='--')
# plot the roc curve for the model
plt.plot(fpr, tpr, marker='.')
# show the plot
plt.show()

from sklearn.metrics import roc_auc_score
from sklearn.metrics import roc_curve

logit_roc_auc = roc_auc_score(y_test, model.predict(X_test))
fpr, tpr, thresholds = roc_curve(y_test, probs)
plt.figure()
plt.plot(fpr, tpr, label='Logistic Regression (area = %0.2f)' % logit_roc_auc)
plt.plot([0, 1], [0, 1], 'r--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver operating characteristic')
plt.legend(loc="lower right")
plt.savefig('Visually/Log_ROC')
plt.show()

from sklearn.ensemble import RandomForestClassifier
from yellowbrick.classifier import ClassPredictionError

visualizer = ClassPredictionError(model=LogisticRegression())
visualizer.fit(X=X_train, y=y_train)
visualizer.score(X=X_test, y=y_test)
visualizer.poof()
print('mean cross-validation score: {:.2f}'.format(result3.mean()))

cm = ConfusionMatrix(foret, classes=[0, 1, 2, 3, 4, 6], percent=True)
cm.fit(x_train, y_train)
cm.score(x_test, y_test)
cm.poof()

size = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8]

lc = LearningCurve(RandomForestClassifier(), train_sizes=size, scoring='accuracy')
lc.fit(x_train, y_train)
lc.poof()

viz = ClassPredictionError(RandomForestClassifier(),
                           classes=["0", "1", "2", "3", "4", "5", "6"])
viz.fit(x_train, y_train)
viz.score(x_test, y_test)
viz.poof()

fig = plt.figure()
ax = fig.add_subplot()

feat = FeatureImportances(RandomForestClassifier(), ax=ax)
feat.fit(x_train, y_train)
feat.poof()
'''--------------------- Neural network --------------------- '''

neurone = MLPClassifier()
neurone.fit(x_train, y_train)

print(neurone.score(x_test, y_test))
# #### Load a model in... if starting this notebook from scratch, just load pre-trained models to visualise

# In[64]:

#insert the trained classifier from above in here
fitted_classifier_for_visualization = XG_clf_finetuned

# In[65]:

# Seems to be predicting non-loyal pretty well; however, loyal is kind of hit or miss
from yellowbrick.classifier import ClassPredictionError

visualizer_entropy = ClassPredictionError(fitted_classifier_for_visualization,
                                          classes=class_names)

visualizer_entropy.fit(X_train, y_train)
visualizer_entropy.score(X_test, y_test)
g = visualizer_entropy.poof()

# #### To get the visualization of ROC and AUC curves plug in the CLF object from Section 2.3 to visualize these curves for the specific model that was trained

# In[66]:

from yellowbrick.classifier import ROCAUC

visualizer_entropy = ROCAUC(fitted_classifier_for_visualization,
                            classes=class_names)

visualizer_entropy.fit(X_train,
                       y_train)  # Fit the training data to the visualizer
visualizer_entropy.score(X_test, y_test)  # Evaluate the model on the test data
Example #18
    def draw_prediction_error(self):
        visualizer = ClassPredictionError(self.model, classes=self.le.classes_)
        visualizer.fit(self.training_data, self.training_labels)
        visualizer.score(self.test_data, self.test_labels)
        visualizer.poof()