Code Example #1
def eva_model(c, n, X, y, X_test, y_test, class_names, outdir):
    model = svm.LinearSVC(class_weight='balanced', dual=False, max_iter=10000, C=c)
    rfe = RFE(model, n_features_to_select=n)

    ## learning curve
    plt.clf()
    viz_LC = LearningCurve(
        rfe, scoring='f1_weighted', n_jobs=4
    )
    viz_LC.fit(X, y)
    viz_LC.show(outpath=outdir + '/LC.png')

    ## classification report
    plt.clf()
    viz_CR = ClassificationReport(rfe, classes=class_names, support=True)
    viz_CR.fit(X, y)
    viz_CR.score(X_test, y_test)
    viz_CR.show(outpath=outdir + '/CR.png')

    ## confusion matrix
    plt.clf()
    viz_CM = ConfusionMatrix(rfe, classes=class_names)
    viz_CM.fit(X, y)
    viz_CM.score(X_test, y_test)
    viz_CM.show(outpath=outdir + '/CM.png')

    ## precision recall curve
    plt.clf()
    viz_PRC = PrecisionRecallCurve(rfe, per_class=True, iso_f1_curves=True,
                                   fill_area=False, micro=False, classes=class_names)
    viz_PRC.fit(X, y)
    viz_PRC.score(X_test, y_test)
    viz_PRC.show(outpath=outdir + '/PRC.png', size=(1080, 720))

    ## class prediction error
    plt.clf()
    viz_CPE = ClassPredictionError(
        rfe, classes=class_names
    )
    viz_CPE.fit(X, y)
    viz_CPE.score(X_test, y_test)
    viz_CPE.show(outpath=outdir + '/CPE.png')

    ## ROCAUC
    plt.clf()
    viz_RA = ROCAUC(rfe, classes=class_names, size=(1080,720))
    viz_RA.fit(X, y)
    viz_RA.score(X, y)
    viz_RA.show(outpath=outdir + '/RA.png')

    fit = rfe.fit(X, y)
    y_predict = fit.predict(X_test)
    f1 = f1_score(y_test, y_predict, average='weighted')

    features_retained_RFE = X.columns[rfe.get_support()].values
    feature_df = pd.DataFrame(features_retained_RFE.tolist())
    feature_df.to_csv(outdir + '/features.csv', sep='\t', index=False)

    return f1
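The snippet above omits its imports. Based on the names it uses, it appears to rely on something like the following (an inferred list, not copied from the original file):

import pandas as pd
import matplotlib.pyplot as plt
from sklearn import svm
from sklearn.feature_selection import RFE
from sklearn.metrics import f1_score
from yellowbrick.model_selection import LearningCurve
from yellowbrick.classifier import (
    ClassificationReport, ConfusionMatrix, PrecisionRecallCurve,
    ClassPredictionError, ROCAUC,
)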
Code Example #2
def log_class_prediction_error_chart(classifier,
                                     X_train,
                                     X_test,
                                     y_train,
                                     y_test,
                                     experiment=None):
    """Log class prediction error chart.

    Make sure you created an experiment by using ``neptune.create_experiment()`` before you use this method.

    Tip:
        Check `Neptune documentation <https://docs.neptune.ai/integrations/scikit_learn.html>`_ for the full example.

    Args:
        classifier (:obj:`classifier`):
            | Fitted sklearn classifier object
        X_train (:obj:`ndarray`):
            | Training data matrix
        X_test (:obj:`ndarray`):
            | Testing data matrix
        y_train (:obj:`ndarray`):
            | The classification target for training
        y_test (:obj:`ndarray`):
            | The classification target for testing
        experiment (:obj:`neptune.experiments.Experiment`, optional, default is ``None``):
            | Neptune ``Experiment`` object to control to which experiment you log the data.
            | If ``None``, logs to the currently active (most recent) experiment.

    Returns:
        ``None``

    Examples:
        .. code:: python3

            rfc = RandomForestClassifier()
            rfc.fit(X_train, y_train)

            neptune.init('my_workspace/my_project')
            exp = neptune.create_experiment()

            log_class_prediction_error_chart(rfc, X_train, X_test, y_train, y_test, experiment=exp)
    """
    assert is_classifier(
        classifier), 'classifier should be sklearn classifier.'
    exp = _validate_experiment(experiment)

    try:
        fig, ax = plt.subplots()
        visualizer = ClassPredictionError(classifier, is_fitted=True, ax=ax)
        visualizer.fit(X_train, y_train)
        visualizer.score(X_test, y_test)
        visualizer.finalize()
        exp.log_image('charts_sklearn',
                      fig,
                      image_name='Class Prediction Error')
        plt.close(fig)
    except Exception as e:
        print('Did not log Class Prediction Error chart. Error {}'.format(e))
Code Example #3
def class_prediction_error(ax=None):
    data = load_game(return_dataset=True)
    X, y = data.to_numpy()

    X = OneHotEncoder().fit_transform(X).toarray()

    viz = ClassPredictionError(GaussianNB(), ax=ax)
    return tts_plot(viz, X, y)
def pred_error(X, y, test_size=0.10, random_state=42):
    models = [
        GaussianNB(),
        KNeighborsClassifier(),
        SGDClassifier(),
        BaggingClassifier(KNeighborsClassifier()),
        DecisionTreeClassifier(),
        LinearSVC(penalty="l1", dual=False)
    ]

    classes = ["not_passed", "passed"]

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state)

    for model in models:
        model.fit(X_train, y_train)

        visualizer = ClassPredictionError(model, classes=classes)
        visualizer.fit(X_train, y_train)  # Fit the visualizer and the model
        visualizer.score(X_test, y_test)  # Evaluate the model on the test data

        print("Accuracy: %0.6f" % model.score(X_test, y_test))
        g = visualizer.poof()

        print('')
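A hedged usage sketch for pred_error on synthetic two-class data; the imports below are inferred from the names the snippet uses and are not part of the original file:

from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import SGDClassifier
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import LinearSVC
from yellowbrick.classifier import ClassPredictionError

# Two-class toy data matching the "not_passed"/"passed" labels used above
X, y = make_classification(n_samples=500, n_classes=2, random_state=42)
pred_error(X, y, test_size=0.2)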
Code Example #5
def create_class_prediction_error_chart(classifier, X_train, X_test, y_train,
                                        y_test):
    """Create class prediction error chart.

    Tip:
        Check Sklearn-Neptune integration
        `documentation <https://docs-beta.neptune.ai/essentials/integrations/machine-learning-frameworks/sklearn>`_
        for the full example.

    Args:
        classifier (:obj:`classifier`):
            | Fitted sklearn classifier object
        X_train (:obj:`ndarray`):
            | Training data matrix
        X_test (:obj:`ndarray`):
            | Testing data matrix
        y_train (:obj:`ndarray`):
            | The classification target for training
        y_test (:obj:`ndarray`):
            | The classification target for testing

    Returns:
        ``neptune.types.File`` object that you can assign to run's ``base_namespace``.

    Examples:
        .. code:: python3

            import neptune.new.integrations.sklearn as npt_utils

            rfc = RandomForestClassifier()
            rfc.fit(X_train, y_train)

            run = neptune.init(project='my_workspace/my_project')
            run['visuals/class_prediction_error'] = \
                npt_utils.create_class_prediction_error_chart(rfc, X_train, X_test, y_train, y_test)
    """
    assert is_classifier(
        classifier), 'classifier should be sklearn classifier.'

    chart = None

    try:
        fig, ax = plt.subplots()
        visualizer = ClassPredictionError(classifier, is_fitted=True, ax=ax)
        visualizer.fit(X_train, y_train)
        visualizer.score(X_test, y_test)
        visualizer.finalize()
        chart = neptune.types.File.as_image(fig)
        plt.close(fig)
    except Exception as e:
        print('Did not log Class Prediction Error chart. Error {}'.format(e))

    return chart
Code Example #6
File: imputer.py  Project: ortsed/demo_imputer
def eval_models(df,
                race="W",
                models=["gnb", "rf", "xgb"],
                census=False,
                report=False,
                roc=False,
                pr=False,
                cpe=False):
    """ Run evaluation on a set of models and a single race class """

    df = prep_data(df)
    tes = joblib.load(DIR + "/data/models/transformers_binary.joblib")

    for col in ["first_name", "last_name", "middle_name"]:
        te = tes[race][col]
        df[col] = te.transform(df[col])
        df[col] = df[col].fillna(0)

    tmpa = np.where(df.race_code == race, True, False)
    df = df.fillna(0)

    for modelv in models:

        models = joblib.load(DIR + "/data/models/models_binary_%s%s.joblib" %
                             (modelv, model_string))
        model = models[race]

        model.target_type_ = "binary"

        if report:
            visualizer = ClassificationReport(model,
                                              classes=model.classes_,
                                              support=True)
            visualizer.score(df[MODEL_COLS], tmpa)
            visualizer.show()

        if roc:
            visualizer = ROCAUC(model, classes=["W", "not-W"])
            visualizer.score(df[MODEL_COLS], tmpa)
            visualizer.show()
        if pr:
            viz = PrecisionRecallCurve(model,
                                       is_fitted=True,
                                       classes=["W", "not-W"])
            viz.score(df[MODEL_COLS], tmpa)
            viz.show()

        if cpe:
            viz = ClassPredictionError(model)
            viz.score(df[MODEL_COLS], tmpa)
            viz.show()
Code Example #7
 def class_prediction_error(self) -> None:
     """Plot the support (number of training samples) for each class in the fitted classification
      model as a stacked bar chart. Each bar is segmented to show the proportion of predictions
      (including false negatives and false positives, like a Confusion Matrix) for each class.
      You can use a ClassPredictionError to visualize which classes your classifier is having
      a particularly difficult time with, and more importantly, what incorrect answers it is
      giving on a per-class basis.
      """
     visualizer = ClassPredictionError(self.trained_model)
     visualizer.fit(self.X_train, self.y_train)
     visualizer.score(self.X_test, self.y_test)
     save_dir = f"{self.plots_dir}/class_prediction_error_{self.model_id}.png"
     visualizer.show(outpath=save_dir)
     if not LOCAL:
         upload_to_s3(save_dir,
                      f'plots/class_prediction_error_{self.model_id}.png',
                      bucket=S3_BUCKET_NAME)
     plt.clf()
Code Example #8
def class_predict_error(model, classes, X_train, Y_train, X_test, Y_test):
    from yellowbrick.classifier import ClassPredictionError

    # Instantiate the classification model and visualizer
    visualizer = ClassPredictionError(model, classes=classes)

    # Fit the training data to the visualizer
    visualizer.fit(X_train, Y_train)

    # Evaluate the model on the test data
    visualizer.score(X_test, Y_test)

    # Draw visualization
    g = visualizer.poof()
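A hypothetical call, assuming an existing train/test split; the classifier and class labels here are placeholders, not part of the original snippet:

from sklearn.ensemble import RandomForestClassifier

class_predict_error(RandomForestClassifier(), ["negative", "positive"],
                    X_train, y_train, X_test, y_test)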
Code Example #9
def draw_plots():
    classifier = MultinomialNB(alpha=0.01)

    for technique in ["base", "SMOTE", "ADASYN", "text-aug"]:
        X_train, X_test, y_train, y_test = get_baseline_split(representation="bow")
        if technique == "base":
            X_plot_train, X_plot_test, y_plot_train, y_plot_test = X_train, X_test, y_train, y_test
        elif technique == "SMOTE":
            X_plot_train, y_plot_train = smote.run(X_train, y_train)
            X_plot_test, y_plot_test = X_test, y_test
        elif technique == "ADASYN":
            X_plot_train, y_plot_train = adasyn.run(X_train, y_train)
            X_plot_test, y_plot_test = X_test, y_test
        elif technique == "text-aug":
            X_plot_train, X_plot_test, y_plot_train, y_plot_test = text_augmentation.run(
                books_df=get_fully_processed_books_df(),
                representation="bow")
        else:
            raise Exception()

        # ROC micro average
        viz_roc = ROCAUC(classifier, classes=get_selected_genres(), micro=True, per_class=False)
        viz_roc.fit(X_plot_train, y_plot_train)  # Fit the training data to the viz_roc
        viz_roc.score(X_plot_test, y_plot_test)  # Evaluate the model on the test data
        viz_roc.show()  # Finalize and show the figure

        # ROC - Per Class
        viz_roc = ROCAUC(classifier, classes=get_selected_genres(), micro=True, per_class=True)
        viz_roc.fit(X_plot_train, y_plot_train)  # Fit the training data to the viz_roc
        viz_roc.score(X_plot_test, y_plot_test)  # Evaluate the model on the test data
        viz_roc.show()  # Finalize and show the figure

        # Class Prediction Error
        viz_pred_err = ClassPredictionError(classifier, classes=get_selected_genres())
        viz_pred_err.fit(X_plot_train, y_plot_train)
        viz_pred_err.score(X_plot_test, y_plot_test)
        viz_pred_err.show()

        # The ConfusionMatrix
        cm = ConfusionMatrix(classifier, classes=[0, 1, 2, 3, 4, 5, 6, 7, 8])
        cm.fit(X_plot_train, y_plot_train)
        cm.score(X_plot_test, y_plot_test)
        cm.show()
Code Example #10
def make_cb_pred_error(dataset="fruit", path=None, clf=None):
    clf = clf or RandomForestClassifier()

    loader = {
        'fruit': make_fruit_dataset,
        'credit': load_credit_dataset,
    }[dataset]

    (X_train, X_test, y_train, y_test), classes = loader()

    _, ax = plt.subplots()
    viz =  ClassPredictionError(clf, ax=ax, classes=classes)
    viz.fit(X_train, y_train)
    viz.score(X_test, y_test)

    return viz.poof(outpath=path)
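The dataset loaders referenced above (make_fruit_dataset, load_credit_dataset) are project-specific helpers. A hypothetical stand-in with the return shape the function expects, sketched on synthetic data rather than the original datasets:

from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

def make_fruit_dataset(test_size=0.2, random_state=42):
    # Returns ((X_train, X_test, y_train, y_test), classes), matching the
    # unpacking done in make_cb_pred_error above.
    X, y = make_classification(n_samples=300, n_classes=3, n_informative=4,
                               n_clusters_per_class=1, random_state=random_state)
    classes = ["apple", "kiwi", "pear"]
    split = train_test_split(X, y, test_size=test_size, random_state=random_state)
    return split, classes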
Code Example #11
def classprede():
    X, y = make_classification(n_samples=1000,
                               n_classes=5,
                               n_informative=3,
                               n_clusters_per_class=1)

    classes = ["apple", "kiwi", "pear", "banana", "orange"]

    # Perform 80/20 training/test split
    X_train, X_test, y_train, y_test = tts(X, y, test_size=0.20)
    oz = ClassPredictionError(RandomForestClassifier(),
                              classes=classes,
                              ax=newfig())
    oz.fit(X_train, y_train)
    oz.score(X_test, y_test)
    savefig(oz, "class_prediction_error")
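tts, newfig, and savefig are helpers from the surrounding gallery script rather than library functions; plausible definitions (an assumption, not the original code) would be:

import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split as tts

def newfig():
    # Fresh axes for each visualizer
    _, ax = plt.subplots()
    return ax

def savefig(viz, name):
    # Finalize the visualizer and write it to disk
    viz.show(outpath=f"{name}.png")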
Code Example #12
    def store_experiment_data(self, X_test, y_test):
        class_report = ClassificationReport(self.model)
        score = class_report.score(X_test, y_test)
        class_report.poof(
            'metrics/classification_report.png', clear_figure=True)
        self.ex.add_artifact('metrics/classification_report.png')

        conf_matrix = ConfusionMatrix(self.model)
        conf_matrix.score(X_test, y_test)
        conf_matrix.poof(
            'metrics/confusion_matrix.png', clear_figure=True)
        self.ex.add_artifact('metrics/confusion_matrix.png')

        cpd = ClassPredictionError(self.model)
        cpd.score(X_test, y_test)
        cpd.poof('metrics/class_prediction_error.png', clear_figure=True)
        self.ex.add_artifact('metrics/class_prediction_error.png')

        print('score=', score)
        self.ex.log_scalar('score', score)
Code Example #13
def class_prediction_errors(xx, yy, estimatorss, **kwargs):
    vz2 = ClassPredictionError(estimatorss,
                               classes=['Reach, 1 Reach, or L/R Reach',
                                        'Null, Multiple Reaches, Or Multiple Arms'],
                               cmap="YlGn", size=(600, 360), **kwargs)
    vz2.fit(xx, yy)
    vz2.score(xx, yy)
    vz2.show()
Code Example #14
print("Confusion Matrix: ")
print(confusion_matrix(y_test, y_pred))


# In[34]:


from yellowbrick.classifier import ClassPredictionError


# In[35]:


classes = ['Exited', 'Not Exited']
clf = RandomForestClassifier(n_estimators=200, random_state=200)
visualizer = ClassPredictionError(clf)
visualizer.fit(X_train, y_train)
visualizer.score(X_test, y_test)
visualizer.show()


# In[36]:


svclassifier = SVC(kernel='rbf')
visualizer = ClassPredictionError(svclassifier)
visualizer.fit(X_train, y_train)
visualizer.score(X_test, y_test)
visualizer.show()

Code Example #15
File: app.py  Project: L0xGames/MLV
def get_plots():
    all_plots = []
    # FEATURE Visualization

    # Instantiate the visualizer
    plt.figure(figsize=(3.5, 3.5))
    viz = Manifold(manifold="tsne")
    # Fit the data to the visualizer
    viz.fit_transform(X_train, y_train)
    # save to html
    fig = plt.gcf()
    some_htmL = mpld3.fig_to_html(fig)
    all_plots.append("<h4 align='center'>Manifold Visualization</h4>" +
                     some_htmL)
    # clear plot
    plt.clf()

    if ML_ALG_nr == 1:
        # classification

        # Check if we can get the classes
        classes = None
        try:
            classes = list(Enc.inverse_transform(model_def.classes_))
        except ValueError as e:
            app.logger.info(e)

        if classes is not None:
            # Instantiate the classification model and visualizer
            visualizer = ClassPredictionError(DecisionTreeClassifier(),
                                              classes=classes)
            # Fit the training data to the visualizer
            visualizer.fit(X_train, y_train)
            # Evaluate the model on the test data
            visualizer.score(X_test, y_test)
            # save to html
            fig = plt.gcf()
            some_htmL = mpld3.fig_to_html(fig)
            all_plots.append("<h4 align='center'>Class Prediction Error</h4>" +
                             some_htmL)
            # clear plot
            plt.clf()
            # The ConfusionMatrix visualizer takes a model
            cm = ConfusionMatrix(model_def, classes=classes)
            # Fit fits the passed model. This is unnecessary if you pass the visualizer a pre-fitted model
            cm.fit(X_train, y_train)
            # To create the ConfusionMatrix, we need some test data. Score runs predict() on the data
            # and then creates the confusion_matrix from scikit-learn.
            cm.score(X_test, y_test)
            # save to html
            fig = plt.gcf()
            some_htmL = mpld3.fig_to_html(fig)
            all_plots.append("<h4 align='center'>Confusion Matrix</h4>" +
                             some_htmL)
            # clear plot
            plt.clf()

        return all_plots

    elif ML_ALG_nr == 0:
        # regression

        # Instantiate the linear model and visualizer
        visualizer = PredictionError(model_def, identity=True)
        visualizer.fit(X_train,
                       y_train)  # Fit the training data to the visualizer
        visualizer.score(X_test, y_test)  # Evaluate the model on the test data
        # save to html
        fig = plt.gcf()
        some_htmL = mpld3.fig_to_html(fig)
        all_plots.append("<h4 align='center'>Prediction Error Plot</h4>" +
                         some_htmL)
        # clear plot
        plt.clf()

        # Instantiate the model and visualizer
        visualizer = ResidualsPlot(model_def)
        visualizer.fit(X_train,
                       y_train)  # Fit the training data to the visualizer
        visualizer.score(X_test, y_test)
        # save to html
        fig = plt.gcf()
        some_htmL = mpld3.fig_to_html(fig)
        all_plots.append("<h4 align='center'>Residuals Plot</h4>" + some_htmL)
        # clear plot
        plt.clf()

        return all_plots
Code Example #16
result3 = cross_val_score(foret, x_train, y_train, cv=shuffle)
print('shuffle split cross-validation scores: {}'.format(result3))
print('mean cross-validation score: {:.2f}'.format(result3.mean()))

cm = ConfusionMatrix(foret, classes=[0, 1, 2, 3, 4, 6], percent=True)
cm.fit(x_train, y_train)
cm.score(x_test, y_test)
cm.poof()

size = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8]

lc = LearningCurve(RandomForestClassifier(), train_sizes=size, scoring='r2')
lc.fit(x_train, y_train)
lc.poof()

viz = ClassPredictionError(RandomForestClassifier(),
                           classes=["0", "1", "2", "3", "4", "5", "6"])
viz.fit(x_train, y_train)
viz.score(x_test, y_test)
viz.poof()

fig = plt.figure()
ax = fig.add_subplot()

feat = FeatureImportances(RandomForestClassifier(), ax=ax)
feat.fit(x_train, y_train)
feat.poof()
'''--------------------- Neural network --------------------- '''

neurone = MLPClassifier()
neurone.fit(x_train, y_train)
Code Example #17
# plot no skill
plt.plot([0, 1], [0, 1], linestyle='--')
# plot the roc curve for the model
plt.plot(fpr, tpr, marker='.')
# show the plot
plt.show()

from sklearn.metrics import roc_auc_score
from sklearn.metrics import roc_curve

logit_roc_auc = roc_auc_score(y_test, model.predict(X_test))
fpr, tpr, thresholds = roc_curve(y_test, probs)
plt.figure()
plt.plot(fpr, tpr, label='Logistic Regression (area = %0.2f)' % logit_roc_auc)
plt.plot([0, 1], [0, 1], 'r--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver operating characteristic')
plt.legend(loc="lower right")
plt.savefig('Visually/Log_ROC')
plt.show()

from sklearn.ensemble import RandomForestClassifier
from yellowbrick.classifier import ClassPredictionError

visualizer = ClassPredictionError(model=LogisticRegression())
visualizer.fit(X=X_train, y=y_train)
visualizer.score(X=X_test, y=y_test)
visualizer.poof()
Code Example #18
#calling function
model_performance(X_fclass_train, X_fclass_test, y_train, y_test)

#BaggingClassifierwith f_classif features
model = BaggingClassifier()

model.fit(X_fclass_train, y_train)

classes = ["not_passed", "passed"]

visualizer = ClassificationReport(model, classes=classes)
visualizer.fit(X_fclass_train, y_train)  # Fit the visualizer and the model
visualizer.score(X_fclass_test, y_test)  # Evaluate the model on the test data
visualizer.poof(outpath="bag_classification_report_f_classIF.png")

visualizer = ClassPredictionError(model, classes=classes)
visualizer.fit(X_fclass_train, y_train)
visualizer.score(X_fclass_test, y_test)
visualizer.poof(outpath="bag_class_errorf_classIF.png")

visualizer = DiscriminationThreshold(model)
visualizer.fit(X_fclass_train,
               y_train)  # Fit the training data to the visualizer
visualizer.score(X_fclass_test, y_test)
visualizer.poof(outpath="bag_descrimination_thresholdf_classIF.png")

# Create the visualizer, fit, score, and poof it
viz = PrecisionRecallCurve(model)
viz.fit(X_fclass_train, y_train)
viz.score(X_fclass_test, y_test)
viz.poof(outpath="bag_precision_recall_curvef_classIF.png")
Code Example #19
model = KNeighborsClassifier(n_neighbors=kVals[i])
model.fit(trainData[:datasize], trainLabels[:datasize])
predictions = model.predict(X_test)

# show classification reports demonstrating the accuracy of the classifier for each of the digits
print(classification_report(y_test, predictions))

model = KNeighborsClassifier(n_neighbors=kVals[i])
visualizer = ClassificationReport(model, support=True)
visualizer.fit(trainData[:datasize], trainLabels[:datasize])
visualizer.score(X_test, y_test)
g = visualizer.poof()

#class prediction error plot
model = KNeighborsClassifier(n_neighbors=kVals[i])
visualizer = ClassPredictionError(model)
visualizer.fit(trainData[:datasize], trainLabels[:datasize])
visualizer.score(X_test, y_test)
g = visualizer.poof()

#plot pairs pca plots
X_train, y_train = trainData[:datasize], trainLabels[:datasize]
pca = PCA(n_components=2)
fig, plots = plt.subplots(10, 10)
fig.set_size_inches(50, 50)
plt.prism()
for i, j in product(range(10), repeat=2):
    if i > j:
        continue
    X_ = X_train[(y_train == i) + (y_train == j)]
    y_ = y_train[(y_train == i) + (y_train == j)]
Code Example #20
X = load('X.joblib')
y = load('y.joblib')

# %%
to_graphviz(clf, num_trees=0, rankdir='LR')

# %%
classification_report(clf, X, y)

# %%
visualizer = ROCAUC(clf, classes=class_names)
visualizer.score(X, y)
visualizer.poof()

# %%
visualizer = ClassPredictionError(clf, classes=class_names)
visualizer.score(X, y)
visualizer.poof()

# %%
visualizer = DiscriminationThreshold(clf)
visualizer.fit(X, y)
visualizer.poof()

# %%
keep = [263, 268, 287, 288, 300, 302, 307, 308, 313, 315]

# %%
seed = 15
test_size = 0.33
Xt, Xv, yt, yv = \
Code Example #21
 def evaluate_visualizer(self, classes=None, params={}):
     LOGGER.info('Initializing plot model')
     if os.path.isdir(os.path.join(os.getcwd(), 'visualizer/')) == False:
         os.makedirs(os.path.join(os.getcwd(), 'visualizer/'))
     if classes is None:
         classes = pd.value_counts(self.y.values.flatten()).index.tolist()
     visualizers = []
     for idx, (name_model, estimator) in enumerate(self.estimator.items()):
         X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
             self.X,
             self.y,
             test_size=0.2,
             stratify=self.y,
             random_state=24)
         try:
             LOGGER.info('Visualizer ClassificationReport')
             visualizer = ClassificationReport(model=estimator,
                                               classes=classes)
             if visualizer.__class__.__name__ in params.keys():
                 visualizer = ClassificationReport(
                     **params[visualizer.__class__.__name__])
             visualizer.fit(X_train, y_train)
             visualizer.score(X_test, y_test)
             visualizer.show(outpath=os.path.join(
                 os.getcwd(),
                 f'visualizer/{visualizer.__class__.__name__}_{estimator.__class__.__name__}.png'
             ))
             plt.cla()
         except:
             LOGGER.warn('ERROR ClassificationReport')
         try:
             LOGGER.info('Visualizer ConfusionMatrix')
             visualizer = ConfusionMatrix(model=estimator, classes=classes)
             if visualizer.__class__.__name__ in params.keys():
                 visualizer = ConfusionMatrix(
                     **params[visualizer.__class__.__name__])
             visualizer.fit(X_train, y_train)
             visualizer.score(X_test, y_test)
             visualizer.show(outpath=os.path.join(
                 os.getcwd(),
                 f'visualizer/{visualizer.__class__.__name__}_{estimator.__class__.__name__}.png'
             ))
             plt.cla()
         except:
             LOGGER.warn('ERROR ConfusionMatrix')
         try:
             LOGGER.info('Visualizer ROCAUC')
             visualizer = ROCAUC(model=estimator, classes=classes)
             if visualizer.__class__.__name__ in params.keys():
                 visualizer = ROCAUC(
                     **params[visualizer.__class__.__name__])
             visualizer.fit(X_train, y_train)
             visualizer.score(X_test, y_test)
             visualizer.show(outpath=os.path.join(
                 os.getcwd(),
                 f'visualizer/{visualizer.__class__.__name__}_{estimator.__class__.__name__}.png'
             ))
             plt.cla()
         except:
             LOGGER.warn('ERROR ROCAUC')
         try:
             LOGGER.info('Visualizer PrecisionRecallCurve')
             visualizer = PrecisionRecallCurve(model=estimator,
                                               per_class=True,
                                               classes=classes)
             if visualizer.__class__.__name__ in params.keys():
                 visualizer = PrecisionRecallCurve(
                     **params[visualizer.__class__.__name__])
             visualizer.fit(X_train, y_train)
             visualizer.score(X_test, y_test)
             visualizer.show(outpath=os.path.join(
                 os.getcwd(),
                 f'visualizer/{visualizer.__class__.__name__}_{estimator.__class__.__name__}.png'
             ))
             plt.cla()
         except:
             LOGGER.warn('ERROR PrecisionRecallCurve')
         try:
             LOGGER.info('Visualizer ClassPredictionError')
             visualizer = ClassPredictionError(model=estimator,
                                               classes=classes)
             if visualizer.__class__.__name__ in params.keys():
                 visualizer = ClassPredictionError(
                     **params[visualizer.__class__.__name__])
             visualizer.fit(X_train, y_train)
             visualizer.score(X_test, y_test)
             visualizer.show(outpath=os.path.join(
                 os.getcwd(),
                 f'visualizer/{visualizer.__class__.__name__}_{estimator.__class__.__name__}.png'
             ))
             plt.cla()
         except:
             LOGGER.warn('ERROR ClassPredictionError')
         try:
             LOGGER.info('Visualizer Discrimination Threshold')
             visualizer = DiscriminationThreshold(model=estimator,
                                                  classes=classes)
             if visualizer.__class__.__name__ in params.keys():
                 visualizer = DiscriminationThreshold(
                     **params[visualizer.__class__.__name__])
             visualizer.fit(X_train, y_train)
             visualizer.score(X_test, y_test)
             visualizer.show(outpath=os.path.join(
                 os.getcwd(),
                 f'visualizer/{visualizer.__class__.__name__}_{estimator.__class__.__name__}.png'
             ))
             plt.cla()
         except:
             LOGGER.warn('ERROR Discrimination Threshold')
Code Example #22
File: random_forest.py  Project: mirzask/summer19
roc = ROCAUC(rf, classes=cancer.target_names)
roc.fit(X_train, y_train)
roc.score(X_test, y_test)
roc.poof()

### Confusion Matrix

from yellowbrick.classifier import ConfusionMatrix

classes = cancer.target_names

conf_matrix = ConfusionMatrix(rf,
                              classes=classes,
                              label_encoder={
                                  0: 'benign',
                                  1: 'malignant'
                              })
conf_matrix.fit(X_train, y_train)
conf_matrix.score(X_test, y_test)
conf_matrix.poof()

### Class Prediction Error

from yellowbrick.classifier import ClassPredictionError

visualizer = ClassPredictionError(rf, classes=classes)

visualizer.fit(X_train, y_train)
visualizer.score(X_test, y_test)
visualizer.poof()
Code Example #23
def train(experiment_id, run_name, xtrain, xtest, ytrain, ytest):
    
    np.random.seed(100)

    
    with mlflow.start_run(experiment_id=experiment_id, run_name=run_name) as run:
        
        tfid_vect =TfidfVectorizer(analyzer='word', tokenizer=nltk.tokenize.word_tokenize, stop_words='english', min_df=5)
        
        
        my_pipeline = Pipeline(steps=[('vectorizer', tfid_vect),
                                       ('lr', LogisticRegression(random_state=42))])
        
           
        my_pipeline.fit(xtrain, ytrain)
        predictions = my_pipeline.predict(xtest)
                                      
        joblib.dump(my_pipeline, 'pipeline_lr.pkl')
        
        accuracy = accuracy_score(ytest, predictions)
        
        f1score = f1_score(ytest, predictions)
        
        auc_score = roc_auc_score(ytest, predictions)
        
        class_report = classification_report(ytest, predictions)
        
        print(f'Accuracy : {round(accuracy, 2)}')
        print(f'f1_score : {round(f1score, 2)}')
        print(f'auc_score : {round(auc_score, 2)}')
        print(f'class_report : \n {class_report}')
        
        mlflow.log_metric('Accuracy', round(accuracy, 2))
        mlflow.log_metric('f1_score', round(f1score, 2))
        mlflow.log_metric('auc_score', round(auc_score, 2))
        
        fig, (ax1, ax2, ax3, ax4) = plt.subplots(nrows=4)
        
        visualizer = ClassificationReport(my_pipeline, ax=ax1, classes=[0,1])
        visualizer.fit(xtrain, ytrain)
        visualizer.score(xtest, ytest)
        a=visualizer.poof(outpath="image/classification_report.png")
        print(' ')
        
        mlflow.log_artifact("image/classification_report.png")
        
        # The ConfusionMatrix visualizer takes a model
        cm = ConfusionMatrix(my_pipeline, ax=ax2, classes=[0,1])
        cm.fit(xtrain, ytrain)
        cm.score(xtest, ytest) 
        b=cm.poof(outpath="image/confusionmatrix.png")
        
        mlflow.log_artifact("image/confusionmatrix.png")
        print(' ')
        
        vis = ROCAUC(my_pipeline, ax=ax3, classes=[0,1])
        vis.fit(xtrain, ytrain)  # Fit the training data to the visualizer
        vis.score(xtest, ytest)  # Evaluate the model on the test data
        c = vis.poof(outpath="image/rocauc.png")             # Draw/show/poof the data
        print(' ')
        mlflow.log_artifact("image/rocauc.png")
        
        visual = ClassPredictionError(my_pipeline, ax=ax4, classes=[0,1])
        visual.fit(xtrain, ytrain)
        visual.score(xtest, ytest)
        g = visual.poof(outpath="image/ClassificationError.png")
        print(' ')
        mlflow.log_artifact("image/ClassificationError.png")
        
        
        return run.info.run_uuid
Code Example #24
 def draw_prediction_error(self):
     visualizer = ClassPredictionError(self.model, classes=self.le.classes_)
     visualizer.fit(self.training_data, self.training_labels)
     visualizer.score(self.test_data, self.test_labels)
     visualizer.poof()
#
#

# #### Load a model in... if starting this notebook from scratch, just load pre-trained models to visualise

# In[64]:

#insert the trained classifier from above in here
fitted_classifier_for_visualization = XG_clf_finetuned

# In[65]:

# seems to be predicting non loyal pretty well, however loyal is kind of hit or miss
from yellowbrick.classifier import ClassPredictionError

visualizer_entropy = ClassPredictionError(fitted_classifier_for_visualization,
                                          classes=class_names)

visualizer_entropy.fit(X_train, y_train)
visualizer_entropy.score(X_test, y_test)
g = visualizer_entropy.poof()

# #### To get the visualization of ROC and AUC curves plug in the CLF object from Section 2.3 to visualize these curves for the specific model that was trained

# In[66]:

from yellowbrick.classifier import ROCAUC

visualizer_entropy = ROCAUC(fitted_classifier_for_visualization,
                            classes=class_names)

visualizer_entropy.fit(X_train,
Code Example #26
File: main.py  Project: nixonjin/BigDataTraining
    print(f"F1 score: {f1_score_value}")

    confusion_matrix_value = confusion_matrix(y_test, y_pred)
    print(f"Confusion matrix: {confusion_matrix_value}")

    report = classification_report(y_test, y_pred)
    print(f"Classification report: {report}")

    # 可视化
    # ROCAUC
    visualizer = ROCAUC(model)
    visualizer.score(X_test, y_test)
    visualizer.show()

    # Class prediction error
    visualizer = ClassPredictionError(model)
    visualizer.score(X_test, y_test)
    visualizer.show()

    # Classification report
    visualizer = ClassificationReport(model)
    visualizer.score(X_test, y_test)
    visualizer.show()

    # Confusion matrix
    visualizer = ConfusionMatrix(model)
    visualizer.score(X_test, y_test)
    visualizer.show()

    # Threshold selection
    visualizer = DiscriminationThreshold(model)
Code Example #27
def score_model_outcome(X_train, y_train, X_test, y_test, model, **kwargs):
    """ A function that returns the different metrics of accuracy, confusion matrix and other model reports depending on the type of model that is asked.
    
    This function is for prognosis

    Parameters
    ----------
    X_train: matrix of training features
    
    y_train: vector of training labels
    
    X_test: matrix of test features
    
    y_test: vector of test labels

    Returns
    -------
    
    - Accuracy, F1 score and ROC_AUC for the train and test set
    
    - Confusion matrix
    
    - ClassificationReport
    
    - PrecisionRecallCurve
    
    - ClassPredictionError
    
    """

    # Train the model
    model.fit(X_train, y_train, **kwargs)

    # Predict on the train set
    prediction_train = model.predict(X_train)

    # Compute metrics for the train set
    accuracy_train = accuracy_score(y_train, prediction_train)

    # False Positive Rate, True Positive Rate, Threshold
    fpr_train, tpr_train, thresholds_train = roc_curve(y_train,
                                                       prediction_train)
    auc_train = auc(fpr_train, tpr_train)

    f1_score_train = f1_score(y_train, prediction_train)

    # Predict on the test set
    prediction_test = model.predict(X_test)

    accuracy_test = accuracy_score(y_test, prediction_test)

    fpr_test, tpr_test, thresholds_test = roc_curve(y_test, prediction_test)
    auc_test = auc(fpr_test, tpr_test)

    f1_score_test = f1_score(y_test, prediction_test)

    print("{}:".format(model.__class__.__name__))
    # Compute and return F1 (harmonic mean of precision and recall)
    print(
        "On training we get an Accuracy {}, an AUC {} and F1 score {} ".format(
            accuracy_train, auc_train, f1_score_train))

    print("For test we get an Accuracy {}, an AUC {} and F1 score {}".format(
        accuracy_test, auc_test, f1_score_test))

    fig, axes = plt.subplots(3, 2, figsize=(20, 20))

    visualgrid = [
        ConfusionMatrix(model,
                        ax=axes[0][0],
                        classes=['Death', 'Survival'],
                        cmap="YlGnBu"),
        ClassificationReport(
            model,
            ax=axes[0][1],
            classes=['Death', 'Survival'],
            cmap="YlGn",
        ),
        PrecisionRecallCurve(model, ax=axes[1][0]),
        ClassPredictionError(model,
                             classes=['Death', 'Survival'],
                             ax=axes[1][1]),
    ]

    for viz in visualgrid:
        viz.fit(X_train, y_train)
        viz.score(X_test, y_test)
        viz.finalize()

    try:
        roc_auc(model,
                X_train,
                y_train,
                X_test=X_test,
                y_test=y_test,
                classes=['Death', 'Survival'],
                ax=axes[2][0])
    except:
        print('Cannot plot ROC curve for this model')

    try:
        viz = FeatureImportances(model,
                                 ax=axes[2][1],
                                 stack=True,
                                 relative=False)
        viz.fit(X_train, y_train)
        viz.score(X_test, y_test)
        viz.finalize()
    except:
        print('Model does not expose feature importances')

    plt.show()
    print('\n')
Code Example #28
### ROC-AUC

from yellowbrick.classifier import ROCAUC

visualizer = ROCAUC(LogisticRegression(), classes=classes)

visualizer.fit(X_train, y_train)
visualizer.score(X_test, y_test)
visualizer.poof()

### Class Prediction Error

from yellowbrick.classifier import ClassPredictionError

visualizer = ClassPredictionError(LogisticRegression(), classes=classes)

visualizer.fit(X_train, y_train)
visualizer.score(X_test, y_test)
visualizer.poof()

### Discrimination Threshold

# Only works for binary classification

from yellowbrick.classifier import DiscriminationThreshold

visualizer = DiscriminationThreshold(LogisticRegression())

visualizer.fit(X, y)
visualizer.poof()
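As the comment above notes, DiscriminationThreshold only supports binary classification. A self-contained sketch on synthetic two-class data (the dataset here is an illustrative assumption):

from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from yellowbrick.classifier import DiscriminationThreshold

X, y = make_classification(n_samples=500, n_classes=2, random_state=0)

visualizer = DiscriminationThreshold(LogisticRegression())
visualizer.fit(X, y)  # runs internal shuffle-split trials across thresholds
visualizer.poof()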