def pred_error(X, y, test_size=0.10, random_state=42):
    """Show a class-prediction-error plot for a fixed set of classifiers.

    The data is split once with ``train_test_split``. Each classifier in the
    built-in list is then fitted on the training part through a
    ``ClassPredictionError`` visualizer, scored on the held-out part, and
    its plot is drawn.

    Parameters
    ----------
    X, y
        Features and labels handed to ``train_test_split``.
    test_size
        Forwarded to ``train_test_split`` (default 0.10).
    random_state
        Forwarded to ``train_test_split`` (default 42).

    Returns
    -------
    None
        Results are printed and plotted only.
    """
    models = [
        GaussianNB(),
        KNeighborsClassifier(),
        SGDClassifier(),
        BaggingClassifier(KNeighborsClassifier()),
        DecisionTreeClassifier(),
        LinearSVC(penalty="l1", dual=False),
    ]

    classes = ["not_passed", "passed"]

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state)

    for model in models:
        visualizer = ClassPredictionError(model, classes=classes)
        # Fitting the visualizer also fits the wrapped model, so a separate
        # model.fit() beforehand would only train every estimator twice.
        visualizer.fit(X_train, y_train)
        visualizer.score(X_test, y_test)  # Evaluate on the held-out data

        # A classifier's score() is mean accuracy, not an R^2 value.
        print("Accuracy: %0.6f" % model.score(X_test, y_test))
        visualizer.poof()

        print('')
Esempio n. 2
0
    def store_experiment_data(self, X_test, y_test):
        """Render three diagnostic plots for ``self.model`` and attach them.

        For each of ``ClassificationReport``, ``ConfusionMatrix`` and
        ``ClassPredictionError`` (in that order) the visualizer is scored on
        the test data, written to a PNG under ``metrics/`` and registered
        with ``self.ex.add_artifact``. The score returned by the
        classification report is printed and logged as the ``'score'``
        scalar.
        """
        plots = (
            (ClassificationReport, 'metrics/classification_report.png'),
            (ConfusionMatrix, 'metrics/confusion_matrix.png'),
            (ClassPredictionError, 'metrics/class_prediction_error.png'),
        )

        scores = []
        for visualizer_cls, png_path in plots:
            viz = visualizer_cls(self.model)
            scores.append(viz.score(X_test, y_test))
            viz.poof(png_path, clear_figure=True)
            self.ex.add_artifact(png_path)

        # Only the classification report's score is reported.
        score = scores[0]
        print('score=', score)
        self.ex.log_scalar('score', score)
Esempio n. 3
0
def class_predict_error(model, classes, X_train, Y_train, X_test, Y_test):
    """Draw a class-prediction-error plot for ``model``.

    Parameters
    ----------
    model
        Classifier to wrap in the visualizer. When ``None``, a fresh
        ``RandomForestClassifier()`` is used instead.
    classes
        Class names forwarded to ``ClassPredictionError``.
    X_train, Y_train
        Training data used to fit the visualizer.
    X_test, Y_test
        Held-out data used to score the visualizer.

    Returns
    -------
    None
        The plot is drawn via ``poof()``.
    """
    from yellowbrick.classifier import ClassPredictionError

    # Honour the caller's model; keep the old hard-coded estimator only as
    # the fallback for ``model=None``.
    estimator = RandomForestClassifier() if model is None else model
    visualizer = ClassPredictionError(estimator, classes=classes)

    # Use this function's own arguments (capital ``Y_*``) instead of
    # whatever ``y_train`` / ``y_test`` might exist at module level.
    visualizer.fit(X_train, Y_train)

    # Evaluate the model on the test data
    visualizer.score(X_test, Y_test)

    # Draw visualization
    visualizer.poof()
Esempio n. 4
0
def make_cb_pred_error(dataset="fruit", path=None, clf=None):
    """Build a class-prediction-error plot for one of the bundled datasets.

    Parameters
    ----------
    dataset
        ``'fruit'`` or ``'credit'``; selects the loader function.
    path
        Forwarded to ``poof`` as ``outpath``.
    clf
        Classifier to visualize. Defaults to a new
        ``RandomForestClassifier()`` when ``None``.

    Returns
    -------
    Whatever ``ClassPredictionError.poof`` returns.

    Raises
    ------
    KeyError
        If ``dataset`` is not one of the known names.
    """
    # Compare against None explicitly: truth-testing an estimator calls its
    # ``__len__`` when it has one (ensembles, pipelines), which can raise on
    # an unfitted estimator or silently discard an "empty" one.
    if clf is None:
        clf = RandomForestClassifier()

    loader = {
        'fruit': make_fruit_dataset,
        'credit': load_credit_dataset,
    }[dataset]

    (X_train, X_test, y_train, y_test), classes = loader()

    _, ax = plt.subplots()
    viz = ClassPredictionError(clf, ax=ax, classes=classes)
    viz.fit(X_train, y_train)
    viz.score(X_test, y_test)

    return viz.poof(outpath=path)
def train(experiment_id, run_name, xtrain, xtest, ytrain, ytest):
    """Train a TF-IDF + logistic-regression pipeline inside an MLflow run.

    The fitted pipeline is dumped to ``pipeline_lr.pkl``. Accuracy, F1 and
    ROC AUC on the test data are printed and logged to MLflow (rounded to
    two decimals). Four Yellowbrick plots are saved under ``image/`` and
    logged as MLflow artifacts.

    Parameters
    ----------
    experiment_id, run_name
        Forwarded to ``mlflow.start_run``.
    xtrain, ytrain
        Training documents and labels.
    xtest, ytest
        Held-out documents and labels.

    Returns
    -------
    The ``run_uuid`` of the MLflow run.
    """
    import os

    np.random.seed(100)

    with mlflow.start_run(experiment_id=experiment_id, run_name=run_name) as run:
        tfid_vect = TfidfVectorizer(analyzer='word',
                                    tokenizer=nltk.tokenize.word_tokenize,
                                    stop_words='english',
                                    min_df=5)

        my_pipeline = Pipeline(steps=[('vectorizer', tfid_vect),
                                      ('lr', LogisticRegression(random_state=42))])

        my_pipeline.fit(xtrain, ytrain)
        predictions = my_pipeline.predict(xtest)
        # ROC AUC is a ranking metric: it needs continuous scores for the
        # positive class, not the already-thresholded 0/1 predictions.
        positive_scores = my_pipeline.predict_proba(xtest)[:, 1]

        joblib.dump(my_pipeline, 'pipeline_lr.pkl')

        accuracy = accuracy_score(ytest, predictions)
        f1score = f1_score(ytest, predictions)
        auc_score = roc_auc_score(ytest, positive_scores)
        class_report = classification_report(ytest, predictions)

        print(f'Accuracy : {round(accuracy, 2)}')
        print(f'f1_score : {round(f1score, 2)}')
        print(f'auc_score : {round(auc_score, 2)}')
        print(f'class_report : \n {class_report}')

        mlflow.log_metric('Accuracy', round(accuracy, 2))
        mlflow.log_metric('f1_score', round(f1score, 2))
        mlflow.log_metric('auc_score', round(auc_score, 2))

        # Saving a figure does not create missing directories.
        os.makedirs("image", exist_ok=True)

        # One shared figure; each visualizer draws on its own axes.
        _, axes = plt.subplots(nrows=4)
        plots = (
            (ClassificationReport, "image/classification_report.png"),
            (ConfusionMatrix, "image/confusionmatrix.png"),
            (ROCAUC, "image/rocauc.png"),
            (ClassPredictionError, "image/ClassificationError.png"),
        )

        for (visualizer_cls, outpath), ax in zip(plots, axes):
            visualizer = visualizer_cls(my_pipeline, ax=ax, classes=[0, 1])
            visualizer.fit(xtrain, ytrain)    # Fit the training data to the visualizer
            visualizer.score(xtest, ytest)    # Evaluate the model on the test data
            visualizer.poof(outpath=outpath)  # Draw and save the plot
            print(' ')
            mlflow.log_artifact(outpath)

        return run.info.run_uuid
# In[64]:

# Plug the trained classifier from above in here.
fitted_classifier_for_visualization = XG_clf_finetuned

# In[65]:

# Observation: "non loyal" is predicted fairly well, while "loyal" is
# rather hit or miss.
from yellowbrick.classifier import ClassPredictionError

visualizer_entropy = ClassPredictionError(
    fitted_classifier_for_visualization, classes=class_names)

visualizer_entropy.fit(X_train, y_train)      # fit on the training data
visualizer_entropy.score(X_test, y_test)      # evaluate on the test data
g = visualizer_entropy.poof()                 # draw the plot

# #### ROC and AUC curves: plug in the CLF object from Section 2.3 to
# #### visualize these curves for the specific model that was trained.

# In[66]:

from yellowbrick.classifier import ROCAUC

visualizer_entropy = ROCAUC(
    fitted_classifier_for_visualization, classes=class_names)

# Fit the training data to the visualizer
visualizer_entropy.fit(X_train, y_train)
# Evaluate the model on the test data
visualizer_entropy.score(X_test, y_test)
# Draw/show/poof the data
g = visualizer_entropy.poof()
Esempio n. 7
0
# ROC/AUC curves for the ``rf`` model.
roc = ROCAUC(rf, classes=cancer.target_names)
roc.fit(X_train, y_train)
roc.score(X_test, y_test)
roc.poof()

### Confusion Matrix

from yellowbrick.classifier import ConfusionMatrix

classes = cancer.target_names

# Build the encoded-label -> name mapping from ``classes`` itself so the two
# arguments cannot disagree; a hand-written {0: ..., 1: ...} dict silently
# swaps the labels whenever its order differs from ``target_names``.
# NOTE(review): assumes the labels in ``y`` are indices into
# ``cancer.target_names`` - confirm against where ``cancer`` is loaded.
conf_matrix = ConfusionMatrix(rf,
                              classes=classes,
                              label_encoder=dict(enumerate(classes)))
conf_matrix.fit(X_train, y_train)
conf_matrix.score(X_test, y_test)
conf_matrix.poof()

### Class Prediction Error

from yellowbrick.classifier import ClassPredictionError

visualizer = ClassPredictionError(rf, classes=classes)

visualizer.fit(X_train, y_train)
visualizer.score(X_test, y_test)
visualizer.poof()
Esempio n. 8
0
 def draw_prediction_error(self):
     """Fit, score and display a class-prediction-error plot.

     Wraps ``self.model`` in a ``ClassPredictionError`` visualizer labelled
     with ``self.le.classes_``, fits it on the training data, scores it on
     the test data and draws the result.
     """
     error_plot = ClassPredictionError(self.model, classes=self.le.classes_)
     error_plot.fit(self.training_data, self.training_labels)
     error_plot.score(self.test_data, self.test_labels)
     error_plot.poof()