def eva_model(c, n, X, y, X_test, y_test, class_names, outdir):
    model = svm.LinearSVC(class_weight='balanced', dual=False, max_iter=10000, C=c)
    rfe = RFE(model, n_features_to_select=n)

    ## learning curve
    plt.clf()
    viz_LC = LearningCurve(rfe, scoring='f1_weighted', n_jobs=4)
    viz_LC.fit(X, y)
    viz_LC.show(outpath=outdir + '/LC.png')

    ## classification report
    plt.clf()
    viz_CR = ClassificationReport(rfe, classes=class_names, support=True)
    viz_CR.fit(X, y)
    viz_CR.score(X_test, y_test)
    viz_CR.show(outpath=outdir + '/CR.png')

    ## confusion matrix
    plt.clf()
    viz_CM = ConfusionMatrix(rfe, classes=class_names)
    viz_CM.fit(X, y)
    viz_CM.score(X_test, y_test)
    viz_CM.show(outpath=outdir + '/CM.png')

    ## precision recall curve
    plt.clf()
    viz_PRC = PrecisionRecallCurve(rfe, per_class=True, iso_f1_curves=True,
                                   fill_area=False, micro=False, classes=class_names)
    viz_PRC.fit(X, y)
    viz_PRC.score(X_test, y_test)
    viz_PRC.show(outpath=outdir + '/PRC.png', size=(1080, 720))

    ## class prediction error
    plt.clf()
    viz_CPE = ClassPredictionError(rfe, classes=class_names)
    viz_CPE.fit(X, y)
    viz_CPE.score(X_test, y_test)
    viz_CPE.show(outpath=outdir + '/CPE.png')

    ## ROCAUC
    plt.clf()
    viz_RA = ROCAUC(rfe, classes=class_names, size=(1080, 720))
    viz_RA.fit(X, y)
    viz_RA.score(X, y)
    viz_RA.show(outpath=outdir + '/RA.png')

    fit = rfe.fit(X, y)
    y_predict = fit.predict(X_test)
    f1 = f1_score(y_test, y_predict, average='weighted')

    features_retained_RFE = X.columns[rfe.get_support()].values
    feature_df = pd.DataFrame(features_retained_RFE.tolist())
    feature_df.to_csv(outdir + '/features.csv', sep='\t', index=False)
    return f1
def log_class_prediction_error_chart(classifier, X_train, X_test, y_train, y_test, experiment=None):
    """Log class prediction error chart.

    Make sure you created an experiment by using ``neptune.create_experiment()`` before you use this method.

    Tip:
        Check `Neptune documentation <https://docs.neptune.ai/integrations/scikit_learn.html>`_
        for the full example.

    Args:
        classifier (:obj:`classifier`):
            | Fitted sklearn classifier object
        X_train (:obj:`ndarray`):
            | Training data matrix
        X_test (:obj:`ndarray`):
            | Testing data matrix
        y_train (:obj:`ndarray`):
            | The classification target for training
        y_test (:obj:`ndarray`):
            | The classification target for testing
        experiment (:obj:`neptune.experiments.Experiment`, optional, default is ``None``):
            | Neptune ``Experiment`` object to control to which experiment you log the data.
            | If ``None``, log to currently active, and most recent experiment.

    Returns:
        ``None``

    Examples:
        .. code:: python3

            rfc = RandomForestClassifier()
            rfc.fit(X_train, y_train)

            neptune.init('my_workspace/my_project')
            exp = neptune.create_experiment()

            log_class_prediction_error_chart(rfc, X_train, X_test, y_train, y_test, experiment=exp)
    """
    assert is_classifier(classifier), 'classifier should be sklearn classifier.'
    exp = _validate_experiment(experiment)

    try:
        fig, ax = plt.subplots()
        visualizer = ClassPredictionError(classifier, is_fitted=True, ax=ax)
        visualizer.fit(X_train, y_train)
        visualizer.score(X_test, y_test)
        visualizer.finalize()
        exp.log_image('charts_sklearn', fig, image_name='Class Prediction Error')
        plt.close(fig)
    except Exception as e:
        print('Did not log Class Prediction Error chart. Error {}'.format(e))
def class_prediction_error(ax=None):
    data = load_game(return_dataset=True)
    X, y = data.to_numpy()
    X = OneHotEncoder().fit_transform(X).toarray()
    viz = ClassPredictionError(GaussianNB(), ax=ax)
    return tts_plot(viz, X, y)
def pred_error(X, y, test_size=0.10, random_state=42):
    models = [
        GaussianNB(),
        KNeighborsClassifier(),
        SGDClassifier(),
        BaggingClassifier(KNeighborsClassifier()),
        DecisionTreeClassifier(),
        LinearSVC(penalty="l1", dual=False),
    ]
    classes = ["not_passed", "passed"]

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state)

    for model in models:
        model.fit(X_train, y_train)
        visualizer = ClassPredictionError(model, classes=classes)
        visualizer.fit(X_train, y_train)    # Fit the visualizer and the model
        visualizer.score(X_test, y_test)    # Evaluate the model on the test data
        # For classifiers, model.score() returns mean accuracy
        print("Mean accuracy: %0.6f" % model.score(X_test, y_test))
        g = visualizer.poof()
        print('')
def create_class_prediction_error_chart(classifier, X_train, X_test, y_train, y_test):
    """Create class prediction error chart.

    Tip:
        Check Sklearn-Neptune integration
        `documentation <https://docs-beta.neptune.ai/essentials/integrations/machine-learning-frameworks/sklearn>`_
        for the full example.

    Args:
        classifier (:obj:`classifier`):
            | Fitted sklearn classifier object
        X_train (:obj:`ndarray`):
            | Training data matrix
        X_test (:obj:`ndarray`):
            | Testing data matrix
        y_train (:obj:`ndarray`):
            | The classification target for training
        y_test (:obj:`ndarray`):
            | The classification target for testing

    Returns:
        ``neptune.types.File`` object that you can assign to run's ``base_namespace``.

    Examples:
        .. code:: python3

            import neptune.new.integrations.sklearn as npt_utils

            rfc = RandomForestClassifier()
            rfc.fit(X_train, y_train)

            run = neptune.init(project='my_workspace/my_project')
            run['visuals/class_prediction_error'] = \
                npt_utils.create_class_prediction_error_chart(rfc, X_train, X_test, y_train, y_test)
    """
    assert is_classifier(classifier), 'classifier should be sklearn classifier.'

    chart = None

    try:
        fig, ax = plt.subplots()
        visualizer = ClassPredictionError(classifier, is_fitted=True, ax=ax)
        visualizer.fit(X_train, y_train)
        visualizer.score(X_test, y_test)
        visualizer.finalize()
        chart = neptune.types.File.as_image(fig)
        plt.close(fig)
    except Exception as e:
        print('Did not log Class Prediction Error chart. Error {}'.format(e))

    return chart
def eval_models(df, race="W", models=["gnb", "rf", "xgb"], census=False,
                report=False, roc=False, pr=False, cpe=False):
    """Run evaluation on a set of models and a single race class."""
    df = prep_data(df)
    tes = joblib.load(DIR + "/data/models/transformers_binary.joblib")
    for col in ["first_name", "last_name", "middle_name"]:
        te = tes[race][col]
        df[col] = te.transform(df[col])
        df[col] = df[col].fillna(0)
    tmpa = np.where(df.race_code == race, True, False)
    df = df.fillna(0)

    for modelv in models:
        # Avoid shadowing the `models` argument being iterated over
        fitted_models = joblib.load(
            DIR + "/data/models/models_binary_%s%s.joblib" % (modelv, model_string))
        model = fitted_models[race]
        model.target_type_ = "binary"

        if report:
            visualizer = ClassificationReport(model, classes=model.classes_, support=True)
            visualizer.score(df[MODEL_COLS], tmpa)
            visualizer.show()
        if roc:
            visualizer = ROCAUC(model, classes=["W", "not-W"])
            visualizer.score(df[MODEL_COLS], tmpa)
            visualizer.show()
        if pr:
            viz = PrecisionRecallCurve(model, is_fitted=True, classes=["W", "not-W"])
            viz.score(df[MODEL_COLS], tmpa)
            viz.show()
        if cpe:
            viz = ClassPredictionError(model)
            viz.score(df[MODEL_COLS], tmpa)
            viz.show()
def class_prediction_error(self) -> None:
    """Plot the support (number of training samples) for each class in the
    fitted classification model as a stacked bar chart. Each bar is segmented
    to show the proportion of predictions (including false negatives and false
    positives, like a Confusion Matrix) for each class.

    You can use a ClassPredictionError to visualize which classes your
    classifier is having a particularly difficult time with, and more
    importantly, what incorrect answers it is giving on a per-class basis.
    """
    visualizer = ClassPredictionError(self.trained_model)
    visualizer.fit(self.X_train, self.y_train)
    visualizer.score(self.X_test, self.y_test)
    save_dir = f"{self.plots_dir}/class_prediction_error_{self.model_id}.png"
    visualizer.show(outpath=save_dir)
    if not LOCAL:
        upload_to_s3(save_dir, f'plots/class_prediction_error_{self.model_id}.png',
                     bucket=S3_BUCKET_NAME)
    plt.clf()
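# A minimal, self-contained sketch of the same ClassPredictionError usage outside
# the class context above. The synthetic data, the RandomForestClassifier, and the
# output filename are illustrative assumptions; only the fit/score/show sequence
# mirrors the method documented above.
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from yellowbrick.classifier import ClassPredictionError

X, y = make_classification(n_samples=500, n_classes=3, n_informative=4,
                           n_clusters_per_class=1, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

viz = ClassPredictionError(RandomForestClassifier(random_state=0))
viz.fit(X_train, y_train)                       # fit the wrapped estimator on training data
viz.score(X_test, y_test)                       # predict on test data and build the stacked bars
viz.show(outpath="class_prediction_error.png")  # or viz.show() to display interactively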
def class_predict_error(model, classes, X_train, Y_train, X_test, Y_test):
    from yellowbrick.classifier import ClassPredictionError

    # Instantiate the visualizer around the passed classification model
    visualizer = ClassPredictionError(model, classes=classes)

    # Fit the training data to the visualizer
    visualizer.fit(X_train, Y_train)

    # Evaluate the model on the test data
    visualizer.score(X_test, Y_test)

    # Draw visualization
    g = visualizer.poof()
def draw_plots():
    classifier = MultinomialNB(alpha=0.01)

    for technique in ["base", "SMOTE", "ADASYN", "text-aug"]:
        X_train, X_test, y_train, y_test = get_baseline_split(representation="bow")
        if technique == "base":
            X_plot_train, X_plot_test, y_plot_train, y_plot_test = X_train, X_test, y_train, y_test
        elif technique == "SMOTE":
            X_plot_train, y_plot_train = smote.run(X_train, y_train)
            X_plot_test, y_plot_test = X_test, y_test
        elif technique == "ADASYN":
            X_plot_train, y_plot_train = adasyn.run(X_train, y_train)
            X_plot_test, y_plot_test = X_test, y_test
        elif technique == "text-aug":
            X_plot_train, X_plot_test, y_plot_train, y_plot_test = text_augmentation.run(
                books_df=get_fully_processed_books_df(), representation="bow")
        else:
            raise Exception()

        # ROC micro average
        viz_roc = ROCAUC(classifier, classes=get_selected_genres(), micro=True, per_class=False)
        viz_roc.fit(X_plot_train, y_plot_train)    # Fit the training data to the viz_roc
        viz_roc.score(X_plot_test, y_plot_test)    # Evaluate the model on the test data
        viz_roc.show()                             # Finalize and show the figure

        # ROC - Per Class
        viz_roc = ROCAUC(classifier, classes=get_selected_genres(), micro=True, per_class=True)
        viz_roc.fit(X_plot_train, y_plot_train)    # Fit the training data to the viz_roc
        viz_roc.score(X_plot_test, y_plot_test)    # Evaluate the model on the test data
        viz_roc.show()                             # Finalize and show the figure

        # Class Prediction Error
        viz_pred_err = ClassPredictionError(classifier, classes=get_selected_genres())
        viz_pred_err.fit(X_plot_train, y_plot_train)
        viz_pred_err.score(X_plot_test, y_plot_test)
        viz_pred_err.show()

        # The ConfusionMatrix
        cm = ConfusionMatrix(classifier, classes=[0, 1, 2, 3, 4, 5, 6, 7, 8])
        cm.fit(X_plot_train, y_plot_train)
        cm.score(X_plot_test, y_plot_test)
        cm.show()
def make_cb_pred_error(dataset="fruit", path=None, clf=None):
    clf = clf or RandomForestClassifier()
    loader = {
        'fruit': make_fruit_dataset,
        'credit': load_credit_dataset,
    }[dataset]
    (X_train, X_test, y_train, y_test), classes = loader()

    _, ax = plt.subplots()
    viz = ClassPredictionError(clf, ax=ax, classes=classes)
    viz.fit(X_train, y_train)
    viz.score(X_test, y_test)
    return viz.poof(outpath=path)
def classprede():
    X, y = make_classification(n_samples=1000, n_classes=5, n_informative=3,
                               n_clusters_per_class=1)
    classes = ["apple", "kiwi", "pear", "banana", "orange"]

    # Perform 80/20 training/test split
    X_train, X_test, y_train, y_test = tts(X, y, test_size=0.20)

    oz = ClassPredictionError(RandomForestClassifier(), classes=classes, ax=newfig())
    oz.fit(X_train, y_train)
    oz.score(X_test, y_test)
    savefig(oz, "class_prediction_error")
def store_experiment_data(self, X_test, y_test):
    class_report = ClassificationReport(self.model)
    score = class_report.score(X_test, y_test)
    class_report.poof('metrics/classification_report.png', clear_figure=True)
    self.ex.add_artifact('metrics/classification_report.png')

    confusion_matrix = ConfusionMatrix(self.model)
    confusion_matrix.score(X_test, y_test)
    confusion_matrix.poof('metrics/confusion_matrix.png', clear_figure=True)
    self.ex.add_artifact('metrics/confusion_matrix.png')

    cpd = ClassPredictionError(self.model)
    cpd.score(X_test, y_test)
    cpd.poof('metrics/class_prediction_error.png', clear_figure=True)
    self.ex.add_artifact('metrics/class_prediction_error.png')

    print('score=', score)
    self.ex.log_scalar('score', score)
def class_prediction_errors(xx, yy, estimatorss, **kwargs):
    vz2 = ClassPredictionError(
        estimatorss,
        classes=['Reach, 1 Reach, or L/R Reach',
                 'Null, Multiple Reaches, Or Multiple Arms'],
        cmap="YlGn",
        size=(600, 360),
        **kwargs)
    vz2.fit(xx, yy)
    vz2.score(xx, yy)
    vz2.show()
print("Confusion Matrix: ") print(confusion_matrix(y_test, y_pred)) # In[34]: from yellowbrick.classifier import ClassPredictionError # In[35]: classes = ['Exited', 'Not Exited'] clf = RandomForestClassifier(n_estimators = 200, random_state=200) visualizer = ClassPredictionError(clf) visualizer.fit(X_train, y_train) visualizer.score(X_test,y_test) visualizer.show() # In[36]: svclassifier = SVC(kernel='rbf') visualizer = ClassPredictionError(svclassifier) visualizer.fit(X_train, y_train) visualizer.score(X_test,y_test) visualizer.show()
def get_plots():
    all_plots = []

    # FEATURE Visualization
    # Instantiate the visualizer
    plt.figure(figsize=(3.5, 3.5))
    viz = Manifold(manifold="tsne")

    # Fit the data to the visualizer
    viz.fit_transform(X_train, y_train)

    # save to html
    fig = plt.gcf()
    some_htmL = mpld3.fig_to_html(fig)
    all_plots.append("<h4 align='center'>Manifold Visualization</h4>" + some_htmL)

    # clear plot
    plt.clf()

    if ML_ALG_nr == 1:
        # classification

        # Check if we can get the classes
        classes = None
        try:
            classes = list(Enc.inverse_transform(model_def.classes_))
        except ValueError as e:
            app.logger.info(e)

        if classes is not None:
            # Instantiate the classification model and visualizer
            visualizer = ClassPredictionError(DecisionTreeClassifier(), classes=classes)

            # Fit the training data to the visualizer
            visualizer.fit(X_train, y_train)

            # Evaluate the model on the test data
            visualizer.score(X_test, y_test)

            # save to html
            fig = plt.gcf()
            some_htmL = mpld3.fig_to_html(fig)
            all_plots.append("<h4 align='center'>Class Prediction Error</h4>" + some_htmL)

            # clear plot
            plt.clf()

            # The ConfusionMatrix visualizer takes a model
            cm = ConfusionMatrix(model_def, classes=classes)

            # Fit fits the passed model. This is unnecessary if you pass the visualizer a pre-fitted model
            cm.fit(X_train, y_train)

            # To create the ConfusionMatrix, we need some test data. Score runs predict() on the data
            # and then creates the confusion_matrix from scikit-learn.
            cm.score(X_test, y_test)

            # save to html
            fig = plt.gcf()
            some_htmL = mpld3.fig_to_html(fig)
            all_plots.append("<h4 align='center'>Confusion Matrix</h4>" + some_htmL)

            # clear plot
            plt.clf()

        return all_plots

    elif ML_ALG_nr == 0:
        # regression

        # Instantiate the linear model and visualizer
        visualizer = PredictionError(model_def, identity=True)
        visualizer.fit(X_train, y_train)    # Fit the training data to the visualizer
        visualizer.score(X_test, y_test)    # Evaluate the model on the test data

        # save to html
        fig = plt.gcf()
        some_htmL = mpld3.fig_to_html(fig)
        all_plots.append("<h4 align='center'>Prediction Error Plot</h4>" + some_htmL)

        # clear plot
        plt.clf()

        # Instantiate the model and visualizer
        visualizer = ResidualsPlot(model_def)
        visualizer.fit(X_train, y_train)    # Fit the training data to the visualizer
        visualizer.score(X_test, y_test)

        # save to html
        fig = plt.gcf()
        some_htmL = mpld3.fig_to_html(fig)
        all_plots.append("<h4 align='center'>Residuals Plot</h4>" + some_htmL)

        # clear plot
        plt.clf()

        return all_plots
result3 = cross_val_score(foret, x_train, y_train, cv=shuffle)
print('shuffle split cross-validation scores: {}'.format(result3))
print('mean cross-validation score: {:.2f}'.format(result3.mean()))

cm = ConfusionMatrix(foret, classes=[0, 1, 2, 3, 4, 6], percent=True)
cm.fit(x_train, y_train)
cm.score(x_test, y_test)
cm.poof()

size = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8]
lc = LearningCurve(RandomForestClassifier(), train_sizes=size, scoring='r2')
lc.fit(x_train, y_train)
lc.poof()

viz = ClassPredictionError(RandomForestClassifier(),
                           classes=["0", "1", "2", "3", "4", "5", "6"])
viz.fit(x_train, y_train)
viz.score(x_test, y_test)
viz.poof()

fig = plt.figure()
ax = fig.add_subplot()
feat = FeatureImportances(RandomForestClassifier(), ax=ax)
feat.fit(x_train, y_train)
feat.poof()

'''--------------------- Neural network --------------------- '''
neurone = MLPClassifier()
neurone.fit(x_train, y_train)
# plot no skill
plt.plot([0, 1], [0, 1], linestyle='--')
# plot the roc curve for the model
plt.plot(fpr, tpr, marker='.')
# show the plot
plt.show()

from sklearn.metrics import roc_auc_score
from sklearn.metrics import roc_curve

logit_roc_auc = roc_auc_score(y_test, model.predict(X_test))
fpr, tpr, thresholds = roc_curve(y_test, probs)
plt.figure()
plt.plot(fpr, tpr, label='Logistic Regression (area = %0.2f)' % logit_roc_auc)
plt.plot([0, 1], [0, 1], 'r--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver operating characteristic')
plt.legend(loc="lower right")
plt.savefig('Visually/Log_ROC')
plt.show()

from sklearn.ensemble import RandomForestClassifier
from yellowbrick.classifier import ClassPredictionError

visualizer = ClassPredictionError(model=LogisticRegression())
visualizer.fit(X=X_train, y=y_train)
visualizer.score(X=X_test, y=y_test)
visualizer.poof()
# calling function
model_performance(X_fclass_train, X_fclass_test, y_train, y_test)

# BaggingClassifier with f_classif features
model = BaggingClassifier()
model.fit(X_fclass_train, y_train)

classes = ["not_passed", "passed"]

visualizer = ClassificationReport(model, classes=classes)
visualizer.fit(X_fclass_train, y_train)    # Fit the visualizer and the model
visualizer.score(X_fclass_test, y_test)    # Evaluate the model on the test data
visualizer.poof(outpath="bag_classification_report_f_classIF.png")

visualizer = ClassPredictionError(model, classes=classes)
visualizer.fit(X_fclass_train, y_train)
visualizer.score(X_fclass_test, y_test)
visualizer.poof(outpath="bag_class_errorf_classIF.png")

visualizer = DiscriminationThreshold(model)
visualizer.fit(X_fclass_train, y_train)    # Fit the training data to the visualizer
visualizer.score(X_fclass_test, y_test)
visualizer.poof(outpath="bag_descrimination_thresholdf_classIF.png")

# Create the visualizer, fit, score, and poof it
viz = PrecisionRecallCurve(model)
viz.fit(X_fclass_train, y_train)
viz.score(X_fclass_test, y_test)
viz.poof(outpath="bag_precision_recall_curvef_classIF.png")
model = KNeighborsClassifier(n_neighbors=kVals[i])
model.fit(trainData[:datasize], trainLabels[:datasize])
predictions = model.predict(X_test)

# show classification reports demonstrating the accuracy of the classifier for each of the digits
print(classification_report(y_test, predictions))

model = KNeighborsClassifier(n_neighbors=kVals[i])
visualizer = ClassificationReport(model, support=True)
visualizer.fit(trainData[:datasize], trainLabels[:datasize])
visualizer.score(X_test, y_test)
g = visualizer.poof()

# class prediction error plot
model = KNeighborsClassifier(n_neighbors=kVals[i])
visualizer = ClassPredictionError(model)
visualizer.fit(trainData[:datasize], trainLabels[:datasize])
visualizer.score(X_test, y_test)
g = visualizer.poof()

# plot pairs pca plots
X_train, y_train = trainData[:datasize], trainLabels[:datasize]
pca = PCA(n_components=2)

fig, plots = plt.subplots(10, 10)
fig.set_size_inches(50, 50)
plt.prism()
for i, j in product(range(10), repeat=2):
    if i > j:
        continue
    X_ = X_train[(y_train == i) + (y_train == j)]
    y_ = y_train[(y_train == i) + (y_train == j)]
X = load('X.joblib')
y = load('y.joblib')

# %%
to_graphviz(clf, num_trees=0, rankdir='LR')

# %%
classification_report(clf, X, y)

# %%
visualizer = ROCAUC(clf, classes=class_names)
visualizer.score(X, y)
visualizer.poof()

# %%
visualizer = ClassPredictionError(clf, classes=class_names)
visualizer.score(X, y)
visualizer.poof()

# %%
visualizer = DiscriminationThreshold(clf)
visualizer.fit(X, y)
visualizer.poof()

# %%
keep = [263, 268, 287, 288, 300, 302, 307, 308, 313, 315]

# %%
seed = 15
test_size = 0.33
Xt, Xv, yt, yv = \
def evaluate_visualizer(self, classes=None, params={}):
    LOGGER.info('Initializing plot model')
    if not os.path.isdir(os.path.join(os.getcwd(), 'visualizer/')):
        os.makedirs(os.path.join(os.getcwd(), 'visualizer/'))
    if classes is None:
        classes = pd.value_counts(self.y.values.flatten()).index.tolist()

    visualizers = []
    for idx, (name_model, estimator) in enumerate(self.estimator.items()):
        X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
            self.X, self.y, test_size=0.2, stratify=self.y, random_state=24)

        try:
            LOGGER.info('Visualizer ClassificationReport')
            visualizer = ClassificationReport(model=estimator, classes=classes)
            if visualizer.__class__.__name__ in params.keys():
                visualizer = ClassificationReport(**params[visualizer.__class__.__name__])
            visualizer.fit(X_train, y_train)
            visualizer.score(X_test, y_test)
            visualizer.show(outpath=os.path.join(
                os.getcwd(),
                f'visualizer/{visualizer.__class__.__name__}_{estimator.__class__.__name__}.png'))
            plt.cla()
        except Exception:
            LOGGER.warn('ERROR ClassificationReport')

        try:
            LOGGER.info('Visualizer ConfusionMatrix')
            visualizer = ConfusionMatrix(model=estimator, classes=classes)
            if visualizer.__class__.__name__ in params.keys():
                visualizer = ConfusionMatrix(**params[visualizer.__class__.__name__])
            visualizer.fit(X_train, y_train)
            visualizer.score(X_test, y_test)
            visualizer.show(outpath=os.path.join(
                os.getcwd(),
                f'visualizer/{visualizer.__class__.__name__}_{estimator.__class__.__name__}.png'))
            plt.cla()
        except Exception:
            LOGGER.warn('ERROR ConfusionMatrix')

        try:
            LOGGER.info('Visualizer ROCAUC')
            visualizer = ROCAUC(model=estimator, classes=classes)
            if visualizer.__class__.__name__ in params.keys():
                visualizer = ROCAUC(**params[visualizer.__class__.__name__])
            visualizer.fit(X_train, y_train)
            visualizer.score(X_test, y_test)
            visualizer.show(outpath=os.path.join(
                os.getcwd(),
                f'visualizer/{visualizer.__class__.__name__}_{estimator.__class__.__name__}.png'))
            plt.cla()
        except Exception:
            LOGGER.warn('ERROR ROCAUC')

        try:
            LOGGER.info('Visualizer PrecisionRecallCurve')
            visualizer = PrecisionRecallCurve(model=estimator, per_class=True, classes=classes)
            if visualizer.__class__.__name__ in params.keys():
                visualizer = PrecisionRecallCurve(**params[visualizer.__class__.__name__])
            visualizer.fit(X_train, y_train)
            visualizer.score(X_test, y_test)
            visualizer.show(outpath=os.path.join(
                os.getcwd(),
                f'visualizer/{visualizer.__class__.__name__}_{estimator.__class__.__name__}.png'))
            plt.cla()
        except Exception:
            LOGGER.warn('ERROR PrecisionRecallCurve')

        try:
            LOGGER.info('Visualizer ClassPredictionError')
            visualizer = ClassPredictionError(model=estimator, classes=classes)
            if visualizer.__class__.__name__ in params.keys():
                visualizer = ClassPredictionError(**params[visualizer.__class__.__name__])
            visualizer.fit(X_train, y_train)
            visualizer.score(X_test, y_test)
            visualizer.show(outpath=os.path.join(
                os.getcwd(),
                f'visualizer/{visualizer.__class__.__name__}_{estimator.__class__.__name__}.png'))
            plt.cla()
        except Exception:
            LOGGER.warn('ERROR ClassPredictionError')

        try:
            LOGGER.info('Visualizer Discrimination Threshold')
            visualizer = DiscriminationThreshold(model=estimator, classes=classes)
            if visualizer.__class__.__name__ in params.keys():
                visualizer = DiscriminationThreshold(**params[visualizer.__class__.__name__])
            visualizer.fit(X_train, y_train)
            visualizer.score(X_test, y_test)
            visualizer.show(outpath=os.path.join(
                os.getcwd(),
                f'visualizer/{visualizer.__class__.__name__}_{estimator.__class__.__name__}.png'))
            plt.cla()
        except Exception:
            LOGGER.warn('ERROR Discrimination Threshold')
roc = ROCAUC(rf, classes=cancer.target_names)
roc.fit(X_train, y_train)
roc.score(X_test, y_test)
roc.poof()

### Confusion Matrix
from yellowbrick.classifier import ConfusionMatrix

classes = cancer.target_names
conf_matrix = ConfusionMatrix(rf, classes=classes,
                              label_encoder={0: 'benign', 1: 'malignant'})
conf_matrix.fit(X_train, y_train)
conf_matrix.score(X_test, y_test)
conf_matrix.poof()

### Class Prediction Error
from yellowbrick.classifier import ClassPredictionError

visualizer = ClassPredictionError(rf, classes=classes)
visualizer.fit(X_train, y_train)
visualizer.score(X_test, y_test)
visualizer.poof()
def train(experiment_id, run_name, xtrain, xtest, ytrain, ytest):
    np.random.seed(100)
    with mlflow.start_run(experiment_id=experiment_id, run_name=run_name) as run:
        tfid_vect = TfidfVectorizer(analyzer='word',
                                    tokenizer=nltk.tokenize.word_tokenize,
                                    stop_words='english', min_df=5)
        my_pipeline = Pipeline(steps=[('vectorizer', tfid_vect),
                                      ('lr', LogisticRegression(random_state=42))])
        my_pipeline.fit(xtrain, ytrain)
        predictions = my_pipeline.predict(xtest)
        joblib.dump(my_pipeline, 'pipeline_lr.pkl')

        accuracy = accuracy_score(ytest, predictions)
        f1score = f1_score(ytest, predictions)
        auc_score = roc_auc_score(ytest, predictions)
        class_report = classification_report(ytest, predictions)

        print(f'Accuracy : {round(accuracy, 2)}')
        print(f'f1_score : {round(f1score, 2)}')
        print(f'auc_score : {round(auc_score, 2)}')
        print(f'class_report : \n {class_report}')

        mlflow.log_metric('Accuracy', round(accuracy, 2))
        mlflow.log_metric('f1_score', round(f1score, 2))
        mlflow.log_metric('auc_score', round(auc_score, 2))

        fig, (ax1, ax2, ax3, ax4) = plt.subplots(nrows=4)

        visualizer = ClassificationReport(my_pipeline, ax=ax1, classes=[0, 1])
        visualizer.fit(xtrain, ytrain)
        visualizer.score(xtest, ytest)
        a = visualizer.poof(outpath="image/classification_report.png")
        print(' ')
        mlflow.log_artifact("image/classification_report.png")

        # The ConfusionMatrix visualizer takes a model
        cm = ConfusionMatrix(my_pipeline, ax=ax2, classes=[0, 1])
        cm.fit(xtrain, ytrain)
        cm.score(xtest, ytest)
        b = cm.poof(outpath="image/confusionmatrix.png")
        mlflow.log_artifact("image/confusionmatrix.png")
        print(' ')

        vis = ROCAUC(my_pipeline, ax=ax3, classes=[0, 1])
        vis.fit(xtrain, ytrain)                    # Fit the training data to the visualizer
        vis.score(xtest, ytest)                    # Evaluate the model on the test data
        c = vis.poof(outpath="image/rocauc.png")   # Draw/show/poof the data
        print(' ')
        mlflow.log_artifact("image/rocauc.png")

        visual = ClassPredictionError(my_pipeline, ax=ax4, classes=[0, 1])
        visual.fit(xtrain, ytrain)
        visual.score(xtest, ytest)
        g = visual.poof(outpath="image/ClassificationError.png")
        print(' ')
        mlflow.log_artifact("image/ClassificationError.png")

    return run.info.run_uuid
def draw_prediction_error(self):
    visualizer = ClassPredictionError(self.model, classes=self.le.classes_)
    visualizer.fit(self.training_data, self.training_labels)
    visualizer.score(self.test_data, self.test_labels)
    visualizer.poof()
# #### load a model in...if starting this notebook from scratch just load pre trained models to visualise

# In[64]:

# insert the trained classifier from above in here
fitted_classifier_for_visualization = XG_clf_finetuned


# In[65]:

# seems to be predicting non loyal pretty well, however loyal is kind of hit or miss
from yellowbrick.classifier import ClassPredictionError

visualizer_entropy = ClassPredictionError(fitted_classifier_for_visualization, classes=class_names)
visualizer_entropy.fit(X_train, y_train)
visualizer_entropy.score(X_test, y_test)
g = visualizer_entropy.poof()


# #### To get the visualization of ROC and AUC curves plug in the CLF object from Section 2.3 to visualize these curves for the specific model that was trained

# In[66]:

from yellowbrick.classifier import ROCAUC

visualizer_entropy = ROCAUC(fitted_classifier_for_visualization, classes=class_names)
visualizer_entropy.fit(X_train,
print(f"f1值:{f1_score_value}") confusion_matrix_value = confusion_matrix(y_test, y_pred) print(f"混淆矩阵:{confusion_matrix_value}") report = classification_report(y_test, y_pred) print(f"分类报告:{report}") # 可视化 # ROCAUC visualizer = ROCAUC(model) visualizer.score(X_test, y_test) visualizer.show() # 分类预测 visualizer = ClassPredictionError(model) visualizer.score(X_test, y_test) visualizer.show() # 分类报告 visualizer = ClassificationReport(model) visualizer.score(X_test, y_test) visualizer.show() # 混淆矩阵 visualizer = ConfusionMatrix(model) visualizer.score(X_test, y_test) visualizer.show() # 阈值选择 visualizer = DiscriminationThreshold(model)
def score_model_outcome(X_train, y_train, X_test, y_test, model, **kwargs):
    """
    A function that returns the different metrics of accuracy, confusion matrix
    and other model reports depending on the type of model that is asked.
    This function is for prognosis.

    Parameters
    ----------
    X_train: matrix of training features
    y_train: vector of training labels
    X_test: matrix of test features
    y_test: vector of test labels

    Returns
    -------
    - Accuracy, F1 score and ROC_AUC for the train and test set
    - Confusion matrix
    - ClassificationReport
    - PrecisionRecallCurve
    - ClassPredictionError
    """
    # Train the model
    model.fit(X_train, y_train, **kwargs)

    # Predict on the train set
    prediction_train = model.predict(X_train)

    # Compute metrics for the train set
    accuracy_train = accuracy_score(y_train, prediction_train)

    # False Positive Rate, True Positive Rate, Threshold
    fpr_train, tpr_train, thresholds_train = roc_curve(y_train, prediction_train)
    auc_train = auc(fpr_train, tpr_train)
    f1_score_train = f1_score(y_train, prediction_train)

    # Predict on the test set
    prediction_test = model.predict(X_test)
    accuracy_test = accuracy_score(y_test, prediction_test)
    fpr_test, tpr_test, thresholds_test = roc_curve(y_test, prediction_test)
    auc_test = auc(fpr_test, tpr_test)
    f1_score_test = f1_score(y_test, prediction_test)

    print("{}:".format(model.__class__.__name__))

    # Compute and return F1 (harmonic mean of precision and recall)
    print("On training we get an Accuracy {}, an AUC {} and F1 score {} ".format(
        accuracy_train, auc_train, f1_score_train))
    print("For test we get an Accuracy {}, an AUC {} and F1 score {}".format(
        accuracy_test, auc_test, f1_score_test))

    fig, axes = plt.subplots(3, 2, figsize=(20, 20))
    visualgrid = [
        ConfusionMatrix(model, ax=axes[0][0], classes=['Death', 'Survival'], cmap="YlGnBu"),
        ClassificationReport(model, ax=axes[0][1], classes=['Death', 'Survival'], cmap="YlGn"),
        PrecisionRecallCurve(model, ax=axes[1][0]),
        ClassPredictionError(model, classes=['Death', 'Survival'], ax=axes[1][1]),
    ]

    for viz in visualgrid:
        viz.fit(X_train, y_train)
        viz.score(X_test, y_test)
        viz.finalize()

    try:
        roc_auc(model, X_train, y_train, X_test=X_test, y_test=y_test,
                classes=['Death', 'Survival'], ax=axes[2][0])
    except Exception:
        print("Can't plot ROC curve for this model")

    try:
        viz = FeatureImportances(model, ax=axes[2][1], stack=True, relative=False)
        viz.fit(X_train, y_train)
        viz.score(X_test, y_test)
        viz.finalize()
    except Exception:
        print("Don't have feature importance")

    plt.show()
    print('\n')
### ROC-AUC
from yellowbrick.classifier import ROCAUC

visualizer = ROCAUC(LogisticRegression(), classes=classes)
visualizer.fit(X_train, y_train)
visualizer.score(X_test, y_test)
visualizer.poof()

### Class Prediction Error
from yellowbrick.classifier import ClassPredictionError

visualizer = ClassPredictionError(LogisticRegression(), classes=classes)
visualizer.fit(X_train, y_train)
visualizer.score(X_test, y_test)
visualizer.poof()

### Discrimination Threshold
# Only works for binary classification
from yellowbrick.classifier import DiscriminationThreshold

visualizer = DiscriminationThreshold(LogisticRegression())
visualizer.fit(X, y)
visualizer.poof()