def generateClassificationReport(clf, class_names):
    # Note: data_test, target_test, tree_evaluations_out and classifierName are
    # expected to be defined in the enclosing scope; clf is assumed to be fitted,
    # so the visualizer is scored directly without a fit() call.
    try:
        visualizer = ClassificationReport(clf, classes=class_names, support=True)
        visualizer.score(data_test, target_test)
        visualizerPath = os.path.join(tree_evaluations_out,
                                      str(classifierName) + '_classificationReport.png')
        visualizer.poof(outpath=visualizerPath, clear_figure=True)
    except KeyError:
        print('Warning: not enough data for classification report: ' + str(classifierName))

def naiveBayesClassifierTest(X_train, X_test, y_train, y_test, X_1_df, Y_1_df):
    path = Path(__file__).parent.absolute()
    # Create the GaussianNB output directory if it doesn't exist
    Path("output/GaussianNB/").mkdir(parents=True, exist_ok=True)
    gnb = GaussianNB()
    print('-----------------------------')
    print('Naive Bayes Classifier Test was called. Wait...')

    # Capture the start time
    start = time.time()
    y_pred = gnb.fit(X_train, np.ravel(y_train)).predict(X_test)
    # Capture the end time of the calculation
    end = time.time()
    print("Time taken to train model and predict:", end - start, "seconds")

    print("Number of mislabeled points out of a total %d points: %d"
          % (X_test.shape[0], (np.ravel(y_test) != y_pred).sum()))
    # Compare actual response values (y_test) with predicted response values (y_pred)
    print("Gaussian Naive Bayes model accuracy (in %):",
          metrics.accuracy_score(y_test, y_pred) * 100)

    # Generate the metrics/visualizations
    print("Classification report, class prediction error, test accuracy and running "
          "time for Naive Bayes are generated in the output folder")

    # Plot the classification report
    visualizer = ClassificationReport(GaussianNB(), classes=[0, 1, 2, 3, 4, 5])
    visualizer.fit(X_train, y_train.values.ravel())
    visualizer.score(X_test, y_test)
    strFile = str(path) + "/output/GaussianNB" + "/Classification Report.png"
    if os.path.isfile(strFile):
        os.remove(strFile)
    visualizer.show(strFile)
    plt.clf()

def eva_model(c, n, X, y, X_test, y_test, class_names, outdir):
    model = svm.LinearSVC(class_weight='balanced', dual=False, max_iter=10000, C=c)
    rfe = RFE(model, n_features_to_select=n)

    ## learning curve
    plt.clf()
    viz_LC = LearningCurve(rfe, scoring='f1_weighted', n_jobs=4)
    viz_LC.fit(X, y)
    viz_LC.show(outpath=outdir + '/LC.png')

    ## classification report
    plt.clf()
    viz_CR = ClassificationReport(rfe, classes=class_names, support=True)
    viz_CR.fit(X, y)
    viz_CR.score(X_test, y_test)
    viz_CR.show(outpath=outdir + '/CR.png')

    ## confusion matrix
    plt.clf()
    viz_CM = ConfusionMatrix(rfe, classes=class_names)
    viz_CM.fit(X, y)
    viz_CM.score(X_test, y_test)
    viz_CM.show(outpath=outdir + '/CM.png')

    ## precision-recall curve
    plt.clf()
    viz_PRC = PrecisionRecallCurve(rfe, per_class=True, iso_f1_curves=True,
                                   fill_area=False, micro=False, classes=class_names)
    viz_PRC.fit(X, y)
    viz_PRC.score(X_test, y_test)
    viz_PRC.show(outpath=outdir + '/PRC.png', size=(1080, 720))

    ## class prediction error
    plt.clf()
    viz_CPE = ClassPredictionError(rfe, classes=class_names)
    viz_CPE.fit(X, y)
    viz_CPE.score(X_test, y_test)
    viz_CPE.show(outpath=outdir + '/CPE.png')

    ## ROCAUC
    plt.clf()
    viz_RA = ROCAUC(rfe, classes=class_names, size=(1080, 720))
    viz_RA.fit(X, y)
    viz_RA.score(X_test, y_test)  # score on the held-out test set, consistent with the other plots
    viz_RA.show(outpath=outdir + '/RA.png')

    fit = rfe.fit(X, y)
    y_predict = fit.predict(X_test)
    f1 = f1_score(y_test, y_predict, average='weighted')

    features_retained_RFE = X.columns[rfe.get_support()].values
    feature_df = pd.DataFrame(features_retained_RFE.tolist())
    feature_df.to_csv(outdir + '/features.csv', sep='\t', index=False)
    return f1

def visualizeClassificationReport(classifier, features_train, labels_train, features_test, labels_test):
    visualizer = ClassificationReport(classifier)
    visualizer.fit(features_train, labels_train)
    visualizer.score(features_test, labels_test)
    visualizer.poof()

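# Example usage of visualizeClassificationReport -- a minimal sketch, assuming the
# helper above is in scope; the iris dataset and logistic regression model are
# illustrative choices, not from the original source.
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from yellowbrick.classifier import ClassificationReport

X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
visualizeClassificationReport(LogisticRegression(max_iter=1000),
                              X_train, y_train, X_test, y_test)
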
def create_classification_report_chart(classifier, X_train, X_test, y_train, y_test):
    """Create classification report chart.

    Tip:
        Check Sklearn-Neptune integration
        `documentation <https://docs-beta.neptune.ai/essentials/integrations/machine-learning-frameworks/sklearn>`_
        for the full example.

    Args:
        classifier (:obj:`classifier`):
            | Fitted sklearn classifier object
        X_train (:obj:`ndarray`):
            | Training data matrix
        X_test (:obj:`ndarray`):
            | Testing data matrix
        y_train (:obj:`ndarray`):
            | The classification target for training
        y_test (:obj:`ndarray`):
            | The classification target for testing

    Returns:
        ``neptune.types.File`` object that you can assign to run's ``base_namespace``.

    Examples:
        .. code:: python3

            import neptune.new.integrations.sklearn as npt_utils

            rfc = RandomForestClassifier()
            rfc.fit(X_train, y_train)

            run = neptune.init(project='my_workspace/my_project')
            run['visuals/classification_report'] = \
                npt_utils.create_classification_report_chart(rfc, X_train, X_test, y_train, y_test)
    """
    assert is_classifier(classifier), 'classifier should be sklearn classifier.'

    chart = None

    try:
        fig, ax = plt.subplots()
        visualizer = ClassificationReport(classifier, support=True, is_fitted=True, ax=ax)
        visualizer.fit(X_train, y_train)
        visualizer.score(X_test, y_test)
        visualizer.finalize()
        chart = neptune.types.File.as_image(fig)
        plt.close(fig)
    except Exception as e:
        print('Did not log Classification Report chart. Error: {}'.format(e))

    return chart

def classification_report(X, y, test_size=0.10, random_state=42):
    models = [
        GaussianNB(),
        KNeighborsClassifier(),
        SGDClassifier(),
        BaggingClassifier(KNeighborsClassifier()),
        DecisionTreeClassifier(),
        LinearSVC(penalty="l1", dual=False)
    ]
    classes = ["not_passed", "passed"]

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state)

    for model in models:
        model.fit(X_train, y_train)
        visualizer = ClassificationReport(model, classes=classes)
        visualizer.fit(X_train, y_train)    # Fit the visualizer and the model
        visualizer.score(X_test, y_test)    # Evaluate the model on the test data
        # For classifiers, model.score returns mean accuracy, not a coefficient of determination
        print("Mean accuracy: %0.6f" % model.score(X_test, y_test))
        visualizer.poof()
        print('')

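# Example invocation of the model-comparison loop above -- a hedged sketch; the
# tiny passed/not-passed dataset below is illustrative only, and the function's
# sklearn/yellowbrick imports are assumed to be in scope.
import pandas as pd

df = pd.DataFrame({
    'hours_studied': [1, 5, 8, 2, 9, 7, 3, 6, 4, 10, 2, 8],
    'attendance':    [40, 80, 95, 50, 90, 85, 55, 75, 45, 98, 35, 88],
})
outcomes = [0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1]  # 0 = not_passed, 1 = passed
classification_report(df, outcomes, test_size=0.25)
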
def visualize_model(X, y, estimator, **kwargs):
    y = LabelEncoder().fit_transform(y)
    model = Pipeline([
        ('One_Hot_Encoder', OneHotEncoder()),
        ('estimator', estimator)
    ])
    visualizer = ClassificationReport(model, classes=['edible', 'poisonous'],
                                      cmap='YlOrRd', support='count')
    visualizer.fit(X, y)
    visualizer.score(X, y)
    visualizer.show()

def store_experiment_data(self, X_test, y_test):
    # self.model is assumed to be fitted already; the visualizers are scored directly
    class_report = ClassificationReport(self.model)
    score = class_report.score(X_test, y_test)
    class_report.poof('metrics/classification_report.png', clear_figure=True)
    self.ex.add_artifact('metrics/classification_report.png')

    conf_matrix = ConfusionMatrix(self.model)
    conf_matrix.score(X_test, y_test)
    conf_matrix.poof('metrics/confusion_matrix.png', clear_figure=True)
    self.ex.add_artifact('metrics/confusion_matrix.png')

    cpd = ClassPredictionError(self.model)
    cpd.score(X_test, y_test)
    cpd.poof('metrics/class_prediction_error.png', clear_figure=True)
    self.ex.add_artifact('metrics/class_prediction_error.png')

    print('score=', score)
    self.ex.log_scalar('score', score)

def visual_model_selection(X, y, estimator):
    """
    Plot a classification report for the given estimator.

    :param X: test set features
    :param y: test set target
    :param estimator: model whose performance is analyzed
    :return: plot of precision, recall and F1 score per class
    """
    visualizer = ClassificationReport(estimator, classes=['Low', 'Medium', 'High'], cmap='PRGn')
    visualizer.fit(X, y)
    visualizer.score(X, y)
    visualizer.poof()

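# Example usage -- a minimal sketch, assuming the function above and its
# yellowbrick import are in scope; the synthetic three-class data and the
# decision tree are illustrative.
import numpy as np
from sklearn.tree import DecisionTreeClassifier
from yellowbrick.classifier import ClassificationReport

rng = np.random.RandomState(0)
X_demo = rng.rand(90, 4)
y_demo = np.repeat(['Low', 'Medium', 'High'], 30)
visual_model_selection(X_demo, y_demo, DecisionTreeClassifier())
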
def make_gb_report(path="images/classification_report.png"):
    X_train, X_test, y_train, y_test = make_dataset()
    _, ax = plt.subplots()

    bayes = GaussianNB()
    viz = ClassificationReport(bayes, ax=ax, classes=['unoccupied', 'occupied'])
    viz.fit(X_train, y_train)
    viz.score(X_test, y_test)
    viz.poof(outpath=path)

def log_classification_report_chart(classifier, X_train, X_test, y_train, y_test, experiment=None):
    """Log classification report chart.

    Make sure you created an experiment by using ``neptune.create_experiment()`` before you use this method.

    Tip:
        Check `Neptune documentation <https://docs.neptune.ai/integrations/scikit_learn.html>`_
        for the full example.

    Args:
        classifier (:obj:`classifier`):
            | Fitted sklearn classifier object
        X_train (:obj:`ndarray`):
            | Training data matrix
        X_test (:obj:`ndarray`):
            | Testing data matrix
        y_train (:obj:`ndarray`):
            | The classification target for training
        y_test (:obj:`ndarray`):
            | The classification target for testing
        experiment (:obj:`neptune.experiments.Experiment`, optional, default is ``None``):
            | Neptune ``Experiment`` object to control to which experiment you log the data.
            | If ``None``, log to currently active, and most recent experiment.

    Returns:
        ``None``

    Examples:
        .. code:: python3

            rfc = RandomForestClassifier()
            rfc.fit(X_train, y_train)

            neptune.init('my_workspace/my_project')
            exp = neptune.create_experiment()

            log_classification_report_chart(rfc, X_train, X_test, y_train, y_test, experiment=exp)
    """
    assert is_classifier(classifier), 'classifier should be sklearn classifier.'
    exp = _validate_experiment(experiment)

    try:
        fig, ax = plt.subplots()
        visualizer = ClassificationReport(classifier, support=True, is_fitted=True, ax=ax)
        visualizer.fit(X_train, y_train)
        visualizer.score(X_test, y_test)
        visualizer.finalize()
        exp.log_image('charts_sklearn', fig, image_name='Classification Report')
        plt.close(fig)
    except Exception as e:
        print('Did not log Classification Report chart. Error: {}'.format(e))

def yb_classification_report(note, tree_clf, X_test, y_test):
    print(note)
    # tree_clf is assumed to be fitted already; the visualizer is scored directly
    visualizer = ClassificationReport(tree_clf)
    visualizer.score(X_test, y_test)
    visualizer.show()

def classreport():
    X, y = load_occupancy()
    X_train, X_test, y_train, y_test = tts(X, y, test_size=0.2)

    oz = ClassificationReport(GaussianNB(), support=True, ax=newfig())
    oz.fit(X_train, y_train)
    oz.score(X_test, y_test)
    savefig(oz, "classification_report")

def classification_report(model, classes, X_train, Y_train, X_test, Y_test):
    from yellowbrick.classifier import ClassificationReport

    # Instantiate the classification model and visualizer
    visualizer = ClassificationReport(model, classes=classes, support=True)
    visualizer.fit(X_train, Y_train)    # Fit the visualizer and the model
    visualizer.score(X_test, Y_test)    # Evaluate the model on the test data
    visualizer.poof()                   # Draw/show/poof the data

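# Example call -- a hedged sketch using a toy dataset; the breast-cancer data and
# random forest model are illustrative, not from the original source.
from sklearn.datasets import load_breast_cancer
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

X, y = load_breast_cancer(return_X_y=True)
X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=0.2, random_state=0)
classification_report(RandomForestClassifier(), ['malignant', 'benign'],
                      X_tr, y_tr, X_te, y_te)
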
def classification_report(self) -> None:
    """Show precision, recall and F1 score by class."""
    visualizer = ClassificationReport(self.trained_model, cmap="YlGn", size=(600, 360))
    visualizer.fit(self.X_train, self.y_train)
    visualizer.score(self.X_test, self.y_test)

    save_dir = f"{self.plots_dir}/classification_report_{self.model_id}.png"
    visualizer.show(outpath=save_dir)
    if not LOCAL:
        upload_to_s3(save_dir,
                     f'plots/classification_report_{self.model_id}.png',
                     bucket=S3_BUCKET_NAME)
    plt.clf()

def model_selection_and_performance():
    st.title('Model Selection and Performance')
    selected_sampling_type = st.sidebar.selectbox('Select data sampling type',
                                                  ['No sampling', 'SMOTEENN'])
    df_prep = load_data_prep()
    X, X_test, y_test = preprocess(df_prep)
    del X

    if selected_sampling_type == 'No sampling':
        selected_model = st.sidebar.selectbox('Select Model', [
            'Logistic Regression', 'Random Forest', 'SVC', 'XGB',
            'Naive bayes', 'All models comparison'
        ])
        if selected_model == 'All models comparison':
            st.info('ROC Curves comparison')
            roc_all = cv2.imread('images/base_models_comparison.jpg')
            st.image(roc_all, use_column_width=True)
            del roc_all
        else:
            model = load_base_models(selected_model)
        gc.collect()
    elif selected_sampling_type == 'SMOTEENN':
        selected_model = st.sidebar.selectbox('Select Model', [
            'Logistic Regression SM', 'Random Forest SM', 'SVC SM', 'XGB SM',
            'Naive bayes SM', 'All models comparison'
        ])
        if selected_model == 'All models comparison':
            st.info('ROC Curves comparison')
            roc_all = cv2.imread('images/sm_models_comparison.jpg')
            st.image(roc_all, use_column_width=True)
            del roc_all
        else:
            model = load_sm_models(selected_model)
        gc.collect()

    if selected_model != 'All models comparison':
        # The loaded model is assumed to be pre-fitted, so the visualizer is scored directly
        fig, ax = plt.subplots()
        visualizer = ClassificationReport(model, classes=['non-churn', 'churn'],
                                          support=True, ax=ax)
        visualizer.score(X_test, y_test)
        visualizer.show()
        st.pyplot(fig)

        st.info('Confusion Matrix')
        fig1, ax1 = plt.subplots()
        plot_confusion_matrix(model, X_test, y_test, ax=ax1)
        st.pyplot(fig1)

        st.info('ROC Curve')
        fig2, ax2 = plt.subplots()
        plot_roc_curve(model, X_test, y_test, ax=ax2)
        st.pyplot(fig2)

        del X_test, y_test, fig, ax, fig1, ax1, fig2, ax2, model
        gc.collect()

def yellowbrick_visualizations(model, classes, X_tr, y_tr, X_te, y_te):
    visualizer = ConfusionMatrix(model, classes=classes)
    visualizer.fit(X_tr, y_tr)
    visualizer.score(X_te, y_te)
    visualizer.show()

    visualizer = ClassificationReport(model, classes=classes, support=True)
    visualizer.fit(X_tr, y_tr)
    visualizer.score(X_te, y_te)
    visualizer.show()

    visualizer = ROCAUC(model, classes=classes)
    visualizer.fit(X_tr, y_tr)
    visualizer.score(X_te, y_te)
    visualizer.show()

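# Example usage -- a minimal sketch, assuming the helper above is in scope; the
# wine dataset and logistic regression are illustrative choices.
from sklearn.datasets import load_wine
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from yellowbrick.classifier import ClassificationReport, ConfusionMatrix, ROCAUC

X, y = load_wine(return_X_y=True)
X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=0.25, random_state=1)
yellowbrick_visualizations(LogisticRegression(max_iter=5000),
                           ['class_0', 'class_1', 'class_2'],
                           X_tr, y_tr, X_te, y_te)
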
def visual_model_selection(X, y, estimator, path):
    """
    Test various estimators.
    """
    model = Pipeline([
        ('label_encoding', EncodeCategorical(X.keys())),
        ('one_hot_encoder', OneHotEncoder()),
        ('estimator', estimator)
    ])

    _, ax = plt.subplots()

    # Instantiate the classification model and visualizer
    visualizer = ClassificationReport(model, ax=ax, classes=['edible', 'poisonous'])
    visualizer.fit(X, y)
    visualizer.score(X, y)
    visualizer.poof(outpath=path)

def visualize_model(X, y, estimator, **kwargs): """ Test various estimators. """ y = LabelEncoder().fit_transform(y) model = Pipeline([('one_hot_encoder', OneHotEncoder()), ('estimator', estimator)]) # Instantiate the classification model and visualizer visualizer = ClassificationReport(model, classes=['edible', 'poisonous'], cmap="YlGn", size=(600, 360), **kwargs) visualizer.fit(X, y) visualizer.score(X, y) visualizer.poof()
def plot_precision_recall_f1(self, classes=['Won', 'Loss'], display=False):
    """
    Plot precision, recall and F1 score.

    # Arguments:
        - classes: A list of all labels
        - display: boolean value for showing the plot or not; default is False
    """
    self.train()

    # Instantiate the classification model and visualizer
    visualizer = ClassificationReport(self.svc_model, classes=classes)
    visualizer.fit(self.data_train, self.label_train)    # Fit the training data to the visualizer
    visualizer.score(self.data_test, self.label_test)    # Evaluate the model on the test data
    visualizer.poof(outpath=self.cfg['plot_path'] + "linear-svc-report.png")  # Save the figure
    if display:
        visualizer.poof()  # Show the figure

def plot_classifier_metrics(self):
    fig, axes = plt.subplots(2, 2, figsize=(12, 8))
    visualgrid = [
        ConfusionMatrix(self.clf, ax=axes[0][0]),
        ClassificationReport(self.clf, ax=axes[0][1]),
        ROCAUC(self.clf, ax=axes[1][0]),
    ]
    fig.delaxes(axes[1, 1])

    for viz in visualgrid:
        viz.fit(self.X_train, self.y_train)
        viz.score(self.X_test, self.y_test)
        viz.finalize()

    plt.savefig('../docs/metrics_classifier.png')
    plt.show()

def classificationreport(clf, classes, X_train, y_train, X_test, y_test):
    # classes = ['increase', 'little change', 'decrease']
    img = io.BytesIO()
    # plt.switch_backend('Agg')
    # plt.style.use('ggplot')
    visualizer = ClassificationReport(clf, classes=classes, support=True)
    visualizer.fit(X_train, y_train)    # Fit the visualizer and the model
    visualizer.score(X_test, y_test)    # Evaluate the model on the test data
    visualizer.show(outpath=img)        # Finalize and render the figure into the buffer
    plt.figure(figsize=(8, 8))
    img.seek(0)
    graph_url = base64.b64encode(img.getvalue()).decode()
    return 'data:image/png;base64,{}'.format(graph_url)

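# Example usage -- a hedged sketch: the returned data URI can be embedded directly
# in an <img> tag (e.g. in a Flask/Jinja template); the function's io/base64/
# matplotlib/yellowbrick imports are assumed in scope, and the dataset and model
# below are illustrative only.
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC

X, y = load_iris(return_X_y=True)
X_tr, X_te, y_tr, y_te = train_test_split(X, y, random_state=7)
uri = classificationreport(SVC(), ['setosa', 'versicolor', 'virginica'],
                           X_tr, y_tr, X_te, y_te)
html = '<img src="{}" alt="classification report"/>'.format(uri)
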
def visual_model_selection(X_train, X_test, y_train, y_test, estimator, show_plot=True):
    """
    Takes train and test sets for both features and target, plus an estimator,
    and returns a visual classification report.
    """
    from sklearn.pipeline import Pipeline
    from yellowbrick.classifier import ClassificationReport

    # y_train = preprocessing.LabelEncoder().fit_transform(y_train.values.ravel())
    # y_test = preprocessing.LabelEncoder().fit_transform(y_test.values.ravel())

    model = Pipeline([('estimator', estimator)])

    # Instantiate the classification model and visualizer
    visualizer = ClassificationReport(model, classes=['on-time', 'delayed'])
    visualizer.fit(X_train, y_train)
    visualizer.score(X_test, y_test)
    visualizer.poof()
    return visualizer.scores

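# Example call -- a minimal sketch with synthetic on-time/delayed labels; the
# random data is illustrative only, and the function above is assumed in scope.
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB

rng = np.random.RandomState(3)
X_demo = rng.rand(200, 5)
y_demo = rng.randint(0, 2, 200)  # 0 = on-time, 1 = delayed
X_tr, X_te, y_tr, y_te = train_test_split(X_demo, y_demo, test_size=0.3)
scores = visual_model_selection(X_tr, X_te, y_tr, y_te, GaussianNB())
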
def DTC(X_train, y_train, X_test, y_test):
    dtc = DecisionTreeClassifier(random_state=2)
    dtc.fit(X_train, y_train)

    print("DecisionTreeClassifier: train set")
    y_pred = dtc.predict(X_train)
    print("DecisionTreeClassifier: Confusion Matrix: ", confusion_matrix(y_train, y_pred))
    print("DecisionTreeClassifier: Accuracy: ", accuracy_score(y_train, y_pred) * 100)

    print("DecisionTreeClassifier: Test set")
    y_pred = dtc.predict(X_test)
    print("DecisionTreeClassifier: Confusion Matrix: ", confusion_matrix(y_test, y_pred))
    print("DecisionTreeClassifier: Accuracy: ", accuracy_score(y_test, y_pred) * 100)

    # Confusion matrix heatmap
    matrix = confusion_matrix(y_test, y_pred)
    class_names = [0, 1]
    fig, ax = plt.subplots()
    tick_marks = np.arange(len(class_names))
    plt.xticks(tick_marks, class_names)
    plt.yticks(tick_marks, class_names)
    sns.heatmap(pd.DataFrame(matrix), annot=True, cmap="YlGnBu", fmt='g')
    ax.xaxis.set_label_position("top")
    plt.tight_layout()
    plt.title('Confusion matrix', y=1.1)
    plt.ylabel('Actual label')
    plt.xlabel('Predicted label')
    plt.show()

    # ROC-AUC curve
    probs = dtc.predict_proba(X_test)
    probs = probs[:, 1]
    auc = roc_auc_score(y_test, probs)
    print('AUC: %.2f' % auc)
    le = preprocessing.LabelEncoder()
    y_test1 = le.fit_transform(y_test)
    fpr, tpr, thresholds = roc_curve(y_test1, probs)
    plot_roc_curve(fpr, tpr)

    # Classification report
    target_names = ['Yes', 'No']
    prediction = dtc.predict(X_test)
    print(classification_report(y_test, prediction, target_names=target_names))

    classes = ["Yes", "No"]
    visualizer = ClassificationReport(dtc, classes=classes, support=True)
    visualizer.fit(X_train, y_train)
    visualizer.score(X_test, y_test)
    visualizer.poof()

def RF_Model(X, Y, X1, Y1):
    global acc1
    print("___________________________Random Forest__________________________________________")
    model1 = RandomForestClassifier()
    model1.fit(X, Y)
    y_pred1 = model1.predict(X1)

    print("_____________Report___________________")
    acc1 = cal_accuracy(Y1, y_pred1)
    # print("_____________user input ___________________")

    # Confusion matrix heatmap
    matrix = confusion_matrix(Y1, y_pred1)
    class_names = [0, 1]
    fig, ax = plt.subplots()
    tick_marks = np.arange(len(class_names))
    plt.xticks(tick_marks, class_names)
    plt.yticks(tick_marks, class_names)
    sns.heatmap(pd.DataFrame(matrix), annot=True, cmap="YlGnBu", fmt='g')
    ax.xaxis.set_label_position("top")
    plt.tight_layout()
    plt.title('Confusion matrix', y=1.1)
    plt.ylabel('Actual label')
    plt.xlabel('Predicted label')
    fig.canvas.set_window_title('RF')
    plt.show()

    # ROC-AUC curve
    probs = model1.predict_proba(X1)
    probs = probs[:, 1]
    auc = roc_auc_score(Y1, probs)
    print('AUC: %.2f' % auc)
    le = preprocessing.LabelEncoder()
    y_test1 = le.fit_transform(Y1)
    fpr1, tpr1, thresholds = roc_curve(y_test1, probs)
    plot_roc_curve(fpr1, tpr1)

    # Classification report
    target_names = ['Yes', 'No']
    prediction = model1.predict(X1)
    # print(classification_report(Y1, prediction, target_names=target_names))
    classes = ["Yes", "No"]
    visualizer1 = ClassificationReport(model1, classes=classes, support=True)
    visualizer1.fit(X, Y)
    visualizer1.score(X1, Y1)
    visualizer1.poof()

def __init__(self, X_train, X_test, y_train, y_test, labels, model, viz_selection, upsampled=False):
    """
    Class for yellowbrick classifier visualizer.

    Args:
        X_train: numpy ndarray of model features training data values
        X_test: numpy ndarray of model features test data values
        y_train: numpy ndarray of model target variable training data values
        y_test: numpy ndarray of model target variable test data values
        labels: list of class labels for binary classification
        model: sklearn estimator for classification
        viz_selection: string value used to reference yellowbrick classification visualizer
        upsampled: binary value that determines to which subdirectory the output image is saved
    """
    self.labels = labels
    self.model = model
    self.viz_selection = viz_selection
    self.upsampled = upsampled
    self.X_train, self.X_test, self.y_train, self.y_test = X_train, X_test, y_train, y_test

    if self.viz_selection == 'ClassificationReport':
        self.visualizer = ClassificationReport(self.model, classes=self.labels, support=True)
    elif self.viz_selection == 'ROCAUC':
        self.visualizer = ROCAUC(self.model, classes=self.labels, support=True)
    elif self.viz_selection == 'PrecisionRecallCurve':
        self.visualizer = PrecisionRecallCurve(self.model)
    elif self.viz_selection == 'ConfusionMatrix':
        self.visualizer = ConfusionMatrix(model, classes=self.labels)
    else:
        print("Error: viz_selection does not match accepted values. "
              "View Visualizer Class for accepted values.")

def classifier_report(classifier, X_test, y_test):
    classes = np.unique(y_test)

    cm = ConfusionMatrix(classifier, classes=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
    cm.fit(X_test, y_test)
    cm.score(X_test, y_test)
    filename = classifier.__class__.__name__ + '_confusion_matrix.png'
    # poof() forwards extra keyword arguments to plt.savefig, so pass them directly
    cm.poof(outpath=filename, clear_figure=True,
            transparent=False, dpi=80, bbox_inches='tight')
    ex.add_artifact(filename)

    visualizer = ClassificationReport(classifier, classes=classes, support=True)
    visualizer.fit(X_test, y_test)
    visualizer.score(X_test, y_test)
    visualizer.poof(outpath="classification_report.png", clear_figure=True,
                    transparent=False, dpi=80, bbox_inches='tight')
    ex.add_artifact('classification_report.png')

def visualize_model(X, y, estimator, path, **kwargs):
    """
    Test various estimators.
    """
    y = LabelEncoder().fit_transform(y)
    model = Pipeline([
        ("one_hot_encoder", OneHotEncoder()),
        ("estimator", estimator)
    ])

    _, ax = plt.subplots()

    # Instantiate the classification model and visualizer
    visualizer = ClassificationReport(model, classes=["edible", "poisonous"],
                                      cmap="YlGn", size=(600, 360), ax=ax, **kwargs)
    visualizer.fit(X, y)
    visualizer.score(X, y)
    visualizer.poof(outpath=path)

def ClassReport_Graph(Classif, Data_train, Target_train, Data_test, Target_test, Class,
                      ModelName='Classifier', Accur=False, Predict=None):
    """
    Report and analyse predictions from different scikit-learn model implementations.

    INPUT: training examples' features, training examples' outputs, testing examples'
    features, testing examples' outputs, and a list with the names of the classes.
    """
    try:
        from yellowbrick.classifier import ClassificationReport

        if Accur:
            print((ModelName + " accuracy: %0.4f")
                  % metrics.accuracy_score(Target_test, Predict, normalize=True))

        # Object for the classification model and visualization
        view_graph = ClassificationReport(Classif, classes=Class, size=(900, 720))
        view_graph.fit(Data_train, Target_train)    # Fit the training data to the visualizer
        view_graph.score(Data_test, Target_test)    # Evaluate the model on the test data
        graph = view_graph.poof()                   # Draw/show/poof the data
        return graph
    except Exception:
        print("CLASSIFICATION-REPORT_ERROR\n")

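# Example usage -- a minimal sketch assuming ClassReport_Graph is in scope; the
# digits dataset and KNN model are illustrative choices.
from sklearn import metrics
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

X, y = load_digits(return_X_y=True)
X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=0.2, random_state=5)
knn = KNeighborsClassifier().fit(X_tr, y_tr)
ClassReport_Graph(knn, X_tr, y_tr, X_te, y_te, [str(d) for d in range(10)],
                  ModelName='KNN', Accur=True, Predict=knn.predict(X_te))
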
def visualize_model(X, y, estimators, pred=False, disc=False, conf=False, bal=False, **kwargs):
    """
    Visualize models using the yellowbrick plotting library.
    """
    # Instantiate the classification model and visualizer
    visualizer = ClassificationReport(
        estimators,
        classes=['Reach, 1 Reach, or L/R Reach', 'Null, Multiple Reaches, Or Multiple Arms'],
        cmap="YlGn",
        size=(600, 360),
        **kwargs
    )
    visualizer.fit(X, y)
    visualizer.score(X, y)
    visualizer.show()

    if pred:
        class_prediction_errors(X, y, estimators, **kwargs)
    if disc:
        discrimination_thresholding(X, y, estimators, **kwargs)
    if conf:
        confusion_matrix(X, y, estimators, **kwargs)
    if bal:
        plot_class_balance(y, **kwargs)

def classification(fname="classification.png"):
    # Create side-by-side axes grid
    _, axes = plt.subplots(ncols=2, figsize=(18, 6))

    # Add ClassificationReport to the left
    data = load_spam(split=True)
    oz = ClassificationReport(MultinomialNB(), classes=["ham", "spam"], ax=axes[0])
    oz.fit(data.X.train, data.y.train)
    oz.score(data.X.test, data.y.test)
    oz.finalize()

    # Add DiscriminationThreshold to the right
    data = load_spam(split=False)
    oz = DiscriminationThreshold(LogisticRegression(), ax=axes[1])
    oz.fit(data.X, data.y)
    oz.finalize()

    # Save figure
    path = os.path.join(FIGURES, fname)
    plt.tight_layout()
    plt.savefig(path)

        files=files,
        data=data,
        target=target,
    )

# Load the data and create document vectors
corpus = load_corpus('hobbies')
tfidf = TfidfVectorizer()

docs = tfidf.fit_transform(corpus.data)
labels = corpus.target

X_train, X_test, y_train, y_test = train_test_split(docs.toarray(), labels,
                                                    test_size=0.2, random_state=42)

visualizer = ClassificationReport(GaussianNB(), classes=corpus.categories)
visualizer.fit(X_train, y_train)    # Fit the training data to the visualizer
visualizer.score(X_test, y_test)    # Evaluate the model on the test data
visualizer.poof()

visualizer = ClassificationReport(SGDClassifier(), classes=corpus.categories)
visualizer.fit(X_train, y_train)    # Fit the training data to the visualizer
visualizer.score(X_test, y_test)    # Evaluate the model on the test data
visualizer.poof()

visualizer = ConfusionMatrix(LogisticRegression(), classes=corpus.categories)
visualizer.fit(X_train, y_train)    # Fit the training data to the visualizer
visualizer.score(X_test, y_test)    # Evaluate the model on the test data
visualizer.poof()

visualizer = ConfusionMatrix(MultinomialNB(), classes=corpus.categories)