# Example no. 1
def get_classification_reports(models, model_names, X_train, X_test, y_train,
                               y_test):
    """
    Fit each model and print a classification report on the test set.

    Parameters
    ----------
    models : list of (unfit) estimators.
    model_names : list of str, display names aligned with ``models``.
    X_train, X_test : feature matrices. Should be pre-processed (scaled etc.).
    y_train, y_test : binarized (one column per class) target arrays.

    Each model is fit on the training set and scored on the test set with
    sklearn's ``classification_report`` (precision, recall, F1-score and
    support per class, plus averages).
    """
    for name, model in zip(model_names, models):
        print(name)
        print('\n')

        # Multi-column (one-vs-rest) target: wrap the base estimator.
        if y_train.shape[1] != 1:
            model = OneVsRestClassifier(model)

        # NOTE(review): the original had an `if model == xgb:` branch here
        # that compared an estimator instance to the xgboost module (never
        # true) and computed parameters it never used — removed as dead code.
        model.fit(X_train, y_train)

        # Predict on the held-out test set.
        y_predict = model.predict(X_test)

        # Precision, recall, F1-score and support for each class,
        # plus macro/weighted averages.
        print(classification_report(y_test, y_predict))

        print('-' * 60)
        print('\n')
# Example no. 2
def get_ROC_curve(models, model_names, X_train, X_test, y_train, y_test):
    """
    Fit each model and plot one ROC curve per class on the test set.

    Parameters
    ----------
    models : list of (unfit) estimators supporting ``predict_proba``.
    model_names : list of str, display names aligned with ``models``.
    X_train, X_test : feature matrices. Should be pre-processed (scaled etc.).
    y_train, y_test : binarized (one column per class) target arrays.

    Shows one matplotlib figure per model with n curves, where n is the
    number of target columns, each labelled with its AUC.
    """
    # Display labels per target column.
    # NOTE(review): hard-coded for 3 classes — will raise IndexError if the
    # target has more columns; confirm against the data this is used with.
    classes = ['Bad', 'Good', 'Neutral']

    for model, name in zip(models, model_names):
        # Multi-column (one-vs-rest) target: wrap the base estimator.
        if y_train.shape[1] != 1:
            model = OneVsRestClassifier(model)

        # NOTE(review): removed the original dead `if model == xgb:` branch
        # (estimator never equals the xgboost module; its result was unused)
        # and an unused `y_pred = model.predict(...)` call — ROC needs only
        # the class probabilities below.
        model.fit(X_train, y_train)

        # Class membership probabilities used to sweep ROC thresholds.
        probs = model.predict_proba(X_test)

        # One ROC curve per target column.
        n_classes = y_train.shape[1]
        fpr, tpr, roc_auc = dict(), dict(), dict()

        for i in range(n_classes):
            fpr[i], tpr[i], _ = roc_curve(y_test[:, i], probs[:, i])
            roc_auc[i] = auc(fpr[i], tpr[i])
            plt.plot(fpr[i],
                     tpr[i],
                     label="{}, area: {}".format(classes[i],
                                                 round(roc_auc[i], 2)))

        # Chance diagonal for reference.
        plt.plot([0, 1], [0, 1], 'k--')
        plt.xlim([0.0, 1.0])
        plt.ylim([0.0, 1.0])
        plt.xlabel('False Positive Rate')
        plt.ylabel('True Positive Rate')
        plt.title('ROC Curves by class for {}'.format(name))
        plt.legend()
        plt.show()
# Example no. 3
def get_scores(models, model_names, X_train, X_test, y_train, y_test):
    """
    Fit each model and print accuracy, weighted precision, recall and F1
    on the test set.

    Parameters
    ----------
    models : list of (unfit) estimators.
    model_names : list of str, display names aligned with ``models``.
    X_train, X_test : feature matrices. Should be pre-processed (scaled etc.).
    y_train, y_test : binarized (one column per class) target arrays.
    """
    # Accumulate one score per model. (Bug fix: these were re-initialized
    # inside the loop, so the "across models" lists never held more than
    # one entry.)
    accuracies_across_models = []
    precisions_across_models = []
    recalls_across_models = []
    f1s_across_models = []

    for model, name in zip(models, model_names):
        print(name)
        print('\n')

        # Multi-column (one-vs-rest) target: wrap the base estimator.
        if y_train.shape[1] != 1:
            model = OneVsRestClassifier(model)

        # NOTE(review): removed the original dead `if model == xgb:` branch
        # (estimator never equals the xgboost module; its result was unused).
        model.fit(X_train, y_train)

        y_pred = model.predict(X_test)

        acc = accuracy_score(y_test, y_pred)
        pre = precision_score(y_test, y_pred, average="weighted")
        rec = recall_score(y_test, y_pred, average="weighted")
        f1 = f1_score(y_test, y_pred, average="weighted")

        accuracies_across_models.append(acc)
        precisions_across_models.append(pre)
        recalls_across_models.append(rec)
        f1s_across_models.append(f1)

        # Bug fix: the scores are scalars, so np.mean(x) was a no-op and
        # np.std(x) always printed 0; the `:3f` specs were also missing the
        # dot (should be `:.3f`). Print the scalar scores directly.
        print(f'Accuracy:  {acc:.3f}')
        print(f'Precision: {pre:.3f}')
        print(f'Recall:    {rec:.3f}')
        print(f'f1-score:  {f1:.3f}')

        print('-' * 60)
        print('\n')