Example #1
from pycm import ConfusionMatrix


def show_classification_metrics(pred,
                                label,
                                label_index=None,
                                metrics_list=None,
                                display=True):
    """Calculate metrics for a classification model using PyCM.

    Args:
        pred (numpy.array) : predicted class index for each sample (batch_size,)
        label (numpy.array) : true class index for each sample (batch_size,)
        label_index (dict) : mapping from class index to class name (default=None)
        metrics_list (list) : extra metric names to include (refer to the PyCM metrics list) (default=None)
        display (bool) : whether to print the overall result (default=True)

    Returns:
        metrics (dict) : contains the 2-level result (overall_stat, class_stat)

    """
    # pred = pred.reshape(-1)
    # label = label.reshape(-1)

    # PyCM expects the actual (true) vector first, then the predicted vector.
    cm = ConfusionMatrix(actual_vector=label, predict_vector=pred)
    if label_index is not None:
        cm.relabel(mapping=label_index)

    default_metrics_list = cm.recommended_list
    if metrics_list is not None:
        default_metrics_list.extend(metrics_list)

    if display:
        cm.stat(summary=True)
        print("[Matrix]")
        cm.print_matrix()
        print("[Normalized Matrix]")
        cm.print_normalized_matrix()

    overall_stat = cm.overall_stat
    class_stat = cm.class_stat

    filter_overall_stat = {
        k: v
        for k, v in overall_stat.items() if k in default_metrics_list
    }
    filter_class_stat = {
        k: v
        for k, v in class_stat.items() if k in default_metrics_list
    }
    output = dict()
    output["overall_stat"] = filter_overall_stat
    output["class_stat"] = filter_class_stat
    return output
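
A minimal usage sketch; the class names in label_index and the metric names are illustrative assumptions, not taken from the original:

import numpy as np

# Hypothetical 1-D label vectors for a three-class problem.
pred = np.array([0, 1, 2, 2, 1, 0, 1])
label = np.array([0, 1, 2, 1, 1, 0, 2])

metrics = show_classification_metrics(
    pred,
    label,
    label_index={0: "cat", 1: "dog", 2: "bird"},
    metrics_list=["ACC", "F1"],  # per-class metrics from the PyCM list
    display=False)
print(sorted(metrics["class_stat"].keys()))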
Example #2
import pickle

import pandas as pd
from pycm import ConfusionMatrix
from scipy.sparse import load_npz

features = ['CATEGORY', 'TITLE']

train_df = pd.read_table('./data/train.txt', header=None, names=features)
test_df = pd.read_table('./data/test.txt', header=None, names=features)

with open('./models/52lr.pickle', mode='rb') as f:
    clf = pickle.load(f)
with open('./models/51vectorizer.pickle', mode='rb') as f:
    vectorizer = pickle.load(f)
with open('./models/52le.pickle', mode='rb') as f:
    le = pickle.load(f)
with open('./models/52sc.pickle', mode='rb') as f:
    sc = pickle.load(f)

X_train_sparse = load_npz("./feature/train.feature.npz")
X_valid_sparse = load_npz("./feature/valid.feature.npz")
X_test_sparse = load_npz("./feature/test.feature.npz")
X_train, X_valid, X_test = (X_train_sparse.toarray(),
                            X_valid_sparse.toarray(),
                            X_test_sparse.toarray())

# Confusion matrix on the training data
labels_train_pred = clf.predict(sc.transform(X_train))
labels_train_true = le.transform(train_df['CATEGORY'].values)
cm_train = ConfusionMatrix(le.inverse_transform(labels_train_true),
                           le.inverse_transform(labels_train_pred))
print('By training data\n')
cm_train.print_matrix()

# Confusion matrix on the test data
labels_test_pred = clf.predict(sc.transform(X_test))
labels_test_true = le.transform(test_df['CATEGORY'].values)
cm_test = ConfusionMatrix(le.inverse_transform(labels_test_true),
                          le.inverse_transform(labels_test_pred))
print('By test data\n')
cm_test.print_matrix()
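
The loaded vectorizer goes unused above; a hedged sketch of how the four pickled artifacts would combine to classify one new headline (the headline text is made up):

# Hypothetical single-headline prediction with the loaded artifacts.
headline = ["Tech giants report record quarterly earnings"]
X_new = vectorizer.transform(headline).toarray()  # same features as training
y_new = clf.predict(sc.transform(X_new))          # scale, then predict
print(le.inverse_transform(y_new))                # back to a category name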
Example #3
import sys
from copy import deepcopy

from pycm import ConfusionMatrix
# Assumption: `accuracy` and `bal_score` are aliases for the sklearn metrics.
from sklearn.metrics import accuracy_score as accuracy
from sklearn.metrics import balanced_accuracy_score as bal_score

# This fragment runs inside a loop over `ntrain` and `rnseed`; `clf` is a
# fitted hyperparameter search over random forests.
rf_clf = deepcopy(clf.best_estimator_)

print("RF params:", len(rf_clf.estimators_))

###################
# Error Evaluations
###################

print("RF Ntrain %s RnSeed %s Train Score %s" % (ntrain, rnseed, rf_clf.score(X_train, y_train)))
pred = rf_clf.predict(X_train)
print('Unweighted Accuracy', accuracy(y_pred=pred, y_true=y_train))
print('Weighted Accuracy', bal_score(y_pred=pred, y_true=y_train))

cm = ConfusionMatrix(actual_vector=y_train, predict_vector=pred)
cm.print_matrix()

print("RF Ntrain %s RnSeed %s Test Score %s" % (ntrain, rnseed, rf_clf.score(X_test, y_test)))
pred = rf_clf.predict(X_test)
print('Unweighted Accuracy', accuracy(y_pred=pred, y_true=y_test))
print('Weighted Accuracy', bal_score(y_pred=pred, y_true=y_test))

cm = ConfusionMatrix(actual_vector=y_test, predict_vector=pred)
cm.print_matrix()
print(cm)

sys.stdout.flush()
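
For context, a hedged sketch of the kind of outer loop these names imply; X_all, y_all, the training-set sizes, and the parameter grid are all hypothetical:

from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV

for ntrain in [100, 500, 1000]:  # hypothetical training-set sizes
    for rnseed in range(3):      # hypothetical random seeds
        X_train, y_train = X_all[:ntrain], y_all[:ntrain]
        clf = GridSearchCV(RandomForestClassifier(random_state=rnseed),
                           param_grid={'n_estimators': [100, 300]})
        clf.fit(X_train, y_train)
        # ...the evaluation fragment above runs here...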
Example #4
from pycm import ConfusionMatrix


# fileToLabelDict and dictToString are helpers defined elsewhere in this module.
def showStats(true_labels,
              predictions,
              labels_dir,
              showExtraStats,
              writeToFile=''):
    """
    Prints various statistics about model performance.

    Arguments:

    true_labels: true labels of the predictions

    predictions: predicted labels

    labels_dir: path to a text file mapping labels to integers

    showExtraStats: whether to print per-class AUC statistics

    writeToFile: optional path of a file to write the results to

    Returns:

    N/A
    """

    #Print basic info
    print("Labels:")
    label_dict = fileToLabelDict(labels_dir)
    print(label_dict)
    print("\nPredictions:")
    print(predictions)
    print("\nActual:")
    print(true_labels)
    print("\n")

    # Build the confusion matrix, relabeling integer classes with their names.
    my_inverted_dict = {
        int(label_dict[elem]): elem
        for elem in label_dict.keys()
    }
    cm = ConfusionMatrix(actual_vector=true_labels, predict_vector=predictions)
    cm.relabel(mapping=my_inverted_dict)
    cm.print_matrix()
    if showExtraStats:
        #print("Micro F1 Score: ",cm.overall_stat['F1 Micro'])
        #print("Macro F1 Score: ",cm.overall_stat['F1 Macro'])
        #print("Cross Entropy: ",cm.overall_stat['Cross Entropy'])
        #print("95% CI: ",cm.overall_stat['95% CI'])
        print("AUC: ", cm.AUC)
        print("AUC quality:", cm.AUCI)
    # Optionally write the results to a file.
    if writeToFile != '':
        with open(writeToFile, 'w') as f:
            f.write("Labels:\n\n")
            f.write(str(label_dict))
            f.write("\n\nPredictions:\n\n")
            f.write(str(predictions))
            f.write("\n\nActual:\n\n")
            f.write(str(true_labels))
            f.write("\n\n")
            f.write(dictToString(cm.matrix))
            f.write("\n\n")
            f.write("AUC:\n\n")
            f.write(str(cm.AUC))
            f.write("\n\nAUC Quality:\n\n")
            f.write(str(cm.AUCI))
            f.write("\n\n")