import pandas as pd
from pandas_ml import ConfusionMatrix
from sklearn.metrics import accuracy_score


def calculate_accuracy(csv_filename):

    # Loading csv information into a data frame
    data = pd.read_csv(csv_filename)
    # assigning actual sentiment data to y_test
    y_test = data['Actual_Statement']
    # assigning predicted sentiment data to y_pred
    y_pred = data['Prediction']

    score = accuracy_score(y_test, y_pred)
    # calling accuracy_score method to get the accuracy_score
    print('Accuracy Score : ', score)

    # calling confusion_matrix method from pandas_ml to show the output
    confusion_matrix = ConfusionMatrix(y_test, y_pred)
    output = confusion_matrix.to_dataframe()

    writer = pd.ExcelWriter("azure_text_confusion_matrix_output.xlsx", engine='xlsxwriter')  # xlsxwriter exposes worksheet.write below
    output.to_excel(writer, startrow=4, startcol=0)
    accuracy_text = 'Accuracy Score : ' + str(score)
    worksheet = writer.sheets['Sheet1']
    worksheet.write(1, 0, accuracy_text)

    writer.save()  # on pandas >= 1.5 use writer.close() instead

    print("Confusion matrix:\n%s" % confusion_matrix)
Example #2
    def validate_epoch(self, val_model, epoch_cm):
        """
        Computes the batch validation confusion matrix
        and then updates the epoch confusion matrix.
        """
        # Loop through validation set
        for n in range(self.validation_steps):

            # Grab next batch
            X, y_true, _ = next(self.validation_data)

            # Make prediction with model
            y_pred = val_model([X])[0]

            # Find highest classes prediction
            y_true = np.argmax(y_true, axis=-1)
            y_pred = np.argmax(y_pred, axis=-1)

            # Flatten batch into single array
            y_true = y_true.flatten()
            y_pred = y_pred.flatten()

            # Create batch CM
            batch_cm = ConfusionMatrix(y_true, y_pred)

            # Get all classes in batch
            all_classes = list(batch_cm.classes)

            batch_cm = batch_cm.to_array()

            # Update epoch CM
            for i in all_classes:
                for j in all_classes:
                    epoch_cm[i, j] += batch_cm[all_classes.index(i), all_classes.index(j)]
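
When the class labels are plain integers in [0, n_classes), the label-indexed double loop above can be collapsed into a single vectorized update; a standalone sketch, where update_epoch_cm is a hypothetical helper rather than part of this class:

import numpy as np

def update_epoch_cm(epoch_cm, y_true, y_pred):
    # Count every (true, predicted) pair directly into the epoch matrix.
    np.add.at(epoch_cm, (y_true, y_pred), 1)
    return epoch_cm

epoch_cm = np.zeros((3, 3), dtype=int)
update_epoch_cm(epoch_cm, np.array([0, 1, 2, 2]), np.array([0, 2, 2, 2]))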
Example #3
def compute_metrics(task_name, preds, labels):
    assert len(preds) == len(labels)
    if task_name == "cola":
        return {"mcc": matthews_corrcoef(labels, preds)}
    elif task_name == "sst-2":
        return {"acc": simple_accuracy(preds, labels)}
    elif task_name == "mrpc":
        return acc_and_f1(preds, labels)
    elif task_name == "sts-b":
        return pearson_and_spearman(preds, labels)
    elif task_name == "qqp":
        return acc_and_f1(preds, labels)
    elif task_name == "mnli":
        return {"acc": simple_accuracy(preds, labels)}
    elif task_name == "mnli-mm":
        return {"acc": simple_accuracy(preds, labels)}
    elif task_name == "qnli":
        return {"acc": simple_accuracy(preds, labels)}
    elif task_name == "rte":
        return {"acc": simple_accuracy(preds, labels)}
    elif task_name == "wnli":
        return {"acc": simple_accuracy(preds, labels)}
    elif task_name == "sa" or task_name == 'sa_csv':
        from pandas_ml import ConfusionMatrix
        pcm = ConfusionMatrix(labels, preds)
        pcm.print_stats()
        precision, recall, f1, _ = precision_recall_fscore_support(
            labels, preds, average='weighted')
        #return {"acc": simple_accuracy(preds, labels)}
        return {"acc": pcm.stats_overall['Accuracy']}
    elif task_name == "arg_mining":
        return {}
    else:
        raise KeyError(task_name)
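
compute_metrics relies on helpers that are not shown; one plausible set of definitions, following the usual GLUE-metrics conventions (an assumption about the elided code, not a verified copy of it):

import numpy as np
from scipy.stats import pearsonr, spearmanr
from sklearn.metrics import (f1_score, matthews_corrcoef,
                             precision_recall_fscore_support)

def simple_accuracy(preds, labels):
    return (np.asarray(preds) == np.asarray(labels)).mean()

def acc_and_f1(preds, labels):
    acc = simple_accuracy(preds, labels)
    f1 = f1_score(y_true=labels, y_pred=preds)
    return {"acc": acc, "f1": f1, "acc_and_f1": (acc + f1) / 2}

def pearson_and_spearman(preds, labels):
    return {"pearson": pearsonr(preds, labels)[0],
            "spearmanr": spearmanr(preds, labels)[0]}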
    def test_pandas_confusion_binary_cm_inverse(self):
        y_true = [True, True, False, False, False, True, False, True, True,
                  False, True, False, False, False, False, False, True, False,
                  True, True, True, True, False, False, False, True, False,
                  True, False, False, False, False, True, True, False, False,
                  False, True, True, True, True, False, False, False, False,
                  True, False, False, False, False, False, False, False, False,
                  False, True, True, False, True, False, True, True, True,
                  False, False, True, False, True, False, False, True, False,
                  False, False, False, False, False, False, False, True, False,
                  True, True, True, True, False, False, True, False, True,
                  True, False, True, False, True, False, False, True, True,
                  False, False, True, True, False, False, False, False, False,
                  False, True, True, False]

        y_pred = [False, False, False, False, False, True, False, False, True,
                  False, True, False, False, False, False, False, False, False,
                  True, True, True, True, False, False, False, False, False,
                  False, False, False, False, False, True, False, False, False,
                  False, True, False, False, False, False, False, False, False,
                  True, False, False, False, False, False, False, False, False,
                  False, True, False, False, False, False, False, False, False,
                  False, False, True, False, False, False, False, True, False,
                  False, False, False, False, False, False, False, True, False,
                  False, True, False, False, False, False, True, False, True,
                  True, False, False, False, True, False, False, True, True,
                  False, False, True, True, False, False, False, False, False,
                  False, True, False, False]

        binary_cm = ConfusionMatrix(y_true, y_pred)
        assert isinstance(binary_cm, pdml.confusion_matrix.BinaryConfusionMatrix)
        bcm_sum = binary_cm.sum()

        binary_cm_r = binary_cm.inverse()  # reverse not in place
        assert bcm_sum == binary_cm_r.sum()
Example #6
    def test(self, test, test_targets, pdconf=False, filename="", legend=None):

        if self.net_type == "classification":
            pred = self.forward_classification(test)
            acc = self.cal_acc(pred, test_targets)
            conf = self.confusion_table(pred, test_targets)
            if pdconf and legend is not None:
                temp_pred = self.predict(test)
                predict = np.empty(len(temp_pred))
                targets = np.empty(len(test_targets))
                for i in range(len(targets)):
                    predict[i] = legend[np.argmax(temp_pred[i])]
                    targets[i] = legend[np.argmax(test_targets[i])]
                confus = ConfusionMatrix(targets, predict, display_sum=True)
        elif self.net_type == "regression":
            pred = self.forward_regression(test)
            r2 = self.cal_r2(pred, test_targets)

        err = self.cal_err(pred, test_targets, self.cost_function)

        print("The test error is: ", err)

        if self.net_type == "classification":
            print("The test accuracy is: ", acc)
            print("Confusion matrix:")
            print(conf)
            if pdconf and legend is not None:
                confus.plot(backend="seaborn")
                plt.savefig(filename)
                plt.clf()
            return err, acc, conf
        elif self.net_type == "regression":
            print("The test R2-score is: ", r2)
            return err, r2
Example #7
    def save_confusion_matrix(self, truth_res, pred_res):

        #truth_res = [self.label_map[i+1] for i in truth_res]
        #pred_res = [self.label_map[i+1] for i in pred_res]
        '''
        print(len(truth_res))
        print(len(pred_res))
        confusion_matrix = ConfusionMatrix(truth_res, pred_res)
        plt.figure(dpi=200, figsize=(10, 7))
        confusion_matrix.plot()
        plt.savefig(self.confusion_matrix_file_path)
        '''
        s = sklearn.metrics.confusion_matrix(truth_res, pred_res)
        list_label = self.label_map[1:]
        df_cm = pd.DataFrame(data=s, columns=list_label, index=list_label)
        plt.figure(dpi=100)

        heatmap = sns.heatmap(df_cm, annot=True, fmt='d')
        heatmap.yaxis.set_ticklabels(heatmap.yaxis.get_ticklabels(),
                                     rotation=70,
                                     ha='right',
                                     fontsize=5)
        heatmap.xaxis.set_ticklabels(heatmap.xaxis.get_ticklabels(),
                                     rotation=20,
                                     ha='right',
                                     fontsize=5)

        plt.savefig(self.confusion_matrix_file_path)

        confusion_matrix = ConfusionMatrix(truth_res, pred_res)
        confusion_matrix.print_stats()
def random_forest():
    print("------------------------RANDOM FOREST-----------------------")
    df = pd.read_csv(var.get(), low_memory=False)
    df = df.sample(frac=1).reset_index(drop=True)
    frauds = df.loc[df['Class'] == 1]
    non_frauds = df.loc[df['Class'] == 0]
    print("\nWe have", len(frauds), "fraud data points and", len(non_frauds), "nonfraudulent data points.")
    X = df.iloc[:, :-1]
    y = df['Class']

    print("X and y sizes, respectively:", len(X), len(y))

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.35)
    print("Train and test sizes, respectively:", len(X_train), len(y_train), "|", len(X_test), len(y_test))
    print("Total number of frauds:", len(y.loc[df['Class'] == 1]))
    print("Number of frauds on y_test:", len(y_test[y_test == 1]))
    print("Number of frauds on y_train:", len(y_train[y_train == 1]))
    clf = RandomForestClassifier()
    clf.fit(X_train, y_train)
    y_predicted1 = np.array(clf.predict(X_test))
    y_right1 = np.array(y_test)
    confusion_matrix1 = ConfusionMatrix(y_right1, y_predicted1)
    print("\n\nConfusion matrix:\n%s" % confusion_matrix1)
    # confusion_matrix1.plot(normalized=True)
    T = Text(root, height=60, width=60)
    T.pack(pady=20, side=BOTTOM, fill=Y)
    stats = confusion_matrix1.stats()
    for key in stats:
        T.insert(END, [key, stats[key]])
        T.insert(END, "\n")
    d['ACC'].append(stats['ACC'] * 100)
    d['TPR'].append(stats['TPR'] * 100)
    fpr, tpr, thresholds = roc_curve(y_right1, y_predicted1)
    aucarr['auc'].append(auc(fpr, tpr))
def logistic_regression():
    print("------------------------LOGISTIC REGRESSION-----------------------")
    df = pd.read_csv(var.get(), low_memory=False)
    df = df.sample(frac=1).reset_index(drop=True)
    frauds = df.loc[df['Class'] == 1]
    non_frauds = df.loc[df['Class'] == 0]
    print("\n")
    print("We have", len(frauds), "fraud data points and", len(non_frauds), "nonfraudulent data points.\n")
    X = df.iloc[:,:-1]
    y = df['Class']
    print("X and y sizes, respectively:", len(X), len(y))
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.35)
    '''print("\nTrain and test sizes, respectively:", len(X_train), len(y_train), "|", len(X_test), len(y_test))
    print("Total number of frauds:", len(y.loc[df['Class'] == 1]))
    print("Number of frauds on y_test:", len(y_test.loc[df['Class'] == 1]))
    print("Number of frauds on y_train:", len(y_train.loc[df['Class'] == 1]))'''
    logistic = linear_model.LogisticRegression(C=1e5)
    logistic.fit(X_train, y_train)
    print("\nScore: ", logistic.score(X_test, y_test))
    y_predicted = np.array(logistic.predict(X_test))
    y_right = np.array(y_test)
    confusion_matrix = ConfusionMatrix(y_right, y_predicted)
    print("\n\nConfusion matrix:\n%s" % confusion_matrix)
    #confusion_matrix.plot(normalized=True)
    T = Text(root, height=60, width=60)
    T.pack(pady=20, side=BOTTOM, fill=Y)
    stats = confusion_matrix.stats()
    for key in stats:
        T.insert(END, [key, stats[key]])
        T.insert(END, "\n")
    d['ACC'].append(stats['ACC'] * 100)
    d['TPR'].append(stats['TPR'] * 100)
    fpr, tpr, thresholds = roc_curve(y_right, y_predicted)
    aucarr['auc'].append(auc(fpr, tpr))
def process_results(mode, file, thrshld):
    threshold = thrshld
    with open(file) as json_file:  
        data = json.load(json_file)
        accuracy = 0.0
        actual = []
        predicted = []
        for p in data['results']:
            labellingScore = int(p['labellingScore']) 
            score = float(p['score'])
            if labellingScore == 1 and score > threshold:
                accuracy = accuracy + 1
            elif labellingScore == 0 and score < threshold:
                accuracy = accuracy + 1
            if labellingScore == 1:
                actual.append(1)
            else:
                actual.append(0)
            if score > threshold:
                predicted.append(1)
            else:
                predicted.append(0)
    if mode == 1:
        cm = ConfusionMatrix(actual, predicted)
        cm.print_stats()
    return accuracy/len(data['results'])
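
A quick usage sketch for process_results: write a tiny results file in the expected shape, then score it at a hypothetical threshold (pandas_ml is needed for mode 1):

import json

sample = {"results": [
    {"labellingScore": 1, "score": 0.9},
    {"labellingScore": 0, "score": 0.2},
    {"labellingScore": 1, "score": 0.3},
]}
with open("sample_results.json", "w") as f:
    json.dump(sample, f)
print(process_results(1, "sample_results.json", 0.5))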
def test_value_counts():
    df = pd.DataFrame({
        'Height': [
            150, 150, 151, 151, 152, 155, 155, 157, 157, 157, 157, 158, 158,
            159, 159, 159, 160, 160, 162, 162, 163, 164, 165, 168, 169, 169,
            169, 170, 171, 171, 173, 173, 174, 176, 177, 177, 179, 179, 179,
            179, 179, 181, 181, 182, 183, 184, 186, 190, 190
        ],
        'Weight': [
            54, 55, 55, 47, 58, 53, 59, 60, 56, 55, 62, 56, 55, 55, 64, 61, 59,
            59, 63, 66, 64, 62, 66, 66, 72, 65, 75, 71, 70, 70, 75, 65, 79, 78,
            83, 75, 84, 78, 74, 75, 74, 90, 80, 81, 90, 81, 91, 87, 100
        ],
        'Size': [
            'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S',
            'S', 'S', 'S', 'S', 'S', 'S', 'M', 'M', 'M', 'M', 'M', 'M', 'M',
            'M', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L',
            'L', 'L', 'XL', 'XL', 'XL', 'XL', 'XL', 'XL', 'XL', 'XL'
        ],
        'SizePred': [
            'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S',
            'S', 'S', 'S', 'S', 'S', 'M', 'M', 'M', 'M', 'M', 'M', 'M', 'M',
            'M', 'M', 'M', 'M', 'L', 'M', 'L', 'L', 'L', 'L', 'L', 'L', 'L',
            'L', 'L', 'XL', 'L', 'L', 'XL', 'L', 'XL', 'XL', 'XL'
        ],
    })
    cm = ConfusionMatrix(df["Size"], df["SizePred"])
    assert isinstance(cm, pdml.confusion_matrix.LabeledConfusionMatrix)

    assert (cm.true - df.Size.value_counts()).sum() == 0
    assert (cm.pred - df.SizePred.value_counts()).sum() == 0
    cm.print_stats()
Example #12
    def confusion_matrix(self, ground_truth, predictions, display=True):
        matrix = ConfusionMatrix(ground_truth, predictions)
        if display:
            print("Confusion matrix:\n%s" % matrix)

        if self.save_plots:
            matrix.plot()
            plt.savefig(self.evaluation_path)
    def test_pandas_confusion_cm_int(self):
        y_true = [2, 0, 2, 2, 0, 1, 1, 2, 2, 0, 1, 2]
        y_pred = [0, 0, 2, 1, 0, 2, 1, 0, 2, 0, 2, 2]
        labels = ["ant", "bird", "cat"]
        cm = ConfusionMatrix(y_true, y_pred, labels=labels)
        assert isinstance(cm, pdml.confusion_matrix.LabeledConfusionMatrix)
        print("Confusion matrix:\n%s" % cm)
        asserts(y_true, y_pred, cm)
        assert cm.len() == len(labels)
def test_pandas_confusion_cm_int():
    y_true = [2, 0, 2, 2, 0, 1, 1, 2, 2, 0, 1, 2]
    y_pred = [0, 0, 2, 1, 0, 2, 1, 0, 2, 0, 2, 2]
    labels = ["ant", "bird", "cat"]
    cm = ConfusionMatrix(y_true, y_pred, labels=labels)
    assert isinstance(cm, pdml.confusion_matrix.LabeledConfusionMatrix)
    print("Confusion matrix:\n%s" % cm)
    asserts(y_true, y_pred, cm)
    assert cm.len() == len(labels)
    def test_pandas_confusion_normalized(self):
        y_true = [2, 0, 2, 2, 0, 1, 1, 2, 2, 0, 1, 2]
        y_pred = [0, 0, 2, 1, 0, 2, 1, 0, 2, 0, 2, 2]
        cm = ConfusionMatrix(y_true, y_pred)
        assert isinstance(cm, pdml.confusion_matrix.LabeledConfusionMatrix)

        df = cm.to_dataframe()
        df_norm = cm.to_dataframe(normalized=True)
        assert(df_norm.sum(axis=1).sum() == len(df))
def test_pandas_confusion_normalized():
    y_true = [2, 0, 2, 2, 0, 1, 1, 2, 2, 0, 1, 2]
    y_pred = [0, 0, 2, 1, 0, 2, 1, 0, 2, 0, 2, 2]
    cm = ConfusionMatrix(y_true, y_pred)
    assert isinstance(cm, pdml.confusion_matrix.LabeledConfusionMatrix)

    df = cm.to_dataframe()
    df_norm = cm.to_dataframe(normalized=True)
    assert (df_norm.sum(axis=1).sum() == len(df))
Example #17
def run():
    start_time = time()
    data_cand, data_part, full_data = load_data()
    # numeric_parties  = full_data.party.map(party_map)
    train_c, test_c = train_test_split(data_cand, test_size=0.2)
    train_p, test_p = train_test_split(data_part, test_size=0.2)
    candidatos_clf = Classifier(train_c.drop('candidatoId', axis=1), train_c.candidatoId)
    partidos_clf = Classifier(train_p.drop('idPartido', axis=1), train_p.idPartido)

    cand_solver = candidatos_clf._predict()
    n_cand, pca_cand_solver = candidatos_clf._pca()
    part_solver = partidos_clf._predict()
    n_part, pca_part_solver = partidos_clf._pca()

    cand_pred = candidatos_clf.classify(test_c.drop('candidatoId', axis=1), test_c.candidatoId, cand_solver)
    pca_cand_pred = candidatos_clf.classify(test_c.drop('candidatoId', axis=1), test_c.candidatoId, pca_cand_solver, n_cand)
    part_pred = partidos_clf.classify(test_p.drop('idPartido', axis=1), test_p.idPartido, part_solver)
    pca_part_pred = partidos_clf.classify(test_p.drop('idPartido', axis=1), test_p.idPartido, pca_part_solver, n_part)

    output_results(f'CANDIDATOS | {cand_solver}', test_c.candidatoId, cand_pred)
    output_results(f'CANDIDATOS_PCA | {pca_cand_solver}, {n_cand}', test_c.candidatoId, pca_cand_pred)
    output_results(f'PARTIDOS | {part_solver}', test_p.idPartido, part_pred)
    output_results(f'PARTIDOS_PCA | {pca_part_solver}, {n_part}', test_p.idPartido, pca_part_pred)
    cand_part_target, cand_part_pred = candidato_mapper(test_c.candidatoId, cand_pred)
    output_results(f'PARTIDOS CON CANDIDATO | {cand_solver}', cand_part_target, cand_part_pred)

    cm_cand = ConfusionMatrix(test_c.candidatoId, cand_pred)
    cm_pca_cand = ConfusionMatrix(test_c.candidatoId, pca_cand_pred)
    cm_part = ConfusionMatrix(test_p.idPartido, part_pred)
    cm_pca_part = ConfusionMatrix(test_p.idPartido, pca_part_pred)
    cm_cand_part = ConfusionMatrix(cand_part_target, cand_part_pred)

    elapsed_time = time() - start_time
    print('----------------------------------------')
    print(f'TOTAL TIME: {datetime.timedelta(seconds=elapsed_time)}')

    result = {
        'data': {
            'candidatos': (train_c, test_c),
            'partidos': (train_p, test_p),
        },
        'results': {
            'candidatos': (test_c.candidatoId, cand_pred),
            'candidatos_pca': (test_c.candidatoId, pca_cand_pred),
            'partidos': (test_p.idPartido, part_pred),
            'partidos_pca': (test_p.idPartido, pca_part_pred),
            'partidos_candidatos': (cand_part_target, cand_part_pred)
        },
        'matrices': {
            'candidatos': cm_cand,
            'candidatos_pca': cm_pca_cand,
            'partidos': cm_part,
            'partidos_pca': cm_pca_part,
            'partidos_candidatos': cm_cand_part
        }
    }
    return result
Example #18
def confusion_matrix(ground_truth,
                     predictions,
                     display=True,
                     output_path=None):
    matrix = ConfusionMatrix(ground_truth, predictions)
    if display:
        print("Confusion matrix:\n%s" % matrix)

    if output_path is not None:
        matrix.plot()
        plt.savefig(output_path)
def plot_confusion_matrix(clf, X_best_test, y_best_test, matrix_name):
    # Doing the confusion matrix for the best K-validated training set
    y_best_pred = clf.predict(X_best_test)
    confusion_matrix = ConfusionMatrix(y_best_test, y_best_pred)
    #print("Confusion matrix:\n{}".format(confusion_matrix))
    confusion_matrix.plot(normalized=True)
    plt.savefig('confusion_matrixes/K-fold_matrix_{}.png'.format(matrix_name))
    print('Saved Confusion matrix of the previous test to confusion_matrixes/K-fold_matrix_{}.png\n'.format(matrix_name))

    return confusion_matrix
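
A hypothetical usage sketch for plot_confusion_matrix: DecisionTreeClassifier and the iris data stand in for whatever classifier and split the surrounding code produced, and the output directory is created first because the function saves into it:

import os
import matplotlib.pyplot as plt  # used inside plot_confusion_matrix
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

os.makedirs('confusion_matrixes', exist_ok=True)
X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)
clf = DecisionTreeClassifier().fit(X_train, y_train)
cm = plot_confusion_matrix(clf, X_test, y_test, matrix_name='iris')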
    def test_pandas_confusion_cm_stats_integers(self):
        y_true = [600, 200, 200, 200, 200, 200, 200, 200, 500, 500, 500, 200, 200, 200, 200, 200, 200, 200, 200, 200]
        y_pred = [100, 200, 200, 100, 100, 200, 200, 200, 100, 200, 500, 100, 100, 100, 100, 100, 100, 100, 500, 200]
        print("y_true: %s" % y_true)
        print("y_pred: %s" % y_pred)
        cm = ConfusionMatrix(y_true, y_pred)
        assert isinstance(cm, pdml.confusion_matrix.LabeledConfusionMatrix)

        assert isinstance(cm.stats(), OrderedDict)
        cm.print_stats()
Example #22
def metrics(y_true, y_pred, y_pred_proba=False):
    target_names = ['KEEP', 'UP', 'DOWN']

    if y_pred_proba is not False:
        print('Cross Entropy: {}'.format(log_loss(y_true, y_pred_proba)))
    print('Accuracy: {}'.format(accuracy_score(y_true, y_pred)))
    print('Coefficient Kappa: {}'.format(cohen_kappa_score(y_true, y_pred)))
    print('Report: {}'.format(
        classification_report(y_true, y_pred, target_names=target_names)))
    cm = ConfusionMatrix(y_true.tolist(), y_pred.tolist())
    cm.print_stats()
    def test_pandas_confusion_max_min(self):
        y_true = ['rabbit', 'cat', 'rabbit', 'rabbit', 'cat', 'dog', 'dog', 'rabbit', 'rabbit', 'cat', 'dog', 'rabbit']
        y_pred = ['cat', 'cat', 'rabbit', 'dog', 'cat', 'rabbit', 'dog', 'cat', 'rabbit', 'cat', 'rabbit', 'rabbit']
        print("y_true: %s" % y_true)
        print("y_pred: %s" % y_pred)

        cm = ConfusionMatrix(y_true, y_pred)
        assert isinstance(cm, pdml.confusion_matrix.LabeledConfusionMatrix)

        assert cm.max() == 3
        assert cm.min() == 0
Example #25
def plotconfusion(truth, predictions):
    """
    Function to plot the confusion fuction between the
    truth and predictions array.

    :type truth: np array
    :type predictions: np array
    """
    cm = ConfusionMatrix(truth, predictions)
    _ = plt.figure(figsize=(10, 10))
    _ = cm.plot()
    _ = plt.show()
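
A quick usage sketch with hard-coded arrays (assumes numpy, matplotlib, and pandas_ml are available):

import numpy as np

truth = np.array(['cat', 'dog', 'cat', 'dog', 'cat'])
predictions = np.array(['cat', 'cat', 'cat', 'dog', 'dog'])
plotconfusion(truth, predictions)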
def test_pandas_confusion_cm_empty_row():
    y_true = [2, 0, 2, 2, 0, 0]
    y_pred = [0, 0, 2, 2, 1, 2]
    # cm = LabeledConfusionMatrix(y_true, y_pred)
    cm = ConfusionMatrix(y_true, y_pred, labels=["ant", "bird", "cat"])
    assert isinstance(cm, pdml.confusion_matrix.LabeledConfusionMatrix)

    cm = ConfusionMatrix(y_true, y_pred)
    assert isinstance(cm, pdml.confusion_matrix.LabeledConfusionMatrix)

    print("Confusion matrix:\n%s" % cm)
    asserts(y_true, y_pred, cm)
    def test_value_counts(self):
        df = pd.DataFrame({
            'Height': [150, 150, 151, 151, 152, 155, 155, 157, 157, 157, 157, 158, 158, 159, 159, 159, 160, 160, 162, 162, 163, 164, 165, 168, 169, 169, 169, 170, 171, 171, 173, 173, 174, 176, 177, 177, 179, 179, 179, 179, 179, 181, 181, 182, 183, 184, 186, 190, 190],
            'Weight': [54, 55, 55, 47, 58, 53, 59, 60, 56, 55, 62, 56, 55, 55, 64, 61, 59, 59, 63, 66, 64, 62, 66, 66, 72, 65, 75, 71, 70, 70, 75, 65, 79, 78, 83, 75, 84, 78, 74, 75, 74, 90, 80, 81, 90, 81, 91, 87, 100],
            'Size': ['S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'M', 'M', 'M', 'M', 'M', 'M', 'M', 'M', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'XL', 'XL', 'XL', 'XL', 'XL', 'XL', 'XL', 'XL'],
            'SizePred': ['S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'M', 'M', 'M', 'M', 'M', 'M', 'M', 'M', 'M', 'M', 'M', 'M', 'L', 'M', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'XL', 'L', 'L', 'XL', 'L', 'XL', 'XL', 'XL'],
        })
        cm = ConfusionMatrix(df["Size"], df["SizePred"])
        assert isinstance(cm, pdml.confusion_matrix.LabeledConfusionMatrix)

        assert (cm.true - df.Size.value_counts()).sum() == 0
        assert (cm.pred - df.SizePred.value_counts()).sum() == 0
        cm.print_stats()
Example #28
def get_accuracy(results, array):
    act_arr = array
    predicted_arr = results
    confusion_matrix = ConfusionMatrix(act_arr, predicted_arr)

    print("Confusion matrix:\n%s" % confusion_matrix)
    confusion_matrix.plot()
    plt.show()
    print("\n")
    a = perf_measure(act_arr, predicted_arr)
    b = close_measure(act_arr, predicted_arr)
    print("The Accuracy is: " + str(a * 100) + "%")
    print("The close range Accuracy is: " + str(b * 100) + "%")
Example #29
    def confusion_matrix(self):
        df = DataFrame(index=range(0, 50), columns=['true', 'predict'])
        for i in range(0, 50):
            song = song_list[i]['song']
            true_genre = str(song.genre)
            run_all(rule_list=self.rules,
                    defined_variables=SongVariables(song),
                    defined_actions=SongActions(song),
                    stop_on_first_trigger=False)
            df.loc[i] = [true_genre, song.genre]
        cnf_matrix = ConfusionMatrix(df['true'], df['predict'])
        cnf_matrix.plot()
        plt.show()
Example #30
def test(x_test, y_test):
    """
    Tests the network to see how well the network has trained

    :param x_test: input to the test function
    :type x_test: list
    :param y_test: labels for the test function
    :type y_test: list
    :return: None
    """
    print("Testing Now")
    with tf.Session(graph=train_graph) as sess:
        checkpoint = "./saves/best_model.ckpt"
        all_preds = []

        # with tf.Session() as sess:
        saver = tf.train.Saver()
        # Load the model
        saver.restore(sess, checkpoint)
        state = sess.run(graph.initial_state)
        print("Total Batches: %d" % (len(x_test) // args.batch_size))
        for ii, x in enumerate(
                utils.get_test_batches(x_test, args.batch_size,
                                       tokenizer.word2int), 1):
            if ii % 100 == 0:
                print("%d batches" % ii)
            feed = {
                graph.input_data: x,
                graph.keep_prob: args.keep_prob,
                graph.initial_state: state
            }

            test_preds = sess.run(graph.predictions, feed_dict=feed)

            for i in range(len(test_preds)):
                all_preds.append(test_preds[i, :])

    all_preds = np.asarray(all_preds)
    y_predictions = np.argmax(all_preds, axis=1)
    y_true = y_test.argmax(axis=1)
    y_true = y_true[:y_predictions.shape[0]]

    cm = ConfusionMatrix(y_true, y_predictions)
    cm.plot(backend='seaborn', normalized=True)
    plt.title('Confusion Matrix Stars prediction')
    plt.figure(figsize=(12, 10))

    test_correct_pred = np.equal(y_predictions, y_true)
    test_accuracy = np.mean(test_correct_pred.astype(float))

    print("Test accuracy is: " + str(test_accuracy))
def test_plot():

    try:
        import matplotlib.pyplot  # noqa
    except ImportError:
        import nose
        raise nose.SkipTest()

    y_true = [
        'rabbit', 'cat', 'rabbit', 'rabbit', 'cat', 'dog', 'dog', 'rabbit',
        'rabbit', 'cat', 'dog', 'rabbit'
    ]
    y_pred = [
        'cat', 'cat', 'rabbit', 'dog', 'cat', 'rabbit', 'dog', 'cat', 'rabbit',
        'cat', 'rabbit', 'rabbit'
    ]

    cm = ConfusionMatrix(y_true, y_pred)

    # check plot works
    cm.plot()
    cm.plot(backend='seaborn')

    with tm.assertRaises(ValueError):
        cm.plot(backend='xxx')
Example #32
def Test(epoch):
    global best_acc
    net.eval()
    test_loss = 0
    correct = 0
    total = 0
    cm_targets = []
    cm_predicted = []
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(testloader):
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = net(inputs)
            loss = criterion(outputs, targets)

            test_loss += loss.item()
            _, predicted = outputs.max(1)

            cm_targets.extend(targets.cpu().numpy())
            cm_predicted.extend(predicted.cpu().numpy())

            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()

            progress_bar(
                batch_idx, len(testloader),
                'Loss: %.3f | Acc: %.3f%% (%d/%d)' %
                (test_loss /
                 (batch_idx + 1), 100. * correct / total, correct, total))

    matrix = confusion_matrix(cm_targets, cm_predicted)
    plot_confusion_matrix(matrix, classes_str)

    from pandas_ml import ConfusionMatrix
    cm = ConfusionMatrix(cm_targets, cm_predicted)
    cm.print_stats()

    # Save checkpoint.
    acc = 100. * correct / total
    if acc > best_acc:
        print('Saving..')
        state = {
            'net': net.state_dict(),
            'acc': acc,
            'epoch': epoch,
        }
        if not os.path.isdir('checkpoint'):
            os.mkdir('checkpoint')
        torch.save(state, './checkpoint/vggckpt.pth')
        best_acc = acc
Example #33
def accuracy(result):
    true = 0
    total = len(result)
    cm_expected = []
    cm_predicted = []
    for i in range(len(result)):
        if result[i][0] == result[i][1]:
            true += 1
        cm_expected.append(result[i][1])
        cm_predicted.append(result[i][0])
    misclassified = total - true
    cm = ConfusionMatrix(cm_expected, cm_predicted)
    cm.print_stats()
    print("----------------------------------------")
    return cm, total, true, misclassified, true/len(result)*100
Example #34
def get_pd_ml_cf_matrix(y_actual, y_predicted):

    data = {'y_Actual': y_actual, 'y_Predicted': y_predicted}
    df = pd.DataFrame(data, columns=['y_Actual', 'y_Predicted'])
    pd_ml_cf_matrix = ConfusionMatrix(df['y_Actual'], df['y_Predicted'])

    return pd_ml_cf_matrix
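
A quick usage sketch with two hard-coded label lists (assumes pandas and pandas_ml are importable, as the snippet implies):

cm = get_pd_ml_cf_matrix([1, 0, 1, 1, 0], [1, 0, 0, 1, 0])
print("Confusion matrix:\n%s" % cm)
cm.print_stats()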
    def test_pandas_confusion_cm_binarize(self):
        y_true = ['rabbit', 'cat', 'rabbit', 'rabbit', 'cat', 'dog', 'dog', 'rabbit', 'rabbit', 'cat', 'dog', 'rabbit']
        y_pred = ['cat', 'cat', 'rabbit', 'dog', 'cat', 'rabbit', 'dog', 'cat', 'rabbit', 'cat', 'rabbit', 'rabbit']

        cm = ConfusionMatrix(y_true, y_pred)
        assert isinstance(cm, pdml.confusion_matrix.LabeledConfusionMatrix)

        print("Confusion matrix:\n%s" % cm)
        select = ['cat', 'dog']

        print("Binarize with %s" % select)
        binary_cm = cm.binarize(select)

        print("Binary confusion matrix:\n%s" % binary_cm)

        assert cm.sum() == binary_cm.sum()
def one_vs_all():
    X_train0, X_train1, X_train2, X_train3, X_train4, X_train5, X_train6, X_train7, X_train8, X_train9 = data_clustering(
        X_train, y_train)
    numpy_predict = []

    for number in range(10):
        train_number, train_rest, test_number, test_rest = join_cluster(
            X_train0, X_train1, X_train2, X_train3, X_train4, X_train5,
            X_train6, X_train7, X_train8, X_train9, number)
        training_data = np.vstack((train_number, train_rest))
        test_data = np.hstack((test_number, test_rest))
        clf = SVM(C=0.1)
        clf.train(training_data, test_data)
        y_predict = clf.compute(X_test)
        numpy_predict.append(y_predict)

    prediction = np.argmax(np.array(numpy_predict), axis=0)
    correct = np.sum(prediction == y_test)
    confusion_matrix = ConfusionMatrix(y_test, prediction)
    print("Confusion matrix:\n%s" % confusion_matrix)
    size = len(y_predict)
    accuracy = (correct / float(size)) * 100
    print "%d out of %d predictions correct" % (correct, len(y_predict))
    print "The accuracy in percentage is  "
    print(accuracy)
Example #37
def trainingHMM(training_set):
	# Count of words from training data
	freqOfWords = countFreqOfWords(training_set)
	# Extract unique tags from training data 
	uniqTags = countUniqTags(training_set)
	# Add a value of 0 for key '<UNK>'
	freqOfWords['<UNK>'] = 0
	training_set_mod, freqOfWords_mod = handlingUNK(training_set, freqOfWords)
	# Count tag frequency
	tagFrequencyList = countTagFrequency(training_set_mod)
	# Calculate bigram list
	tagtagBigram, tagWordsBigram = calcBigram(training_set_mod)
	# Calculate transition and emission probability
	transitionList,emissionList = hmm_train_tagger(freqOfWords_mod, tagtagBigram, tagWordsBigram, uniqTags, tagFrequencyList, len(training_set)-1)
	# Decoding and Apply viterbi
	applyViterbi(uniqTags, testing_set, transitionList, emissionList, freqOfWords_mod)
	# Evaluation: read back the tagger output (note both lists are read
	# from the same predictions file here, rather than a separate gold file)
	ourPredict = [line.rstrip('\n') for line in open('predict_out.txt')]
	samplePredict = [line.rstrip('\n') for line in open('predict_out.txt')]
	# Our Predictions
	predictSet = []
	for eachPair in ourPredict:
		if eachPair:
			predictSet.append(eachPair.split()[1])
	# Sample Set
	sampleSet = []
	for eachPair in samplePredict:
		if eachPair:
			sampleSet.append(eachPair.split()[1])
	#confusion matrix
	cm = ConfusionMatrix(sampleSet, predictSet)
	print(cm)
    def test_pandas_confusion_normalized_issue1(self):
        # should ensure issue 1 is fixed
        # see http://stackoverflow.com/questions/19233771/sklearn-plot-confusion-matrix-with-labels/31720054#31720054

        y_true = ['business', 'business', 'business', 'business', 'business',
                  'business', 'business', 'business', 'business', 'business',
                  'business', 'business', 'business', 'business', 'business',
                  'business', 'business', 'business', 'business', 'business']

        y_pred = ['health', 'business', 'business', 'business', 'business',
                  'business', 'health', 'health', 'business', 'business', 'business',
                  'business', 'business', 'business', 'business', 'business',
                  'health', 'health', 'business', 'health']

        cm = ConfusionMatrix(y_true, y_pred)
        assert isinstance(cm, pdml.confusion_matrix.BinaryConfusionMatrix)

        df = cm.to_dataframe()
        df_norm = cm.to_dataframe(normalized=True)
        assert(df_norm.sum(axis=1, skipna=False).fillna(1).sum() == len(df))
Example #39
  def on_epoch_end(self, epoch, logs=None):
    y_true, y_pred = [], []
    for i in range(self.validation_steps):
      X_batch, y_true_batch = next(self.validation_data)
      y_pred_batch = self.model.predict(X_batch)

      y_true.extend(y_true_batch)
      y_pred.extend(y_pred_batch)

    y_true = np.float32(y_true)
    y_pred = np.float32(y_pred)
    val_loss = log_loss(y_true, y_pred)
    # map integer labels to strings
    y_true = list(y_true.argmax(axis=-1))
    y_pred = list(y_pred.argmax(axis=-1))
    y_true = [self.int2label[y] for y in y_true]
    y_pred = [self.int2label[y] for y in y_pred]
    confusion = ConfusionMatrix(y_true, y_pred)
    accs = self.accuracies(confusion._df_confusion.values)
    acc = self.accuracy(confusion._df_confusion.values)
    # same for wanted words
    y_true = [y if y in self.wanted_words else '_unknown_' for y in y_true]
    y_pred = [y if y in self.wanted_words else '_unknown_' for y in y_pred]
    wanted_words_confusion = ConfusionMatrix(y_true, y_pred)
    wanted_accs = self.accuracies(wanted_words_confusion._df_confusion.values)
    acc_line = ('\n[%03d]: val_categorical_accuracy: %.2f, '
                'val_mean_categorical_accuracy_wanted: %.2f') % (
                    epoch, acc, wanted_accs.mean())  # noqa
    with open('confusion_matrix.txt', 'a') as f:
      f.write('%s\n' % acc_line)
      f.write(confusion.to_dataframe().to_string())

    with open('wanted_confusion_matrix.txt', 'a') as f:
      f.write('%s\n' % acc_line)
      f.write(wanted_words_confusion.to_dataframe().to_string())

    logs['val_loss'] = val_loss
    logs['val_categorical_accuracy'] = acc
    logs['val_mean_categorical_accuracy_all'] = accs.mean()
    logs['val_mean_categorical_accuracy_wanted'] = wanted_accs.mean()
    def test_pandas_confusion_get(self):
        y_true = ['rabbit', 'cat', 'rabbit', 'rabbit', 'cat', 'dog', 'dog', 'rabbit', 'rabbit', 'cat', 'dog', 'rabbit']
        y_pred = ['cat', 'cat', 'rabbit', 'dog', 'cat', 'rabbit', 'dog', 'cat', 'rabbit', 'cat', 'rabbit', 'rabbit']
        print("y_true: %s" % y_true)
        print("y_pred: %s" % y_pred)

        cm = ConfusionMatrix(y_true, y_pred)
        assert isinstance(cm, pdml.confusion_matrix.LabeledConfusionMatrix)

        assert cm.get("cat") == cm.get("cat", "cat")
        assert cm.get("cat") == 3
        assert cm.get("dog") == 1
        assert cm.get("rabbit") == 3
        assert cm.get("dog", "rabbit") == 2
    def test_plot(self):

        try:
            import matplotlib.pyplot        # noqa
        except ImportError:
            import nose
            raise nose.SkipTest()

        y_true = ['rabbit', 'cat', 'rabbit', 'rabbit', 'cat', 'dog',
                  'dog', 'rabbit', 'rabbit', 'cat', 'dog', 'rabbit']
        y_pred = ['cat', 'cat', 'rabbit', 'dog', 'cat', 'rabbit', 'dog',
                  'cat', 'rabbit', 'cat', 'rabbit', 'rabbit']

        cm = ConfusionMatrix(y_true, y_pred)

        # check plot works
        cm.plot()
        cm.plot(backend='seaborn')

        with self.assertRaises(ValueError):
            cm.plot(backend='xxx')
    def test_pandas_confusion_cm_stats_animals(self):
        y_true = ['rabbit', 'cat', 'rabbit', 'rabbit', 'cat', 'dog', 'dog', 'rabbit', 'rabbit', 'cat', 'dog', 'rabbit']
        y_pred = ['cat', 'cat', 'rabbit', 'dog', 'cat', 'rabbit', 'dog', 'cat', 'rabbit', 'cat', 'rabbit', 'rabbit']
        print("y_true: %s" % y_true)
        print("y_pred: %s" % y_pred)

        cm = ConfusionMatrix(y_true, y_pred)
        assert isinstance(cm, pdml.confusion_matrix.LabeledConfusionMatrix)

        assert isinstance(cm.stats(), OrderedDict)
        assert cm.population == len(y_true)  # 12
        cm.print_stats()
        cm_stats = cm.stats()  # noqa

        assert cm.binarize("cat").TP == cm.get("cat")  # cm.get("cat", "cat")
        assert cm.binarize("cat").TP == 3
        assert cm.binarize("dog").TP == cm.get("dog")  # 1
        assert cm.binarize("rabbit").TP == cm.get("rabbit")  # 3