def random_forest():
    l=1
    if(l==1):
        print("------------------------RANDOM FOREST-----------------------")
        df = pd.read_csv(var.get(), low_memory=False)
        df = df.sample(frac=1).reset_index(drop=True)
        frauds = df.loc[df['Class'] == 1]
        non_frauds = df.loc[df['Class'] == 0]
        print("\nWe have", len(frauds), "fraud data points and", len(non_frauds), "nonfraudulent data points.")
        X = df.iloc[:,:-1]
        y = df['Class']

        print("X and y sizes, respectively:", len(X), len(y))
    
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.35)
        print("Train and test sizes, respectively:", len(X_train), len(y_train), "|", len(X_test), len(y_test))
        print("Total number of frauds:", len(y.loc[df['Class'] == 1]))
        print("Number of frauds on y_test:", len(y_test.loc[df['Class'] == 1]))
        print("Number of frauds on y_train:", len(y_train.loc[df['Class'] == 1]))
        clf = RandomForestClassifier()
        clf.fit(X_train, y_train)
        y_predicted1 = np.array(clf.predict(X_test))
        y_right1 = np.array(y_test)
        confusion_matrix1 = ConfusionMatrix(y_right1, y_predicted1)
        print("\n\nConfusion matrix:\n%s" % confusion_matrix1)
        #confusion_matrix1.plot(normalized=True)
        T = Text(root, height=60, width=60)
        T.pack(pady=20,side=BOTTOM, fill=Y)
        stats1 = confusion_matrix1.stats()
        for key in stats1:
            T.insert(END, [key, stats1[key]])
            T.insert(END, "\n")
        d['ACC'].append(stats1['ACC'] * 100)
        d['TPR'].append(stats1['TPR'] * 100)
        fpr, tpr, thresholds = roc_curve(y_right1, y_predicted1)
        aucarr['auc'].append(auc(fpr, tpr))
def logistic_regression():
    print("------------------------LOGISTIC REGRESSION-----------------------")
    df = pd.read_csv(var.get(), low_memory=False)
    df = df.sample(frac=1).reset_index(drop=True)
    frauds = df.loc[df['Class'] == 1]
    non_frauds = df.loc[df['Class'] == 0]
    print("\n")
    print("We have", len(frauds), "fraud data points and", len(non_frauds), "nonfraudulent data points.\n")
    X = df.iloc[:,:-1]
    y = df['Class']
    print("X and y sizes, respectively:", len(X), len(y))
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.35)
    '''print("\nTrain and test sizes, respectively:", len(X_train), len(y_train), "|", len(X_test), len(y_test))
    print("Total number of frauds:", len(y.loc[df['Class'] == 1]))
    print("Number of frauds on y_test:", len(y_test.loc[df['Class'] == 1]))
    print("Number of frauds on y_train:", len(y_train.loc[df['Class'] == 1]))'''
    logistic = linear_model.LogisticRegression(C=1e5)
    logistic.fit(X_train, y_train)
    print("\nScore: ", logistic.score(X_test, y_test))
    y_predicted = np.array(logistic.predict(X_test))
    y_right = np.array(y_test)
    confusion_matrix = ConfusionMatrix(y_right, y_predicted)
    print("\n\nConfusion matrix:\n%s" % confusion_matrix)
    #confusion_matrix.plot(normalized=True)
    T = Text(root, height=60, width=60)
    T.pack(pady=20,side=BOTTOM, fill=Y)
    stats = confusion_matrix.stats()
    for key in stats:
        T.insert(END, [key, stats[key]])
        T.insert(END, "\n")
    d['ACC'].append(stats['ACC'] * 100)
    d['TPR'].append(stats['TPR'] * 100)
    fpr, tpr, thresholds = roc_curve(y_right, y_predicted)
    aucarr['auc'].append(auc(fpr, tpr))
def plot_confusion_matrix_with_accuracy(classes, y_true, y_pred, title,
                                        sum_overall_accuracy,
                                        total_predictions):
    cm = ConfusionMatrix(y_true, y_pred)
    print('Current Overall accuracy: ' +
          str(cm.stats()['overall']['Accuracy']))
    if total_predictions != 0:
        print('Total Overall Accuracy: ' +
              str(sum_overall_accuracy / total_predictions))
    else:
        print('Total Overall Accuracy: ' +
              str(cm.stats()['overall']['Accuracy']))

    conf_matrix = confusion_matrix(y_true, y_pred)
    plt.figure()
    plot_confusion_matrix(conf_matrix, classes=classes, title=title)
    plt.show()
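# Hypothetical usage sketch (added; not from the original code). The helper expects
# multi-class label arrays, since pandas_ml only nests an 'overall' stats dict for
# multi-class confusion matrices, plus a running accuracy sum and prediction count:
# plot_confusion_matrix_with_accuracy(classes=['cat', 'dog', 'rabbit'],
#                                     y_true=y_true, y_pred=y_pred, title='Animals',
#                                     sum_overall_accuracy=0.0, total_predictions=0)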
def logistic_reg_smote():
    l=1
    if(l==1):
        print("------------------------LOGISTIC REGRESSION WITH SMOTE-----------------------")
        df = pd.read_csv(var.get(), low_memory=False)
        df = df.sample(frac=1).reset_index(drop=True)
        frauds = df.loc[df['Class'] == 1]
        non_frauds = df.loc[df['Class'] == 0]
        print("\nWe have", len(frauds), "fraud data points and", len(non_frauds), "nonfraudulent data points.")
        X = df.iloc[:,:-1]
        y = df['Class']

        print("X and y sizes, respectively:", len(X), len(y))
    
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.35)
        print("Train and test sizes, respectively:", len(X_train), len(y_train), "|", len(X_test), len(y_test))
        print("Total number of frauds:", len(y.loc[df['Class'] == 1]))
        print("Number of frauds on y_test:", len(y_test.loc[df['Class'] == 1]))
        print("Number of frauds on y_train:", len(y_train.loc[df['Class'] == 1]))
        df2 = pdml.ModelFrame(X_train, target=y_train)
        sampler = df2.imbalance.over_sampling.SMOTE()
        sampled = df2.fit_sample(sampler)
        print("\nSize of training set after over sampling:", len(sampled))
        X_train_sampled = sampled.iloc[:,1:]
        y_train_sampled = sampled['Class']


        logistic = linear_model.LogisticRegression(C=1e5)
        logistic.fit(X_train_sampled, y_train_sampled)
        print("Score: ", logistic.score(X_test, y_test))
        y_predicted1 = np.array(logistic.predict(X_test))
        y_right1 = np.array(y_test)

        confusion_matrix1 = ConfusionMatrix(y_right1, y_predicted1)
        print("\n\nConfusion matrix:\n%s" % confusion_matrix1)
        #confusion_matrix1.plot(normalized=True)
        T = Text(root, height=60, width=60)
        T.pack(pady=20,side=BOTTOM, fill=Y)
        stats1 = confusion_matrix1.stats()
        for key in stats1:
            T.insert(END, [key, stats1[key]])
            T.insert(END, "\n")
        d['ACC'].append(stats1['ACC'] * 100)
        d['TPR'].append(stats1['TPR'] * 100)
        fpr, tpr, thresholds = roc_curve(y_right1, y_predicted1)
        aucarr['auc'].append(auc(fpr, tpr))
    def test_pandas_confusion_cm_stats_animals(self):
        y_true = ['rabbit', 'cat', 'rabbit', 'rabbit', 'cat', 'dog', 'dog', 'rabbit', 'rabbit', 'cat', 'dog', 'rabbit']
        y_pred = ['cat', 'cat', 'rabbit', 'dog', 'cat', 'rabbit', 'dog', 'cat', 'rabbit', 'cat', 'rabbit', 'rabbit']
        print("y_true: %s" % y_true)
        print("y_pred: %s" % y_pred)

        cm = ConfusionMatrix(y_true, y_pred)
        assert isinstance(cm, pdml.confusion_matrix.LabeledConfusionMatrix)

        assert isinstance(cm.stats(), OrderedDict)
        assert cm.population == len(y_true)  # 12
        cm.print_stats()
        cm_stats = cm.stats()  # noqa

        assert cm.binarize("cat").TP == cm.get("cat")  # cm.get("cat", "cat")
        assert cm.binarize("cat").TP == 3
        assert cm.binarize("dog").TP == cm.get("dog")  # 1
        assert cm.binarize("rabbit").TP == cm.get("rabbit")  # 3
    def test_pandas_confusion_cm_stats_integers(self):
        y_true = [600, 200, 200, 200, 200, 200, 200, 200, 500, 500, 500, 200, 200, 200, 200, 200, 200, 200, 200, 200]
        y_pred = [100, 200, 200, 100, 100, 200, 200, 200, 100, 200, 500, 100, 100, 100, 100, 100, 100, 100, 500, 200]
        print("y_true: %s" % y_true)
        print("y_pred: %s" % y_pred)
        cm = ConfusionMatrix(y_true, y_pred)
        assert isinstance(cm, pdml.confusion_matrix.LabeledConfusionMatrix)

        assert isinstance(cm.stats(), OrderedDict)
        cm.print_stats()
Example #9
def calc_general_stats(rows):
    y_true = []
    y_pred = []
    for row in rows:
        className_true = row.split('/')[4]
        numBoxes = int(row.split(' ')[1])
        className_pred = 'none'
        if numBoxes > 0:
            className_pred = row.split(' ')[1 + numBoxes].split(',')[4]

        y_true.append(className_true)
        y_pred.append(className_pred)

    # stats
    cm = ConfusionMatrix(y_true, y_pred)
    cm.print_stats()
    cm.stats()

    # other report...
    target_names = [
        'arrabida', 'camara', 'clerigos', 'musica', 'none', 'serralves'
    ]
    print(classification_report(y_true, y_pred, target_names=target_names))

    # plot
    cm = confusion_matrix(y_true, y_pred)
    classes = ['arrabida', 'camara', 'clerigos', 'musica', 'none', 'serralves']
    df_cm = pd.DataFrame(cm, index=classes, columns=classes)
    plt.figure(figsize=(10, 7))
    sn.set(font_scale=1.4)
    ax = sn.heatmap(cm,
                    annot=True,
                    annot_kws={"size": 16},
                    yticklabels=classes,
                    xticklabels=classes,
                    cmap='Blues',
                    fmt='g')
    plt.show()
print(encoder.classes_)
y_train = onehot(encoder.transform(y_train1))
y_valid = onehot(encoder.transform(y_valid1))

# In[113]:

# evaluate on the training set
print("Evaluation on Training Dataset")
Predictions = model.predict(x_train)
print(Predictions.shape)
Prediction = np.argmax(Predictions, axis=1)
print(Prediction.shape)
print(np.sum(Prediction == encoder.transform(y_train1)))

cm = ConfusionMatrix(Prediction, encoder.transform(y_train1))
ConfusionMatrixTable = cm.stats()['cm']  # renamed so the ConfusionMatrix class is not shadowed
ClassStatistics = cm.stats()['class']
OverallStatistics = cm.stats()['overall']
ClassStatistics.to_csv("trainStatsResnet.csv")
print(OverallStatistics)
print(ConfusionMatrixTable)

print("Evaluation on Test Dataset")
Predictions = model.predict(x_test)
print(Predictions.shape)
Prediction = np.argmax(Predictions, axis=1)
print(Prediction.shape)
print(np.sum(Prediction == encoder.transform(y_test1)))

cm = ConfusionMatrix(Prediction, encoder.transform(y_test1))
ConfusionMatrixTable = cm.stats()['cm']
Example #11
    most_accurate_percentile, stats = best_model_threshold_by_roc(
        df, percentiles=np.arange(99., -1, -1))

    df['actual_class'], r_bins = pd.qcut(df['rome'],
                                         10,
                                         labels=list(range(1, 11)),
                                         retbins=True)
    df['predic_class'] = pd.qcut(df['predicted'],
                                 10,
                                 labels=list(range(1, 11)))
    # df['predic_class'] = pd.cut(df['predicted'], bins=r_bins, labels=list(range(1, 11)))

    cm = ConfusionMatrix(df['actual_class'].to_list(),
                         df['predic_class'].to_list())
    cm.print_stats()
    statdict = cm.stats()
    cm_stats = statdict['class']

    matrix = cm.to_dataframe()
    matrix.index.rename('ROME decile', inplace=True)
    matrix.columns.rename(r'ROME$_\mathrm{NN}$ decile', inplace=True)

    plt.close()
    # sns.heatmap(matrix / (len(predicted)//10) , cmap='Greys', annot=matrix, fmt='d')
    ax = sns.heatmap(matrix / (len(predicted) // 10),
                     annot=matrix,
                     fmt='d',
                     cmap='gray_r',
                     vmin=0.0,
                     vmax=0.56)
    cbar = ax.collections[0].colorbar
Example #12
    accuracy = macro_accuracy

    confusion_matrix = ConfusionMatrix(actual_classes, predicted_classes)

    print()
    print('Macro-accuracy:',
          str(accuracy) + '%. Details (considering MICRO-accuracy):')
    confusion_matrix.print_stats()

    #time
    print()
    end_time = time.time()
    elapsed_time = time_format(end_time - start_time)
    print('Testing elapsed time:', elapsed_time)

    os.makedirs(os.path.join('results', 'test'), exist_ok=True)
    with open(
            os.path.join(
                'results', 'test',
                test_data_path.replace(os.sep, '_').replace('.', '_') + '--' +
                model_file.replace('.pth', '.txt')), 'w') as results_txt:
        results_txt.write('Macro-accuracy: ' + str(accuracy) +
                          '%. Details (considering MICRO-accuracy):\n\n')
        results_txt.write(str(confusion_matrix.stats()))
        results_txt.write('\n\nWRONG PREDICTIONS:\n\n')
        for wrong_prediction in wrong_predictions:
            path, label, prediction = wrong_prediction
            results_txt.write(path + ' is ' + label +
                              ' and was predicted as ' + prediction + '\n')
        results_txt.write('\n\nTime: ' + elapsed_time)
Example #13
# to keep time short, set epochs = 1
model.fit(train_data, train_labels, epochs=1, workers=4, use_multiprocessing=True)

#Making Predictions
predictions = model.predict(eval_data)

# Extracting max probability
predictions_number = np.array([])
for row_num in range(predictions.shape[0]): # row_num = 0
    predictions_number = np.append(predictions_number, np.argmax(predictions[row_num]))

# Just precaution
predictions_number = predictions_number.astype(int)
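# Equivalent vectorized form of the loop above (added note, not in the original):
# predictions_number = np.argmax(predictions, axis=1).astype(int)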

confusion_matrix = ConfusionMatrix(eval_labels, predictions_number)
cms = confusion_matrix.stats()
print("Overall Accuracy is ", round(cms['overall']['Accuracy'], 2),", Kappa is ", round(cms['overall']['Kappa'], 2))
# none: Overall Accuracy is  0.99 , Kappa is  0.99


#%% Weight regularization: L2 (weight decay) -> the added cost is proportional to the square of the weight coefficients
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Conv2D(64, (5, 5), activation='relu', input_shape=(im_wh, im_wh, 1), kernel_regularizer= tf.keras.regularizers.l2(0.001)))
model.add(tf.keras.layers.MaxPooling2D((2, 2)))
model.add(tf.keras.layers.Conv2D(64, (5, 5), activation='relu', kernel_regularizer= tf.keras.regularizers.l2(0.001)))
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(64, activation='relu', kernel_regularizer= tf.keras.regularizers.l2(0.001)))
model.add(tf.keras.layers.Dense(10, activation='softmax'))

model.summary()  # note: the feature-map size shrinks layer by layer; it would stay the same with padding="same"
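# Added note (not part of the original script): each kernel_regularizer=l2(0.001) layer
# contributes 0.001 * sum(w**2) to the training loss; tf.keras keeps these penalty
# terms in model.losses.
print(len(model.losses), "L2 regularization terms added to the loss")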
predicted = np.argmax(model.predict(testdata), axis=1)  # convert the softmax probabilities to class labels

cm = ConfusionMatrix(expected, predicted)
print(expected.shape)
print(predicted.shape)
expected = np.array(expected)
predicted = np.array(predicted)
cm.print_stats()

np.savetxt('expected.txt', expected, fmt='%01d')
np.savetxt('predicted.txt', predicted, fmt='%01d')

print(cm)
print(expected.shape)
print(predicted.shape)
cm.stats()
print("***************************************************************")

# fit a k-nearest neighbor model to the data
model = KNeighborsClassifier()
model.fit(traindata, trainlabel)
print(model)
# make predictions
expected = testlabel
predicted = model.predict(testdata)
# summarize the fit of the model

cm = metrics.confusion_matrix(expected, predicted)
print(cm)
tpr = float(cm[0][0]) / np.sum(cm[0])  # recall of class 0 (row 0 holds the true class-0 samples)
fpr = float(cm[1][1]) / np.sum(cm[1])  # recall of class 1; despite the name, this is not a false positive rate
    else:
        final_predict = item[best_index]
    final_result.append(final_predict)

all_results_matrix = confusion_matrix(y_actual_set, np.array(final_result))
correct_result = 0
for i in range(27):
    correct_result = correct_result + all_results_matrix[i][i]

accuracy_ensemble = correct_result / len(monitorList)
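# ensemble accuracy = sum of the confusion-matrix diagonal (correct predictions)
# divided by len(monitorList), presumably the number of test samples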

target_names = [
    'Swipe Left', 'Swipe Right', 'Wave', 'Clap', 'Throw', 'Arm Cross',
    'Basketball shoot', 'Draw X', 'Draw Circle CW', 'Draw Circle CCW',
    'Draw Triangle', 'Bowling', 'Boxing', 'Baseball Swing', 'Tennis Swing',
    'Arm Curl', 'Tennis Serve', 'Push', 'Knock', 'Catch', 'Pickup Throw',
    'Jog', 'Walk', 'Sit to Stand', 'Stand to Sit', 'Lunge', 'Squat'
]
print(
    classification_report(y_actual_set,
                          np.array(final_result),
                          target_names=target_names))

cm = ConfusionMatrix(y_actual_set, np.array(final_result))
cm.plot()
stats = cm.stats()

cmstats = dict(stats)
cmstats2 = cmstats['class']
cmstats2.to_csv('ensemble.csv', sep=',')
        def performance_eval(results, test_data, risk_pref, dfb):
            '''
            A function to conduct model evaluation with pseudo timestamp labels
            '''
            prediction = []
            label = []
            confidence = []
            threshold = dfb

            #print(test_data)
            if test_data == "1.10train" or test_data == "0.10train":
                for i in results:
                    if risk_pref == 'TPR':
                        if i[1] >= threshold:
                        #if i[1] <= threshold:
                            # FP
                            prediction.append(1)
                            label.append(0)
                            confidence.append(round(i[1], 3))
                        elif i[1] < threshold:
                        #elif i[1] > threshold:
                            # TN
                            prediction.append(0)
                            label.append(0)
                            confidence.append(round(i[1], 3))
                        else:
                            print("Error")
                    else:
                        if i[1] <= threshold:
                            # FP
                            prediction.append(1)
                            label.append(0)
                            confidence.append(round(i[1], 3))
                        elif i[1] > threshold:
                            # TN
                            prediction.append(0)
                            label.append(0)
                            confidence.append(round(i[1], 3))
                        else:
                            print("Error")
            else:
                attack_timestamps = filepath + test_data + '.csv'
                with open(attack_timestamps, 'r') as f:
                    reader = csv.reader(f)
                    attack_times = list(reader)
                timestamp = [int(attack_times[0][0]), int(attack_times[0][1])]

                for i in results:
                    if risk_pref == 'TPR':
                        if timestamp[0] <= i[0] <= timestamp[1] and i[1] >= threshold:
                        #if timestamp[0] <= i[0] <= timestamp[1] and i[1] <= threshold:
                            # TP
                            prediction.append(1)
                            label.append(1)
                            confidence.append(round(i[1], 3))

                        elif timestamp[0] <= i[0] <= timestamp[1] and i[1] < threshold:
                        #elif timestamp[0] <= i[0] <= timestamp[1] and i[1] > threshold:
                            # FN
                            prediction.append(0)
                            label.append(1)
                            confidence.append(round(i[1], 3))

                        elif i[0] < timestamp[0] and i[1] >= threshold or i[0] > timestamp[1] and i[1] >= threshold:
                        #elif i[0] < timestamp[0] and i[1] <= threshold or i[0] > timestamp[1] and i[1] <= threshold:
                            # FP
                            prediction.append(1)
                            label.append(0)
                            confidence.append(round(i[1], 3))

                        elif i[0] < timestamp[0] and i[1] < threshold or i[0] > timestamp[1] and i[1] < threshold:
                        #elif i[0] < timestamp[0] and i[1] > threshold or i[0] > timestamp[1] and i[1] > threshold:
                            # TN
                            prediction.append(0)
                            label.append(0)
                            confidence.append(round(i[1], 3))

                        else:
                            print("Error")
                            print(timestamp, i)
                        # print(timestamp, i, test_pred, test_label)
                    else:
                        if timestamp[0] <= i[0] <= timestamp[1] and i[1] <= threshold:
                            # TP
                            prediction.append(1)
                            label.append(1)
                            confidence.append(round(i[1], 3))

                        elif timestamp[0] <= i[0] <= timestamp[1] and i[1] > threshold:
                            # FN
                            prediction.append(0)
                            label.append(1)
                            confidence.append(round(i[1], 3))

                        elif i[0] < timestamp[0] and i[1] <= threshold or i[0] > timestamp[1] and i[1] <= threshold:
                            # FP
                            prediction.append(1)
                            label.append(0)
                            confidence.append(round(i[1], 3))

                        elif i[0] < timestamp[0] and i[1] > threshold or i[0] > timestamp[1] and i[1] > threshold:
                            # TN
                            prediction.append(0)
                            label.append(0)
                            confidence.append(round(i[1], 3))

                        else:
                            print("Error")
                            print(timestamp, i)
                        # print(timestamp, i, test_pred, test_label)
            # calculate accuracy for a purely naive classifier
            naive = sum(label) / len(label)
            naive1 = 1 - naive
            nc = max(naive, naive1)  # accuracy of a majority-class (naive) classifier

            #array = np.column_stack = (prediction, label)

            prediction = np.asarray(prediction)
            label = np.asarray(label)
            ACC = accuracy_score(label, prediction)
            # reward = np.mean(confidence)

            cubed = []
            '''
            for x in confidence:
                if presence == 1:
                    if x == 0:
                        pass
                    else:
                        cubed.append(x)
                else:
                    #cubed.append(x)
                    x = x ** 3
                    cubed.append(x)

            '''

            '''
            for x in confidence:
                if x == 0:
                    pass
                else:
                    cubed.append(x)
                    # x = x ** 3
                    # cubed.append(x)
            '''

            for x in confidence:
                x = x ** 3
                cubed.append(x)

            #'''
            cubes = sum(cubed)
            conf = sum(confidence)
            if conf == 0:
                pass
            else:
                test = (cubes / conf)
            # reward = test **(1./3.)
                conf = math.sqrt(test)
            #'''
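            # The block above computes conf = sqrt(sum(c**3) / sum(c)): a confidence summary
            # that weights larger confidence values more heavily than a plain mean.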

            #conf = np.mean(confidence)

            #if risk_pref == 'TPR':
                #reward = conf

            print("Variance: " + str(round(np.var(confidence), 3)))
            reward = round(np.var(confidence), 3)
            #if conf > dfb:
            #    reward = conf
            #else:
            #    reward = 0
            #else:
                #conf = 1 - conf
                #reward = conf
                #if conf < dfb:
                #    #reward = conf
                #    reward = conf
                #else:
                #    reward = 0
            #'''

            #print('Subset Accuracy (J): ' + str(ACC))
            if test_data == "1.10train" or test_data == "0.10train":
                TPR = 'N/A'
                TNR = 'N/A'
                PREC = 'N/A'
                F1 = 'N/A'
            else:
                cm = ConfusionMatrix(label, prediction)  # pandas_ml expects (y_true, y_pred)
                stats = cm.stats()
                print(stats)
                TPR = stats['TPR']
                TNR = stats['TNR']
                PREC = stats['PPV']
                F1 = stats['F1_score']
            print('Reward: ' + str(reward))
            return reward, ACC, F1, TPR, TNR, PREC, nc, conf
Example #17
print(Y_test)
#evaluate train set error
#train_error = classifier.score(X_train, Y_train)
#test_error = classifier.score(X_test, Y_test)
#print(train_error, test_error)
#-------------> EVALUATION

evaluation_file.write('DT_EVALUATION:')
evaluation_file.write('\n\nConfusion_matrix:\n')
evaluation_file.write(str(confusion_matrix(Y_test, Y_pred)))
evaluation_file.write('\n\nClassification report:\n')
evaluation_file.write(str(classification_report(Y_test, Y_pred)))
#evaluation_file.write("\nAccuracy is "+str(accuracy_score(Y_test,Y_pred)*100) + '\n')
evaluation_file.write('\n\n')
cm = ConfusionMatrix(Y_test, Y_pred)
evaluation_file.write(str(cm.stats()))

sn.heatmap(confusion_matrix(Y_test, Y_pred), annot=True, cmap='YlGnBu')
plt.savefig(path_results + title + '/' + 'Confusion_Matrix.pdf')
#plt.show()
plt.close()

print("\nAccuracy is " + str(accuracy_score(Y_test, Y_pred) * 100) + '\n')

#X_test_plot = scaling.inverse_transform(X_test)
value = 1.5
width = 100

plot_decision_regions(
    X=X_test,
    y=Y_test,
def evaluate(textFile,
             valueFile=None,
             varStatusBar=None,
             varCmOutput=None,
             varOutput=None):
    timestamp = strftime("%Y-%m-%d:%H-%M-%S")
    reportFile = "./reports/" + timestamp + ".txt"
    outputFile = "./evaluations/" + timestamp + ".csv"
    statsFile = "./statistics/" + timestamp + ".txt"
    wf = WordFilter()
    totalReal = []
    totalPred = []
    with open("./data/Priors.csv", "r") as priorFile:
        print(priorFile)
        priors = priorFile.readline().strip().split(',')[1:]
        priors = [log10(float(x)) for x in priors]

    testSize = 0
    lst = []
    lst.append(("Real Emotion", "Predicted Emotion", "Tweet"))
    for line in tqdm(textFile):
        testSize += 1

        lineID = line.split(',')[0]
        words = wf.filterWords(line)

        predValues = []
        unfound = []

        for word in words:
            try:
                values = evaluateWord(word)
            except IOError:
                varStatusBar.set(
                    "WordMap not found. Please train system first.")
                raise
            if values is not None:
                predValues.append(values)
            else:
                unfound.append(word)
        predValues = map(sum, zip(*predValues))
        predProb = map(sum, zip(priors, predValues))
        predEmotion = guessEmotion(predProb)
        valueFormat = ",".join("%.2f" % n for n in predValues)

        if valueFile:
            realValues = [
                float(i) for i in valueFile.readline().strip().split(',')[1:]
            ]
            realEmotion = guessEmotion(realValues)
            if predEmotion != "No Words Found":
                totalReal.append(realEmotion)
                totalPred.append(predEmotion)

                if realEmotion != predEmotion:
                    lst.append((realEmotion, predEmotion, line))

        with open(outputFile, "a+") as output:
            output.write("{},{},{}\n".format(lineID, predEmotion, valueFormat))

        with open(reportFile, "a+") as report:
            report.write("{}\n".format(line))
            report.write("Filtered: {}\n".format(words))
            report.write("Words not found:{}\n".format(unfound))
            report.write("Emotion probabilities: {}\n".format(valueFormat))
            report.write("Predicted emotion: {}\n".format(predEmotion))
            if valueFile:
                report.write("Correct emotion: {}\n".format(realEmotion))
            report.write("-" * 70)
            report.write("\n")

    if valueFile:
        varStatusBar.set("Evaluation Complete.")

        with open('./data/RealPred.csv', 'w') as realpredFile:
            writer = csv.writer(realpredFile, delimiter=',')
            writer.writerows(lst)

        cm = ConfusionMatrix(totalReal, totalPred)
        viewPlot = tkMessageBox.askyesno("Confusion Matrix",
                                         "View confusion matrix plot?")
        if viewPlot:
            normaliseData = tkMessageBox.askyesno("Confusion Matrix",
                                                  "Normalise plot?")

            varOutput.set("Accuracy: " +
                          str(cm.stats()['overall']['Accuracy']))
            varCmOutput.set("Confusion Matrix: \n" + str(cm.stats()['cm']))

            data = cm.stats()
            for key, value in data.items():
                print(key, value)

            cm.plot(normalized=normaliseData)
            plt.show()

        with open(statsFile, "w+") as report:
            report.seek(0)
            report.write(str(cm))
            report.write("\n")
for i in range(0, len(dataset['Rapor Aciklamasi'])):
    text = re.sub('[^a-zA-Z]', ' ', dataset['Rapor Aciklamasi'][i])  # keep word boundaries
    text = text.lower()
    text = text.split()
    ps = PorterStemmer()
    text = ' '.join(ps.stem(word) for word in text)  # stem each word and rejoin with spaces
    corpus.append(text)

cv = CountVectorizer(max_features=4000)
X = cv.fit_transform(corpus).toarray()
y = dataset.iloc[:, 0].values

X_train, X_validation, Y_train, Y_validation = train_test_split(X,
                                                                y,
                                                                test_size=0.2,
                                                                random_state=7)

clf_entropy = tarin_using_entropy(X_train, X_validation, Y_train)

print("Results Using Entropy:")
# Prediction using entropy
y_pred_entropy = prediction(X_validation, clf_entropy)

from pandas_ml import ConfusionMatrix
import pandas
pandas.set_option('display.max_colwidth', 15)
pandas.set_option('display.max_columns', 5)
cm = ConfusionMatrix(Y_validation, y_pred_entropy)
cm.print_stats()
print(cm.stats())
Example #20
def best_model_threshold_by_roc(df, percentiles):
    # True and False positive rates
    tpr, fpr = [], []
    # markedness, accuracy, f1_score = [], [], []
    stats_list = []

    assert 0 <= percentiles.min()
    assert percentiles.max() <= 100

    for perc in percentiles:
        # Highest decile of ROME and predictions serve as targets
        df['actual_target'] = df['rome'].gt(df['rome'].quantile(90 / 100))
        df['predic_target'] = df['predicted'].gt(df['predicted'].quantile(
            perc / 100))

        # confusion_matrix = pd.crosstab(df['actual_target'], df['predic_target'], rownames=['ROME'], colnames=['Predicted'])
        # print(confusion_matrix)
        # sns.heatmap(confusion_matrix, annot=True)
        # plt.show()

        # ConfusionMatrix has nice stats, but throws an error if one array has exclusively True or False
        if (len(df['actual_target'].unique()) > 1) and (len(
                df['predic_target'].unique()) > 1):
            cm = ConfusionMatrix(df['actual_target'].to_list(),
                                 df['predic_target'].to_list())
            tpr.append(cm.stats()['TPR'])
            fpr.append(cm.stats()['FPR'])

            # stats = [cm.stats()[key] for key in cm.stats()]
            # stats_list.append(stats)
            stats_list.append(cm.stats().values())
        else:
            df['actual_target'] = df['actual_target'].astype(int)
            df['predic_target'] = df['predic_target'].astype(int)
            cm = pd.crosstab(df['actual_target'],
                             df['predic_target'],
                             rownames=['ROME'],
                             colnames=['Predicted'])

            # only True predictions
            if cm.columns.get_values() == 1:
                tpr.append(1)
                fpr.append(1)  # (cm.loc[0] / cm[1].sum()); cm.loc[0] are the False targets that are predicted True
            # only False predictions
            elif cm.columns.get_values() == 0:
                tpr.append(0)
                fpr.append(0)
            else:
                raise ValueError('Targets consist of only True or only False.')

    plt.plot([0] + fpr, [0] + tpr, c='r', ls='', marker='o', ms=0.5)
    # loop through each x,y pair
    for i, xy in enumerate(zip(fpr, tpr)):
        corr = 0.  #-0.05 # adds a little correction to put annotation in marker's centrum
        plt.annotate(str(percentiles[i].astype(int)),
                     xy=(xy[0] + corr, xy[1] + corr),
                     fontsize=2)
    plt.xlabel('False positive rate (False positives / Target negative)')
    plt.ylabel('True positive rate (True positives / Target positive)')
    plt.title(
        'Predictions of ROME by KitchenSink-NN and varying threshold for high organisation.'
    )
    plt.axes().set_aspect('equal')
    plt.savefig(home + '/Desktop/ROC', dpi=400, bbox_inches='tight')

    # Distance to point (0, 1)
    dist = np.sqrt((0 - np.array(fpr))**2 + (1 - np.array(tpr))**2)
    best_threshold = percentiles[dist.argmin()]
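    # i.e. pick the percentile whose ROC point lies closest to the perfect-classifier corner (0, 1)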

    stats = pd.DataFrame(stats_list)
    stats.rename(dict(zip(list(range(26)),
                          cm.stats().keys())),
                 axis='columns',
                 inplace=True)
    stats.index.rename('Threshold', inplace=True)
    stats.columns.rename('Statistics', inplace=True)
    stats = stats[::-1].reset_index(drop=True)

    return best_threshold, stats
Example #21
def model_func(alg, X_train, X_test, Y_train, Y_test, target, predictors, filename):
    #Fit the algorithm on the data
    algorithm=alg.fit(X_train, Y_train)
    print (algorithm)
    
    #Predict training set:
    dtrain_predictions = alg.predict(X_train)

    #Perform cross-validation:
    cv_score = model_selection.cross_val_score(alg, X_train, Y_train, cv=10, scoring='neg_mean_squared_error')
    #kfold = model_selection.KFold(n_splits=10, random_state=seed)  # alternative: scoring='accuracy', cv=kfold (n_splits = number of CV folds)
    cv_score = np.sqrt(np.abs(cv_score))
    
    #Print model report:
    print("\nModel Report")
    print("RMSE : %.4g" % np.sqrt(metrics.mean_squared_error((Y_train), dtrain_predictions)))
    print("CV Score : Mean - %.4g | Std - %.4g | Min - %.4g | Max - %.4g" % (np.mean(cv_score),
									     np.std(cv_score), 
									     np.min(cv_score),
									     np.max(cv_score)))
    
    #Predict on testing data and computing error rate:
    #dtest[target] = alg.predict(dtest[predictors])
    predictions = alg.predict(X_test)
    errors = predictions !=  Y_test
    number_of_errors = errors.sum()
    error_rate = errors.sum() / len(predictions)
    
    
    #Print R-squared
    sse = ((Y_test - predictions) ** 2).sum(axis=0)
    tse = ((Y_test - np.average(Y_test, axis=0)) ** 2).sum(axis=0)
    print("R-squared : %.4g" % (1 - (sse / tse)))
    print("RMSE Test : %.4g" % np.sqrt(metrics.mean_squared_error((Y_test), predictions)))
    
    
    print("Score:%s" % (alg.score(X_train, Y_train)))
    print("Decision Function:\n%s" % (alg.decision_function(X_test)))
    print("Intercept:%s" %(alg.intercept_))
    print("Coefficents:\n%s"%(alg.coef_))
    print("Number of errors=%i, error rate=%.2f" % (number_of_errors, error_rate))
    print("Classification Report:\n%s" % (classification_report(Y_test, predictions, labels=np.unique(predictions))))
    
    CMat = ConfusionMatrix(Y_test, predictions)
    print("Statistics regarding classification model =%s" % (CMat.stats()))
    
    # ROC-AUC score: closer to 1 the better. This metric doesn't work in multinomial cases.
    #k_fold = model_selection.KFold(n_splits=10, random_state=7)
    #results = model_selection.cross_val_score(alg, predictors, target, cv=k_fold, scoring='roc_auc')
    #print("AUC: %.3f (%.3f)" % (results.mean(), results.std()))
          
    #Plot the confusion matrix
    mat = confusion_matrix(Y_test, predictions)  # RMC_y_pred and label were undefined; use the predictions and class labels computed here
    labels = np.unique(Y_test)
    sns.heatmap(mat, annot=True, fmt='d', cbar=True, xticklabels=labels, yticklabels=labels, linewidths=.5)
    plt.xlabel('predicted label')
    plt.ylabel('true label')
    
    # Graph on testing data: line/model
    plt.scatter(Y_test, predictions , marker='+', color='r')
    plt.xlabel("True Values")
    plt.ylabel("Predictions")
    plt.show()
Example #22
def evaluate_model(trainX, trainy, testX, testy):
    
    verbose, epochs, batch_size = 2, 10, 6
    n_timesteps, n_features, n_outputs = trainX.shape[1], trainX.shape[2], trainy.shape[1]
    model = Sequential()
    # reshape output into [samples, timesteps, features]
    trainy = trainy.reshape((trainy.shape[0], trainy.shape[1]))
    testy = testy.reshape((testy.shape[0], testy.shape[1]))
    # define model
    model = Sequential()
    model.add(Conv1D(filters=512, kernel_size=15, padding='same', activation='relu', input_shape=(n_timesteps, n_features)))
    model.add(MaxPooling1D(pool_size=8, padding='same'))
    model.add(Conv1D(filters=512, kernel_size=15, padding='same', activation='relu', input_shape=(n_timesteps, n_features)))
    model.add(MaxPooling1D(pool_size=8, padding='same',))
    model.add(Conv1D(filters=1024, kernel_size=15, padding='same', activation='relu', input_shape=(n_timesteps, n_features)))
    model.add(MaxPooling1D(pool_size=8, padding='same'))
    model.add(Flatten())
    model.add(RepeatVector(n_outputs))
    model.add(Bidirectional(LSTM(200, activation='relu', return_sequences=True, input_shape=(n_timesteps, n_features))))
    model.add(Dropout(0.5))
    model.add(Bidirectional(LSTM(200, activation='relu', return_sequences=True, input_shape=(n_timesteps, n_features))))
    model.add(Dropout(0.5))
    model.add(Bidirectional(LSTM(200, activation='relu', return_sequences=True, input_shape=(n_timesteps, n_features))))
    model.add(Dropout(0.5))
    model.add(Bidirectional(LSTM(200, activation='relu', return_sequences=False, input_shape=(n_timesteps, n_features))))
    #model.add((Dense(200, activation='relu')))
    model.add((Dense(n_outputs, activation='sigmoid')))
    opt = optimizers.Adam(lr=1e-6)
    model.compile(loss='binary_crossentropy', optimizer=opt, metrics=['binary_accuracy'])
    class_weight = {0: 1., 1: 3.}
    # fit network
    history = model.fit(trainX, trainy, shuffle=False, epochs=epochs, validation_split=0.1, class_weight=class_weight, batch_size=batch_size, verbose=verbose)
    # evaluate model
    _, accuracy = model.evaluate(testX, testy, batch_size=batch_size, verbose=0)
    # list all data in history
    #print(history.history.keys())
    # make predictions
    #trainPredict = model.predict(trainX)
    testPredict = model.predict(testX)
    
    # compute confusion matrix
    y_pred = argmax(testPredict, axis=1, out=None)
    # save prediction dataset
    #prediction = DataFrame(trainPredict).to_csv('train_predictLSTM.csv')
    DataFrame(testPredict).to_csv('./test_predictLSTM.csv')
        
    
    y_actu = argmax(testy, axis=1, out=None)
    y_pred = y_pred.reshape(y_pred.shape[0],)
    y_actu = y_actu.reshape(y_actu.shape[0],)
    df = stack((y_pred, y_actu))
    df = df.transpose()
    df = df.reshape(df.shape[0], 2)
    DataFrame(df).to_csv('./classification.csv')
        #y_pred = np.delete(y_pred, 1)
    print(y_actu.shape, y_pred.shape)
    cm = ConfusionMatrix(y_actu, y_pred)
    cm.print_stats()
    d = cm.stats()
    f1 = d['F1_score']  # look the F1 score up by key instead of by position in the OrderedDict
    print(f1)
    return accuracy
Example #23
def test_model(test_model, test_dataloader):
    print("Testing started..")
    test_model.eval()
    correct = 0
    total = 0
    all_labels_d = torch.tensor([], dtype=torch.long).to(device)
    all_predictions_d = torch.tensor([], dtype=torch.long).to(device)
    all_predictions_probabilities_d = torch.tensor(
        [], dtype=torch.float).to(device)

    if batch_size == 1:
        all_timePerFrame_host = []

    else:
        print("Please set batch size to 1....")
        exit(0)

    with torch.no_grad():
        for inputs, labels in test_dataloader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            frame_time_start = datetime.datetime.now()  # frame start time

            outputs = test_model(inputs)
            outputs = F.softmax(outputs, 1)
            #print(outputs)
            predicted_probability, predicted = torch.max(outputs.data, 1)

            frame_time_end = datetime.datetime.now()  # frame end time

            time_per_image = (frame_time_end -
                              frame_time_start).total_seconds()
            #print((predicted == labels).sum())
            total += labels.size(0)
            correct += (predicted == labels).sum()
            all_labels_d = torch.cat((all_labels_d, labels), 0)
            all_predictions_d = torch.cat((all_predictions_d, predicted), 0)
            all_predictions_probabilities_d = torch.cat(
                (all_predictions_probabilities_d, predicted_probability), 0)
            all_timePerFrame_host = all_timePerFrame_host + [time_per_image]

    print('copying some data back to cpu for generating confusion matrix...')
    y_true = all_labels_d.cpu()
    y_predicted = all_predictions_d.cpu()  # to('cpu')
    testset_predicted_probabilites = all_predictions_probabilities_d.cpu()  # to('cpu')

    class_names = test_datasets.classes  # taking class names for plotting confusion matrix
    cm = confusion_matrix(y_true, y_predicted,
                          target_number_labels)  # confusion matrix

    print('Accuracy of the network on the %d test images: %f %%' %
          (total, (100.0 * correct / total)))

    print(cm)

    print("taking class names to plot CM")

    print("Generating confution matrix")

    plot_confusion_matrix(cm, classes=class_names, title='my confusion matrix')
    #plot_confusion_matrix(cm, classes=target_number_labels, title='my confusion matrix')

    # print('confusion matrix saved to ', plot_dir)

    ##################################################################
    # classification report
    #################################################################
    #print(classification_report(y_true, y_predicted, target_names=target_number_labels))

    ##################################################################
    # Standard metrics for medico Task
    #################################################################
    print("Printing standard metric for medico task")

    print("Accuracy =", mtc.accuracy_score(y_true, y_predicted))
    print("Precision score =",
          mtc.precision_score(y_true, y_predicted, average="weighted"))
    print("Recall score =",
          mtc.recall_score(y_true, y_predicted, average="weighted"))
    print("F1 score =", mtc.f1_score(y_true, y_predicted, average="weighted"))
    print("Specificity =")
    print("MCC =", mtc.matthews_corrcoef(y_true, y_predicted))

    ##################################################################
    # Standard metrics for medico Task
    #################################################################
    print("Printing standard metric for medico task")

    print("1. Recall score (REC) =",
          mtc.recall_score(y_true, y_predicted, average="weighted"))
    print("2. Precision score (PREC) =",
          mtc.precision_score(y_true, y_predicted, average="weighted"))
    print("3. Specificity (SPEC) =")
    print("4. Accuracy (ACC) =", mtc.accuracy_score(y_true, y_predicted))
    print("5. Matthews correlation coefficient(MCC) =",
          mtc.matthews_corrcoef(y_true, y_predicted))

    print("6. F1 score (F1) =",
          mtc.f1_score(y_true, y_predicted, average="weighted"))

    panda_cm_data = ConfusionMatrix(y_true, y_predicted)
    panda_cm_data.print_stats()
    cm_dictionary = panda_cm_data.stats()
    print("cm _ dictionary saving")
    f = open(os.path.join(history_dir, "20_5_cm_dictionary.pkl"), "wb")
    pickle.dump(cm_dictionary['class'], f)
    f.close()

    print('Finished.. ')

    print('Finished.. ')

    return y_predicted, testset_predicted_probabilites, all_timePerFrame_host
confusion_matrix.plot(normalized=True)
plt.show()


# In[176]:


confusion_matrix.print_stats()


# ### Percentage of fraud transactions that the model detected incorrectly

# In[177]:


print("FNR is {0}".format(confusion_matrix.stats()['FNR']))


# #### Plotting ROC curve

# In[178]:

logit_roc_auc = roc_auc_score(y_test, lr.predict(X_test))
fpr, tpr, thresholds = roc_curve(y_test, lr.predict_proba(X_test)[:,1])  # roc_curve expects the true labels first

plt.figure()
plt.plot(fpr, tpr, label='Logistic Regression (area = %0.2f)' % logit_roc_auc)
plt.plot([0, 1], [0, 1],'r--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
Example #25
def test_model(test_model, test_dataloader):
    print("Testing started..")
    test_model.eval()
    correct = 0
    total = 0
    all_labels_d = torch.tensor([], dtype=torch.long).to(device)
    all_predictions_d = torch.tensor([], dtype=torch.long).to(device)

    with torch.no_grad():
        for inputs, labels in test_dataloader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            outputs = test_model(inputs)
            #outputs = (outputs1*0.6 + outputs2*0.4)/2
            _, predicted = torch.max(outputs.data, 1)
            print((predicted == labels).sum())
            total += labels.size(0)
            correct += (predicted == labels).sum()
            all_labels_d = torch.cat((all_labels_d, labels), 0)
            all_predictions_d = torch.cat((all_predictions_d, predicted), 0)

    print('copying some data back to cpu for generating confusion matrix...')
    testset_labels = all_labels_d.cpu()
    testset_predicted_labels = all_predictions_d.cpu()  # to('cpu')

    cm = confusion_matrix(testset_labels,
                          testset_predicted_labels)  # confusion matrix

    print('Accuracy of the network on the %d test images: %f %%' %
          (total, (100.0 * correct / total)))

    print(cm)

    print("taking class names to plot CM")

    class_names = test_datasets.classes  # taking class names for plotting confusion matrix

    print("Generating confution matrix")

    plot_confusion_matrix(cm, classes=class_names, title='my confusion matrix')

    print('confusion matrix saved to ', plot_dir)

    ##################################################################
    # classification report
    #################################################################
    print(
        classification_report(testset_labels,
                              testset_predicted_labels,
                              target_names=class_names))

    ##################################################################
    # Standard metrics for medico Task
    #################################################################
    print("Printing standard metric for medico task")

    weights = [
        1 / 53, 1 / 81, 1 / 138, 1 / 125, 1 / 134, 1 / 11, 1 / 125, 1 / 132,
        1 / 132, 1 / 4, 1 / 184, 1 / 72, 1 / 120, 1 / 39, 1 / 110, 1 / 138
    ]
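    # one weight per class, presumably the inverse of each class's sample count in the test set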

    print(
        "1. Recall score (REC) =",
        mtc.recall_score(testset_labels,
                         testset_predicted_labels,
                         average="weighted"))
    print(
        "2. Precision score (PREC) =",
        mtc.precision_score(testset_labels,
                            testset_predicted_labels,
                            average="weighted"))
    print("3. Specificity (SPEC) =")
    # accuracy_score has no positional weights argument (its third positional is `normalize`),
    # and sample_weight would need one weight per sample, so the per-class weights are not passed here
    print("4. Accuracy (ACC) =",
          mtc.accuracy_score(testset_labels, testset_predicted_labels))
    print("5. Matthews correlation coefficient(MCC) =",
          mtc.matthews_corrcoef(testset_labels, testset_predicted_labels))

    print(
        "6. F1 score (F1) =",
        mtc.f1_score(testset_labels,
                     testset_predicted_labels,
                     average="weighted"))

    panda_cm_data = ConfusionMatrix(testset_labels, testset_predicted_labels)
    panda_cm_data.print_stats()
    cm_dictionary = panda_cm_data.stats()
    print("cm _ dictionary saving")
    f = open(os.path.join(history_dir, "24_3_cm_dictionary.pkl"), "wb")
    pickle.dump(cm_dictionary['class'], f)
    f.close()

    print('Finished.. ')
scores = cross_val_score(clf, X, y, cv=5)
print(scores.mean(), scores)

from sklearn.metrics import classification_report
le = preprocessing.LabelEncoder()
le.fit(y.as_matrix())
target_names = le.classes_

print(classification_report(y_test, y_predictions, target_names=target_names))

print(collections.Counter(y_test.factorize()[0]))
'''
This will plot the correlation between the attributes
from pandas.tools.plotting import scatter_matrix
scatter_matrix(df, alpha=0.2, figsize=(6, 6), diagonal='kde')
plt.show()

'''
'''


-----------Printing the stats--------------
cm.stats()
'''
from pandas.tools.plotting import scatter_matrix
scatter_matrix(df, alpha=0.2, figsize=(6, 6), diagonal='kde')
plt.show()

#show plots
#plt.show()