def result(train_fraction, test_fraction):
    """Train on one partition and return a 2x2 confusion matrix for the other.

    The data comes from ``main.get_data(train_fraction, test_fraction)``;
    element 0 of its result is used for training and element 1 for testing.
    Each test row is split into features (everything but the last item) and
    the actual label (the last item).

    The label value 3 is treated as the positive ("high") class and every
    other label as negative ("low"). A wrong prediction on a non-3 row is
    counted as a false positive regardless of which label was predicted.

    Returns:
        ``[[tn, fp], [fn, tp]]`` -- rows are the actual class (low, high),
        columns the predicted class (low, high).
    """
    dataset = main.get_data(train_fraction, test_fraction)
    held_out = dataset[1]
    models = train_bayes.get_models(dataset[0])

    # matrix[actual][predicted], with 0 = low and 1 = high.
    matrix = [[0, 0], [0, 0]]
    for row in held_out:
        actual = row[-1]
        is_correct = train_bayes.predict(models, row[:-1]) == actual
        actual_idx = 1 if actual == 3 else 0
        # A correct prediction lands on the diagonal; a wrong one is
        # attributed to the opposite column.
        predicted_idx = actual_idx if is_correct else 1 - actual_idx
        matrix[actual_idx][predicted_idx] += 1
    return matrix
def plotRoc(runs=100):
    """Plot TPR against FAR over a sweep of ``runs`` evaluations.

    The data is shuffled once and partitioned with
    ``main.partition_data(data, 0, int(len(data) * 0.3))``; element 0 of the
    partition trains the models and element 1 is evaluated. For each
    ``i`` in ``range(runs)`` the models are scored with
    ``roc_result(all_models, part[1], 1 / (runs - i))`` -- the third argument
    is presumably a decision threshold (TODO confirm against ``roc_result``).

    Each returned matrix is read as ``[[tn, fp], [fn, tp]]`` and contributes
    one point ``(fp / (fp + tn), tp / (tp + fn))`` to the curve, which is
    drawn on matplotlib figure 2 and shown.

    Args:
        runs: Number of sweep steps / plotted points. Defaults to 100, the
            value previously hard-coded.

    Raises:
        ZeroDivisionError: If either row of a matrix returned by
            ``roc_result`` sums to zero.
    """
    aux_list = main.get_shuffle_data()
    # part[0] is the training partition, part[1] the evaluation partition.
    part = main.partition_data(aux_list, 0, int(len(aux_list) * 0.3))
    all_models = train_bayes.get_models(part[0])

    farList = []
    tprList = []
    for i in range(runs):
        # The swept value grows from 1/runs up to 1 as i increases.
        cm = roc_result(all_models, part[1], 1 / (runs - i))
        tn, fp = cm[0][0], cm[0][1]
        fn, tp = cm[1][0], cm[1][1]
        farList.append(fp / (fp + tn))
        tprList.append(tp / (tp + fn))

    plt.figure(2)
    plt.plot(farList, tprList)
    plt.ylabel('TPR')
    # Keep the title in step with the actual number of runs.
    plt.title("{} Run FAR vs TPR".format(runs))
    plt.xlabel('FAR')
    plt.show()
def kFold(k=5):
    """Run k-fold cross-validation and one reference 70/30 evaluation.

    The shuffled data is cut into ``k`` consecutive folds of
    ``len(data) // k`` rows; any leftover rows at the end are not used as a
    held-out fold. For each fold, ``main.partition_data(data, start, end)``
    is called, element 0 of its result trains the models and element 1 is
    scored with ``result1``. The matrix from ``result1`` is assumed to share
    the ``[[tn, fp], [fn, tp]]`` layout, so accuracy is the diagonal sum over
    the total, expressed as a percentage.

    Args:
        k: Number of folds. Defaults to 5, the value previously hard-coded.

    Returns:
        ``[accuracy_list, normal_acc]`` where ``accuracy_list`` holds one
        percentage per fold and ``normal_acc`` is
        ``calc_accuracy(result(0.7, 0.3))``.

    Raises:
        ValueError: If ``k`` is not positive or there are fewer than ``k``
            rows (the fold size would be zero).
    """
    if k <= 0:
        raise ValueError("k must be a positive integer, got {}".format(k))

    aux_list = main.get_shuffle_data()
    fold_size = len(aux_list) // k
    if fold_size == 0:
        # Previously surfaced as an opaque "range() arg 3 must not be zero".
        raise ValueError(
            "need at least {} rows for {}-fold validation, got {}".format(
                k, k, len(aux_list)))

    accuracy_list = []
    # Iterate exactly k folds; stepping through the data by fold_size could
    # yield more than k folds when the remainder is at least one fold long.
    for fold in range(k):
        start = fold * fold_size
        part = main.partition_data(aux_list, start, start + fold_size)
        all_models = train_bayes.get_models(part[0])
        cm = result1(all_models, part[1])
        correct = cm[0][0] + cm[1][1]
        total = correct + cm[0][1] + cm[1][0]
        accuracy_list.append((correct / total) * 100)

    normal_acc = calc_accuracy(result(0.7, 0.3))
    return [accuracy_list, normal_acc]
def compare_thirty_fifty():
    """Plot accuracy across 30 reshuffled runs for two partition sizes.

    Every run reshuffles the data, then for each cut-off fraction (0.3, then
    0.5) partitions it with ``main.partition_data(data, 0, cut)``, trains on
    element 0 of the partition, and scores element 1 with ``result1``. The
    two accuracies from ``calc_accuracy`` are drawn as separate lines on
    matplotlib figure 3, labelled '70-30 split' and '50-50 split'.
    """
    run_count = 30
    acc_small_cut = []
    acc_half_cut = []

    for _ in range(run_count):
        shuffled = main.get_shuffle_data()
        # Collect both confusion matrices first so the accuracy calls happen
        # only after both evaluations, as before.
        matrices = []
        for fraction in (0.3, 0.5):
            split = main.partition_data(
                shuffled, 0, int(len(shuffled) * fraction))
            models = train_bayes.get_models(split[0])
            matrices.append(result1(models, split[1]))
        acc_small_cut.append(calc_accuracy(matrices[0]))
        acc_half_cut.append(calc_accuracy(matrices[1]))

    run_numbers = list(range(run_count))

    plt.figure(3)
    series = ((acc_small_cut, '70-30 split'), (acc_half_cut, '50-50 split'))
    for values, caption in series:
        plt.plot(run_numbers, values, label=caption)
    plt.ylabel('Accuracy')
    plt.title("30 Run accuracy 70-30 and 50-50 split")
    plt.xlabel('Run number')
    plt.legend(bbox_to_anchor=(1, 1), bbox_transform=plt.gcf().transFigure)
    plt.show()
if __name__ == "__main__":
    # Script entry point: prints a single-split evaluation, saves the trained
    # models, then runs the accuracy plots and the 5-fold comparison.

    # Legend describing the confusion-matrix layout printed below.
    # NOTE(review): this legend looks like a table whose line breaks were
    # lost -- confirm the intended layout before relying on the output.
    s = """ low(predicted) high(predicted) low(actual) tn fp high(actual) fn tp """
    print(s)
    print("Part A. Simple Test 70% training results:")
    aux_list = (main.get_shuffle_data())
    # part[0] is used for training and part[1] for evaluation.
    part = main.partition_data(aux_list, 0, int(len(aux_list) * 0.3))
    all_models = train_bayes.get_models(part[0])
    # Persist the models trained on this split.
    train_bayes.save_model(all_models)
    cm1 = result1(all_models, part[1])
    print_ac(cm1)
    print("Accuracy graph for 100 runs")
    plot_hundred_runs()
    compare_thirty_fifty()
    # kfold_list[0] holds the per-fold accuracies, kfold_list[1] the
    # accuracy of the single 70/30 reference run.
    kfold_list = kFold()
    print("Mean 5 fold = ", str(train_bayes.listMean(kfold_list[0])))
    print("Std dev 5 fold = ", str(train_bayes.sigma(kfold_list[0])))
    print("70-30 accuracy = ", str(kfold_list[1]))
    # Ad-hoc checks left disabled:
    # print(kFold())
    # result(0.7,0.3)
    # result(0.5,0.5)