def generate_result(doc_names, true_labels, pred_labels, title):
    """Score predictions against the true labels, print a summary and display it.

    Args:
        doc_names: Label names. Fixes the order of the per-label counts and is
            forwarded to ``confusion_matrix`` and ``display_confusion_matrix``.
        true_labels: Reference label of every item.
        pred_labels: Predicted label of every item, compared position by
            position with ``true_labels``.
        title: Forwarded unchanged to ``display_confusion_matrix``.

    Returns:
        The tuple ``(conf_matrix, conf_matrix_string, tpr, fpr, acc)``: the
        first two values come from ``confusion_matrix``, ``tpr``/``fpr`` from
        ``roc_tpr_fpr`` and ``acc`` from ``getAccuracy``.
    """
    # Occurrences of each name from doc_names among the true labels; a Counter
    # reports 0 for a name that never occurs.
    occurrences = Counter(true_labels)
    per_label_totals = np.array([occurrences[name] for name in doc_names])

    matrix, matrix_text = confusion_matrix(true_labels, pred_labels, doc_names)

    true_pos_rate, false_pos_rate = roc_tpr_fpr(true_labels, pred_labels)
    # One pre-formatted string per print keeps the output "a b" on one line.
    print("%s %s" % (true_pos_rate, false_pos_rate))

    accuracy = getAccuracy(matrix, per_label_totals)
    # Second figure: share of positions where prediction and truth agree.
    agreement = np.array(pred_labels) == np.array(true_labels)
    print("%s %s" % (accuracy, np.average(agreement)))
    print("---------------")

    display_confusion_matrix(matrix, matrix_text, per_label_totals, doc_names, title)
    return matrix, matrix_text, true_pos_rate, false_pos_rate, accuracy
ax = plt.gca() #i = 0 #for xy in zip(fpr_list, tpr_list): # thresh = thresh_list[i] # print thresh # ax.annotate(str(thresh), xy=xy, textcoords='data') plt.title("JACCARD ROC CURVE") plt.xlabel("FPR of Rumour") plt.ylabel("TPR of Rumour") pylab.savefig("roc_curve_jaccard.png",format="png") ''' # For displaying confusion matrix result = query_list_jacc(search_text_list, docMap, docSpace,thresh) #print "score ", score_dict #pickle.dump(result, open('resultwithna.p','w')) #print "\n".join(result) doc_names = ['R1','R3','R4','R5','R7','NA'] count_dict = Counter(true_labels) num_labels = np.array([count_dict[lab] for lab in doc_names]) conf_matrix, conf_matrix_string = confusion_matrix(true_labels, result, doc_names) tpr,fpr = roc_tpr_fpr(true_labels,result) print tpr,fpr display_confusion_matrix(conf_matrix, conf_matrix_string, num_labels, doc_names,"Jacc_" +str(thresh)) data = {'num_labels' : num_labels, 'conf_matrix':conf_matrix, 'conf_matrix_string':conf_matrix_string, 'true_labels':true_labels, 'pred_labels':result, 'doc_names':doc_names} pickle.dump(data,open('confwithna.p','w'))
# Run query_list_vsm at a single threshold on the inputs cached in 'rumour.p'
# and print the threshold, tpr/fpr and accuracy figures.

# Earlier way of obtaining the two lists that are now read from the cache:
# search_text_list = get_tweet_excel(tweet_filename, "test_jaccard", 2)
# true_labels = get_tweet_excel(tweet_filename,"test_jaccard",3)

# Text mode ('r') is kept on purpose: it matches the mode 'w' used by the
# commented-out writer further down. The with-block closes the handle as soon
# as loading finishes instead of leaving it open.
# NOTE(review): unpickling can execute arbitrary code -- only load a
# 'rumour.p' that this project produced itself.
with open('rumour.p', 'r') as rumour_file:
    data = pickle.load(rumour_file)

docMap = data['docMap']
docSpace = data['docSpace']
termMap = data['termMap']
index = data['index']
search_text_list = data['search_text_list']
true_labels = data['true_labels']
queryIndex = QueryIndex(index, docSpace, docMap, termMap)

# Snippet that (re)creates the cache read above:
# pickle.dump({'docMap':docMap, 'docSpace':docSpace,
#              'termMap':termMap, 'index':index,
#              'search_text_list':search_text_list, 'true_labels':true_labels},open('rumour.p','w'))

thresh = 0.6
# Threshold sweep, currently disabled:
# thresh_list = np.arange(0,4,0.1);
# generate_roc_curve(search_text_list, docMap, queryIndex, thresh_list, true_labels, "VSM_ROC_CURVE")
pred_labels = query_list_vsm(search_text_list, docMap, queryIndex, thresh)

doc_names = ['R1','R3','R4','R5','R7','NA']
# Occurrences of each name from doc_names among the true labels; a Counter
# reports 0 for a name that never occurs.
count_dict = Counter(true_labels)
num_labels = np.array([count_dict[lab] for lab in doc_names])
conf_matrix, conf_matrix_string = confusion_matrix(true_labels, pred_labels, doc_names)
tpr, fpr = roc_tpr_fpr(true_labels, pred_labels)

# One pre-formatted string per print keeps the output "a b" on one line.
print(thresh)
print("%s %s" % (tpr, fpr))
acc = getAccuracy(conf_matrix, num_labels)
# Second figure: share of positions where prediction and truth agree.
print("%s %s" % (acc, np.average(np.array(pred_labels) == np.array(true_labels))))
print("---------------")
#
# display_confusion_matrix(conf_matrix, conf_matrix_string, num_labels, doc_names,"VSM_" +str(thresh))
# ax.annotate(str(thresh), xy=xy, textcoords='data') plt.title("JACCARD ROC CURVE") plt.xlabel("FPR of Rumour") plt.ylabel("TPR of Rumour") pylab.savefig("roc_curve_jaccard.png",format="png") ''' # For displaying confusion matrix result = query_list_jacc(search_text_list, docMap, docSpace, thresh) #print "score ", score_dict #pickle.dump(result, open('resultwithna.p','w')) #print "\n".join(result) doc_names = ['R1', 'R3', 'R4', 'R5', 'R7', 'NA'] count_dict = Counter(true_labels) num_labels = np.array([count_dict[lab] for lab in doc_names]) conf_matrix, conf_matrix_string = confusion_matrix(true_labels, result, doc_names) tpr, fpr = roc_tpr_fpr(true_labels, result) print tpr, fpr display_confusion_matrix(conf_matrix, conf_matrix_string, num_labels, doc_names, "Jacc_" + str(thresh)) data = { 'num_labels': num_labels, 'conf_matrix': conf_matrix, 'conf_matrix_string': conf_matrix_string, 'true_labels': true_labels, 'pred_labels': result, 'doc_names': doc_names } pickle.dump(data, open('confwithna.p', 'w'))
# Run query_list_vsm at a single threshold on the inputs cached in 'rumour.p'
# and print the threshold, tpr/fpr and accuracy figures.

# Earlier way of obtaining true_labels, which is now read from the cache:
# true_labels = get_tweet_excel(tweet_filename,"test_jaccard",3)

# The cache stays in text mode ('r') so that it matches the mode 'w' of the
# commented-out writer below. Using a with-block means the file handle is
# released right after loading rather than being left open.
# NOTE(review): unpickling can execute arbitrary code -- only load a
# 'rumour.p' that this project produced itself.
with open('rumour.p', 'r') as cache_file:
    data = pickle.load(cache_file)

docMap = data['docMap']
docSpace = data['docSpace']
termMap = data['termMap']
index = data['index']
search_text_list = data['search_text_list']
true_labels = data['true_labels']
queryIndex = QueryIndex(index, docSpace, docMap, termMap)

# Snippet that (re)creates the cache read above:
# pickle.dump({'docMap':docMap, 'docSpace':docSpace,
#              'termMap':termMap, 'index':index,
#              'search_text_list':search_text_list, 'true_labels':true_labels},open('rumour.p','w'))

thresh = 0.6
# Threshold sweep, currently disabled:
# thresh_list = np.arange(0,4,0.1);
# generate_roc_curve(search_text_list, docMap, queryIndex, thresh_list, true_labels, "VSM_ROC_CURVE")
pred_labels = query_list_vsm(search_text_list, docMap, queryIndex, thresh)

doc_names = ['R1', 'R3', 'R4', 'R5', 'R7', 'NA']
# Per-name tally of the true labels, in doc_names order; a Counter reports 0
# for a name that never occurs.
count_dict = Counter(true_labels)
num_labels = np.array([count_dict[lab] for lab in doc_names])
conf_matrix, conf_matrix_string = confusion_matrix(true_labels, pred_labels,
                                                   doc_names)
tpr, fpr = roc_tpr_fpr(true_labels, pred_labels)

# Each print receives one pre-formatted string so the output stays "a b".
print(thresh)
print("%s %s" % (tpr, fpr))
acc = getAccuracy(conf_matrix, num_labels)
# Second figure: share of positions where prediction and truth agree.
print("%s %s" % (acc, np.average(np.array(pred_labels) == np.array(true_labels))))
print("---------------")
#
# display_confusion_matrix(conf_matrix, conf_matrix_string, num_labels, doc_names,"VSM_" +str(thresh))