Example #1
from collections import Counter

import numpy as np
import matplotlib.pyplot as plt
import pickle


def generate_result(doc_names, true_labels, pred_labels, title):
    # Count how many gold examples carry each label, in doc_names order.
    count_dict = Counter(true_labels)
    num_labels = np.array([count_dict[lab] for lab in doc_names])
    conf_matrix, conf_matrix_string = confusion_matrix(true_labels, pred_labels, doc_names)
    tpr, fpr = roc_tpr_fpr(true_labels, pred_labels)
    print(tpr, fpr)
    acc = getAccuracy(conf_matrix, num_labels)
    # Cross-check: accuracy derived from the confusion matrix should equal
    # the fraction of exact label matches.
    print(acc, np.average(np.array(pred_labels) == np.array(true_labels)))
    print("---------------")
    display_confusion_matrix(conf_matrix, conf_matrix_string, num_labels, doc_names, title)
    return conf_matrix, conf_matrix_string, tpr, fpr, acc
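# Hypothetical usage sketch (the label lists and the helper functions called
# above are assumed to be defined elsewhere in this module):
# conf_matrix, conf_str, tpr, fpr, acc = generate_result(
#     ['R1', 'R3', 'R4', 'R5', 'R7', 'NA'], true_labels, pred_labels, "Jacc_0.6")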
ax = plt.gca()
# i = 0
# for xy in zip(fpr_list, tpr_list):
#     thresh = thresh_list[i]
#     print(thresh)
#     ax.annotate(str(thresh), xy=xy, textcoords='data')
plt.title("JACCARD ROC CURVE")
plt.xlabel("FPR of Rumour")
plt.ylabel("TPR of Rumour")
plt.savefig("roc_curve_jaccard.png", format="png")

# For displaying the confusion matrix
result = query_list_jacc(search_text_list, docMap, docSpace, thresh)
# print("score ", score_dict)
# pickle.dump(result, open('resultwithna.p', 'wb'))
# print("\n".join(result))
doc_names = ['R1', 'R3', 'R4', 'R5', 'R7', 'NA']
count_dict = Counter(true_labels)
num_labels = np.array([count_dict[lab] for lab in doc_names])
conf_matrix, conf_matrix_string = confusion_matrix(true_labels, result, doc_names)
tpr, fpr = roc_tpr_fpr(true_labels, result)
print(tpr, fpr)
display_confusion_matrix(conf_matrix, conf_matrix_string, num_labels, doc_names, "Jacc_" + str(thresh))
data = {'num_labels': num_labels, 'conf_matrix': conf_matrix,
        'conf_matrix_string': conf_matrix_string, 'true_labels': true_labels,
        'pred_labels': result, 'doc_names': doc_names}

with open('confwithna.p', 'wb') as f:
    pickle.dump(data, f)
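# roc_tpr_fpr is called above but not defined in this snippet. A minimal
# sketch of one plausible implementation (an assumption, not the author's
# code): treat 'NA' as the negative class and any rumour label (R1, R3, ...)
# as positive, then report true-positive and false-positive rates for
# rumour detection.
def roc_tpr_fpr_sketch(true_labels, pred_labels):
    pairs = list(zip(true_labels, pred_labels))
    tp = sum(1 for t, p in pairs if t != 'NA' and p != 'NA')  # rumour found
    fn = sum(1 for t, p in pairs if t != 'NA' and p == 'NA')  # rumour missed
    fp = sum(1 for t, p in pairs if t == 'NA' and p != 'NA')  # false alarm
    tn = sum(1 for t, p in pairs if t == 'NA' and p == 'NA')  # correct reject
    return tp / float(tp + fn), fp / float(fp + tn)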
# search_text_list = get_tweet_excel(tweet_filename, "test_jaccard", 2)
# true_labels = get_tweet_excel(tweet_filename,"test_jaccard",3)

with open('rumour.p', 'rb') as f:
    data = pickle.load(f)
docMap = data['docMap']
docSpace = data['docSpace']
termMap = data['termMap']
index = data['index']
search_text_list = data['search_text_list']
true_labels = data['true_labels']
queryIndex = QueryIndex(index, docSpace, docMap, termMap)
# pickle.dump({'docMap': docMap, 'docSpace': docSpace,
#              'termMap': termMap, 'index': index,
#              'search_text_list': search_text_list,
#              'true_labels': true_labels}, open('rumour.p', 'wb'))
thresh = 0.6
# thresh_list = np.arange(0, 4, 0.1)
# generate_roc_curve(search_text_list, docMap, queryIndex, thresh_list, true_labels, "VSM_ROC_CURVE")
pred_labels = query_list_vsm(search_text_list, docMap, queryIndex, thresh)

doc_names = ['R1', 'R3', 'R4', 'R5', 'R7', 'NA']
count_dict = Counter(true_labels)
num_labels = np.array([count_dict[lab] for lab in doc_names])
conf_matrix, conf_matrix_string = confusion_matrix(true_labels, pred_labels, doc_names)
tpr, fpr = roc_tpr_fpr(true_labels, pred_labels)
print(thresh)
print(tpr, fpr)
acc = getAccuracy(conf_matrix, num_labels)
print(acc, np.average(np.array(pred_labels) == np.array(true_labels)))
print("---------------")
# display_confusion_matrix(conf_matrix, conf_matrix_string, num_labels, doc_names, "VSM_" + str(thresh))
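The commented-out `generate_roc_curve` call above hints at how the ROC curve is produced: sweep `thresh_list`, classify at each threshold, and record one (FPR, TPR) point per threshold. A minimal sketch of that sweep, assuming `query_list_vsm` and `roc_tpr_fpr` behave as they are used in this snippet (`generate_roc_curve_sketch` is a hypothetical name):

import matplotlib.pyplot as plt

def generate_roc_curve_sketch(search_text_list, docMap, queryIndex,
                              thresh_list, true_labels, title):
    # Collect one (FPR, TPR) point per threshold.
    fpr_list, tpr_list = [], []
    for thresh in thresh_list:
        preds = query_list_vsm(search_text_list, docMap, queryIndex, thresh)
        tpr, fpr = roc_tpr_fpr(true_labels, preds)
        tpr_list.append(tpr)
        fpr_list.append(fpr)
    plt.plot(fpr_list, tpr_list, marker='o')
    plt.title(title)
    plt.xlabel("FPR of Rumour")
    plt.ylabel("TPR of Rumour")
    plt.savefig(title.lower() + ".png", format="png")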
Example #4
from collections import Counter

import numpy as np
import matplotlib.pyplot as plt
import pickle

#     ax.annotate(str(thresh), xy=xy, textcoords='data')
plt.title("JACCARD ROC CURVE")
plt.xlabel("FPR of Rumour")
plt.ylabel("TPR of Rumour")
plt.savefig("roc_curve_jaccard.png", format="png")

# For displaying the confusion matrix
result = query_list_jacc(search_text_list, docMap, docSpace, thresh)
# print("score ", score_dict)
# pickle.dump(result, open('resultwithna.p', 'wb'))
# print("\n".join(result))
doc_names = ['R1', 'R3', 'R4', 'R5', 'R7', 'NA']
count_dict = Counter(true_labels)
num_labels = np.array([count_dict[lab] for lab in doc_names])
conf_matrix, conf_matrix_string = confusion_matrix(true_labels, result,
                                                   doc_names)
tpr, fpr = roc_tpr_fpr(true_labels, result)
print(tpr, fpr)
display_confusion_matrix(conf_matrix, conf_matrix_string, num_labels,
                         doc_names, "Jacc_" + str(thresh))
data = {
    'num_labels': num_labels,
    'conf_matrix': conf_matrix,
    'conf_matrix_string': conf_matrix_string,
    'true_labels': true_labels,
    'pred_labels': result,
    'doc_names': doc_names
}

with open('confwithna.p', 'wb') as f:
    pickle.dump(data, f)
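`confusion_matrix` is called here but not defined in this snippet; the real helper also returns a printable string. A minimal sketch of the matrix half, assuming rows index the true label and columns the predicted label over `doc_names` (an inference from the call sites, not the author's implementation):

import numpy as np

def confusion_matrix_sketch(true_labels, pred_labels, doc_names):
    # Rows: true label; columns: predicted label.
    idx = {name: i for i, name in enumerate(doc_names)}
    mat = np.zeros((len(doc_names), len(doc_names)), dtype=int)
    for t, p in zip(true_labels, pred_labels):
        mat[idx[t], idx[p]] += 1
    return mat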
Example #5
# true_labels = get_tweet_excel(tweet_filename,"test_jaccard",3)

from collections import Counter

import numpy as np
import pickle

with open('rumour.p', 'rb') as f:
    data = pickle.load(f)
docMap = data['docMap']
docSpace = data['docSpace']
termMap = data['termMap']
index = data['index']
search_text_list = data['search_text_list']
true_labels = data['true_labels']
queryIndex = QueryIndex(index, docSpace, docMap, termMap)
# pickle.dump({'docMap': docMap, 'docSpace': docSpace,
#              'termMap': termMap, 'index': index,
#              'search_text_list': search_text_list,
#              'true_labels': true_labels}, open('rumour.p', 'wb'))
thresh = 0.6
# thresh_list = np.arange(0, 4, 0.1)
# generate_roc_curve(search_text_list, docMap, queryIndex, thresh_list, true_labels, "VSM_ROC_CURVE")
pred_labels = query_list_vsm(search_text_list, docMap, queryIndex, thresh)

doc_names = ['R1', 'R3', 'R4', 'R5', 'R7', 'NA']
count_dict = Counter(true_labels)
num_labels = np.array([count_dict[lab] for lab in doc_names])
conf_matrix, conf_matrix_string = confusion_matrix(true_labels, pred_labels,
                                                   doc_names)
tpr, fpr = roc_tpr_fpr(true_labels, pred_labels)
print(thresh)
print(tpr, fpr)
acc = getAccuracy(conf_matrix, num_labels)
print(acc, np.average(np.array(pred_labels) == np.array(true_labels)))
print("---------------")
# display_confusion_matrix(conf_matrix, conf_matrix_string, num_labels, doc_names, "VSM_" + str(thresh))
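`getAccuracy(conf_matrix, num_labels)` is printed side by side with `np.average(np.array(pred_labels) == np.array(true_labels))`, so it presumably computes overall accuracy from the confusion matrix. A minimal sketch under that assumption (hypothetical name and logic):

import numpy as np

def get_accuracy_sketch(conf_matrix, num_labels):
    # The diagonal holds correctly classified counts, and num_labels sums to
    # the total sample count, so this matches the elementwise-match average.
    return np.trace(conf_matrix) / float(np.sum(num_labels))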