std_dict = bay.find_std(dict_info, mean_dict) prior = bay.find_priors(dict_info) #print(len(training_set)) #print(len(test_set)) _, predictions0 = bay.fun_bayes(training_set, test_set, training_label, test_label) acc_funbayes += _ micro_funbayes += f1_score(test_label, predictions0, average='micro') macro_funbayes += f1_score(test_label, predictions0, average='macro') _, predictions1 = nn.fun_knn(training_set, test_set, training_label, test_label) acc_funknn += _ micro_funknn += f1_score(test_label, predictions1, average='micro') macro_funknn += f1_score(test_label, predictions1, average='macro') _, predictions2 = nn.knn(training_set, training_label, test_set, test_label) acc_knn += _ micro_knn += f1_score(test_label, predictions2, average='micro') macro_knn += f1_score(test_label, predictions2, average='macro') _, predictions3 = bay.bayes(mean_dict, std_dict, test_set, test_label, prior) acc_bayes += _ micro_bayes += f1_score(test_label, predictions3, average='micro') macro_bayes += f1_score(test_label, predictions3, average='macro')
def pubmed():
    """Cross-validate the Bayes and k-NN classifiers on the PubMed data.

    Loads the feature matrix and labels from ``E:\\pubmed.csv`` and
    ``E:\\pubmed_label.csv``, splits the samples with ``man_split`` and, for
    each of the five folds, evaluates four classifiers in this order:
    ``bay.fun_bayes``, ``nn.fun_knn``, ``nn.knn`` and ``bay.bayes``.  Each
    classifier call yields an ``(accuracy, predictions)`` pair; the accuracy
    and the micro/macro F1 scores of the predictions are summed over the
    folds and their averages are written out.

    Results of the "inbuilt" classifiers go to the module-level ``file1``
    handle, results of the hand-written ones to the module-level ``file``
    handle.  The function returns nothing.
    """
    # NOTE(review): the third argument of man_split is presumably the number
    # of folds; the same value drives the loop and the averaging below.
    n_folds = 5

    train = load_data("E:\\pubmed.csv")
    labels = load_data("E:\\pubmed_label.csv")
    list_k_fold = man_split(train, labels, n_folds)

    # Per-classifier running sums over all folds.
    names = ("funbayes", "funknn", "knn", "bayes")
    acc = dict.fromkeys(names, 0)
    micro = dict.fromkeys(names, 0)
    macro = dict.fromkeys(names, 0)

    for k1 in range(n_folds):
        # A set gives O(1) membership checks; scanning a list for every
        # sample made the split quadratic in the dataset size.
        # Assumes fold entries are hashable sample indices (they are compared
        # against integer positions) -- TODO confirm against man_split.
        held_out = set(list_k_fold[k1])

        training_set, training_label = [], []
        test_set, test_label = [], []
        for i2 in range(len(labels)):
            if i2 in held_out:
                test_set.append(train[i2])
                test_label.append(labels[i2])
            else:
                training_set.append(train[i2])
                training_label.append(labels[i2])

        # Statistics consumed by the hand-written Bayes classifier.
        dict_info = form_dict(training_set, training_label)
        mean_dict = bay.find_mean(dict_info)
        std_dict = bay.find_std(dict_info, mean_dict)
        prior = bay.find_priors(dict_info)

        # Evaluation order is kept exactly as before in case any classifier
        # depends on shared state.
        evaluations = (
            ("funbayes", bay.fun_bayes,
             (training_set, test_set, training_label, test_label)),
            ("funknn", nn.fun_knn,
             (training_set, test_set, training_label, test_label)),
            ("knn", nn.knn,
             (training_set, training_label, test_set, test_label)),
            ("bayes", bay.bayes,
             (mean_dict, std_dict, test_set, test_label, prior)),
        )
        for name, classify, args in evaluations:
            accuracy, predictions = classify(*args)
            acc[name] += accuracy
            micro[name] += f1_score(test_label, predictions, average='micro')
            macro[name] += f1_score(test_label, predictions, average='macro')

    # Report order: accuracy, macro F1, micro F1.
    report = (
        ("Accuracy", acc),
        ("Macro F1 Score", macro),
        ("Micro F1 Score", micro),
    )
    for title, totals in report:
        prefix = "Test " + title + " on pubmed using "
        file1.write(prefix + "inbuilt bayes ::"
                    + str(totals["funbayes"] / n_folds) + "\n")
        file1.write(prefix + "inbuilt knn ::"
                    + str(totals["funknn"] / n_folds) + "\n")
        file.write(prefix + "my bayes ::"
                   + str(totals["bayes"] / n_folds) + "\n")
        file.write(prefix + "my knn ::"
                   + str(totals["knn"] / n_folds) + "\n \n")
def start(training_set11, training_label11, test_set, test_label, ii):
    """Compare LSH-based and PCA + k-NN classification over doubling sizes.

    For d = 2, 4, 8, ... while d does not exceed the number of columns of
    ``training_set11``, the training and test sets are transformed with
    ``lsh_ac`` and with ``pca``.  The LSH variant is classified through
    ``hash_iter1`` / ``start_lsh``, the PCA variant through ``nn.fun_knn``.
    Accuracy, macro F1 and micro F1 of both variants are written to
    ``LSH_files/task7<ii>.txt`` under the current working directory (the
    directory is created when missing), and three plots (accuracy, macro,
    micro) are drawn via ``plot`` with figure numbers derived from ``ii``.

    Args:
        training_set11: training samples; must expose a two-element
            ``.shape``.
        training_label11: labels of the training samples.
        test_set: test samples.
        test_label: labels of the test samples.
        ii: dataset number; used in the report file name, in the report
            text, as an argument to ``lsh_ac`` and in the figure numbers.
    """
    out_dir = os.path.join(os.getcwd(), r'LSH_files')
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(out_dir, exist_ok=True)
    report_path = os.path.join(out_dir, 'task7' + str(ii) + '.txt')

    _, m = training_set11.shape

    x = []
    lsh_accuracies, lsh_macros, lsh_micros = [], [], []
    pca_accuracies, pca_macros, pca_micros = [], [], []

    # The context manager guarantees the report is flushed and closed even if
    # one of the evaluation steps raises; it used to be left open.
    with open(report_path, 'w') as file4:
        i3 = 2
        while i3 <= m:
            train_copy = lsh_ac(training_set11, i3, ii)
            test_copy = lsh_ac(test_set, i3, ii)
            train_copy_pca = np.array(pca(training_set11, i3))
            test_copy_pca = np.array(pca(test_set, i3))

            # LSH pipeline.
            find_dict = hash_iter1(train_copy, test_copy, 20)
            accuracy, predictions = start_lsh(
                find_dict, train_copy, training_label11, test_copy, test_label)
            micro = f1_score(test_label, predictions, average='micro')
            macro = f1_score(test_label, predictions, average='macro')

            # PCA + k-NN pipeline.
            accuracy_pca, pred1 = nn.fun_knn(
                train_copy_pca, test_copy_pca, training_label11, test_label)
            micro_pca = f1_score(test_label, pred1, average='micro')
            macro_pca = f1_score(test_label, pred1, average='macro')

            x.append(i3)
            lsh_accuracies.append(accuracy)
            lsh_macros.append(macro)
            lsh_micros.append(micro)
            pca_accuracies.append(accuracy_pca)
            pca_macros.append(macro_pca)
            pca_micros.append(micro_pca)

            rows = (
                ("Test Accuracy", accuracy, accuracy_pca),
                ("Macro Score", macro, macro_pca),
                ("Micro Score", micro, micro_pca),
            )
            for title, lsh_value, pca_value in rows:
                for method, value in (("LSH", lsh_value), ("PCA", pca_value)):
                    file4.write(title + " on dataset " + str(ii)
                                + " using " + method + " on d = " + str(i3)
                                + "=" + str(value) + "\n")

            i3 = i3 * 2
            # NOTE(review): the original nesting of this print was lost when
            # the file's whitespace was collapsed; it is assumed to run once
            # per dimension -- confirm against the upstream source.
            print(accuracy)

    # Each plot compares the LSH series (first) with the PCA series (second).
    plot(x, [lsh_accuracies, pca_accuracies], 1 + (3 * (ii - 1)))
    plot(x, [lsh_macros, pca_macros], 2 + (3 * (ii - 1)))
    plot(x, [lsh_micros, pca_micros], 3 + (3 * (ii - 1)))