Esempio n. 1
0
            std_dict = bay.find_std(dict_info, mean_dict)
            prior = bay.find_priors(dict_info)

            #print(len(training_set))
            #print(len(test_set))
            _, predictions0 = bay.fun_bayes(training_set, test_set,
                                            training_label, test_label)
            acc_funbayes += _
            micro_funbayes += f1_score(test_label,
                                       predictions0,
                                       average='micro')
            macro_funbayes += f1_score(test_label,
                                       predictions0,
                                       average='macro')

            _, predictions1 = nn.fun_knn(training_set, test_set,
                                         training_label, test_label)
            acc_funknn += _
            micro_funknn += f1_score(test_label, predictions1, average='micro')
            macro_funknn += f1_score(test_label, predictions1, average='macro')

            _, predictions2 = nn.knn(training_set, training_label, test_set,
                                     test_label)
            acc_knn += _
            micro_knn += f1_score(test_label, predictions2, average='micro')
            macro_knn += f1_score(test_label, predictions2, average='macro')

            _, predictions3 = bay.bayes(mean_dict, std_dict, test_set,
                                        test_label, prior)
            acc_bayes += _
            micro_bayes += f1_score(test_label, predictions3, average='micro')
            macro_bayes += f1_score(test_label, predictions3, average='macro')
Esempio n. 2
0
def pubmed(data_path="E:\\pubmed.csv", label_path="E:\\pubmed_label.csv",
           n_folds=5):
    """Cross-validate four classifiers on the pubmed data and log the averages.

    The data and labels are loaded with ``load_data`` and partitioned with
    ``man_split``. For every fold the held-out rows become the test set and
    the remaining rows the training set; then ``bay.fun_bayes``,
    ``nn.fun_knn``, ``nn.knn`` and ``bay.bayes`` are evaluated in that order.
    The first value each of them returns is summed as the accuracy, and the
    second (the predictions) is scored with micro and macro ``f1_score``.

    The per-fold averages are written as text lines: the "inbuilt" results go
    to the module-level ``file1`` handle and the "my" results to the
    module-level ``file`` handle (both are expected to be defined elsewhere
    in this module).

    Args:
        data_path: Location of the feature CSV passed to ``load_data``.
        label_path: Location of the label CSV passed to ``load_data``.
        n_folds: Number of folds requested from ``man_split`` and used as the
            divisor when averaging.

    Returns:
        None. Results are only written to ``file`` and ``file1``.
    """
    train = load_data(data_path)
    labels = load_data(label_path)
    # NOTE(review): man_split is assumed to return, per fold, the row indices
    # that belong to that fold -- confirm against its definition.
    list_k_fold = man_split(train, labels, n_folds)

    # Running sums over all folds, keyed by classifier.
    classifiers = ("funbayes", "funknn", "knn", "bayes")
    acc = dict.fromkeys(classifiers, 0)
    micro = dict.fromkeys(classifiers, 0)
    macro = dict.fromkeys(classifiers, 0)

    def record(name, accuracy, predictions, truth):
        """Add one fold's accuracy and F1 scores to the running sums."""
        acc[name] += accuracy
        micro[name] += f1_score(truth, predictions, average='micro')
        macro[name] += f1_score(truth, predictions, average='macro')

    for k1 in range(n_folds):
        fold = list_k_fold[k1]
        # A set makes the per-row membership test O(1) instead of scanning
        # the whole fold for every row. Fall back to a list if the fold
        # elements turn out not to be hashable.
        try:
            held_out = set(fold)
        except TypeError:
            held_out = list(fold)

        test_set, test_label = [], []
        training_set, training_label = [], []
        for i2 in range(len(labels)):
            if i2 in held_out:
                test_set.append(train[i2])
                test_label.append(labels[i2])
            else:
                training_set.append(train[i2])
                training_label.append(labels[i2])

        # Statistics needed by the hand-written Bayes classifier.
        dict_info = form_dict(training_set, training_label)
        mean_dict = bay.find_mean(dict_info)
        std_dict = bay.find_std(dict_info, mean_dict)
        prior = bay.find_priors(dict_info)

        accuracy, predictions = bay.fun_bayes(training_set, test_set,
                                              training_label, test_label)
        record("funbayes", accuracy, predictions, test_label)

        accuracy, predictions = nn.fun_knn(training_set, test_set,
                                           training_label, test_label)
        record("funknn", accuracy, predictions, test_label)

        accuracy, predictions = nn.knn(training_set, training_label, test_set,
                                       test_label)
        record("knn", accuracy, predictions, test_label)

        accuracy, predictions = bay.bayes(mean_dict, std_dict, test_set,
                                          test_label, prior)
        record("bayes", accuracy, predictions, test_label)

    # The message texts (including the double space in the first one) are
    # kept exactly as they were so existing result files stay comparable.
    file1.write("Test Accuracy on pubmed using inbuilt bayes  ::" +
                str(acc["funbayes"] / n_folds) + "\n")
    file1.write("Test Accuracy on pubmed using inbuilt knn ::" +
                str(acc["funknn"] / n_folds) + "\n")
    file.write("Test Accuracy on pubmed using my bayes ::" +
               str(acc["bayes"] / n_folds) + "\n")
    file.write("Test Accuracy on pubmed using my knn ::" +
               str(acc["knn"] / n_folds) + "\n \n")

    file1.write("Test Macro F1 Score on pubmed using inbuilt bayes ::" +
                str(macro["funbayes"] / n_folds) + "\n")
    file1.write("Test Macro F1 Score on pubmed using inbuilt knn ::" +
                str(macro["funknn"] / n_folds) + "\n")
    file.write("Test Macro F1 Score on pubmed using my bayes ::" +
               str(macro["bayes"] / n_folds) + "\n")
    file.write("Test Macro F1 Score on pubmed using my knn ::" +
               str(macro["knn"] / n_folds) + "\n \n")

    file1.write("Test Micro F1 Score on pubmed using inbuilt bayes ::" +
                str(micro["funbayes"] / n_folds) + "\n")
    file1.write("Test Micro F1 Score on pubmed using inbuilt knn ::" +
                str(micro["funknn"] / n_folds) + "\n")
    file.write("Test Micro F1 Score on pubmed using my bayes ::" +
               str(micro["bayes"] / n_folds) + "\n")
    file.write("Test Micro F1 Score on pubmed using my knn ::" +
               str(micro["knn"] / n_folds) + "\n \n")
Esempio n. 3
0
def start(training_set11,training_label11,test_set,test_label,ii):
    """Compare LSH and PCA reductions over doubling dimensions and plot them.

    For d = 2, 4, 8, ... up to the number of columns of ``training_set11``:
    the train and test sets are reduced with ``lsh_ac`` and with ``pca``;
    the LSH data is classified through ``hash_iter1`` + ``start_lsh`` and the
    PCA data through ``nn.fun_knn``. For both, the first returned value is
    recorded as the accuracy and the predictions are scored with micro and
    macro ``f1_score``.

    Each step's six scores are written to ``LSH_files/task7<ii>.txt`` under
    the current working directory (the directory is created if needed), the
    LSH accuracy is printed, and after the sweep three ``plot`` calls are
    made with the accuracy, macro and micro series respectively.

    Args:
        training_set11: Training data; must expose a two-element ``shape``.
        training_label11: Labels for ``training_set11``.
        test_set: Test data.
        test_label: Labels for ``test_set``.
        ii: Dataset number; used in the file name, in the log lines, passed
            to ``lsh_ac`` and used to derive the third argument of ``plot``.

    Returns:
        None.
    """
    final = os.path.join(os.getcwd(), r'LSH_files')
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(final, exist_ok=True)

    _rows, m = training_set11.shape

    x = []
    lsh_acc, lsh_macro, lsh_micro = [], [], []
    pca_acc, pca_macro, pca_micro = [], [], []

    # The context manager guarantees the log is flushed and closed, even if
    # one of the evaluation steps raises.
    log_path = os.path.join(final, 'task7' + str(ii) + '.txt')
    with open(log_path, 'w') as file4:
        i3 = 2
        while i3 <= m:
            train_copy = lsh_ac(training_set11, i3, ii)
            test_copy = lsh_ac(test_set, i3, ii)
            train_copy_pca = np.array(pca(training_set11, i3))
            test_copy_pca = np.array(pca(test_set, i3))

            # LSH pipeline.
            find_dict = hash_iter1(train_copy, test_copy, 20)
            accuracy, predictions = start_lsh(find_dict, train_copy,
                                              training_label11, test_copy,
                                              test_label)
            micro = f1_score(test_label, predictions, average='micro')
            macro = f1_score(test_label, predictions, average='macro')

            # PCA pipeline.
            accuracy_pca, pred1 = nn.fun_knn(train_copy_pca, test_copy_pca,
                                             training_label11, test_label)
            micro_pca = f1_score(test_label, pred1, average='micro')
            macro_pca = f1_score(test_label, pred1, average='macro')

            x.append(i3)
            lsh_acc.append(accuracy)
            lsh_macro.append(macro)
            lsh_micro.append(micro)
            pca_acc.append(accuracy_pca)
            pca_macro.append(macro_pca)
            pca_micro.append(micro_pca)

            # Same six lines, in the same order, as the hand-written writes:
            # accuracy, macro, micro -- each for LSH first, then PCA.
            report = (
                ("Test Accuracy", accuracy, accuracy_pca),
                ("Macro Score", macro, macro_pca),
                ("Micro Score", micro, micro_pca),
            )
            for title, lsh_value, pca_value in report:
                for method, value in (("LSH", lsh_value), ("PCA", pca_value)):
                    file4.write(title + " on dataset " + str(ii) +
                                " using " + method + " on d = " + str(i3) +
                                "=" + str(value) + "\n")

            i3 = i3 * 2
            print(accuracy)

    # Each plot receives the LSH series followed by the PCA series.
    plot(x, [lsh_acc, pca_acc], 1 + (3 * (ii - 1)))
    plot(x, [lsh_macro, pca_macro], 2 + (3 * (ii - 1)))
    plot(x, [lsh_micro, pca_micro], 3 + (3 * (ii - 1)))