Esempio n. 1
0
def queryExpanEva(extend, documentAddress, queryAddress, resultAddress):
    """Print BM25 MRR scores for the queries after term expansion.

    The expansion terms returned by ``userLogExpan`` are printed and then
    added to every query: each term's count in the query is incremented by
    one (starting from zero when the term is not yet present). For every
    element of ``extend`` a BM25 index is built over ``documentAddress`` and,
    for each expanded query, the label ``"BM25: "`` followed by the MRR of
    its ranking against the contents of ``resultAddress`` is printed.

    Args:
        extend: Iterable whose elements are passed one at a time to
            ``make_invidx_by_OkapiBM25`` (and as a whole to ``userLogExpan``).
        documentAddress: Location of the document collection, forwarded to
            the indexing helpers.
        queryAddress: Location of the queries, forwarded to ``get_query``.
        resultAddress: Location of the reference results, forwarded to
            ``readFile``.

    Returns:
        None. All output goes to stdout.
    """
    # Limit forwarded to sum_keyword (presumably the ranking depth -- confirm).
    rank_limit = 1000

    expansion_terms = userLogExpan(extend, documentAddress, queryAddress)
    print(expansion_terms)

    raw_queries = get_query(queryAddress)
    queries = calculate_query(raw_queries)
    reference = readFile(resultAddress)

    # Fold the expansion terms into every query, bumping each term's count.
    for query_id in queries:
        term_counts = queries[query_id]
        for term in expansion_terms:
            term_counts[term] = term_counts.get(term, 0) + 1

    # Only the BM25 ranking is evaluated here; the TF-IDF (cosine_distance)
    # and AIG (make_index_AIG / sum_score) variants are not run.
    for ext in extend:
        bm25_index = make_invidx_by_OkapiBM25(documentAddress, ext)
        for query_id in queries:
            ranking = sum_keyword(bm25_index, queries[query_id], rank_limit)
            print("BM25: ")
            print(MRR(reference, ranking))
Esempio n. 2
0
def get_topK(queryAddress, documentAddress, extend):
    """Print BM25 and AIG scores for the documents in each query's cosine ranking.

    For every element of ``extend`` three rankings are produced per query:
    a cosine ranking (``cosine_distance``), a BM25 ranking (``sum_keyword``)
    and an AIG ranking (``sum_score``). For each query the function prints

    * ``docid  :  weight`` for every cosine-ranked document that also appears
      in that query's BM25 ranking,
    * ``docid : weight`` for every cosine-ranked document that also appears
      in that query's AIG ranking, and
    * ``docid : weight`` (AIG weight) for every document that appears in both
      the BM25 and the AIG ranking of that query.

    Each ranking is iterated as ``(weight, docid)`` pairs.

    Args:
        queryAddress: Location of the queries, forwarded to ``get_query``.
        documentAddress: Location of the document collection, forwarded to
            the indexing helpers.
        extend: Iterable whose elements are passed one at a time to the
            indexing helpers.

    Returns:
        None. All output goes to stdout.
    """
    # Limit forwarded to every scoring helper (presumably the number of top
    # results to keep -- confirm against the helpers).
    top_k = 10

    for e in extend:
        # make_invidx also returns per-document lengths, which are not needed here.
        document1, _ = make_invidx(documentAddress, e)
        document2 = make_invidx_by_OkapiBM25(documentAddress, e)
        query1 = calculate_query(get_query(queryAddress))

        # Keep one BM25 and one AIG ranking per query id, so that each cosine
        # ranking below is compared with the rankings of the *same* query
        # rather than with whatever query happened to be processed last.
        aig_by_query = {}
        bm25_by_query = {}
        for num in query1:
            document3 = make_index_AIG(documentAddress, e, query1[num])
            aig_by_query[num] = sum_score(document3, top_k)
            bm25_by_query[num] = sum_keyword(document2, query1[num], top_k)

        simi1 = cosine_distance(document1, query1, top_k)

        for num in simi1:
            # An empty default keeps the loops below safe when a query id has
            # no BM25/AIG ranking (e.g. an empty query set).
            simi2 = bm25_by_query.get(num, ())
            simi3 = aig_by_query.get(num, ())

            for weight1, docid in simi1[num]:
                for weight2, docid2 in simi2:
                    if docid == docid2:
                        print(docid2, " : ", weight2)
                for weight3, docid3 in simi3:
                    if docid == docid3:
                        print(docid3, ":", weight3)

            # Documents shared by the BM25 and AIG rankings of this query.
            for weight2, docid2 in simi2:
                for weight3, docid3 in simi3:
                    if docid2 == docid3:
                        print(docid3, ":", weight3)
Esempio n. 3
0
def evaluation(extend, documentAddress, queryAddress, resultAddress):
    """Print the BM25 MRR score of every query for each element of ``extend``.

    The reference results are read once from ``resultAddress``. For every
    element of ``extend`` the queries are rebuilt from ``queryAddress``, a
    BM25 index is built over ``documentAddress``, and for each query the
    label ``"BM25: "`` followed by the MRR of its ranking is printed.

    Args:
        extend: Iterable whose elements are passed one at a time to
            ``make_invidx_by_OkapiBM25``.
        documentAddress: Location of the document collection, forwarded to
            the indexing helper.
        queryAddress: Location of the queries, forwarded to ``get_query``.
        resultAddress: Location of the reference results, forwarded to
            ``readFile``.

    Returns:
        None. All output goes to stdout.
    """
    # Limit forwarded to sum_keyword (presumably the ranking depth -- confirm).
    rank_limit = 1000

    reference = readFile(resultAddress)

    # Only the BM25 ranking is evaluated here; the TF-IDF (cosine_distance)
    # and AIG (make_index_AIG / sum_score) variants are not run.
    for ext in extend:
        raw_queries = get_query(queryAddress)
        queries = calculate_query(raw_queries)
        bm25_index = make_invidx_by_OkapiBM25(documentAddress, ext)
        for query_id in queries:
            ranking = sum_keyword(bm25_index, queries[query_id], rank_limit)
            print("BM25: ")
            print(MRR(reference, ranking))