def queryExpanEva(extend,documentAddress,queryAddress,resultAddress):
    """Print the MRR of BM25 retrieval after expanding every query.

    The expansion terms come from ``userLogExpan`` and are printed once.
    Each of them is then added, with a count of one, to the term counts of
    every query parsed from ``queryAddress``.  For every value in ``extend``
    a BM25 index is built with ``make_invidx_by_OkapiBM25`` and each expanded
    query is ranked with ``sum_keyword``; the ``MRR`` of that ranking against
    the judgments read from ``resultAddress`` is printed after a
    ``"BM25: "`` label.

    Args:
        extend: Iterable of values handed one at a time to the index builder
            (and, as a whole, to ``userLogExpan``).
        documentAddress: Location of the document collection, passed through
            to the helpers unchanged.
        queryAddress: Location of the query file.
        resultAddress: Location of the relevance judgments.

    Returns:
        None. All results are written to standard output.
    """
    expanded_terms = userLogExpan(extend, documentAddress, queryAddress)
    print(expanded_terms)

    queries = calculate_query(get_query(queryAddress))
    relevance = readFile(resultAddress)

    # Give every expansion term one extra occurrence in each query.
    # NOTE(review): expanded_terms is re-iterated once per query, so it is
    # assumed to be a re-iterable collection (not a one-shot generator).
    for num in queries:
        term_counts = queries[num]
        for term in expanded_terms:
            term_counts[term] = term_counts.get(term, 0) + 1

    # NOTE: earlier revisions also scored a cosine ("tf-idf") ranking and an
    # AIG ranking here with MRR/recall; only the BM25 path is active.
    for e in extend:
        bm25_index = make_invidx_by_OkapiBM25(documentAddress, e)
        for num in queries:
            # 1000 is presumably the ranking cutoff -- confirm in sum_keyword.
            ranking = sum_keyword(bm25_index, queries[num], 1000)
            print("BM25: ")
            print(MRR(relevance, ranking))
def get_topK(queryAddress,documentAddress,extend):
    """Print the documents shared between the three top-10 rankings.

    For every value ``e`` in ``extend`` three rankings are produced per
    query: one from ``cosine_distance`` over the ``make_invidx`` index, one
    from ``sum_keyword`` over the ``make_invidx_by_OkapiBM25`` index, and one
    from ``sum_score`` over the per-query ``make_index_AIG`` index.  For each
    query the function prints:

    * ``docid  :  weight`` for documents in both the cosine and the
      ``sum_keyword`` ranking (weight taken from the latter),
    * ``docid : weight`` for documents in both the cosine and the
      ``sum_score`` ranking (weight taken from the latter),
    * ``docid : weight`` for documents in both the ``sum_keyword`` and the
      ``sum_score`` ranking (weight taken from the latter).

    Every ranking is treated as an iterable of ``(weight, docid)`` pairs,
    which is how the original loops unpacked them.

    NOTE(review): the original was collapsed onto one line, so its exact
    nesting is not recoverable.  Every plausible reading compared a single
    query's ``sum_keyword``/``sum_score`` rankings against all queries'
    cosine rankings; this version pairs the rankings per query instead.
    Confirm that this is the intended comparison.

    Args:
        queryAddress: Location of the query file.
        documentAddress: Location of the document collection.
        extend: Iterable of values handed one at a time to the index
            builders.

    Returns:
        None. All results are written to standard output.
    """
    for e in extend:
        # make_invidx returns a second value that this function never needs.
        cosine_index, _ = make_invidx(documentAddress, e)
        bm25_index = make_invidx_by_OkapiBM25(documentAddress, e)

        queries = calculate_query(get_query(queryAddress))

        # Keep the original call order: per-query rankings first, then the
        # cosine ranking over the whole query set.
        rankings_by_query = {}
        for num in queries:
            aig_index = make_index_AIG(documentAddress, e, queries[num])
            aig_ranking = sum_score(aig_index, 10)
            bm25_ranking = sum_keyword(bm25_index, queries[num], 10)
            rankings_by_query[num] = (bm25_ranking, aig_ranking)

        cosine_rankings = cosine_distance(cosine_index, queries, 10)

        for num, (bm25_ranking, aig_ranking) in rankings_by_query.items():
            # A query may be missing from the cosine result; treat it as empty.
            cosine_ranking = cosine_rankings[num] if num in cosine_rankings else ()

            for _weight, docid in cosine_ranking:
                for weight2, docid2 in bm25_ranking:
                    if docid == docid2:
                        print(docid2," : ",weight2)
                for weight3, docid3 in aig_ranking:
                    if docid == docid3:
                        print(docid3,":",weight3)

            for _weight2, docid2 in bm25_ranking:
                for weight3, docid3 in aig_ranking:
                    if docid2 == docid3:
                        print(docid3,":",weight3)
def evaluation(extend, documentAddress, queryAddress,resultAddress):
    """Print the MRR of BM25 retrieval for every query and index setting.

    The relevance judgments are read from ``resultAddress`` and the queries
    from ``queryAddress``.  For every value in ``extend`` a BM25 index is
    built with ``make_invidx_by_OkapiBM25``; each query is ranked against it
    with ``sum_keyword`` and the ``MRR`` of that ranking is printed after a
    ``"BM25: "`` label.

    Args:
        extend: Iterable of values handed one at a time to the index builder.
        documentAddress: Location of the document collection.
        queryAddress: Location of the query file.
        resultAddress: Location of the relevance judgments.

    Returns:
        None. All results are written to standard output.
    """
    relevance = readFile(resultAddress)

    # The parsed queries do not depend on the index setting, so build them
    # once instead of re-reading the query file for every value in extend.
    queries = calculate_query(get_query(queryAddress))

    # NOTE: earlier revisions also scored a cosine ("tf-idf") ranking and an
    # AIG ranking here with MRR/recall; only the BM25 path is active.
    for e in extend:
        bm25_index = make_invidx_by_OkapiBM25(documentAddress, e)
        for num in queries:
            # 1000 is presumably the ranking cutoff -- confirm in sum_keyword.
            ranking = sum_keyword(bm25_index, queries[num], 1000)
            print("BM25: ")
            print(MRR(relevance, ranking))