def queryExpanEva(extend, documentAddress, queryAddress, resultAddress):
    """Score log-expanded queries against one index per entry of ``extend``.

    Steps, in order:

    1. Obtain expansion terms from ``userLogExpan`` and print them.
    2. Build the queries with ``calculate_query(get_query(queryAddress))``
       and load the reference data with ``readFile(resultAddress)``.
    3. For every query, raise the count of each expansion term by one
       (a term that is not yet present starts from 0).
    4. For every ``e`` in ``extend``, build an index with
       ``make_invidx_by_OkapiBM25(documentAddress, e)``; for every query,
       pass the output of ``sum_keyword(index, query, 1000)`` to ``MRR``
       and print the returned value under a ``"BM25: "`` header.

    Nothing is returned; all results are written to stdout.
    """
    expansion_terms = userLogExpan(extend, documentAddress, queryAddress)
    print(expansion_terms)

    queries = calculate_query(get_query(queryAddress))
    reference = readFile(resultAddress)

    # Fold the expansion terms into each query's term -> count mapping.
    for query_id in queries:
        term_counts = queries[query_id]
        for term in expansion_terms:
            term_counts[term] = term_counts.get(term, 0) + 1

    for setting in extend:
        bm25_index = make_invidx_by_OkapiBM25(documentAddress, setting)
        for query_id in queries:
            # NOTE(review): 1000 is presumably a cut-off on the number of
            # ranked documents -- confirm against sum_keyword.
            ranking = sum_keyword(bm25_index, queries[query_id], 1000)
            print("BM25: ")
            print(MRR(reference, ranking))
def get_k(queryAddress, documentAddress, e, resultAddress=None):
    """Print each ranked document id that also occurs in the reference data.

    An index is built with ``make_invidx(documentAddress, e)`` and the
    queries with ``calculate_query(get_query(queryAddress))``.
    ``cosine_distance(index, queries, 100)`` is then iterated per query as
    ``(weight, docid)`` pairs, and every ``docid`` found in
    ``readFile(resultAddress)`` is printed.

    Args:
        queryAddress: passed to ``get_query``.
        documentAddress: passed to ``make_invidx``.
        e: second argument of ``make_invidx``.
        resultAddress: passed to ``readFile``.  Optional for backward
            compatibility: the function used to read a module-level
            ``resultAddress`` that was never part of its signature, so when
            this argument is omitted that global is still used.

    Raises:
        NameError: if ``resultAddress`` is omitted and no module-level
            ``resultAddress`` exists (the same failure as before, but
            reported before any index is built).
    """
    if resultAddress is None:
        # Legacy path: fall back to the module global the old body relied on.
        module_names = globals()
        if "resultAddress" not in module_names:
            raise NameError("name 'resultAddress' is not defined")
        resultAddress = module_names["resultAddress"]

    # The second value returned by make_invidx is not needed here.
    document1, _ = make_invidx(documentAddress, e)
    query1 = calculate_query(get_query(queryAddress))
    simi1 = cosine_distance(document1, query1, 100)
    result = readFile(resultAddress)

    for num in simi1:
        for _, docid in simi1[num]:
            if docid in result:
                print(docid)
def evaluation(extend, documentAddress, queryAddress, resultAddress):
    """Print an ``MRR`` score per query for each entry of ``extend``.

    The reference data is loaded once with ``readFile(resultAddress)``.
    For every ``e`` in ``extend`` the queries are built with
    ``calculate_query(get_query(queryAddress))`` and an index with
    ``make_invidx_by_OkapiBM25(documentAddress, e)``.  For every query the
    output of ``sum_keyword(index, query, 1000)`` is passed to ``MRR`` and
    the returned value is printed under a ``"BM25: "`` header.

    Nothing is returned; all results are written to stdout.
    """
    result = readFile(resultAddress)

    for e in extend:
        # Deliberately rebuilt on every iteration, as before: hoisting it
        # would read the query file even when ``extend`` is empty, and would
        # share one structure across runs.
        # NOTE(review): hoist only after confirming the helpers do not
        # mutate the queries.
        query1 = calculate_query(get_query(queryAddress))
        document2 = make_invidx_by_OkapiBM25(documentAddress, e)

        for num in query1:
            simi2 = sum_keyword(document2, query1[num], 1000)
            print("BM25: ")
            print(MRR(result, simi2))
def queryExpanEva1(extend, documentAddress, queryAddress, resultAddress):
    """Print an ``MRR`` score for ``queryExpand1`` once per entry of ``extend``.

    For each ``e`` in ``extend`` the function calls
    ``queryExpand1(documentAddress, e, queryAddress)``, loads the reference
    data with ``readFile(resultAddress)``, and prints the value returned by
    ``MRR(reference, ranking)`` under a ``"precision:"`` header.

    Nothing is returned; all results are written to stdout.
    """
    for setting in extend:
        ranking = queryExpand1(documentAddress, setting, queryAddress)
        reference = readFile(resultAddress)
        # NOTE(review): the header says "precision:" but the value printed
        # comes from MRR -- confirm which label is intended.
        print("precision:")
        score = MRR(reference, ranking)
        print(score)