Esempio n. 1
0
def get_k(queryAddress, documentAddress, e):
    """Print scored document ids that also occur in the stored results.

    Steps, in order:
      1. ``make_invidx(documentAddress, e)`` yields an index plus a second
         value that this function does not use.
      2. ``calculate_query(get_query(queryAddress))`` yields the queries.
      3. ``cosine_distance(index, queries, 100)`` yields a mapping whose
         values are iterables of ``(weight, docid)`` pairs
         (100 is presumably the number of hits kept -- TODO confirm).
      4. ``readFile(resultAddress)`` yields a container of known doc ids.

    Every ``docid`` from step 3 that is contained in the step-4 container is
    printed, one per line. Nothing is returned.

    NOTE(review): ``resultAddress`` is neither a parameter nor a local; it
    has to be available as a module-level name -- confirm against the file.
    """
    index, _unused_lengths = make_invidx(documentAddress, e)
    queries = calculate_query(get_query(queryAddress))
    scored = cosine_distance(index, queries, 100)
    known_ids = readFile(resultAddress)

    for query_key in scored:
        pairs = scored[query_key]
        for _weight, doc_id in pairs:
            if doc_id in known_ids:
                print(doc_id)
Esempio n. 2
0
def userLogExpan(extend, documentAddress, queryAddress):
    """Return the top terms selected for the last (extension, query) pair.

    The user feedback is loaded once from a fixed path. Then, for every
    entry of ``extend``, the index (``make_invidx``), the two values returned
    by ``read_file`` and the queries (``get_query``) are loaded, and for every
    query key the tokenized query text is weighted with ``coWeight`` and
    reduced with ``selectTopTerm(..., 10)``.

    Returns:
        The value of the most recent ``selectTopTerm`` call, or an empty
        dict when no (extension, query) pair was processed.

    NOTE(review): the result is rebound on every inner iteration, so earlier
    results are discarded -- confirm whether accumulation was intended.
    """
    feedback = userFeedback("C://TweetStudy//12//12//userFeedback3.txt")
    top_terms = {}

    for extension in extend:
        index, _unused_lengths = make_invidx(documentAddress, extension)
        tf_part, atid_part = read_file(documentAddress, extension)
        queries = get_query(queryAddress)
        for query_key in queries:
            weights = coWeight(
                feedback,
                nltk.word_tokenize(queries[query_key]),
                index,
                tf_part,
                atid_part,
            )
            top_terms = selectTopTerm(weights, 10)

    return top_terms
Esempio n. 3
0
def get_topK(queryAddress, documentAddress, extend):
    """Print doc ids whose scores appear in more than one scoring result.

    For every entry of ``extend``:
      * build two indexes (``make_invidx`` and ``make_invidx_by_OkapiBM25``)
        and load the queries via ``calculate_query(get_query(...))``;
      * for every query key compute ``sum_score(make_index_AIG(...), 10)``
        and ``sum_keyword(second_index, query, 10)``;
      * compute ``cosine_distance(first_index, queries, 10)``;
      * for every ``(weight, docid)`` pair of every cosine entry, print the
        pairs of the keyword list (separator ``" : "``) and of the AIG list
        (separator ``":"``) that carry the same doc id;
      * after each cosine entry, print the AIG pairs whose doc id also
        occurs in the keyword list.

    Nothing is returned.

    NOTE(review): the keyword and AIG lists are rebound for every query key,
    so only the lists of the last query are compared against all cosine
    entries -- confirm whether a per-query comparison was intended.
    """

    def _print_matches(wanted_id, scored_pairs, separator):
        # Print every (weight, docid) pair of scored_pairs whose docid
        # equals wanted_id, as "docid <separator> weight".
        for weight, doc_id in scored_pairs:
            if wanted_id == doc_id:
                print(doc_id, separator, weight)

    for extension in extend:
        cosine_index, _unused_lengths = make_invidx(documentAddress, extension)
        keyword_index = make_invidx_by_OkapiBM25(documentAddress, extension)

        queries = calculate_query(get_query(queryAddress))
        for query_key in queries:
            aig_index = make_index_AIG(
                documentAddress, extension, queries[query_key]
            )
            aig_hits = sum_score(aig_index, 10)
            keyword_hits = sum_keyword(keyword_index, queries[query_key], 10)

        cosine_hits = cosine_distance(cosine_index, queries, 10)

        for result_key in cosine_hits:
            for _cosine_weight, cosine_id in cosine_hits[result_key]:
                _print_matches(cosine_id, keyword_hits, " : ")
                _print_matches(cosine_id, aig_hits, ":")
            for _keyword_weight, keyword_id in keyword_hits:
                _print_matches(keyword_id, aig_hits, ":")