コード例 #1
0
def sent_sim_score_num_nn_words(sent1,sent2):
    """Score two sentences by the adjective/noun words they have in common.

    Both sentences are lower-cased and tokenised with ``sent_to_words``.
    Every position pair ``(i, j)`` with ``words1[i] == words2[j]`` counts
    once when at least one of the two occurrences is tagged ``'ADJ'``,
    ``'N'`` or ``'NP'`` by ``fn_assign_POS_tags``.  The count is divided by
    ``log(#unique words in sent1) + log(#unique words in sent2)``.

    Returns 0 when either sentence yields no words, or when the log
    normaliser is below 2 (sentences too short to compare); otherwise the
    normalised count as a float.
    """
    meaningful_tags = ('ADJ', 'N', 'NP')

    words1 = sent_to_words(sent1.lower())
    words2 = sent_to_words(sent2.lower())

    unique1 = len(set(words1))
    unique2 = len(set(words2))
    # math.log(0) raises ValueError, so an empty sentence must be rejected
    # before the normaliser is computed.
    if unique1 == 0 or unique2 == 0:
        return 0

    denom = math.log(unique1) + math.log(unique2)
    # Also protects the final division: denom is 0 when both sentences have
    # exactly one distinct word.
    if denom < 2:
        return 0

    pos_tags1 = fn_assign_POS_tags(words1)
    pos_tags2 = fn_assign_POS_tags(words2)

    num_meaningful = 0
    for index1, word1 in enumerate(words1):
        for index2, word2 in enumerate(words2):
            if word1 != word2:
                continue
            # A match counts if either side is tagged as a content word.
            if (pos_tags1[index1] in meaningful_tags
                    or pos_tags2[index2] in meaningful_tags):
                num_meaningful += 1

    return num_meaningful/denom
コード例 #2
0
def enhanced_rouge(n,reference_sents,machine_sents,counter):
    """Compute a ROUGE-n style recall with partial credit for near misses.

    The sets of distinct n-grams of the reference and machine sentences are
    built with ``sent_to_words``.  N-grams present in both sets count fully.
    For ``n > 1``, a machine-only n-gram is "special" when one of its words
    is tagged ``'ADJ'``, ``'N'`` or ``'NP'`` by ``fn_assign_POS_tags`` and
    that word occurs in the text of some reference n-gram; the special
    n-grams together contribute ``len(special) / n``.

    Parameters:
        n: n-gram size.
        reference_sents: iterable of reference sentences.
        machine_sents: iterable of machine-generated sentences.
        counter: not used by this function; kept for caller compatibility.

    Returns:
        -1 when the reference sentences yield no n-grams, otherwise
        ``(len(common) + len(special) / n) / len(reference n-grams)`` as a
        float.
    """
    meaningful_tags = ('ADJ', 'N', 'NP')

    def collect_n_grams(sentences):
        # Distinct n-grams of all sentences, each as a space-joined string.
        grams = set()
        for sentence in sentences:
            words = sent_to_words(sentence)
            for start in range(len(words) + 1 - n):
                grams.add(' '.join(words[start:start + n]))
        return grams

    ref_n_grams = collect_n_grams(reference_sents)
    if len(ref_n_grams) == 0:
        # Sentinel for "no reference n-grams"; also avoids dividing by zero.
        return -1

    mac_n_grams = collect_n_grams(machine_sents)
    intersect = ref_n_grams & mac_n_grams

    special = set()
    if n > 1:
        for candidate in mac_n_grams - ref_n_grams:
            tokens = candidate.split()
            pos_tags = fn_assign_POS_tags(tokens)
            for index, token in enumerate(tokens):
                if pos_tags[index] not in meaningful_tags:
                    continue
                # Substring test against the joined reference n-gram text,
                # so a word also matches inside a longer reference word.
                if any(token in ref for ref in ref_n_grams):
                    special.add(candidate)
                    # One qualifying word is enough for this n-gram.
                    break

    # float() forces true division even under Python 2, where int / int
    # would truncate the partial credit and the final score.
    partial_credit = float(len(special)) / n
    return (len(intersect) + partial_credit) / len(ref_n_grams)