def sent_sim_score_num_nn_words(sent1, sent2):
    """Score two sentences by the adjective/noun words they share.

    Both sentences are lower-cased, tokenised with ``sent_to_words`` and
    POS-tagged with ``fn_assign_POS_tags``. Every pair of positions (one in
    each sentence) that holds the same word counts once when at least one of
    the two tags is ``'ADJ'``, ``'N'`` or ``'NP'``. The count is then divided
    by ``log(distinct words in sent1) + log(distinct words in sent2)``.

    Args:
        sent1: First sentence text; lower-cased before tokenising.
        sent2: Second sentence text; lower-cased before tokenising.

    Returns:
        ``0`` when either sentence yields no words or when the log-based
        denominator is below 2; otherwise the normalised match count.
    """
    meaningful_tags = ('ADJ', 'N', 'NP')

    words1 = sent_to_words(sent1.lower())
    words2 = sent_to_words(sent2.lower())

    distinct1 = len(set(words1))
    distinct2 = len(set(words2))
    # math.log(0) raises ValueError; a sentence without words shares nothing,
    # so report zero similarity instead of crashing.
    if distinct1 == 0 or distinct2 == 0:
        return 0

    denom = math.log(distinct1) + math.log(distinct2)
    if denom < 2:
        # Too few distinct words to normalise by; this also rules out a
        # zero denominator (both sentences having a single distinct word).
        return 0

    pos_tags1 = fn_assign_POS_tags(words1)
    pos_tags2 = fn_assign_POS_tags(words2)

    num_meaningful = 0
    for idx1, word1 in enumerate(words1):
        for idx2, word2 in enumerate(words2):
            if word1 != word2:
                continue
            # Tags are looked up by position, only for matching word pairs.
            if (pos_tags1[idx1] in meaningful_tags
                    or pos_tags2[idx2] in meaningful_tags):
                num_meaningful += 1

    return num_meaningful / denom
def _collect_n_grams(sents, n):
    """Return the set of space-joined word n-grams found in *sents*."""
    n_grams = set()
    for sent in sents:
        words = sent_to_words(sent)
        for start in range(len(words) + 1 - n):
            n_grams.add(' '.join(words[start:start + n]))
    return n_grams


def enhanced_rouge(n, reference_sents, machine_sents, counter):
    """Compute an n-gram overlap score with partial credit for near misses.

    The base score is the number of distinct n-grams shared by the reference
    and machine sentences. For ``n > 1``, a machine-only n-gram earns partial
    credit (``1 / n``) when one of its words is tagged ``'ADJ'``, ``'N'`` or
    ``'NP'`` by ``fn_assign_POS_tags`` and that word occurs somewhere in a
    reference n-gram. The total is divided by the number of distinct
    reference n-grams.

    Args:
        n: Size of the n-grams; expected to be a positive integer.
        reference_sents: Iterable of reference sentences, each tokenised with
            ``sent_to_words``.
        machine_sents: Iterable of machine-produced sentences, tokenised the
            same way.
        counter: Unused by this function; kept so existing callers keep
            working.

    Returns:
        ``-1`` when the reference sentences yield no n-grams; otherwise the
        score described above as a float.
    """
    meaningful_tags = ('ADJ', 'N', 'NP')

    ref_n_grams = _collect_n_grams(reference_sents, n)
    if not ref_n_grams:
        # Nothing to compare against; -1 is the "no reference" sentinel.
        return -1

    mac_n_grams = _collect_n_grams(machine_sents, n)
    intersect = ref_n_grams & mac_n_grams

    special = set()
    if n > 1:
        for candidate in mac_n_grams - ref_n_grams:
            words = candidate.split()
            pos_tags = fn_assign_POS_tags(words)
            for index, word in enumerate(words):
                if pos_tags[index] not in meaningful_tags:
                    continue
                # NOTE: this is a substring test against the whole reference
                # n-gram, so a word also matches inside a longer word.
                if any(word in ref for ref in ref_n_grams):
                    special.add(candidate)
                    # One qualifying word is enough; further scanning cannot
                    # change the set.
                    break

    # Force true division so the score is not floored to an integer on
    # interpreters where "/" between ints truncates.
    partial_credit = float(len(special)) / n
    return (len(intersect) + partial_credit) / len(ref_n_grams)