import lexical
import xml_util


def syntax_matching(pair_attributes, distances, threshold):
    """Calculates entailment values based on the tree edit distances and threshold"""
    n = len(pair_attributes)
    entailments = [0] * (n + 1)
    results = [0] * (n + 1)
    # Calculates entailments and accuracy
    for i in range(n):
        t, h, id_num, e, ta = pair_attributes[i]
        id_num = int(id_num)
        entails = distances[i] < threshold
        entailments[id_num] = "YES" if entails else "NO"
        results[id_num] = 1 if entailments[id_num] == e else 0
    lexical.output_rte(entailments)
    print "Threshold: " + "%.3f" % threshold + " Accuracy: " + str(float(sum(results)) / float(n))
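
# A minimal usage sketch, not part of the original pipeline: sweeping a range
# of thresholds to see how accuracy responds. 'pairs' and 'distances' are
# assumed to come from the same preprocessing step that already feeds
# syntax_matching, and the sweep bounds are illustrative.
def sweep_syntax_thresholds(pairs, distances, low, high, step):
    threshold = low
    while threshold <= high:
        # Prints one "Threshold: ... Accuracy: ..." line per setting
        syntax_matching(pairs, distances, threshold)
        threshold += step
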
def bleu_matching(threshold, pairs):
    """Calculates entailment values based on BLEU-style lemma matching and threshold"""
    pair_attributes = pairs[:]
    n = len(pair_attributes)
    results = [0] * (n + 1)
    entailments = [0] * (n + 1)
    for i in range(n):
        t, h, id_num, e, ta = pair_attributes[i]
        id_num = int(id_num)
        t_lemmas, pos = xml_util.get_lemmas_from_text_node(t)
        h_lemmas, pos = xml_util.get_lemmas_from_text_node(h)
        entailments[id_num] = calculate_entailment(t_lemmas, h_lemmas, threshold)
        if e == entailments[id_num]:
            results[id_num] = 1
    lexical.output_rte(entailments)
    print "Threshold: " + "%.2f" % threshold + " Accuracy: " + str(float(sum(results)) / float(n))
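
# calculate_entailment is defined elsewhere in the project. A minimal sketch of
# a plausible overlap-based version is given below, assuming it returns
# "YES"/"NO" depending on the fraction of hypothesis lemmas that also occur in
# the text; this is an illustrative assumption, not the project's implementation.
def calculate_entailment_overlap_sketch(t_lemmas, h_lemmas, threshold):
    t_set = set(lemma.lower() for lemma in t_lemmas)
    matched = sum(1 for lemma in h_lemmas if lemma.lower() in t_set)
    coverage = float(matched) / float(len(h_lemmas)) if h_lemmas else 0.0
    return "YES" if coverage > threshold else "NO"
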
def idf_weighting(threshold, pairs):
    """Calculates entailment values based on IDF-weighted lemma matching and threshold"""
    pair_attributes = pairs[:]
    words = []
    documents = []
    n = len(pair_attributes)
    results = [0] * (n + 1)
    entailments = [0] * (n + 1)
    # Starts by adding all the words to the list 'words' and then making a set of these words.
    # Also makes a list of documents where each document is a set of all the words in a given
    # (text, hypothesis) pair.
    for i in range(n):
        t, h, id_num, e, ta = pair_attributes[i]
        t_lemmas, pos = xml_util.get_lemmas_from_text_node(t)
        h_lemmas, pos = xml_util.get_lemmas_from_text_node(h)
        doc = []
        for word in t_lemmas:
            words.append(word.lower())
            doc.append(word.lower())
        for word in h_lemmas:
            words.append(word.lower())
            doc.append(word.lower())
        documents.append(set(doc))
    word_set = set(words)
    # Creates a dictionary 'idf_dict' that counts how many documents each word appears in
    idf_dict = {}
    # Starts by initialising the count for every word to 0
    for word in word_set:
        idf_dict[word] = 0
    # Then counts the number of documents each word in word_set appears in
    for word in word_set:
        for document in documents:
            if word in document:
                idf_dict[word] += 1
    print "dict done"
    for i in range(n):
        t, h, id_num, e, ta = pair_attributes[i]
        id_num = int(id_num)
        t_lemmas, pos = xml_util.get_lemmas_from_text_node(t)
        h_lemmas, pos = xml_util.get_lemmas_from_text_node(h)
        entailments[id_num] = calculate_entailment(t_lemmas, h_lemmas, idf_dict, threshold)
        results[id_num] = 1 if e == entailments[id_num] else 0
    lexical.output_rte(entailments)
    print "Threshold: " + "%.2f" % threshold + " Accuracy: " + str(float(sum(results)) / float(n))
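
# The document-frequency loop above checks every word in word_set against every
# document, which is quadratic. A behaviour-equivalent one-pass sketch (the
# helper name is hypothetical) that counts each document's words directly:
def build_df_dict(documents):
    df = {}
    for document in documents:
        # Each document is a set, so every word counts at most once per document
        for word in document:
            df[word] = df.get(word, 0) + 1
    return df
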
def lemma_matching(threshold, pairs):
    """Calculates entailment values based on (lemma, POS) matching and threshold"""
    pair_attributes = pairs[:]
    n = len(pair_attributes)
    results = [0] * (n + 1)
    entailments = [0] * (n + 1)
    for i in range(n):
        t, h, id_num, e, ta = pair_attributes[i]
        id_num = int(id_num)
        t_lemmas, t_pos = xml_util.get_lemmas_from_text_node(t)
        h_lemmas, h_pos = xml_util.get_lemmas_from_text_node(h)
        # Pairs each lemma with its POS tag (zip avoids shadowing the outer loop index)
        text = zip(t_lemmas, t_pos)
        hypothesis = zip(h_lemmas, h_pos)
        entailments[id_num] = calculate_entailment(text, hypothesis, threshold)
        results[id_num] = 1 if e == entailments[id_num] else 0
    lexical.output_rte(entailments)
    print "Threshold: " + "%.2f" % threshold + " Accuracy: " + str(float(sum(results)) / float(n))
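
# lemma_matching hands (lemma, POS) tuples to calculate_entailment. A minimal
# sketch of such a variant, assuming a match requires both the lemma and its
# POS tag to agree; this is an illustrative assumption, not the project's
# definition.
def calculate_entailment_pos_sketch(text, hypothesis, threshold):
    t_set = set((lemma.lower(), pos) for lemma, pos in text)
    matched = sum(1 for lemma, pos in hypothesis if (lemma.lower(), pos) in t_set)
    coverage = float(matched) / float(len(hypothesis)) if hypothesis else 0.0
    return "YES" if coverage > threshold else "NO"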