Example no. 1
import lexical  # project-local module providing output_rte()

def syntax_matching(pair_attributes, distances, threshold):
    """ Calculates entailment values based on the tree edit distances and threshold """
    n = len(pair_attributes)
    # Lists are sized n + 1 so entries can be indexed directly by pair id.
    entailments = [0] * (n + 1)
    results = [0] * (n + 1)
    # Threshold each precomputed tree edit distance into a YES/NO decision
    # and record whether it agrees with the gold label e.
    for i in range(n):
        t, h, id_num, e, ta = pair_attributes[i]
        id_num = int(id_num)
        entails = distances[i] < threshold
        entailments[id_num] = "YES" if entails else "NO"
        results[id_num] = 1 if entailments[id_num] == e else 0
    lexical.output_rte(entailments)
    print "Threshold: " + "%.3f" % threshold + " Accuracy: " + str(float(sum(results)) / float(n))
Example no. 2
import lexical   # project-local modules: output_rte() emits the RTE
import xml_util  # verdicts, get_lemmas_from_text_node() lemmatizes a node

def bleu_matching(threshold, pairs):
    """ Lemmatizes each (text, hypothesis) pair and thresholds the
    BLEU-style overlap score returned by calculate_entailment() """
    pair_attributes = pairs[:]
    n = len(pair_attributes)
    results = [0] * (n + 1)
    entailments = [0] * (n + 1)
    for i in range(n):
        t, h, id_num, e, ta = pair_attributes[i]
        id_num = int(id_num)
        t_lemmas, pos = xml_util.get_lemmas_from_text_node(t)
        h_lemmas, pos = xml_util.get_lemmas_from_text_node(h)
        entailments[id_num] = calculate_entailment(t_lemmas, h_lemmas, threshold)
        if e == entailments[id_num]:
            results[id_num] = 1
    lexical.output_rte(entailments)
    print "Threshold: " + "%.2f" % threshold + " Accuracy: " + str(float(sum(results)) / float(n))
Example no. 3
import lexical   # project-local modules, as in the examples above
import xml_util

def idf_weighting(threshold, pairs):
    """ IDF-weighted lexical matching: words that occur in fewer
    (text, hypothesis) pairs carry more weight in the overlap score """
    pair_attributes = pairs[:]
    words = []
    documents = []
    n = len(pair_attributes)
    results = [0] * (n + 1)
    entailments = [0] * (n + 1)
    # Collect every word into the list 'words' and build a list of
    # documents, where each document is the set of all the words in a
    # given (text, hypothesis) pair.
    for i in range(n):
        t, h, id_num, e, ta = pair_attributes[i]
        t_lemmas, pos = xml_util.get_lemmas_from_text_node(t)
        h_lemmas, pos = xml_util.get_lemmas_from_text_node(h)
        doc = []
        for word in t_lemmas:
            words.append(word.lower())
            doc.append(word.lower())
        for word in h_lemmas:
            words.append(word.lower())
            doc.append(word.lower())
        documents.append(set(doc))
    word_set = set(words)
    # Build a dictionary 'idf_dict' that records how many documents each
    # word appears in.
    idf_dict = {}
    # Initialize the count for every word to 0 ...
    for word in word_set:
        idf_dict[word] = 0
    # ... then count the documents each word in word_set appears in.
    for word in word_set:
        for document in documents:
            if word in document:
                idf_dict[word] += 1
    print "dict done"
    for i in range(n):
        t, h, id_num, e, ta = pair_attributes[i]
        id_num = int(id_num)
        t_lemmas, pos = xml_util.get_lemmas_from_text_node(t)
        h_lemmas, pos = xml_util.get_lemmas_from_text_node(h)
        entailments[id_num] = calculate_entailment(t_lemmas, h_lemmas,
                                                   idf_dict, threshold)
        results[id_num] = 1 if e == entailments[id_num] else 0
    lexical.output_rte(entailments)
    print "Threshold: " + "%.2f" % threshold + " Accuracy: " + str(
        float(sum(results)) / float(n))
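In this variant calculate_entailment() also receives idf_dict, whose values are raw document counts. A sketch of how those counts might be used, again an assumption rather than the project's code:

def calculate_entailment(t_lemmas, h_lemmas, idf_dict, threshold):
    # Hypothetical sketch: IDF-weighted coverage of the hypothesis by the
    # text. idf_dict holds document counts, so 1.0 / count serves as a
    # simple inverse-document-frequency weight: rarer words count more.
    text = set(w.lower() for w in t_lemmas)
    covered, total = 0.0, 0.0
    for w in h_lemmas:
        w = w.lower()
        weight = 1.0 / idf_dict.get(w, 1)
        total += weight
        if w in text:
            covered += weight
    score = covered / total if total > 0 else 0.0
    return "YES" if score > threshold else "NO"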
Example no. 4
import lexical   # project-local modules, as in the examples above
import xml_util

def lemma_matching(threshold, pairs):
    """ Pairs each lemma with its POS tag and thresholds the
    (lemma, POS) overlap score returned by calculate_entailment() """
    pair_attributes = pairs[:]
    n = len(pair_attributes)
    results = [0] * (n + 1)
    entailments = [0] * (n + 1)
    for i in range(n):
        t, h, id_num, e, ta = pair_attributes[i]
        id_num = int(id_num)
        t_lemmas, t_pos = xml_util.get_lemmas_from_text_node(t)
        h_lemmas, h_pos = xml_util.get_lemmas_from_text_node(h)
        # zip() pairs each lemma with its tag; this also avoids the
        # original inner loops, which shadowed the loop variable i.
        text = zip(t_lemmas, t_pos)
        hypothesis = zip(h_lemmas, h_pos)
        entailments[id_num] = calculate_entailment(text, hypothesis, threshold)
        results[id_num] = 1 if e == entailments[id_num] else 0
    lexical.output_rte(entailments)
    print "Threshold: " + "%.2f" % threshold + " Accuracy: " + str(float(sum(results)) / float(n))