Beispiel #1
0
    def readTestAttributesFromFile(self):
        """Read the preprocessed RTE2 test pairs and return their attributes.

        Returns a list with one tuple per pair node found in
        "data/RTE2_test.preprocessed.xml":
        (text child nodes, hypothesis child nodes, id string, entailment, task).

        The entailment slot is looked up in a table indexed by the pair id.
        That table starts out as all zeros and the pass over
        "data/RTE2_test.annotated.xml" only ever stores 0, so every returned
        entailment value is 0.
        """
        preprocessed_dom = xml_util.get_dom_from_xml(
            "data/RTE2_test.preprocessed.xml")
        preprocessed_nodes = xml_util.get_pair_nodes(preprocessed_dom)

        # One slot per possible pair id (0..800 inclusive).
        labels_by_id = [0] * (800 + 1)

        annotated_dom = xml_util.get_dom_from_xml(
            "data/RTE2_test.annotated.xml")
        annotated_nodes = xml_util.get_pair_nodes(annotated_dom)
        annotated_rows = xml_util.get_attributes_from_pair_nodes(
            annotated_nodes)
        # NOTE(review): the label parsed from the annotated file is discarded
        # and 0 is stored instead -- confirm this is intended for test data.
        for _text, _hypothesis, raw_id, _label in annotated_rows:
            labels_by_id[int(raw_id)] = 0

        result = []
        for node in preprocessed_nodes:
            text_children = node.getElementsByTagName("text")[0].childNodes
            hypothesis_children = node.getElementsByTagName(
                "hypothesis")[0].childNodes
            pair_id = node.getAttributeNode("id").value
            # The entailment value comes from the lookup table, not from an
            # attribute on the preprocessed node.
            label = labels_by_id[int(pair_id)]
            task_name = node.getAttributeNode("task").value
            result.append(
                (text_children, hypothesis_children, pair_id, label, task_name))
        return result
    def readTestAttributesFromFile(self):
        """Build the attribute tuples for the preprocessed RTE2 test set.

        Each pair node of "data/RTE2_test.preprocessed.xml" becomes a tuple
        (text child nodes, hypothesis child nodes, id string, entailment, task).
        The entailment entry is read from a zero-initialised table indexed by
        pair id; the loop over "data/RTE2_test.annotated.xml" writes only 0
        into that table, so the entry is always 0.
        """
        preprocessed = xml_util.get_dom_from_xml("data/RTE2_test.preprocessed.xml")
        preprocessed_nodes = xml_util.get_pair_nodes(preprocessed)

        # Table covering pair ids 0..800 inclusive.
        entailment_by_id = [0] * (800 + 1)
        annotated = xml_util.get_dom_from_xml("data/RTE2_test.annotated.xml")
        annotated_nodes = xml_util.get_pair_nodes(annotated)
        annotated_rows = xml_util.get_attributes_from_pair_nodes(annotated_nodes)
        for row in annotated_rows:
            _t, _h, annotated_id, _e = row
            # NOTE(review): the annotated label (_e) is ignored and 0 is
            # written -- verify that this is deliberate.
            entailment_by_id[int(annotated_id)] = 0

        def describe(node):
            # Turn one preprocessed pair node into its attribute tuple.
            text_children = node.getElementsByTagName("text")[0].childNodes
            hypothesis_children = node.getElementsByTagName("hypothesis")[0].childNodes
            pair_id = node.getAttributeNode("id").value
            label = entailment_by_id[int(pair_id)]
            task_name = node.getAttributeNode("task").value
            return (text_children, hypothesis_children, pair_id, label, task_name)

        return [describe(node) for node in preprocessed_nodes]
Beispiel #3
0
 def readProcessedAttributesFromFile(self):
     """Return the pair attributes parsed from the preprocessed RTE2 dev XML.

     Note: reading the preprocessed XML file takes quite some time.
     """
     preprocessed_dom = xml_util.get_dom_from_xml(
         "data/RTE2_dev.preprocessed.xml")
     return xml_util.get_attributes_from_preprocessed_pair_nodes(
         xml_util.get_pair_nodes(preprocessed_dom))
Beispiel #4
0
def threshold_iterator(threshold):
    """Run word_matching on the RTE2 dev pairs for one or many thresholds.

    A threshold of -1 requests a sweep: word_matching is called 100 times
    with thresholds 1.0, 0.99, ... (1.0 - 0.01 * i for i in 0..99).
    Any other value is passed to word_matching once, unchanged.
    """
    dev_dom = xml_util.get_dom_from_xml("data/RTE2_dev.xml")
    attributes = xml_util.get_attributes_from_pair_nodes(
        xml_util.get_pair_nodes(dev_dom))

    if threshold != -1:
        word_matching(threshold, attributes)
        return

    for step in range(100):
        word_matching(1.0 - (0.01 * step), attributes)
def threshold_iterator(threshold):
    """Evaluate word_matching against the pairs in "data/RTE2_dev.xml".

    When threshold is -1 the function tries 100 candidate thresholds
    (1.0 - 0.01 * i for i in 0..99); otherwise it evaluates only the
    threshold that was given.
    """
    parsed = xml_util.get_dom_from_xml("data/RTE2_dev.xml")
    nodes = xml_util.get_pair_nodes(parsed)
    attributes = xml_util.get_attributes_from_pair_nodes(nodes)

    if threshold == -1:
        candidates = [1.0 - (0.01 * step) for step in range(100)]
    else:
        candidates = [threshold]

    for candidate in candidates:
        word_matching(candidate, attributes)
def threshold_iterator(threshold):
    """Run idf_weighting on the preprocessed RTE2 dev pairs.

    A threshold of -1 triggers a sweep over 100 thresholds
    (1.0 - 0.01 * i for i in 0..99); any other value is used as-is for a
    single idf_weighting call.
    """
    preprocessed_dom = xml_util.get_dom_from_xml(
        "data/RTE2_dev.preprocessed.xml")
    attributes = xml_util.get_attributes_from_preprocessed_pair_nodes(
        xml_util.get_pair_nodes(preprocessed_dom))

    if threshold != -1:
        idf_weighting(threshold, attributes)
        return

    for step in range(100):
        idf_weighting(1.0 - (0.01 * step), attributes)
def threshold_iterator(threshold):
    """Evaluate idf_weighting on "data/RTE2_dev.preprocessed.xml".

    With threshold == -1 the function calls idf_weighting for each of the
    100 values 1.0 - 0.01 * i (i in 0..99); with any other threshold it
    calls idf_weighting exactly once with that value.
    """
    parsed = xml_util.get_dom_from_xml("data/RTE2_dev.preprocessed.xml")
    nodes = xml_util.get_pair_nodes(parsed)
    attributes = xml_util.get_attributes_from_preprocessed_pair_nodes(nodes)

    if threshold == -1:
        candidates = [1.0 - (0.01 * step) for step in range(100)]
    else:
        candidates = [threshold]

    for candidate in candidates:
        idf_weighting(candidate, attributes)
Beispiel #8
0
def threshold_iterator(threshold):
    """Run bleu_matching on the preprocessed RTE2 dev pairs.

    A threshold of -1 sweeps 100 thresholds (1.0 - 0.01 * i for i in 0..99);
    any other value results in a single bleu_matching call with that value.

    Note: reading the preprocessed XML file takes quite some time.
    """
    preprocessed_dom = xml_util.get_dom_from_xml(
        "data/RTE2_dev.preprocessed.xml")
    attributes = xml_util.get_attributes_from_preprocessed_pair_nodes(
        xml_util.get_pair_nodes(preprocessed_dom))

    if threshold != -1:
        bleu_matching(threshold, attributes)
        return

    for step in range(100):
        bleu_matching(1.0 - (0.01 * step), attributes)
Beispiel #9
0
def threshold_iterator(threshold):
    """Evaluate lemma_matching on "data/RTE2_dev.preprocessed.xml".

    With threshold == -1, lemma_matching is called for each of the 100
    values 1.0 - 0.01 * i (i in 0..99). Otherwise it is called once with
    the given threshold.

    Note: reading the preprocessed XML file takes quite some time.
    """
    parsed = xml_util.get_dom_from_xml("data/RTE2_dev.preprocessed.xml")
    nodes = xml_util.get_pair_nodes(parsed)
    attributes = xml_util.get_attributes_from_preprocessed_pair_nodes(nodes)

    if threshold == -1:
        candidates = [1.0 - (0.01 * step) for step in range(100)]
    else:
        candidates = [threshold]

    for candidate in candidates:
        lemma_matching(candidate, attributes)
Beispiel #10
0
def threshold_iterator(threshold):
    """
    Extract data from the preprocessed XML file and calculate distances.

    Steps performed:
      1. Parse "data/RTE2_dev.preprocessed.xml" and read the pair attributes.
      2. Rebuild the module-level ``idf_dict`` from those attributes and
         print its number of entries.
      3. For every pair, build a tree for the text and for the hypothesis
         and compute their tree edit distance under ``idf_cost``, divided
         by the distance from a lone "root" node to the hypothesis tree.
      4. Call ``syntax_matching`` with the pair attributes, the normalized
         distances and a threshold.

    threshold -- the threshold to evaluate. Passing -1 sweeps 200 thresholds
                 (1.0 - 0.005 * i for i in 0..199) instead of evaluating a
                 single one.

    Side effects: rebinds the global ``idf_dict`` and prints to stdout.
    """
    global idf_dict
    dom_doc = xml_util.get_dom_from_xml("data/RTE2_dev.preprocessed.xml")
    pairs = xml_util.get_pairs(dom_doc)
    pair_attributes = xml_util.get_attributes_from_preprocessed_pair_nodes(pairs)
    idf_dict = calculate_idf_dictionary(pair_attributes)
    # Parenthesized single-argument print behaves the same on Python 2 and 3.
    print(len(idf_dict.keys()))

    # Extracting the actual lemma values from the pair nodes
    tree_value_pairs = []
    for t, h, _id_num, _e, _ta in pair_attributes:
        t_values = xml_util.get_minipar_values_from_text_node(t)
        h_values = xml_util.get_minipar_values_from_text_node(h)
        tree_value_pairs.append((t_values, h_values))

    # Calculating distances between text and hypothesis
    distances = []
    for value_pair in tree_value_pairs:
        t_tree, h_tree = build_tree(value_pair)
        dist = tree_edit_dist.distance(t_tree, h_tree, idf_cost)
        # Normalize by the distance from a bare root node to the hypothesis.
        # NOTE(review): a normalizer of 0 would raise ZeroDivisionError here;
        # confirm whether that can occur for the hypothesis trees in the data.
        normalizer = tree_edit_dist.distance(
            tree_edit_dist.Node("root"), h_tree, idf_cost)
        distances.append(float(dist) / float(normalizer))

    if threshold == -1:
        for i in range(200):
            threshold = 1.0 - (0.005 * i)
            syntax_matching(pair_attributes, distances, threshold)
    else:
        syntax_matching(pair_attributes, distances, threshold)
Beispiel #11
0
 def readAttributesFromFile(self):
     """Return the pair attributes parsed from "data/RTE2_dev.xml"."""
     dev_dom = xml_util.get_dom_from_xml("data/RTE2_dev.xml")
     return xml_util.get_attributes_from_pair_nodes(
         xml_util.get_pair_nodes(dev_dom))
 def readAttributesFromFile(self):
     """Parse "data/RTE2_dev.xml" and return the attributes of its pair nodes."""
     nodes = xml_util.get_pair_nodes(
         xml_util.get_dom_from_xml("data/RTE2_dev.xml"))
     attributes = xml_util.get_attributes_from_pair_nodes(nodes)
     return attributes
 def readProcessedAttributesFromFile(self):
     """Parse the preprocessed RTE2 dev XML and return its pair attributes.

     Note: reading the preprocessed XML file takes quite some time.
     """
     nodes = xml_util.get_pair_nodes(
         xml_util.get_dom_from_xml("data/RTE2_dev.preprocessed.xml"))
     attributes = xml_util.get_attributes_from_preprocessed_pair_nodes(nodes)
     return attributes