Exemple #1
0
 def readProcessedAttributesFromFile(self):
     """Return the pair attributes parsed from the preprocessed XML file.

     Loads "data/RTE2_dev.preprocessed.xml", collects its pair nodes and
     lets xml_util extract the attributes from them.

     Note: reading the preprocessed XML file takes quite some time.
     """
     nodes = xml_util.get_pair_nodes(
         xml_util.get_dom_from_xml("data/RTE2_dev.preprocessed.xml"))
     return xml_util.get_attributes_from_preprocessed_pair_nodes(nodes)
def threshold_iterator(threshold):
    """Run idf_weighting for one threshold, or for a sweep of thresholds.

    The pair attributes are read once from
    "data/RTE2_dev.preprocessed.xml" and reused for every call.

    threshold -- the value handed to idf_weighting. The special value -1
        requests a sweep over 100 thresholds, starting at 1.0 and
        decreasing in steps of 0.01.
    """
    document = xml_util.get_dom_from_xml("data/RTE2_dev.preprocessed.xml")
    attributes = xml_util.get_attributes_from_preprocessed_pair_nodes(
        xml_util.get_pair_nodes(document))

    # Single-threshold case: evaluate once and stop.
    if threshold != -1:
        idf_weighting(threshold, attributes)
        return

    for step in range(100):
        idf_weighting(1.0 - (0.01 * step), attributes)
def threshold_iterator(threshold):
    """Apply idf_weighting to the preprocessed pairs at one or many thresholds.

    Parses "data/RTE2_dev.preprocessed.xml" a single time, then calls
    idf_weighting once per candidate threshold.

    threshold -- a concrete threshold to evaluate, or -1 to evaluate the
        100 values 1.0, 0.99, ... obtained as 1.0 - 0.01 * i.
    """
    nodes = xml_util.get_pair_nodes(
        xml_util.get_dom_from_xml("data/RTE2_dev.preprocessed.xml"))
    attributes = xml_util.get_attributes_from_preprocessed_pair_nodes(nodes)

    # -1 is the sentinel that selects the full sweep.
    if threshold == -1:
        candidates = [1.0 - (0.01 * step) for step in range(100)]
    else:
        candidates = [threshold]

    for candidate in candidates:
        idf_weighting(candidate, attributes)
Exemple #4
0
def threshold_iterator(threshold):
    """Run bleu_matching for one threshold, or for a sweep of thresholds.

    Note: reading the preprocessed XML file
    ("data/RTE2_dev.preprocessed.xml") takes quite some time, so it is
    parsed once and the resulting attributes are shared by all calls.

    threshold -- the value handed to bleu_matching. Passing -1 sweeps 100
        thresholds from 1.0 downwards in steps of 0.01 instead.
    """
    document = xml_util.get_dom_from_xml("data/RTE2_dev.preprocessed.xml")
    nodes = xml_util.get_pair_nodes(document)
    attributes = xml_util.get_attributes_from_preprocessed_pair_nodes(nodes)

    if threshold != -1:
        bleu_matching(threshold, attributes)
        return

    # Lazily produce the sweep values in the same order as an index loop.
    sweep = (1.0 - (0.01 * step) for step in range(100))
    for value in sweep:
        bleu_matching(value, attributes)
def threshold_iterator(threshold):
    """Apply lemma_matching to the preprocessed pairs at one or many thresholds.

    The attributes come from "data/RTE2_dev.preprocessed.xml"; reading
    that file takes quite some time, so it happens once up front.

    threshold -- a concrete threshold to evaluate, or -1 to evaluate the
        100 values obtained as 1.0 - 0.01 * i for i in 0..99.
    """
    attributes = xml_util.get_attributes_from_preprocessed_pair_nodes(
        xml_util.get_pair_nodes(
            xml_util.get_dom_from_xml("data/RTE2_dev.preprocessed.xml")))

    # Choose the thresholds first, then dispatch in a single loop.
    sweeping = threshold == -1
    values = [1.0 - (0.01 * k) for k in range(100)] if sweeping else [threshold]
    for value in values:
        lemma_matching(value, attributes)
Exemple #6
0
def threshold_iterator(threshold):
    """
    Extract data from the XML file, calculate distances and run syntax_matching.

    Reads "data/RTE2_dev.preprocessed.xml", rebuilds the module-level
    idf_dict from the pair attributes and prints its size. For every pair
    the tree edit distance between text and hypothesis is divided by the
    distance between a bare "root" node and the hypothesis tree.

    threshold -- the threshold passed to syntax_matching. The special
        value -1 instead iterates over 200 thresholds, starting at 1.0 and
        decreasing in steps of 0.005, to help find the best one.

    Raises ZeroDivisionError if the normalizing distance of a pair is 0.
    """
    global idf_dict
    dom_doc = xml_util.get_dom_from_xml("data/RTE2_dev.preprocessed.xml")
    pairs = xml_util.get_pairs(dom_doc)
    pair_attributes = xml_util.get_attributes_from_preprocessed_pair_nodes(pairs)
    # Rebinds the module-level name; presumably read by idf_cost -- confirm.
    idf_dict = calculate_idf_dictionary(pair_attributes)
    # Parenthesised so the line parses under both Python 2 and Python 3.
    print(len(idf_dict))

    # Extracting the minipar values from the text and hypothesis nodes.
    # Each entry unpacks into five fields; only the first two are needed.
    tree_value_pairs = [
        (xml_util.get_minipar_values_from_text_node(t),
         xml_util.get_minipar_values_from_text_node(h))
        for t, h, _id_num, _e, _ta in pair_attributes
    ]

    # Calculating normalized distances between text and hypothesis
    distances = []
    for value_pair in tree_value_pairs:
        t_tree, h_tree = build_tree(value_pair)
        dist = tree_edit_dist.distance(t_tree, h_tree, idf_cost)
        # Distance from a bare root to the hypothesis tree is the normalizer.
        normalizer = tree_edit_dist.distance(
            tree_edit_dist.Node("root"), h_tree, idf_cost)
        distances.append(float(dist) / float(normalizer))

    if threshold == -1:
        for step in range(200):
            syntax_matching(pair_attributes, distances, 1.0 - (0.005 * step))
    else:
        syntax_matching(pair_attributes, distances, threshold)
 def readProcessedAttributesFromFile(self):
     """Read "data/RTE2_dev.preprocessed.xml" and return its pair attributes.

     Be aware that it takes quite some time to read the preprocessed
     XML file.
     """
     document = xml_util.get_dom_from_xml("data/RTE2_dev.preprocessed.xml")
     return xml_util.get_attributes_from_preprocessed_pair_nodes(
         xml_util.get_pair_nodes(document))