Beispiel #1
0
def get_features(in_file, idf_enabled=False):
    """Collect the scores of several scorers into per-key feature lists.

    ``in_file`` is handed to ``load_xml.get_pairs``,
    ``create_tree.generate_syntax_tree`` and ``get_attributes_pair``.
    Every scorer is expected to return an iterable of ``(key, value)``
    2-tuples; each ``value`` is appended to the list stored under ``key``.
    The scorers run in this fixed order, which therefore is also the order
    of the values inside every list:

    1. ``lexical.word_match`` (receives ``idf_enabled``)
    2. ``lexical.get_simple_negations``
    3. ``syntactic.tree_edit_distance`` (on the syntax tree)
    4. ``lexical.number_match``
    5. ``bleu`` on lemmas with synonyms, for n = 1, 2, 3
    6. ``bleu`` on lemmas without synonyms, for n = 2

    Finally every list is extended with ``ref[str(key)]``, where ``ref`` is
    the result of ``get_attributes_pair(in_file)`` (presumably the task and
    entailment labels -- confirm against that helper).

    Args:
        in_file: input passed unchanged to the loaders named above.
        idf_enabled: when true, ``generate_idf_score`` is run on the lexical
            tree first and ``lexical.word_match`` is told to use IDF.

    Returns:
        A ``defaultdict(list)`` mapping each key to its feature values.
    """
    # Single-argument print(...) emits the same text on Python 2 and 3.
    print("loading xml...")
    lexical_tree = load_xml.get_pairs(in_file)
    syntax_tree = create_tree.generate_syntax_tree(in_file)
    print("done loading")

    if idf_enabled:
        generate_idf_score(lexical_tree)

    print("parsing reference")
    ref = get_attributes_pair(in_file)

    print("extracting features")
    features = defaultdict(list)

    def collect(scores):
        # Append each scored value to the feature list of its key.
        for key, value in scores:
            features[key].append(value)

    # word matching
    collect(lexical.word_match(lexical_tree, idf_enabled=idf_enabled))

    # simple negation
    collect(lexical.get_simple_negations(lexical_tree))

    # tree edit distance
    collect(syntactic.tree_edit_distance(syntax_tree))

    # number match
    collect(lexical.number_match(lexical_tree))

    # NOTE(review): both bleu variants below hard-code idf_enabled=True even
    # when this function's idf_enabled is False (in which case
    # generate_idf_score was not run above) -- confirm this is intended.

    # 1,2,3-gram with synonyms of lemmas
    for n in (1, 2, 3):
        collect(bleu(lexical_tree, n=n, idf_enabled=True, lemma=True,
                     synonyms=True))

    # 2-gram of lemmas without synonyms
    collect(bleu(lexical_tree, n=2, idf_enabled=True, lemma=True,
                 synonyms=False))

    # appending task and entailment; the set of keys is not changed here,
    # so iterating the dict while extending its lists is safe.
    for key in features:
        features[key].extend(ref[str(key)])

    return features
Beispiel #2
0
def get_features(in_file, idf_enabled=False):
    """Run a fixed sequence of scorers and gather their values per key.

    The lexical tree comes from ``load_xml.get_pairs(in_file)`` and the
    syntax tree from ``create_tree.generate_syntax_tree(in_file)``.  Each
    scorer returns an iterable of ``(key, value)`` 2-tuples; values are
    appended to ``features[key]`` in the order the scorers are listed in
    ``scorers`` below.  Afterwards every list is extended with
    ``ref[str(key)]`` taken from ``get_attributes_pair(in_file)``
    (presumably task and entailment labels -- confirm against that helper).

    Args:
        in_file: input passed unchanged to the loaders named above.
        idf_enabled: when true, ``generate_idf_score`` is run on the lexical
            tree first, and the flag is forwarded to ``lexical.word_match``
            and ``lexical.bleu``.

    Returns:
        A ``defaultdict(list)`` mapping each key to its feature values.
    """
    print("loading xml...")
    lexical_tree = load_xml.get_pairs(in_file)
    syntax_tree = create_tree.generate_syntax_tree(in_file)
    print("done loading")

    if idf_enabled:
        generate_idf_score(lexical_tree)

    print("parsing reference")
    ref = get_attributes_pair(in_file)

    print("extracting features")
    features = defaultdict(list)

    # Zero-argument thunks so that each scorer is only invoked when its turn
    # comes, one after the other.
    scorers = (
        # word matching
        lambda: lexical.word_match(lexical_tree, idf_enabled=idf_enabled),
        # lemma matching
        lambda: lexical.lemma_match(lexical_tree),
        # bigram matching (lemma)
        lambda: lexical.bleu(lexical_tree, n=2, return_only_n=2,
                             idf_enabled=idf_enabled, lemma=True),
        # lemma + POS matching
        # NOTE(review): this repeats lexical.lemma_match, so the lemma score
        # is appended twice; possibly a dedicated lemma/POS scorer was
        # intended -- confirm before changing, the vector length depends on it.
        lambda: lexical.lemma_match(lexical_tree),
        # simple negation
        lambda: lexical.get_simple_negations(lexical_tree),
        # tree edit distance
        lambda: syntactic.tree_edit_distance(syntax_tree),
    )
    for run_scorer in scorers:
        for key, value in run_scorer():
            features[key].append(value)

    # appending task and entailment
    for key, values in features.items():
        values.extend(ref[str(key)])

    return features