def get_features(in_file, idf_enabled=False):
    """Build the feature lists for every pair found in ``in_file``.

    Each scorer called below is iterated as ``(key, value)`` tuples and
    every value is appended to the list kept for its key, in this order:
    word match, simple negation, tree edit distance, number match,
    1/2/3-gram BLEU on lemmas with synonyms, 2-gram BLEU on lemmas
    without synonyms.  Finally ``ref[str(key)]`` is appended to each
    key's list (task and entailment, per the original comment).

    Args:
        in_file: Passed unchanged to ``load_xml.get_pairs``,
            ``create_tree.generate_syntax_tree`` and
            ``get_attributes_pair``.
        idf_enabled: When true, ``generate_idf_score`` is run on the
            lexical tree first and ``lexical.word_match`` is called
            with IDF enabled.

    Returns:
        A ``defaultdict(list)`` mapping each key to its feature values
        followed by the reference attributes for that key.
    """
    # NOTE(review): this file contains a second ``get_features``
    # definition; if both live in the same module, the later one
    # replaces this one at import time -- confirm which is intended.
    print("loading xml...")
    lexical_tree = load_xml.get_pairs(in_file)
    syntax_tree = create_tree.generate_syntax_tree(in_file)
    print("done loading")

    if idf_enabled:
        generate_idf_score(lexical_tree)

    print("parsing reference")
    ref = get_attributes_pair(in_file)

    print("extracting features")
    features = defaultdict(list)

    def _collect(score):
        # Append every (key, value) produced by one scorer.
        for key, value in score:
            features[key].append(value)

    # word matching
    _collect(lexical.word_match(lexical_tree, idf_enabled=idf_enabled))

    # simple negation
    _collect(lexical.get_simple_negations(lexical_tree))

    # tree edit distance
    _collect(syntactic.tree_edit_distance(syntax_tree))

    # number match
    _collect(lexical.number_match(lexical_tree))

    # NOTE(review): the bleu calls below hard-code idf_enabled=True even
    # when this function's ``idf_enabled`` is False (in which case
    # generate_idf_score was not run here) -- confirm this is intended.
    # 1,2,3-gram with synonyms of lemmas
    for n in (1, 2, 3):
        _collect(bleu(lexical_tree, n=n, idf_enabled=True,
                      lemma=True, synonyms=True))

    # 2-gram without synonyms
    _collect(bleu(lexical_tree, n=2, idf_enabled=True,
                  lemma=True, synonyms=False))

    # appending task and entailment
    for key, values in features.items():
        values.extend(ref[str(key)])

    return features
def get_features(in_file, idf_enabled=False):
    """Collect per-key feature values for the pairs stored in ``in_file``.

    The scorers are run one after another (word match, lemma match,
    lemma bigram BLEU, a second lemma match, simple negation, tree edit
    distance); each is iterated as ``(key, value)`` tuples and the value
    is appended to that key's list.  Afterwards ``ref[str(key)]`` is
    appended to every list (task and entailment, per the original
    comment).

    Args:
        in_file: Handed as-is to ``load_xml.get_pairs``,
            ``create_tree.generate_syntax_tree`` and
            ``get_attributes_pair``.
        idf_enabled: If true, ``generate_idf_score`` is applied to the
            lexical tree first; the flag is also forwarded to
            ``lexical.word_match`` and ``lexical.bleu``.

    Returns:
        A ``defaultdict(list)`` keyed like the scorer output, each value
        being the feature list followed by the reference attributes.
    """
    print("loading xml...")
    lexical_tree = load_xml.get_pairs(in_file)
    syntax_tree = create_tree.generate_syntax_tree(in_file)
    print("done loading")

    if idf_enabled:
        generate_idf_score(lexical_tree)

    print("parsing reference")
    ref = get_attributes_pair(in_file)

    print("extracting features")

    def _score_sets():
        # Lazily yield each scorer's output so that every scorer runs
        # only after the previous one's values have been stored.
        # word matching
        yield lexical.word_match(lexical_tree, idf_enabled=idf_enabled)
        # lemma matching
        yield lexical.lemma_match(lexical_tree)
        # bigram matching (lemma)
        yield lexical.bleu(lexical_tree, n=2, return_only_n=2,
                           idf_enabled=idf_enabled, lemma=True)
        # lemma/POS matching (per the original comment).
        # NOTE(review): this calls lemma_match with the same argument
        # again, so its values are appended a second time -- confirm
        # whether a different scorer was intended.
        yield lexical.lemma_match(lexical_tree)
        # simple negation
        yield lexical.get_simple_negations(lexical_tree)
        # tree edit distance
        yield syntactic.tree_edit_distance(syntax_tree)

    features = defaultdict(list)
    for pairs in _score_sets():
        for key, value in pairs:
            features[key].append(value)

    # appending task and entailment
    for key in features:
        features[key].extend(ref[str(key)])

    return features