def trigger_description(trig_sentdict, ant_sentdict, trigger, POS_TAGS, AUX_WORDS):
    """Build the feature vector that describes a trigger.

    The returned list is the concatenation, in order, of:

    1. ``truth(...)`` applied to whether the trigger's subtree has exactly
       as many leaves as the trigger has words;
    2. the number of trigger words;
    3. one count per tag in ``POS_TAGS``, counting the POS tags of the
       trigger's words inside the trigger's context;
    4. ``VC.lemmacategoryvector`` of the trigger lemma;
    5. ``VC.lemmavector`` of the trigger lemma;
    6. ``VC.auxwordvector`` of the trigger word against ``AUX_WORDS``.

    Args:
        trig_sentdict: mapping with ``'words'`` and ``'lemmas'`` sequences,
            both indexed by ``trigger.get_idx()``.
        ant_sentdict: unused; kept so existing callers keep working.
        trigger: object providing ``get_words()``, ``get_subtree()``,
            ``get_context()`` (a mapping with ``'words'`` and ``'pos'``)
            and ``get_idx()``.
        POS_TAGS: iterable of the POS tags to count.
        AUX_WORDS: passed through to ``VC.auxwordvector``.

    Returns:
        A flat list of feature values.

    Raises:
        KeyError: if a POS tag in the trigger span is not in ``POS_TAGS``.
    """
    trig_words = trigger.get_words()
    subtree = trigger.get_subtree()
    context = trigger.get_context()

    trig_idx = trigger.get_idx()
    trig_word = trig_sentdict['words'][trig_idx]
    trig_lemma = trig_sentdict['lemmas'][trig_idx]

    # Offset of the first context word equal to the trigger word. If nothing
    # matches, this ends up one past the last context word and the POS span
    # below is empty.
    context_idx = 0
    for word in context['words']:
        if word == trig_word:
            break
        context_idx += 1

    # Features 1,2
    vector = [
        truth(len(subtree.leaves()) == len(trig_words)),
        len(trig_words),
    ]

    # Feature set 3.
    pos_counts = dict.fromkeys(POS_TAGS, 0)
    # The span covers len(trig_words) tags starting AT context_idx. The end
    # must be relative to the start; using len(trig_words) as an absolute end
    # index truncates (or empties) the span whenever context_idx > 0.
    span_end = context_idx + len(trig_words)
    for tag in context['pos'][context_idx:span_end]:
        pos_counts[tag] += 1
    vector.extend(pos_counts.values())

    # Feature sets 4,5,6. Description of the auxiliary.
    vector += VC.lemmacategoryvector(trig_lemma)
    vector += VC.lemmavector(trig_lemma)
    vector += VC.auxwordvector(trig_word, AUX_WORDS)
    return vector
def trigger_description(trig_sentdict, ant_sentdict, trigger, POS_TAGS, AUX_WORDS):
    """Build the feature vector that describes a trigger.

    The returned list is the concatenation, in order, of:

    1. ``truth(...)`` applied to whether the trigger's subtree has exactly
       as many leaves as the trigger has words;
    2. the number of trigger words;
    3. one count per tag in ``POS_TAGS``, counting the POS tags of the
       trigger's words inside the trigger's context;
    4. ``VC.lemmacategoryvector`` of the trigger lemma;
    5. ``VC.lemmavector`` of the trigger lemma;
    6. ``VC.auxwordvector`` of the trigger word against ``AUX_WORDS``.

    Args:
        trig_sentdict: mapping with ``'words'`` and ``'lemmas'`` sequences,
            both indexed by ``trigger.get_idx()``.
        ant_sentdict: unused; kept so existing callers keep working.
        trigger: object providing ``get_words()``, ``get_subtree()``,
            ``get_context()`` (a mapping with ``'words'`` and ``'pos'``)
            and ``get_idx()``.
        POS_TAGS: iterable of the POS tags to count.
        AUX_WORDS: passed through to ``VC.auxwordvector``.

    Returns:
        A flat list of feature values.

    Raises:
        KeyError: if a POS tag in the trigger span is not in ``POS_TAGS``.
    """
    trig_words = trigger.get_words()
    subtree = trigger.get_subtree()
    context = trigger.get_context()

    trig_idx = trigger.get_idx()
    trig_word = trig_sentdict['words'][trig_idx]
    trig_lemma = trig_sentdict['lemmas'][trig_idx]

    # Offset of the first context word equal to the trigger word. If nothing
    # matches, this ends up one past the last context word and the POS span
    # below is empty.
    context_idx = 0
    for word in context['words']:
        if word == trig_word:
            break
        context_idx += 1

    # Features 1,2
    vector = [
        truth(len(subtree.leaves()) == len(trig_words)),
        len(trig_words),
    ]

    # Feature set 3.
    pos_counts = dict.fromkeys(POS_TAGS, 0)
    # The span covers len(trig_words) tags starting AT context_idx. The end
    # must be relative to the start; using len(trig_words) as an absolute end
    # index truncates (or empties) the span whenever context_idx > 0.
    span_end = context_idx + len(trig_words)
    for tag in context['pos'][context_idx:span_end]:
        pos_counts[tag] += 1
    vector.extend(pos_counts.values())

    # Feature sets 4,5,6. Description of the auxiliary.
    vector += VC.lemmacategoryvector(trig_lemma)
    vector += VC.lemmavector(trig_lemma)
    vector += VC.auxwordvector(trig_word, AUX_WORDS)
    return vector
def make_vector(sentdict, aux, features, aux_categories, aux_lemmas, aux_words,
                surrounding_words, pos_tags, pos_bigrams, make_old=False):
    """Assemble the feature vector for one auxiliary.

    With ``make_old`` set, the vector is built from the ``old_vc`` helpers
    (lemma category, lemma, aux word, then ``myfeaturesvector``). Otherwise
    each feature group is appended only when its name appears in
    ``features`` ('aux', 'words', 'pos', 'bigrams', in that order), followed
    by ``linguistic_features_vector``, which is always appended and receives
    ``features`` itself.

    Returns the concatenated list of feature values.
    """
    result = []

    # Legacy path: delegate entirely to the old vector-creation module.
    if make_old:
        result.extend(old_vc.lemmacategoryvector(aux.lemma))
        result.extend(old_vc.lemmavector(aux.lemma))
        result.extend(old_vc.auxwordvector(aux.word, aux_words))
        result.extend(old_vc.myfeaturesvector(sentdict, aux.wordnum, features))
        return result

    # Description of the auxiliary itself.
    if 'aux' in features:
        result.extend(lemma_category_vector(aux, aux_categories))
        result.extend(lemma_vector(aux, aux_lemmas))
        result.extend(aux_vector(aux, aux_words))

    # Structure around the auxiliary, by word and by POS tag.
    if 'words' in features:
        result.extend(
            aux_structure_vector(sentdict, aux, 'words', surrounding_words))
    if 'pos' in features:
        result.extend(aux_structure_vector(sentdict, aux, 'pos', pos_tags))

    if 'bigrams' in features:
        result.extend(aux_pos_bigrams_vector(sentdict, aux, pos_bigrams))

    # Always appended; the helper is handed the feature selection directly.
    result.extend(linguistic_features_vector(sentdict, aux, features))
    return result