def get_features(ptree: nltk.ParentedTree, dtree: List[DepRel], indices, sense, offset):
    """Build one feature dictionary per token of a parsed sentence.

    Args:
        ptree: Constituency tree of the sentence. Its ``pos()`` output
            supplies the ``(word, tag)`` pairs that are iterated.
        dtree: Dependency relations of the sentence; only the ``rel``
            attribute of each entry is read.
            NOTE(review): the index of the 'root' relation is used as a
            token index into ``ptree.pos()``, so ``dtree`` is presumably
            aligned token-for-token with the tree leaves -- confirm.
        indices: Container tested for membership of ``offset + i``; tokens
            whose absolute index is contained get a non-empty ``'conn'``.
        sense: Dotted label string; only the part before the first ``'.'``
            is used, and only for tokens found in ``indices``.
        offset: Value added to the sentence-local token index to form
            ``'idx'`` and the index looked up in ``indices``.

    Returns:
        A list of dicts, one per token, with the keys ``idx``, ``BOS``,
        ``word``, ``pos``, ``lemma``, ``stem``, ``chain``, ``conn``,
        ``inflection``, ``is_main_verb`` and ``main_verb``.
    """
    # Position of the first relation labelled 'root' (case-insensitive);
    # falls back to the first token when there is none.
    mv_position = next(
        (rel_idx for rel_idx, drel in enumerate(dtree) if drel.rel.lower() == 'root'),
        0,
    )

    tagged_tokens = ptree.pos()
    main_verb = lemmatizer.lemmatize(tagged_tokens[mv_position][0])
    main_verb_lower = main_verb.lower()

    features_sentence = []
    for tok_idx, (word, tag) in enumerate(tagged_tokens):
        # Tree position of the leaf with its last two components removed.
        ancestor_path = ptree.treeposition_spanning_leaves(tok_idx, tok_idx + 1)[:-2]

        # Labels of the nodes addressed by every non-empty prefix of that
        # path, with 'SBAR' rewritten to 'S'.
        labels = []
        for depth in range(1, len(ancestor_path) + 1):
            label = ptree[ancestor_path[:depth]].label()
            labels.append('S' if label == 'SBAR' else label)

        # An empty chain is kept as an empty list (not turned into a string).
        chain = "-".join(get_compressed_chain(labels)) if labels else labels

        stem = stemmer.stem(word).lower()
        absolute_idx = offset + tok_idx

        features_sentence.append({
            'idx': absolute_idx,
            'BOS': tok_idx == 0,
            'word': word.lower(),
            'pos': tag,
            'lemma': lemmatizer.lemmatize(word).lower(),
            'stem': stem,
            'chain': chain,
            # The sense is only split for tokens that are listed in `indices`.
            'conn': sense.split('.')[0] if absolute_idx in indices else "",
            # Whatever remains of the surface form after the stem's length.
            'inflection': word[len(stem):],
            'is_main_verb': tok_idx == mv_position,
            'main_verb': main_verb_lower,
        })

    return features_sentence
def get_features(ptree: nltk.ParentedTree, conn_idxs: List[int]):
    """Return a list with one basic feature dictionary per token of ``ptree``.

    Args:
        ptree: Parse tree whose ``pos()`` output supplies the ``(word, tag)``
            pairs that are iterated.
        conn_idxs: Token positions (0-based, in ``ptree.pos()`` order) whose
            ``'conn'`` feature should be ``True``.

    Returns:
        A list of dicts with the keys ``BOS``, ``word``, ``pos``, ``lemma``,
        ``stem`` and ``conn``, in token order.
    """
    return [
        {
            'BOS': position == 0,
            'word': word,
            'pos': tag,
            'lemma': lemmatizer.lemmatize(word),
            'stem': stemmer.stem(word),
            'conn': position in conn_idxs,
        }
        for position, (word, tag) in enumerate(ptree.pos())
    ]