def score_model(vocab, tagger, parser, gold_docs, verbose=False): scorer = Scorer() for _, gold_doc in gold_docs: for (ids, words, tags, heads, deps, entities), _ in gold_doc: doc = Doc(vocab, words=words) tagger(doc) parser(doc) PseudoProjectivity.deprojectivize(doc) gold = GoldParse(doc, tags=tags, heads=heads, deps=deps) scorer.score(doc, gold, verbose=verbose) return scorer
def deprojectivize(proj_heads, deco_labels, EN): slen = len(proj_heads) sent = EN.tokenizer.tokens_from_list(["whatever"] * slen) rel_proj_heads = [head - i for i, head in enumerate(proj_heads)] labelids = [EN.vocab.strings[label] for label in deco_labels] pairs = list(zip(rel_proj_heads, labelids)) parse = numpy.asarray(pairs, dtype=numpy.int32) sent.from_array([HEAD, DEP], parse) PseudoProjectivity.deprojectivize(sent) parse = sent.to_array([HEAD, DEP]) deproj_heads = [i + head for i, head in enumerate(parse[:, 0])] undeco_labels = [EN.vocab.strings[int(labelid)] for labelid in parse[:, 1]] return deproj_heads, undeco_labels
def deprojectivize(proj_heads, deco_labels, EN): slen = len(proj_heads) sent = EN.tokenizer.tokens_from_list(['whatever'] * slen) rel_proj_heads = [head - i for i, head in enumerate(proj_heads)] labelids = [EN.vocab.strings[label] for label in deco_labels] pairs = list(zip(rel_proj_heads, labelids)) parse = numpy.asarray(pairs, dtype=numpy.int32) sent.from_array([HEAD, DEP], parse) PseudoProjectivity.deprojectivize(sent) parse = sent.to_array([HEAD, DEP]) deproj_heads = [i + head for i, head in enumerate(parse[:, 0])] undeco_labels = [EN.vocab.strings[int(labelid)] for labelid in parse[:, 1]] return deproj_heads, undeco_labels
def main(train_loc, dev_loc, model_dir, tag_map_loc=None): if tag_map_loc: with open(tag_map_loc) as file_: tag_map = json.loads(file_.read()) else: tag_map = DEFAULT_TAG_MAP train_sents = list(read_conllx(train_loc)) train_sents = PseudoProjectivity.preprocess_training_data(train_sents) actions = ArcEager.get_actions(gold_parses=train_sents) features = get_templates('basic') model_dir = pathlib.Path(model_dir) if not (model_dir / 'deps').exists(): (model_dir / 'deps').mkdir() with (model_dir / 'deps' / 'config.json').open('wb') as file_: file_.write( json.dumps( {'pseudoprojective': True, 'labels': actions, 'features': features}).encode('utf8')) vocab = Vocab(lex_attr_getters=Language.Defaults.lex_attr_getters, tag_map=tag_map) # Populate vocab for _, doc_sents in train_sents: for (ids, words, tags, heads, deps, ner), _ in doc_sents: for word in words: _ = vocab[word] for dep in deps: _ = vocab[dep] for tag in tags: _ = vocab[tag] if tag_map: for tag in tags: assert tag in tag_map, repr(tag) tagger = Tagger(vocab, tag_map=tag_map) parser = DependencyParser(vocab, actions=actions, features=features, L1=0.0) for itn in range(15): loss = 0. for _, doc_sents in train_sents: for (ids, words, tags, heads, deps, ner), _ in doc_sents: doc = Doc(vocab, words=words) gold = GoldParse(doc, tags=tags, heads=heads, deps=deps) tagger(doc) loss += parser.update(doc, gold, itn=itn) doc = Doc(vocab, words=words) tagger.update(doc, gold) random.shuffle(train_sents) scorer = score_model(vocab, tagger, parser, read_conllx(dev_loc)) print('%d:\t%.3f\t%.3f\t%.3f' % (itn, loss, scorer.uas, scorer.tags_acc)) nlp = Language(vocab=vocab, tagger=tagger, parser=parser) nlp.end_training(model_dir) scorer = score_model(vocab, tagger, parser, read_conllx(dev_loc)) print('%d:\t%.3f\t%.3f\t%.3f' % (itn, scorer.uas, scorer.las, scorer.tags_acc))
def main(train_loc, dev_loc, model_dir, tag_map_loc): with open(tag_map_loc) as file_: tag_map = json.loads(file_.read()) train_sents = list(read_conllx(train_loc)) train_sents = PseudoProjectivity.preprocess_training_data(train_sents) actions = ArcEager.get_actions(gold_parses=train_sents) features = get_templates('basic') model_dir = pathlib.Path(model_dir) with (model_dir / 'deps' / 'config.json').open('w') as file_: json.dump({'pseudoprojective': True, 'labels': actions, 'features': features}, file_) vocab = Vocab(lex_attr_getters=Language.Defaults.lex_attr_getters, tag_map=tag_map) # Populate vocab for _, doc_sents in train_sents: for (ids, words, tags, heads, deps, ner), _ in doc_sents: for word in words: _ = vocab[word] for dep in deps: _ = vocab[dep] for tag in tags: _ = vocab[tag] for tag in tags: assert tag in tag_map, repr(tag) tagger = Tagger(vocab, tag_map=tag_map) parser = DependencyParser(vocab, actions=actions, features=features) for itn in range(15): for _, doc_sents in train_sents: for (ids, words, tags, heads, deps, ner), _ in doc_sents: doc = Doc(vocab, words=words) gold = GoldParse(doc, tags=tags, heads=heads, deps=deps) tagger(doc) parser.update(doc, gold) doc = Doc(vocab, words=words) tagger.update(doc, gold) random.shuffle(train_sents) scorer = score_model(vocab, tagger, parser, read_conllx(dev_loc)) print('%d:\t%.3f\t%.3f' % (itn, scorer.uas, scorer.tags_acc)) nlp = Language(vocab=vocab, tagger=tagger, parser=parser) nlp.end_training(model_dir) scorer = score_model(vocab, tagger, parser, read_conllx(dev_loc)) print('%d:\t%.3f\t%.3f\t%.3f' % (itn, scorer.uas, scorer.las, scorer.tags_acc))
def main(lang_name, train_loc, dev_loc, model_dir, clusters_loc=None): LangClass = spacy.util.get_lang_class(lang_name) train_sents = list(read_conllx(train_loc)) train_sents = PseudoProjectivity.preprocess_training_data(train_sents) actions = ArcEager.get_actions(gold_parses=train_sents) features = get_templates('basic') model_dir = pathlib.Path(model_dir) if not model_dir.exists(): model_dir.mkdir() if not (model_dir / 'deps').exists(): (model_dir / 'deps').mkdir() if not (model_dir / 'pos').exists(): (model_dir / 'pos').mkdir() with (model_dir / 'deps' / 'config.json').open('wb') as file_: file_.write( json.dumps( {'pseudoprojective': True, 'labels': actions, 'features': features}).encode('utf8')) vocab = LangClass.Defaults.create_vocab() if not (model_dir / 'vocab').exists(): (model_dir / 'vocab').mkdir() else: if (model_dir / 'vocab' / 'strings.json').exists(): with (model_dir / 'vocab' / 'strings.json').open() as file_: vocab.strings.load(file_) if (model_dir / 'vocab' / 'lexemes.bin').exists(): vocab.load_lexemes(model_dir / 'vocab' / 'lexemes.bin') if clusters_loc is not None: clusters_loc = pathlib.Path(clusters_loc) with clusters_loc.open() as file_: for line in file_: try: cluster, word, freq = line.split() except ValueError: continue lex = vocab[word] lex.cluster = int(cluster[::-1], 2) # Populate vocab for _, doc_sents in train_sents: for (ids, words, tags, heads, deps, ner), _ in doc_sents: for word in words: _ = vocab[word] for dep in deps: _ = vocab[dep] for tag in tags: _ = vocab[tag] if vocab.morphology.tag_map: for tag in tags: assert tag in vocab.morphology.tag_map, repr(tag) tagger = Tagger(vocab) parser = DependencyParser(vocab, actions=actions, features=features, L1=0.0) for itn in range(30): loss = 0. for _, doc_sents in train_sents: for (ids, words, tags, heads, deps, ner), _ in doc_sents: doc = Doc(vocab, words=words) gold = GoldParse(doc, tags=tags, heads=heads, deps=deps) tagger(doc) loss += parser.update(doc, gold, itn=itn) doc = Doc(vocab, words=words) tagger.update(doc, gold) random.shuffle(train_sents) scorer = score_model(vocab, tagger, parser, read_conllx(dev_loc)) print('%d:\t%.3f\t%.3f\t%.3f' % (itn, loss, scorer.uas, scorer.tags_acc)) nlp = LangClass(vocab=vocab, tagger=tagger, parser=parser) nlp.end_training(model_dir) scorer = score_model(vocab, tagger, parser, read_conllx(dev_loc)) print('%d:\t%.3f\t%.3f\t%.3f' % (itn, scorer.uas, scorer.las, scorer.tags_acc))
def train(Language, gold_tuples, model_dir, n_iter=15, feat_set=u'basic', seed=0, gold_preproc=False, n_sents=0, corruption_level=0, beam_width=1, verbose=False, use_orig_arc_eager=False, pseudoprojective=False): dep_model_dir = path.join(model_dir, 'deps') ner_model_dir = path.join(model_dir, 'ner') pos_model_dir = path.join(model_dir, 'pos') if path.exists(dep_model_dir): shutil.rmtree(dep_model_dir) if path.exists(ner_model_dir): shutil.rmtree(ner_model_dir) if path.exists(pos_model_dir): shutil.rmtree(pos_model_dir) os.mkdir(dep_model_dir) os.mkdir(ner_model_dir) os.mkdir(pos_model_dir) if pseudoprojective: # preprocess training data here before ArcEager.get_labels() is called gold_tuples = PseudoProjectivity.preprocess_training_data(gold_tuples) Config.write(dep_model_dir, 'config', features=feat_set, seed=seed, labels=ArcEager.get_labels(gold_tuples), beam_width=beam_width, projectivize=pseudoprojective) Config.write(ner_model_dir, 'config', features='ner', seed=seed, labels=BiluoPushDown.get_labels(gold_tuples), beam_width=0) if n_sents > 0: gold_tuples = gold_tuples[:n_sents] nlp = Language(data_dir=model_dir, tagger=False, parser=False, entity=False) if nlp.lang == 'de': nlp.vocab.morphology.lemmatizer = lambda string, pos: set([string]) nlp.tagger = Tagger.blank(nlp.vocab, Tagger.default_templates()) nlp.parser = Parser.from_dir(dep_model_dir, nlp.vocab.strings, ArcEager) nlp.entity = Parser.from_dir(ner_model_dir, nlp.vocab.strings, BiluoPushDown) print("Itn.\tP.Loss\tUAS\tNER F.\tTag %\tToken %") for itn in range(n_iter): scorer = Scorer() loss = 0 for raw_text, sents in gold_tuples: if gold_preproc: raw_text = None else: sents = _merge_sents(sents) for annot_tuples, ctnt in sents: if len(annot_tuples[1]) == 1: continue score_model(scorer, nlp, raw_text, annot_tuples, verbose=verbose if itn >= 2 else False) if raw_text is None: words = add_noise(annot_tuples[1], corruption_level) tokens = nlp.tokenizer.tokens_from_list(words) else: raw_text = add_noise(raw_text, corruption_level) tokens = nlp.tokenizer(raw_text) nlp.tagger(tokens) gold = GoldParse(tokens, annot_tuples) if not gold.is_projective: raise Exception("Non-projective sentence in training: %s" % annot_tuples) loss += nlp.parser.train(tokens, gold) nlp.entity.train(tokens, gold) nlp.tagger.train(tokens, gold.tags) random.shuffle(gold_tuples) print('%d:\t%d\t%.3f\t%.3f\t%.3f\t%.3f' % (itn, loss, scorer.uas, scorer.ents_f, scorer.tags_acc, scorer.token_acc)) print('end training') nlp.end_training(model_dir) print('done')
def test_pseudoprojectivity(EN): tree = [1, 2, 2] nonproj_tree = [1, 2, 2, 4, 5, 2, 7, 4, 2] labels = ["det", "nsubj", "root", "det", "dobj", "aux", "nsubj", "acl", "punct"] nonproj_tree2 = [9, 1, 3, 1, 5, 6, 9, 8, 6, 1, 6, 12, 13, 10, 1] labels2 = [ "advmod", "root", "det", "nsubj", "advmod", "det", "dobj", "det", "nmod", "aux", "nmod", "advmod", "det", "amod", "punct", ] assert PseudoProjectivity.decompose("X||Y") == ("X", "Y") assert PseudoProjectivity.decompose("X") == ("X", "") assert PseudoProjectivity.is_decorated("X||Y") == True assert PseudoProjectivity.is_decorated("X") == False PseudoProjectivity._lift(0, tree) assert tree == [2, 2, 2] np_arc = PseudoProjectivity._get_smallest_nonproj_arc(nonproj_tree) assert np_arc == 7 np_arc = PseudoProjectivity._get_smallest_nonproj_arc(nonproj_tree2) assert np_arc == 10 proj_heads, deco_labels = PseudoProjectivity.projectivize(nonproj_tree, labels) assert proj_heads == [1, 2, 2, 4, 5, 2, 7, 5, 2] assert deco_labels == ["det", "nsubj", "root", "det", "dobj", "aux", "nsubj", "acl||dobj", "punct"] deproj_heads, undeco_labels = deprojectivize(proj_heads, deco_labels, EN) assert deproj_heads == nonproj_tree assert undeco_labels == labels proj_heads, deco_labels = PseudoProjectivity.projectivize(nonproj_tree2, labels2) assert proj_heads == [1, 1, 3, 1, 5, 6, 9, 8, 6, 1, 9, 12, 13, 10, 1] assert deco_labels == [ "advmod||aux", "root", "det", "nsubj", "advmod", "det", "dobj", "det", "nmod", "aux", "nmod||dobj", "advmod", "det", "amod", "punct", ] deproj_heads, undeco_labels = deprojectivize(proj_heads, deco_labels, EN) assert deproj_heads == nonproj_tree2 assert undeco_labels == labels2 # if decoration is wrong such that there is no head with the desired label # the structure is kept and the label is undecorated proj_heads = [1, 2, 2, 4, 5, 2, 7, 5, 2] deco_labels = ["det", "nsubj", "root", "det", "dobj", "aux", "nsubj", "acl||iobj", "punct"] deproj_heads, undeco_labels = deprojectivize(proj_heads, deco_labels, EN) assert deproj_heads == proj_heads assert undeco_labels == ["det", "nsubj", "root", "det", "dobj", "aux", "nsubj", "acl", "punct"] # if there are two potential new heads, the first one is chosen even if it's wrong proj_heads = [1, 1, 3, 1, 5, 6, 9, 8, 6, 1, 9, 12, 13, 10, 1] deco_labels = [ "advmod||aux", "root", "det", "aux", "advmod", "det", "dobj", "det", "nmod", "aux", "nmod||dobj", "advmod", "det", "amod", "punct", ] deproj_heads, undeco_labels = deprojectivize(proj_heads, deco_labels, EN) assert deproj_heads == [3, 1, 3, 1, 5, 6, 9, 8, 6, 1, 6, 12, 13, 10, 1] assert undeco_labels == [ "advmod", "root", "det", "aux", "advmod", "det", "dobj", "det", "nmod", "aux", "nmod", "advmod", "det", "amod", "punct", ]
def train(Language, gold_tuples, model_dir, n_iter=15, feat_set=u'basic', seed=0, gold_preproc=False, n_sents=0, corruption_level=0, beam_width=1, verbose=False, use_orig_arc_eager=False, pseudoprojective=False): dep_model_dir = path.join(model_dir, 'deps') ner_model_dir = path.join(model_dir, 'ner') pos_model_dir = path.join(model_dir, 'pos') if path.exists(dep_model_dir): shutil.rmtree(dep_model_dir) if path.exists(ner_model_dir): shutil.rmtree(ner_model_dir) if path.exists(pos_model_dir): shutil.rmtree(pos_model_dir) os.mkdir(dep_model_dir) os.mkdir(ner_model_dir) os.mkdir(pos_model_dir) if pseudoprojective: # preprocess training data here before ArcEager.get_labels() is called gold_tuples = PseudoProjectivity.preprocess_training_data(gold_tuples) Config.write(dep_model_dir, 'config', features=feat_set, seed=seed, labels=ArcEager.get_labels(gold_tuples), beam_width=beam_width,projectivize=pseudoprojective) Config.write(ner_model_dir, 'config', features='ner', seed=seed, labels=BiluoPushDown.get_labels(gold_tuples), beam_width=0) if n_sents > 0: gold_tuples = gold_tuples[:n_sents] nlp = Language(data_dir=model_dir, tagger=False, parser=False, entity=False) if nlp.lang == 'de': nlp.vocab.morphology.lemmatizer = lambda string,pos: set([string]) nlp.tagger = Tagger.blank(nlp.vocab, Tagger.default_templates()) nlp.parser = Parser.from_dir(dep_model_dir, nlp.vocab.strings, ArcEager) nlp.entity = Parser.from_dir(ner_model_dir, nlp.vocab.strings, BiluoPushDown) print("Itn.\tP.Loss\tUAS\tNER F.\tTag %\tToken %") for itn in range(n_iter): scorer = Scorer() loss = 0 for raw_text, sents in gold_tuples: if gold_preproc: raw_text = None else: sents = _merge_sents(sents) for annot_tuples, ctnt in sents: if len(annot_tuples[1]) == 1: continue score_model(scorer, nlp, raw_text, annot_tuples, verbose=verbose if itn >= 2 else False) if raw_text is None: words = add_noise(annot_tuples[1], corruption_level) tokens = nlp.tokenizer.tokens_from_list(words) else: raw_text = add_noise(raw_text, corruption_level) tokens = nlp.tokenizer(raw_text) nlp.tagger(tokens) gold = GoldParse(tokens, annot_tuples) if not gold.is_projective: raise Exception("Non-projective sentence in training: %s" % annot_tuples[1]) loss += nlp.parser.train(tokens, gold) nlp.entity.train(tokens, gold) nlp.tagger.train(tokens, gold.tags) random.shuffle(gold_tuples) print('%d:\t%d\t%.3f\t%.3f\t%.3f\t%.3f' % (itn, loss, scorer.uas, scorer.ents_f, scorer.tags_acc, scorer.token_acc)) print('end training') nlp.end_training(model_dir) print('done')
def main(lang_name, train_loc, dev_loc, model_dir, clusters_loc=None): LangClass = spacy.util.get_lang_class(lang_name) train_sents = list(read_conllx(train_loc)) train_sents = PseudoProjectivity.preprocess_training_data(train_sents) actions = ArcEager.get_actions(gold_parses=train_sents) features = get_templates('basic') model_dir = pathlib.Path(model_dir) if not model_dir.exists(): model_dir.mkdir() if not (model_dir / 'deps').exists(): (model_dir / 'deps').mkdir() if not (model_dir / 'pos').exists(): (model_dir / 'pos').mkdir() with (model_dir / 'deps' / 'config.json').open('wb') as file_: file_.write( json.dumps( {'pseudoprojective': True, 'labels': actions, 'features': features}).encode('utf8')) vocab = LangClass.Defaults.create_vocab() if not (model_dir / 'vocab').exists(): (model_dir / 'vocab').mkdir() else: if (model_dir / 'vocab' / 'strings.json').exists(): with (model_dir / 'vocab' / 'strings.json').open() as file_: vocab.strings.load(file_) if (model_dir / 'vocab' / 'lexemes.bin').exists(): vocab.load_lexemes(model_dir / 'vocab' / 'lexemes.bin') if clusters_loc is not None: clusters_loc = pathlib.Path(clusters_loc) with clusters_loc.open() as file_: for line in file_: try: cluster, word, freq = line.split() except ValueError: continue lex = vocab[word] lex.cluster = int(cluster[::-1], 2) # Populate vocab for _, doc_sents in train_sents: for (ids, words, tags, heads, deps, ner), _ in doc_sents: for word in words: _ = vocab[word] for dep in deps: _ = vocab[dep] for tag in tags: _ = vocab[tag] if vocab.morphology.tag_map: for tag in tags: assert tag in vocab.morphology.tag_map, repr(tag) tagger = Tagger(vocab) parser = DependencyParser(vocab, actions=actions, features=features, L1=0.0) for itn in range(30): loss = 0. for _, doc_sents in train_sents: for (ids, words, tags, heads, deps, ner), _ in doc_sents: doc = Doc(vocab, words=words) gold = GoldParse(doc, tags=tags, heads=heads, deps=deps) tagger(doc) loss += parser.update(doc, gold, itn=itn) doc = Doc(vocab, words=words) tagger.update(doc, gold) random.shuffle(train_sents) scorer = score_model(vocab, tagger, parser, read_conllx(dev_loc)) print('%d:\t%.3f\t%.3f\t%.3f' % (itn, loss, scorer.uas, scorer.tags_acc)) nlp = Language(vocab=vocab, tagger=tagger, parser=parser) nlp.end_training(model_dir) scorer = score_model(vocab, tagger, parser, read_conllx(dev_loc)) print('%d:\t%.3f\t%.3f\t%.3f' % (itn, scorer.uas, scorer.las, scorer.tags_acc))
def test_pseudoprojectivity(EN): tree = [1, 2, 2] nonproj_tree = [1, 2, 2, 4, 5, 2, 7, 4, 2] labels = [ 'det', 'nsubj', 'root', 'det', 'dobj', 'aux', 'nsubj', 'acl', 'punct' ] nonproj_tree2 = [9, 1, 3, 1, 5, 6, 9, 8, 6, 1, 6, 12, 13, 10, 1] labels2 = [ 'advmod', 'root', 'det', 'nsubj', 'advmod', 'det', 'dobj', 'det', 'nmod', 'aux', 'nmod', 'advmod', 'det', 'amod', 'punct' ] assert (PseudoProjectivity.decompose('X||Y') == ('X', 'Y')) assert (PseudoProjectivity.decompose('X') == ('X', '')) assert (PseudoProjectivity.is_decorated('X||Y') == True) assert (PseudoProjectivity.is_decorated('X') == False) PseudoProjectivity._lift(0, tree) assert (tree == [2, 2, 2]) np_arc = PseudoProjectivity._get_smallest_nonproj_arc(nonproj_tree) assert (np_arc == 7) np_arc = PseudoProjectivity._get_smallest_nonproj_arc(nonproj_tree2) assert (np_arc == 10) proj_heads, deco_labels = PseudoProjectivity.projectivize( nonproj_tree, labels) assert (proj_heads == [1, 2, 2, 4, 5, 2, 7, 5, 2]) assert (deco_labels == [ 'det', 'nsubj', 'root', 'det', 'dobj', 'aux', 'nsubj', 'acl||dobj', 'punct' ]) deproj_heads, undeco_labels = deprojectivize(proj_heads, deco_labels, EN) assert (deproj_heads == nonproj_tree) assert (undeco_labels == labels) proj_heads, deco_labels = PseudoProjectivity.projectivize( nonproj_tree2, labels2) assert (proj_heads == [1, 1, 3, 1, 5, 6, 9, 8, 6, 1, 9, 12, 13, 10, 1]) assert (deco_labels == [ 'advmod||aux', 'root', 'det', 'nsubj', 'advmod', 'det', 'dobj', 'det', 'nmod', 'aux', 'nmod||dobj', 'advmod', 'det', 'amod', 'punct' ]) deproj_heads, undeco_labels = deprojectivize(proj_heads, deco_labels, EN) assert (deproj_heads == nonproj_tree2) assert (undeco_labels == labels2) # if decoration is wrong such that there is no head with the desired label # the structure is kept and the label is undecorated proj_heads = [1, 2, 2, 4, 5, 2, 7, 5, 2] deco_labels = [ 'det', 'nsubj', 'root', 'det', 'dobj', 'aux', 'nsubj', 'acl||iobj', 'punct' ] deproj_heads, undeco_labels = deprojectivize(proj_heads, deco_labels, EN) assert (deproj_heads == proj_heads) assert (undeco_labels == [ 'det', 'nsubj', 'root', 'det', 'dobj', 'aux', 'nsubj', 'acl', 'punct' ]) # if there are two potential new heads, the first one is chosen even if it's wrong proj_heads = [1, 1, 3, 1, 5, 6, 9, 8, 6, 1, 9, 12, 13, 10, 1] deco_labels = [ 'advmod||aux', 'root', 'det', 'aux', 'advmod', 'det', 'dobj', 'det', 'nmod', 'aux', 'nmod||dobj', 'advmod', 'det', 'amod', 'punct' ] deproj_heads, undeco_labels = deprojectivize(proj_heads, deco_labels, EN) assert (deproj_heads == [3, 1, 3, 1, 5, 6, 9, 8, 6, 1, 6, 12, 13, 10, 1]) assert (undeco_labels == [ 'advmod', 'root', 'det', 'aux', 'advmod', 'det', 'dobj', 'det', 'nmod', 'aux', 'nmod', 'advmod', 'det', 'amod', 'punct' ])