def score_model(vocab, tagger, parser, gold_docs, verbose=False):
    """Tag and parse every gold sentence and accumulate scores.

    Args:
        vocab: Vocabulary handed to each freshly built ``Doc``.
        tagger: Callable applied in place to each ``Doc``.
        parser: Callable applied in place to each ``Doc`` after the tagger.
        gold_docs: Iterable of ``(_, gold_doc)`` pairs; each ``gold_doc`` is an
            iterable of ``((ids, words, tags, heads, deps, entities), _)``
            pairs.
        verbose: Forwarded unchanged to ``Scorer.score``.

    Returns:
        The ``Scorer`` that ``score`` was called on for every sentence.
    """
    scorer = Scorer()
    for _, sentences in gold_docs:
        for annotation, _ in sentences:
            # Only words, tags, heads and deps are used; ids and entities
            # are unpacked solely to validate the six-field layout.
            _ids, words, tags, heads, deps, _entities = annotation

            predicted = Doc(vocab, words=words)
            tagger(predicted)
            parser(predicted)
            # Deprojectivize the parsed doc in place before comparing it
            # with the gold heads/deps.
            PseudoProjectivity.deprojectivize(predicted)

            reference = GoldParse(predicted, tags=tags, heads=heads, deps=deps)
            scorer.score(predicted, reference, verbose=verbose)
    return scorer
def deprojectivize(proj_heads, deco_labels, EN):
    """Run ``PseudoProjectivity.deprojectivize`` on a bare heads/labels pair.

    A placeholder document with one dummy token per head is created through
    ``EN.tokenizer``, the heads and labels are loaded into it, the document is
    deprojectivized in place, and the resulting annotation is read back.

    Args:
        proj_heads: Absolute head index for each token. Converted to offsets
            relative to the token's own position before being loaded.
        deco_labels: Dependency label string for each token, mapped to ids
            through ``EN.vocab.strings``.
        EN: Object exposing ``tokenizer.tokens_from_list`` and
            ``vocab.strings``.

    Returns:
        A ``(deproj_heads, undeco_labels)`` tuple of lists: absolute head
        indices and label strings read back from the document. Both lists are
        empty when ``proj_heads`` is empty.
    """
    slen = len(proj_heads)
    if slen == 0:
        # An empty pair list turns into a 1-D array of shape (0,) instead of
        # the two-column (n, 2) layout used for non-empty input, so there is
        # nothing valid to load into the document. An empty sentence trivially
        # has no heads or labels.
        return [], []

    # The token text is irrelevant; only the sentence length matters.
    sent = EN.tokenizer.tokens_from_list(["whatever"] * slen)

    # Heads are loaded as offsets relative to each token's position.
    rel_proj_heads = [head - i for i, head in enumerate(proj_heads)]
    labelids = [EN.vocab.strings[label] for label in deco_labels]
    pairs = list(zip(rel_proj_heads, labelids))
    parse = numpy.asarray(pairs, dtype=numpy.int32)
    sent.from_array([HEAD, DEP], parse)

    PseudoProjectivity.deprojectivize(sent)

    # Column 0 holds the relative heads, column 1 the label ids.
    parse = sent.to_array([HEAD, DEP])
    deproj_heads = [i + head for i, head in enumerate(parse[:, 0])]
    undeco_labels = [EN.vocab.strings[int(labelid)] for labelid in parse[:, 1]]
    return deproj_heads, undeco_labels
def deprojectivize(proj_heads, deco_labels, EN):
    """Deprojectivize a sentence given only its heads and decorated labels.

    The heads and labels are written into a dummy document of matching length
    built with ``EN.tokenizer``, ``PseudoProjectivity.deprojectivize`` is
    applied to that document, and the updated heads and labels are extracted.

    Args:
        proj_heads: Absolute head index per token. Stored in the document as
            an offset from the token's own index.
        deco_labels: Dependency label string per token, resolved to ids via
            ``EN.vocab.strings``.
        EN: Object providing ``tokenizer.tokens_from_list`` and
            ``vocab.strings``.

    Returns:
        Tuple ``(deproj_heads, undeco_labels)`` of two lists holding the
        absolute head indices and the label strings after deprojectivization.
        For empty input both lists are empty.
    """
    slen = len(proj_heads)
    if slen == 0:
        # With no tokens, numpy.asarray yields shape (0,) rather than a
        # two-column array, which does not match the [HEAD, DEP] layout.
        # Short-circuit: an empty sentence has no heads and no labels.
        return [], []

    # Placeholder tokens: only the number of tokens is significant.
    sent = EN.tokenizer.tokens_from_list(['whatever'] * slen)

    # Convert absolute heads to offsets from each token's own index.
    rel_proj_heads = [head - i for i, head in enumerate(proj_heads)]
    labelids = [EN.vocab.strings[label] for label in deco_labels]
    pairs = list(zip(rel_proj_heads, labelids))
    parse = numpy.asarray(pairs, dtype=numpy.int32)
    sent.from_array([HEAD, DEP], parse)

    PseudoProjectivity.deprojectivize(sent)

    # Read back: first column is the relative head, second the label id.
    parse = sent.to_array([HEAD, DEP])
    deproj_heads = [i + head for i, head in enumerate(parse[:, 0])]
    undeco_labels = [EN.vocab.strings[int(labelid)] for labelid in parse[:, 1]]
    return deproj_heads, undeco_labels