def test(sentences_io, model_file):
    """Tag and parse each sentence from ``sentences_io`` and print the results.

    A ``Parser`` is created and its model is loaded from ``model_file``.
    Every item yielded by ``sentences_io`` is run through ``camxes.tag``;
    the resulting (word, tag) pairs are split into two lists, both lists are
    passed through ``pad_tokens``, and the parser is run on them.

    Args:
        sentences_io: Iterable of sentences; each item is handed unchanged to
            ``camxes.tag``.  (Presumably a file-like object yielding one
            sentence per line -- confirm against the caller.)
        model_file: Passed straight to ``parser.model.load``.

    Returns:
        None.  For every sentence the tagged pairs and then the parse result
        are printed.
    """
    parser = Parser()
    parser.model.load(model_file)

    for raw_sentence in sentences_io:
        tagged = list(camxes.tag(raw_sentence))

        # Split the (word, tag) pairs into two parallel lists.
        tokens = []
        pos_tags = []
        for token, pos_tag in tagged:
            tokens.append(token)
            pos_tags.append(pos_tag)

        # pad_tokens' return value is unused, so it is presumably in-place.
        pad_tokens(tokens)
        pad_tokens(pos_tags)

        # A parenthesised single expression prints exactly like the bare
        # Python 2 print statement.
        print(tagged)
        print(parser.parse(tokens, pos_tags))
def parse_sent_str(sent_lines):
    """Build parallel word/tag/head/label sequences for one sentence.

    Each entry of ``sent_lines`` is split on whitespace.  Field 1 is taken as
    the word, field 2 as the head index and field 3, when present, as the
    label (an empty string is used otherwise).  Field 0 is ignored.  Tags are
    not read from the input: the words are joined with single spaces and the
    resulting string is handed to ``camxes.tag``.

    Args:
        sent_lines: Sized sequence of strings, one per token (``len()`` is
            taken of it and it is iterated once).

    Returns:
        A ``(words, tags, heads, labels)`` tuple.  ``words`` (a
        ``DefaultList``) and ``tags`` (a plain list) have both been passed
        through ``pad_tokens``.  ``heads`` and ``labels`` are plain lists
        that start with a ``None`` placeholder at index 0 and are not passed
        through ``pad_tokens``.

    Raises:
        AssertionError: If ``camxes.tag`` yields a different number of items
            than there are input words.
        IndexError: If a line has fewer than three whitespace-separated
            fields.
        ValueError: If a head field is not an integer literal.
    """
    # A head of '-1' is mapped to the index one past the last token (every
    # other head is shifted up by one).
    # NOTE(review): presumably this is the slot of the trailing token that
    # pad_tokens appends -- confirm against pad_tokens.
    root_index = len(sent_lines) + 1

    words = DefaultList('')
    heads = [None]
    labels = [None]
    for line in sent_lines:
        fields = line.split()
        word, head = fields[1], fields[2]
        label = fields[3] if len(fields) >= 4 else ''
        words.append(intern(word))
        heads.append(root_index if head == '-1' else int(head) + 1)
        labels.append(label)

    tagged_words = list(camxes.tag(' '.join(words)))

    # Raised explicitly (rather than via an ``assert`` statement) so the
    # alignment check between words and tags is not stripped under ``-O``.
    if len(tagged_words) != len(words):
        raise AssertionError(
            'camxes.tag returned %d tokens for %d input words'
            % (len(tagged_words), len(words)))

    tags = [tag for _, tag in tagged_words]

    # pad_tokens' return value is unused, so it is presumably in-place.
    pad_tokens(words)
    pad_tokens(tags)
    return words, tags, heads, labels