def test_parser(self):
    """Decode a hand-annotated sentence by following oracle transitions.

    Builds a fixed set of dependency arcs for 'mi klama le zarci', derives
    the oracle transition function from them, and prints whatever
    ``decode`` returns.
    """
    # The original passed an undefined name ``sent`` to decode(), which
    # raised NameError before anything was printed.
    # NOTE(review): assumes decode() takes the raw sentence string, matching
    # how parse_sent_str uses ``sent``; pass ``tagged_words`` instead if it
    # expects tagged tokens -- confirm against decode().
    sent = 'mi klama le zarci'
    # Kept from the original; the result is not used below.
    tagged_words = camxes.tag(sent)
    # child/parent are word positions in the sentence; parent=-1 presumably
    # marks the root -- confirm against DependencyArc.
    arcs = {
        DependencyArc(child=0, parent=1, label='PLACE_1'),
        DependencyArc(child=1, parent=-1, label='MAIN_BRIDI'),
        DependencyArc(child=2, parent=3, label='DESCRIPTOR'),
        DependencyArc(child=3, parent=1, label='PLACE_2'),
    }
    next_transition = get_oracle_transition_func(arcs)
    # Single-argument parenthesised print behaves the same as the Python 2
    # print statement.
    print(decode(sent, next_transition))
def test(sentences_io, model_file):
    """Tag, print and parse every sentence with a model loaded from disk.

    Args:
        sentences_io: iterable whose items are passed one by one to
            ``camxes.tag`` (presumably sentence strings or file lines).
        model_file: passed to ``parser.model.load``.

    For each sentence the tagged pairs are printed, followed by the result
    of ``parser.parse`` on the padded word and tag lists.
    """
    dep_parser = Parser()
    dep_parser.model.load(model_file)
    for sentence in sentences_io:
        tagged_words = list(camxes.tag(sentence))
        # NOTE(review): pairs are unpacked as (word, tag) here, while
        # test_camxes in this file expects (tag, word) -- confirm the order.
        words = []
        tags = []
        for first, second in tagged_words:
            words.append(first)
            tags.append(second)
        # pad_tokens is called for its effect on the lists; its return
        # value is ignored.
        pad_tokens(words)
        pad_tokens(tags)
        print(tagged_words)
        print(dep_parser.parse(words, tags))
def parse_sent_str(sent_lines):
    """Turn the lines of one annotated sentence into parallel token lists.

    Each line is split on whitespace; field 1 is the word, field 2 the head
    index and the optional field 3 the dependency label ('' when absent).
    The tags are not read from the lines: the words are joined with spaces
    and re-tagged with ``camxes.tag``.

    Args:
        sent_lines: sized sequence of whitespace-separated token lines.

    Returns:
        ``(words, tags, heads, labels)``. ``words`` and ``tags`` are
        ``DefaultList('')`` instances that have been passed through
        ``pad_tokens``; ``heads`` and ``labels`` are plain lists whose
        element 0 is ``None``.

    Raises:
        AssertionError: if camxes returns a different number of tokens than
            there are words.
    """
    words = DefaultList('')
    tags = DefaultList('')
    # Index 0 is a None placeholder in both lists.
    heads = [None]
    labels = [None]
    for line in sent_lines:
        fields = line.split()
        word, head = fields[1], fields[2]
        label = fields[3] if len(fields) >= 4 else ''
        words.append(intern(word))
        # Heads are shifted up by one; a head of '-1' is mapped to
        # len(sent_lines) + 1 instead.
        if head == '-1':
            heads.append(len(sent_lines) + 1)
        else:
            heads.append(int(head) + 1)
        labels.append(label)

    sent = ' '.join(words)
    tagged_words = list(camxes.tag(sent))
    assert len(tagged_words) == len(words), (
        'camxes returned %d tokens for %d words'
        % (len(tagged_words), len(words)))

    # Fill the DefaultList created above rather than rebinding ``tags`` to a
    # plain list (as the original did), so words and tags share one type.
    # NOTE(review): the second element of each pair is taken as the tag,
    # while test_camxes in this file expects (tag, word) -- confirm the order.
    for _, tag in tagged_words:
        tags.append(tag)

    pad_tokens(words)
    pad_tokens(tags)
    return words, tags, heads, labels
def test_camxes(self):
    """camxes.tag yields the expected pair for each word of a sample sentence."""
    expected = [
        ('KOhA', 'mi'),
        ('gismu', 'klama'),
        ('LE', 'le'),
        ('gismu', 'zarci'),
    ]
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expected, list(camxes.tag('mi klama le zarci')))