def test_missing_pos(self):
    """Check that no token from ``fixtures/missing_pos.xml`` has pos ``'unk'``.

    Every token in every (uid, speaker, tokens) triple yielded by
    ``MorParser.parse`` is inspected; a ``pos`` equal to ``'unk'`` fails
    the test with the message 'failed to parse known tag'.
    """
    morph_parser = MorParser()
    parsed = morph_parser.parse("fixtures/missing_pos.xml")
    for _uid, _speaker, token_seq in parsed:
        for tok in token_seq:
            self.assertNotEqual(tok.pos, 'unk', 'failed to parse known tag')
def test_clitics(self):
    """Check clitic handling on ``fixtures/clitics.xml`` and on a bare word form.

    For each (uid, speaker, tokens) triple yielded by ``MorParser.parse``:
    the token list must hold more than one token, no token's ``word`` may
    be "?", and the space-joined ``str()`` of the tokens must equal the
    expected tagged sentence.  Finally, ``split_clitic_wordform("that's")``
    must return the head "that" and the tail ``["'s"]``.
    """
    expected_line = ("hidden/part|hide&PERF away/adv|away where/adv:wh|where "
                     "nobody/pro:indef|nobody 'd/mod|genmod be/v:cop|be ./.|.")

    morph_parser = MorParser()
    for _uid, _speaker, tokens in morph_parser.parse("fixtures/clitics.xml"):
        failure_msg = "failed splitting {0} into clitics".format(tokens)
        self.assertGreater(len(tokens), 1, failure_msg)

        surface_forms = [tok.word for tok in tokens]
        self.assertNotIn("?", surface_forms)

        rendered = ' '.join(str(tok) for tok in tokens)
        self.assertEqual(rendered, expected_line)

    # NOTE(review): assumed to run once, after the loop above — confirm
    # against the intended layout of this test.
    head, tail = morph_parser.split_clitic_wordform("that's")
    self.assertEqual(head, "that")
    self.assertEqual(tail, ["'s"])
def test_commas(self):
    """Check that commas survive parsing of ``fixtures/commas.xml``.

    For each (uid, speaker, tokens) triple yielded by ``MorParser.parse``,
    at least one token must have ``','`` as its ``stem``.
    """
    morph_parser = MorParser()
    for _uid, _speaker, token_seq in morph_parser.parse("fixtures/commas.xml"):
        stems = [tok.stem for tok in token_seq]
        self.assertIn(',', stems)
def test_document(self):
    """Smoke test: parse ``fixtures/test_doc.xml`` to exhaustion.

    Nothing is asserted about the yielded items; the test passes as long
    as iterating over the parser output raises no exception.
    """
    morph_parser = MorParser()
    for _item in morph_parser.parse("fixtures/test_doc.xml"):
        # Consuming each item is the whole point; there is nothing to check.
        continue