예제 #1
0
    def test_missing_pos(self):
        """Check that no token from the missing-POS fixture has pos 'unk'."""
        utterances = MorParser().parse("fixtures/missing_pos.xml")
        for _uid, _speaker, words in utterances:
            for word in words:
                self.assertNotEqual(
                    word.pos, 'unk', 'failed to parse known tag')
예제 #2
0
 def test_clitics(self):
     """Check clitic splitting on the clitics fixture and on a bare word form.

     Every utterance yielded for the fixture must contain more than one
     token and no token whose word is "?". The tokens of the last utterance
     must render to the expected string, and
     ``split_clitic_wordform("that's")`` must return the head ``"that"``
     with the tail ``["'s"]``.
     """
     parser = MorParser()
     # Pre-bind the name so that an empty fixture produces a clear assertion
     # failure below rather than a NameError on the loop variable.
     tokens = None
     for _uid, _speaker, tokens in parser.parse("fixtures/clitics.xml"):
         self.assertGreater(len(tokens), 1,
                            "failed splitting {0} into clitics".format(tokens))
         self.assertNotIn("?", [w.word for w in tokens])
     self.assertIsNotNone(tokens,
                          "fixtures/clitics.xml yielded no utterances")
     # Only the final utterance's tokens are compared with the expected text.
     self.assertEqual(' '.join(map(str, tokens)),
                      ("hidden/part|hide&PERF away/adv|away where/adv:wh|where "
                       "nobody/pro:indef|nobody 'd/mod|genmod be/v:cop|be ./.|."))
     head, tail = parser.split_clitic_wordform("that's")
     self.assertEqual(head, "that")
     self.assertEqual(tail, ["'s"])
예제 #3
0
 def test_commas(self):
     """Check that each utterance in the commas fixture has a ',' stem."""
     utterances = MorParser().parse("fixtures/commas.xml")
     for _uid, _speaker, words in utterances:
         stems = [token.stem for token in words]
         self.assertIn(',', stems)
예제 #4
0
 def test_document(self):
     """Smoke test: parsing fixtures/test_doc.xml must not raise."""
     utterances = MorParser().parse("fixtures/test_doc.xml")
     # Iterate through and ensure no exceptions are thrown; the parsed
     # items themselves are not inspected.
     for _utterance in utterances:
         pass