예제 #1
0
 def test_createTokens(self):
     """Verify the token list produced by ``Document.createTokens()``.

     The expected list pins lowercase words, the date ``12.03.1998`` and
     the fraction ``103/78`` as single tokens, punctuation marks as
     separate tokens, and repeated words kept as repeats.
     """
     raw_text = (
         'Test of tokenization\n dates like 12.03.1998, 103/78 and Words '
         'should be lowered and appear more more often.?'
     )
     document = Document('Test Doc', raw_text)
     document.createTokens()

     # No expected token contains whitespace, so a split on whitespace
     # yields exactly the expected list.
     expected_tokens = (
         'test of tokenization dates like 12.03.1998 , 103/78 and words '
         'should be lowered and appear more more often . ?'
     ).split()

     # The expectation is stored on the shared target document and
     # compared through that attribute.
     self.targetDocument.tokens = expected_tokens
     self.assertEqual(document.tokens, self.targetDocument.tokens)
예제 #2
0
    def test_appendEntities(self):
        """Verify the token list after ``Document.appendEntities()``.

        Runs ``createEntities()``, ``createTokens()`` and
        ``appendEntities()`` in that order, then expects the lowercase
        word tokens first, followed by the entity phrases (some of them
        multi-word) at the end of the list.
        """
        raw_text = (
            'Name entities like World Health Organization, person names like '
            'Sir James and Ms Rosa Wallis but also world locations or states '
            'like Lebanon, United States of America, Lebanon or new cities '
            'like New York have to be recognized'
        )
        document = Document('Test Document', raw_text)
        document.createEntities()
        document.createTokens()
        document.appendEntities()

        # Single-word tokens expected at the front of the list; none of
        # them contains whitespace, so a split reproduces them exactly.
        word_tokens = (
            'name entities like , person names like sir and ms but also '
            'world locations or like , states , or cities like new have to '
            'be recognized'
        ).split()
        # Entity phrases expected after the word tokens, in this order.
        entity_tokens = [
            'world health organization',
            'lebanon',
            'lebanon',
            'united states of america',
            'new york',
            'james',
            'rosa wallis',
        ]

        self.targetDocument.tokens = word_tokens + entity_tokens
        self.assertEqual(self.targetDocument.tokens, document.tokens)