예제 #1
0
 def test_all(self):
     # Exercise size, unknown-word lookup and insertion on a WordsList
     # built from VOCAB.
     vocab = WordsList(VOCAB)
     self.assertEqual(vocab.get_size(), 20)

     # 'toto' is expected to be reported as unknown.
     self.assertTrue(vocab.is_unk('toto'))

     # These entries are expected to be known, including one with an
     # apostrophe and one non-ASCII token.
     for known in ('normale', "isn't", u"đ"):
         self.assertFalse(vocab.is_unk(known))

     # Once the unicode literal is added, is_in() must find it ...
     vocab.add(u"être")
     self.assertTrue(vocab.is_in(u"être"))
     # ... while the un-prefixed literal is still expected to be unknown.
     # NOTE(review): presumably relies on str and unicode being distinct
     # types (Python 2) -- confirm before porting.
     self.assertTrue(vocab.is_unk("être"))
예제 #2
0
    def testVocab(self):
        # Count unigrams in self.corpusfile against a vocabulary limited
        # to the three words "a", "b" and "c".
        vocab = WordsList()
        for word in ("a", "b", "c"):
            vocab.add(word)

        counter = NgramCounter(1, vocab)
        counter.count(self.corpusfile)
        count_of = counter.get_count

        # Words that belong to the vocabulary.
        self.assertEqual(count_of('a'), 15)
        self.assertEqual(count_of('b'), 10)
        self.assertEqual(count_of('c'), 4)

        # 'd' was never added: its own count is 0.
        # NOTE(review): presumably out-of-vocabulary tokens are tallied
        # under UNKSTAMP instead -- confirm against NgramCounter.
        self.assertEqual(count_of('d'), 0)
        self.assertEqual(count_of(UNKSTAMP), 3)

        # Sentence boundary symbols: start is expected at 0, end at 3.
        self.assertEqual(count_of(START_SENT_SYMBOL), 0)
        self.assertEqual(count_of(END_SENT_SYMBOL), 3)