def test_all(self):
    """Exercise size, unknown-word lookup and insertion on a WordsList built from VOCAB."""
    vocab = WordsList(VOCAB)
    self.assertEqual(vocab.get_size(), 20)

    # A word that is expected to be reported as unknown.
    self.assertTrue(vocab.is_unk('toto'))

    # Words that are expected NOT to be reported as unknown.
    for known_word in ('normale', "isn't", u"đ"):
        self.assertFalse(vocab.is_unk(known_word))

    # After adding the unicode literal, it must be found by is_in().
    vocab.add(u"être")
    self.assertTrue(vocab.is_in(u"être"))
    # NOTE(review): the un-prefixed literal is still expected to be unknown --
    # presumably a byte-string vs unicode distinction (Python 2); confirm.
    self.assertTrue(vocab.is_unk("être"))
def testVocab(self):
    """Check NgramCounter counts over the corpus file with a word list of a, b, c.

    The counter is built with 1 as its first argument (presumably the
    n-gram order -- confirm against NgramCounter).
    """
    vocab = WordsList()
    for entry in ("a", "b", "c"):
        vocab.add(entry)

    counter = NgramCounter(1, vocab)
    counter.count(self.corpusfile)

    # (token, expected count) pairs, checked in this order.
    expected_counts = (
        ('a', 15),
        ('b', 10),
        ('c', 4),
        ('d', 0),
        (UNKSTAMP, 3),
        (START_SENT_SYMBOL, 0),
        (END_SENT_SYMBOL, 3),
    )
    for token, expected in expected_counts:
        self.assertEqual(counter.get_count(token), expected)