예제 #1
0
 def testAppendSentence2(self):
     """Verify bigram counts after one sentence and again after three.

     The counter is built with order 2 and fed ``self.sent1`` first; the
     fixtures ``self.sent1``..``self.sent3`` are defined outside this
     method (presumably in ``setUp`` -- confirm against the test class).
     """
     counter = NgramCounter(2)  # order 2, i.e. bigrams

     # Phase 1: only the first sentence has been counted.
     counter.append_sentence(self.sent1)
     expected_after_first = [
         ('a b', 3),
         ('b a', 2),
         ('a c', 1),
         ('a d', 0),  # a bigram that is expected to be absent
         # Bigrams that include the sentence start / end symbols.
         (START_SENT_SYMBOL + ' a', 1),
         (START_SENT_SYMBOL + ' b', 0),
         ('a ' + END_SENT_SYMBOL, 0),
         ('b ' + END_SENT_SYMBOL, 1),
     ]
     for bigram, expected in expected_after_first:
         self.assertEqual(counter.get_count(bigram), expected)

     # Phase 2: counts must accumulate across further sentences.
     for sentence in (self.sent2, self.sent3):
         counter.append_sentence(sentence)
     expected_after_all = [
         ('a b', 7),
         ('b a', 4),
         (START_SENT_SYMBOL + ' a', 3),
         ('b ' + END_SENT_SYMBOL, 3),
     ]
     for bigram, expected in expected_after_all:
         self.assertEqual(counter.get_count(bigram), expected)
예제 #2
0
 def testAppendSentence1(self):
     """Verify unigram counts after one sentence and again after three.

     The counter is created without an explicit order (the default is
     unigram). The fixtures ``self.sent1``..``self.sent3`` are defined
     outside this method (presumably in ``setUp`` -- confirm against the
     test class).
     """
     counter = NgramCounter()  # no order argument: default is unigram

     # Phase 1: only the first sentence has been counted.
     counter.append_sentence(self.sent1)
     expected_after_first = [
         ('a', 6),
         ('b', 4),
         ('c', 1),
         ('d', 0),  # a token that is expected to be absent
         # The start symbol is expected to have no unigram count, while
         # the end symbol is expected once for the single sentence.
         (START_SENT_SYMBOL, 0),
         (END_SENT_SYMBOL, 1),
     ]
     for token, expected in expected_after_first:
         self.assertEqual(counter.get_count(token), expected)
     # 12 equals the sum of the counts asserted above (6 + 4 + 1 + 1).
     self.assertEqual(counter.get_ncount(), 12)

     # Phase 2: counts must accumulate across further sentences.
     for sentence in (self.sent2, self.sent3):
         counter.append_sentence(sentence)
     expected_after_all = [
         ('a', 15),
         ('b', 10),
         ('c', 4),
         ('d', 3),
         (START_SENT_SYMBOL, 0),
         (END_SENT_SYMBOL, 3),  # one end symbol per appended sentence
     ]
     for token, expected in expected_after_all:
         self.assertEqual(counter.get_count(token), expected)