Example #1
0
 def testCount2(self):
     """Check bigram frequencies after counting ``self.corpusfile``."""
     bigrams = sppasNgramCounter(2)
     bigrams.count(self.corpusfile)
     # (bigram, expected frequency) pairs, including sentence boundaries.
     expectations = (
         ('a b', 7),
         ('b a', 4),
         (START_SENT_SYMBOL+' a', 3),
         ('b '+END_SENT_SYMBOL, 3),
     )
     for bigram, freq in expectations:
         self.assertEqual(bigrams.get_count(bigram), freq)
Example #2
0
 def testCount2(self):
     """Verify the order-2 counter against known corpus frequencies."""
     counter = sppasNgramCounter(2)
     counter.count(self.corpusfile)

     def check(ngram, expected):
         # Compare the stored frequency of one bigram with its expected value.
         self.assertEqual(counter.get_count(ngram), expected)

     check('a b', 7)
     check('b a', 4)
     # Bigrams that touch the sentence-boundary symbols.
     check(START_SENT_SYMBOL + ' a', 3)
     check('b ' + END_SENT_SYMBOL, 3)
Example #3
0
 def testCount1(self):
     """Check unigram counts for the corpus counted once, then twice.

     The second counter receives ``self.corpusfile`` two times in a single
     ``count`` call; each expected frequency is twice the single-pass one.
     """
     once = sppasNgramCounter()  # no order given: unigram by default
     once.count(self.corpusfile)
     single_pass = (
         ('a', 15),
         ('b', 10),
         ('c', 4),
         ('d', 3),
         (START_SENT_SYMBOL, 0),
         (END_SENT_SYMBOL, 3),
     )
     for token, freq in single_pass:
         self.assertEqual(once.get_count(token), freq)

     twice = sppasNgramCounter(1)
     twice.count(self.corpusfile, self.corpusfile)
     double_pass = (
         ('a', 30),
         ('b', 20),
         ('c', 8),
         ('d', 6),
         (START_SENT_SYMBOL, 0),
         (END_SENT_SYMBOL, 6),
     )
     for token, freq in double_pass:
         self.assertEqual(twice.get_count(token), freq)
Example #4
0
 def testCount1(self):
     """Verify unigram frequencies for one and for two passes over the corpus."""

     def check(counter, token, expected):
         # Assert the frequency stored in `counter` for a single token.
         self.assertEqual(counter.get_count(token), expected)

     # First pass: default-constructed counter (unigram), corpus given once.
     first = sppasNgramCounter()
     first.count(self.corpusfile)
     check(first, 'a', 15)
     check(first, 'b', 10)
     check(first, 'c', 4)
     check(first, 'd', 3)
     check(first, START_SENT_SYMBOL, 0)
     check(first, END_SENT_SYMBOL, 3)

     # Second pass: explicit order 1, same corpus file given twice.
     second = sppasNgramCounter(1)
     second.count(self.corpusfile, self.corpusfile)
     check(second, 'a', 30)
     check(second, 'b', 20)
     check(second, 'c', 8)
     check(second, 'd', 6)
     check(second, START_SENT_SYMBOL, 0)
     check(second, END_SENT_SYMBOL, 6)
Example #5
0
 def testShave(self):
     """Check unigram counts after calling ``shave(4)`` on the counter.

     'a', 'b' and 'c' are expected to keep their counts (15, 10, 4), 'd' is
     expected to read back as 0, and the end-of-sentence count stays at 3.
     """
     unigrams = sppasNgramCounter(1)
     unigrams.count(self.corpusfile)
     unigrams.shave(4)
     after_shave = (
         ('a', 15),
         ('b', 10),
         ('c', 4),
         ('d', 0),
         (START_SENT_SYMBOL, 0),
         (END_SENT_SYMBOL, 3),
     )
     for token, freq in after_shave:
         self.assertEqual(unigrams.get_count(token), freq)
Example #6
0
 def testShave(self):
     """Verify which unigram counts remain once ``shave(4)`` has been applied."""
     counter = sppasNgramCounter(1)
     counter.count(self.corpusfile)
     counter.shave(4)

     def check(token, expected):
         # Assert the post-shave frequency of a single token.
         self.assertEqual(counter.get_count(token), expected)

     check('a', 15)
     check('b', 10)
     check('c', 4)
     # 'd' is the only word whose count is expected to drop to 0.
     check('d', 0)
     check(START_SENT_SYMBOL, 0)
     check(END_SENT_SYMBOL, 3)
Example #7
0
    def testVocab(self):
        """Check unigram counts when the counter is given a vocabulary.

        The vocabulary holds only "a", "b" and "c"; 'd' is expected to have a
        count of 0 while ``symbols.unk`` is expected to have a count of 3.
        """
        vocab = sppasVocabulary()
        for word in ("a", "b", "c"):
            vocab.add(word)

        counter = sppasNgramCounter(1, vocab)
        counter.count(self.corpusfile)

        def check(token, expected):
            # Assert the frequency stored for a single token.
            self.assertEqual(counter.get_count(token), expected)

        check('a', 15)
        check('b', 10)
        check('c', 4)
        check('d', 0)
        check(symbols.unk, 3)
        check(START_SENT_SYMBOL, 0)
        check(END_SENT_SYMBOL, 3)
Example #8
0
    def testVocab(self):
        """Verify unigram counting restricted to the vocabulary {a, b, c}.

        'd' is not in the vocabulary: its own count is expected to be 0 and
        the ``unk_stamp`` entry is expected to have a count of 3.
        """
        known_words = sppasVocabulary()
        known_words.add("a")
        known_words.add("b")
        known_words.add("c")

        restricted = sppasNgramCounter(1, known_words)
        restricted.count(self.corpusfile)

        def check(token, expected):
            # Assert the frequency stored for a single token.
            self.assertEqual(restricted.get_count(token), expected)

        check('a', 15)
        check('b', 10)
        check('c', 4)
        check('d', 0)
        check(unk_stamp, 3)
        check(START_SENT_SYMBOL, 0)
        check(END_SENT_SYMBOL, 3)
Example #9
0
 def testAppendSentence2(self):
     """Check bigram counts built sentence by sentence.

     Counts are checked after ``self.sent1`` alone, then again once
     ``self.sent2`` and ``self.sent3`` have been appended.
     """
     bigrams = sppasNgramCounter(2)  # order 2: bigram

     def check(ngram, expected):
         # Assert the current frequency of a single bigram.
         self.assertEqual(bigrams.get_count(ngram), expected)

     bigrams.append_sentence(self.sent1)
     check('a b', 3)
     check('b a', 2)
     check('a c', 1)
     check('a d', 0)
     check(START_SENT_SYMBOL + ' a', 1)
     check(START_SENT_SYMBOL + ' b', 0)
     check('a ' + END_SENT_SYMBOL, 0)
     check('b ' + END_SENT_SYMBOL, 1)

     # Add the two remaining sentences and check the accumulated counts.
     for sentence in (self.sent2, self.sent3):
         bigrams.append_sentence(sentence)
     check('a b', 7)
     check('b a', 4)
     check(START_SENT_SYMBOL + ' a', 3)
     check('b ' + END_SENT_SYMBOL, 3)
Example #10
0
 def testAppendSentence2(self):
     """Verify bigram frequencies accumulated through ``append_sentence``."""
     counter = sppasNgramCounter(2)  # order 2: bigram

     def check(ngram, expected):
         # Assert the current frequency of a single bigram.
         self.assertEqual(counter.get_count(ngram), expected)

     # Step 1: only the first sentence has been appended.
     counter.append_sentence(self.sent1)
     check('a b', 3)
     check('b a', 2)
     check('a c', 1)
     check('a d', 0)
     check(START_SENT_SYMBOL+' a', 1)
     check(START_SENT_SYMBOL+' b', 0)
     check('a '+END_SENT_SYMBOL, 0)
     check('b '+END_SENT_SYMBOL, 1)

     # Step 2: all three sentences have been appended.
     counter.append_sentence(self.sent2)
     counter.append_sentence(self.sent3)
     check('a b', 7)
     check('b a', 4)
     check(START_SENT_SYMBOL+' a', 3)
     check('b '+END_SENT_SYMBOL, 3)
Example #11
0
 def testAppendSentence1(self):
     """Check unigram counts built sentence by sentence.

     After ``self.sent1`` alone the total returned by ``get_ncount`` is
     expected to be 12; per-token counts are checked again once
     ``self.sent2`` and ``self.sent3`` have been appended.
     """
     unigrams = sppasNgramCounter()  # no order given: unigram by default
     unigrams.append_sentence(self.sent1)
     after_first = (
         ('a', 6),
         ('b', 4),
         ('c', 1),
         ('d', 0),
         (START_SENT_SYMBOL, 0),
         (END_SENT_SYMBOL, 1),
     )
     for token, freq in after_first:
         self.assertEqual(unigrams.get_count(token), freq)
     self.assertEqual(unigrams.get_ncount(), 12)

     unigrams.append_sentence(self.sent2)
     unigrams.append_sentence(self.sent3)
     after_all = (
         ('a', 15),
         ('b', 10),
         ('c', 4),
         ('d', 3),
         (START_SENT_SYMBOL, 0),
         (END_SENT_SYMBOL, 3),
     )
     for token, freq in after_all:
         self.assertEqual(unigrams.get_count(token), freq)
Example #12
0
 def testAppendSentence1(self):
     """Verify unigram frequencies accumulated through ``append_sentence``."""
     counter = sppasNgramCounter()  # no order given: unigram by default

     def check(token, expected):
         # Assert the current frequency of a single token.
         self.assertEqual(counter.get_count(token), expected)

     # Step 1: only the first sentence has been appended.
     counter.append_sentence(self.sent1)
     check('a', 6)
     check('b', 4)
     check('c', 1)
     check('d', 0)
     check(START_SENT_SYMBOL, 0)
     check(END_SENT_SYMBOL, 1)
     self.assertEqual(counter.get_ncount(), 12)

     # Step 2: all three sentences have been appended.
     for sentence in (self.sent2, self.sent3):
         counter.append_sentence(sentence)
     check('a', 15)
     check('b', 10)
     check('c', 4)
     check('d', 3)
     check(START_SENT_SYMBOL, 0)
     check(END_SENT_SYMBOL, 3)
Example #13
0
 def testInit(self):
     """Check that out-of-range n-gram orders are rejected at construction.

     Each order gets its own ``assertRaises`` context: a ``with`` block is
     left as soon as the first exception is raised, so placing both calls
     in one block would leave the second call unexecuted and unchecked.
     """
     # Order 0 is expected to be rejected.
     with self.assertRaises(NgramOrderValueError):
         sppasNgramCounter(0)
     # Order 100 is expected to be rejected as well.
     with self.assertRaises(NgramOrderValueError):
         sppasNgramCounter(100)