コード例 #1
0
    def test_BothUnigramsBigramsApplyStopwords(self):
        """Check getBothUnigramsBigrams() when applyStopwords=True.

        For the sample sentence the result is expected to hold only
        'cats' and 'birds', followed by the single bigram 'cats birds'.
        """
        sentence = 'I like cats and birds.'
        expected_unigrams = ['cats', 'birds']
        expected_bigrams = ['cats birds']

        tokenizer = TokenizeOnWhitespacePunctuation(sentence, applyStopwords=True)
        actual = tokenizer.getBothUnigramsBigrams()

        # Expected value goes first, matching the argument order used in this suite.
        self.assertEqual(expected_unigrams + expected_bigrams, actual)
コード例 #2
0
    def test_TokenizeOnWhitespacePunctuationBothUnigramsBigrams(self):
        """Check getBothUnigramsBigrams() on a tokenizer built from text alone.

        The expected list is the five lower-cased words of the sample
        sentence followed by the four adjacent word pairs.
        """
        sentence = 'I like cats and birds.'
        expected_unigrams = ['i', 'like', 'cats', 'and', 'birds']
        expected_bigrams = ['i like', 'like cats', 'cats and', 'and birds']

        tokenizer = TokenizeOnWhitespacePunctuation(sentence)
        actual = tokenizer.getBothUnigramsBigrams()

        self.assertEqual(expected_unigrams + expected_bigrams, actual)
コード例 #3
0
    def test_BothUnigramsBigramsApplyStopwords(self):
        # Expected output for the sample sentence with applyStopwords=True:
        # two single words, then one two-word entry.
        wanted = ['cats', 'birds', 'cats birds']

        tokens = TokenizeOnWhitespacePunctuation(
            'I like cats and birds.',
            applyStopwords=True,
        )
        produced = tokens.getBothUnigramsBigrams()

        self.assertEqual(wanted, produced)
コード例 #4
0
    def test_TokenizeOnWhitespacePunctuationBothUnigramsBigrams(self):
        """Verify the combined unigram/bigram list for the sample sentence.

        No options besides the text are passed to the tokenizer; the
        expected list holds the single words first, then the word pairs.
        """
        words = ['i', 'like', 'cats', 'and', 'birds']
        pairs = ['i like', 'like cats', 'cats and', 'and birds']

        tokens = TokenizeOnWhitespacePunctuation('I like cats and birds.')
        self.assertEqual(words + pairs, tokens.getBothUnigramsBigrams())