Ejemplo n.º 1
0
class EightBitTests(unittest.TestCase):
    """Tests for a ``Stopwords`` instance configured with a small word list.

    Every test runs against a fresh ``Stopwords('en', words)`` object
    created in ``setUp``.
    """

    # Sample sentence run through the stopword filter (see testFilter2).
    data = 'the quick brown fox jumps over the lazy dog and i am so proud'
    # Words registered as stopwords for the 'en' language in setUp.
    words = ('i', 'am', 'so', 'lazy')

    def setUp(self):
        # Build the object under test and verify its class against
        # StopwordsInterface before any test body runs.
        self._sw = Stopwords('en', self.words)
        verifyClass(StopwordsInterface, self._sw.__class__)

    def _assertFiltered(self, text):
        # Shared check for the filter tests: process() must return the
        # whitespace-separated words of `text` that are not in self.words,
        # in their original order.
        expected = [w for w in text.split() if w not in self.words]
        self.assertEqual(self._sw.process(text.split()), expected)

    def testSimple(self):
        # The instance reports the language and the number of stopwords
        # it was constructed with.
        self.assertEqual(self._sw.getLanguage(), 'en')
        self.assertEqual(len(self._sw.getStopWords()), len(self.words))

        # Each configured word must be recognised regardless of case.
        for w in self.words:
            self.assertEqual(self._sw.isStopWord(w.lower()), 1)
            self.assertEqual(self._sw.isStopWord(w.upper()), 1)
            self.assertEqual(self._sw.isStopWord(w.capitalize()), 1)

    def testNonStopwords(self):
        # Words that were never configured must not be reported.
        self.assertEqual(self._sw.isStopWord('foo'), 0)
        self.assertEqual(self._sw.isStopWord('bar'), 0)

    def testFilter1(self):
        # Edge case: empty input yields an empty result.
        self._assertFiltered('')

    def testFilter2(self):
        # Use the shared sample sentence instead of repeating the literal.
        self._assertFiltered(self.data)
Ejemplo n.º 2
0
 def setUp(self):
     # Create the object under test from the fixture's word list, keep it
     # on the test case, and verify its class against StopwordsInterface.
     stopwords = Stopwords('en', self.words)
     self._sw = stopwords
     verifyClass(StopwordsInterface, stopwords.__class__)