def testStopList(self): stopWords = ["good", "test", "analyzer"] stopWordsSet = StopFilter.makeStopSet(stopWords) newStop = StopAnalyzer(stopWordsSet) reader = StringReader("This is a good test of the english stop analyzer") stream = newStop.tokenStream("test", reader) self.assert_(stream is not None) stream.reset() termAtt = stream.getAttribute(CharTermAttribute.class_) while stream.incrementToken(): text = termAtt.toString() self.assert_(text not in stopWordsSet)
def testStopListPositions(self): stopWords = ["good", "test", "analyzer"] stopWordsSet = StopFilter.makeStopSet(stopWords) newStop = StopAnalyzer(stopWordsSet) reader = StringReader("This is a good test of the english stop analyzer with positions") expectedIncr = [ 1, 1, 1, 3, 1, 1, 1, 2, 1] stream = newStop.tokenStream("test", reader) self.assert_(stream is not None) stream.reset() i = 0 termAtt = stream.getAttribute(CharTermAttribute.class_) posIncrAtt = stream.addAttribute(PositionIncrementAttribute.class_) while stream.incrementToken(): text = termAtt.toString() self.assert_(text not in stopWordsSet) self.assertEqual(expectedIncr[i], posIncrAtt.getPositionIncrement()) i += 1
class StopAnalyzerTestCase(unittest.TestCase): """ Unit tests ported from Java Lucene """ def setUp(self): self.stop = StopAnalyzer() self.invalidTokens = StopAnalyzer.ENGLISH_STOP_WORDS_SET def testDefaults(self): self.assert_(self.stop is not None) reader = StringReader("This is a test of the english stop analyzer") stream = self.stop.tokenStream("test", reader) self.assert_(stream is not None) stream.reset() termAtt = stream.getAttribute(CharTermAttribute.class_) while stream.incrementToken(): self.assert_(termAtt.toString() not in self.invalidTokens) def testStopList(self): stopWords = ["good", "test", "analyzer"] stopWordsSet = StopFilter.makeStopSet(stopWords) newStop = StopAnalyzer(stopWordsSet) reader = StringReader("This is a good test of the english stop analyzer") stream = newStop.tokenStream("test", reader) self.assert_(stream is not None) stream.reset() termAtt = stream.getAttribute(CharTermAttribute.class_) while stream.incrementToken(): text = termAtt.toString() self.assert_(text not in stopWordsSet) def testStopListPositions(self): stopWords = ["good", "test", "analyzer"] stopWordsSet = StopFilter.makeStopSet(stopWords) newStop = StopAnalyzer(stopWordsSet) reader = StringReader("This is a good test of the english stop analyzer with positions") expectedIncr = [ 1, 1, 1, 3, 1, 1, 1, 2, 1] stream = newStop.tokenStream("test", reader) self.assert_(stream is not None) stream.reset() i = 0 termAtt = stream.getAttribute(CharTermAttribute.class_) posIncrAtt = stream.addAttribute(PositionIncrementAttribute.class_) while stream.incrementToken(): text = termAtt.toString() self.assert_(text not in stopWordsSet) self.assertEqual(expectedIncr[i], posIncrAtt.getPositionIncrement()) i += 1