class TestAlphaTermProcessor(unittest.TestCase):
    """Unit tests for ``AlphaTermProcessor.process``."""

    def setUp(self):
        """Create the logger and a fresh processor before each test."""
        self.logger = logging.getLogger("TestAlphaTermProcessor")
        self.atp = AlphaTermProcessor()

    def test_check(self):
        """Check the output of ``process`` for markup, numbers and punctuation.

        Uses ``assertEqual`` rather than the deprecated ``assertEquals``
        alias, which no longer exists in Python 3.12 and later.
        """
        # check markup brackets
        result = self.atp.process('<h>')
        self.assertEqual(result, 'h')

        # check neg numbers
        result = self.atp.process('-5')
        self.assertEqual(result, '')

        # check pos numbers
        result = self.atp.process('5')
        self.assertEqual(result, '')

        # check punct
        result = self.atp.process('hello.')
        self.assertEqual(result, 'hello')

        # check a multi-word term with digits embedded in one word
        term = "hello world my name is python111 cant"
        result = self.atp.process(term)
        self.assertEqual(result, 'hello world my name is python cant')
def construct_pipeline(self, pipeline):
    """Register the term processors on ``pipeline`` and return it.

    The processors are added in this order: length, special-char,
    punctuation, stopword, alpha. The length processor is configured
    with ``self.min_len`` and the stopword processor reads
    ``self.stop_filename``.
    """
    # Build every processor first, then register them in one pass.
    length_proc = LengthTermProcessor()
    length_proc.set_min_length(self.min_len)
    stopword_proc = StopwordTermProcessor(stopwordfile=self.stop_filename)
    punct_proc = PunctuationTermProcessor()
    alpha_proc = AlphaTermProcessor()
    special_proc = SpecialCharProcessor()

    ordered_stages = (
        length_proc,
        special_proc,
        punct_proc,
        stopword_proc,
        alpha_proc,
    )
    for stage in ordered_stages:
        pipeline.add_processor(stage)

    return pipeline
def setUp(self):
    """Build each processor and a ``TermPipeline`` holding all of them.

    The processors are kept as attributes and registered on the pipeline
    in this order: special-char, base term, length, punctuation,
    stopword, alpha.
    """
    self.logger = logging.getLogger("TestTermPipeline")

    # Individual processors, kept as attributes.
    self.ltp = LengthTermProcessor()
    self.tp = TermProcessor()
    self.stp = StopwordTermProcessor(stopwordfile='stopwords_test.txt')
    self.ptp = PunctuationTermProcessor()
    self.atp = AlphaTermProcessor()
    self.sctp = SpecialCharProcessor()

    # Assemble the pipeline under test.
    pipeline = TermPipeline()
    self.pipeline = pipeline
    registration_order = (
        self.sctp,
        self.tp,
        self.ltp,
        self.ptp,
        self.stp,
        self.atp,
    )
    for stage in registration_order:
        pipeline.add_processor(stage)
def setUp(self):
    """Create the test logger and a fresh ``AlphaTermProcessor``."""
    logger_name = "TestAlphaTermProcessor"
    self.logger = logging.getLogger(logger_name)
    self.atp = AlphaTermProcessor()