Example #1
0
    def construct_pipeline(self, pipeline):
        """Populate ``pipeline`` with the term processors and return it.

        Five processors are instantiated. The length processor gets its
        minimum length from ``self.min_len`` and the stopword processor is
        given ``self.stop_filename`` as its stopword file. They are then
        registered on ``pipeline`` in this order: length, special-char,
        punctuation, stopword, alpha.

        :param pipeline: object exposing ``add_processor``; it is modified
            in place.
        :return: the same ``pipeline`` object that was passed in.
        """
        # Instantiate in the same sequence as before so that any work done
        # by the constructors happens in an unchanged order.
        length_filter = LengthTermProcessor()
        length_filter.set_min_length(self.min_len)
        stopword_filter = StopwordTermProcessor(stopwordfile=self.stop_filename)
        punctuation_filter = PunctuationTermProcessor()
        alpha_filter = AlphaTermProcessor()
        special_char_filter = SpecialCharProcessor()

        # Registration order differs from creation order on purpose.
        registration_order = (
            length_filter,
            special_char_filter,
            punctuation_filter,
            stopword_filter,
            alpha_filter,
        )
        for stage in registration_order:
            pipeline.add_processor(stage)

        return pipeline
Example #2
0
    def setUp(self):
        """Create the logger, one instance of each processor, and a pipeline.

        Every processor is kept on ``self`` under its short name (``ltp``,
        ``tp``, ``stp``, ``ptp``, ``atp``, ``sctp``) and then added to a new
        ``TermPipeline`` stored as ``self.pipeline``.
        """
        self.logger = logging.getLogger("TestTermPipeline")

        # Individual processors, created in the original sequence.
        self.ltp = LengthTermProcessor()
        self.tp = TermProcessor()
        self.stp = StopwordTermProcessor(stopwordfile='stopwords_test.txt')
        self.ptp = PunctuationTermProcessor()
        self.atp = AlphaTermProcessor()
        self.sctp = SpecialCharProcessor()

        self.pipeline = TermPipeline()

        # Order in which the processors are registered on the pipeline.
        stages = (
            self.sctp,
            self.tp,
            self.ltp,
            self.ptp,
            self.stp,
            self.atp,
        )
        for stage in stages:
            self.pipeline.add_processor(stage)
Example #3
0
 def setUp(self):
     """Attach a named logger and a new AlphaTermProcessor to the instance."""
     logger_name = "TestAlphaTermProcessor"
     self.logger = logging.getLogger(logger_name)

     # Processor instance stored for use through ``self.atp``.
     processor = AlphaTermProcessor()
     self.atp = processor