def setUp(self):
    """Prepare the fixtures used by each test in this case.

    Creates a document with ``IEDocFactory`` and stores it on ``self.doc``,
    then runs the parent class ``setUp``, and finally stores a fresh
    ``SyntacticSegmenterRunner`` on ``self.segmenter``.
    """
    # The document is built before delegating to the parent setUp; this
    # ordering is kept exactly as in the original fixture.
    self.doc = IEDocFactory()
    super(TestDocumentSegmenter, self).setUp()
    self.segmenter = SyntacticSegmenterRunner()
preprocess.py -h | --help | --version Options: -h --help Show this screen --version Version number """ import logging from docopt import docopt import iepy iepy.setup(__file__) from iepy.data.db import DocumentManager from iepy.preprocess.stanford_preprocess import StanfordPreprocess from iepy.preprocess.pipeline import PreProcessPipeline from iepy.preprocess.segmenter import SyntacticSegmenterRunner if __name__ == '__main__': logger = logging.getLogger(u'preprocess') logger.setLevel(logging.INFO) logging.basicConfig( level=logging.INFO, format=u"%(asctime)s - %(name)s - %(levelname)s - %(message)s") opts = docopt(__doc__, version=0.1) docs = DocumentManager() pipeline = PreProcessPipeline( [StanfordPreprocess(), SyntacticSegmenterRunner(increment=True)], docs) pipeline.process_everything()
def start_preprocess(docs, increment_ner):
    """Build the preprocessing pipeline and run it over ``docs``.

    The pipeline is made of two steps, in this order: a
    ``StanfordPreprocess`` step constructed with ``increment_ner`` and a
    ``SyntacticSegmenterRunner`` step constructed with ``increment=True``.

    :param docs: handed to ``PreProcessPipeline`` as its second argument.
    :param increment_ner: forwarded positionally to ``StanfordPreprocess``.
    """
    # Steps are instantiated in the same order they run in the pipeline.
    stanford_step = StanfordPreprocess(increment_ner)
    segmenter_step = SyntacticSegmenterRunner(increment=True)
    steps = [stanford_step, segmenter_step]
    PreProcessPipeline(steps, docs).process_everything()