def start_preprocess(docs, increment_ner):
    """Build the preprocessing pipeline for ``docs`` and run all of its steps.

    The steps are, in order, ``TokenizeSentencerRunner`` (constructed with
    ``increment_ner``), ``ResolucionesNERRunner`` and ``SociedadesNERRunner``.
    The function returns nothing; it delegates to
    ``PreProcessPipeline.process_everything``.
    """
    # Runners are instantiated in the same order the pipeline receives them.
    steps = [
        TokenizeSentencerRunner(increment_ner),
        ResolucionesNERRunner(),
        SociedadesNERRunner(),
    ]
    PreProcessPipeline(steps, docs).process_everything()
def test_process_everythin_calls_successively_process_step_in_batch(self):
    # Checks that process_everything() hands each runner, in order, to
    # process_step_in_batch exactly once.
    first_runner = mock.Mock(wraps=lambda x: x)
    second_runner = mock.Mock(wraps=lambda x: x)
    documents = [object() for _ in range(5)]
    pipeline = PreProcessPipeline([first_runner, second_runner], documents)

    def record_call(runner):
        # Remember the order in which runners reach the patched method.
        pipeline.call_order.append(runner)

    with mock.patch.object(pipeline, 'process_step_in_batch') as batch_mock:
        pipeline.call_order = []
        batch_mock.side_effect = record_call
        pipeline.process_everything()

        expected_calls = [mock.call(first_runner), mock.call(second_runner)]
        self.assertEqual(batch_mock.call_count, 2)
        self.assertEqual(batch_mock.call_args_list, expected_calls)
        self.assertEqual(pipeline.call_order, [first_runner, second_runner])
def test_process_everythin_calls_successively_process_step_in_batch(self):
    # process_everything() is expected to call process_step_in_batch once
    # per runner, following the order the runners were given in.
    runner_a = mock.Mock(wraps=lambda x: x)
    runner_b = mock.Mock(wraps=lambda x: x)
    sample_docs = [object() for _ in range(5)]
    pipe = PreProcessPipeline([runner_a, runner_b], sample_docs)

    def remember(runner):
        # Track the sequence of runners passed to the patched method.
        pipe.call_order.append(runner)

    with mock.patch.object(pipe, 'process_step_in_batch') as patched_step:
        pipe.call_order = []
        patched_step.side_effect = remember
        pipe.process_everything()

        self.assertEqual(patched_step.call_count, 2)
        self.assertEqual(
            patched_step.call_args_list,
            [mock.call(runner_a), mock.call(runner_b)],
        )
        self.assertEqual(pipe.call_order, [runner_a, runner_b])
preprocess.py preprocess.py -h | --help | --version Options: -h --help Show this screen --version Version number """ import logging from docopt import docopt import iepy iepy.setup(__file__) from iepy.data.db import DocumentManager from iepy.preprocess.stanford_preprocess import StanfordPreprocess from iepy.preprocess.pipeline import PreProcessPipeline from iepy.preprocess.segmenter import SyntacticSegmenterRunner if __name__ == '__main__': logger = logging.getLogger(u'preprocess') logger.setLevel(logging.INFO) logging.basicConfig(level=logging.INFO, format='%(message)s') opts = docopt(__doc__, version=iepy.__version__) docs = DocumentManager() pipeline = PreProcessPipeline([ StanfordPreprocess(), SyntacticSegmenterRunner(increment=True) ], docs) pipeline.process_everything()
preprocess.py -h | --help | --version Options: -h --help Show this screen --version Version number """ import logging from docopt import docopt import iepy iepy.setup(__file__) from iepy.data.db import DocumentManager from iepy.preprocess.stanford_preprocess import StanfordPreprocess from iepy.preprocess.pipeline import PreProcessPipeline from iepy.preprocess.segmenter import SyntacticSegmenterRunner if __name__ == '__main__': logger = logging.getLogger(u'preprocess') logger.setLevel(logging.INFO) logging.basicConfig( level=logging.INFO, format=u"%(asctime)s - %(name)s - %(levelname)s - %(message)s") opts = docopt(__doc__, version=0.1) docs = DocumentManager() pipeline = PreProcessPipeline( [StanfordPreprocess(), SyntacticSegmenterRunner(increment=True)], docs) pipeline.process_everything()
def start_preprocess(docs, increment_ner):
    """Build the preprocessing pipeline for ``docs`` and run all of its steps.

    The steps are, in order, ``StanfordPreprocess`` (constructed with
    ``increment_ner``) and ``SyntacticSegmenterRunner(increment=True)``.
    The function returns nothing; it delegates to
    ``PreProcessPipeline.process_everything``.
    """
    # Runners are instantiated in the same order the pipeline receives them.
    steps = [
        StanfordPreprocess(increment_ner),
        SyntacticSegmenterRunner(increment=True),
    ]
    PreProcessPipeline(steps, docs).process_everything()