def test_override(self):
    """Check that an analizer is requested when ``override`` is switched on.

    A preprocessor with ``override = True`` is run on
    ``self.document_all_done`` while ``get_analizer`` is patched; the test
    passes when the patched factory has been called.
    """
    class StubAnalizer:
        """Minimal analizer stand-in whose analysis is always empty."""

        def analize(self, *args, **kwargs):
            return {}

    def fake_get_analizer():
        # Hands back the stub class itself (not an instance).
        return StubAnalizer

    overriding = StanfordPreprocess()
    self.override_preprocess = overriding
    overriding.override = True

    get_analizer_patch = mock.patch("iepy.preprocess.corenlp.get_analizer")
    with get_analizer_patch as patched_get_analizer:
        patched_get_analizer.side_effect = fake_get_analizer
        self.override_preprocess(self.document_all_done)
        self.assertTrue(patched_get_analizer.called)
def setUp(self):
    """Patch ``get_analizer`` and build the preprocessor under test.

    The patch is started before ``StanfordPreprocess`` is instantiated and
    is undone through ``addCleanup`` once the test finishes.
    """
    get_analizer_patch = mock.patch("iepy.preprocess.corenlp.get_analizer")
    self.mock_get_analizer = get_analizer_patch.start()
    self.addCleanup(get_analizer_patch.stop)
    # Whatever the patched factory returns acts as the analizer in the tests.
    self.mock_analizer = self.mock_get_analizer.return_value

    # Steps referenced by the tests, kept in this fixed order.
    self._all_steps = [
        PreProcessSteps.tokenization,
        PreProcessSteps.sentencer,
        PreProcessSteps.tagging,
        PreProcessSteps.ner,
        PreProcessSteps.lemmatization,
        PreProcessSteps.syntactic_parsing,
    ]

    self.stanfordpp = StanfordPreprocess()
def setUp(self):
    """Create the preprocessor and three fixture documents.

    * ``document_nothing_done``: built with factory defaults only.
    * ``document_all_done``: every ``*_done_at`` field listed below is set.
    * ``document_missing_lemmatization``: same fields except
      ``lemmatization_done_at``.
    """
    def done_at(*steps):
        # One separate datetime.now() call per step, in the order given.
        return {step + "_done_at": datetime.now() for step in steps}

    self.preprocess = StanfordPreprocess()
    self.document_nothing_done = IEDocFactory()

    every_step = done_at(
        "tokenization",
        "lemmatization",
        "sentencer",
        "tagging",
        "ner",
        "segmentation",
    )
    self.document_all_done = IEDocFactory(**every_step)

    all_but_lemmatization = done_at(
        "tokenization",
        "sentencer",
        "tagging",
        "ner",
        "segmentation",
    )
    self.document_missing_lemmatization = IEDocFactory(**all_but_lemmatization)
preprocess.py -h | --help | --version Options: -h --help Show this screen --version Version number """ import logging from docopt import docopt import iepy iepy.setup(__file__) from iepy.data.db import DocumentManager from iepy.preprocess.stanford_preprocess import StanfordPreprocess from iepy.preprocess.pipeline import PreProcessPipeline from iepy.preprocess.segmenter import SyntacticSegmenterRunner if __name__ == '__main__': logger = logging.getLogger(u'preprocess') logger.setLevel(logging.INFO) logging.basicConfig( level=logging.INFO, format=u"%(asctime)s - %(name)s - %(levelname)s - %(message)s") opts = docopt(__doc__, version=0.1) docs = DocumentManager() pipeline = PreProcessPipeline( [StanfordPreprocess(), SyntacticSegmenterRunner(increment=True)], docs) pipeline.process_everything()
def start_preprocess(docs, increment_ner):
    """Build the preprocessing pipeline over ``docs`` and run all of it.

    The pipeline has two steps, in this order: a ``StanfordPreprocess``
    created with ``increment_ner`` and a ``SyntacticSegmenterRunner``
    created with ``increment=True``.
    """
    stanford_step = StanfordPreprocess(increment_ner)
    segmenter_step = SyntacticSegmenterRunner(increment=True)
    steps = [stanford_step, segmenter_step]
    PreProcessPipeline(steps, docs).process_everything()
Options: -h --help Show this screen --increment-ner Re run NER and Gazetter for every document. If a document lacked any of the previous steps, will be preprocessed entirely. --version Version number """ import logging from docopt import docopt import iepy iepy.setup(__file__) from iepy.data.db import DocumentManager from iepy.preprocess.stanford_preprocess import StanfordPreprocess from iepy.preprocess.pipeline import PreProcessPipeline from iepy.preprocess.segmenter import SyntacticSegmenterRunner if __name__ == '__main__': logger = logging.getLogger(u'preprocess') logger.setLevel(logging.INFO) logging.basicConfig(level=logging.INFO, format='%(message)s') opts = docopt(__doc__, version=iepy.__version__) docs = DocumentManager() increment_ner = opts['--increment-ner'] pipeline = PreProcessPipeline([ StanfordPreprocess(increment_ner), SyntacticSegmenterRunner(increment=True) ], docs) pipeline.process_everything()