def start_preprocess(docs, increment_ner):
    """Build the preprocessing pipeline for ``docs`` and run it to completion.

    The pipeline is given three runners, listed in this order:
    ``TokenizeSentencerRunner`` (constructed with ``increment_ner``),
    ``ResolucionesNERRunner`` and ``SociedadesNERRunner``.

    :param docs: documents handed to ``PreProcessPipeline``.
    :param increment_ner: forwarded as the only argument of
        ``TokenizeSentencerRunner``.
    """
    runners = [
        TokenizeSentencerRunner(increment_ner),
        ResolucionesNERRunner(),
        SociedadesNERRunner(),
    ]
    PreProcessPipeline(runners, docs).process_everything()
 def test_process_everythin_calls_successively_process_step_in_batch(self):
     """process_everything() passes each runner, in order, to the batch step."""
     first_runner = mock.Mock(wraps=lambda x: x)
     second_runner = mock.Mock(wraps=lambda x: x)
     documents = [object() for _ in range(5)]
     pipeline = PreProcessPipeline([first_runner, second_runner], documents)

     def record_call(r):
         # Remember which runner the patched batch step was invoked with.
         pipeline.call_order.append(r)

     with mock.patch.object(pipeline, 'process_step_in_batch') as mock_batch:
         pipeline.call_order = []
         mock_batch.side_effect = record_call
         pipeline.process_everything()
         self.assertEqual(mock_batch.call_count, 2)
         expected_calls = [mock.call(first_runner), mock.call(second_runner)]
         self.assertEqual(mock_batch.call_args_list, expected_calls)
     expected_order = [first_runner, second_runner]
     self.assertEqual(pipeline.call_order, expected_order)
Example #3
0
 def test_process_everythin_calls_successively_process_step_in_batch(self):
     """Each runner must reach process_step_in_batch exactly once, in order."""
     runner_a = mock.Mock(wraps=lambda x: x)
     runner_b = mock.Mock(wraps=lambda x: x)
     sample_docs = [object() for _ in range(5)]
     pipeline = PreProcessPipeline([runner_a, runner_b], sample_docs)

     def remember(r):
         # Side effect for the patched method: log the runner it received.
         pipeline.call_order.append(r)

     with mock.patch.object(pipeline, 'process_step_in_batch') as mock_batch:
         pipeline.call_order = []
         mock_batch.side_effect = remember
         pipeline.process_everything()
         self.assertEqual(mock_batch.call_count, 2)
         wanted_calls = [mock.call(runner_a), mock.call(runner_b)]
         self.assertEqual(mock_batch.call_args_list, wanted_calls)
     wanted_order = [runner_a, runner_b]
     self.assertEqual(pipeline.call_order, wanted_order)
Example #4
0
    preprocess.py
    preprocess.py -h | --help | --version

Options:
  -h --help             Show this screen
  --version             Version number
"""
import logging

from docopt import docopt

import iepy
iepy.setup(__file__)
from iepy.data.db import DocumentManager
from iepy.preprocess.stanford_preprocess import StanfordPreprocess
from iepy.preprocess.pipeline import PreProcessPipeline
from iepy.preprocess.segmenter import SyntacticSegmenterRunner


if __name__ == '__main__':
    # Script entry point: set the 'preprocess' logger and the root logging
    # configuration to INFO, let docopt parse the command line against the
    # module docstring, then run the two preprocessing steps over the
    # documents exposed by DocumentManager.
    logger = logging.getLogger(u'preprocess')
    logger.setLevel(logging.INFO)
    logging.basicConfig(format='%(message)s', level=logging.INFO)

    opts = docopt(__doc__, version=iepy.__version__)

    docs = DocumentManager()
    steps = [
        StanfordPreprocess(),
        SyntacticSegmenterRunner(increment=True),
    ]
    pipeline = PreProcessPipeline(steps, docs)
    pipeline.process_everything()
Example #5
0
    preprocess.py -h | --help | --version

Options:
  -h --help             Show this screen
  --version             Version number
"""
import logging

from docopt import docopt

import iepy

iepy.setup(__file__)
from iepy.data.db import DocumentManager
from iepy.preprocess.stanford_preprocess import StanfordPreprocess
from iepy.preprocess.pipeline import PreProcessPipeline
from iepy.preprocess.segmenter import SyntacticSegmenterRunner

if __name__ == '__main__':
    # Script entry point: configure INFO-level logging with a timestamped
    # format, let docopt parse the command line against the module
    # docstring, then run the two preprocessing steps over the documents
    # exposed by DocumentManager.
    logger = logging.getLogger(u'preprocess')
    logger.setLevel(logging.INFO)
    log_format = u"%(asctime)s - %(name)s - %(levelname)s - %(message)s"
    logging.basicConfig(level=logging.INFO, format=log_format)

    opts = docopt(__doc__, version=0.1)

    docs = DocumentManager()
    steps = [
        StanfordPreprocess(),
        SyntacticSegmenterRunner(increment=True),
    ]
    pipeline = PreProcessPipeline(steps, docs)
    pipeline.process_everything()
Example #6
0
def start_preprocess(docs, increment_ner):
    """Build the preprocessing pipeline for ``docs`` and run it to completion.

    The pipeline is given two runners, listed in this order:
    ``StanfordPreprocess`` (constructed with ``increment_ner``) and
    ``SyntacticSegmenterRunner`` (constructed with ``increment=True``).

    :param docs: documents handed to ``PreProcessPipeline``.
    :param increment_ner: forwarded as the only argument of
        ``StanfordPreprocess``.
    """
    runners = [
        StanfordPreprocess(increment_ner),
        SyntacticSegmenterRunner(increment=True),
    ]
    PreProcessPipeline(runners, docs).process_everything()
Example #7
0
def start_preprocess(docs, increment_ner):
    """Run the Stanford preprocessing and segmentation steps over ``docs``.

    A ``PreProcessPipeline`` is created with ``StanfordPreprocess``
    (receiving ``increment_ner``) followed by ``SyntacticSegmenterRunner``
    (with ``increment=True``), and its ``process_everything()`` method is
    called.

    :param docs: documents handed to ``PreProcessPipeline``.
    :param increment_ner: forwarded as the only argument of
        ``StanfordPreprocess``.
    """
    stanford_step = StanfordPreprocess(increment_ner)
    segmenter_step = SyntacticSegmenterRunner(increment=True)
    PreProcessPipeline([stanford_step, segmenter_step], docs).process_everything()