예제 #1
0
    def test_override(self):
        """Check that the analizer factory is used when override is enabled.

        A preprocess step with ``override`` set to True is run on
        ``self.document_all_done`` while ``get_analizer`` is patched; the
        test then asserts that the patched factory was called.
        """

        class MockAnalizer:
            # Minimal stand-in whose analysis result is always empty.
            def analize(self, *args, **kwargs):
                return {}

        def analizer_factory():
            # Hands back the stub class itself, exactly as the patch expects.
            return MockAnalizer

        # The step is created before the patch becomes active.
        step = StanfordPreprocess()
        step.override = True
        self.override_preprocess = step

        with mock.patch("iepy.preprocess.corenlp.get_analizer") as patched:
            patched.side_effect = analizer_factory
            self.override_preprocess(self.document_all_done)
            self.assertTrue(patched.called)
예제 #2
0
    def setUp(self):
        """Patch ``get_analizer`` and build the preprocess step under test.

        Also records the list of preprocess steps in ``self._all_steps``.
        """
        self._all_steps = [
            PreProcessSteps.tokenization,
            PreProcessSteps.sentencer,
            PreProcessSteps.tagging,
            PreProcessSteps.ner,
            PreProcessSteps.lemmatization,
            PreProcessSteps.syntactic_parsing,
        ]

        # Start the patch and register its teardown right away so the patch
        # is undone even if the rest of the fixture fails.
        analizer_patch = mock.patch("iepy.preprocess.corenlp.get_analizer")
        self.mock_get_analizer = analizer_patch.start()
        self.addCleanup(analizer_patch.stop)
        self.mock_analizer = self.mock_get_analizer.return_value

        # Constructed only once the patch is active.
        self.stanfordpp = StanfordPreprocess()
예제 #3
0
    def setUp(self):
        """Create the preprocess step plus three fixture documents.

        The fixtures are: one built with no ``*_done_at`` arguments, one with
        every listed step stamped, and one stamped for everything except
        lemmatization.
        """

        def stamped(*steps):
            # One fresh ``datetime.now()`` per step, keyed ``<step>_done_at``.
            return {step + "_done_at": datetime.now() for step in steps}

        self.preprocess = StanfordPreprocess()

        self.document_nothing_done = IEDocFactory()
        self.document_all_done = IEDocFactory(**stamped(
            "tokenization", "lemmatization", "sentencer",
            "tagging", "ner", "segmentation",
        ))
        self.document_missing_lemmatization = IEDocFactory(**stamped(
            "tokenization", "sentencer", "tagging", "ner", "segmentation",
        ))
예제 #4
0
    preprocess.py -h | --help | --version

Options:
  -h --help             Show this screen
  --version             Version number
"""
import logging

from docopt import docopt

import iepy

iepy.setup(__file__)
from iepy.data.db import DocumentManager
from iepy.preprocess.stanford_preprocess import StanfordPreprocess
from iepy.preprocess.pipeline import PreProcessPipeline
from iepy.preprocess.segmenter import SyntacticSegmenterRunner

if __name__ == '__main__':
    # Script entry point: configure logging, parse the command line, then
    # run the preprocess pipeline over the documents.
    logging.basicConfig(
        format=u"%(asctime)s - %(name)s - %(levelname)s - %(message)s",
        level=logging.INFO,
    )
    logger = logging.getLogger(u'preprocess')
    logger.setLevel(logging.INFO)

    # docopt handles -h/--help/--version using the module docstring.
    opts = docopt(__doc__, version=0.1)

    docs = DocumentManager()
    steps = [StanfordPreprocess(), SyntacticSegmenterRunner(increment=True)]
    pipeline = PreProcessPipeline(steps, docs)
    pipeline.process_everything()
예제 #5
0
def start_preprocess(docs, increment_ner):
    """Run the preprocess pipeline over ``docs``.

    The pipeline has two steps, in order: a ``StanfordPreprocess`` built
    with ``increment_ner`` and a ``SyntacticSegmenterRunner`` created with
    ``increment=True``.
    """
    stanford_step = StanfordPreprocess(increment_ner)
    segmenter_step = SyntacticSegmenterRunner(increment=True)
    runner = PreProcessPipeline([stanford_step, segmenter_step], docs)
    runner.process_everything()
예제 #6
0
파일: preprocess.py 프로젝트: chagge/iepy
Options:
  -h --help             Show this screen
  --increment-ner       Re run NER and Gazetter for every document. If a document lacked any of the previous steps, will be preprocessed entirely.
  --version             Version number
"""
import logging

from docopt import docopt

import iepy
iepy.setup(__file__)
from iepy.data.db import DocumentManager
from iepy.preprocess.stanford_preprocess import StanfordPreprocess
from iepy.preprocess.pipeline import PreProcessPipeline
from iepy.preprocess.segmenter import SyntacticSegmenterRunner


if __name__ == '__main__':
    # Script entry point: configure logging, parse the command line, then
    # run the preprocess pipeline over the documents.
    logging.basicConfig(format='%(message)s', level=logging.INFO)
    logger = logging.getLogger(u'preprocess')
    logger.setLevel(logging.INFO)

    # docopt parses the options described in the module docstring.
    opts = docopt(__doc__, version=iepy.__version__)
    docs = DocumentManager()
    increment_ner = opts['--increment-ner']

    steps = [
        StanfordPreprocess(increment_ner),
        SyntacticSegmenterRunner(increment=True),
    ]
    pipeline = PreProcessPipeline(steps, docs)
    pipeline.process_everything()