예제 #1
0
 def setUp(self):
     self.doc = IEDocFactory()
     super(TestDocumentSegmenter, self).setUp()
     self.segmenter = SyntacticSegmenterRunner()
예제 #2
0
    preprocess.py -h | --help | --version

Options:
  -h --help             Show this screen
  --version             Version number
"""
import logging

from docopt import docopt

import iepy

iepy.setup(__file__)
from iepy.data.db import DocumentManager
from iepy.preprocess.stanford_preprocess import StanfordPreprocess
from iepy.preprocess.pipeline import PreProcessPipeline
from iepy.preprocess.segmenter import SyntacticSegmenterRunner

if __name__ == '__main__':
    logger = logging.getLogger(u'preprocess')
    logger.setLevel(logging.INFO)
    logging.basicConfig(
        level=logging.INFO,
        format=u"%(asctime)s - %(name)s - %(levelname)s - %(message)s")
    opts = docopt(__doc__, version=0.1)
    docs = DocumentManager()
    pipeline = PreProcessPipeline(
        [StanfordPreprocess(),
         SyntacticSegmenterRunner(increment=True)], docs)
    pipeline.process_everything()
예제 #3
0
def start_preprocess(docs, increment_ner):
    pipeline = PreProcessPipeline([
        StanfordPreprocess(increment_ner),
        SyntacticSegmenterRunner(increment=True)
    ], docs)
    pipeline.process_everything()