Example #1
def __init__(self, sql_db_path, surface_cache_path):
    # Flask app with CORS enabled so browser clients can reach the API
    self.app = Flask(__name__)
    CORS(self.app)
    # an in-memory cache plus an on-disk surface-form cache
    self.mem_cache = ServerCache()
    self.surface_cache = SurfaceCache(surface_cache_path)
    self.pipeline = local_pipeline.LocalPipeline()
    self.runner = ZoeRunner(allow_tensorflow=True)
    # load the precomputed ELMo embeddings from SQLite in server mode
    self.runner.elmo_processor.load_sqlite_db(sql_db_path,
                                              server_mode=True)
    # shut down cleanly on SIGINT (Ctrl-C)
    signal.signal(signal.SIGINT, self.grace_end)
Example #2
def produce_surface_cache(db_name, cache_name):
    pipeline = local_pipeline.LocalPipeline()
    cache = SurfaceCache(db_name, server_mode=False)
    runner = ZoeRunner()
    runner.elmo_processor.load_sqlite_db(cache_name, server_mode=False)
    dataset = DataReader("data/large_text.json", size=-1, unique=True)
    counter = 0
    total = len(dataset.sentences)
    for sentence in dataset.sentences:
        ta = pipeline.doc([sentence.tokens], pretokenized=True)
        # type every shallow-parse chunk in the sentence and cache the result
        for chunk in ta.get_shallow_parse:
            new_sentence = Sentence(sentence.tokens, chunk['start'], chunk['end'])
            runner.process_sentence(new_sentence)
            cache.insert_cache(new_sentence)
        counter += 1  # count processed sentences for the progress bar
        progress_bar(counter, total)
Example #3
def main(args):
    pipeline = local_pipeline.LocalPipeline()
    annotator = setup_annotator(args, pipeline=pipeline)

    test_file = args["test_doc"]
    out_file = args["out_doc"]
    print("[#] Test Mentions File : {}".format(test_file))
    with open(test_file, 'r') as f:
        lines = f.read().strip().split("\n")
    assert len(lines) == 1, "Only single-document inference is supported"
    doctext = lines[0].strip()

    ta = annotator.inference_on_text(text=doctext)
    ta_json = ta.as_json
    # write the serialized TextAnnotation, closing the file handle properly
    with open(out_file, "w") as out:
        json.dump(ta_json, out, indent=True)
Example #4
def main():
    # Create your model object here; see the DummyModel class for a minimal example.
    mymodel = ExampleModel()
    pipeline = local_pipeline.LocalPipeline()
    annotator = ExampleAnnotator(model=mymodel,
                                 pipeline=pipeline,
                                 provided_view="DUMMYVIEW",
                                 required_views=["TOKENS"])

    # Expose wrapper.annotate method through a Flask server
    app.add_url_rule(rule='/annotate',
                     endpoint='annotate',
                     view_func=annotator.annotate,
                     methods=['GET'])
    app.run(host='localhost', port=5000)
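Once the server is running, the endpoint can be exercised from any HTTP client. The sketch below is a hypothetical client call; the "text" and "views" query-parameter names are an assumption based on the ccg_nlpy demo server, not something this snippet guarantees.

# hypothetical client for the /annotate endpoint started above;
# the "text" and "views" parameter names are assumptions, not shown in the snippet
import requests

resp = requests.get("http://localhost:5000/annotate",
                    params={"text": "Barack Obama visited Paris.",
                            "views": "DUMMYVIEW"})
print(resp.json())  # the annotated document serialized as JSON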
Example #5
def main(args):
    pipeline = local_pipeline.LocalPipeline()

    annotator = setup_annotator(args=args, pipeline=pipeline)
    annotator.load_params()
    # The model should have two methods
    # 1) method load_params() that loads the relevant model parameters into memory.
    # 2) method inference_on_ta(docta, new_view_name) that takes a text annotation and view name,
    # creates the view in the text annotation, and returns it.
    # See the DummyModel class for a minimal example.
    app.add_url_rule(rule='/annotate',
                     endpoint='annotate',
                     view_func=annotator.annotate,
                     methods=['GET'])
    # app.run(host='0.0.0.0', port=8009)
    app.run(host='0.0.0.0', port=8080)
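A model satisfying the two-method contract described in the comments can be as small as the following sketch. MinimalModel is a hypothetical stand-in, not the project's DummyModel; the body of inference_on_ta only marks where view construction would go.

class MinimalModel:
    # hypothetical stub that satisfies the annotator's two-method contract

    def load_params(self):
        # a real model would load its weights and resources into memory here
        pass

    def inference_on_ta(self, docta, new_view_name):
        # a real model would build and attach a view called new_view_name
        # to the TextAnnotation; see the DummyModel class for the actual
        # view-construction calls
        return docta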
Example #6
def get_ta_dir(directory):
    """
    Returns a list of TextAnnotation objects instantiated from the
    serialized JSON data in the given directory.

    @param directory path to a directory of serialized TAs
    @return a list of TextAnnotations
    """
    pipeline = local_pipeline.LocalPipeline()
    # os.path.join already inserts the separator, so no trailing "/" is needed
    serialized_tas = [join(directory, f)
                      for f in listdir(directory) if isfile(join(directory, f))]
    tas = []

    for ser_ta in serialized_tas:
        with open(ser_ta, mode='r', encoding='utf-8') as f:
            tas.append(core.text_annotation.TextAnnotation(f.read(), pipeline))
    return tas
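A minimal usage sketch for this helper; the directory name is a hypothetical example:

# hypothetical usage: deserialize every TextAnnotation stored under "serialized_tas"
tas = get_ta_dir("serialized_tas")
for ta in tas:
    print(len(ta.get_tokens), "tokens in:", ta.get_text[:40])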
Example #7
def __init__(self, sql_db_path, surface_cache_path):
    self.app = Flask(__name__)
    CORS(self.app)
    self.mem_cache = ServerCache()
    self.surface_cache = SurfaceCache(surface_cache_path)
    self.pipeline = local_pipeline.LocalPipeline()
    self.pipeline_initialize_helper(['.'])
    self.runner = ZoeRunner(allow_tensorflow=True)
    status = self.runner.elmo_processor.load_sqlite_db(sql_db_path,
                                                       server_mode=True)
    if not status:
        # the server cannot answer queries without the precomputed ELMo cache
        print("ELMo cache file was not found. Server mode cannot run without it.")
        print("Please contact the author for this cache, or modify this code "
              "if you know what you are doing.")
        exit(1)
    self.runner.elmo_processor.rank_candidates_vec()
    signal.signal(signal.SIGINT, self.grace_end)
Example #8
def main(args):
    pipeline = local_pipeline.LocalPipeline()
    annotators: List[Annotator] = []
    langs = ["es", "zh", "fr", "it", "de"]
    model_paths = [
        "data/saved_models/joint/es.joint.wtype.model",
        "data/saved_models/joint/zh.joint.wtype.model",
        "data/saved_models/joint/fr.joint.31.5k_v2.model",
        "data/saved_models/joint/it.joint.56.5k.10M.model",
        "data/saved_models/joint/de.joint.20M.99k.w0.4.c0.6.model"
    ]
    VOCABPKL = "data/{}wiki/vocab/{}wiki.train.vocab.wiki.en-{}.{}.vec_wiki.en.vec.True.True.5.0.word2idx.pkl"
    VECPKL = "data/{}wiki/vocab/{}wiki.train.vocab.wiki.en-{}.{}.vec_wiki.en.vec.True.True.5.0.embeddings.pkl"
    COHPATH = "data/{}wiki/combined_coh/en{}.coh1M"
    # build one annotator per language, all sharing a single LocalPipeline
    for lang, model_path in zip(langs, model_paths):
        vocab_pkl = VOCABPKL.format(lang, lang, lang, lang)
        vec_pkl = VECPKL.format(lang, lang, lang, lang)
        coh_path = COHPATH.format(lang, lang)
        args["lang"] = lang
        args["vocabpkl"] = vocab_pkl
        args["vecpkl"] = vec_pkl
        args["cohstr"] = coh_path
        args["restore"] = model_path
        args["filter_sizes"] = "5"
        annotator = setup_annotator(args=args, pipeline=pipeline)
        # print(model.lang)
        annotator.load_params()
        annotators.append(annotator)

    # The model should have two methods
    # 1) method load_params() that loads the relevant model parameters into memory.
    # 2) method inference_on_ta(docta, new_view_name) that takes a text annotation and view name,
    # creates the view in the text annotation, and returns it.
    # See the DummyModel class for a minimal example.
    # wrapper = MultiModelWrapperServerLocal(models=models)
    multi_annotator = MultiAnnotator(annotators=annotators)
    app.add_url_rule(rule='/annotate',
                     endpoint='annotate',
                     view_func=multi_annotator.annotate,
                     methods=['GET'])
    app.run(host='0.0.0.0', port=8009)
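MultiAnnotator's dispatch logic is not shown in this snippet. One plausible shape, assuming each per-language annotator exposes its language and the request names a target language, is sketched below; every name in it is hypothetical.

from flask import request

class MultiAnnotatorSketch:
    # hypothetical illustration of fanning one endpoint out to several
    # per-language annotators; not the real MultiAnnotator implementation
    def __init__(self, annotators):
        self.annotators = annotators

    def annotate(self):
        # delegate to the annotator whose language matches the request;
        # assumes each annotator carries a .lang attribute set during setup
        lang = request.args.get("lang", "es")
        for annotator in self.annotators:
            if getattr(annotator, "lang", None) == lang:
                return annotator.annotate()
        return "unsupported language: {}".format(lang), 400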
Example #9
# -*- coding: utf8 -*-

import unittest
import os
from ccg_nlpy import local_pipeline

if os.path.exists('annotation-cache'):
    os.remove('annotation-cache')
lp = local_pipeline.LocalPipeline()


class TestLocalPipeline(unittest.TestCase):
    def setUp(self):
        self.lp = lp

    def test_doc(self):
        ta = self.lp.doc("Hello,  how are you.\n\n\n I am doing fine")
        tokens = [
            'Hello', ',', 'how', 'are', 'you', '.', 'I', 'am', 'doing', 'fine'
        ]
        self.assertEqual(ta.get_tokens, tokens)

        testarr = [6, 10]
        self.assertEqual(ta.get_sentence_end_token_indices, testarr)

        self.assertEqual(ta.get_score, 1.0)

        self.assertEqual(ta.get_text,
                         "Hello,  how are you.\n\n\n I am doing fine")

    def test_doc_illigal_characters(self):
        pass
Example #10
def get_pipeline_instance(self):
    return local_pipeline.LocalPipeline()
Example #11
def __init__(self, ontonotes=True):
    super().__init__()
    # choose between the CoNLL and OntoNotes label sets
    self.connl = not ontonotes
    self.ontonotes = ontonotes
    self.pipeline = local_pipeline.LocalPipeline()
    self.add_detector(self.annotate)