from tnparser.pipeline import read_pipelines, Pipeline


def load_parser(gpu=True):
    import types
    extra_args = types.SimpleNamespace()
    if gpu:
        extra_args.__dict__["udify_mod.device"] = "0"  # simulates someone giving a --device 0 parameter to Udify
        extra_args.__dict__["lemmatizer_mod.device"] = "0"
    available_pipelines = read_pipelines("models_fi_tdt_v2.7/pipelines.yaml")  # {pipeline_name -> its steps}
    turku_parser = Pipeline(available_pipelines["parse_plaintext"], extra_args)  # launch the pipeline from the steps
    return turku_parser
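# Minimal usage sketch for load_parser() above (an assumption, not part of the
# original snippet): it presumes the Finnish model package is available under
# models_fi_tdt_v2.7 and that Pipeline.parse() returns CoNLL-U text, as in the
# later examples.
if __name__ == "__main__":
    parser = load_parser(gpu=False)  # CPU run for this example; pass gpu=True to use device 0
    print(parser.parse("Minulla on koira. Se on musta."))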
    default=0,
    help='Deprecated, uses GPU if available, use CUDA_VISIBLE_DEVICES to control the gpu device.')
general_group.add_argument(
    'action',
    default="parse_plaintext",
    nargs='?',
    help="What to do. Either 'list' to list the pipelines, a pipeline name to parse, or nothing, in which case the default parse_plaintext is used.")

args = argparser.parse_args()

# set gpu device
pipelines = read_pipelines(args.conf_yaml)
if args.action == "list":
    print(sorted(pipelines.keys()), file=sys.stderr, flush=True)
    sys.exit(0)
elif args.action is not None and args.action != "parse":
    pipeline = pipelines[args.action]
    if pipeline[0].startswith("extraoptions"):
        extraoptions = pipeline[0].split()[1:]
        pipeline.pop(0)
        newoptions = extraoptions + sys.argv[1:]
        print("Got extra arguments from the pipeline, now running with", newoptions, file=sys.stderr, flush=True)
import sys
sys.path.append("/home/jmnybl/git_checkout/Turku-neural-parser-pipeline-modularize")
from tnparser.pipeline import read_pipelines, Pipeline

# CoNLL-U column indices
ID, FORM, LEMMA, UPOS, XPOS, FEATS, HEAD, DEPREL, DEPS, MISC = range(10)

# GPU
import types
extra_args = types.SimpleNamespace()
extra_args.__dict__["udify_mod.device"] = "0"  # simulates someone giving a --device 0 parameter to Udify
extra_args.__dict__["lemmatizer_mod.device"] = "0"

available_pipelines = read_pipelines("models_fi_tdt_v2.7/pipelines.yaml")  # {pipeline_name -> its steps}
p = Pipeline(available_pipelines["parse_plaintext"], extra_args)  # launch the pipeline from the steps


def parse(txt):
    txt_parsed = p.parse(txt)  # txt should be a paragraph
    sents = []
    tokens = []
    lemmas = []
    txt_parsed = txt_parsed.split("\n\n")
    for sent_parsed in txt_parsed:
        lemma_sent = []
        for line in sent_parsed.split("\n"):
            line = line.strip()
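# The parse() function above is cut off in this excerpt. As a standalone sketch
# (an assumption, not the original code), the CoNLL-U output of p.parse() can be
# turned into per-sentence lemma lists with the column indices defined above,
# skipping comment lines, multi-word token ranges, and empty nodes.
def conllu_to_lemmas(conllu_text):
    lemmas = []
    for sent in conllu_text.strip().split("\n\n"):
        lemma_sent = []
        for line in sent.split("\n"):
            line = line.strip()
            if not line or line.startswith("#"):
                continue
            cols = line.split("\t")
            if "-" in cols[ID] or "." in cols[ID]:  # multi-word token range / empty node
                continue
            lemma_sent.append(cols[LEMMA])
        if lemma_sent:
            lemmas.append(lemma_sent)
    return lemmas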
#!/usr/bin/env python
import yaml
import os
import flask
import sys
from tnparser.pipeline import Pipeline, read_pipelines

app = flask.Flask(__name__)

# Configuration via environment variables
model = os.environ.get("TNPP_MODEL", "models_fi_tdt/pipelines.yaml")
pipeline = os.environ.get("TNPP_PIPELINE", "parse_plaintext")
max_char = int(os.environ.get("TNPP_MAX_CHARS", 15000))

available_pipelines = read_pipelines(model)
p = Pipeline(available_pipelines[pipeline])


@app.route("/", methods=["GET"])
def parse_get():
    global p
    txt = flask.request.args.get("text")
    if not txt:
        return "You need to specify ?text=sometext", 400
    res = p.parse(txt)
    return flask.Response(res, mimetype="text/plain; charset=utf-8")


@app.route("/", methods=["POST"])
def parse_post():
    global p, max_char
    txt = flask.request.get_data(as_text=True)
    if max_char > 0:
        txt = txt[:max_char]
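# Client-side sketch for the Flask service above (an assumption, not part of the
# original snippet): it presumes the app is served locally on Flask's default
# port 5000, e.g. via `flask run`.
import requests

# GET: text passed as a query parameter
r = requests.get("http://localhost:5000/", params={"text": "Minulla on koira."})
print(r.text)  # CoNLL-U output

# POST: raw text in the request body (truncated server-side to TNPP_MAX_CHARS)
r = requests.post("http://localhost:5000/",
                  data="Minulla on koira. Se on musta.".encode("utf-8"))
print(r.text)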
from tnparser.pipeline import read_pipelines, Pipeline

text1 = "I have a dog! Let's see what I can do with Silo.ai. :) Can I tokenize it? I think so! Heading: This is the heading And here continues a new sentence and there's no dot."
text2 = "Some other text, to see we can tokenize more stuff without reloading the model... :)"

# What do we have for English in models_en_ewt?
available_pipelines = read_pipelines("models_en_ewt/pipelines.yaml")  # {pipeline_name -> its steps}
p = Pipeline(available_pipelines["tokenize"])  # launch the pipeline from the steps

for _ in range(1000):
    print(p.parse(text1))
    print(p.parse(text2))