コード例 #1
0
def load_parser(gpu=True,
                conf_yaml="models_fi_tdt_v2.7/pipelines.yaml",
                pipeline_name="parse_plaintext"):
    """Build and return a parser ``Pipeline``.

    Args:
        gpu: When true, device "0" is handed to the ``udify_mod`` and
            ``lemmatizer_mod`` steps, simulating a ``--device 0``
            parameter.
        conf_yaml: Path of the pipelines YAML file to read. Defaults to the
            path that used to be hard-coded here.
        pipeline_name: Name of the pipeline to launch from that file.
            Defaults to the name that used to be hard-coded here.

    Returns:
        The ``Pipeline`` launched from the selected steps.
    """
    import types

    extra_args = types.SimpleNamespace()
    if gpu:
        # The option names contain a dot, so they cannot be written as plain
        # attribute assignments; setattr stores them under the exact name.
        for module in ("udify_mod", "lemmatizer_mod"):
            setattr(extra_args, module + ".device", "0")
    available_pipelines = read_pipelines(conf_yaml)  # {pipeline_name -> its steps}
    # launch the pipeline from the steps
    return Pipeline(available_pipelines[pipeline_name], extra_args)
コード例 #2
0
        default=0,
        help=
        'Deprecated, uses GPU if available, use CUDA_VISIBLE_DEVICES to control the gpu device.'
    )
    general_group.add_argument(
        'action',
        default="parse_plaintext",
        nargs='?',
        help=
        "What to do. Either 'list' to lists pipelines or a pipeline name to parse, or nothing in which case the default parse_plaintext is used."
    )

    args = argparser.parse_args()

    # load the available pipelines from the configuration file
    pipelines = read_pipelines(args.conf_yaml)

    if args.action == "list":
        print(sorted(pipelines.keys()), file=sys.stderr, flush=True)
        sys.exit(0)
    elif args.action is not None and args.action != "parse":
        pipeline = pipelines[args.action]

    if pipeline[0].startswith("extraoptions"):
        extraoptions = pipeline[0].split()[1:]
        pipeline.pop(0)
        newoptions = extraoptions + sys.argv[1:]
        print("Got extra arguments from the pipeline, now running with",
              newoptions,
              file=sys.stderr,
              flush=True)
コード例 #3
0
sys.path.append(
    "/home/jmnybl/git_checkout/Turku-neural-parser-pipeline-modularize")
from tnparser.pipeline import read_pipelines, Pipeline

ID, FORM, LEMMA, UPOS, XPOS, FEATS, HEAD, DEPREL, DEPS, MISC = range(10)

# GPU
import types

# Per-module options handed to the pipeline. The names contain a dot, so they
# are stored with setattr instead of ordinary attribute assignment.
extra_args = types.SimpleNamespace()
setattr(extra_args, "udify_mod.device", "0")  # simulates someone giving a --device 0 parameter to Udify
setattr(extra_args, "lemmatizer_mod.device", "0")

available_pipelines = read_pipelines(
    "models_fi_tdt_v2.7/pipelines.yaml")  # {pipeline_name -> its steps}
# extra_args must be passed along here; without it the device settings built
# above are never seen by the pipeline.
p = Pipeline(available_pipelines["parse_plaintext"],
             extra_args)  # launch the pipeline from the steps


def parse(txt):

    txt_parsed = p.parse(txt)  # txt be a paragraph
    sents = []
    tokens = []
    lemmas = []
    txt_parsed = txt_parsed.split("\n\n")
    for sent_parsed in txt_parsed:
        lemma_sent = []
        for line in sent_parsed.split("\n"):
            line = line.strip()
コード例 #4
0
#!/usr/bin/env python
import yaml
import os
import flask
import sys
from tnparser.pipeline import Pipeline, read_pipelines

# Flask application object on which the route handlers are registered.
app = flask.Flask(__name__)

# Settings, each overridable through a TNPP_* environment variable.
model = os.getenv("TNPP_MODEL", "models_fi_tdt/pipelines.yaml")
pipeline = os.getenv("TNPP_PIPELINE", "parse_plaintext")
max_char = int(os.getenv("TNPP_MAX_CHARS", 15000))

# Read the pipeline definitions and launch the selected one at import time,
# so the request handlers share a single Pipeline instance.
available_pipelines = read_pipelines(model)
p = Pipeline(available_pipelines[pipeline])


@app.route("/", methods=["GET"])
def parse_get():
    """Parse the ``text`` query parameter and return the parser output.

    Returns:
        A ``(message, 400)`` pair when ``text`` is missing or empty;
        otherwise a ``flask.Response`` with a plain-text UTF-8 mimetype
        wrapping whatever ``p.parse`` returned.
    """
    txt = flask.request.args.get("text")
    # Apply the same size cap as the POST handler, so a GET request cannot
    # bypass the TNPP_MAX_CHARS limit. A non-positive max_char disables it.
    if txt and max_char > 0:
        txt = txt[:max_char]
    if not txt:
        return "You need to specify ?text=sometext", 400
    res = p.parse(txt)
    return flask.Response(res, mimetype="text/plain; charset=utf-8")


@app.route("/", methods=["POST"])
def parse_post():
    global p, max_char
    txt = flask.request.get_data(as_text=True)
    if max_char > 0:
        txt = txt[:max_char]
コード例 #5
0
from tnparser.pipeline import read_pipelines, Pipeline

# Two sample inputs that are fed to the same pipeline instance over and over.
text1 = "I have a dog! Let's see what I can do with Silo.ai. :) Can I tokenize it? I think so! Heading: This is the heading And here continues a new sentence and there's no dot."
text2 = "Some other text, to see we can tokenize more stuff without reloading the model... :)"

# What do we have for English in models_en_ewt?
available_pipelines = read_pipelines("models_en_ewt/pipelines.yaml")  # {pipeline_name -> its steps}
tokenize_steps = available_pipelines["tokenize"]
p = Pipeline(tokenize_steps)  # launch the pipeline from the steps

# Parse and print both texts, in order, 1000 times with the one pipeline.
for _ in range(1000):
    for text in (text1, text2):
        print(p.parse(text))