Exemple #1
0
def usage():
    """Print version, usage and the dictionary list to stderr, then exit.

    The version is that of the installed ``unidic2ud`` distribution and the
    dictionary list comes from ``unidic2ud.dictlist()``.  This function never
    returns: it always ends with ``sys.exit()`` (exit status 0).
    """
    # pkg_resources is deprecated; prefer the standard-library
    # importlib.metadata and fall back only where it is unavailable.
    try:
        from importlib.metadata import version
    except ImportError:
        from pkg_resources import get_distribution

        def version(name):
            return get_distribution(name).version

    print("Unidic2UD Version " + version("unidic2ud"), file=sys.stderr)
    print("Usage: unidic2ud -U Dict [-u|-t|-t2] file", file=sys.stderr)
    print("       unidic2ud --download=Dict", file=sys.stderr)
    # Show ".udpipe" entries as "(udpipe)" and flatten the list to one line.
    s = " Dict:\n" + unidic2ud.dictlist().replace(".udpipe", "(udpipe)")
    print(s.replace("\n", " ").rstrip(), file=sys.stderr)
    sys.exit()
Exemple #2
0
def main():
    """Command-line entry point for ``unidic2ud``.

    Options are parsed left to right until the first argument that is not a
    recognised option:

    * ``-U Dict`` / ``-UDict``: dictionary name (default "japanese-modern").
    * ``-u``: sets the flag passed as the second argument of ``output()``.
    * ``-t`` / ``-t1`` / ``-t2``: sets the flag passed as the third argument
      of ``output()`` and the width (1 or 2) passed as the fourth.
    * ``--download=Dict``: downloads ``Dict`` unless ``dictlist()`` already
      lists it, then shows the usage text.
    * ``-h``, ``--help``, ``-v``, ``--version``: show the usage text.

    Every remaining argument is read as a UTF-8 file and its analysis is
    printed.  With no file arguments, lines are read from standard input
    until end of input.
    """
    args = sys.argv
    argc = len(args)
    d = "japanese-modern"
    optu = optt = False
    i = w = 1
    while i < argc:
        o = args[i]
        if o in ("-h", "--help", "-v", "--version"):
            usage()
        elif o.startswith("-U"):
            if o == "-U":
                i += 1
                if i >= argc:
                    # "-U" was the last argument: there is no dictionary
                    # name to read, so show the usage text instead of
                    # failing with IndexError.
                    usage()
                d = args[i]
            else:
                d = o[2:]
        elif o == "-u":
            optu = True
        elif o in ("-t", "-t1"):
            optt = True
            w = 1
        elif o == "-t2":
            optt = True
            w = 2
        elif o.startswith("--download="):
            d = o[len("--download="):]
            if d:
                s = unidic2ud.dictlist().replace(".udpipe", "").split()
                if d in s:
                    print("unidic2ud: " + d + " was already downloaded",
                          file=sys.stderr)
                else:
                    unidic2ud.download(d)
            usage()
        else:
            # First non-option argument: the rest are input files.
            break
        i += 1

    # A name with "-" after its first character is passed as the second
    # argument of load(); any other name is passed as the first.
    # NOTE(review): presumably this separates UDPipe model names from UniDic
    # dictionary names -- confirm against unidic2ud.load().
    if d.find("-") > 0:
        nlp = unidic2ud.load(None, d)
    else:
        nlp = unidic2ud.load(d)

    if i >= argc:
        # No file arguments: process standard input line by line.
        while True:
            try:
                s = input()
            except (EOFError, KeyboardInterrupt):
                # End of input (or Ctrl-C) ends the session quietly.
                return
            print(output(nlp, optu, optt, w, s), end="")

    for path in args[i:]:
        # The context manager closes the file even if read() fails.
        with open(path, "r", encoding="utf-8") as f:
            s = f.read()
        print(output(nlp, optu, optt, w, s), end="")
Exemple #3
0
def usage():
    """Print version, usage and the dictionary list to stderr, then exit.

    The version is that of the installed ``unidic2ud`` distribution and the
    dictionary list comes from ``unidic2ud.dictlist()``.  This function never
    returns: it always ends with ``sys.exit()`` (exit status 0).
    """
    # pkg_resources is deprecated; prefer the standard-library
    # importlib.metadata and fall back only where it is unavailable.
    try:
        from importlib.metadata import version
    except ImportError:
        from pkg_resources import get_distribution

        def version(name):
            return get_distribution(name).version

    from unidic2ud import dictlist
    print("UniDic2UD Version " + version("unidic2ud"), file=sys.stderr)
    print("Usage: udcabocha -U Dict [-f 0-8] file", file=sys.stderr)
    print("       udcabocha --download=Dict", file=sys.stderr)
    # Show ".udpipe" entries as "(udpipe)" and flatten the list to one line.
    s = " Dict:\n" + dictlist().replace(".udpipe", "(udpipe)")
    print(s.replace("\n", " ").rstrip(), file=sys.stderr)
    sys.exit()
Exemple #4
0
def main():
    """Command-line entry point for ``udcabocha``.

    Options are parsed left to right until the first argument that is not a
    recognised option:

    * ``-f N`` / ``-fN``: integer format number, must be within 0..8
      (default 0); anything else shows the usage text.
    * ``-U Dict`` / ``-UDict``: dictionary name (default ``None``).
    * ``--download=Dict``: downloads ``Dict`` unless ``dictlist()`` already
      lists it, then shows the usage text.
    * ``-h``, ``--help``, ``-v``, ``--version``: show the usage text.

    Every remaining argument is read as a UTF-8 file and passed to the
    callable returned by ``parser(u, f)``; the result is printed.  With no
    file arguments, lines are read from standard input until end of input.
    """
    args = sys.argv
    argc = len(args)
    i = 1
    f = 0
    u = None
    while i < argc:
        o = args[i]
        if o.startswith("-f"):
            if o == "-f":
                i += 1
                # A trailing "-f" has no value; the empty string makes the
                # int() below fail and fall through to the usage text.
                v = args[i] if i < argc else ""
            else:
                v = o[2:]
            try:
                f = int(v)
            except ValueError:
                # Missing or non-numeric format number.
                usage()
            if f < 0 or f > 8:
                usage()
        elif o.startswith("-U"):
            if o == "-U":
                i += 1
                if i >= argc:
                    # "-U" was the last argument: no dictionary name given.
                    usage()
                u = args[i]
            else:
                u = o[2:]
        elif o.startswith("--download="):
            d = o[len("--download="):]
            if d:
                from unidic2ud import download, dictlist
                s = dictlist().replace(".udpipe", "").split()
                if d in s:
                    print("udcabocha: " + d + " was already downloaded",
                          file=sys.stderr)
                else:
                    download(d)
            usage()
        elif o in ("-h", "--help", "-v", "--version"):
            usage()
        else:
            # First non-option argument: the rest are input files.
            break
        i += 1

    ja = parser(u, f)

    if i >= argc:
        # No file arguments: process standard input line by line.
        while True:
            try:
                s = input()
            except (EOFError, KeyboardInterrupt):
                # End of input (or Ctrl-C) ends the session quietly.
                return
            print(ja(s), end="")

    for path in args[i:]:
        # The context manager closes the file even if read() fails.
        with open(path, "r", encoding="utf-8") as p:
            s = p.read()
        print(ja(s), end="")
Exemple #5
0
 def run(self):
     """Make the "qkana" dictionary available, then run the normal install.

     First tries to import ``unidic2ud`` and, if its ``dictlist()`` output
     does not contain "qkana\\n", runs ``unidic2ud --download=qkana``.  If
     any of that fails, the archive at ``QKANA_URL`` is downloaded and
     extracted into ``build``, and the first ``build/UniDic-qkana*`` entry
     is renamed to ``build/lib/udkundoku/qkana``.  Finally ``install.run``
     is called.
     """
     try:
         import unidic2ud
         if unidic2ud.dictlist().find("qkana\n") < 0:
             import subprocess
             subprocess.check_call(["unidic2ud", "--download=qkana"])
     except Exception:
         # Only ordinary failures (missing module, failed download command)
         # trigger the fallback; KeyboardInterrupt and SystemExit propagate
         # instead of silently starting a network download.
         import glob
         import os
         import ssl
         import urllib.request
         import zipfile
         # NOTE(review): this disables HTTPS certificate verification for
         # the whole process, so the downloaded archive is not
         # authenticated.  Kept as in the original -- confirm whether it is
         # still needed for QKANA_URL.
         ssl._create_default_https_context = ssl._create_unverified_context
         archive, _ = urllib.request.urlretrieve(QKANA_URL)
         try:
             with zipfile.ZipFile(archive) as z:
                 z.extractall("build")
         finally:
             # Remove the temporary file left behind by urlretrieve().
             urllib.request.urlcleanup()
         os.renames(
             glob.glob("build/UniDic-qkana*")[0],
             "build/lib/udkundoku/qkana")
     install.run(self)
Exemple #6
0
#! /usr/bin/python -i
# coding=utf-8

import udkanbun
import unidic2ud
# Import-time bootstrap: make sure the "qkana" dictionary is available before
# QKANA is built.  dictlist() is searched for the entry "qkana\n".
if unidic2ud.dictlist().find("qkana\n") < 0:
    import os
    # Look for a "qkana" directory sitting next to this file.
    p = os.path.join(os.path.abspath(os.path.dirname(__file__)), "qkana")
    if os.path.isdir(p):
        import shutil
        # NOTE(review): shutil.move() puts p *inside* DOWNLOAD_DIR only when
        # that directory already exists; otherwise p is renamed to
        # DOWNLOAD_DIR itself -- confirm DOWNLOAD_DIR always exists here.
        shutil.move(p, unidic2ud.DOWNLOAD_DIR)
    else:
        # No local copy next to this file: download it via unidic2ud.
        unidic2ud.download("qkana", "unidic")
# Module-level analyzer built from the "qkana" dictionary.
QKANA = unidic2ud.UniDic2UD("qkana", None)

from udkundoku.adp import ADP
from udkundoku.adv import ADV
from udkundoku.aux import AUX
from udkundoku.part import PART
from udkundoku.verb import VERB


class UDKundokuToken(object):
    def __init__(self, id, form, lemma, upos, xpos, feats, deprel, deps, misc):
        self.id = id
        self.form = form
        self.lemma = lemma
        self.upos = upos
        self.xpos = xpos
        self.feats = feats
        self.deprel = deprel