Beispiel #1
0
def main(args, opts):
    """Parse the XML file named by ``args[0]`` and emit it through ``pgi``.

    The module globals ``Opts``, ``Char``, ``Lineno`` and ``KW`` (mirrored
    in ``jdb.KW``) are (re)initialised as a side effect.

    Attributes read from *opts*:
      l    -- log file name; replaced in place by the opened file object,
              or by ``sys.stderr`` when it is empty.
      e    -- encoding used when opening the log file.
      o    -- output name.  When empty it defaults to the input file's base
              name with its extension replaced by ".pgi"; the value "-" is
              turned into ``None``.
      g    -- comma-separated language codes, each looked up in
              ``iso639_1_to_2`` and then in ``KW.LANG``.
      t    -- handed to ``pgi.initialize``.
      b, c -- handed unchanged to ``parse_xmlfile``.
      k    -- its negation is handed to ``pgi.finalize``.
    """
    global Opts, Char, Lineno, KW
    Opts = opts
    Char = ''
    Lineno = 1

    KW = jdb.Kwds(jdb.std_csv_dir())
    jdb.KW = KW

    # From here on opts.l is a writable file object rather than a file name.
    opts.l = open(opts.l, "w", encoding=opts.e) if opts.l else sys.stderr

    if opts.o == "-":
        opts.o = None
    elif not opts.o:
        # No output name given: derive one from the input file name.
        stem, _ext = os.path.splitext(os.path.basename(args[0]))
        opts.o = stem + ".pgi"

    langs = None
    if opts.g:
        langs = [KW.LANG[iso639_1_to_2[code]].id
                 for code in opts.g.split(',')]

    # The same fixed id is given to the parser and to the corpus record.
    corpus_id = 4
    workfiles = pgi.initialize(opts.t)
    srcdate = parse_xmlfile(args[0], corpus_id, workfiles,
                            opts.b, opts.c, langs)
    srcrec = jdb.Obj(id=corpus_id,
                     kw='kanjidic',
                     descr='kanjidic2.xml',
                     dt=srcdate,
                     seq='seq_kanjidic',
                     srct=KW.SRCT['kanjidic'].id)
    pgi.wrcorp(srcrec, workfiles)
    pgi.finalize(workfiles, opts.o, not opts.k)
    print("\nDone!", file=sys.stderr)
Beispiel #2
0
def main(args, opts):
    """Parse the examples file named by ``args[0]`` and write it via ``pgi``.

    Side effects: sets the module globals ``Opts``, ``KW`` (mirrored in
    ``jdb.KW``) and ``msg``.  Progress dots are written to stdout.

    Attributes read from *opts*:
      logfile, encoding -- name and encoding of the log file; messages go
                 to ``sys.stderr`` when no name is given.
      verbose  -- forwarded to ``_msg`` with every message.
      corpus   -- forwarded to ``pgi.parse_corpus_opt``.
      tempdir  -- forwarded to ``pgi.initialize``.
      noaction -- when true, entries are parsed but nothing is written.
      begin    -- forwarded to ``parse_ex``.
      count    -- when non-zero, stop after this many entries.
      output, keep -- ``opts.output`` and ``not opts.keep`` are forwarded
                 to ``pgi.finalize``.
    """
    global msg, Opts, KW
    Opts = opts
    jdb.KW = KW = jdb.Kwds(jdb.std_csv_dir())

    # Create a globally accessible function, msg(), that has 'logfile'
    # and 'opts.verbose' already bound and which will be called elsewhere
    # when there is a need to write a message to the logfile.  The log
    # file is deliberately left open because msg() outlives this call.
    logfile = sys.stderr
    if opts.logfile:
        logfile = open(opts.logfile, "w", encoding=opts.encoding)

    def msg(message):
        _msg(logfile, opts.verbose, message)

    fin = ABPairReader(args[0], encoding='utf-8')
    # FIXME: following gives localtime, change to utc or lt+tz.
    mtime = datetime.date.fromtimestamp(os.stat(args[0])[8])
    corpid, corprec = pgi.parse_corpus_opt(
        opts.corpus, "examples", mtime, KW.SRCT['examples'].id)

    # Initialise the work files exactly once, and only when something will
    # actually be written (previously pgi.initialize() was called twice,
    # and once even in no-action mode).
    write_output = not opts.noaction
    tmpfiles = None
    if write_output:
        tmpfiles = pgi.initialize(opts.tempdir)
        if corprec:
            pgi.wrcorp(corprec, tmpfiles)

    for eid, entr in enumerate(parse_ex(fin, opts.begin)):
        if write_output:
            entr.src = corpid
            jdb.setkeys(entr, eid + 1)
            pgi.wrentr(entr, tmpfiles)
        if not (eid % 2000):
            # Progress indicator: one dot per 2000 entries.
            sys.stdout.write('.')
            sys.stdout.flush()
        if opts.count and eid + 1 >= opts.count:
            break
    sys.stdout.write('\n')

    if write_output:
        pgi.finalize(tmpfiles, opts.output, not opts.keep)
Beispiel #3
0
def main(args, opts):
    """Parse the XML file named by ``args[0]`` and write it via ``pgi``.

    Side effects: sets the module global ``KW`` (and ``jdb.KW`` when no
    database is opened).  Progress dots are written to stdout.

    Attributes read from *opts*:
      database -- when set, ``jdb.dbOpen`` is called and ``jdb.KW`` is
                  used; otherwise keywords are loaded with ``jdb.Kwds``.
      lang     -- comma-separated keys looked up in ``KW.LANG``.
      tempdir  -- forwarded to ``pgi.initialize``.
      logfile, encoding -- name and encoding of the log file; messages go
                  to ``sys.stderr`` when no name is given.
      begin, count, extract, sequence -- forwarded to the parser.
      corpus   -- forwarded to ``pgi.parse_corpus_opt``.
      output, keep -- ``opts.output`` and ``not opts.keep`` are forwarded
                  to ``pgi.finalize``.

    Raises:
      ValueError: an entry carries no ``src`` and no default corpus id
          was obtained from ``pgi.parse_corpus_opt`` beforehand.
    """
    global KW

    if opts.database:
        jdb.dbOpen(opts.database, **jdb.dbopts(opts))
        KW = jdb.KW
    else:
        jdb.KW = KW = jdb.Kwds(jdb.std_csv_dir())

    xlang = None
    if opts.lang:
        xlang = [KW.LANG[x].id for x in opts.lang.split(',')]

    # Sentinel meaning "no default corpus id known yet".  A sentinel is
    # used rather than None so that whatever pgi.parse_corpus_opt()
    # returns is always passed through unchanged.
    unset = object()
    corpid = unset
    eid = 0

    #FIXME: we open the xml file with utf-8 encoding even though
    # its encoding may be given within the file and may be different.
    with open(args[0], encoding='utf-8') as xmlfile:
        inpf = jmxml.JmdictFile(xmlfile)
        tmpfiles = pgi.initialize(opts.tempdir)
        if opts.logfile:
            logfile = open(opts.logfile, "w", encoding=opts.encoding)
        else:
            logfile = sys.stderr
        try:
            jmparser = jmxml.Jmparser(KW, logfile=logfile)
            for typ, entr in jmparser.parse_xmlfile(
                    inpf,
                    opts.begin,
                    opts.count,
                    opts.extract,
                    xlang,
                    toptag=True,
                    seqnum_init=opts.sequence[0],
                    seqnum_incr=opts.sequence[1]):
                if typ == 'entry':
                    eid += 1
                    if not ((eid - 1) % 1800):
                        # Progress indicator: one dot per 1800 entries.
                        sys.stdout.write('.')
                        sys.stdout.flush()
                        logfile.flush()
                    if not getattr(entr, 'src', None):
                        if corpid is unset:
                            raise ValueError(
                                "entry %d has no corpus and no default "
                                "corpus could be determined" % eid)
                        entr.src = corpid
                    jdb.setkeys(entr, eid)
                    pgi.wrentr(entr, tmpfiles)
                elif typ == 'corpus':
                    pgi.wrcorp(entr, tmpfiles)
                elif typ == 'grpdef':
                    pgi.wrgrpdef(entr, tmpfiles)
                elif typ == 'root':
                    # Note that 'entr' here is actually the tag name of the
                    # top-level element in the xml file, typically either
                    # "JMdict" or "JMnedict".
                    try:
                        corpid, corprec = pgi.parse_corpus_opt(
                            opts.corpus, entr, inpf.created, kw=KW)
                    except KeyError:
                        # No default corpus available; entries must then
                        # carry their own 'src' (checked above).
                        pass
                    else:
                        if corprec:
                            pgi.wrcorp(corprec, tmpfiles)
        finally:
            # Only close a log file that was opened here, never stderr.
            if logfile is not sys.stderr:
                logfile.close()

    sys.stdout.write('\n')
    pgi.finalize(tmpfiles, opts.output, not opts.keep)
Beispiel #4
0
def main(args, opts):
    """Parse the sound XML file ``args[0]`` and write its rows via ``pgi``.

    Each object produced by ``jmxml.parse_sndfile`` is written to the work
    file selected by its type tag; the work files are then finalised into
    ``args[1]``.

    Attributes read from *opts*:
      tempdir -- forwarded to ``pgi.initialize``.
      keep    -- its negation is forwarded as ``delfiles`` to
                 ``pgi.finalize``.
    """
    # Maps the type tag yielded by the parser to the work-file key.
    workfile_for = {'vol': 'sndvol', 'sel': 'sndfile', 'clip': 'snd'}

    # NOTE(review): the file is opened with the platform default encoding;
    # confirm whether an explicit encoding (e.g. utf-8) is intended.
    # The context manager guarantees the input file is closed once parsing
    # finishes or fails.
    with open(args[0]) as xmlfile:
        inpf = jmxml.JmdictFile(xmlfile)
        workfiles = pgi.initialize(opts.tempdir)
        for obj, typ, _lineno in jmxml.parse_sndfile(inpf):
            pgi._wrrow(obj, workfiles[workfile_for[typ]])

    pgi.finalize(workfiles,
                 args[1],
                 delfiles=(not opts.keep),
                 transaction=True)