Ejemplo n.º 1
0
def main():
    """Print inflection tables for a single word using an omorfi generator.

    Parses the command line, loads the generator automaton, and writes a
    ``### Inflection of WORD`` heading, the table(s) selected by the UPOS
    option and a closing note to the output file (standard output when no
    output file is given).  Timing figures are printed to standard output
    before the program exits with status 0.  An ``IOError`` while loading
    the generator ends the program with status 2.
    """
    parser = ArgumentParser()
    parser.add_argument('-g', '--generator', required=True,
                        metavar='FSAFILE',
                        help="load generator from FSAFILE")
    parser.add_argument('-w', '--word', required=True, metavar="WORD_ID",
                        help="generate forms of word WORD_ID")
    parser.add_argument('-o', '--output', dest="outfile",
                        metavar="OUTFILE", type=FileType('w'),
                        help="log outputs to OUTFILE")
    parser.add_argument('-X', '--statistics', dest="statfile",
                        metavar="STATFILE", type=FileType('w'),
                        help="statistics")
    parser.add_argument('-v', '--verbose', default=False,
                        action="store_true",
                        help="Print verbosely while processing")
    parser.add_argument('-O', '--output-format', default="markdown",
                        metavar="OFORMAT",
                        help="Create output table in OFORMAT")
    parser.add_argument('-u', '--upos', required=True, metavar="UPOS",
                        choices=["ADJ", "NOUN", "VERB", "NUM", "X"],
                        help="generate inflection table for UPOS")
    opts = parser.parse_args()

    omorfi = Omorfi(opts.verbose)
    try:
        if opts.generator:
            if opts.verbose:
                print("reading generator from", opts.generator)
            omorfi.load_generator(opts.generator)
    except IOError:
        print("Could not process file", opts.generator, file=stderr)
        exit(2)
    # Anything not redirected to a file goes to standard output.
    opts.statfile = opts.statfile or stdout
    opts.outfile = opts.outfile or stdout

    sink = opts.outfile
    word = opts.word
    upos = opts.upos

    # timing figures, for make check target
    wall_begin = perf_counter()
    cpu_begin = process_time()

    print("### Inflection of", word, file=sink)
    print(file=sink)
    if upos == 'NOUN':
        print_nominals(omorfi, word, upos, sink)
    elif upos == 'ADJ':
        # one table per degree of comparison, blank line between tables
        for position, degree in enumerate(('POS', "CMP", "SUP")):
            if position > 0:
                print(file=sink)
            print_comparatives(omorfi, word, upos, degree, sink)
    elif upos == 'NUM':
        print_numerals(omorfi, word, upos, sink)
    elif upos == 'VERB':
        print_finites(omorfi, word, upos, sink)
        print(file=sink)
        print_infinites(omorfi, word, upos, sink)
    print(file=sink)

    note = "".join([
        "_Note:_ the inflection tables cover small percentage of the ",
        "whole inflectional paradigm, for full list, see [",
        word,
        " full form list](",
        word,
        ".html)",
    ])
    print(note, file=sink)
    print(file=sink)

    wall_end = perf_counter()
    cpu_end = process_time()
    print("CPU time:", cpu_end - cpu_begin,
          "real time:", wall_end - wall_begin)
    exit(0)
Ejemplo n.º 2
0
def _lemma_from_analyses(analyses, fallback, required_tags=None):
    """Pick the lemma to generate from, given analyses of a word form.

    The last analysis with exactly one lemma wins; when *required_tags* is
    given, only analyses whose ``raw`` contains it are considered.  If no
    analysis qualifies, the lemmas of the first analysis are joined.  When
    there are no analyses at all, *fallback* is returned instead of failing.
    """
    lemma = None
    for analysis in analyses:
        if required_tags is not None and required_tags not in analysis.raw:
            continue
        lemmas = analysis.get_lemmas()
        if len(lemmas) == 1:
            lemma = lemmas[0]
    if lemma:
        return lemma
    if not analyses:
        return fallback
    return ''.join(analyses[0].get_lemmas())


def main():
    """Evaluate omorfi generation against SIGMORPHON shared-task data.

    Reads tab-separated lines from the input, builds a generation request
    (``[WORD_ID=lemma]`` followed by the tags converted by
    ``unimorph2omor``) for each line, generates with omorfi and compares the
    first generated form with the expected form on that line.  Per-line
    traces go to standard output; the final counts and percentages are
    written to the statistics file.

    The column layout depends on ``--format``:

    * ``1``: lemma, tags, expected form
    * ``2``: source tags, source form, target tags, expected form
    * ``3``: source form, target tags, expected form

    Lines with too few columns for the chosen format are reported on
    standard error and skipped.  Exits with status 0 on completion, 1 on an
    unknown format, and 2 when an automaton file cannot be read or no usable
    input lines were found.
    """
    a = ArgumentParser()
    a.add_argument('-a', '--analyser', metavar='FSAFILE', required=True,
                   help="load analyser from FSAFILE")
    a.add_argument('-g', '--generator', metavar='FSAFILE', required=True,
                   help="load generator from FSAFILE")
    # FileType reports an unreadable file as a usage error, not a traceback
    a.add_argument('-i', '--input', metavar="INFILE", type=FileType('r'),
                   dest="infile", help="source of analysis data")
    a.add_argument('-o', '--output', metavar="OUTFILE", type=FileType('w'),
                   dest="outfile", help="log outputs to OUTFILE")
    a.add_argument('-X', '--statistics', metavar="STATFILE",
                   type=FileType('w'), dest="statfile", help="statistics")
    a.add_argument('-v', '--verbose', action="store_true", default=False,
                   help="Print verbosely while processing")
    a.add_argument('-C', '--no-casing', action="store_true", default=False,
                   help="Do not try to recase input and output when matching")
    # NOTE(review): --threshold is parsed but not consulted anywhere below.
    a.add_argument('-t', '--threshold', metavar="THOLD", default=99,
                   help="if coverage is less than THOLD exit with error")
    a.add_argument('-F', '--format', metavar="FMT", required=True,
                   help="which SIGMORHON shared task format is used")

    options = a.parse_args()
    omorfi = Omorfi(options.verbose)
    loaders = (("analyser", options.analyser, omorfi.load_analyser),
               ("generator", options.generator, omorfi.load_generator))
    for label, path, load in loaders:
        if not path:
            continue
        try:
            if options.verbose:
                print("reading", label, "from", path)
            load(path)
        except IOError:
            # name the file that actually failed to load
            print("Could not process file", path, file=stderr)
            exit(2)
    if not options.infile:
        options.infile = stdin
        print("reading from <stdin>")
    if not options.statfile:
        options.statfile = stdout
    if not options.outfile:
        options.outfile = stdout
    # basic statistics
    correct = 0
    oov = 0
    lines = 0
    # format 2 has one extra leading column, so the expected form is fourth
    expected_col = 3 if options.format == '2' else 2
    # for make check target
    realstart = perf_counter()
    cpustart = process_time()
    for line in options.infile:
        fields = line.strip().split('\t')
        if len(fields) <= expected_col:
            print("ERROR: Skipping line", fields, file=stderr)
            continue
        print("<<<", fields)
        if options.format == '1':
            lemma = fields[0]
            omors = unimorph2omor(fields[1])
        elif options.format == '2':
            srcomors = unimorph2omor(fields[0])
            lemma = _lemma_from_analyses(omorfi.analyse(fields[1]),
                                         fields[1], srcomors)
            omors = unimorph2omor(fields[2])
        elif options.format == '3':
            lemma = _lemma_from_analyses(omorfi.analyse(fields[0]),
                                         fields[0])
            omors = unimorph2omor(fields[1])
        else:
            print("format fail", options.format)
            exit(1)
        genomor = '[WORD_ID=' + lemma + ']' + omors
        print(">>> ", genomor)
        generations = omorfi.generate(genomor)
        if not generations or '[' in generations:
            # nothing usable was generated; fall back to the lemma itself
            oov += 1
            genat1 = lemma
            print("OOV", genat1)
        else:
            genat1 = generations.split('/')[0]
            print("@1 ", genat1)
        expected = fields[expected_col]
        if genat1 == expected:
            correct += 1
        else:
            print("MIS", genat1, "!=", expected)
        lines += 1
        if options.verbose and lines % 1000 == 0:
            print(lines, '...')
    realend = perf_counter()
    cpuend = process_time()
    print("CPU time:", cpuend - cpustart, "real time:", realend - realstart)
    if lines == 0:
        print("Needs more than 0 lines to determine something", file=stderr)
        exit(2)
    # NOTE(review): header spelling kept as-is; downstream tools may parse it.
    print("Lines", "Corect", "OOV", sep="\t", file=options.statfile)
    print(lines, correct, oov, sep="\t", file=options.statfile)
    # lines > 0 is guaranteed here, so the divisions are safe
    print(100.0,
          correct / lines * 100,
          oov / lines * 100,
          sep="\t",
          file=options.statfile)
    exit(0)