def main():
    """Command-line entry point printing inflection tables for one word.

    Parses the command line, loads the generator automaton into an Omorfi
    instance and writes a heading, the table(s) selected by ``--upos`` and a
    closing note to the output file (standard output by default).  Timing
    information is printed to standard output at the end.

    Exits with status 2 if loading the generator raises IOError, and with
    status 0 otherwise.
    """
    parser = ArgumentParser()
    parser.add_argument('-g', '--generator', metavar='FSAFILE', required=True,
                        help="load generator from FSAFILE")
    parser.add_argument('-w', '--word', metavar="WORD_ID", required=True,
                        help="generate forms of word WORD_ID")
    parser.add_argument('-o', '--output', metavar="OUTFILE",
                        type=FileType('w'), dest="outfile",
                        help="log outputs to OUTFILE")
    parser.add_argument('-X', '--statistics', metavar="STATFILE",
                        type=FileType('w'), dest="statfile",
                        help="statistics")
    parser.add_argument('-v', '--verbose', action="store_true", default=False,
                        help="Print verbosely while processing")
    parser.add_argument('-O', '--output-format', metavar="OFORMAT",
                        default="markdown",
                        help="Create output table in OFORMAT")
    parser.add_argument('-u', '--upos', metavar="UPOS", required=True,
                        choices=["ADJ", "NOUN", "VERB", "NUM", "X"],
                        help="generate inflection table for UPOS")
    options = parser.parse_args()

    omorfi = Omorfi(options.verbose)
    try:
        if options.generator:
            if options.verbose:
                print("reading generator from", options.generator)
            omorfi.load_generator(options.generator)
        # Unspecified sinks default to standard output.
        options.statfile = options.statfile or stdout
        options.outfile = options.outfile or stdout
    except IOError:
        print("Could not process file", options.generator, file=stderr)
        exit(2)

    # for make check target
    realstart = perf_counter()
    cpustart = process_time()

    out = options.outfile
    word = options.word
    upos = options.upos

    print("### Inflection of", word, file=out)
    print(file=out)
    if upos == 'NOUN':
        print_nominals(omorfi, word, upos, out)
    elif upos == 'ADJ':
        # One table per degree of comparison, separated by blank lines.
        for position, degree in enumerate(('POS', 'CMP', 'SUP')):
            if position > 0:
                print(file=out)
            print_comparatives(omorfi, word, upos, degree, out)
    elif upos == 'NUM':
        print_numerals(omorfi, word, upos, out)
    elif upos == 'VERB':
        print_finites(omorfi, word, upos, out)
        print(file=out)
        print_infinites(omorfi, word, upos, out)
    # Any other accepted UPOS (i.e. "X") has no table branch: only the
    # heading and the closing note are written.
    print(file=out)
    note = ("_Note:_ the inflection tables cover small percentage of the "
            "whole inflectional paradigm, for full list, see ["
            "{0} full form list]({0}.html)").format(word)
    print(note, file=out)
    print(file=out)

    realend = perf_counter()
    cpuend = process_time()
    print("CPU time:", cpuend - cpustart,
          "real time:", realend - realstart)
    exit(0)
def _pick_lemma(analyses, fallback, required_tags=None):
    """Choose a lemma for generation from a list of analyses.

    The last analysis that has exactly one lemma (and, when *required_tags*
    is given, whose ``raw`` attribute contains it) wins.  If none qualifies,
    the lemmas of the first analysis are concatenated.  If there are no
    analyses at all, *fallback* is returned instead of raising IndexError.
    """
    analyses = analyses or []
    lemma = None
    for analysis in analyses:
        if required_tags is not None and required_tags not in analysis.raw:
            continue
        lemmas = analysis.get_lemmas()
        if len(lemmas) == 1:
            lemma = lemmas[0]
    if lemma:
        return lemma
    if analyses:
        return ''.join(analyses[0].get_lemmas())
    return fallback


def main():
    """Command-line tester generating forms from shared-task style data.

    Reads tab-separated lines from the input file (standard input by
    default), builds a generation request from each line, generates a word
    form with Omorfi and compares the first generated form with the gold
    form given on the line.  The field layout depends on ``--format``:

    * ``1``: lemma, tags, gold form
    * ``2``: source tags, source form, target tags, gold form
    * ``3``: source form, target tags, gold form

    For formats ``2`` and ``3`` the lemma is guessed by analysing the source
    form.  Per-line traces and timing are printed to standard output;
    line, correct and out-of-vocabulary counts plus their percentages are
    written to the statistics file (standard output by default).

    Exit status: 2 if loading an automaton raises IOError or if no usable
    input line was read, 1 on an unknown format, 0 otherwise.
    """
    a = ArgumentParser()
    a.add_argument('-a', '--analyser', metavar='FSAFILE', required=True,
                   help="load analyser from FSAFILE")
    a.add_argument('-g', '--generator', metavar='FSAFILE', required=True,
                   help="load generator from FSAFILE")
    a.add_argument('-i', '--input', metavar="INFILE", type=open,
                   dest="infile", help="source of analysis data")
    a.add_argument('-o', '--output', metavar="OUTFILE",
                   type=FileType('w'),
                   dest="outfile", help="log outputs to OUTFILE")
    a.add_argument('-X', '--statistics', metavar="STATFILE",
                   type=FileType('w'),
                   dest="statfile", help="statistics")
    a.add_argument('-v', '--verbose', action="store_true", default=False,
                   help="Print verbosely while processing")
    a.add_argument('-C', '--no-casing', action="store_true", default=False,
                   help="Do not try to recase input and output when matching")
    # NOTE(review): the threshold is parsed but never consulted below;
    # enforcing it would change the exit status seen by existing callers.
    a.add_argument('-t', '--threshold', metavar="THOLD", default=99,
                   help="if coverage is less than THOLD exit with error")
    a.add_argument('-F', '--format', metavar="FMT", required=True,
                   help="which SIGMORPHON shared task format is used")

    options = a.parse_args()
    omorfi = Omorfi(options.verbose)
    # Remember which file is being loaded so the error names the right one.
    loading = options.analyser
    try:
        if options.analyser:
            if options.verbose:
                print("reading analyser from", options.analyser)
            omorfi.load_analyser(options.analyser)
        loading = options.generator
        if options.generator:
            if options.verbose:
                print("reading generator from", options.generator)
            omorfi.load_generator(options.generator)
        if not options.infile:
            options.infile = stdin
            print("reading from <stdin>")
        if not options.statfile:
            options.statfile = stdout
        if not options.outfile:
            options.outfile = stdout
    except IOError:
        print("Could not process file", loading, file=stderr)
        exit(2)
    # basic statistics
    correct = 0
    incorrect = 0
    oov = 0
    lines = 0
    # Index of the gold word form for each known format; a line needs at
    # least that many fields plus one.  Unknown formats keep the historical
    # three-field minimum and are rejected further down.
    gold_index = {'1': 2, '2': 3, '3': 2}
    min_fields = gold_index.get(options.format, 2) + 1
    # for make check target
    realstart = perf_counter()
    cpustart = process_time()
    for line in options.infile:
        fields = line.strip().split('\t')
        if len(fields) < min_fields:
            print("ERROR: Skipping line", fields, file=stderr)
            continue
        omors = None
        lemma = None
        print("<<<", fields)
        if options.format == '1':
            lemma = fields[0]
            omors = unimorph2omor(fields[1])
        elif options.format == '2':
            srcomors = unimorph2omor(fields[0])
            lemma = _pick_lemma(omorfi.analyse(fields[1]), fields[1],
                                required_tags=srcomors)
            omors = unimorph2omor(fields[2])
        elif options.format == '3':
            lemma = _pick_lemma(omorfi.analyse(fields[0]), fields[0])
            omors = unimorph2omor(fields[1])
        else:
            print("format fail", options.format)
            exit(1)
        gold = fields[gold_index[options.format]]
        genomor = '[WORD_ID=' + lemma + ']' + omors
        print(">>> ", genomor)
        generations = omorfi.generate(genomor)
        if not generations or '[' in generations:
            # Generation failed: fall back to the lemma as the guess.
            oov += 1
            genat1 = lemma
            print("OOV", genat1)
        else:
            genat1 = generations.split('/')[0]
            print("@1 ", genat1)
        if genat1 == gold:
            correct += 1
        else:
            print("MIS", genat1, "!=", gold)
            incorrect += 1
        lines += 1
        if options.verbose and lines % 1000 == 0:
            print(lines, '...')
    realend = perf_counter()
    cpuend = process_time()
    print("CPU time:", cpuend - cpustart,
          "real time:", realend - realstart)
    if lines == 0:
        print("Needs more than 0 lines to determine something",
              file=stderr)
        exit(2)
    # NOTE(review): the "Corect" header spelling is kept as-is in case
    # downstream scripts match on it -- confirm before correcting.
    print("Lines", "Corect", "OOV", sep="\t", file=options.statfile)
    print(lines, correct, oov, sep="\t", file=options.statfile)
    # lines is non-zero here, so the percentages are always defined.
    print(100.0,
          correct / lines * 100,
          oov / lines * 100,
          sep="\t", file=options.statfile)
    exit(0)