def main():
    """Convert TSV test data into a YAML-style test listing and exit.

    Parses the command line, then for every ``--input`` file writes a
    comment header and a ``Tests:`` section to the ``--output`` file,
    followed by one quoted ``"key": "value"`` line per TSV record that has
    at least three fields. Records with fewer fields are reported on stderr
    and skipped.

    The ``--fields``, ``--comment`` and ``--strip`` options are accepted but
    not consulted anywhere in this function.

    The function does not return normally; it finishes by calling
    ``exit(0)``.
    """
    # initialise argument parser
    ap = argparse.ArgumentParser(
        description="Convert Finnish dictionary TSV data into xerox/HFST "
                    "lexc format")
    ap.add_argument("--quiet", "-q", action="store_false", dest="verbose",
                    default=False,
                    help="do not print output to stdout while processing")
    ap.add_argument("--verbose", "-v", action="store_true", default=False,
                    help="print each step to stdout while processing")
    ap.add_argument("--input", "-i", action="append", required=True,
                    metavar="INFILE", help="read tests from INFILEs")
    # action="version" without a version string raises AttributeError on
    # Python 3 when -V is used, so give it something printable.
    # TODO(review): substitute the project's real version string.
    ap.add_argument("--version", "-V", action="version",
                    version="%(prog)s")
    ap.add_argument("--output", "-o", "--one-file", "-1",
                    type=argparse.FileType("w"), required=True,
                    metavar="OFILE", help="write output to OFILE")
    ap.add_argument("--fields", "-F", action="store", default=2,
                    metavar="N", help="read N fields from master")
    ap.add_argument("--separator", action="store", default="\t",
                    metavar="SEP", help="use SEP as separator")
    # The two literals used to be concatenated without a separating space,
    # which rendered as "thatdo not have SEPs" in --help.
    ap.add_argument("--comment", "-C", action="append", default=["#"],
                    metavar="COMMENT",
                    help="skip lines starting with COMMENT that "
                         "do not have SEPs")
    ap.add_argument("--strip", action="store", metavar="STRIP",
                    help="strip STRIP from fields before using")
    ap.add_argument("--format", "-f", action="store", default="omor",
                    help="use specific output format for lexc data",
                    choices=['omor', 'apertium'])
    args = ap.parse_args()

    # setup files
    # ``choices`` above guarantees that one of these branches is taken.
    formatter = None
    if args.format == 'omor':
        formatter = OmorFormatter()
    elif args.format == 'apertium':
        formatter = ApertiumFormatter()

    if args.verbose:
        print("Writing yaml to", args.output.name)

    # argparse opened the output file for us; the ``with`` block makes sure
    # it is flushed and closed even if an input file fails to parse.
    with args.output as output:
        # print test cases
        for tsv_filename in args.input:
            if args.verbose:
                print("Reading from", tsv_filename)
            # sep='\n# ' puts every header item on its own comment line.
            print("# Omorfi tests generated from", tsv_filename,
                  "date:", strftime("%Y-%m-%d %H:%M:%S+%Z"),
                  "params: ", ' '.join(argv),
                  file=output, sep='\n# ')
            print("Tests:\n All tests:", file=output)
            # newline='' leaves line-ending handling to the csv module.
            with open(tsv_filename, 'r', newline='') as tsv_file:
                tsv_reader = csv.reader(tsv_file,
                                        delimiter=args.separator,
                                        quoting=csv.QUOTE_NONE,
                                        quotechar=None,
                                        escapechar='%',
                                        strict=True)
                # linecount numbers the records yielded by the reader,
                # starting from 1 for each input file.
                for linecount, tsv_parts in enumerate(tsv_reader, start=1):
                    if len(tsv_parts) < 3:
                        print(tsv_filename, linecount,
                              "Too few tabs on line",
                              "skipping following fields:", tsv_parts,
                              file=stderr)
                        continue
                    # format output
                    analysis = formatter.analyses2lexc(
                        tsv_parts[2], args.format).replace('% ', ' ')
                    # Emit the record in one call so a formatter failure
                    # cannot leave a half-written line in the output.
                    print(' "', tsv_parts[1], analysis,
                          '": "', tsv_parts[0], '"',
                          sep='', file=output)
    exit(0)
def main():
    """Convert TSV test data into a YAML-style test listing and exit.

    Parses the command line, then for every ``--input`` file writes a
    comment header and a ``Tests:`` section to the ``--output`` file,
    followed by one quoted ``"key": "value"`` line per TSV record that has
    at least three fields. Records with fewer fields are reported on stderr
    and skipped.

    The ``--fields``, ``--comment`` and ``--strip`` options are accepted but
    not consulted anywhere in this function.

    The function does not return normally; it finishes by calling
    ``exit(0)``.
    """
    # initialise argument parser
    ap = argparse.ArgumentParser(
        description="Convert Finnish dictionary TSV data into xerox/HFST "
                    "lexc format")
    ap.add_argument("--quiet", "-q", action="store_false", dest="verbose",
                    default=False,
                    help="do not print output to stdout while processing")
    ap.add_argument("--verbose", "-v", action="store_true", default=False,
                    help="print each step to stdout while processing")
    ap.add_argument("--input", "-i", action="append", required=True,
                    metavar="INFILE", help="read tests from INFILEs")
    # action="version" without a version string raises AttributeError on
    # Python 3 when -V is used, so give it something printable.
    # TODO(review): substitute the project's real version string.
    ap.add_argument("--version", "-V", action="version",
                    version="%(prog)s")
    ap.add_argument("--output", "-o", "--one-file", "-1",
                    type=argparse.FileType("w"), required=True,
                    metavar="OFILE", help="write output to OFILE")
    ap.add_argument("--fields", "-F", action="store", default=2,
                    metavar="N", help="read N fields from master")
    ap.add_argument("--separator", action="store", default="\t",
                    metavar="SEP", help="use SEP as separator")
    # The two literals used to be concatenated without a separating space,
    # which rendered as "thatdo not have SEPs" in --help.
    ap.add_argument("--comment", "-C", action="append", default=["#"],
                    metavar="COMMENT",
                    help="skip lines starting with COMMENT that "
                         "do not have SEPs")
    ap.add_argument("--strip", action="store", metavar="STRIP",
                    help="strip STRIP from fields before using")
    ap.add_argument("--format", "-f", action="store", default="omor",
                    help="use specific output format for lexc data",
                    choices=['omor', 'apertium'])
    args = ap.parse_args()

    # setup files
    # ``choices`` above guarantees that one of these branches is taken.
    formatter = None
    if args.format == 'omor':
        formatter = OmorFormatter()
    elif args.format == 'apertium':
        formatter = ApertiumFormatter()

    if args.verbose:
        print("Writing yaml to", args.output.name)

    # argparse opened the output file for us; the ``with`` block makes sure
    # it is flushed and closed even if an input file fails to parse.
    with args.output as output:
        # print test cases
        for tsv_filename in args.input:
            if args.verbose:
                print("Reading from", tsv_filename)
            # sep='\n# ' puts every header item on its own comment line.
            print("# Omorfi tests generated from", tsv_filename,
                  "date:", strftime("%Y-%m-%d %H:%M:%S+%Z"),
                  "params: ", ' '.join(argv),
                  file=output, sep='\n# ')
            print("Tests:\n All tests:", file=output)
            # newline='' leaves line-ending handling to the csv module.
            with open(tsv_filename, 'r', newline='') as tsv_file:
                tsv_reader = csv.reader(tsv_file,
                                        delimiter=args.separator,
                                        quoting=csv.QUOTE_NONE,
                                        quotechar=None,
                                        escapechar='%',
                                        strict=True)
                # linecount numbers the records yielded by the reader,
                # starting from 1 for each input file.
                for linecount, tsv_parts in enumerate(tsv_reader, start=1):
                    if len(tsv_parts) < 3:
                        print(tsv_filename, linecount,
                              "Too few tabs on line",
                              "skipping following fields:", tsv_parts,
                              file=stderr)
                        continue
                    # format output
                    analysis = formatter.analyses2lexc(
                        tsv_parts[2], args.format).replace('% ', ' ')
                    # Emit the record in one call so a formatter failure
                    # cannot leave a half-written line in the output.
                    print(' "', tsv_parts[1], analysis,
                          '": "', tsv_parts[0], '"',
                          sep='', file=output)
    exit(0)