def xigt_import(infile, outfile, options=None):
    """Convert the Toolbox file at *infile* into XigtXML at *outfile*.

    Args:
        infile: Path of the Toolbox file to read.
        outfile: Path of the file to write; it is opened with mode "w".
        options: Optional mapping of importer settings. Any of the keys
            "tier_types", "alignments", "record_markers" and
            "attribute_map" that are absent are filled in from the
            module-level defaults. The mapping itself is not modified.
    """
    # Work on a shallow copy so that the defaults filled in below do not
    # leak into the dictionary owned by the caller.
    options = {} if options is None else dict(options)
    options.setdefault("tier_types", default_tier_types)
    options.setdefault("alignments", default_alignments)
    options.setdefault("record_markers", default_record_markers)
    options.setdefault("attribute_map", default_attribute_map)

    # Input and output are both held open until the dump has finished.
    # NOTE(review): presumably the IGTs are produced lazily from the input
    # handle (the corpus is built with mode="transient") -- confirm before
    # narrowing the scope of either handle.
    with open(infile, "r") as in_fh, open(outfile, "w") as out_fh:
        tb = toolbox.read_toolbox_file(in_fh)
        igts = toolbox_igts(tb, options)
        xc = XigtCorpus(igts=igts, mode="transient")
        xigtxml.dump(out_fh, xc)
def xigt_import(infile, outfile, options=None):
    """Convert the Toolbox file at *infile* into XigtXML at *outfile*.

    Args:
        infile: Path of the Toolbox file to read.
        outfile: Path of the file to write; it is opened with mode 'w'.
        options: Optional mapping of importer settings. Any of the keys
            'tier_types', 'alignments', 'record_markers', 'attribute_map'
            and 'error_recovery_method' that are absent are filled in from
            the module-level defaults. The mapping itself is not modified.
    """
    # Work on a shallow copy so that the defaults filled in below do not
    # leak into the dictionary owned by the caller.
    options = {} if options is None else dict(options)
    options.setdefault('tier_types', default_tier_types)
    options.setdefault('alignments', default_alignments)
    options.setdefault('record_markers', default_record_markers)
    options.setdefault('attribute_map', default_attribute_map)
    options.setdefault('error_recovery_method', default_error_recovery_method)

    # Input and output are both held open until the dump has finished.
    # NOTE(review): presumably the IGTs are produced lazily from the input
    # handle (the corpus is built with mode='transient') -- confirm before
    # narrowing the scope of either handle.
    with open(infile, 'r') as in_fh, open(outfile, 'w') as out_fh:
        tb = toolbox.read_toolbox_file(in_fh)
        igts = toolbox_igts(tb, options)
        xc = XigtCorpus(igts=igts, mode='transient')
        xigtxml.dump(out_fh, xc)
def xigt_import(infile, outfile, options=None):
    """Convert the Toolbox file at *infile* into XigtXML at *outfile*.

    Args:
        infile: Path of the Toolbox file to read.
        outfile: Path of the file to write; it is opened with mode 'w'.
        options: Optional mapping of importer settings. Any of the keys
            'record_markers', 'igt_attribute_map', 'tier_map',
            'make_phrase_tier', 'tier_types', 'alignments' and
            'error_recovery_method' that are absent are filled in from the
            module-level defaults. A 'tb_alignments' entry is always
            (re)computed from the resulting settings, replacing any value
            supplied under that key. The mapping itself is not modified.
    """
    # Work on a shallow copy so that neither the defaults nor the derived
    # 'tb_alignments' entry leak into the dictionary owned by the caller.
    options = {} if options is None else dict(options)
    options.setdefault('record_markers', default_record_markers)
    options.setdefault('igt_attribute_map', default_igt_attribute_map)
    options.setdefault('tier_map', default_tier_map)
    options.setdefault('make_phrase_tier', default_make_phrase_tier)
    options.setdefault('tier_types', default_tier_types)
    options.setdefault('alignments', default_alignments)
    options.setdefault('error_recovery_method', default_error_recovery_method)
    # just use existing info to create marker-based alignment info
    options['tb_alignments'] = _make_tb_alignments(options)

    # Input and output are both held open until the dump has finished.
    # NOTE(review): presumably the IGTs are produced lazily from the input
    # handle (the corpus is built with mode='transient') -- confirm before
    # narrowing the scope of either handle.
    with open(infile, 'r') as in_fh, open(outfile, 'w') as out_fh:
        tb = toolbox.read_toolbox_file(in_fh)
        igts = toolbox_igts(tb, options)
        xc = XigtCorpus(igts=igts, mode='transient')
        xigtxml.dump(out_fh, xc)
print(' --dry-run: Do not write anything.') print('') print('This script is mainly designed for converting ha language') print('corpora from sfm to vrt format. After running the script, you') print('need to postprocess the result with postprocess-ha-whitespace-and-punctuation.pl') print('and postprocess-ha-add-lemmas.pl.') print('') exit(0) try: import toolbox except ImportError: print("Error: module 'toolbox' could not be imported.") print("You must first install it (https://github.com/goodmami/toolbox).") data = toolbox.read_toolbox_file(open(sys.argv[1]), 'r') print_all_words=False if '--print-all-words' in sys.argv: print_all_words=True print_all_translations=False if '--print-all-translations' in sys.argv: print_all_translations=True dry_run=False if '--dry-run' in sys.argv: dry_run=True class FooFile: def write(self, s): pass