def parse_args():
    """Parse the command line for umt.py.

    Registers the markdown-help argument and the preprocessing options
    from ``opts`` on a fresh parser, then parses ``sys.argv``.

    Returns:
        The parsed ``argparse.Namespace``.
    """
    arg_parser = argparse.ArgumentParser(
        description='umt.py',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    opts.add_md_help_argument(arg_parser)
    opts.preprocess_opts(arg_parser)
    return arg_parser.parse_args()
def parse_args():
    """Parse the command line for preprocess.py and seed torch.

    Registers the markdown-help argument and the preprocessing options
    from ``opts``, parses ``sys.argv`` and seeds the torch RNG with the
    parsed ``seed`` option.

    Returns:
        The parsed ``argparse.Namespace``.
    """
    arg_parser = argparse.ArgumentParser(
        description='preprocess.py',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    opts.add_md_help_argument(arg_parser)
    opts.preprocess_opts(arg_parser)

    options = arg_parser.parse_args()
    torch.manual_seed(options.seed)
    return options
def parse_args():
    """Parse the command line for preprocess.py.

    Only the preprocessing options from ``opts`` are registered.

    Returns:
        The parsed ``argparse.Namespace``.
    """
    arg_parser = argparse.ArgumentParser(
        description='preprocess.py',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    opts.preprocess_opts(arg_parser)
    # NOTE: the torch RNG is not seeded here; the original
    # torch.manual_seed(opt.seed) call is disabled.
    return arg_parser.parse_args()
def parse_args():
    """Parse the command line for umt.py and prepare the process for training.

    Registers the markdown-help, model, preprocess and train option groups
    from ``opts``, parses ``sys.argv`` and then:

    * seeds the Python and torch RNGs with ``opt.seed``;
    * copies ``word_vec_size`` / ``layers`` (when not -1) onto the
      source/target and encoder/decoder specific options;
    * sets ``opt.brnn`` from ``opt.encoder_type``;
    * warns when CUDA is available but no GPU id was given;
    * selects the first GPU in ``opt.gpuid`` (seeding CUDA when the seed is
      positive) and exits with status 1 if more than one GPU id is given;
    * when ``opt.exp_host`` is non-empty, connects to the Crayon server and
      removes an existing experiment named ``opt.exp``.

    Returns:
        The parsed and adjusted ``argparse.Namespace``.
    """
    parser = argparse.ArgumentParser(
        description='umt.py',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    opts.add_md_help_argument(parser)
    opts.model_opts(parser)
    opts.preprocess_opts(parser)
    opts.train_opts(parser)

    opt = parser.parse_args()

    # Seed the CPU-side RNGs exactly once. The original seeded torch twice
    # with the same value and nothing consuming randomness in between.
    random.seed(opt.seed)
    torch.manual_seed(opt.seed)

    # A value other than -1 for the shared size overrides both sides.
    if opt.word_vec_size != -1:
        opt.src_word_vec_size = opt.word_vec_size
        opt.tgt_word_vec_size = opt.word_vec_size

    # Likewise for the shared layer count.
    if opt.layers != -1:
        opt.enc_layers = opt.layers
        opt.dec_layers = opt.layers

    opt.brnn = (opt.encoder_type == "brnn")

    if torch.cuda.is_available() and not opt.gpuid:
        print("WARNING: You have a CUDA device, should run with -gpuid 0")

    if opt.gpuid:
        cuda.set_device(opt.gpuid[0])
        if opt.seed > 0:
            torch.cuda.manual_seed(opt.seed)

    if len(opt.gpuid) > 1:
        sys.stderr.write("Sorry, multigpu isn't supported yet, coming soon!\n")
        sys.exit(1)

    # Set up the Crayon logging server.
    if opt.exp_host != "":
        # Imported lazily so pycrayon is only needed when a host is given.
        from pycrayon import CrayonClient

        cc = CrayonClient(hostname=opt.exp_host)
        experiments = cc.get_experiment_names()
        print(experiments)
        # Drop any previous experiment that has the same name.
        if opt.exp in experiments:
            cc.remove_experiment(opt.exp)

    return opt
def parse_args():
    """Parse the command line for preprocess.py.

    Registers the markdown-help argument and the preprocessing options
    from ``opts``, parses ``sys.argv``, seeds the torch RNG with the
    parsed ``seed`` option and passes the parsed options to
    ``check_existing_pt_files``.

    Returns:
        The parsed ``argparse.Namespace``.
    """
    arg_parser = argparse.ArgumentParser(
        description='preprocess.py',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    opts.add_md_help_argument(arg_parser)
    opts.preprocess_opts(arg_parser)

    options = arg_parser.parse_args()
    torch.manual_seed(options.seed)
    check_existing_pt_files(options)
    return options
def parse_args():
    """Parse the command line (via configargparse) for preprocess.py.

    Registers the markdown-help argument, the config options and the
    preprocessing options from ``opts``, parses the arguments, seeds the
    torch RNG with the parsed ``seed`` option and passes the parsed
    options to ``check_existing_pt_files``.

    Returns:
        The parsed options namespace.
    """
    arg_parser = configargparse.ArgumentParser(
        description='preprocess.py',
        formatter_class=configargparse.ArgumentDefaultsHelpFormatter,
    )
    opts.add_md_help_argument(arg_parser)
    opts.config_opts(arg_parser)
    opts.preprocess_opts(arg_parser)

    options = arg_parser.parse_args()
    torch.manual_seed(options.seed)
    check_existing_pt_files(options)
    return options
def parse_args():
    """Parse the command line for template_preprocess.py.

    On top of the markdown-help argument and the preprocessing options
    from ``opts``, a 'Template' argument group adds two required options,
    ``-train_template`` and ``-valid_template``. After parsing, the torch
    RNG is seeded with the parsed ``seed`` option and the parsed options
    are passed to ``check_existing_pt_files``.

    Returns:
        The parsed ``argparse.Namespace``.
    """
    arg_parser = argparse.ArgumentParser(
        description='template_preprocess.py',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    opts.add_md_help_argument(arg_parser)
    opts.preprocess_opts(arg_parser)

    # Both template paths are mandatory and differ only in flag and help.
    template_group = arg_parser.add_argument_group('Template')
    for flag, description in (
        ('-train_template', "Path to the training template"),
        ('-valid_template', "Path to the valid template"),
    ):
        template_group.add_argument(flag, required=True, help=description)

    options = arg_parser.parse_args()
    torch.manual_seed(options.seed)
    check_existing_pt_files(options)
    return options
required=True, help="Output file for the prepared data") parser.add_argument('-src_vocab', help="Path to an existing source vocabulary") parser.add_argument('-tgt_vocab', help="Path to an existing target vocabulary") parser.add_argument('-features_vocabs_prefix', type=str, default='', help="Path prefix to existing features vocabularies") parser.add_argument('-seed', type=int, default=3435, help="Random seed") parser.add_argument('-report_every', type=int, default=100000, help="Report status every this many sentences") opts.preprocess_opts(parser) opt = parser.parse_args() torch.manual_seed(opt.seed) def main(): print('Preparing training ...') with codecs.open(opt.train_src, "r", "utf-8") as src_file: src_line = src_file.readline().strip().split() _, _, nFeatures = onmt.IO.extractFeatures(src_line) fields = onmt.IO.ONMTDataset.get_fields(nFeatures) print("Building Training...") train = onmt.IO.ONMTDataset(opt.train_src, opt.train_tgt, fields, opt) print("Building Vocab...")
import copy
import unittest
import glob
import os
from collections import Counter
import torchtext
import onmt
import onmt.io
import opts
import preprocess

# Module-level setup: build a parser carrying the preprocessing options and
# parse a fixed argument list into `opt`.
# NOTE(review): `argparse` is used below but its import is not among the
# lines visible here - confirm it is imported earlier in the file.
parser = argparse.ArgumentParser(description='preprocess.py')
opts.preprocess_opts(parser)

# Path prefix passed as the -save_data option below.
SAVE_DATA_PREFIX = 'data/test_preprocess'

# Fixed command-line arguments: text data type, train/valid source and
# target files, and the save prefix.
default_opts = [
    '-data_type', 'text',
    '-train_src', 'data/src-train.txt',
    '-train_tgt', 'data/tgt-train.txt',
    '-valid_src', 'data/src-val.txt',
    '-valid_tgt', 'data/tgt-val.txt',
    '-save_data', SAVE_DATA_PREFIX
]

# parse_known_args returns (namespace, unrecognised_args); only the
# namespace is kept.
opt = parser.parse_known_args(default_opts)[0]