def test_placeholder_loadserialized(self):
    """Placeholders such as {EXP_DIR}/{EXP} inside a !LoadSerialized filename are resolved by preload_obj."""
    with open(f"{self.out_dir}/tmp1.yaml", "w") as serialized_file:
        yaml.dump(DummyClass(arg1="v1"), serialized_file)
    config = yaml.load(f"""
a: !LoadSerialized
  filename: '{{EXP_DIR}}/{{EXP}}.yaml'
""")
    # exp_name/exp_dir supply the values substituted for {EXP} and {EXP_DIR}
    YamlPreloader.preload_obj(config, exp_name="tmp1", exp_dir=self.out_dir)
def test_resolve_bare_default_args(self):
    """Bare (class-valued) default arguments are instantiated recursively by _resolve_bare_default_args."""
    config = yaml.load("""
a: !DummyClass
  arg1: !DummyClass2 {}
b: !DummyClass3 {}
""")
    YamlPreloader._resolve_bare_default_args(config)
    # defaults are filled in at every nesting level
    self.assertIsInstance(config["a"].arg1.arg1, DummyClass)
    self.assertIsInstance(config["b"].arg1, DummyClass2)
    self.assertIsInstance(config["b"].arg1.arg1, DummyClass)
def test_inconsistent_loadserialized(self):
    """An unrecognized key under !LoadSerialized raises ValueError during preload."""
    with open(f"{self.out_dir}/tmp1.yaml", "w") as serialized_file:
        yaml.dump(DummyClass(arg1="v1"), serialized_file)
    config = yaml.load(f"""
a: !LoadSerialized
  filename: {self.out_dir}/tmp1.yaml
  bad_arg: 1
""")
    with self.assertRaises(ValueError):
        YamlPreloader.preload_obj(config, "SOME_EXP_NAME", "SOME_EXP_DIR")
def test_resolve_kwargs(self):
    """Entries under a 'kwargs:' mapping are promoted to plain attributes and the 'kwargs' key is removed."""
    obj = yaml.load("""
!DummyClass
kwargs:
  arg1: 1
  other_arg: 2
""")
    YamlPreloader._resolve_kwargs(obj)
    # the kwargs container itself must be gone, and no unrelated attribute appears
    self.assertFalse(hasattr(obj, "kwargs"))
    self.assertFalse(hasattr(obj, "arg2"))
    # both kwargs entries become attributes with their original values
    self.assertEqual(getattr(obj, "arg1", None), 1)
    self.assertEqual(getattr(obj, "other_arg", None), 2)
def test_load_referenced_serialized_top(self):
    """A document-root !LoadSerialized node is replaced by the deserialized object."""
    with open(f"{self.out_dir}/tmp1.yaml", "w") as serialized_file:
        yaml.dump(DummyClass(arg1="v1"), serialized_file)
    placeholder = yaml.load(f"!LoadSerialized {{ filename: {self.out_dir}/tmp1.yaml }}")
    restored = YamlPreloader._load_serialized(placeholder)
    self.assertIsInstance(restored, DummyClass)
    self.assertEqual(restored.arg1, "v1")
def main() -> None:
    """Decode a source file with a saved model and write hypotheses to a file.

    Command-line arguments (all required):
      --src  path of the source file to read from
      --hyp  path of the file to write hypotheses to
      --mod  path of the serialized model file to load
    """
    parser = argparse.ArgumentParser()
    utils.add_dynet_argparse(parser)
    # fix: '--src' help was a pointless f-string with no placeholders; plain string now
    parser.add_argument("--src", help="Path of source file to read from.", required=True)
    parser.add_argument("--hyp", help="Path of file to write hypothesis to.", required=True)
    parser.add_argument("--mod", help="Path of model file to read.", required=True)
    args = parser.parse_args()

    exp_dir = os.path.dirname(__file__)
    exp = "{EXP}"  # literal placeholder; left unsubstituted intentionally

    param_collections.ParamManager.init_param_col()

    # TODO: can we avoid the LoadSerialized proxy and load stuff directly?
    load_experiment = LoadSerialized(filename=args.mod)
    uninitialized_experiment = YamlPreloader.preload_obj(load_experiment, exp_dir=exp_dir, exp_name=exp)
    loaded_experiment = initialize_if_needed(uninitialized_experiment)
    model = loaded_experiment.model
    inference = model.inference
    # parameters must be populated after the model graph exists
    param_collections.ParamManager.populate()

    decoding_task = tasks.DecodingEvalTask(args.src, args.hyp, model, inference)
    decoding_task.eval()
def setUp(self):
    """Load a pre-trained ja->en model and read the small test corpora.

    Sets self.model, self.src_data, self.trg_data for use by the tests.
    """
    events.clear()
    ParamManager.init_param_col()
    # Load a pre-trained model; "train" and "status" are nulled out so the
    # saved experiment can be reused purely for decoding.
    # fix: the filename was a placeholder-free f-string; plain literal now
    load_experiment = LoadSerialized(
        filename="test/data/tiny_jaen.model",
        overwrite=[
            {"path": "train", "val": None},
            {"path": "status", "val": None},
        ])
    EXP_DIR = '.'
    EXP = "decode"
    uninitialized_experiment = YamlPreloader.preload_obj(load_experiment, exp_dir=EXP_DIR, exp_name=EXP)
    loaded_experiment = initialize_if_needed(uninitialized_experiment)
    ParamManager.populate()
    # Pull out the parts we need from the experiment
    # fix: dropped unused locals src_vocab/trg_vocab (read but never used)
    self.model = loaded_experiment.model
    event_trigger.set_train(False)
    self.src_data = list(self.model.src_reader.read_sents("test/data/head.ja"))
    self.trg_data = list(self.model.trg_reader.read_sents("test/data/head.en"))
def test_experiment_names_from_file(self):
    """Experiment names are returned in lexicographic order ('exp10' before 'exp2')."""
    experiments = {
        "exp1": DummyClass(""),
        "exp2": DummyClass(""),
        "exp10": DummyClass(""),
    }
    with open(f"{self.out_dir}/tmp.yaml", "w") as config_file:
        yaml.dump(experiments, config_file)
    names = YamlPreloader.experiment_names_from_file(f"{self.out_dir}/tmp.yaml")
    self.assertListEqual(names, ["exp1", "exp10", "exp2"])
def test_format_strings(self):
    """Placeholder substitution via _format_strings on config values and default arguments."""
    config = yaml.load("""
a: !DummyClass
  arg1: '{V1}'
  other_arg: 2
b: !DummyClass
  arg1: 1
  other_arg: '{V2}'
c: '{V1}/bla'
d: ['bla', 'bla.{V2}']
""")
    YamlPreloader._format_strings(config, {"V1": "val1", "V2": "val2"})
    # 'a': explicit string placeholder is substituted; the int is untouched
    self.assertEqual(config["a"].arg1, "val1")
    self.assertEqual(config["a"].other_arg, 2)
    self.assertEqual(config["a"].arg2, "val2")
    self.assertFalse(hasattr(config["a"], "arg3"))
    # 'b': explicitly-set other_arg keeps its literal value here
    self.assertEqual(config["b"].arg1, 1)
    self.assertEqual(config["b"].other_arg, '{V2}')
    self.assertEqual(config["b"].arg2, "val2")
    self.assertFalse(hasattr(config["b"], "arg3"))
    # plain strings and strings inside lists are formatted as well
    self.assertEqual(config["c"], "val1/bla")
    self.assertListEqual(config["d"], ["bla", "bla.val2"])
def test_load_referenced_serialized_nested(self):
    """!LoadSerialized nodes are resolved recursively, including inside 'overwrite' values."""
    with open(f"{self.out_dir}/tmp1.yaml", "w") as serialized_file:
        yaml.dump(DummyClass(arg1="v1"), serialized_file)
    config = yaml.load(f"""
a: 1
b: !LoadSerialized
  filename: {self.out_dir}/tmp1.yaml
  overwrite:
  - path: arg1
    val: !LoadSerialized
      filename: {self.out_dir}/tmp1.yaml
""")
    resolved = YamlPreloader._load_serialized(config)
    # outer node and the nested overwrite value are both deserialized
    self.assertIsInstance(resolved["b"], DummyClass)
    self.assertIsInstance(resolved["b"].arg1, DummyClass)
def main(overwrite_args=None):
    """Run every experiment listed in a YAML experiments file (DyNet-only runner).

    :param overwrite_args: optional argv list parsed instead of sys.argv
                           (useful when invoking programmatically / from tests).
    """
    # Tee stdout and stderr so console output is also captured in the log file.
    with tee.Tee(), tee.Tee(error=True):
        argparser = argparse.ArgumentParser()
        argparser.add_argument("--dynet-mem", type=str)
        argparser.add_argument("--dynet-seed", type=int, help="set random seed for DyNet and XNMT.")
        argparser.add_argument("--dynet-autobatch", type=int)
        argparser.add_argument("--dynet-devices", type=str)
        argparser.add_argument("--dynet-viz", action='store_true', help="use visualization")
        argparser.add_argument("--dynet-gpu", action='store_true', help="use GPU acceleration")
        argparser.add_argument("--dynet-gpu-ids", type=int)
        argparser.add_argument("--dynet-gpus", type=int)
        argparser.add_argument("--dynet-weight-decay", type=float)
        argparser.add_argument("--dynet-profiling", type=int)
        argparser.add_argument("--settings", type=str, default="standard",
                               help="settings (standard, debug, or unittest)"
                                    "must be given in '=' syntax, e.g."
                                    " --settings=standard")
        argparser.add_argument("experiments_file")
        argparser.add_argument("experiment_name", nargs='*', help="Run only the specified experiments")
        argparser.set_defaults(generate_doc=False)
        args = argparser.parse_args(overwrite_args)

        # NOTE(review): truthiness test means an explicit --dynet-seed=0 is silently ignored.
        if args.dynet_seed:
            random.seed(args.dynet_seed)
            np.random.seed(args.dynet_seed)

        if args.dynet_gpu:
            if settings.CHECK_VALIDITY:
                settings.CHECK_VALIDITY = False
                log_preamble(
                    "disabling CHECK_VALIDITY because it is not supported on GPU currently",
                    logging.WARNING)

        config_experiment_names = YamlPreloader.experiment_names_from_file(
            args.experiments_file)

        results = []

        # Check ahead of time that all experiments exist, to avoid bad surprises
        experiment_names = args.experiment_name or config_experiment_names
        if args.experiment_name:
            nonexistent = set(experiment_names).difference(
                config_experiment_names)
            if len(nonexistent) != 0:
                raise Exception("Experiments {} do not exist".format(",".join(
                    list(nonexistent))))

        log_preamble(
            f"running XNMT revision {tee.get_git_revision()} on {socket.gethostname()} on {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}"
        )

        for experiment_name in experiment_names:
            # fresh parameter collection per experiment so models don't leak into each other
            ParamManager.init_param_col()
            uninitialized_exp_args = YamlPreloader.preload_experiment_from_file(
                args.experiments_file, experiment_name)
            logger.info(f"=> Running {experiment_name}")
            glob_args = uninitialized_exp_args.data.exp_global
            log_file = glob_args.log_file
            # refuse to clobber an existing log unless OVERWRITE_LOG is active
            if os.path.isfile(log_file) and not settings.OVERWRITE_LOG:
                logger.warning(
                    f"log file {log_file} already exists, skipping experiment; please delete log file by hand if you want to overwrite it "
                    f"(or activate OVERWRITE_LOG, by either specifying an environment variable as OVERWRITE_LOG=1, "
                    f"or specifying --settings=debug, or changing xnmt.settings.Standard.OVERWRITE_LOG manually)"
                )
                continue
            tee.set_out_file(log_file)
            model_file = glob_args.model_file
            uninitialized_exp_args.data.exp_global.commandline_args = args
            # Create the model
            experiment = initialize_if_needed(uninitialized_exp_args)
            ParamManager.param_col.model_file = experiment.exp_global.model_file
            ParamManager.param_col.save_num_checkpoints = experiment.exp_global.save_num_checkpoints
            ParamManager.populate()
            # Run the experiment
            eval_scores = experiment(save_fct=lambda: save_to_file(
                model_file, experiment, ParamManager.param_col))
            results.append((experiment_name, eval_scores))
            print_results(results)
            tee.unset_out_file()
def main(overwrite_args: Optional[Sequence[str]] = None) -> None:
    """Run every experiment listed in a YAML experiments file (DyNet or Torch backend).

    :param overwrite_args: optional argv list parsed instead of sys.argv
                           (useful when invoking programmatically / from tests).
    """
    # Tee stdout and stderr so console output is also captured in the log file.
    with tee.Tee(), tee.Tee(error=True):
        argparser = argparse.ArgumentParser()
        utils.add_backend_argparse(argparser)
        argparser.add_argument("--settings", type=str, default="standard",
                               help="settings (standard, debug, or unittest)"
                                    "must be given in '=' syntax, e.g."
                                    " --settings=standard")
        argparser.add_argument(
            "--resume", action='store_true',
            help="whether a saved experiment is being resumed, and"
                 "locations of output files should be re-used.")
        # NOTE(review): add_backend_argparse may already register backend options — confirm no duplicate flag.
        argparser.add_argument("--backend", type=str, default="dynet",
                               help="backend (dynet or torch)")
        argparser.add_argument("experiments_file")
        argparser.add_argument("experiment_name", nargs='*', help="Run only the specified experiments")
        argparser.set_defaults(generate_doc=False)
        args = argparser.parse_args(overwrite_args)

        # legacy --dynet-seed is mapped onto the generic seed argument
        if xnmt.backend_dynet and args.dynet_seed:
            args.seed = args.dynet_seed
        # NOTE(review): truthiness test means an explicit seed of 0 is silently ignored.
        if getattr(args, "seed", None):
            random.seed(args.seed)
            np.random.seed(args.seed)
            if xnmt.backend_torch:
                # NOTE(review): seeds torch with the constant 0 rather than args.seed — confirm intended.
                torch.manual_seed(0)

        if xnmt.backend_dynet and args.dynet_gpu and settings.CHECK_VALIDITY:
            settings.CHECK_VALIDITY = False
            log_preamble(
                "disabling CHECK_VALIDITY because it is not supported in the DyNet/GPU setting",
                logging.WARNING)

        config_experiment_names = YamlPreloader.experiment_names_from_file(
            args.experiments_file)

        results = []

        # Check ahead of time that all experiments exist, to avoid bad surprises
        experiment_names = args.experiment_name or config_experiment_names
        if args.experiment_name:
            nonexistent = set(experiment_names).difference(
                config_experiment_names)
            if len(nonexistent) != 0:
                raise Exception("Experiments {} do not exist".format(",".join(
                    list(nonexistent))))

        log_preamble(
            f"running XNMT revision {tee.get_git_revision()} on {socket.gethostname()} with {'DyNet' if xnmt.backend_dynet else 'PyTorch'} on {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}"
        )

        for experiment_name in experiment_names:
            # fresh parameter collection per experiment so models don't leak into each other
            ParamManager.init_param_col()
            uninitialized_exp_args = YamlPreloader.preload_experiment_from_file(
                args.experiments_file, experiment_name, resume=args.resume)
            logger.info(f"=> Running {experiment_name}")
            glob_args = uninitialized_exp_args.data.exp_global
            log_file = glob_args.log_file
            # refuse to clobber existing logs (plain log + tensorboard dir) unless OVERWRITE_LOG
            if not settings.OVERWRITE_LOG:
                log_files_exist = []
                if os.path.isfile(log_file):
                    log_files_exist.append(log_file)
                if os.path.isdir(log_file + ".tb"):
                    log_files_exist.append(log_file + ".tb/")
                if log_files_exist:
                    logger.warning(
                        f"log file(s) {' '.join(log_files_exist)} already exists, skipping experiment; "
                        f"please delete log file by hand if you want to overwrite it "
                        f"(or activate OVERWRITE_LOG, by either specifying an environment variable OVERWRITE_LOG=1, "
                        f"or specifying --settings=debug, or changing xnmt.settings.Standard.OVERWRITE_LOG manually)"
                    )
                    continue
            elif settings.OVERWRITE_LOG and os.path.isdir(log_file + ".tb"):
                shutil.rmtree(
                    log_file + ".tb/"
                )  # remove tensorboard logs from previous run that is being overwritten
            tee.set_out_file(log_file, exp_name=experiment_name)
            try:
                model_file = glob_args.model_file
                uninitialized_exp_args.data.exp_global.commandline_args = vars(
                    args)
                # Create the model
                experiment = initialize_if_needed(uninitialized_exp_args)
                ParamManager.param_col.model_file = experiment.exp_global.model_file
                ParamManager.param_col.save_num_checkpoints = experiment.exp_global.save_num_checkpoints
                ParamManager.populate()
                # Run the experiment
                eval_scores = experiment(
                    save_fct=lambda: save_to_file(model_file, experiment))
                results.append((experiment_name, eval_scores))
                print_results(results)
            except Exception as e:
                file_logger.error(traceback.format_exc())
                raise e
            finally:
                # always restore the default output, even when the experiment failed
                tee.unset_out_file()
import xnmt.tee
from xnmt.param_collection import ParamManager
from xnmt.persistence import initialize_if_needed, YamlPreloader, LoadSerialized, save_to_file

# Programmatic-usage example: reload a previously saved experiment
# ("programmatic.mod") and re-run it, without going through a YAML config file.
# NOTE(review): this snippet uses `os` and is presumably preceded by `import os`
# elsewhere in the file — confirm.
EXP_DIR = os.path.dirname(__file__)
EXP = "programmatic-load"

model_file = f"{EXP_DIR}/models/{EXP}.mod"
log_file = f"{EXP_DIR}/logs/{EXP}.log"

# route console output into the log file as well
xnmt.tee.set_out_file(log_file)

ParamManager.init_param_col()

# "train" is overwritten with None so the loaded experiment is not re-trained
load_experiment = LoadSerialized(
    filename=f"{EXP_DIR}/models/programmatic.mod",
    overwrite=[
        {"path": "train", "val": None}
    ]
)

uninitialized_experiment = YamlPreloader.preload_obj(load_experiment, exp_dir=EXP_DIR, exp_name=EXP)
loaded_experiment = initialize_if_needed(uninitialized_experiment)

# if we were to continue training, we would need to set a save model file like this:
# ParamManager.param_col.model_file = model_file
ParamManager.populate()

# run experiment
loaded_experiment(save_fct=lambda: save_to_file(model_file, loaded_experiment))
def test_resolve_bare_default_args_illegal(self):
    """Resolving bare default args for !DummyClassForgotBare raises ValueError."""
    config = yaml.load("""
a: !DummyClassForgotBare {}
""")
    with self.assertRaises(ValueError):
        YamlPreloader._resolve_bare_default_args(config)
from xnmt.persistence import initialize_if_needed, YamlPreloader, LoadSerialized, save_to_file parser = argparse.ArgumentParser() parser.add_argument("filename") parser.add_argument("output_vocab") parser.add_argument("output_embed") parser.add_argument("--embedding", choices=["src", "trg"], default="src") args = parser.parse_args() ParamManager.init_param_col() load_experiment = LoadSerialized( filename=args.filename, ) uninitialized_experiment = YamlPreloader.preload_obj(load_experiment, exp_dir="/tmp/dummy", exp_name="dummy") experiment = initialize_if_needed(uninitialized_experiment) if args.embedding == "src": vocab = experiment.model.src_reader.vocab tensor = experiment.model.src_embedder.embeddings else: vocab = experiment.model.trg_reader.vocab tensor = experiment.model.trg_embedder.embeddings with open(args.output_vocab, mode="w") as fp: for word in vocab.i2w: print(word, file=fp) with open(args.output_embed, mode="w") as fp: