def test_placeholder_loadserialized(self):
   """Smoke-test preloading a ``!LoadSerialized`` node whose filename uses placeholders.

   A ``DummyClass`` is dumped to ``<out_dir>/tmp1.yaml`` and the YAML under test
   refers to a file named ``'{EXP_DIR}/{EXP}.yaml'``; preloading is invoked with
   ``exp_dir=out_dir`` and ``exp_name="tmp1"``. Nothing is asserted on the
   result: the test succeeds when ``preload_obj`` returns without raising.
   """
   with open(f"{self.out_dir}/tmp1.yaml", "w") as dump_file:
     yaml.dump(DummyClass(arg1="v1"), dump_file)
   # The doubled braces are f-string escapes, so the YAML text carries the
   # literal placeholders {EXP_DIR} and {EXP}.
   yaml_text = f"""
   a: !LoadSerialized
     filename: '{{EXP_DIR}}/{{EXP}}.yaml'
   """
   placeholder_cfg = yaml.load(yaml_text)
   YamlPreloader.preload_obj(placeholder_cfg, exp_dir=self.out_dir, exp_name="tmp1")
 def test_resolve_bare_default_args(self):
   """Check the object tree left behind by ``_resolve_bare_default_args``.

   The YAML only spells out ``a.arg1`` (a ``DummyClass2``) and an argument-less
   ``b`` (a ``DummyClass3``). After resolution the test expects
   ``a.arg1.arg1`` to be a ``DummyClass``, ``b.arg1`` to be a ``DummyClass2``
   and ``b.arg1.arg1`` to be a ``DummyClass``.
   """
   yaml_text = """
                        a: !DummyClass
                          arg1: !DummyClass2 {}
                        b: !DummyClass3 {}
                        """
   tree = yaml.load(yaml_text)
   YamlPreloader._resolve_bare_default_args(tree)

   entry_a = tree["a"]
   self.assertIsInstance(entry_a.arg1.arg1, DummyClass)

   entry_b = tree["b"]
   self.assertIsInstance(entry_b.arg1, DummyClass2)
   self.assertIsInstance(entry_b.arg1.arg1, DummyClass)
 def test_inconsistent_loadserialized(self):
   """A ``!LoadSerialized`` node carrying an extra ``bad_arg`` key must be rejected.

   The referenced file exists (a dumped ``DummyClass``), so the expected
   ``ValueError`` from ``preload_obj`` is not caused by a missing file.
   """
   with open(f"{self.out_dir}/tmp1.yaml", "w") as dump_file:
     yaml.dump(DummyClass(arg1="v1"), dump_file)
   yaml_text = f"""
   a: !LoadSerialized
     filename: {self.out_dir}/tmp1.yaml
     bad_arg: 1
   """
   broken_cfg = yaml.load(yaml_text)
   # Callable form of assertRaises: preload_obj is invoked with the trailing arguments.
   self.assertRaises(ValueError,
                     YamlPreloader.preload_obj,
                     broken_cfg, "SOME_EXP_NAME", "SOME_EXP_DIR")
 def test_resolve_kwargs(self):
   """``_resolve_kwargs`` should move the entries of ``kwargs`` onto the object itself.

   After the call the object must no longer have a ``kwargs`` attribute (nor an
   ``arg2`` one), while ``arg1`` and ``other_arg`` must hold the values that
   were listed under ``kwargs`` in the YAML.
   """
   yaml_text = """
   !DummyClass
     kwargs:
       arg1: 1
       other_arg: 2
   """
   node = yaml.load(yaml_text)
   YamlPreloader._resolve_kwargs(node)

   for absent_name in ("kwargs", "arg2"):
     self.assertFalse(hasattr(node, absent_name))
   for attr_name, expected in (("arg1", 1), ("other_arg", 2)):
     self.assertEqual(getattr(node, attr_name, None), expected)
 def test_load_referenced_serialized_top(self):
   """A top-level ``!LoadSerialized`` node is replaced by the object stored in its file.

   The file holds a dumped ``DummyClass(arg1="v1")``; ``_load_serialized`` is
   expected to return a ``DummyClass`` whose ``arg1`` is ``"v1"``.
   """
   with open(f"{self.out_dir}/tmp1.yaml", "w") as dump_file:
     yaml.dump(DummyClass(arg1="v1"), dump_file)
   # Doubled braces are f-string escapes and yield a YAML flow mapping: { filename: ... }
   yaml_text = f"!LoadSerialized {{ filename: {self.out_dir}/tmp1.yaml }}"
   reference = yaml.load(yaml_text)
   resolved = YamlPreloader._load_serialized(reference)
   self.assertIsInstance(resolved, DummyClass)
   self.assertEqual(resolved.arg1, "v1")
Exemple #6
0
def main() -> None:
    """Decode a source file with a serialized model and write the hypotheses.

    Command-line options (all required): ``--src`` (file to read from),
    ``--hyp`` (file to write hypotheses to) and ``--mod`` (model file to read).
    Further options are registered by ``utils.add_dynet_argparse``.

    The experiment stored in the model file is preloaded and initialized, its
    ``model`` and ``model.inference`` are handed to a ``DecodingEvalTask``, and
    the task's ``eval()`` is run.
    """
    parser = argparse.ArgumentParser()
    utils.add_dynet_argparse(parser)
    parser.add_argument("--src",
                        help="Path of source file to read from.",
                        required=True)
    parser.add_argument("--hyp",
                        help="Path of file to write hypothesis to.",
                        required=True)
    parser.add_argument("--mod",
                        help="Path of model file to read.",
                        required=True)
    args = parser.parse_args()

    # Resolve to an absolute path first: os.path.dirname() returns "" for a bare
    # relative filename, which would leave exp_dir empty.
    exp_dir = os.path.dirname(os.path.abspath(__file__))
    # NOTE(review): the experiment name is the literal placeholder text "{EXP}",
    # presumably so that {EXP} placeholders stay unresolved -- confirm.
    exp = "{EXP}"

    param_collections.ParamManager.init_param_col()

    # TODO: can we avoid the LoadSerialized proxy and load stuff directly?
    load_experiment = LoadSerialized(filename=args.mod)

    uninitialized_experiment = YamlPreloader.preload_obj(load_experiment,
                                                         exp_dir=exp_dir,
                                                         exp_name=exp)
    loaded_experiment = initialize_if_needed(uninitialized_experiment)
    model = loaded_experiment.model
    inference = model.inference
    param_collections.ParamManager.populate()

    decoding_task = tasks.DecodingEvalTask(args.src, args.hyp, model,
                                           inference)
    decoding_task.eval()
Exemple #7
0
    def setUp(self):
        """Load the pre-trained ``tiny_jaen`` model and the sample sentences for the tests.

        Sets ``self.model`` to the model of the loaded experiment, and
        ``self.src_data`` / ``self.trg_data`` to the sentences read from
        ``test/data/head.ja`` and ``test/data/head.en`` by the model's own
        source and target readers.
        """
        events.clear()
        ParamManager.init_param_col()

        # Load a pre-trained model; the "train" and "status" paths are overwritten
        # with None (presumably so that nothing training-related is set up -- confirm).
        load_experiment = LoadSerialized(filename="test/data/tiny_jaen.model",
                                         overwrite=[
                                             {
                                                 "path": "train",
                                                 "val": None
                                             },
                                             {
                                                 "path": "status",
                                                 "val": None
                                             },
                                         ])
        uninitialized_experiment = YamlPreloader.preload_obj(load_experiment,
                                                             exp_dir='.',
                                                             exp_name="decode")
        loaded_experiment = initialize_if_needed(uninitialized_experiment)
        ParamManager.populate()

        # Only the model is needed from the experiment
        self.model = loaded_experiment.model

        event_trigger.set_train(False)

        self.src_data = list(
            self.model.src_reader.read_sents("test/data/head.ja"))
        self.trg_data = list(
            self.model.trg_reader.read_sents("test/data/head.en"))
 def test_experiment_names_from_file(self):
   """``experiment_names_from_file`` lists the top-level keys of an experiments file.

   Three experiments (``exp1``, ``exp2``, ``exp10``) are written out; the
   expected result is ``["exp1", "exp10", "exp2"]``, i.e. the names in
   string-sorted rather than numeric order.
   """
   yaml_path = f"{self.out_dir}/tmp.yaml"
   experiments = {exp_name: DummyClass("") for exp_name in ("exp1", "exp2", "exp10")}
   with open(yaml_path, "w") as dump_file:
     yaml.dump(experiments, dump_file)

   found_names = YamlPreloader.experiment_names_from_file(yaml_path)
   self.assertListEqual(found_names, ["exp1", "exp10", "exp2"])
 def test_format_strings(self):
   """Check which strings ``_format_strings`` substitutes for ``{V1}`` / ``{V2}``.

   Expectations encoded below: ``a.arg1``, the plain string ``c`` and the list
   items of ``d`` get their placeholders filled in; ``b.other_arg`` keeps its
   literal ``'{V2}'``; both objects end up with ``arg2 == "val2"`` and without
   an ``arg3`` attribute (``arg2`` is not given in the YAML, so it presumably
   comes from ``DummyClass`` itself -- see its definition).
   """
   yaml_text = """
                        a: !DummyClass
                          arg1: '{V1}'
                          other_arg: 2
                        b: !DummyClass
                          arg1: 1
                          other_arg: '{V2}'
                        c: '{V1}/bla'
                        d: ['bla', 'bla.{V2}']
                        """
   tree = yaml.load(yaml_text)
   substitutions = {"V1":"val1", "V2":"val2"}
   YamlPreloader._format_strings(tree, substitutions)

   entry_a = tree["a"]
   self.assertEqual(entry_a.arg1, "val1")
   self.assertEqual(entry_a.other_arg, 2)
   self.assertEqual(entry_a.arg2, "val2")
   self.assertFalse(hasattr(entry_a, "arg3"))

   entry_b = tree["b"]
   self.assertEqual(entry_b.arg1, 1)
   self.assertEqual(entry_b.other_arg, '{V2}')
   self.assertEqual(entry_b.arg2, "val2")
   self.assertFalse(hasattr(entry_b, "arg3"))

   self.assertEqual(tree["c"], "val1/bla")
   self.assertListEqual(tree["d"], ["bla", "bla.val2"])
 def test_load_referenced_serialized_nested(self):
   """``_load_serialized`` also resolves a ``!LoadSerialized`` nested in an ``overwrite`` value.

   ``b`` loads the dumped ``DummyClass`` and overwrites its ``arg1`` with a
   second ``!LoadSerialized`` of the same file, so both ``b`` and ``b.arg1``
   are expected to be ``DummyClass`` instances.
   """
   with open(f"{self.out_dir}/tmp1.yaml", "w") as dump_file:
     yaml.dump(DummyClass(arg1="v1"), dump_file)
   yaml_text = f"""
   a: 1
   b: !LoadSerialized
     filename: {self.out_dir}/tmp1.yaml
     overwrite:
     - path: arg1
       val: !LoadSerialized
             filename: {self.out_dir}/tmp1.yaml
   """
   tree = yaml.load(yaml_text)
   resolved = YamlPreloader._load_serialized(tree)
   self.assertIsInstance(resolved["b"], DummyClass)
   self.assertIsInstance(resolved["b"].arg1, DummyClass)
Exemple #11
0
def main(overwrite_args=None):
    """Run the experiments defined in a YAML experiments file, one after another.

    Args:
        overwrite_args: Optional list of argument strings handed to
            ``ArgumentParser.parse_args``; with ``None`` argparse falls back to
            the process command line.

    Raises:
        Exception: If an experiment name given on the command line is not
            defined in the experiments file.
    """
    with tee.Tee(), tee.Tee(error=True):
        argparser = argparse.ArgumentParser()
        argparser.add_argument("--dynet-mem", type=str)
        argparser.add_argument("--dynet-seed",
                               type=int,
                               help="set random seed for DyNet and XNMT.")
        argparser.add_argument("--dynet-autobatch", type=int)
        argparser.add_argument("--dynet-devices", type=str)
        argparser.add_argument("--dynet-viz",
                               action='store_true',
                               help="use visualization")
        argparser.add_argument("--dynet-gpu",
                               action='store_true',
                               help="use GPU acceleration")
        argparser.add_argument("--dynet-gpu-ids", type=int)
        argparser.add_argument("--dynet-gpus", type=int)
        argparser.add_argument("--dynet-weight-decay", type=float)
        argparser.add_argument("--dynet-profiling", type=int)
        # The first fragment ends in a space so the concatenated help text does
        # not read "unittest)must".
        argparser.add_argument("--settings",
                               type=str,
                               default="standard",
                               help="settings (standard, debug, or unittest) "
                               "must be given in '=' syntax, e.g."
                               " --settings=standard")
        argparser.add_argument("experiments_file")
        argparser.add_argument("experiment_name",
                               nargs='*',
                               help="Run only the specified experiments")
        argparser.set_defaults(generate_doc=False)
        args = argparser.parse_args(overwrite_args)

        # Truthiness check: a seed of 0 is treated like "no seed given".
        if args.dynet_seed:
            random.seed(args.dynet_seed)
            np.random.seed(args.dynet_seed)

        if args.dynet_gpu and settings.CHECK_VALIDITY:
            settings.CHECK_VALIDITY = False
            log_preamble(
                "disabling CHECK_VALIDITY because it is not supported on GPU currently",
                logging.WARNING)

        config_experiment_names = YamlPreloader.experiment_names_from_file(
            args.experiments_file)

        results = []

        # Check ahead of time that all experiments exist, to avoid bad surprises
        experiment_names = args.experiment_name or config_experiment_names

        if args.experiment_name:
            nonexistent = set(experiment_names).difference(
                config_experiment_names)
            if nonexistent:
                # Sorted so the message is the same on every run.
                raise Exception("Experiments {} do not exist".format(
                    ",".join(sorted(nonexistent))))

        log_preamble(
            f"running XNMT revision {tee.get_git_revision()} on {socket.gethostname()} on {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}"
        )
        for experiment_name in experiment_names:

            ParamManager.init_param_col()

            uninitialized_exp_args = YamlPreloader.preload_experiment_from_file(
                args.experiments_file, experiment_name)

            logger.info(f"=> Running {experiment_name}")

            glob_args = uninitialized_exp_args.data.exp_global
            log_file = glob_args.log_file

            if os.path.isfile(log_file) and not settings.OVERWRITE_LOG:
                logger.warning(
                    f"log file {log_file} already exists, skipping experiment; please delete log file by hand if you want to overwrite it "
                    f"(or activate OVERWRITE_LOG, by either specifying an environment variable as OVERWRITE_LOG=1, "
                    f"or specifying --settings=debug, or changing xnmt.settings.Standard.OVERWRITE_LOG manually)"
                )
                continue

            tee.set_out_file(log_file)

            # try/finally: the log output file must be released even when the
            # experiment raises, otherwise it stays set for whatever runs next.
            try:
                model_file = glob_args.model_file

                uninitialized_exp_args.data.exp_global.commandline_args = args

                # Create the model
                experiment = initialize_if_needed(uninitialized_exp_args)
                ParamManager.param_col.model_file = experiment.exp_global.model_file
                ParamManager.param_col.save_num_checkpoints = experiment.exp_global.save_num_checkpoints
                ParamManager.populate()

                # Run the experiment
                eval_scores = experiment(save_fct=lambda: save_to_file(
                    model_file, experiment, ParamManager.param_col))
                results.append((experiment_name, eval_scores))
                print_results(results)
            finally:
                tee.unset_out_file()
Exemple #12
0
def main(overwrite_args: Optional[Sequence[str]] = None) -> None:
    """Run the experiments defined in a YAML experiments file, one after another.

    Args:
        overwrite_args: Optional argument strings handed to
            ``ArgumentParser.parse_args``; with ``None`` argparse falls back to
            the process command line.

    Raises:
        Exception: If an experiment name given on the command line is not
            defined in the experiments file. Any exception raised while an
            experiment is set up or run is logged to ``file_logger`` and then
            re-raised.
    """
    with tee.Tee(), tee.Tee(error=True):
        argparser = argparse.ArgumentParser()
        utils.add_backend_argparse(argparser)
        # Help fragments end in a space so the concatenated text does not run
        # words together ("unittest)must", "andlocations").
        argparser.add_argument("--settings",
                               type=str,
                               default="standard",
                               help="settings (standard, debug, or unittest) "
                               "must be given in '=' syntax, e.g."
                               " --settings=standard")
        argparser.add_argument(
            "--resume",
            action='store_true',
            help="whether a saved experiment is being resumed, and "
            "locations of output files should be re-used.")
        argparser.add_argument("--backend",
                               type=str,
                               default="dynet",
                               help="backend (dynet or torch)")
        argparser.add_argument("experiments_file")
        argparser.add_argument("experiment_name",
                               nargs='*',
                               help="Run only the specified experiments")
        argparser.set_defaults(generate_doc=False)
        args = argparser.parse_args(overwrite_args)

        if xnmt.backend_dynet and args.dynet_seed: args.seed = args.dynet_seed
        # Truthiness check: a missing seed and a seed of 0 both skip seeding.
        if getattr(args, "seed", None):
            random.seed(args.seed)
            np.random.seed(args.seed)
            # Seed torch with the requested seed too (was hard-coded to 0,
            # which silently ignored the user's choice).
            if xnmt.backend_torch: torch.manual_seed(args.seed)

        if xnmt.backend_dynet and args.dynet_gpu and settings.CHECK_VALIDITY:
            settings.CHECK_VALIDITY = False
            log_preamble(
                "disabling CHECK_VALIDITY because it is not supported in the DyNet/GPU setting",
                logging.WARNING)

        config_experiment_names = YamlPreloader.experiment_names_from_file(
            args.experiments_file)

        results = []

        # Check ahead of time that all experiments exist, to avoid bad surprises
        experiment_names = args.experiment_name or config_experiment_names

        if args.experiment_name:
            nonexistent = set(experiment_names).difference(
                config_experiment_names)
            if nonexistent:
                # Sorted so the message is the same on every run.
                raise Exception("Experiments {} do not exist".format(
                    ",".join(sorted(nonexistent))))

        log_preamble(
            f"running XNMT revision {tee.get_git_revision()} on {socket.gethostname()} with {'DyNet' if xnmt.backend_dynet else 'PyTorch'} on {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}"
        )
        for experiment_name in experiment_names:

            ParamManager.init_param_col()

            uninitialized_exp_args = YamlPreloader.preload_experiment_from_file(
                args.experiments_file, experiment_name, resume=args.resume)

            logger.info(f"=> Running {experiment_name}")

            glob_args = uninitialized_exp_args.data.exp_global
            log_file = glob_args.log_file

            if not settings.OVERWRITE_LOG:
                # Skip the experiment rather than clobber an existing log file
                # or tensorboard directory.
                log_files_exist = []
                if os.path.isfile(log_file): log_files_exist.append(log_file)
                if os.path.isdir(log_file + ".tb"):
                    log_files_exist.append(log_file + ".tb/")
                if log_files_exist:
                    logger.warning(
                        f"log file(s) {' '.join(log_files_exist)} already exists, skipping experiment; "
                        f"please delete log file by hand if you want to overwrite it "
                        f"(or activate OVERWRITE_LOG, by either specifying an environment variable OVERWRITE_LOG=1, "
                        f"or specifying --settings=debug, or changing xnmt.settings.Standard.OVERWRITE_LOG manually)"
                    )
                    continue
            elif os.path.isdir(log_file + ".tb"):
                # OVERWRITE_LOG is active here: remove tensorboard logs from the
                # previous run that is being overwritten
                shutil.rmtree(log_file + ".tb/")

            tee.set_out_file(log_file, exp_name=experiment_name)

            try:

                model_file = glob_args.model_file

                uninitialized_exp_args.data.exp_global.commandline_args = vars(
                    args)

                # Create the model
                experiment = initialize_if_needed(uninitialized_exp_args)
                ParamManager.param_col.model_file = experiment.exp_global.model_file
                ParamManager.param_col.save_num_checkpoints = experiment.exp_global.save_num_checkpoints
                ParamManager.populate()

                # Run the experiment
                eval_scores = experiment(
                    save_fct=lambda: save_to_file(model_file, experiment))
                results.append((experiment_name, eval_scores))
                print_results(results)

            except Exception:
                file_logger.error(traceback.format_exc())
                # Bare raise re-raises the active exception with its traceback unchanged.
                raise
            finally:
                tee.unset_out_file()
Exemple #13
0
import xnmt.tee
from xnmt.param_collection import ParamManager
from xnmt.persistence import initialize_if_needed, YamlPreloader, LoadSerialized, save_to_file

# Anchor every path at this script's directory. The path is made absolute first
# because os.path.dirname() returns "" for a bare relative filename, and the
# f-strings below would then produce root-anchored paths such as "/models/...".
EXP_DIR = os.path.dirname(os.path.abspath(__file__))
EXP = "programmatic-load"

model_file = f"{EXP_DIR}/models/{EXP}.mod"
log_file = f"{EXP_DIR}/logs/{EXP}.log"

xnmt.tee.set_out_file(log_file)

ParamManager.init_param_col()

# Load the experiment saved as programmatic.mod, overwriting its "train" path with None.
load_experiment = LoadSerialized(
  filename=f"{EXP_DIR}/models/programmatic.mod",
  overwrite=[
    {"path" : "train", "val" : None}
  ]
)

uninitialized_experiment = YamlPreloader.preload_obj(load_experiment, exp_dir=EXP_DIR, exp_name=EXP)
loaded_experiment = initialize_if_needed(uninitialized_experiment)

# if we were to continue training, we would need to set a save model file like this:
# ParamManager.param_col.model_file = model_file
ParamManager.populate()

# run experiment
loaded_experiment(save_fct=lambda: save_to_file(model_file, loaded_experiment))
 def test_resolve_bare_default_args_illegal(self):
   """``_resolve_bare_default_args`` must raise ``ValueError`` for ``DummyClassForgotBare``."""
   yaml_text = """
                        a: !DummyClassForgotBare {}
                        """
   tree = yaml.load(yaml_text)
   # Callable form of assertRaises: the resolver is invoked with ``tree``.
   self.assertRaises(ValueError, YamlPreloader._resolve_bare_default_args, tree)
from xnmt.persistence import initialize_if_needed, YamlPreloader, LoadSerialized, save_to_file


# Command-line arguments: the serialized model file to load, the two output
# paths (vocabulary and embeddings), and which side's embeddings to export
# ("src" by default).
parser = argparse.ArgumentParser()
parser.add_argument("filename")
parser.add_argument("output_vocab")
parser.add_argument("output_embed")
parser.add_argument("--embedding", choices=["src", "trg"], default="src")
args = parser.parse_args()

ParamManager.init_param_col()
load_experiment = LoadSerialized(
  filename=args.filename,
)

# exp_dir / exp_name are fixed dummy values for this export script.
uninitialized_experiment = YamlPreloader.preload_obj(load_experiment, exp_dir="/tmp/dummy", exp_name="dummy")
experiment = initialize_if_needed(uninitialized_experiment)


# Pick the reader vocabulary and the embedder's embeddings for the requested side.
if args.embedding == "src":
  vocab = experiment.model.src_reader.vocab
  tensor = experiment.model.src_embedder.embeddings
else:
  vocab = experiment.model.trg_reader.vocab
  tensor = experiment.model.trg_embedder.embeddings

# Write the vocabulary: one entry of ``vocab.i2w`` per line, in iteration order.
with open(args.output_vocab, mode="w") as fp:
  for word in vocab.i2w:
    print(word, file=fp)

with open(args.output_embed, mode="w") as fp: