예제 #1
0
 def set_random_seed(self, seed):
   """
   Seed the random generator of SentencePiece.

   Caveat: SentencePiece only exposes a single global seed, and that seed
   is only picked up by new threads in which the random generator has not
   been used yet. See:
   https://github.com/google/sentencepiece/issues/635

   :param int seed: value passed on to SentencePiece
   """
   # Imported lazily, inside the method, so the dependency is only needed
   # when a seed is actually set.
   import sentencepiece  # noqa
   sentencepiece.set_random_generator_seed(seed)
예제 #2
0
def test_preprocess(exp_folder):
    """Run preprocessing for one experiment and compare it with the truth data.

    The experiment directory is recreated from scratch, seeded with the
    truth folder's ``config.yml``, preprocessed, and finally compared
    against the truth folder via ``helper.compare_folders``.

    :param exp_folder: path-like entry for the experiment; it is joined onto
        ``exp_truth_dir`` and its ``.name`` is used as the experiment name.
    """
    truth_dir = os.path.join(exp_truth_dir, exp_folder)
    truth_config = os.path.join(truth_dir, "config.yml")
    assert os.path.isfile(truth_config), (
        "The configuration file config.yml does not exist for " + exp_folder.name
    )

    # Start from an empty experiment directory containing only the config.
    out_dir = SIL_NLP_ENV.mt_experiments_dir / exp_folder.name
    shutil.rmtree(out_dir, ignore_errors=True)
    os.makedirs(out_dir, exist_ok=True)
    shutil.copyfile(truth_config, os.path.join(out_dir, "config.yml"))
    helper.init_file_logger(str(out_dir))

    # Fixed SentencePiece seed: this is to make the vocab generation consistent.
    sp.set_random_generator_seed(111)
    cfg = load_config(out_dir)
    cfg.set_seed()
    cfg.preprocess(stats=False)

    helper.compare_folders(truth_folder=truth_dir, computed_folder=out_dir)
예제 #3
0
 def _set_seed(self, seed):
     """Seed SentencePiece's random generator to ensure reproducibility.

     :param seed: value forwarded to ``sentencepiece.set_random_generator_seed``.
     """
     # Local import: sentencepiece is only loaded when a seed is being set.
     from sentencepiece import set_random_generator_seed
     set_random_generator_seed(seed)