def set_random_seed(self, seed):
    """
    Seed the random generator of SentencePiece.

    :param int seed: value handed to ``sentencepiece.set_random_generator_seed``
    """
    # Caveat: SentencePiece only exposes a single global seed, and that seed
    # is only picked up by new threads in which the random generator has not
    # been used yet.
    # See https://github.com/google/sentencepiece/issues/635
    import sentencepiece  # noqa

    sentencepiece.set_random_generator_seed(seed)
def test_preprocess(exp_folder):
    """Re-run preprocessing for one experiment and compare it with the truth folder.

    The reference ``config.yml`` is copied from the truth folder of
    ``exp_folder`` into a freshly emptied experiment directory, preprocessing
    is run there, and the resulting directory is compared against the truth
    folder.
    """
    config_name = "config.yml"

    # Locate the reference data; the config must exist before anything is touched.
    truth_dir = os.path.join(exp_truth_dir, exp_folder)
    truth_config = os.path.join(truth_dir, config_name)
    assert os.path.isfile(truth_config), (
        "The configuration file config.yml does not exist for " + exp_folder.name
    )

    # Start from a clean experiment directory that holds only the config file.
    work_dir = SIL_NLP_ENV.mt_experiments_dir / exp_folder.name
    shutil.rmtree(work_dir, ignore_errors=True)
    os.makedirs(work_dir, exist_ok=True)
    shutil.copyfile(src=truth_config, dst=os.path.join(work_dir, config_name))

    helper.init_file_logger(str(work_dir))

    # A fixed seed keeps the vocab generation consistent.
    sp.set_random_generator_seed(111)

    experiment_config = load_config(work_dir)
    experiment_config.set_seed()
    experiment_config.preprocess(stats=False)

    helper.compare_folders(truth_folder=truth_dir, computed_folder=work_dir)
def _set_seed(self, seed):
    """Seed SentencePiece's random generator with ``seed`` for reproducibility."""
    import sentencepiece

    sentencepiece.set_random_generator_seed(seed)