def _add_normalization_wrapper(env, n_envs, normalize):
    """Wrap ``env`` in a ``VecNormalize`` layer according to ``normalize``.

    Parameters
    ----------
    env : vectorized environment to (possibly) wrap.
    n_envs : number of parallel sub-environments; forwarded to the
        precomputation helper so it can size its sampling loop.
    normalize : bool or dict.
        * ``True``  -- wrap with default ``VecNormalize`` settings.
        * ``False`` -- leave the environment untouched.
        * dict      -- keyword options for ``VecNormalize``. Recognized
          special keys (consumed, not forwarded): ``trained_agent`` (path
          to saved normalization stats to load), ``training`` (whether the
          loaded stats keep updating, default True), ``precompute`` +
          ``samples`` (estimate stats from random actions before use).

    Returns
    -------
    The wrapped (or original) environment.
    """
    if isinstance(normalize, bool):
        # BUGFIX: previously any bool -- including False -- triggered
        # wrapping. Only wrap when normalization was actually requested.
        if normalize:
            env = VecNormalize(env)
    elif isinstance(normalize, dict):
        # Work on a copy: the original code .pop()'d keys from the caller's
        # dict, silently mutating the caller's configuration.
        opts = dict(normalize)
        if 'trained_agent' in opts:
            path = opts.pop('trained_agent')
            env = VecNormalize.load(path, env)
            env.training = opts.pop('training', True)
        elif opts.pop('precompute', False):
            samples = opts.pop('samples', 10000)
            env = _precompute_normalization(env, n_envs, samples, opts)
        else:
            env = VecNormalize(env, **opts)
    return env
def _precompute_normalization(env, num_envs, samples, config):
    """Estimate ``VecNormalize`` statistics by stepping with random actions.

    Wraps ``env`` in a training-mode ``VecNormalize``, runs roughly
    ``samples`` random environment steps so the running mean/std settle,
    then freezes the statistics (``training = False``) and returns the
    wrapped environment.

    Parameters
    ----------
    env : vectorized environment to wrap.
    num_envs : number of parallel sub-environments (one action is sampled
        per sub-environment each step).
    samples : total number of environment transitions to collect.
    config : extra keyword options forwarded to ``VecNormalize``.

    Returns
    -------
    The ``VecNormalize``-wrapped environment with frozen statistics.
    """
    env = VecNormalize(env, training=True, **config)
    logging.info("Precomputing normalization. This may take a while.")
    env.reset()
    # Log roughly every 5000 transitions. BUGFIX: with num_envs > 5000 the
    # integer division yielded 0 and `i % log_step` raised ZeroDivisionError.
    log_step = max(1, 5000 // num_envs)
    for i in range(samples // num_envs):
        actions = [env.action_space.sample() for _ in range(num_envs)]
        # Step results are irrelevant here; only the normalization
        # statistics updated inside VecNormalize matter.
        env.step(actions)
        if i % log_step == 0:
            # Lazy %-style args: the message is only formatted if emitted.
            logging.info("Progress: %d/%d", i * num_envs, samples)
    logging.info("Successfully precomputed normalization parameters.")
    env.reset()
    # Freeze the statistics so subsequent use does not keep updating them.
    env.training = False
    return env