def test_reset_passes_reset_params(self):
    """Check that ``reset`` forwards its parameters to the env worker.

    A single-worker ``SubprocessEnvManager`` is built with ``create_worker``
    replaced by a factory returning a ``MockEnvWorker``; after
    ``manager.reset(params)`` the worker's ``send`` must have been called
    with ``("reset", params)``.
    """
    # Function-scope import so this test does not depend on a module-level
    # import that may or may not exist in the file.
    from unittest import mock

    def fake_create_worker(em, worker_id, step_queue, env_factory, engine_c):
        # Stand-in worker whose canned response is a "reset" message carrying
        # the worker id.
        return MockEnvWorker(
            worker_id, EnvironmentResponse("reset", worker_id, worker_id)
        )

    # patch.object restores the real ``create_worker`` when the block exits,
    # so the fake does not leak into tests that run after this one (a plain
    # class-attribute assignment would stay in place for the whole session).
    with mock.patch.object(
        SubprocessEnvManager, "create_worker", new=fake_create_worker
    ):
        manager = SubprocessEnvManager(
            mock_env_factory, EngineConfig.default_config(), 1
        )
        params = {"test": "params"}
        manager.reset(params)
        manager.env_workers[0].send.assert_called_with("reset", params)
def test_subprocess_env_raises_errors(num_envs):
    """Check that a failure in the env factory surfaces from ``reset()``.

    :param num_envs: number of environments the manager is created with.
    """

    def failing_env_factory(worker_id, config):
        import time

        # Sleep momentarily to allow time for the EnvManager to be waiting for the
        # subprocess response.  We won't be able to capture failures from the subprocess
        # that cause it to close the pipe before we can send the first message.
        time.sleep(0.5)
        raise UnityEnvironmentException()

    env_manager = SubprocessEnvManager(failing_env_factory, RunOptions(), num_envs)
    try:
        with pytest.raises(UnityEnvironmentException):
            env_manager.reset()
    finally:
        # Always close the manager, even when the expected exception is not
        # raised and the assertion above fails; otherwise close() is skipped.
        env_manager.close()
def maybe_add_samplers(
    sampler_config: Optional[Dict], env: SubprocessEnvManager, run_seed: int
) -> None:
    """
    Seeds any unseeded samplers and hands the sampler config to the env via reset.
    Does nothing when no sampler config is supplied.
    :param sampler_config: validated dict of sampler configs. None if not included.
    :param env: env manager that receives the samplers through ``reset``
    :param run_seed: Random seed used for training.
    """
    if sampler_config is None:
        return

    for position, sampler in enumerate(sampler_config.values()):
        if sampler.seed != -1:
            # A seed was given explicitly; leave it untouched.
            continue
        # -1 marks "not specified": derive a seed from the run seed, offset by
        # the sampler's position in the config.
        sampler.seed = run_seed + position

    env.reset(config=sampler_config)
def test_reset_collects_results_from_all_envs(self):
    """Check that ``reset`` contacts every worker and returns their steps.

    Four workers are created through a replaced ``create_worker`` that returns
    ``MockEnvWorker`` instances.  After ``manager.reset(params)`` each worker
    must have been sent ``("reset", params)`` and had ``recv`` called, its
    ``previous_step.current_all_brain_info`` must equal its index, and the
    value returned by ``reset`` must be the list of the workers'
    ``previous_step`` values.
    """
    # Function-scope import so this test does not depend on a module-level
    # import that may or may not exist in the file.
    from unittest import mock

    def fake_create_worker(em, worker_id, step_queue, env_factory, engine_c):
        # Stand-in worker whose canned response is a "reset" message carrying
        # the worker id.
        return MockEnvWorker(
            worker_id, EnvironmentResponse("reset", worker_id, worker_id)
        )

    # patch.object restores the real ``create_worker`` when the block exits,
    # so the fake does not leak into tests that run after this one (a plain
    # class-attribute assignment would stay in place for the whole session).
    with mock.patch.object(
        SubprocessEnvManager, "create_worker", new=fake_create_worker
    ):
        manager = SubprocessEnvManager(
            mock_env_factory, EngineConfig.default_config(), 4
        )
        params = {"test": "params"}
        res = manager.reset(params)

        for i, env in enumerate(manager.env_workers):
            env.send.assert_called_with("reset", params)
            env.recv.assert_called()
            # Check that the "last steps" are set to the value returned for each step
            self.assertEqual(env.previous_step.current_all_brain_info, i)

        assert res == [ew.previous_step for ew in manager.env_workers]