def __init__(self, process: Process, worker_id: int, conn: Connection):
    """Track one subprocess environment worker: its process handle, id, and pipe."""
    self.process = process
    self.worker_id = worker_id
    self.conn = conn
    # Seed the history with an empty step so the first policy query has input.
    self.previous_step: EnvironmentStep = EnvironmentStep.empty(worker_id)
    self.previous_all_action_info: Dict[str, ActionInfo] = {}
    # True while a "step" command is in flight and no response has been drained.
    self.waiting = False
def test_advance(self, external_brains_mock, step_mock):
    """advance() forwards step results to the AgentManager and pulls new policies.

    Fix: the worker factory was assigned as a lambda (PEP 8 E731); a named
    def is equivalent (it still binds as a method on the class) and debuggable.
    """
    brain_name = "testbrain"
    action_info_dict = {brain_name: MagicMock()}

    # Patch worker creation so no real subprocesses are spawned.
    # NOTE(review): this monkeypatches the class attribute without restoring it,
    # so it leaks into later tests — consider mock.patch.object. Kept as-is.
    def fake_create_worker(em, worker_id, step_queue, env_factory, engine_c):
        return MockEnvWorker(
            worker_id, EnvironmentResponse("step", worker_id, worker_id)
        )

    SubprocessEnvManager.create_worker = fake_create_worker
    env_manager = SubprocessEnvManager(
        mock_env_factory, EngineConfig.default_config(), 3
    )
    external_brains_mock.return_value = [brain_name]
    agent_manager_mock = mock.Mock()
    env_manager.set_agent_manager(brain_name, agent_manager_mock)

    step_info_dict = {brain_name: Mock()}
    step_info = EnvironmentStep(step_info_dict, 0, action_info_dict)
    step_mock.return_value = [step_info]
    env_manager.advance()

    # Test add_experiences
    env_manager._step.assert_called_once()
    agent_manager_mock.add_experiences.assert_called_once_with(
        step_info.current_all_step_result[brain_name],
        0,
        step_info.brain_name_to_action_info[brain_name],
    )

    # Test policy queue
    mock_policy = mock.Mock()
    agent_manager_mock.policy_queue.get_nowait.return_value = mock_policy
    env_manager.advance()
    assert env_manager.policies[brain_name] == mock_policy
    assert agent_manager_mock.policy == mock_policy
def _postprocess_steps(
    self, env_steps: List[EnvironmentResponse]
) -> List[EnvironmentStep]:
    """Convert raw worker responses into EnvironmentSteps and merge worker timers."""
    results: List[EnvironmentStep] = []
    worker_timers = []
    for response in env_steps:
        payload: StepResponse = response.payload
        worker = self.env_workers[response.worker_id]
        step = EnvironmentStep(
            payload.all_step_result,
            response.worker_id,
            worker.previous_all_action_info,
            payload.environment_stats,
        )
        results.append(step)
        worker.previous_step = step
        if payload.timer_root:
            worker_timers.append(payload.timer_root)
    # Fold each worker's timing tree into one parallel "workers" node.
    if worker_timers:
        with hierarchical_timer("workers") as main_timer_node:
            for timer_tree in worker_timers:
                main_timer_node.merge(
                    timer_tree, root_name="worker_root", is_parallel=True
                )
    return results
def __init__(self, env: BaseEnv, float_prop_channel: FloatPropertiesChannel):
    """Wrap a single in-process environment plus its float-properties channel."""
    super().__init__()
    self.env = env
    self.shared_float_properties = float_prop_channel
    # Blank step/action history until the first reset/step populates them.
    self.previous_step: EnvironmentStep = EnvironmentStep({}, {}, {})
    self.previous_all_action_info: Dict[str, ActionInfo] = {}
def _reset_env(
    self, config: Dict[BehaviorName, float] = None
) -> List[EnvironmentStep]:  # type: ignore
    """Apply any reset parameters, reset the env, and return the initial step."""
    # set_env_parameters presumably tolerates config=None — TODO confirm.
    self.set_env_parameters(config)
    self.env.reset()
    first_step = EnvironmentStep(self._generate_all_results(), 0, {}, {})
    self.previous_step = first_step
    return [first_step]
def reset(
    self, config: Dict[str, float] = None
) -> List[EnvironmentStep]:  # type: ignore
    """Push any float properties, reset the env, and return the initial step."""
    if config is not None:
        for prop_name, prop_value in config.items():
            self.shared_float_properties.set_property(prop_name, prop_value)
    self.env.reset()
    self.previous_step = EnvironmentStep(self._generate_all_brain_info(), {})
    return [self.previous_step]
def _reset_env(
    self, config: Dict[AgentGroup, float] = None
) -> List[EnvironmentStep]:  # type: ignore
    """Push any float properties, reset the env, and return the initial step."""
    if config is not None:
        for prop_name, prop_value in config.items():
            self.shared_float_properties.set_property(prop_name, prop_value)
    self.env.reset()
    first_step = EnvironmentStep(self._generate_all_results(), 0, {}, {})
    self.previous_step = first_step
    return [first_step]
def _reset_env(
    self, config: Dict[BehaviorName, float] = None
) -> List[EnvironmentStep]:  # type: ignore
    """Push any float parameters, reset the env, and return the initial step."""
    if config is not None:
        for param_name, param_value in config.items():
            self.env_params.set_float_parameter(param_name, param_value)
    self.env.reset()
    first_step = EnvironmentStep(self._generate_all_results(), 0, {}, {})
    self.previous_step = first_step
    return [first_step]
def _reset_env(self, config: Optional[Dict] = None) -> List[EnvironmentStep]:
    """Reset every worker in parallel and return their initial steps."""
    # Drain in-flight step responses so no worker is mid-step when reset arrives.
    # NOTE(review): this spins hot while a worker is still computing and the
    # queue is momentarily empty — a blocking get with timeout would be kinder.
    while any(worker.waiting for worker in self.env_workers):
        if not self.step_queue.empty():
            pending = self.step_queue.get_nowait()
            self.env_workers[pending.worker_id].waiting = False
    # Enqueue reset commands for every worker first so they all reset in parallel...
    for worker in self.env_workers:
        worker.send("reset", config)
    # ...then synchronously collect each worker's reset observations in order.
    for worker in self.env_workers:
        worker.previous_step = EnvironmentStep(
            worker.recv().payload, worker.worker_id, {}
        )
    return [worker.previous_step for worker in self.env_workers]
def _step(self) -> List[EnvironmentStep]:
    """Advance the wrapped environment one tick and package the result."""
    all_action_info = self._take_step(self.previous_step)
    self.previous_all_action_info = all_action_info
    # Hand each behavior its chosen actions before ticking the simulation.
    for behavior_name, action_info in all_action_info.items():
        self.env.set_actions(behavior_name, action_info.action)
    self.env.step()
    step_info = EnvironmentStep(self._generate_all_results(), 0, all_action_info)
    self.previous_step = step_info
    return [step_info]
def _reset_env(
    self, config: Dict[BehaviorName, float] = None
) -> List[EnvironmentStep]:  # type: ignore
    """Apply float/randomization parameters, reset, and return the initial step."""
    if config is not None:
        for param_name, param_value in config.items():
            if isinstance(param_value, float):
                self.env_params.set_float_parameter(param_name, param_value)
            elif isinstance(param_value, ParameterRandomizationSettings):
                # Sampler settings write themselves into the channel.
                param_value.apply(param_name, self.env_params)
    self.env.reset()
    first_step = EnvironmentStep(self._generate_all_results(), 0, {}, {})
    self.previous_step = first_step
    return [first_step]
def step(self) -> List[EnvironmentStep]:
    """Advance the env one tick and pair previous brain info with the new info."""
    all_action_info = self._take_step(self.previous_step)
    self.previous_all_action_info = all_action_info
    for behavior_name, action_info in all_action_info.items():
        self.env.set_actions(behavior_name, action_info.action)
    self.env.step()
    all_brain_info = self._generate_all_brain_info()
    step_info = EnvironmentStep(
        self.previous_step.current_all_brain_info,
        all_brain_info,
        all_action_info,
    )
    self.previous_step = step_info
    return [step_info]
def test_advance(self, mock_create_worker, training_behaviors_mock, step_mock):
    """process_steps forwards (decision, terminal) results plus stats to the
    AgentManager and drains the policy queue until it reports Empty."""
    brain_name = "testbrain"
    action_info_dict = {brain_name: MagicMock()}
    mock_create_worker.side_effect = create_worker_mock
    env_manager = SubprocessEnvManager(
        mock_env_factory, EngineConfig.default_config(), 3
    )
    training_behaviors_mock.return_value = [brain_name]
    agent_manager_mock = mock.Mock()
    mock_policy = mock.Mock()
    # Two policies queued, then Empty: the manager must keep the last one.
    agent_manager_mock.policy_queue.get_nowait.side_effect = [
        mock_policy,
        mock_policy,
        AgentManagerQueue.Empty(),
    ]
    env_manager.set_agent_manager(brain_name, agent_manager_mock)
    # (decision_steps, terminal_steps) pair for the behavior.
    step_info_dict = {brain_name: (Mock(), Mock())}
    env_stats = {
        "averaged": (1.0, StatsAggregationMethod.AVERAGE),
        "most_recent": (2.0, StatsAggregationMethod.MOST_RECENT),
    }
    step_info = EnvironmentStep(step_info_dict, 0, action_info_dict, env_stats)
    step_mock.return_value = [step_info]
    env_manager.process_steps(env_manager.get_steps())

    # Test add_experiences
    env_manager._step.assert_called_once()
    agent_manager_mock.add_experiences.assert_called_once_with(
        step_info.current_all_step_result[brain_name][0],
        step_info.current_all_step_result[brain_name][1],
        0,
        step_info.brain_name_to_action_info[brain_name],
    )

    # Test policy queue
    assert env_manager.policies[brain_name] == mock_policy
    assert agent_manager_mock.policy == mock_policy
def __init__(self, env: BaseEnv, env_params: EnvironmentParametersChannel):
    """Wrap a single in-process environment plus its parameters channel."""
    super().__init__()
    self.env = env
    self.env_params = env_params
    # Empty step/action history until the first reset/step populates them.
    self.previous_step: EnvironmentStep = EnvironmentStep.empty(0)
    self.previous_all_action_info: Dict[str, ActionInfo] = {}
def _reset_env(self, config: ArenaConfig = None) -> List[EnvironmentStep]:
    """Reset the AnimalAI arenas with the given config and return the initial step."""
    self.env.reset(arenas_configurations=config)
    first_step = EnvironmentStep(self._generate_all_results(), 0, {})
    self.previous_step = first_step
    return [first_step]
def __init__(
    self, env: AnimalAIEnvironment, float_prop_channel: FloatPropertiesChannel
):
    """Wrap an AnimalAI environment plus its float-properties side channel.

    Fix: the sibling env-manager ``__init__`` methods in this file call
    ``super().__init__()``; this one skipped it, leaving any base-class
    state uninitialized. Added for consistency (harmless if the base is
    ``object``).
    """
    super().__init__()
    self.shared_float_properties = float_prop_channel
    self.env = env
    # Empty step/action history until the first reset/step populates them.
    self.previous_step: EnvironmentStep = EnvironmentStep.empty(0)
    self.previous_all_action_info: Dict[str, ActionInfo] = {}