def __init__(self, process: Process, worker_id: int, conn: Connection):
    # Handle to the environment subprocess and the pipe used to talk to it.
    self.process = process
    self.worker_id = worker_id
    self.conn = conn
    # Most recent step result, and the actions that produced it.
    self.previous_step: EnvironmentStep = EnvironmentStep.empty(worker_id)
    self.previous_all_action_info: Dict[str, ActionInfo] = {}
    # True while a step request is in flight to this worker.
    self.waiting = False
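The snippets in this listing construct EnvironmentStep tuples with two, three, or four positional arguments, depending on the ML-Agents version they were taken from. As a reading aid, here is a minimal sketch of the four-argument variant, with field names taken from the accessors used in the tests (current_all_step_result, brain_name_to_action_info) and from the EnvironmentStep.empty(worker_id) helper used above; the exact field types are assumptions, not the library's definition:

from typing import Any, Dict, NamedTuple

class EnvironmentStep(NamedTuple):
    current_all_step_result: Dict[str, Any]
    worker_id: int
    brain_name_to_action_info: Dict[str, Any]
    environment_stats: Dict[str, Any]

    @staticmethod
    def empty(worker_id: int) -> "EnvironmentStep":
        # an empty step: no results, no actions, no stats
        return EnvironmentStep({}, worker_id, {}, {})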
    def test_advance(self, external_brains_mock, step_mock):
        brain_name = "testbrain"
        action_info_dict = {brain_name: MagicMock()}
        SubprocessEnvManager.create_worker = lambda em, worker_id, step_queue, env_factory, engine_c: MockEnvWorker(
            worker_id, EnvironmentResponse("step", worker_id, worker_id)
        )
        env_manager = SubprocessEnvManager(
            mock_env_factory, EngineConfig.default_config(), 3
        )
        external_brains_mock.return_value = [brain_name]
        agent_manager_mock = mock.Mock()
        env_manager.set_agent_manager(brain_name, agent_manager_mock)

        step_info_dict = {brain_name: Mock()}
        step_info = EnvironmentStep(step_info_dict, 0, action_info_dict)
        step_mock.return_value = [step_info]
        env_manager.advance()

        # Test add_experiences
        env_manager._step.assert_called_once()

        agent_manager_mock.add_experiences.assert_called_once_with(
            step_info.current_all_step_result[brain_name],
            0,
            step_info.brain_name_to_action_info[brain_name],
        )

        # Test policy queue
        mock_policy = mock.Mock()
        agent_manager_mock.policy_queue.get_nowait.return_value = mock_policy
        env_manager.advance()
        assert env_manager.policies[brain_name] == mock_policy
        assert agent_manager_mock.policy == mock_policy
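A note on the injected arguments: external_brains_mock and step_mock come from mock.patch decorators that this snippet omits. Since decorators bind bottom-up, a plausible reconstruction looks like the following; the patch target strings are assumptions based on ML-Agents' module layout, not copied from the source:

from unittest import mock

@mock.patch("mlagents.trainers.subprocess_env_manager.SubprocessEnvManager._step")
@mock.patch(
    "mlagents.trainers.subprocess_env_manager.SubprocessEnvManager.external_brains",
    new_callable=mock.PropertyMock,
)
def test_advance(self, external_brains_mock, step_mock):
    ...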
    def _postprocess_steps(
        self, env_steps: List[EnvironmentResponse]
    ) -> List[EnvironmentStep]:
        step_infos = []
        timer_nodes = []
        for step in env_steps:
            payload: StepResponse = step.payload
            env_worker = self.env_workers[step.worker_id]
            new_step = EnvironmentStep(
                payload.all_step_result,
                step.worker_id,
                env_worker.previous_all_action_info,
                payload.environment_stats,
            )
            step_infos.append(new_step)
            env_worker.previous_step = new_step

            if payload.timer_root:
                timer_nodes.append(payload.timer_root)

        if timer_nodes:
            with hierarchical_timer("workers") as main_timer_node:
                for worker_timer_node in timer_nodes:
                    main_timer_node.merge(
                        worker_timer_node, root_name="worker_root", is_parallel=True
                    )

        return step_infos
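hierarchical_timer and TimerNode.merge come from mlagents_envs.timers: each worker ships its own timing tree back with the step payload, and the manager folds them all under a single "workers" node. A self-contained sketch of that fold, using illustrative structures rather than the mlagents_envs API:

from typing import Dict

class Node:
    # illustrative timing node: accumulated seconds, call count, named children
    def __init__(self) -> None:
        self.total = 0.0
        self.count = 0
        self.children: Dict[str, "Node"] = {}

    def merge(self, other: "Node") -> None:
        # fold another tree into this one, summing totals and counts
        self.total += other.total
        self.count += other.count
        for name, child in other.children.items():
            self.children.setdefault(name, Node()).merge(child)

main = Node()
for worker_total in (0.25, 0.31, 0.27):  # pretend per-worker step times
    worker_root = Node()
    worker_root.total, worker_root.count = worker_total, 1
    main.children.setdefault("worker_root", Node()).merge(worker_root)

print(main.children["worker_root"].count)  # 3 -- one merge per worker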
def __init__(self, env: BaseEnv, float_prop_channel: FloatPropertiesChannel):
    super().__init__()
    self.shared_float_properties = float_prop_channel
    self.env = env
    self.previous_step: EnvironmentStep = EnvironmentStep({}, {}, {})
    self.previous_all_action_info: Dict[str, ActionInfo] = {}
def _reset_env(
    self, config: Dict[BehaviorName, float] = None
) -> List[EnvironmentStep]:  # type: ignore
    self.set_env_parameters(config)
    self.env.reset()
    all_step_result = self._generate_all_results()
    self.previous_step = EnvironmentStep(all_step_result, 0, {}, {})
    return [self.previous_step]
Example #6
def reset(
    self, config: Dict[str, float] = None
) -> List[EnvironmentStep]:  # type: ignore
    if config is not None:
        for k, v in config.items():
            self.shared_float_properties.set_property(k, v)
    self.env.reset()
    all_brain_info = self._generate_all_brain_info()
    self.previous_step = EnvironmentStep(all_brain_info, {})
    return [self.previous_step]
Example #7
def _reset_env(
    self, config: Dict[AgentGroup, float] = None
) -> List[EnvironmentStep]:  # type: ignore
    if config is not None:
        for k, v in config.items():
            self.shared_float_properties.set_property(k, v)
    self.env.reset()
    all_step_result = self._generate_all_results()
    self.previous_step = EnvironmentStep(all_step_result, 0, {}, {})
    return [self.previous_step]
Example #8
def _reset_env(
    self, config: Dict[BehaviorName, float] = None
) -> List[EnvironmentStep]:  # type: ignore
    if config is not None:
        for k, v in config.items():
            self.env_params.set_float_parameter(k, v)
    self.env.reset()
    all_step_result = self._generate_all_results()
    self.previous_step = EnvironmentStep(all_step_result, 0, {}, {})
    return [self.previous_step]
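The env_params.set_float_parameter calls above come from mlagents_envs' EnvironmentParametersChannel side channel. For context, a minimal wiring sketch; this needs a running Unity Editor (which file_name=None attaches to) or a built environment binary, and the parameter name is purely illustrative:

from mlagents_envs.environment import UnityEnvironment
from mlagents_envs.side_channel.environment_parameters_channel import (
    EnvironmentParametersChannel,
)

channel = EnvironmentParametersChannel()
env = UnityEnvironment(file_name=None, side_channels=[channel])
channel.set_float_parameter("gravity", -9.81)  # illustrative key/value
env.reset()
env.close()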
Example #9
def _reset_env(self, config: Optional[Dict] = None) -> List[EnvironmentStep]:
    # Drain in-flight step results so no worker is mid-step when reset arrives.
    while any(ew.waiting for ew in self.env_workers):
        if not self.step_queue.empty():
            step = self.step_queue.get_nowait()
            self.env_workers[step.worker_id].waiting = False
    # First enqueue reset commands for all workers so that they reset in parallel
    for ew in self.env_workers:
        ew.send("reset", config)
    # Next (synchronously) collect the reset observations from each worker in sequence
    for ew in self.env_workers:
        ew.previous_step = EnvironmentStep(ew.recv().payload, ew.worker_id, {})
    return [ew.previous_step for ew in self.env_workers]
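The broadcast-then-collect shape is what makes the resets parallel: every worker has started resetting before the first result is read. A self-contained sketch of the same pattern with plain multiprocessing pipes (the worker protocol here is made up for illustration):

from multiprocessing import Pipe, Process

def worker(conn):
    # toy worker: answer each "reset" with a fake observation payload
    while True:
        cmd, payload = conn.recv()
        if cmd == "reset":
            conn.send({"obs": 0})
        elif cmd == "close":
            conn.close()
            return

if __name__ == "__main__":
    pipes = []
    for _ in range(3):
        parent_conn, child_conn = Pipe()
        Process(target=worker, args=(child_conn,), daemon=True).start()
        pipes.append(parent_conn)

    # 1) broadcast resets so all workers reset in parallel...
    for conn in pipes:
        conn.send(("reset", None))
    # 2) ...then collect results one worker at a time.
    results = [conn.recv() for conn in pipes]
    for conn in pipes:
        conn.send(("close", None))
    print(results)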
Example #10
    def _step(self) -> List[EnvironmentStep]:
        all_action_info = self._take_step(self.previous_step)
        self.previous_all_action_info = all_action_info

        for brain_name, action_info in all_action_info.items():
            self.env.set_actions(brain_name, action_info.action)
        self.env.step()
        all_step_result = self._generate_all_results()

        step_info = EnvironmentStep(all_step_result, 0, self.previous_all_action_info)
        self.previous_step = step_info
        return [step_info]
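Note the bookkeeping here: the actions are computed from previous_step, and the step record then pairs those same actions with the observations they produced. A toy sketch of that invariant, with a fake policy and environment (purely illustrative):

from typing import Dict, List, NamedTuple

class StepRecord(NamedTuple):
    result: Dict[str, int]   # what the env returned after stepping
    actions: Dict[str, int]  # the actions that produced that result

def run(num_steps: int) -> List[StepRecord]:
    previous_result = {"brain": 0}
    records = []
    for _ in range(num_steps):
        actions = {k: v + 1 for k, v in previous_result.items()}  # fake policy
        result = dict(actions)  # fake env.step() echoes the actions back
        records.append(StepRecord(result, actions))  # matched pair
        previous_result = result
    return records

assert all(r.result == r.actions for r in run(3))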
Example #11
def _reset_env(
    self, config: Dict[BehaviorName, float] = None
) -> List[EnvironmentStep]:  # type: ignore
    if config is not None:
        for k, v in config.items():
            if isinstance(v, float):
                self.env_params.set_float_parameter(k, v)
            elif isinstance(v, ParameterRandomizationSettings):
                v.apply(k, self.env_params)
    self.env.reset()
    all_step_result = self._generate_all_results()
    self.previous_step = EnvironmentStep(all_step_result, 0, {}, {})
    return [self.previous_step]
Example #12
    def step(self) -> List[EnvironmentStep]:
        all_action_info = self._take_step(self.previous_step)
        self.previous_all_action_info = all_action_info

        for brain_name, action_info in all_action_info.items():
            self.env.set_actions(brain_name, action_info.action)
        self.env.step()
        all_brain_info = self._generate_all_brain_info()
        step_brain_info = all_brain_info

        step_info = EnvironmentStep(
            self.previous_step.current_all_brain_info,
            step_brain_info,
            self.previous_all_action_info,
        )
        self.previous_step = step_info
        return [step_info]
Example #13
    def test_advance(self, mock_create_worker, training_behaviors_mock,
                     step_mock):
        brain_name = "testbrain"
        action_info_dict = {brain_name: MagicMock()}
        mock_create_worker.side_effect = create_worker_mock
        env_manager = SubprocessEnvManager(mock_env_factory,
                                           EngineConfig.default_config(), 3)
        training_behaviors_mock.return_value = [brain_name]
        agent_manager_mock = mock.Mock()
        mock_policy = mock.Mock()
        agent_manager_mock.policy_queue.get_nowait.side_effect = [
            mock_policy,
            mock_policy,
            AgentManagerQueue.Empty(),
        ]
        env_manager.set_agent_manager(brain_name, agent_manager_mock)

        step_info_dict = {brain_name: (Mock(), Mock())}
        env_stats = {
            "averaged": (1.0, StatsAggregationMethod.AVERAGE),
            "most_recent": (2.0, StatsAggregationMethod.MOST_RECENT),
        }
        step_info = EnvironmentStep(step_info_dict, 0, action_info_dict,
                                    env_stats)
        step_mock.return_value = [step_info]
        env_manager.process_steps(env_manager.get_steps())

        # Test add_experiences
        env_manager._step.assert_called_once()

        agent_manager_mock.add_experiences.assert_called_once_with(
            step_info.current_all_step_result[brain_name][0],
            step_info.current_all_step_result[brain_name][1],
            0,
            step_info.brain_name_to_action_info[brain_name],
        )

        # Test policy queue
        assert env_manager.policies[brain_name] == mock_policy
        assert agent_manager_mock.policy == mock_policy
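The side_effect list above scripts the policy queue: two policies, then an Empty exception to stop the drain. The loop it exercises is the usual drain-until-empty pattern, sketched here with a plain queue.Queue standing in for AgentManagerQueue:

import queue

policy_queue: "queue.Queue[str]" = queue.Queue()
policy_queue.put("policy_v1")
policy_queue.put("policy_v2")

latest = None
while True:
    try:
        latest = policy_queue.get_nowait()  # raises queue.Empty when drained
    except queue.Empty:
        break

print(latest)  # policy_v2: only the most recent policy is kept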
def __init__(self, env: BaseEnv, env_params: EnvironmentParametersChannel):
    super().__init__()
    self.env_params = env_params
    self.env = env
    self.previous_step: EnvironmentStep = EnvironmentStep.empty(0)
    self.previous_all_action_info: Dict[str, ActionInfo] = {}
def _reset_env(self, config: ArenaConfig = None) -> List[EnvironmentStep]:
    self.env.reset(arenas_configurations=config)
    all_step_result = self._generate_all_results()
    self.previous_step = EnvironmentStep(all_step_result, 0, {})
    return [self.previous_step]
def __init__(
    self, env: AnimalAIEnvironment, float_prop_channel: FloatPropertiesChannel
):
    self.shared_float_properties = float_prop_channel
    self.env = env
    self.previous_step: EnvironmentStep = EnvironmentStep.empty(0)
    self.previous_all_action_info: Dict[str, ActionInfo] = {}