Example #1
    def advance(self, env: EnvManager) -> int:
        with hierarchical_timer("env_step"):
            time_start_step = time()
            new_step_infos = env.step()
            delta_time_step = time() - time_start_step

        for step_info in new_step_infos:
            for brain_name, trainer in self.trainers.items():
                if brain_name in self.trainer_metrics:
                    self.trainer_metrics[brain_name].add_delta_step(
                        delta_time_step)
                trainer.add_experiences(
                    step_info.previous_all_brain_info,
                    step_info.current_all_brain_info,
                    step_info.brain_name_to_action_info[brain_name].outputs,
                )
                trainer.process_experiences(step_info.previous_all_brain_info,
                                            step_info.current_all_brain_info)
        for brain_name, trainer in self.trainers.items():
            if brain_name in self.trainer_metrics:
                self.trainer_metrics[brain_name].add_delta_step(
                    delta_time_step)
            if self.train_model and trainer.get_step <= trainer.get_max_steps:
                trainer.increment_step(len(new_step_infos))
                if trainer.is_ready_update():
                    # Perform gradient descent with experience buffer
                    with hierarchical_timer("update_policy"):
                        trainer.update_policy()
                    env.set_policy(brain_name, trainer.policy)
        return len(new_step_infos)

    def _postprocess_steps(
        self, env_steps: List[EnvironmentResponse]
    ) -> List[EnvironmentStep]:
        step_infos = []
        timer_nodes = []
        for step in env_steps:
            payload: StepResponse = step.payload
            env_worker = self.env_workers[step.worker_id]
            new_step = EnvironmentStep(
                env_worker.previous_step.current_all_brain_info,
                payload.all_brain_info,
                env_worker.previous_all_action_info,
            )
            step_infos.append(new_step)
            env_worker.previous_step = new_step

            if payload.timer_root:
                timer_nodes.append(payload.timer_root)

        if timer_nodes:
            with hierarchical_timer("workers") as main_timer_node:
                for worker_timer_node in timer_nodes:
                    main_timer_node.merge(
                        worker_timer_node, root_name="worker_root", is_parallel=True
                    )

        return step_infos
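
All of the examples in this listing share one pattern: hierarchical_timer is a context manager that times the enclosed block and records the result in a global tree of timer nodes, keyed by nesting (the mlagents.envs.timers module path is taken from the test at the end of this listing). A minimal sketch of that pattern:

from mlagents.envs.timers import hierarchical_timer

def run_once() -> None:
    with hierarchical_timer("outer"):        # opens (or reuses) the "outer" node
        with hierarchical_timer("inner"):    # recorded as a child of "outer"
            sum(range(10000))                # stand-in for real work

run_once()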
Example #3
    def advance(self, env: EnvManager) -> int:
        with hierarchical_timer("env_step"):
            time_start_step = time()
            new_step_infos = env.step()
            delta_time_step = time() - time_start_step

        for step_info in new_step_infos:
            for brain_name, trainer in self.trainers.items():
                if brain_name in self.trainer_metrics:
                    self.trainer_metrics[brain_name].add_delta_step(
                        delta_time_step)
                if step_info.has_actions_for_brain(brain_name):
                    trainer.add_experiences(
                        step_info.previous_all_brain_info[brain_name],
                        step_info.current_all_brain_info[brain_name],
                        step_info.brain_name_to_action_info[brain_name].outputs,
                    )
                    trainer.process_experiences(
                        step_info.previous_all_brain_info[brain_name],
                        step_info.current_all_brain_info[brain_name],
                    )
        for brain_name, trainer in self.trainers.items():
            if brain_name in self.trainer_metrics:
                self.trainer_metrics[brain_name].add_delta_step(
                    delta_time_step)
            if self.train_model and trainer.get_step <= trainer.get_max_steps:
                trainer.increment_step(len(new_step_infos))
                if trainer.is_ready_update():
                    # Perform gradient descent with experience buffer
                    with hierarchical_timer("update_policy"):
                        trainer.update_policy()
                    env.set_policy(brain_name, trainer.policy)
            else:
                # Avoid memory leak during inference
                # Eventually this whole block will take place in advance()
                # But currently this only calls clear_update_buffer() in RLTrainer
                # and nothing in the base class
                trainer.advance()
        return len(new_step_infos)
Example #4
def process_pixels(image_bytes: bytes, gray_scale: bool) -> np.ndarray:
    """
    Converts a byte array observation image into a numpy array and optionally
    converts it to grayscale.
    :param gray_scale: Whether to convert the image to grayscale.
    :param image_bytes: Input byte array corresponding to the image.
    :return: Processed numpy array of the observation from the environment.
    """
    with hierarchical_timer("image_decompress"):
        image_bytearray = bytearray(image_bytes)
        image = Image.open(io.BytesIO(image_bytearray))
        # Normally Image loads lazily; this forces it to load within the timer scope.
        image.load()
    s = np.array(image) / 255.0
    if gray_scale:
        s = np.mean(s, axis=2)
        s = np.reshape(s, [s.shape[0], s.shape[1], 1])
    return s
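
A hedged usage sketch for process_pixels, assuming the function above and its imports (io, numpy as np, PIL.Image, hierarchical_timer) are in scope; the expected shapes follow from the 8x6 RGB PNG built here:

import io
from PIL import Image

buf = io.BytesIO()
Image.new("RGB", (8, 6), color=(255, 0, 0)).save(buf, format="PNG")

rgb = process_pixels(buf.getvalue(), gray_scale=False)
assert rgb.shape == (6, 8, 3)     # height x width x channels, scaled to [0, 1]

gray = process_pixels(buf.getvalue(), gray_scale=True)
assert gray.shape == (6, 8, 1)    # channel axis kept so downstream code stays 3D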
Example #5
    def step(self) -> None:
        if self._is_first_message:
            return self.reset()
        if not self._loaded:
            raise UnityEnvironmentException("No Unity environment is loaded.")
        # fill the blanks for missing actions
        for group_name in self._env_specs:
            if group_name not in self._env_actions:
                n_agents = 0
                if group_name in self._env_state:
                    n_agents = self._env_state[group_name].n_agents()
                self._env_actions[group_name] = self._env_specs[
                    group_name
                ].create_empty_action(n_agents)
        step_input = self._generate_step_input(self._env_actions)
        with hierarchical_timer("communicator.exchange"):
            outputs = self.communicator.exchange(step_input)
        if outputs is None:
            raise UnityCommunicationException("Communicator has stopped.")
        self._update_group_specs(outputs)
        rl_output = outputs.rl_output
        self._update_state(rl_output)
        self._env_actions.clear()
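
The exchange above raises UnityCommunicationException if the Unity process stops mid-run, so callers typically guard the stepping loop. A hedged sketch, assuming `env` is a loaded environment exposing this step() and a close() method:

try:
    for _ in range(100):
        env.step()                # missing actions are filled with empty ones, as above
except UnityCommunicationException:
    env.close()                   # assumed cleanup; shuts down the Unity process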
Example #6
    def step(
        self,
        vector_action: Optional[Dict[str, np.ndarray]] = None,
        memory: Optional[Dict[str, np.ndarray]] = None,
        text_action: Optional[Dict[str, List[str]]] = None,
        value: Optional[Dict[str, np.ndarray]] = None,
        custom_action: Optional[Dict[str, Any]] = None,
    ) -> AllBrainInfo:
        """
        Provides the environment with an action, moves the environment dynamics forward accordingly,
        and returns observation, state, and reward information to the agent.
        :param value: Value estimates provided by agents.
        :param vector_action: Agent's vector action. Can be a scalar or vector of int/floats.
        :param memory: Vector corresponding to memory used for recurrent policies.
        :param text_action: Text action to send to the environment.
        :param custom_action: Optional instance of a CustomAction protobuf message.
        :return: AllBrainInfo: A data structure corresponding to the new state of the environment.
        """
        if self._is_first_message:
            return self.reset()
        vector_action = {} if vector_action is None else vector_action
        memory = {} if memory is None else memory
        text_action = {} if text_action is None else text_action
        value = {} if value is None else value
        custom_action = {} if custom_action is None else custom_action

        # Check that environment is loaded, and episode is currently running.
        if not self._loaded:
            raise UnityEnvironmentException("No Unity environment is loaded.")
        else:
            if isinstance(vector_action, self.SINGLE_BRAIN_ACTION_TYPES):
                if self._num_external_brains == 1:
                    vector_action = {
                        self._external_brain_names[0]: vector_action
                    }
                elif self._num_external_brains > 1:
                    raise UnityActionException(
                        "You have {0} brains, you need to feed a dictionary of brain names a keys, "
                        "and vector_actions as values".format(
                            self._num_brains))
                else:
                    raise UnityActionException(
                        "There are no external brains in the environment, "
                        "step cannot take a vector_action input")

            if isinstance(memory, self.SINGLE_BRAIN_ACTION_TYPES):
                if self._num_external_brains == 1:
                    memory = {self._external_brain_names[0]: memory}
                elif self._num_external_brains > 1:
                    raise UnityActionException(
                        "You have {0} brains, you need to feed a dictionary of brain names as keys "
                        "and memories as values".format(self._num_brains))
                else:
                    raise UnityActionException(
                        "There are no external brains in the environment, "
                        "step cannot take a memory input")

            if isinstance(text_action, self.SINGLE_BRAIN_TEXT_TYPES):
                if self._num_external_brains == 1:
                    text_action = {self._external_brain_names[0]: text_action}
                elif self._num_external_brains > 1:
                    raise UnityActionException(
                        "You have {0} brains, you need to feed a dictionary of brain names as keys "
                        "and text_actions as values".format(self._num_brains))
                else:
                    raise UnityActionException(
                        "There are no external brains in the environment, "
                        "step cannot take a value input")

            if isinstance(value, self.SINGLE_BRAIN_ACTION_TYPES):
                if self._num_external_brains == 1:
                    value = {self._external_brain_names[0]: value}
                elif self._num_external_brains > 1:
                    raise UnityActionException(
                        "You have {0} brains, you need to feed a dictionary of brain names as keys "
                        "and state/action value estimates as values".format(
                            self._num_external_brains))
                else:
                    raise UnityActionException(
                        "There are no external brains in the environment, "
                        "step cannot take a value input")

            if isinstance(custom_action, CustomAction):
                if self._num_external_brains == 1:
                    custom_action = {
                        self._external_brain_names[0]: custom_action
                    }
                elif self._num_external_brains > 1:
                    raise UnityActionException(
                        "You have {0} brains, you need to feed a dictionary of brain names as keys "
                        "and CustomAction instances as values".format(
                            self._num_external_brains))
                else:
                    raise UnityActionException(
                        "There are no external brains in the environment, "
                        "step cannot take a custom_action input")

            for brain_name in (list(vector_action.keys()) +
                               list(memory.keys()) + list(text_action.keys())):
                if brain_name not in self._external_brain_names:
                    raise UnityActionException(
                        "The name {0} does not correspond to an external brain "
                        "in the environment".format(brain_name))

            for brain_name in self._external_brain_names:
                n_agent = self._n_agents[brain_name]
                if brain_name not in vector_action:
                    if self._brains[
                            brain_name].vector_action_space_type == "discrete":
                        vector_action[brain_name] = ([0.0] * n_agent * len(
                            self._brains[brain_name].vector_action_space_size))
                    else:
                        vector_action[brain_name] = (
                            [0.0] * n_agent * self._brains[brain_name].
                            vector_action_space_size[0])
                else:
                    vector_action[brain_name] = self._flatten(
                        vector_action[brain_name])
                if brain_name not in memory:
                    memory[brain_name] = []
                else:
                    if memory[brain_name] is None:
                        memory[brain_name] = []
                    else:
                        memory[brain_name] = self._flatten(memory[brain_name])
                if brain_name not in text_action:
                    text_action[brain_name] = [""] * n_agent
                else:
                    if text_action[brain_name] is None:
                        text_action[brain_name] = [""] * n_agent
                if brain_name not in custom_action:
                    custom_action[brain_name] = [None] * n_agent
                else:
                    if custom_action[brain_name] is None:
                        custom_action[brain_name] = [None] * n_agent
                    if isinstance(custom_action[brain_name], CustomAction):
                        custom_action[brain_name] = [
                            custom_action[brain_name]
                        ] * n_agent

                number_text_actions = len(text_action[brain_name])
                if not ((number_text_actions == n_agent)
                        or number_text_actions == 0):
                    raise UnityActionException(
                        "There was a mismatch between the provided text_action and "
                        "the environment's expectation: "
                        "The brain {0} expected {1} text_action but was given {2}"
                        .format(brain_name, n_agent, number_text_actions))

                discrete_check = (self._brains[brain_name].
                                  vector_action_space_type == "discrete")

                expected_discrete_size = n_agent * len(
                    self._brains[brain_name].vector_action_space_size)

                continuous_check = (self._brains[brain_name].
                                    vector_action_space_type == "continuous")

                expected_continuous_size = (
                    self._brains[brain_name].vector_action_space_size[0] *
                    n_agent)

                if not ((discrete_check and len(vector_action[brain_name])
                         == expected_discrete_size) or
                        (continuous_check and len(vector_action[brain_name])
                         == expected_continuous_size)):
                    raise UnityActionException(
                        "There was a mismatch between the provided action and "
                        "the environment's expectation: "
                        "The brain {0} expected {1} {2} action(s), but was provided: {3}"
                        .format(
                            brain_name,
                            str(expected_discrete_size) if discrete_check else
                            str(expected_continuous_size),
                            self._brains[brain_name].vector_action_space_type,
                            str(vector_action[brain_name]),
                        ))

            step_input = self._generate_step_input(vector_action, memory,
                                                   text_action, value,
                                                   custom_action)
            with hierarchical_timer("communicator.exchange"):
                outputs = self.communicator.exchange(step_input)
            if outputs is None:
                raise UnityCommunicationException("Communicator has stopped.")
            rl_output = outputs.rl_output
            state = self._get_state(rl_output)
            for _b in self._external_brain_names:
                self._n_agents[_b] = len(state[_b].agents)
            return state
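
A hedged usage sketch for the single-brain convenience path above (`env`, the brain name "Brain", and the action size are assumptions for illustration): a bare array matches SINGLE_BRAIN_ACTION_TYPES and is wrapped into a per-brain dictionary internally.

import numpy as np

action = np.array([0.0, 1.0], dtype=np.float32)    # one agent, 2D continuous action
all_brain_info = env.step(vector_action=action)    # wrapped to {"Brain": action}
brain_info = all_brain_info["Brain"]               # new state for that brain's agents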
Example #7
def load_demonstration(
    file_path: str
) -> Tuple[BrainParameters, List[AgentInfoActionPairProto], int]:
    """
    Loads and parses a demonstration file.
    :param file_path: Location of demonstration file (.demo).
    :return: BrainParameters, a list of AgentInfoActionPairProto containing the demonstration data, and the total expected number of demonstration steps.
    """

    # First 32 bytes of file dedicated to meta-data.
    INITIAL_POS = 33
    file_paths = []
    if os.path.isdir(file_path):
        all_files = os.listdir(file_path)
        for _file in all_files:
            if _file.endswith(".demo"):
                file_paths.append(os.path.join(file_path, _file))
        if not file_paths:
            raise ValueError(
                "There are no '.demo' files in the provided directory.")
    elif os.path.isfile(file_path):
        file_paths.append(file_path)
        file_extension = pathlib.Path(file_path).suffix
        if file_extension != ".demo":
            raise ValueError(
                "The file is not a '.demo' file. Please provide a file with the "
                "correct extension.")
    else:
        raise FileNotFoundError(
            "The demonstration file or directory {} does not exist.".format(
                file_path))

    brain_params = None
    brain_param_proto = None
    info_action_pairs = []
    total_expected = 0
    for _file_path in file_paths:
        with open(_file_path, "rb") as fp:
            with hierarchical_timer("read_file"):
                data = fp.read()
            next_pos, pos, obs_decoded = 0, 0, 0
            while pos < len(data):
                next_pos, pos = _DecodeVarint32(data, pos)
                if obs_decoded == 0:
                    meta_data_proto = DemonstrationMetaProto()
                    meta_data_proto.ParseFromString(data[pos:pos + next_pos])
                    total_expected += meta_data_proto.number_steps
                    pos = INITIAL_POS
                if obs_decoded == 1:
                    brain_param_proto = BrainParametersProto()
                    brain_param_proto.ParseFromString(data[pos:pos + next_pos])
                    pos += next_pos
                if obs_decoded > 1:
                    agent_info_action = AgentInfoActionPairProto()
                    agent_info_action.ParseFromString(data[pos:pos + next_pos])
                    if brain_params is None:
                        brain_params = BrainParameters.from_proto(
                            brain_param_proto, agent_info_action.agent_info)
                    info_action_pairs.append(agent_info_action)
                    if len(info_action_pairs) == total_expected:
                        break
                    pos += next_pos
                obs_decoded += 1
    return brain_params, info_action_pairs, total_expected
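
The timer test below calls decorated_func, which this listing omits. A hedged reconstruction consistent with the expected tree (a "decorated_func" node with count 3, and a "my_gauge" gauge whose min 0.0 and max 4.0 match calls with (0, 0), (1, 1), (2, 2)); timed and set_gauge are assumed to live in mlagents.envs.timers alongside hierarchical_timer:

from mlagents.envs import timers
from mlagents.envs.timers import timed

@timed  # assumed decorator: records each call under a "decorated_func" timer node
def decorated_func(x: int = 0, y: float = 1.0) -> str:
    timers.set_gauge("my_gauge", x + y)  # assumed gauge helper tracking min/max/count
    return f"{x} + {y} = {x + y}"
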
def test_timers() -> None:
    with mock.patch("mlagents.envs.timers._global_timer_stack",
                    new_callable=timers.TimerStack) as test_timer:
        # First, run some simple code
        with timers.hierarchical_timer("top_level"):
            for i in range(3):
                with timers.hierarchical_timer("multiple"):
                    decorated_func(i, i)

            raised = False
            try:
                with timers.hierarchical_timer("raises"):
                    raise RuntimeError("timeout!")
            except RuntimeError:
                raised = True

            with timers.hierarchical_timer("post_raise"):
                assert raised
                pass

        # We expect the hierarchy to look like
        #   (root)
        #       top_level
        #           multiple
        #               decorated_func
        #           raises
        #           post_raise
        root = test_timer.root
        assert root.children.keys() == {"top_level"}

        top_level = root.children["top_level"]
        assert top_level.children.keys() == {
            "multiple", "raises", "post_raise"
        }

        # make sure the scope was closed properly when the exception was raised
        raises = top_level.children["raises"]
        assert raises.count == 1

        multiple = top_level.children["multiple"]
        assert multiple.count == 3

        timer_tree = test_timer.get_timing_tree()

        expected_tree = {
            "name": "root",
            "total": mock.ANY,
            "count": 1,
            "self": mock.ANY,
            "children": [{
                "name": "top_level",
                "total": mock.ANY,
                "count": 1,
                "self": mock.ANY,
                "children": [
                    {
                        "name": "multiple",
                        "total": mock.ANY,
                        "count": 3,
                        "self": mock.ANY,
                        "children": [{
                            "name": "decorated_func",
                            "total": mock.ANY,
                            "count": 3,
                            "self": mock.ANY,
                        }],
                    },
                    {
                        "name": "raises",
                        "total": mock.ANY,
                        "count": 1,
                        "self": mock.ANY,
                    },
                    {
                        "name": "post_raise",
                        "total": mock.ANY,
                        "count": 1,
                        "self": mock.ANY,
                    },
                ],
            }],
            "gauges": [{
                "name": "my_gauge",
                "value": 4.0,
                "max": 4.0,
                "min": 0.0,
                "count": 3,
            }],
        }

        assert timer_tree == expected_tree