Example #1
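These snippets are excerpts and omit their imports. A minimal set that makes them loadable might look like the following; the module paths follow the older mlagents.envs package layout and are an assumption, so adjust them for your version:

from collections import OrderedDict
from typing import Any, Dict

import numpy as np

# Assumed import paths (older mlagents.envs layout); newer releases moved these modules.
from mlagents.envs import AllBrainInfo, BrainInfo
from mlagents.envs.communicator_objects import AgentInfoProto
from mlagents.envs.subprocess_environment import (
    EnvironmentResponse,
    SubprocessUnityEnvironment,
)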
 def test_step_await_combines_brain_info(self):
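     # Two env workers report step results for different agents; step_await()
     # should merge them into one BrainInfo with worker-prefixed agent ids.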
     all_brain_info_env0 = {
         "MockBrain":
         BrainInfo([], [[1.0, 2.0], [1.0, 2.0]], [],
                   agents=[1, 2],
                   memory=np.zeros((0, 0)))
     }
     all_brain_info_env1 = {
         "MockBrain":
         BrainInfo([], [[3.0, 4.0]], [],
                   agents=[3],
                   memory=np.zeros((0, 0)))
     }
     env_worker_0 = MockEnvWorker(0)
     env_worker_0.recv.return_value = EnvironmentResponse(
         "step", 0, all_brain_info_env0)
     env_worker_1 = MockEnvWorker(1)
     env_worker_1.recv.return_value = EnvironmentResponse(
         "step", 1, all_brain_info_env1)
     env = SubprocessUnityEnvironment(mock_env_factory, 0)
     env.envs = [env_worker_0, env_worker_1]
     env.waiting = True
     combined_braininfo = env.step_await()["MockBrain"]
     self.assertEqual(
         combined_braininfo.vector_observations.tolist(),
         [[1.0, 2.0], [1.0, 2.0], [3.0, 4.0]],
     )
     self.assertEqual(combined_braininfo.agents, ["0-1", "0-2", "1-3"])
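The test above references MockEnvWorker and mock_env_factory, which are not part of the excerpt. A minimal sketch that satisfies the test (an assumption, not the original fixtures):

from unittest import mock

def mock_env_factory(worker_id):
    # The factory result is never stepped in this test; a bare Mock suffices.
    return mock.Mock()

class MockEnvWorker:
    def __init__(self, worker_id):
        self.worker_id = worker_id
        self.process = None
        self.conn = None
        self.send = mock.Mock()
        self.recv = mock.Mock()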
 def construct_curr_info(self, next_info: BrainInfo) -> BrainInfo:
     """
     Constructs a BrainInfo which contains the most recent previous experiences for all agents info
     which correspond to the agents in a provided next_info.
     :BrainInfo next_info: A t+1 BrainInfo.
     :return: curr_info: Reconstructed BrainInfo to match agents of next_info.
     """
     visual_observations = [[] for _ in next_info.visual_observations]  # one sub-list per visual observation
     vector_observations = []
     text_observations = []
     memories = []
     rewards = []
     local_dones = []
     max_reacheds = []
     agents = []
     prev_vector_actions = []
     prev_text_actions = []
     action_masks = []
     for agent_id in next_info.agents:
         agent_brain_info = self.training_buffer[agent_id].last_brain_info
         if agent_brain_info is None:
             agent_brain_info = next_info
         agent_index = agent_brain_info.agents.index(agent_id)
         for i in range(len(next_info.visual_observations)):
             visual_observations[i].append(
                 agent_brain_info.visual_observations[i][agent_index])
         vector_observations.append(
             agent_brain_info.vector_observations[agent_index])
         text_observations.append(
             agent_brain_info.text_observations[agent_index])
         if self.policy.use_recurrent:
             if len(agent_brain_info.memories) > 0:
                 memories.append(agent_brain_info.memories[agent_index])
             else:
                 memories.append(self.policy.make_empty_memory(1))
         rewards.append(agent_brain_info.rewards[agent_index])
         local_dones.append(agent_brain_info.local_done[agent_index])
         max_reacheds.append(agent_brain_info.max_reached[agent_index])
         agents.append(agent_brain_info.agents[agent_index])
         prev_vector_actions.append(
             agent_brain_info.previous_vector_actions[agent_index])
         prev_text_actions.append(
             agent_brain_info.previous_text_actions[agent_index])
         action_masks.append(agent_brain_info.action_masks[agent_index])
     if self.policy.use_recurrent:
         memories = np.vstack(memories)
     curr_info = BrainInfo(
         visual_observations,
         vector_observations,
         text_observations,
         memories,
         rewards,
         agents,
         local_dones,
         prev_vector_actions,
         prev_text_actions,
         max_reacheds,
         action_masks,
     )
     return curr_info
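In the ML-Agents trainers this helper is typically invoked while processing new experiences, when the set of agents changes between consecutive steps. A hedged sketch of that call pattern (variable names are illustrative, not from the excerpt):

# If the agents present at t differ from those at t+1, rebuild a matching
# "current" BrainInfo before writing experiences to the training buffer.
curr_info = curr_all_info[brain_name]
next_info = next_all_info[brain_name]
if curr_info.agents != next_info.agents:
    curr_info = self.construct_curr_info(next_info)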
 def _revert_actions(self, brain_info: BrainInfo):
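     # Convert a MineRL-style action dict (stashed in previous_vector_actions)
     # back into a flat ML-Agents discrete action vector, one entry per branch.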
     action_in = brain_info.previous_vector_actions
     action = np.zeros(len(self._mlagent_action_space), dtype=int)
     VIEW_STEP = 1  # scale for converting camera velocity into a discrete turn
     for i, (k, v) in enumerate(self._mlagent_action_space.items()):
         if k == 'camera_left_right':
             velocity = action_in['camera'][1]
             rand = np.random.random_sample() * VIEW_STEP
             if velocity > 0 and velocity > rand:
                 action[i] = 2
             elif velocity < 0 and velocity < -rand:
                 action[i] = 1
         elif k == 'camera_up_down':
             velocity = action_in['camera'][0]
             rand = np.random.random_sample() * VIEW_STEP
             if velocity > 0 and velocity > rand:
                 action[i] = 2
             elif velocity < 0 and velocity < -rand:
                 action[i] = 1
         elif k in [
                 'craft', 'equip', 'nearbyCraft', 'nearbySmelt', 'place'
         ]:
             action[i] = action_in[k]
         else:
             for move_i, move in enumerate(v):
                 if move in action_in and action_in[move] == 1:
                     action[i] = move_i
     brain_info.previous_vector_actions = [action]
     # TODO action masks
     # vector_action_space_size = [len(v) for k,v in self._mlagent_action_space.items()]
     return brain_info
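The method assumes self._mlagent_action_space is an ordered mapping from ML-Agents discrete action branches to the MineRL actions they cover. Its real contents are not shown in the excerpt; a hypothetical layout consistent with the code above would be:

# Hypothetical layout (illustrative only). Keys are discrete branches; list
# entries are the MineRL action names each branch index selects, except for
# the crafting-style branches, whose raw values are passed through directly.
self._mlagent_action_space = OrderedDict([
    ('camera_left_right', ['noop', 'left', 'right']),
    ('camera_up_down', ['noop', 'down', 'up']),
    ('move', ['noop', 'forward', 'back']),
    ('jump', ['noop', 'jump']),
    ('attack', ['noop', 'attack']),
    ('craft', list(range(5))),
    ('equip', list(range(8))),
    ('nearbyCraft', list(range(8))),
    ('nearbySmelt', list(range(3))),
    ('place', list(range(7))),
])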
    def step(
        self,
        vector_action: Dict[str, Any] = None,
        memory: Dict[str, Any] = None,
        text_action: Dict[str, Any] = None,
        value: Dict[str, Any] = None,
    ) -> AllBrainInfo:
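        # Move the agent along a 1-D line by the (clamped) requested delta; the
        # episode ends at either end of [-1, 1] with a reward proportional to
        # the final position, and a small time penalty is applied otherwise.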
        assert vector_action is not None

        delta = vector_action[BRAIN_NAME][0][0]
        delta = clamp(delta, -STEP_SIZE, STEP_SIZE)
        self.position += delta
        self.position = clamp(self.position, -1, 1)
        self.step_count += 1
        done = self.position >= 1.0 or self.position <= -1.0
        if done:
            reward = SUCCESS_REWARD * self.position
        else:
            reward = -TIME_PENALTY

        agent_info = AgentInfoProto(
            stacked_vector_observation=[self.position] * OBS_SIZE,
            reward=reward,
            done=done,
        )

        if done:
            self._reset_agent()

        return {
            BRAIN_NAME:
            BrainInfo.from_agent_proto(0, [agent_info],
                                       self._brains[BRAIN_NAME])
        }
    def reset(
        self,
        config: Dict[str, float] = None,
        train_mode: bool = True,
        custom_reset_parameters: Any = None,
    ) -> AllBrainInfo:  # type: ignore
        self._reset_agent()

        agent_info = AgentInfoProto(
            stacked_vector_observation=[self.position] * OBS_SIZE,
            done=False,
            max_step_reached=False,
        )
        return {
            BRAIN_NAME:
            BrainInfo.from_agent_proto(0, [agent_info],
                                       self._brains[BRAIN_NAME])
        }
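The step/reset excerpt also relies on module-level constants and a clamp helper defined elsewhere in its file. The values below are plausible placeholders, not the original definitions:

# Placeholder constants for the 1-D test environment (assumed values).
BRAIN_NAME = "1DBrain"
OBS_SIZE = 1
STEP_SIZE = 0.1
TIME_PENALTY = 0.01
SUCCESS_REWARD = 1.0

def clamp(x, lower, upper):
    # Restrict x to the closed interval [lower, upper].
    return max(lower, min(x, upper))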
    def create_brain_info(ob,
                          agent_id,
                          brain_params,
                          reward=None,
                          done=None,
                          info=None,
                          action=None,
                          max_reached=False) -> BrainInfo:
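        # Flatten a MineRL observation dict into ML-Agents visual and vector
        # observations and wrap them in a single-agent BrainInfo.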
        vector_obs = []
        vis_obs = []
        for k, v in ob.items():
            if k == 'pov':
                v = ob['pov']
                v = v.reshape(1, v.shape[0], v.shape[1], v.shape[2])
                vis_obs = v
            elif k == 'equipped_items':
                v = float(ob['equipped_items']['mainhand']['damage'])
                max_damage = int(ob['equipped_items']['mainhand']['maxDamage'])
                v = v if max_damage == 0 else v / max_damage
                vector_obs.append(v)
                num_items = 9
                types = [
                    'none', 'air', 'wooden_axe', 'wooden_pickaxe', 'stone_axe',
                    'stone_pickaxe', 'iron_axe', 'iron_pickaxe', 'other'
                ]
                item_type = ob['equipped_items']['mainhand']['type']
                if isinstance(item_type, str):
                    item_type = types.index(item_type)
                item_type = np.array(int(item_type))
                item_type_onehot = np.squeeze(
                    np.eye(num_items)[item_type.reshape(-1)])
                vector_obs.extend(item_type_onehot.tolist())
            elif k == 'inventory':
                inventory = np.array([b for a, b in ob['inventory'].items()])
                has_one_item = np.clip(inventory, 0, 1)
                num_items = np.clip(inventory, 0, 10) / 10
                vector_obs.extend(has_one_item.tolist())
                vector_obs.extend(num_items.tolist())

                # OrderedDict([('coal', 0), ('cobblestone', 4), ('crafting_table', 1), ('dirt', 10), ('furnace', 1), ('iron_axe', 0),
                # ('iron_ingot', 0), ('iron_ore', 8), ('iron_pickaxe', 0), ('log', 0), ('planks', 15), ('stick', 0), ('stone', 0), ('stone_axe', 0), ...])
                # 2 planks for stick
                v = 1 if ob['inventory']['planks'] >= 2 else 0
                vector_obs.append(v)
                # 4 planks for crafting table
                v = 1 if ob['inventory']['planks'] >= 4 else 0
                vector_obs.append(v)
                # 3 planks and 2 sticks for a wooden sword
                v = 1 if (ob['inventory']['planks'] >= 3
                          and ob['inventory']['stick'] >= 2) else 0
                vector_obs.append(v)
                # 8 cobblestones for a furnace
                v = 1 if ob['inventory']['cobblestone'] >= 8 else 0
                vector_obs.append(v)
                # 3 cobblestone and 2 sticks for a stone pickaxe
                v = 1 if (ob['inventory']['cobblestone'] >= 3
                          and ob['inventory']['stick'] >= 2) else 0
                vector_obs.append(v)
                # 3 iron ingots and 2 sticks for an iron pickaxe
                v = 1 if (ob['inventory']['iron_ingot'] >= 3
                          and ob['inventory']['stick'] >= 2) else 0
                vector_obs.append(v)

            elif isinstance(v, (dict, OrderedDict)):
                # Flatten any other nested observation dict into scalar
                # features (assumes the values are numeric).
                for a, b in v.items():
                    vector_obs.append(float(b))
            else:
                vector_obs.append(float(v))
        vector_obs = np.array(vector_obs)
        vector_obs = vector_obs.reshape(1, vector_obs.shape[0])
        # vector_obs: List[np.ndarray] = [vector_obs]

        # vis_obs = vis_obs.reshape(1, vis_obs.shape[0], vis_obs.shape[1], vis_obs.shape[2])
        vis_obs = [vis_obs]

        text_obs = []
        memory = np.zeros((0, 0))

        rew = reward if reward is not None else 0.0
        rew = rew if not np.isnan(rew) else 0.0
        rew = [rew]

        local_done = [done] if done is not None else [False]
        text_action = []
        max_reached = [max_reached]
        agents = [agent_id]
        total_num_actions = sum(brain_params.vector_action_space_size)
        mask_actions = np.ones((len(agents), total_num_actions))
        vector_action = action if action is not None else np.zeros(
            (len(agents), len(brain_params.vector_action_space_size)))
        custom_observations = []
        brain_info = BrainInfo(visual_observation=vis_obs,
                               vector_observation=vector_obs,
                               text_observations=text_obs,
                               memory=memory,
                               reward=rew,
                               agents=agents,
                               local_done=local_done,
                               vector_action=vector_action,
                               text_action=text_action,
                               max_reached=max_reached,
                               action_mask=mask_actions,
                               custom_observations=custom_observations)
        return brain_info
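A hedged usage example, treating create_brain_info as the standalone converter shown above; the observation keys and the brain_params object are illustrative stand-ins (real MineRL observations carry many more fields, and brain_params is assumed to be a BrainParameters with vector_action_space_size set):

ob = OrderedDict([
    ('pov', np.zeros((64, 64, 3), dtype=np.uint8)),
    ('inventory', OrderedDict([
        ('planks', 4), ('stick', 2), ('cobblestone', 0), ('iron_ingot', 0),
    ])),
    ('compassAngle', 37.5),
])
brain_info = create_brain_info(ob, agent_id=0, brain_params=brain_params,
                               reward=0.0, done=False)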