    def evaluate_agent(self, agent):
        """
    This function evaluates the agent in the environment. This function should be run in parallel
    :param agent: agent to evaluate
    :return:
    """
        done = False
        cumulated_reward = 0

        obs = self.env.reset()
        t = 0
        while not done:
            if 'FastsimSimpleNavigation' in self.params.env_tag:
                agent_input = [
                    t / self.params.max_episode_len, obs
                ]  # Observation and normalised time; the time tells the agent when to stop acting. TODO: move the action stopping outside of the agent
            elif 'Ant' in self.params.env_tag:
                agent_input = [t]
            else:
                agent_input = [t / self.params.max_episode_len]
            action = utils.action_formatting(self.params.env_tag,
                                             agent['agent'](agent_input))

            obs, reward, done, info = self.env.step(action)
            t += 1
            cumulated_reward += reward
            if t >= self.params.max_episode_len:
                done = True

            if 'Ant' in self.params.env_tag:
                # Stop the episode if the Ant's centre of mass leaves the [-3, 3] x [-3, 3] area
                CoM = np.array([self.env.robot.body_xyz[:2]])
                if np.any(np.abs(CoM) >= np.array([3, 3])):
                    done = True

        agent['bs'] = utils.extact_hd_bs(self.env, obs, reward, done, info)
        agent['reward'] = cumulated_reward

        # Extract genome as a feature
        feat = []
        for k in agent['agent'].genome:
            if isinstance(k, dict):
                for i in k:
                    if i != 'name':  # compare by value; 'is not' checks identity and is unreliable for strings
                        feat.append(np.array(k[i]).flatten())
            else:
                feat.append(np.array([k]))

        agent['features'] = [
            np.concatenate(feat), None
        ]  # PS uses the genome as feature to calculate the BD (concatenate takes the list directly; wrapping the ragged list in np.array can fail on recent NumPy)
        return cumulated_reward
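
# --- Illustrative sketch (not from the original code): the genome-flattening step above,
# --- shown standalone. The genome layout below is hypothetical; the real structure comes
# --- from agent['agent'].genome.
import numpy as np

genome = [
    {'name': 'layer0', 'w': np.ones((2, 3)), 'b': np.zeros(3)},  # parameter dict
    0.5,                                                         # scalar gene
]

feat = []
for k in genome:
    if isinstance(k, dict):
        for key in k:
            if key != 'name':                      # skip the layer's name entry
                feat.append(np.array(k[key]).flatten())
    else:
        feat.append(np.array([k]))

features = np.concatenate(feat)                    # flat 1D behaviour descriptor
print(features.shape)                              # (10,) for this toy genome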

    # ---------------------------------------------------
    def evaluate_agent(self, agent):
        """
    This function evaluates the agent in the environment.
    :param agent: agent to evaluate
    :return:
    """
        done = False
        cumulated_reward = 0

        obs = self.env.reset()
        t = 0
        while not done:
            if 'FastsimSimpleNavigation' in self.params.env_tag:
                agent_input = [
                    t / self.params.max_episode_len, obs
                ]  # Observation and normalised time; the time tells the agent when to stop acting. TODO: move the action stopping outside of the agent
            elif 'Ant' in self.params.env_tag:
                agent_input = [t]
            else:
                agent_input = [t / self.params.max_episode_len]
            action = utils.action_formatting(self.params.env_tag,
                                             agent['agent'](agent_input))

            obs, reward, done, info = self.env.step(action)
            t += 1
            cumulated_reward += reward

            if t >= self.params.max_episode_len:
                done = True

            if 'Ant' in self.params.env_tag:
                CoM = np.array([self.env.robot.body_xyz[:2]])
                if np.any(np.abs(CoM) >= np.array([3, 3])):
                    done = True

        try:
            state = self.env.render(mode='rgb_array', top_bottom=True)
        except TypeError:  # fall back if this env's render() does not accept the top_bottom kwarg
            state = self.env.render(mode='rgb_array')
        state = state / np.max((np.max(state), 1))  # scale pixels to [0, 1]; the inner max guards against an all-zero frame
        state = resize(state, (64, 64))  # resize is assumed to come from skimage.transform

        agent['bs'] = utils.extact_hd_bs(self.env, obs, reward, done, info)
        agent['reward'] = cumulated_reward
        agent['features'] = [
            state.ravel(), None
        ]  # Here we use HD images as features to calculate the BD
        return cumulated_reward
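
# --- Illustrative sketch (not from the original code): the image-feature pipeline used above,
# --- shown standalone on a dummy frame. `resize` is assumed to be skimage.transform.resize.
import numpy as np
from skimage.transform import resize

frame = np.random.randint(0, 256, size=(240, 320, 3), dtype=np.uint8)  # stand-in for env.render(mode='rgb_array')

state = frame / np.max((np.max(frame), 1))   # scale to [0, 1]; the inner max guards against an all-zero frame
state = resize(state, (64, 64))              # downsample; colour channels are preserved -> (64, 64, 3)
features = state.ravel()                     # flattened image used as the behaviour descriptor
print(features.shape)                        # (12288,)

    # ---------------------------------------------------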
    def evaluate_agent(self, agent):
        """
    This function evaluates the agent in the environment. This function should be run in parallel
    :param agent: agent to evaluate
    :return:
    """
        done = False
        cumulated_reward = 0

        obs = self.env.reset()
        t = 0
        while not done:
            if 'FastsimSimpleNavigation' in self.params.env_tag:
                agent_input = [
                    t / self.params.max_episode_len, obs
                ]  # Observation and normalised time; the time tells the agent when to stop acting. TODO: move the action stopping outside of the agent
            elif 'Ant' in self.params.env_tag:
                agent_input = [t]
            else:
                agent_input = [t / self.params.max_episode_len]
            action = utils.action_formatting(self.params.env_tag,
                                             agent['agent'](agent_input))

            obs, reward, done, info = self.env.step(action)
            t += 1
            cumulated_reward += reward

            if t >= self.params.max_episode_len:
                done = True

            if 'Ant' in self.params.env_tag:
                CoM = np.array([self.env.robot.body_xyz[:2]])
                if np.any(np.abs(CoM) >= np.array([3, 3])):
                    done = True

        agent['bs'] = utils.extact_hd_bs(self.env, obs, reward, done, info)
        agent['reward'] = cumulated_reward
        agent['features'] = [
            np.random.random(self.params.feature_size), None
        ]  # RBD uses random vectors as features to calculate the BD
        return cumulated_reward

    # ---------------------------------------------------
    def evaluate_agent(self, agent):
        """
    This function evaluates the agent in the environment. This function should be run in parallel
    :param agent: agent to evaluate
    :return:
    """
        done = False
        cumulated_reward = 0

        obs = self.env.reset()
        t = 0
        while not done:
            if 'FastsimSimpleNavigation' in self.params.env_tag:
                agent_input = [
                    t / self.params.max_episode_len, obs
                ]  # Observation and normalised time; the time tells the agent when to stop acting. TODO: move the action stopping outside of the agent
            elif 'Ant' in self.params.env_tag:
                agent_input = [t]
            else:
                agent_input = [t / self.params.max_episode_len]

            action = utils.action_formatting(self.params.env_tag,
                                             agent['agent'](agent_input))

            obs, reward, done, info = self.env.step(action)
            t += 1
            cumulated_reward += reward

            if t >= self.params.max_episode_len:
                done = True

            if 'Ant' in self.params.env_tag:
                CoM = np.array([self.env.robot.body_xyz[:2]])
                if np.any(np.abs(CoM) >= np.array([3, 3])):
                    done = True
        state = self.env.render(mode='rgb_array', top_bottom=True)
        state = state / np.max((np.max(state), 1))

        agent['bs'] = utils.extact_hd_bs(self.env, obs, reward, done, info)
        agent['reward'] = cumulated_reward
        # Here we instead use the AE's features to calculate the BD; this is computed outside this function, in update_agents
        return state, None, cumulated_reward  # TODO check why there is a None here
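
# --- Illustrative sketch (not from the original code): for this variant the behaviour descriptor
# --- is computed later, in update_agents, by passing the returned frame through a learned
# --- autoencoder. Here a fixed random projection merely stands in for the encoder to show the
# --- frame-to-feature reduction; all names below are hypothetical.
import numpy as np
from skimage.transform import resize

def encode_frame(frame, feature_size=10, seed=0):
    """Hypothetical stand-in for the autoencoder's encoder: project a rendered
    frame down to `feature_size` numbers."""
    small = resize(frame / np.max((np.max(frame), 1)), (64, 64)).ravel()
    projection = np.random.default_rng(seed).standard_normal((small.size, feature_size))
    return small @ projection

frame = np.random.randint(0, 256, size=(240, 320, 3), dtype=np.uint8)
print(encode_frame(frame).shape)  # (10,)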
    # ---------------------------------------------------
    def evaluate_agent_xy(self):
        print('Calculating agent XY final pose')
        with progressbar.ProgressBar(max_value=len(self.pop)) as bar:
            for agent_idx, agent in enumerate(self.pop):
                done = False
                t = 0
                obs = self.env.reset()
                while not done:
                    if self.render_test:
                        self.env.render()
                    if 'FastsimSimpleNavigation' in self.params.env_tag:
                        agent_input = [
                            obs, t / self.params.max_episode_len
                        ]  # Observation and normalised time; the time tells the agent when to stop acting. TODO: move the action stopping outside of the agent
                    elif 'Ant' in self.params.env_tag:  # TODO: add these to the baselines as well
                        agent_input = t
                    else:
                        agent_input = t / self.params.max_episode_len
                    action = utils.action_formatting(
                        self.params.env_tag, agent['agent'](agent_input))

                    obs, reward, done, info = self.env.step(action)
                    t += 1
                    if t >= self.params.max_episode_len:
                        done = True

                    if 'Ant' in self.params.env_tag:
                        CoM = np.array([
                            self.env.env.data.qpos[:2]
                        ])  #CoM = np.array([self.env.robot.body_xyz[:2]])
                        if np.any(np.abs(CoM) >= np.array([3, 3])):
                            done = True

                agent['bs'] = utils.extact_hd_bs(self.env, obs, reward, done,
                                                 info)
                bar.update(agent_idx)
# ---------------------------------------------------
      saved_robot_pose.append(np.array(info['robot_pos'][:2]))

  images = np.array(images)
  with open('./maze_video_images3.npy', 'wb') as file:
    np.save(file, images)
  if 'Billiard' in params.env_tag:
    env.params.SHOW_ARM_IN_ARRAY = True
    saved_balls_pose = np.array(saved_balls_pose) * np.array([100., -100.]) + np.array([150., 150.])
    saved_joints_pose = np.array(saved_joints_pose)
    # Forward kinematics of the two-link arm tip (link lengths 1 and 0.9), mapped into image pixel coordinates
    point_pose = np.array([np.sin(saved_joints_pose[:, 0]) + .9 * np.cos(np.pi / 2. - saved_joints_pose[:, 1] - saved_joints_pose[:, 0]),
                           np.cos(saved_joints_pose[:, 0]) + .9 * np.sin(np.pi / 2. - saved_joints_pose[:, 1] - saved_joints_pose[:, 0])]).transpose()
    point_pose = point_pose * np.array([-100., -100.]) + np.array([150., 300.])
  else:
    saved_robot_pose = np.array(saved_robot_pose)

  f_pose = utils.extact_hd_bs(env, obs, reward, done, info)

  state = env.render(mode='rgb_array')  # , top_bottom=True)
  state = state / np.max((np.max(state), 1))

  plt.figure()
  if 'Billiard' in params.env_tag:
    for i in range(state.shape[0]):
      for j in range(state.shape[1]):
        if np.all(state[i, j] == np.zeros(3)):
          state[i, j] = np.ones(3)
    plt.imshow(state)
    plt.plot(saved_balls_pose[:, 0], saved_balls_pose[:, 1], 'r-')
    plt.plot(point_pose[:, 0], point_pose[:, 1], 'b-')
  elif 'Fastsim' in params.env_tag:
    state = 1 - state