def _test_agent(self, agent):
    """
    Tests agent in the environment
    :param agent:
    :return: final state image and final (x,y) pose
    """
    done = False
    ts = 0
    f_pose = None  # fallback: only the Ant and Billiard branches below set the final pose
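    # Reset the env; obs_formatting converts the raw observation into the common internal layout.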
    obs = utils.obs_formatting(self.env_tag, self.env.reset())

    while not done:
      if self.render_test:
        self.env.render()
      agent_input = ts
      action = utils.action_formatting(self.env_tag, agent['agent'](agent_input))  # TODO: normalize agent_input by self.params.max_episode_len

      obs, reward, done, info = self.env.step(action)
      obs = utils.obs_formatting(self.env_tag, obs, reward, done, info)
      ts += 1

      if ts >= self.params.max_episode_len:
        done = True

      if 'Ant' in self.env_tag:
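        # Stop the episode early once the Ant's center of mass (qpos[:2]) leaves the [-3, 3] arena.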
        CoM = np.array(self.env.env.data.qpos[:2])
        f_pose = CoM
        if np.any(np.abs(CoM) >= np.array([3, 3])):
          done = True
      elif 'Billiard' in self.env_tag:
        f_pose = obs[0][:2]

    state = self.env.render(mode='rgb_array')
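    # Normalize pixel values to [0, 1]; the inner max(..., 1) guards against division by zero on an all-black frame.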
    state = state / np.max((np.max(state), 1))
    gc.collect()
    return state, f_pose
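
For context, a minimal sketch of how _test_agent might be driven from an outer loop inside the same class (the pop iterable is an assumption, not from the source):

# Hypothetical driver: collect the final frame and pose of every agent in a population.
final = []
for agent in pop:
    state, f_pose = self._test_agent(agent)  # final rendered frame and final (x, y) pose
    final.append((state, f_pose))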

    # ---------------------------------------------------
Example #2
    def evaluate_agent(self, agent):
        """
        Evaluates the agent in the environment. This function should be run in parallel.
        :param agent: agent to evaluate
        :return: cumulated reward
        """
        done = False
        cumulated_reward = 0

        obs = self.env.reset()
        t = 0
        while not done:
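            # Build the agent input: Fastsim gets (normalized time, obs), Ant the raw timestep, everything else just the normalized time.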
            if 'FastsimSimpleNavigation' in self.params.env_tag:
                agent_input = [
                    t / self.params.max_episode_len, obs
                ]  # Normalized time plus the observation; the agent uses the time to decide when to stop acting. TODO: move the action stopping outside of the agent
            elif 'Ant' in self.params.env_tag:
                agent_input = [t]
            else:
                agent_input = [t / self.params.max_episode_len]
            action = utils.action_formatting(self.params.env_tag,
                                             agent['agent'](agent_input))

            obs, reward, done, info = self.env.step(action)
            t += 1
            cumulated_reward += reward
            if t >= self.params.max_episode_len:
                done = True

            if 'Ant' in self.params.env_tag:
                CoM = np.array([self.env.robot.body_xyz[:2]])
                if np.any(np.abs(CoM) >= np.array([3, 3])):
                    done = True

        agent['bs'] = utils.extact_hd_bs(self.env, obs, reward, done, info)
        agent['reward'] = cumulated_reward

        # Extract genome as a feature
        feat = []
        for k in agent['agent'].genome:
            if isinstance(k, dict):
                for i in k:
                    if i != 'name':
                        feat.append(np.array(k[i]).flatten())
            else:
                feat.append(np.array([k]))

        agent['features'] = [
            np.concatenate(feat), None
        ]  # PS uses the genome as the feature to compute the BD
        return cumulated_reward
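
With genome-based features, behavior distance can then be computed directly on the flattened parameter vectors; a minimal sketch, assuming plain Euclidean distance (genome_bd_distance is a hypothetical helper, not from the source):

import numpy as np

def genome_bd_distance(agent_a, agent_b):
    # Hypothetical helper: Euclidean distance between the genome feature
    # vectors stored in agent['features'][0] by evaluate_agent above.
    fa = agent_a['features'][0]
    fb = agent_b['features'][0]
    return float(np.linalg.norm(fa - fb))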

    # ---------------------------------------------------
Example #3
    def evaluate_agent(self, agent):
        """
    This function evaluates the agent in the environment.
    :param agent: agent to evaluate
    :return:
    """
        done = False
        cumulated_reward = 0

        obs = self.env.reset()
        t = 0
        while not done:
            if 'FastsimSimpleNavigation' in self.params.env_tag:
                agent_input = [
                    t / self.params.max_episode_len, obs
                ]  # Normalized time plus the observation; the agent uses the time to decide when to stop acting. TODO: move the action stopping outside of the agent
            elif 'Ant' in self.params.env_tag:
                agent_input = [t]
            else:
                agent_input = [t / self.params.max_episode_len]
            action = utils.action_formatting(self.params.env_tag,
                                             agent['agent'](agent_input))

            obs, reward, done, info = self.env.step(action)
            t += 1
            cumulated_reward += reward

            if t >= self.params.max_episode_len:
                done = True

            if 'Ant' in self.params.env_tag:
                CoM = np.array([self.env.robot.body_xyz[:2]])
                if np.any(np.abs(CoM) >= np.array([3, 3])):
                    done = True

        try:
            # Some envs support a top-down camera; fall back to the default view otherwise.
            state = self.env.render(mode='rgb_array', top_bottom=True)
        except TypeError:
            state = self.env.render(mode='rgb_array')
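        # Normalize to [0, 1] and downsample to 64x64 so the frame can serve as a fixed-size behavior descriptor.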
        state = state / np.max((np.max(state), 1))
        state = resize(state, (64, 64))

        agent['bs'] = utils.extact_hd_bs(self.env, obs, reward, done, info)
        agent['reward'] = cumulated_reward
        agent['features'] = [
            state.ravel(), None
        ]  # Here HD images are used as the features to compute the BD
        return cumulated_reward

    # ---------------------------------------------------
Example #4
    def evaluate_agent(self, agent):
        """
        Evaluates the agent in the environment. This function should be run in parallel.
        :param agent: agent to evaluate
        :return: cumulated reward
        """
        done = False
        cumulated_reward = 0

        obs = self.env.reset()
        t = 0
        while not done:
            if 'FastsimSimpleNavigation' in self.params.env_tag:
                agent_input = [
                    t / self.params.max_episode_len, obs
                ]  # Normalized time plus the observation; the agent uses the time to decide when to stop acting. TODO: move the action stopping outside of the agent
            elif 'Ant' in self.params.env_tag:
                agent_input = [t]
            else:
                agent_input = [t / self.params.max_episode_len]
            action = utils.action_formatting(self.params.env_tag,
                                             agent['agent'](agent_input))

            obs, reward, done, info = self.env.step(action)
            t += 1
            cumulated_reward += reward

            if t >= self.params.max_episode_len:
                done = True

            if 'Ant' in self.params.env_tag:
                CoM = np.array([self.env.robot.body_xyz[:2]])
                if np.any(np.abs(CoM) >= np.array([3, 3])):
                    done = True

        agent['bs'] = utils.extact_hd_bs(self.env, obs, reward, done, info)
        agent['reward'] = cumulated_reward
        agent['features'] = [
            np.random.random(self.params.feature_size), None
        ]  # RBD uses random vectors as the features to compute the BD
        return cumulated_reward

    # ---------------------------------------------------
Example #5
    def evaluate_agent(self, agent):
        """
    This function evaluates the agent in the environment. This function should be run in parallel
    :param agent: agent to evaluate
    :return:
    """
        done = False
        cumulated_reward = 0

        obs = self.env.reset()
        t = 0
        while not done:
            if 'FastsimSimpleNavigation' in self.params.env_tag:
                agent_input = [
                    t / self.params.max_episode_len, obs
                ]  # Normalized time plus the observation; the agent uses the time to decide when to stop acting. TODO: move the action stopping outside of the agent
            elif 'Ant' in self.params.env_tag:
                agent_input = [t]
            else:
                agent_input = [t / self.params.max_episode_len]

            action = utils.action_formatting(self.params.env_tag,
                                             agent['agent'](agent_input))

            obs, reward, done, info = self.env.step(action)
            t += 1
            cumulated_reward += reward

            if t >= self.params.max_episode_len:
                done = True

            if 'Ant' in self.params.env_tag:
                CoM = np.array([self.env.robot.body_xyz[:2]])
                if np.any(np.abs(CoM) >= np.array([3, 3])):
                    done = True
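        # Render a top-down frame of the final state; the autoencoder turns it into a BD later, in update_agents.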
        state = self.env.render(mode='rgb_array', top_bottom=True)
        state = state / np.max((np.max(state), 1))

        agent['bs'] = utils.extact_hd_bs(self.env, obs, reward, done, info)
        agent['reward'] = cumulated_reward
        # Here the AE features are used instead to compute the BD; that happens outside this function, in update_agents.
        return state, None, cumulated_reward  # TODO: check why there is a None here
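
Downstream, update_agents would push the returned frame through the autoencoder to obtain the BD; a minimal sketch, assuming a hypothetical ae.encode interface (not shown in the source):

import numpy as np

def update_agents(agents, eval_results, ae):
    # Hypothetical downstream step: encode each agent's final frame into AE features.
    # eval_results holds the (state, None, cumulated_reward) tuples returned above.
    for agent, (state, _, _) in zip(agents, eval_results):
        feat = ae.encode(state)  # ae.encode is an assumed interface
        agent['features'] = [np.asarray(feat).ravel(), None]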

    # ---------------------------------------------------
Example #6
    def evaluate_agent_xy(self):
        print('Calculating agent XY final pose')
        with progressbar.ProgressBar(max_value=len(self.pop)) as bar:
            for agent_idx, agent in enumerate(self.pop):
                done = False
                t = 0
                obs = self.env.reset()
                while not done:
                    if self.render_test:
                        self.env.render()
                    if 'FastsimSimpleNavigation' in self.params.env_tag:
                        agent_input = [
                            obs, t / self.params.max_episode_len
                        ]  # Observation plus normalized time; the agent uses the time to decide when to stop acting. TODO: move the action stopping outside of the agent
                    elif 'Ant' in self.params.env_tag:  # TODO: add these input formats to the baselines as well
                        agent_input = t
                    else:
                        agent_input = t / self.params.max_episode_len
                    action = utils.action_formatting(
                        self.params.env_tag, agent['agent'](agent_input))

                    obs, reward, done, info = self.env.step(action)
                    t += 1
                    if t >= self.params.max_episode_len:
                        done = True

                    if 'Ant' in self.params.env_tag:
                        CoM = np.array([
                            self.env.env.data.qpos[:2]
                        ])  # alternative: CoM = np.array([self.env.robot.body_xyz[:2]])
                        if np.any(np.abs(CoM) >= np.array([3, 3])):
                            done = True

                agent['bs'] = utils.extact_hd_bs(self.env, obs, reward, done,
                                                 info)
                bar.update(agent_idx)

  # ---------------------------------------------------
Example #7
  pop = population.Population(agent=agent_type, pop_size=0, shapes=params.agent_shapes)
  pop.load_pop(os.path.join(load_path, 'models/qd_archive.pkl'))
  # -----------------------------------------------

  # Evaluate archive agents BS points
  # -----------------------------------------------
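  # Features are only rebuilt if the archive was saved without them.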
  if pop[0]['features'] is None:
    for i, agent in enumerate(pop):
      if i % 50 == 0:
        print('Evaluating agent {}'.format(i))
      done = False
      obs = env.reset()
      t = 0
      while not done:
        agent_input = t
        action = utils.action_formatting(params.env_tag, agent['agent'](agent_input))

        obs, reward, done, info = env.step(action)
        t += 1

        if t >= params.max_episode_len:
          done = True

        if "Ant" in params.env_tag:
          CoM = np.array([env.env.data.qpos[:2]])
          if np.any(np.abs(CoM) >= np.array([3, 3])):
            done = True

      state = env.render(mode='rgb_array')
      state = state / np.max((np.max(state), 1))
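      # HWC -> CHW tensor with a batch dim, then subsample for the selector network.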
      state = selector.subsample(torch.Tensor(state).permute(2, 0, 1).unsqueeze(0).to(device))