def evaluate_agent(self, agent):
  """
  This function evaluates the agent in the environment. This function should be run in parallel.
  :param agent: agent to evaluate
  :return:
  """
  done = False
  cumulated_reward = 0

  obs = self.env.reset()
  t = 0
  while not done:
    if 'FastsimSimpleNavigation' in self.params.env_tag:
      # Observation and time. The time is used to see when to stop the action.
      # TODO move the action stopping outside of the agent
      agent_input = [t / self.params.max_episode_len, obs]
    elif 'Ant' in self.params.env_tag:
      agent_input = [t]
    else:
      agent_input = [t / self.params.max_episode_len]

    action = utils.action_formatting(self.params.env_tag, agent['agent'](agent_input))

    obs, reward, done, info = self.env.step(action)
    t += 1
    cumulated_reward += reward

    if t >= self.params.max_episode_len:
      done = True

    if 'Ant' in self.params.env_tag:
      CoM = np.array([self.env.robot.body_xyz[:2]])
      if np.any(np.abs(CoM) >= np.array([3, 3])):
        done = True

  agent['bs'] = utils.extact_hd_bs(self.env, obs, reward, done, info)
  agent['reward'] = cumulated_reward

  # Extract the genome as a feature vector.
  feat = []
  for k in agent['agent'].genome:
    if isinstance(k, dict):
      for i in k:
        if i != 'name':  # Compare strings with equality, not identity.
          feat.append(np.array(k[i]).flatten())
    else:
      feat.append(np.array([k]))
  agent['features'] = [np.concatenate(feat), None]  # PS uses the genome as feature to calculate the BD
  return cumulated_reward
# ---------------------------------------------------
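The docstring notes that evaluate_agent should be run in parallel. Below is a minimal sketch of how such a dispatch could look with a process pool; the names evaluate_population_parallel and _evaluate_and_return, the pool size, and the assumption that the evaluator object (including its environment) is picklable are illustrative and not taken from the original code base.

from functools import partial
from multiprocessing import Pool

def _evaluate_and_return(evaluator, agent):
  # evaluate_agent fills agent['bs'], agent['reward'] and agent['features'] inside the
  # worker process, so the updated dict must be returned to the parent explicitly.
  evaluator.evaluate_agent(agent)
  return agent

def evaluate_population_parallel(evaluator, population, processes=4):
  # Map every agent of the population onto a pool of worker processes.
  with Pool(processes=processes) as pool:
    return pool.map(partial(_evaluate_and_return, evaluator), population)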
def evaluate_agent(self, agent):
  """
  This function evaluates the agent in the environment.
  :param agent: agent to evaluate
  :return:
  """
  done = False
  cumulated_reward = 0

  obs = self.env.reset()
  t = 0
  while not done:
    if 'FastsimSimpleNavigation' in self.params.env_tag:
      # Observation and time. The time is used to see when to stop the action.
      # TODO move the action stopping outside of the agent
      agent_input = [t / self.params.max_episode_len, obs]
    elif 'Ant' in self.params.env_tag:
      agent_input = [t]
    else:
      agent_input = [t / self.params.max_episode_len]

    action = utils.action_formatting(self.params.env_tag, agent['agent'](agent_input))

    obs, reward, done, info = self.env.step(action)
    t += 1
    cumulated_reward += reward

    if t >= self.params.max_episode_len:
      done = True

    if 'Ant' in self.params.env_tag:
      CoM = np.array([self.env.robot.body_xyz[:2]])
      if np.any(np.abs(CoM) >= np.array([3, 3])):
        done = True

  try:
    state = self.env.render(mode='rgb_array', top_bottom=True)
  except:  # Fall back for environments whose render() does not accept top_bottom.
    state = self.env.render(mode='rgb_array')
  state = state / np.max((np.max(state), 1))
  state = resize(state, (64, 64))

  agent['bs'] = utils.extact_hd_bs(self.env, obs, reward, done, info)
  agent['reward'] = cumulated_reward
  agent['features'] = [state.ravel(), None]  # Here we use HD images as features to calculate the BD
  return cumulated_reward
def evaluate_agent(self, agent):
  """
  This function evaluates the agent in the environment. This function should be run in parallel.
  :param agent: agent to evaluate
  :return:
  """
  done = False
  cumulated_reward = 0

  obs = self.env.reset()
  t = 0
  while not done:
    if 'FastsimSimpleNavigation' in self.params.env_tag:
      # Observation and time. The time is used to see when to stop the action.
      # TODO move the action stopping outside of the agent
      agent_input = [t / self.params.max_episode_len, obs]
    elif 'Ant' in self.params.env_tag:
      agent_input = [t]
    else:
      agent_input = [t / self.params.max_episode_len]

    action = utils.action_formatting(self.params.env_tag, agent['agent'](agent_input))

    obs, reward, done, info = self.env.step(action)
    t += 1
    cumulated_reward += reward

    if t >= self.params.max_episode_len:
      done = True

    if 'Ant' in self.params.env_tag:
      CoM = np.array([self.env.robot.body_xyz[:2]])
      if np.any(np.abs(CoM) >= np.array([3, 3])):
        done = True

  agent['bs'] = utils.extact_hd_bs(self.env, obs, reward, done, info)
  agent['reward'] = cumulated_reward
  agent['features'] = [np.random.random(self.params.feature_size), None]  # RBD uses random vectors as features to calculate the BD
  return cumulated_reward
# ---------------------------------------------------
def evaluate_agent(self, agent):
  """
  This function evaluates the agent in the environment. This function should be run in parallel.
  :param agent: agent to evaluate
  :return:
  """
  done = False
  cumulated_reward = 0

  obs = self.env.reset()
  t = 0
  while not done:
    if 'FastsimSimpleNavigation' in self.params.env_tag:
      # Observation and time. The time is used to see when to stop the action.
      # TODO move the action stopping outside of the agent
      agent_input = [t / self.params.max_episode_len, obs]
    elif 'Ant' in self.params.env_tag:
      agent_input = [t]
    else:
      agent_input = [t / self.params.max_episode_len]

    action = utils.action_formatting(self.params.env_tag, agent['agent'](agent_input))

    obs, reward, done, info = self.env.step(action)
    t += 1
    cumulated_reward += reward

    if t >= self.params.max_episode_len:
      done = True

    if 'Ant' in self.params.env_tag:
      CoM = np.array([self.env.robot.body_xyz[:2]])
      if np.any(np.abs(CoM) >= np.array([3, 3])):
        done = True

  state = self.env.render(mode='rgb_array', top_bottom=True)
  state = state / np.max((np.max(state), 1))

  agent['bs'] = utils.extact_hd_bs(self.env, obs, reward, done, info)
  agent['reward'] = cumulated_reward
  # Here the AE features are used instead to calculate the BD. This is done outside
  # this function, in update_agents.
  return state, None, cumulated_reward  # TODO check why there is a None here
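For context on the comment above: a minimal sketch of how the frames returned by this variant could be encoded into behaviour descriptors outside of evaluate_agent. The autoencoder.encode interface, the 64x64 resize, and the update_agents_sketch helper are assumptions for illustration only, not the repository's actual update_agents implementation.

import numpy as np
from skimage.transform import resize

def compute_ae_features(autoencoder, states, size=(64, 64)):
  # Downscale the rendered frames and encode them into a low-dimensional feature
  # vector that is then used as behaviour descriptor (assumed encoder interface).
  batch = np.stack([resize(s, size) for s in states])
  return autoencoder.encode(batch)

def update_agents_sketch(pop, autoencoder, states, rewards):
  # Assign reward and AE-based features to every agent of the population.
  features = compute_ae_features(autoencoder, states)
  for agent, feat, rew in zip(pop, features, rewards):
    agent['reward'] = rew
    agent['features'] = [feat, None]
  return pop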
def evaluate_agent_xy(self):
  print('Calculating agent XY final pose')
  with progressbar.ProgressBar(max_value=len(self.pop)) as bar:
    for agent_idx, agent in enumerate(self.pop):
      done = False
      t = 0
      obs = self.env.reset()
      while not done:
        if self.render_test:
          self.env.render()

        if 'FastsimSimpleNavigation' in self.params.env_tag:
          # Observation and time. The time is used to see when to stop the action.
          # TODO move the action stopping outside of the agent
          agent_input = [obs, t / self.params.max_episode_len]
        elif 'Ant' in self.params.env_tag:  # TODO add these to the baselines as well
          agent_input = t
        else:
          agent_input = t / self.params.max_episode_len

        action = utils.action_formatting(self.params.env_tag, agent['agent'](agent_input))

        obs, reward, done, info = self.env.step(action)
        t += 1

        if t >= self.params.max_episode_len:
          done = True

        if 'Ant' in self.params.env_tag:
          CoM = np.array([self.env.env.data.qpos[:2]])
          # CoM = np.array([self.env.robot.body_xyz[:2]])
          if np.any(np.abs(CoM) >= np.array([3, 3])):
            done = True

      agent['bs'] = utils.extact_hd_bs(self.env, obs, reward, done, info)
      bar.update(agent_idx)
    saved_robot_pose.append(np.array(info['robot_pos'][:2]))

  images = np.array(images)
  with open('./maze_video_images3.npy', 'wb') as file:
    np.save(file, images)

  if 'Billiard' in params.env_tag:
    env.params.SHOW_ARM_IN_ARRAY = True
    # Map the saved ball positions from environment coordinates to image coordinates.
    saved_balls_pose = np.array(saved_balls_pose) * np.array([100., -100.]) + np.array([150., 150.])
    saved_joints_pose = np.array(saved_joints_pose)
    # Forward kinematics of the two-link arm: end-point position from the joint angles.
    point_pose = np.array([np.sin(saved_joints_pose[:, 0]) + .9 * np.cos(np.pi / 2. - saved_joints_pose[:, 1] - saved_joints_pose[:, 0]),
                           np.cos(saved_joints_pose[:, 0]) + .9 * np.sin(np.pi / 2. - saved_joints_pose[:, 1] - saved_joints_pose[:, 0])]).transpose()
    point_pose = point_pose * np.array([-100., -100.]) + np.array([150., 300.])
  else:
    saved_robot_pose = np.array(saved_robot_pose)

  f_pose = utils.extact_hd_bs(env, obs, reward, done, info)
  state = env.render(mode='rgb_array')  # , top_bottom=True)
  state = state / np.max((np.max(state), 1))

  plt.figure()
  if 'Billiard' in params.env_tag:
    # Turn the black background white so the trajectory overlays are visible.
    for i in range(state.shape[0]):
      for j in range(state.shape[1]):
        if np.all(state[i, j] == np.zeros(3)):
          state[i, j] = np.ones(3)
    plt.imshow(state)
    plt.plot(saved_balls_pose[:, 0], saved_balls_pose[:, 1], 'r-')
    plt.plot(point_pose[:, 0], point_pose[:, 1], 'b-')
  elif 'Fastsim' in params.env_tag:
    state = 1 - state