def step(self, action):
    """Step the wrapped env and augment the reward with goal-distance terms.

    Combines the scaled inner reward with a distance-based reward; when the
    agent comes within ``terminal_eps`` of the current goal (and a nonzero
    ``terminal_bonus`` is configured), the episode terminates with a bonus.

    Returns a ``(observation, reward, done, info)`` tuple, where ``info``
    additionally carries 'reward_inner', 'distance', and 'reward_dist'.
    """
    obs, inner_rew, done, info = ProxyEnv.step(self, action)

    reward_inner = self.inner_weight * inner_rew
    info['reward_inner'] = reward_inner

    # Slice out the goal-relevant components of the observation.
    goal_obs = obs[self.idx, ]
    dist = np.linalg.norm(goal_obs - self.current_goal)
    info['distance'] = dist

    reward_dist = self._compute_dist_reward(goal_obs)
    # NOTE: info['reward_dist'] intentionally records the pre-bonus value.
    info['reward_dist'] = reward_dist

    # Terminal bonus only applies when configured (truthy) and within range.
    if self.terminal_bonus and dist <= self.terminal_eps:
        done = True
        reward_dist += self.terminal_bonus

    return (self.get_current_obs(), reward_dist + reward_inner, done, info)
def step(self, action):
    """Step the wrapped env and add goal-reaching reward/diagnostic terms.

    The total reward is the scaled inner reward, plus a distance-based
    reward, plus ``goal_weight`` when the goal is reached. The ``info``
    dict is populated with distance, per-term rewards, the current goal,
    and goal-space positions before/after the step.
    """
    # Goal-space position *before* stepping, recorded for diagnostics.
    prev_xy = self.transform_to_goal_space(
        self.wrapped_env.get_current_obs())

    obs, inner_rew, done, info = ProxyEnv.step(self, action)

    reward_inner = self.inner_weight * inner_rew
    info['reward_inner'] = reward_inner

    if 'distance' in info:
        # The inner environment already supplied its own goal distance
        # via info (it can pass in the goal through step).
        info['goal_reached'] = 1.0 * self.is_goal_reached(obs)
        reward_dist = -self.extend_dist_rew_weight * info['distance']
        info['reward_dist'] = reward_dist
    else:
        info['distance'] = self.dist_to_goal(obs)
        reward_dist = self.compute_dist_reward(obs)
        info['reward_dist'] = reward_dist
        info['goal_reached'] = 1.0 * self.is_goal_reached(obs)

    info['goal'] = self.current_goal
    info['obs2goal'] = self._obs2goal_transform(obs)
    info['xy_pos'] = prev_xy
    info['xy_pos_new'] = self.transform_to_goal_space(obs)

    if self.terminate_env and info['goal_reached']:
        done = True

    if self.append_goal_to_observation:
        obs = self.append_goal_observation(obs)
    if not self.include_goal_obs:
        # Drop the leading two entries (presumably the goal components
        # of the observation — TODO confirm against the wrapped env).
        obs = obs[2:]

    total_reward = (reward_dist + reward_inner
                    + info['goal_reached'] * self.goal_weight)
    return (obs, total_reward, done, info)