Code example #1
File: base.py  Project: dingyiming0427/goalgail
    def step(self, action):
        # Step the wrapped environment, then shape the reward with a
        # goal-distance term.
        observation, reward, done, info = ProxyEnv.step(self, action)
        info['reward_inner'] = reward_inner = self.inner_weight * reward
        # Select the observation dimensions that live in goal space.
        obsIdx = observation[self.idx]
        info['distance'] = dist = np.linalg.norm(obsIdx - self.current_goal)
        info['reward_dist'] = reward_dist = self._compute_dist_reward(obsIdx)
        if self.terminal_bonus and dist <= self.terminal_eps:
            # Close enough to the goal: end the episode and add the bonus.
            done = True
            reward_dist += self.terminal_bonus
        return (self.get_current_obs(), reward_dist + reward_inner, done, info)
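
The helpers used above (_compute_dist_reward, get_current_obs) and the module-level "import numpy as np" are defined elsewhere in base.py. As a rough, self-contained sketch of the same shaping pattern, assuming the distance reward is just a negatively weighted Euclidean distance (the real _compute_dist_reward is not shown in this excerpt); all names and default weights below are illustrative, not the project's:

import numpy as np

def shaped_step_reward(obs, goal, inner_reward, idx=(0, 1),
                       inner_weight=0.1, dist_rew_weight=1.0,
                       terminal_bonus=300.0, terminal_eps=0.3):
    # Distance from the goal-space slice of the observation to the goal.
    dist = np.linalg.norm(np.asarray(obs)[list(idx)] - np.asarray(goal))
    # Assumed shape: reward grows as the agent approaches the goal.
    reward_dist = -dist_rew_weight * dist
    done = False
    if terminal_bonus and dist <= terminal_eps:
        done = True                      # close enough: end the episode
        reward_dist += terminal_bonus    # one-time success bonus
    return reward_dist + inner_weight * inner_reward, dist, done

# Example: an agent 0.05 away from the goal collects the terminal bonus.
reward, dist, done = shaped_step_reward([0.03, 0.04, 0.7], [0.0, 0.0],
                                        inner_reward=-0.5)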
Code example #2
File: goal_env.py  Project: dingyiming0427/goalgail
    def step(self, action):
        # Goal-space position before the step (the post-step position is
        # recorded below as 'xy_pos_new').
        xy_pos = self.transform_to_goal_space(
            self.wrapped_env.get_current_obs())
        observation, reward, done, info = ProxyEnv.step(self, action)
        info['reward_inner'] = reward_inner = self.inner_weight * reward
        # print("REWARD INNER", reward)
        if 'distance' not in info:
            info['distance'] = dist = self.dist_to_goal(observation)
            info['reward_dist'] = reward_dist = self.compute_dist_reward(
                observation)
            info['goal_reached'] = 1.0 * self.is_goal_reached(observation)
        else:
            # Modified so that the inner environment can pass in the goal
            # via step; it has already computed the distance.
            dist = info['distance']
            info['goal_reached'] = 1.0 * self.is_goal_reached(observation)
            info['reward_dist'] = reward_dist = -self.extend_dist_rew_weight * dist

        info['goal'] = self.current_goal
        info['obs2goal'] = self._obs2goal_transform(observation)

        info['xy_pos'] = xy_pos
        info['xy_pos_new'] = self.transform_to_goal_space(observation)

        if self.terminate_env and info['goal_reached']:
            done = True
        if self.append_goal_to_observation:
            observation = self.append_goal_observation(observation)
        if not self.include_goal_obs:
            # Drop the first two observation dimensions (the goal-space
            # coordinates) when goal observations are excluded.
            observation = observation[2:]

        return (observation, reward_dist + reward_inner +
                info['goal_reached'] * self.goal_weight, done, info)
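
Both wrappers follow the old gym-style step contract, returning (observation, reward, done, info). The info keys populated here ('distance', 'goal_reached', and so on) are what downstream logging and evaluation consume; a minimal, hypothetical rollout loop over such a wrapped environment (env and policy are placeholders, not names from this project) might look like:

def rollout(env, policy, max_steps=500):
    # env.step is assumed to return the 4-tuple produced above.
    obs = env.reset()
    distances, goal_reached = [], 0.0
    for _ in range(max_steps):
        obs, reward, done, info = env.step(policy(obs))
        distances.append(info['distance'])
        goal_reached = max(goal_reached, info['goal_reached'])
        if done:  # goal reached, or the inner environment terminated
            break
    return distances, goal_reached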