def success_condition(self, achieved_goal, desired_goal):
        success = []
        for key, value in achieved_goal.items():
            # Achieve desired curvature
            if key is ObservationConfig.ObservationType.DLO_CURVATURE.value:
                success.append(goal_distance(value, desired_goal[key]) < self.kwargs['dlo_curvature_threshold'])
            # Achieve desired ee positions
            elif key is ObservationConfig.ObservationType.EE_POSITION.value:
                for ee_key, ee_value in value.items():
                    success.append(
                        goal_distance(ee_value, desired_goal[key][ee_key]) < self.kwargs['ee_position_threshold'])

        return all(success)
Beispiel #2
0
    def reward_function(self, achieved_goal, desired_goal, info):
        curvature_distance = goal_distance(achieved_goal['dlo_curvature'],
                                           desired_goal['dlo_curvature'])
        info['distance'] = curvature_distance

        if self.is_success(achieved_goal, desired_goal):
            reward = self.reward_range[1]
        else:
            reward = -curvature_distance

        return reward, info
Beispiel #3
0
 def reward_function(self, achieved_goal, desired_goal, info):
     curvature_distance = goal_distance(achieved_goal['dlo_curvature'], desired_goal['dlo_curvature'])
     info['distance'] = curvature_distance
     if not self.is_success(achieved_goal, desired_goal):
         # penalize large distances to goal
         reward = np.clip(-curvature_distance, self.reward_range[0], self.reward_range[1])
     else:
         # reward achieving goal, but penalize non-zero velocity
         velocity_scale = 100
         ee_velocity = np.linalg.norm(achieved_goal['ee_velocity']['gripper_right'])
         reward = np.clip(self.reward_range[1] - velocity_scale*ee_velocity, 0, self.reward_range[1])
     return reward, info
Beispiel #4
0
 def success_condition(self, achieved_goal, desired_goal):
     curvature_distance = goal_distance(achieved_goal['dlo_curvature'],
                                        desired_goal['dlo_curvature'])
     return bool(
         curvature_distance < self.kwargs['dlo_curvature_threshold'])