Example 1
    def __del__(self):
        print('Arm random goal destructor called')
        if self.is_validation:
            # If fail_points is not defined yet, do nothing
            if not hasattr(self, 'fail_points'):
                return

            def print_list(list_):
                for item in list_:
                    print(item)

            print('Saving failed goals to failed_points.pkl')

            with open('failed_points.pkl', 'wb') as f:
                pickle.dump(self.fail_points, f, pickle.HIGHEST_PROTOCOL)
            print('Failed goals, target coords: ')
            print_list(enumerate([x for x, y in self.fail_points]))

            # We spawn gazebo markers here to exploit the fact that spinningup test_policy somehow doesn't close gazebo after it's done
            k = 100
            ut_gazebo.create_marker(self.node, 0.0, 0.0, 0.0, diameter=0.004)
            for x, y in self.fail_points:
                ut_gazebo.create_marker(self.node,
                                        x[0],
                                        x[1],
                                        x[2],
                                        diameter=0.001,
                                        id=k)
                k += 1
    def __init__(self, node: rclpy.node.Node, robot, **kwargs):
        super().__init__(node, robot, **kwargs)

        # The target coords are currently set arbitrarily to an achievable point.
        # This is the position of grip_end_point when the target joint values are [1.00, -1.00, 1.00]
        target_x = 0.10175
        target_y = -0.05533
        target_z = 0.1223
        self.target_coords = numpy.array([target_x, target_y, target_z])
        if isinstance(robot, LobotArmSim):  # Check if is gazebo or not
            # Spawn the target marker if it is gazebo
            print(f"Fixed goal: spawning to: {self.target_coords}")
            ut_gazebo.create_marker(node, target_x, target_y, target_z, diameter=0.004)
        self.previous_coords = numpy.array([0.0, 0.0, 0.0])
    def reset(self):
        self.previous_coords = numpy.array([0.0, 0.0, 0.0])
        if isinstance(self.robot, LobotArmSim):  # Check whether this is the simulator or the real robot
            # Move the target marker if running in Gazebo
            print(f'Moving to {(self.target_coords[0], self.target_coords[1], self.target_coords[2])}')
            spawn_success = ut_gazebo.create_marker(self.node, self.target_coords[0],
                                                    self.target_coords[1], self.target_coords[2], diameter=0.004)
Example 4
    def reset(self):
        self.reward_noise_sigma = self.original_reward_noise_sigma
        # Set initial coordinates
        initial_joint_values = numpy.array([0.0, 0.0, 0.0])
        res = self._fk.calculate('world', 'grip_end_point',
                                 initial_joint_values)
        self.previous_coords = numpy.array(
            [res.translation.x, res.translation.y, res.translation.z])
        self.initial_coords = numpy.array(
            [res.translation.x, res.translation.y, res.translation.z])
        self.__reset_count += 1
        if self.__reset_count % self.episodes_per_goal == 0:
            self.target_coords_ik, self.target_coords = self.__get_target_coords()
            print(
                f'Moving to [{self.target_coords[0]:.6f}, {self.target_coords[1]:.6f}, {self.target_coords[2]:.6f}], '
                f'given by joint values [{self.target_coords_ik[0]:.6f}, {self.target_coords_ik[1]:.6f}, {self.target_coords_ik[2]:.6f}]'
            )
            if isinstance(self.robot, LobotArmSim):  # Check whether this is the simulator or the real robot
                # Move the target marker if running in Gazebo
                spawn_success = ut_gazebo.create_marker(self.node,
                                                        self.target_coords[0],
                                                        self.target_coords[1],
                                                        self.target_coords[2],
                                                        diameter=0.004)
    def __init__(self,
                 node: rclpy.node.Node,
                 robot,
                 task_kwargs: Dict = None,
                 max_time_step: int = 500):
        if task_kwargs is None:
            task_kwargs = {}
        self.node = node
        self.robot = robot
        self.accepted_dist_to_bounds = task_kwargs.get(
            'accepted_dist_to_bounds', 0.001)
        self.accepted_error = task_kwargs.get('accepted_error', 0.001)
        self.reach_target_bonus_reward = task_kwargs.get(
            'reach_target_bonus_reward', 0.0)
        self.reach_bounds_penalty = task_kwargs.get('reach_bounds_penalty',
                                                    0.0)
        self.contact_penalty = task_kwargs.get('contact_penalty', 0.0)
        self.episodes_per_goal = task_kwargs.get('episodes_per_goal', 1)
        self.goal_buffer_size = task_kwargs.get('goal_buffer_size', 20)
        self.goal_from_buffer_prob = task_kwargs.get('goal_from_buffer_prob',
                                                     0.0)
        print(
            f'-------------------------------Setting task parameters-------------------------------'
        )
        print(
            'accepted_dist_to_bounds: %f   # Allowable distance to joint limits'
            % self.accepted_dist_to_bounds)
        print(
            'accepted_error: %f            # Allowable distance from target coordinates'
            % self.accepted_error)
        print(
            'reach_target_bonus_reward: %f # Bonus reward upon reaching target'
            % self.reach_target_bonus_reward)
        print(
            'reach_bounds_penalty: %f      # Reward penalty when reaching joint limit'
            % self.reach_bounds_penalty)
        print('contact_penalty: %f           # Reward penalty for collision' %
              self.contact_penalty)
        print(
            'episodes_per_goal: %d         # Number of episodes before generating another random goal'
            % self.episodes_per_goal)
        print(
            'goal_buffer_size: %d          # Number of goals to store in the buffer to be reused later'
            % self.goal_buffer_size)
        print(
            'goal_from_buffer_prob: %f     # Probability of selecting a random goal from the goal buffer, value between 0 and 1'
            % self.goal_from_buffer_prob)
        print(
            f'-------------------------------------------------------------------------------------'
        )

        assert self.accepted_dist_to_bounds >= 0.0, 'Allowable distance to joint limits should be non-negative'
        assert self.accepted_error >= 0.0, 'Accepted error to end coordinates should be non-negative'
        assert self.reach_target_bonus_reward >= 0.0, 'Reach target bonus reward should be non-negative'
        assert self.contact_penalty >= 0.0, 'Contact penalty should be non-negative'
        assert self.reach_bounds_penalty >= 0.0, 'Reach bounds penalty should be non-negative'
        assert isinstance(
            self.episodes_per_goal, int
        ), f'Episodes per goal should be an integer, current type: {type(self.episodes_per_goal)}'
        assert self.episodes_per_goal >= 1, 'Episodes per goal should be greater than or equal to 1, i.e. episodes_per_goal >= 1'
        assert isinstance(
            self.goal_buffer_size, int
        ), f'Goal buffer size should be an integer, current type: {type(self.goal_buffer_size)}'
        assert self.goal_buffer_size > 0, 'Goal buffer size should be greater than or equal to 1, i.e. goal_buffer_size >= 1'
        assert 0 <= self.goal_from_buffer_prob <= 1, 'Probability of selecting goal from buffer should be between 0 and 1'

        self._max_time_step = max_time_step
        lobot_desc_share_path = get_package_share_directory(
            'lobot_description')
        arm_urdf_path = os.path.join(lobot_desc_share_path,
                                     'robots/arm_standalone.urdf')
        self._fk = fk.ForwardKinematics(arm_urdf_path)

        self.target_coords = self.__generate_target_coords()
        self.coords_buffer = deque(maxlen=self.goal_buffer_size)
        self.coords_buffer.append(self.target_coords)
        target_x = self.target_coords[0]
        target_y = self.target_coords[1]
        target_z = self.target_coords[2]
        if isinstance(robot, LobotArmSim):  # Check if is gazebo
            # Spawn the target marker if it is gazebo
            print(f'Spawning to: {(target_x, target_y, target_z)}')
            spawn_success = ut_gazebo.create_marker(node,
                                                    target_x,
                                                    target_y,
                                                    target_z,
                                                    diameter=0.004)
        self.previous_coords = numpy.array([0.0, 0.0, 0.0])
        self.__reset_count = 0
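For reference, a hedged sketch of the task_kwargs dictionary this constructor reads. Only the keys are taken from the .get() calls above; the values shown are the defaults the constructor falls back to, not recommended settings.

# Illustrative only: keys mirror the task_kwargs.get(...) calls above,
# values are the constructor defaults rather than tuned settings.
task_kwargs = {
    'accepted_dist_to_bounds': 0.001,   # allowable distance to joint limits
    'accepted_error': 0.001,            # allowable distance from target coordinates
    'reach_target_bonus_reward': 0.0,
    'reach_bounds_penalty': 0.0,
    'contact_penalty': 0.0,
    'episodes_per_goal': 1,
    'goal_buffer_size': 20,
    'goal_from_buffer_prob': 0.0,
}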
Example 6
    def compute_reward(self, joint_states: numpy.ndarray,
                       arm_state: ArmState) -> Tuple[float, Dict]:
        assert len(
            joint_states
        ) == 3, f'Expected 3 values for joint states, but got {len(joint_states)} values instead'
        coords_get_result = self.__get_coords(joint_states)
        assert len(
            coords_get_result
        ) == 3, f'Expected 3 values after getting coordinates, but got {len(coords_get_result)} values instead'
        current_coords = coords_get_result

        assert arm_state != ArmState.Undefined, 'Arm state cannot be undefined, please check logic'

        # Give 0 reward on initial state
        # if numpy.array_equal(self.previous_coords, numpy.array([0.0, 0.0, 0.0])):
        #     # print('Initial state detected, giving 0 reward')
        #     reward = 0.0
        # else:
        reward = self.__calc_dist_change(self.previous_coords, current_coords)

        # normalise rewards
        mag_target = numpy.linalg.norm(self.initial_coords -
                                       self.target_coords)
        normalised_reward = reward / mag_target

        # Scale up normalised reward slightly such that the total reward is between 0 and 10 instead of between 0 and 1
        normalised_reward *= 10

        # Scale up reward so that it is not so small if not normalised
        normal_scaled_reward = reward * 100

        # Calculate current distance to goal (for information purposes only)
        dist = numpy.linalg.norm(current_coords - self.target_coords)

        reward_info = {
            'normalised_reward': normalised_reward,
            'normal_reward': normal_scaled_reward,
            'distance_to_goal': dist,
            'target_coords': self.target_coords,
            'current_coords': current_coords
        }

        if self.normalise_reward:
            reward = normalised_reward
        else:
            reward = normal_scaled_reward

        # Calculate exponential reward component
        if self.exp_rew_scaling is not None:
            exp_reward = self.__calc_exponential_reward(
                self.previous_coords, current_coords)
            reward_info['exp_reward'] = exp_reward
            reward += exp_reward
        else:
            reward_info['exp_reward'] = 0.0

        # Add reward noise
        if self.reward_noise_mu is not None and self.reward_noise_sigma is not None:
            rew_noise = numpy.random.normal(self.reward_noise_mu,
                                            self.reward_noise_sigma)
            if self.reward_noise_decay is not None:
                self.reward_noise_sigma = self.reward_noise_sigma * math.exp(
                    -self.reward_noise_decay)
            reward_info['rew_noise'] = rew_noise
            reward += rew_noise
        else:
            reward_info['rew_noise'] = 0.0

        self.previous_coords = current_coords

        # Reward shaping logic

        # Check if it has reached target destination
        if arm_state == ArmState.Reached:
            # If the target was reached and this is a continuous run, generate a new set of target coordinates.
            # This must happen after __calc_dist_change, since that function uses self.target_coords.
            if self.continuous_run:
                self.target_coords_ik, self.target_coords = self.__get_target_coords()
                print(
                    f'Moving to [{self.target_coords[0]:.6f}, {self.target_coords[1]:.6f}, {self.target_coords[2]:.6f}], '
                    f'given by joint values [{self.target_coords_ik[0]:.6f}, {self.target_coords_ik[1]:.6f}, {self.target_coords_ik[2]:.6f}]'
                )
                ut_gazebo.create_marker(self.node,
                                        self.target_coords[0],
                                        self.target_coords[1],
                                        self.target_coords[2],
                                        diameter=0.004)
            reward += self.reach_target_bonus_reward

        # Check if it has approached any joint limits
        if arm_state == ArmState.ApproachJointLimits:
            reward -= self.reach_bounds_penalty

        # Check for collision
        if arm_state == ArmState.Collision:
            reward -= self.contact_penalty

        if arm_state == ArmState.Timeout:
            reward -= self.timeout_penalty

        return reward, reward_info
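A short numeric sketch of the reward scaling and noise decay used in compute_reward above. The coordinate values and step distance are made up; the formulas mirror the code, and the decay constant comes from the recommendation printed in the constructor in Example 7.

import math
import numpy

# Illustrative numbers only; the formulas follow compute_reward above.
initial_coords = numpy.array([0.10, -0.05, 0.12])
target_coords = numpy.array([0.05, 0.05, 0.20])
dist_change = 0.004  # metres moved towards the target this step

mag_target = numpy.linalg.norm(initial_coords - target_coords)
normalised_reward = dist_change / mag_target * 10  # scaled so episode totals land roughly in [0, 10]
normal_scaled_reward = dist_change * 100           # un-normalised variant

# When reward_noise_decay is set, sigma shrinks by exp(-decay) every step,
# so after n steps sigma = sigma_0 * exp(-decay * n).
sigma_0, decay = 0.5, 0.31073
shrink_after_20 = math.exp(-decay * 20)   # ~0.002, matching the comment about 20 steps
sigma_after_20 = sigma_0 * shrink_after_20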
Example 7
    def __init__(self,
                 node: rclpy.node.Node,
                 robot,
                 max_time_step: int = 500,
                 accepted_dist_to_bounds=0.001,
                 accepted_error=0.001,
                 reach_target_bonus_reward=0.0,
                 reach_bounds_penalty=0.0,
                 contact_penalty=0.0,
                 timeout_penalty=0.0,
                 episodes_per_goal=1,
                 goal_buffer_size=20,
                 goal_from_buffer_prob=0.0,
                 num_adjacent_goals=0,
                 is_validation=False,
                 random_goal_seed=None,
                 random_goal_file=None,
                 normalise_reward=False,
                 continuous_run=False,
                 reward_noise_mu=None,
                 reward_noise_sigma=None,
                 reward_noise_decay=None,
                 exp_rew_scaling=None):
        self.node = node
        self.robot = robot
        self._max_time_step = max_time_step
        self.accepted_dist_to_bounds = accepted_dist_to_bounds
        self.accepted_error = accepted_error
        self.reach_target_bonus_reward = reach_target_bonus_reward
        self.reach_bounds_penalty = reach_bounds_penalty
        self.contact_penalty = contact_penalty
        self.timeout_penalty = timeout_penalty
        self.episodes_per_goal = episodes_per_goal
        self.goal_buffer_size = goal_buffer_size
        self.goal_from_buffer_prob = goal_from_buffer_prob
        self.num_adjacent_goals = num_adjacent_goals
        self.is_validation = is_validation
        self.random_goal_seed = random_goal_seed
        self.random_goal_file = random_goal_file
        self.normalise_reward = normalise_reward
        self.continuous_run = continuous_run
        self.reward_noise_mu = reward_noise_mu
        self.reward_noise_sigma = reward_noise_sigma
        self.original_reward_noise_sigma = reward_noise_sigma
        self.reward_noise_decay = reward_noise_decay
        self.exp_rew_scaling = exp_rew_scaling
        print(
            f'-------------------------------Setting task parameters-------------------------------'
        )
        print(
            'max_time_step: %8d               # Maximum time step before stopping the episode'
            % self._max_time_step)
        print(
            'accepted_dist_to_bounds: %8.7f    # Allowable distance to joint limits (radians)'
            % self.accepted_dist_to_bounds)
        print(
            'accepted_error: %8.7f             # Allowable distance from target coordinates (metres)'
            % self.accepted_error)
        print(
            'reach_target_bonus_reward: %8.7f # Bonus reward upon reaching target'
            % self.reach_target_bonus_reward)
        print(
            'reach_bounds_penalty: %8.7f      # Reward penalty when reaching joint limit'
            % self.reach_bounds_penalty)
        print(
            'contact_penalty: %8.7f           # Reward penalty for collision' %
            self.contact_penalty)
        print(
            'timeout_penalty: %8.7f           # Reward penalty when the episode times out' %
            self.timeout_penalty)
        print(
            'episodes_per_goal: %8d           # Number of episodes before generating another random goal'
            % self.episodes_per_goal)
        print(
            'goal_buffer_size: %8d            # Number of goals to store in the buffer to be reused later'
            % self.goal_buffer_size)
        print(
            'goal_from_buffer_prob: %8.7f      # Probability of selecting a random goal from the goal buffer, value between 0 and 1'
            % self.goal_from_buffer_prob)
        print(
            'num_adjacent_goals: %8d          # Number of nearby goals to be generated for each randomly generated goal '
            % self.num_adjacent_goals)
        print(
            f'random_goal_seed: {str(self.random_goal_seed):8}            # Seed used to generate the random goals'
        )
        print(
            f'random_goal_file: {self.random_goal_file}       # Path to the numpy save file containing the random goals'
        )
        print(
            'is_validation: %8r               # Whether this is a validation run; if true, prints which points failed and how many were reached'
            % self.is_validation)
        print(
            'normalise_reward: %8r            # Perform reward normalisation, this happens before reward bonus and penalties'
            % self.normalise_reward)
        print(
            'continuous_run: %8r              # Continuously run the simulation, even after it reaches the destination'
            % self.continuous_run)
        print(
            f'reward_noise_mu: {self.reward_noise_mu}            # Reward noise mean (reward noise follows gaussian distribution)'
        )
        print(
            f'reward_noise_sigma: {self.reward_noise_sigma}         # Reward noise standard deviation, recommended 0.5'
        )
        print(
            f'reward_noise_decay: {self.reward_noise_decay}            # Constant for exponential reward noise decay (recommended 0.31073, decays to 0.002 in 20 steps)'
        )
        print(
            f'exp_rew_scaling: {self.exp_rew_scaling}            # Constant for exponential reward scaling (None by default, recommended 5.0, cumulative exp_reward = 29.48)'
        )
        print(
            f'-------------------------------------------------------------------------------------'
        )

        assert self.accepted_dist_to_bounds >= 0.0, 'Allowable distance to joint limits should be non-negative'
        assert self.accepted_error >= 0.0, 'Accepted error to end coordinates should be non-negative'
        assert self.reach_target_bonus_reward >= 0.0, 'Reach target bonus reward should be non-negative'
        assert self.contact_penalty >= 0.0, 'Contact penalty should be non-negative'
        assert self.timeout_penalty >= 0.0, 'Timeout penalty should be non-negative'
        assert self.reach_bounds_penalty >= 0.0, 'Reach bounds penalty should be non-negative'
        assert isinstance(
            self.episodes_per_goal, int
        ), f'Episodes per goal should be an integer, current type: {type(self.episodes_per_goal)}'
        assert self.episodes_per_goal >= 1, 'Episodes per goal should be greater than or equal to 1, i.e. episodes_per_goal >= 1'
        assert isinstance(
            self.goal_buffer_size, int
        ), f'Goal buffer size should be an integer, current type: {type(self.goal_buffer_size)}'
        assert self.goal_buffer_size > 0, 'Goal buffer size should be greater than or equal to 1, i.e. goal_buffer_size >= 1'
        assert 0 <= self.goal_from_buffer_prob <= 1, 'Probability of selecting goal from buffer should be between 0 and 1'
        assert isinstance(
            self.num_adjacent_goals, int
        ), f'Number of adjacent goals should be an integer, current type: {type(self.num_adjacent_goals)}'
        assert self.num_adjacent_goals >= 0, f'Number of adjacent goals should be non-negative, current value: {self.num_adjacent_goals}'
        if self.random_goal_seed is not None:
            assert isinstance(
                self.random_goal_seed, int
            ), f'Random goal seed should be an integer, current type: {type(self.random_goal_seed)}'
        if self.random_goal_file is not None:
            assert os.path.exists(self.random_goal_file)
        if reward_noise_decay is not None:
            assert self.reward_noise_mu is not None and self.reward_noise_sigma is not None
            assert isinstance(self.reward_noise_mu, float) and isinstance(
                self.reward_noise_sigma, float)
        if exp_rew_scaling is not None:
            assert isinstance(
                self.exp_rew_scaling, float
            ), f'Exponential reward scaling factor should be a float, current type: {type(self.exp_rew_scaling)}'

        self._max_time_step = max_time_step
        lobot_desc_share_path = get_package_share_directory(
            'lobot_description')
        arm_urdf_path = os.path.join(lobot_desc_share_path,
                                     'robots/arm_standalone.urdf')
        self._fk = fk.ForwardKinematics(arm_urdf_path)

        self.coords_buffer = deque(maxlen=self.goal_buffer_size)
        self.target_coords_ik, self.target_coords = self.__get_target_coords()
        target_x = self.target_coords[0]
        target_y = self.target_coords[1]
        target_z = self.target_coords[2]
        if isinstance(robot, LobotArmSim):  # Check if is gazebo
            # Spawn the target marker if it is gazebo
            print(f'Spawning to: {(target_x, target_y, target_z)}')
            spawn_success = ut_gazebo.create_marker(node,
                                                    target_x,
                                                    target_y,
                                                    target_z,
                                                    diameter=0.004)
        self.previous_coords = None
        self.initial_coords = None
        self.__reset_count: int = 0
        self.fail_points = []
        self.__reach_count: int = 0
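A small sketch of the goal-buffer behaviour relied on above: deque(maxlen=goal_buffer_size) silently evicts the oldest goal once the buffer is full, so old goals cycle out as new ones are generated.

from collections import deque
import numpy

# Illustration of the goal buffer used above: once maxlen goals are stored,
# appending a new goal silently evicts the oldest one.
coords_buffer = deque(maxlen=3)
for i in range(5):
    coords_buffer.append(numpy.array([0.1 * i, 0.0, 0.1]))

print(len(coords_buffer))  # 3
print(coords_buffer[0])    # goal appended on iteration i=2; the first two were evicted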