def compute_score(self,
                      states_list,
                      timestep=0,
                      print_states=True,
                      print_additionnal_info=True):
        """
        Available information:
        x : horizontal position
        y : vertical position
        angle : angle relative to the vertical (negative = right, positive = left)
        first_leg_contact : Left leg touches ground
        second_leg_contact : Right leg touches ground
        throttle : Throttle intensity
        gimbal : Gimbal angle relative to the rocket axis
        velocity_x : horizontal velocity (negative : going Left, positive : going Right)
        velocity_y : vertical velocity (negative : going Down, positive : going Up)
        angular_velocity : angular velocity (negative : turning anti-clockwise, positive : turning clockwise)
        distance : distance from the center of the ship
        velocity : norm of the velocity vector (velocity_x, velocity_y)
        landed : both legs touching the ground
        landed_full : both legs touching the ground for one second (60 frames)
        states : dictionary containing all variables of the state vector, for display purposes
        additionnal_information : dictionary containing additional information, for display purposes

        """
        # states information extraction
        (
            x,
            y,
            angle,
            first_leg_contact,
            second_leg_contact,
            throttle,
            gimbal,
            velocity_x,
            velocity_y,
            angular_velocity,
            distance,
            velocity,
            landed,
            landed_full,
            states,
            additionnal_information,
        ) = info_extractor(states_list, self.env)
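        # a full landing corresponds to landed_ticks > 59, i.e. both legs on
        # the ground for a whole second at 60 fps (the landed_full flag above);
        # the score then rewards closeness to the center of the pad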
        if self.env.environment.landed_ticks > 59:
            score = 1 - abs(x)
        else:
            score = 0
        return score
    def reward_function(self,
                        states_list,
                        timestep=0,
                        print_states=True,
                        print_additionnal_info=True):
        ########## WORK NEEDED #############
        ### You need to shape the reward ###
        ####################################
        """
        Available information:
        x : horizontal position
        y : vertical position
        angle : angle relative to the vertical (negative = right, positive = left)
        first_leg_contact : Left leg touches ground
        second_leg_contact : Right leg touches ground
        throttle : Throttle intensity
        gimbal : Gimbal angle relative to the rocket axis
        velocity_x : horizontal velocity (negative : going Left, positive : going Right)
        velocity_y : vertical velocity (negative : going Down, positive : going Up)
        angular_velocity : angular velocity (negative : turning anti-clockwise, positive : turning clockwise)
        distance : distance from the center of the ship
        velocity : norm of the velocity vector (velocity_x, velocity_y)
        landed : both legs touching the ground
        landed_full : both legs touching the ground for one second (60 frames)
        states : dictionary containing all variables of the state vector, for display purposes
        additionnal_information : dictionary containing additional information, for display purposes

        **Hints**
        Be careful with the signs of the different variables.

        Go on and shape the reward!
        """
        # states information extraction
        (
            x,
            y,
            angle,
            first_leg_contact,
            second_leg_contact,
            throttle,
            gimbal,
            velocity_x,
            velocity_y,
            angular_velocity,
            distance,
            velocity,
            landed,
            landed_full,
            states,
            additionnal_information,
        ) = info_extractor(states_list, self.env)

        ######## REWARD SHAPING ###########

        # state variables for reward
        groundcontact = first_leg_contact or second_leg_contact
        reward = 0

        # let's start with the rewards in case of failure
        if not landed_full:
            # penalize tilt, horizontal offset, throttle use and speed (both
            # scaled up as altitude shrinks), plus sustained velocity over time
            y_safe = max(abs(y), 1e-6)  # guard against division by zero at touchdown
            reward = (-abs(angle) - abs(x) - abs(throttle) / y_safe
                      - abs(velocity) / y_safe - velocity * timestep) / 100
            print('\ry: {}'.format(y), end='')
            # print('\rflying: {}'.format(reward), end='')
            # if groundcontact:
            #     # case in which the rocket landed (one or both legs) but didn't stabilize (broken).
            #     # -> we set the reward to 0.5 (as ground contact is good) and subtract a value depending on angle,
            #     # horizontal distance, velocity and angular velocity, i.e. the variables we want to bring to 0
            #     # (ingredients for a successful landing!). We clip this value to 1, so we don't go under -0.5.
            #     reward = 1 - min(1, (abs(x) - abs(angle) - abs(angular_velocity) -
            #                          abs(angle * angular_velocity) - abs(throttle) / abs(y)) / 100)
            #     print('\rlanded improperly: {}'.format(reward), end='')
            # else:
            #     # case in which the rocket is still flying.
            #     # -> we want to encourage the rocket to go towards the center and to stabilize, so we
            #     # start from reward = 0 and subtract a value that we want to be minimized. We clip
            #     # this value to make sure the reward doesn't go under -1.
            #     reward = 0 - (((abs(x) + abs(angle) + abs(angular_velocity) + abs(angle * angular_velocity) +
            #                     abs(throttle) / abs(y)) / 100) * np.log(timestep))
            #     print('\rflying: {}'.format(reward), end='')

        # and now the rewards in case of success
        if landed_full:
            reward = 10000
            print('\rlanded properly: {}'.format(reward), end='')
        # if distance > 0:
        #     # case in which the rocket didn't land in the center.
        #     # -> it's a success: we set the reward to 1 and subtract a value depending on
        #     # the distance from the center of the platform, without going under 0
        #     reward += 10000  # - abs(x)**2
        #     print('\rlanded uncentered: {}'.format(reward), end='')
        # else:
        #     # fully successful landing, right in the center!
        #     # -> highest reward, +1
        #     reward += 10000
        #     print('\rlanded perfectly: {}'.format(reward), end='')

        # reward = np.clip(reward, -1, 1)  # just in case - normally it should already be clipped above

        display_info(states,
                     additionnal_information,
                     reward,
                     timestep,
                     verbose=False)

        return reward
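A cheap way to respect the "be careful with the signs" hint is to evaluate the penalty on hand-picked states before training. The sketch below is a standalone mirror of the failure-case term above (hypothetical values, no environment needed), with the same division-by-zero guard:

    def failure_penalty(angle, x, throttle, velocity, y, timestep):
        """Standalone copy of the failure-case penalty for offline checks."""
        y_safe = max(abs(y), 1e-6)  # same guard as in the reward function
        return (-abs(angle) - abs(x) - abs(throttle) / y_safe
                - abs(velocity) / y_safe - velocity * timestep) / 100

    # hovering high, centered and slow -> small penalty (about -0.1)
    print(failure_penalty(angle=0.0, x=0.0, throttle=0.5, velocity=0.1, y=10.0, timestep=100))
    # tilted, offset and fast near the ground -> much larger penalty (about -2.1)
    print(failure_penalty(angle=0.4, x=0.8, throttle=1.0, velocity=2.0, y=0.5, timestep=100))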
Example #3
    def reward_function(self,
                        states_list,
                        timestep=0,
                        print_states=True,
                        print_additionnal_info=True):
        ########## WORK NEEDED #############
        ### You need to shape the reward ###
        ####################################
        """
        Available information:
        x : horizontal position
        y : vertical position
        angle : angle relative to the vertical (negative = right, positive = left)
        first_leg_contact : Left leg touches ground
        second_leg_contact : Right leg touches ground
        throttle : Throttle intensity
        gimbal : Gimbal angle relative to the rocket axis
        velocity_x : horizontal velocity (negative : going Left, positive : going Right)
        velocity_y : vertical velocity (negative : going Down, positive : going Up)
        angular_velocity : angular velocity (negative : turning anti-clockwise, positive : turning clockwise)
        distance : distance from the center of the ship
        velocity : norm of the velocity vector (velocity_x, velocity_y)
        landed : both legs touching the ground
        landed_full : both legs touching the ground for one second (60 frames)
        states : dictionary containing all variables of the state vector, for display purposes
        additionnal_information : dictionary containing additional information, for display purposes

        **Hints**
        Be careful with the signs of the different variables.

        Go on and shape the reward!
        """
        # states information extraction
        (
            x,
            y,
            angle,
            first_leg_contact,
            second_leg_contact,
            throttle,
            gimbal,
            velocity_x,
            velocity_y,
            angular_velocity,
            distance,
            velocity,
            landed,
            landed_full,
            states,
            additionnal_information,
        ) = info_extractor(states_list, self.env)

        ######## REWARD SHAPING ###########
        # reward definition (per timestep): you have to fill it in!
        reward = -1
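        # One possible (purely illustrative) fill, using the variables unpacked
        # above: a small per-step penalty on offset, tilt and speed, plus a
        # terminal bonus. This is a sketch, not the expected solution.
        # reward = -(abs(x) + abs(angle) + 0.1 * abs(velocity)) / 10
        # if landed_full:
        #     reward = 10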

        display_info(states,
                     additionnal_information,
                     reward,
                     timestep,
                     verbose=False)

        return reward
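Example #4 below uses potential-based shaping: instead of paying the shaping term itself at every step, it pays the difference between the current and previous shaping values, so the agent is rewarded for progress rather than for sitting in a good state (the classic result of Ng et al., 1999, here without the discount factor). A minimal standalone sketch of the pattern, under a hypothetical PotentialShaper name:

    class PotentialShaper:
        """Pay out the change in a state-dependent potential, not the potential itself."""

        def __init__(self):
            self.prev = None  # no potential observed yet

        def step(self, potential):
            # first call returns 0; later calls return the delta
            delta = 0.0 if self.prev is None else potential - self.prev
            self.prev = potential
            return delta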
Example #4
    def reward_function(self,
                        states_list,
                        timestep=0,
                        print_states=True,
                        print_additionnal_info=True):
        ########## WORK NEEDED #############
        ### You need to shape the reward ###
        ####################################
        """
        Available information:
        x : horizontal position
        y : vertical position
        angle : angle relative to the vertical (negative = right, positive = left)
        first_leg_contact : Left leg touches ground
        second_leg_contact : Right leg touches ground
        throttle : Throttle intensity
        gimbal : Gimbal angle relative to the rocket axis
        velocity_x : horizontal velocity (negative : going Left, positive : going Right)
        velocity_y : vertical velocity (negative : going Down, positive : going Up)
        angular_velocity : angular velocity (negative : turning anti-clockwise, positive : turning clockwise)
        distance : distance from the center of the ship
        velocity : norm of the velocity vector (velocity_x, velocity_y)
        landed : both legs touching the ground
        landed_full : both legs touching the ground for one second (60 frames)
        states : dictionary containing all variables of the state vector, for display purposes
        additionnal_information : dictionary containing additional information, for display purposes

        **Hints**
        Be careful with the signs of the different variables.

        Go on and shape the reward!
        """
        # states information extraction
        (
            x,
            y,
            angle,
            first_leg_contact,
            second_leg_contact,
            throttle,
            gimbal,
            velocity_x,
            velocity_y,
            angular_velocity,
            distance,
            velocity,
            landed,
            landed_full,
            states,
            additionnal_information,
        ) = info_extractor(states_list, self.env)

        #if timestep%10 == 0:
        #    print(f"velocity_y {velocity_y}")
        #    print(f"angle {angle}")
        ######## REWARD SHAPING ###########
        # reward definition (per timestep): you have to fill it in!
        shape = 0
        reward = 0
        # penalty on a bad position/attitude
        shape -= (0.1 * abs(distance)
                  + 0.5 * abs(velocity)
                  + 5 * abs(angle)
                  + 0.15 * abs(angular_velocity)
                  + 10 * abs(x)
                  + 0.5 * max(velocity_y - y, 0))
        # .1 * max((velocity - y), 0)

        # partial credit for each leg touching the ground
        shape += 0.1 * (float(first_leg_contact) + float(second_leg_contact))

        # potential-based shaping: reward the change in the shaping term
        # between consecutive steps rather than the term itself
        # (self.prev_shape is assumed to be initialised to None elsewhere)
        if self.prev_shape is not None:
            reward += shape - self.prev_shape
        self.prev_shape = shape

        # keep the per-step signal small (np assumed imported as numpy at module level)
        reward = np.clip(reward, -1, 1)

        # large terminal bonus on a full landing, scaled by closeness to the
        # pad center; assigned after the clip, so it can exceed 1
        if landed_full:
            reward = 100 - 100 * abs(x)

        display_info(states,
                     additionnal_information,
                     reward,
                     timestep,
                     verbose=False)

        return reward
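As with the first example above, it helps to sanity-check the shaping delta on fabricated states before training. The sketch below is a standalone copy of example #4's shaping term (hypothetical numbers, no environment required):

    def shape_term(distance, velocity, angle, angular_velocity, x, velocity_y, y,
                   first_leg_contact=False, second_leg_contact=False):
        """Standalone copy of example #4's shaping term for offline inspection."""
        s = -(0.1 * abs(distance) + 0.5 * abs(velocity) + 5 * abs(angle)
              + 0.15 * abs(angular_velocity) + 10 * abs(x)
              + 0.5 * max(velocity_y - y, 0))
        s += 0.1 * (float(first_leg_contact) + float(second_leg_contact))
        return s

    far = shape_term(distance=1.0, velocity=1.0, angle=0.2, angular_velocity=0.1,
                     x=0.5, velocity_y=-1.0, y=5.0)
    near = shape_term(distance=0.1, velocity=0.2, angle=0.02, angular_velocity=0.01,
                      x=0.05, velocity_y=-0.2, y=0.5)
    print(near - far)  # positive delta: a step toward the pad earns positive reward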