Ejemplo n.º 1
0
    def _normalise_error(self, angular_error: float):
        """
        Given an angle off of a target direction in degrees, calculates a
        normalised error in [0,1]. The angular error is firstly transformed
        to interval [-180,180] to account for the fact the agent can turn
        left or right to face the target.

        :param angular_error: float, angle off target in degrees
        :return: float, normalised error in [0,1]
        """
        reduced_angle_error = abs(reduce_reflex_angle_deg(angular_error))
        return super()._normalise_error(reduced_angle_error)
Ejemplo n.º 2
0
    def test_observe_first_state_correct_track_error(self):
        self.setUp()
        sim = SimStub.make_valid_state_stub(self.task)
        _ = self.task.observe_first_state(sim)

        track_deg = prp.Vector2.from_sim(sim).heading_deg()
        target_track_deg = sim[self.task.target_track_deg]
        error_deg = track_deg - target_track_deg
        expected_acute_error_deg = utils.reduce_reflex_angle_deg(error_deg)
        actual_error_deg = sim[self.task.track_error_deg]

        self.assertAlmostEqual(expected_acute_error_deg, actual_error_deg)
Ejemplo n.º 3
0
    def _get_reward(self, sim: Simulation, last_state: NamedTuple, action: NamedTuple, new_state: NamedTuple) -> float:
        # Get negative reward proportional to normalised heading and altitude errors
        track_deg = prp.Vector2(last_state.velocities_v_east_fps, last_state.velocities_v_north_fps).heading_deg()
        normalised_error_track_deg = math.fabs(utils.reduce_reflex_angle_deg(track_deg - self.INITIAL_HEADING_DEG)) / 180.0
        normalised_altitude_error = min(math.fabs(last_state.position_h_sl_ft - self.TARGET_ALTITUDE_FT) / self.INITIAL_ALTITUDE_FT, 1.0)
        target_reward = - normalised_error_track_deg - normalised_altitude_error

        # Get negative reward proportional to normalised speed angles and vertical speed
        normalised_angle_speed = min((math.fabs(last_state.velocities_p_rad_sec) + math.fabs(last_state.velocities_q_rad_sec) + math.fabs(last_state.velocities_r_rad_sec)) / (3*2*math.pi), 1.0)
        normalised_vertical_speed = min(math.fabs(last_state.velocities_v_down_fps) / self.INITIAL_ALTITUDE_FT, 1.0)
        stabilisation_reward = - math.exp(- sim[self.nb_episodes] / 100) * (normalised_angle_speed + normalised_vertical_speed)
        
        return target_reward + stabilisation_reward
Ejemplo n.º 4
0
 def _update_track_error(self, sim: Simulation):
     v_north_fps, v_east_fps = sim[prp.v_north_fps], sim[prp.v_east_fps]
     track_deg = prp.Vector2(v_east_fps, v_north_fps).heading_deg()
     target_track_deg = sim[self.target_track_deg]
     error_deg = utils.reduce_reflex_angle_deg(track_deg - target_track_deg)
     sim[self.track_error_deg] = error_deg
Ejemplo n.º 5
0
 def _update_heading_error(self, sim: Simulation):
     error_deg = utils.reduce_reflex_angle_deg(
         sim[self.target_heading_deg] - sim[prp.heading_deg])
     sim[self.heading_error_deg] = error_deg