def play(self, pod: PodState) -> PlayOutput:
    check1 = self.board.checkpoints[pod.nextCheckId]
    check2 = self.board.get_check(pod.nextCheckId + 1)
    c1_to_p = pod.pos - check1
    c1_to_p_len = c1_to_p.length()
    c1_to_c2 = check2 - check1
    c1_to_c2_len = c1_to_c2.length()

    # Unit vector splitting the difference between "toward the pod" and "away from
    # the next check", as seen from the current check. (Each vector is scaled by its
    # own length before subtracting - otherwise the divisions would cancel out in
    # the normalize() and have no effect.)
    midpoint = ((c1_to_p / c1_to_p_len) - (c1_to_c2 / c1_to_c2_len)).normalize()

    target = check1
    if c1_to_p_len > Constants.max_vel() * 6:
        # Still far away. Aim for a point that will help us turn toward the next check.
        target = target + (midpoint * Constants.check_radius() * 2)
    # else: We're getting close to the check. Stop fooling around and go to it.

    # OK, now we've got a target point. Do whatever it takes to get there.
    pod_to_target = target - pod.pos
    ang_diff_to_target = math.fabs(clean_angle(pod.angle - pod_to_target.angle()))

    if ang_diff_to_target < 2 * Constants.max_turn():
        thrust = Constants.max_thrust()
    elif ang_diff_to_target < 4 * Constants.max_turn():
        # Scale thrust linearly from full (at 2 * max_turn) down to 0 (at 4 * max_turn)
        thrust = ((4 * Constants.max_turn()) - ang_diff_to_target) \
            / (2 * Constants.max_turn()) * Constants.max_thrust()
    else:
        thrust = 0

    # Offset the target against the current velocity to counteract drift
    return PlayOutput(target - (2 * pod.vel), thrust)
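# The angle helpers used above are defined elsewhere in the repo. A minimal sketch of
# plausible implementations, inferred only from how they are used here (clean_angle
# normalizes to [-pi, pi], per the legal_angle docstring below; within clamps to a
# range) - an assumption, not necessarily the exact originals:
import math

def clean_angle(angle: float) -> float:
    """Normalize an angle to the range [-pi, pi)."""
    return (angle + math.pi) % (2 * math.pi) - math.pi

def within(value: float, lo: float, hi: float) -> float:
    """Clamp value to the closed range [lo, hi]."""
    return max(lo, min(hi, value))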
def play_to_action(self, thrust: int, angle: float) -> int:
    """
    Map a legal play (angle/thrust) to a discrete action index, rounding
    down to the nearest point on the discretization grid
    """
    thrust_pct = thrust / Constants.max_thrust()
    angle_pct = (angle + Constants.max_turn()) / (2 * Constants.max_turn())
    thrust_idx = math.floor(thrust_pct * (self.num_thrust - 1))
    angle_idx = math.floor(angle_pct * (self.num_angle - 1))
    return thrust_idx * self.num_angle + angle_idx
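# A worked example of the mapping above, assuming max_thrust() == 100 and a
# discretizer built with num_thrust = 3, num_angle = 3 (hypothetical sizes):
#   play_to_action(50, max_turn())
#     thrust_pct = 50 / 100 = 0.5            -> thrust_idx = floor(0.5 * 2) = 1
#     angle_pct  = (mt + mt) / (2 * mt) = 1.0 -> angle_idx  = floor(1.0 * 2) = 2
#     action = 1 * 3 + 2 = 5
# i.e. the action index walks the (thrust, angle) grid row by row.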
def __init__(self, num_thrust: int = 2, num_angle: int = 3):
    self.num_thrust = num_thrust
    self.num_angle = num_angle
    self.num_actions = num_thrust * num_angle

    # Evenly spaced thrust and angle values covering the full legal range
    thrusts = _arange(0, Constants.max_thrust(), num_thrust)
    angs = _arange(-Constants.max_turn(), Constants.max_turn(), num_angle)
    self._action_table = [(int(thr), ang) for thr in thrusts for ang in angs]
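# _arange is a repo-internal helper. Based on what the tests below expect (num
# values evenly spaced from start to stop, inclusive of both endpoints), a minimal
# sketch might look like this - treat it as an assumption, not the actual
# implementation:
from typing import List

def _arange(start: float, stop: float, num: int) -> List[float]:
    """Return num evenly spaced values from start to stop, inclusive."""
    step = (stop - start) / (num - 1)
    return [start + i * step for i in range(num)]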
def legal_angle(req_angle: float, pod_angle: float) -> float:
    """
    Get the actual angle to apply, given the player's input

    :param req_angle: Angle that the player requested
    :param pod_angle: Angle in which the pod is facing
    :return: Angle to use for calculations (within [-pi, pi])
    """
    # Clamp the requested turn to what the pod can actually do in one step
    d_angle = within(clean_angle(req_angle - pod_angle),
                     -Constants.max_turn(), Constants.max_turn())
    return clean_angle(pod_angle + d_angle)
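# An example of the clamping behavior, assuming max_turn() is about 0.314 rad
# (the 18-degree Coders Strike Back limit - an assumption about Constants):
#   legal_angle(1.0, 0.0)  -> ~0.314  (request exceeds max_turn, so it is clamped)
#   legal_angle(0.1, 0.0)  -> 0.1     (request is within limits, used as-is)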
def test_action_to_output_turn_right(self):
    action = self.ad.play_to_action(50, Constants.max_turn())
    pod_pos = Vec2(100, 100)
    po = self.ad.action_to_output(action, 1.23, pod_pos)

    # The thrust should not have changed
    self.assertEqual(po.thrust, 50)

    # The pod is at (100, 100) with angle 1.23, and requested a turn of max_turn.
    # If we undo the translation and the rotation, we should be left with a vector
    # down the positive X-axis (i.e. angle 0)
    rel_target = (po.target - pod_pos).rotate(-1.23 - Constants.max_turn())
    self.assertAlmostEqual(rel_target.y, 0)
    self.assertGreater(rel_target.x, 1)
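# Vec2 is the repo's 2D vector class. For reference, a minimal sketch covering only
# the operations this section relies on (an assumption - the real class surely has
# more):
import math

class Vec2:
    def __init__(self, x: float, y: float):
        self.x, self.y = x, y
    def __add__(self, other: 'Vec2') -> 'Vec2':
        return Vec2(self.x + other.x, self.y + other.y)
    def __sub__(self, other: 'Vec2') -> 'Vec2':
        return Vec2(self.x - other.x, self.y - other.y)
    def __mul__(self, scalar: float) -> 'Vec2':
        return Vec2(self.x * scalar, self.y * scalar)
    __rmul__ = __mul__  # allows expressions like 2 * pod.vel
    def __truediv__(self, scalar: float) -> 'Vec2':
        return Vec2(self.x / scalar, self.y / scalar)
    def length(self) -> float:
        return math.hypot(self.x, self.y)
    def normalize(self) -> 'Vec2':
        return self / self.length()
    def angle(self) -> float:
        return math.atan2(self.y, self.x)
    def rotate(self, angle: float) -> 'Vec2':
        c, s = math.cos(angle), math.sin(angle)
        return Vec2(self.x * c - self.y * s, self.x * s + self.y * c)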
def re_dcat(board: PodBoard, pod: PodState) -> float:
    pod_to_check = board.checkpoints[pod.nextCheckId] - pod.pos

    # Scaled distance to next check
    dist_penalty = pod_to_check.length() / DIST_BASE

    # Bonus for each check hit. By weighting each check at 3 (see the return value),
    # we ensure that the reward is always higher after hitting a check. (If left at 1,
    # the dist_penalty could be slightly greater than 1, leading to a DECREASE in
    # reward for hitting a check.)
    checks_hit = len(board.checkpoints) * pod.laps + pod.nextCheckId

    # A tiny bit for the angle. This should really be tiny - its purpose is to serve
    # as a tie-breaker (to prevent the pod from going into orbit around a check).
    angle = math.fabs(clean_angle(pod_to_check.angle() - pod.angle))
    a_penalty = (angle / math.pi) / 10 if angle > Constants.max_turn() else 0

    # And finally: this can be important to prevent agents from doing nothing.
    # The reduction factor is slightly more than the number of turns it takes
    # (on average) to get from one check to another.
    turn_penalty = pod.turns / 20

    return 3 * (checks_hit + 1) \
        - dist_penalty \
        - a_penalty \
        - turn_penalty
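# A quick sanity check of the check-bonus weighting, with made-up numbers: suppose
# the pod sits right on a check (dist_penalty ~ 0) and the next check is nearly the
# maximum distance away (dist_penalty jumps to ~1.05 after the hit). Ignoring the
# angle and turn terms, the change in reward for hitting the check is:
#   weight 3:  delta = 3 - 1.05 = +1.95  (reward increases, as intended)
#   weight 1:  delta = 1 - 1.05 = -0.05  (reward DECREASES for hitting a check)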
def test_get_best_action_works_behind_right(self):
    board = PodBoard([Vec2(5000, 5000), Vec2(1000, 1000)])
    # Pod is directly right of the check, but facing away (slightly to the right)
    pod = PodState(Vec2(7000, 5000))
    pod.angle = -0.000001
    self.__do_get_best_action_assert(board, pod, 0, -Constants.max_turn())
def test_get_best_action_works_right(self):
    board = PodBoard([Vec2(5000, 5000), Vec2(1000, 1000)])
    # Pod is directly below the check, but facing so that the check is behind
    # it and to its right
    pod = PodState(Vec2(5000, 0))
    pod.angle = math.pi * 1.25
    self.__do_get_best_action_assert(board, pod, 0, -Constants.max_turn())
def re_dca(board: PodBoard, pod: PodState) -> float:
    checks_hit = len(board.checkpoints) * pod.laps + pod.nextCheckId

    pod_to_check = board.checkpoints[pod.nextCheckId] - pod.pos
    angle = math.fabs(clean_angle(pod_to_check.angle() - pod.angle))
    a_penalty = (angle / math.pi) / 10 if angle > Constants.max_turn() else 0

    dist_penalty = pod_to_check.length() / DIST_BASE

    return 3 * (checks_hit + 1) - dist_penalty - a_penalty
def __init__(self, board: PodBoard):
    super().__init__()

    # Allow the agent to go beyond the bounds - due to the nature of
    # the rounding functions, it's unlikely the agent will ever give
    # us the actual min or max
    scaled_max_turn = Constants.max_turn() * 1.1
    scaled_max_thrust = Constants.max_thrust() + 2 * THRUST_PADDING
    angle_spec = array_spec.BoundedArraySpec(
        (), np.float64, minimum=-scaled_max_turn, maximum=scaled_max_turn)
    thrust_spec = array_spec.BoundedArraySpec(
        (), np.int32, minimum=0, maximum=scaled_max_thrust)
    self._action_spec = {
        'angle': angle_spec,
        'thrust': thrust_spec
    }

    angles_spec = array_spec.BoundedArraySpec(
        (3,), np.float64, minimum=-math.pi, maximum=math.pi)
    dist_spec = array_spec.BoundedArraySpec(
        (3,), np.float64, minimum=0, maximum=Constants.world_x() * 10)
    self._observation_spec = {
        'angles': angles_spec,
        'distances': dist_spec
    }

    self._time_step_spec = ts.TimeStep(
        step_type=array_spec.ArraySpec(shape=(), dtype=np.int32, name='step_type'),
        reward=array_spec.ArraySpec(shape=(), dtype=np.float32, name='reward'),
        discount=array_spec.ArraySpec(shape=(), dtype=np.float32, name='discount'),
        observation=self._observation_spec
    )

    self._board = board
    self._player = Player(AgentController())
    self._initial_state = self.get_state()
    self._episode_ended = False
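# A small usage sketch: drawing a random action from the spec above, e.g. to
# smoke-test the environment. This assumes the class is a TF-Agents PyEnvironment
# exposing the usual action_spec() accessor; "PodEnv" is a hypothetical name for it.
import numpy as np
from tf_agents.specs import array_spec

env = PodEnv(PodBoard([Vec2(5000, 5000), Vec2(1000, 1000)]))  # hypothetical setup
rng = np.random.RandomState(42)
# sample_spec_nest samples a value for every spec in the nest
action = array_spec.sample_spec_nest(env.action_spec(), rng)
print(action)  # e.g. {'angle': 0.27..., 'thrust': 63}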
def test_actions_produce_all_possible_combinations(self):
    # First, collect all unique values
    outputs = set()
    angles = set()
    thrusts = set()
    for action in range(0, self.ad.num_actions):
        thrust, angle = self.ad.action_to_play(action)
        outputs.add((thrust, angle))
        angles.add(angle)
        thrusts.add(thrust)

    # Ensure that we have the correct number of each
    self.assertEqual(len(outputs), self.ad.num_actions)
    self.assertEqual(len(angles), self.ad.num_angle)
    self.assertEqual(len(thrusts), self.ad.num_thrust)

    # Ensure that each possibility is present
    thrust_inc = Constants.max_thrust() / (self.ad.num_thrust - 1)
    for t in range(0, self.ad.num_thrust):
        self.assertIn(t * thrust_inc, thrusts)
    ang_inc = (Constants.max_turn() * 2) / (self.ad.num_angle - 1)
    for a in range(0, self.ad.num_angle):
        self.assertIn(a * ang_inc - Constants.max_turn(), angles)
def test_play_to_action_to_play_rounds_angle(self):
    action = self.ad.play_to_action(0, Constants.max_turn() * 0.001)
    thrust, angle = self.ad.action_to_play(action)
    self.assertEqual(thrust, 0)
    self.assertEqual(angle, 0)
def test_play_to_action_to_play_both_min(self):
    self.__do_p_a_p(0, -Constants.max_turn())
def test_play_to_action_to_play_both_max(self):
    self.__do_p_a_p(Constants.max_thrust(), Constants.max_turn())
def test_play_to_action_to_play_max_angle(self):
    self.__do_p_a_p(50, Constants.max_turn())