def play(self, pod: PodState) -> PlayOutput:
    """
    Choose a target point and a thrust that steer the pod to its next checkpoint.

    While the pod is more than six max-velocity steps away from the checkpoint,
    the target is shifted two checkpoint radii away from the checkpoint's
    centre along a direction derived from the pod position and the following
    checkpoint. Once closer than that, the checkpoint itself is the target.

    Thrust depends on how far the pod's heading is from the target direction:
    full thrust below two max-turns, a linear ramp from full thrust down to
    zero between two and four max-turns, and zero beyond that.

    :param pod: Current state of the pod
    :return: The play to make: a target point (compensated by twice the
        current velocity) and a thrust
    """
    check1 = self.board.checkpoints[pod.nextCheckId]
    # presumably get_check wraps around past the last checkpoint - verify
    check2 = self.board.get_check(pod.nextCheckId + 1)

    c1_to_p = pod.pos - check1
    c1_to_p_len = c1_to_p.length()
    c1_to_c2 = check2 - check1
    c1_to_c2_len = c1_to_c2.length()

    # NOTE(review): both vectors are divided by c1_to_c2_len, so this is the
    # normalized (pod - check2) direction. If the intent was to combine two
    # unit vectors, the first divisor should be c1_to_p_len - confirm.
    midpoint = ((c1_to_p / c1_to_c2_len) - (c1_to_c2 / c1_to_c2_len)).normalize()

    target = check1
    if c1_to_p_len > Constants.max_vel() * 6:
        # Still far away. Aim for a point that will help us turn toward the
        # next check
        target = target + (midpoint * Constants.check_radius() * 2)
    # else: We're getting close to the check. Stop fooling around and go to it.

    # OK, now we've got a target point. Do whatever it takes to get there.
    pod_to_target = target - pod.pos
    ang_diff_to_target = math.fabs(
        clean_angle(math.fabs(pod.angle - pod_to_target.angle())))

    full_thrust_limit = 2 * Constants.max_turn()
    zero_thrust_limit = 4 * Constants.max_turn()
    if ang_diff_to_target < full_thrust_limit:
        thrust = Constants.max_thrust()
    elif ang_diff_to_target < zero_thrust_limit:
        # Ramp down linearly: full thrust at full_thrust_limit, zero at
        # zero_thrust_limit. The numerator must be (limit - diff); the
        # reversed subtraction produced a negative thrust over this range.
        thrust = ((zero_thrust_limit - ang_diff_to_target)
                  / (zero_thrust_limit - full_thrust_limit)
                  * Constants.max_thrust())
    else:
        thrust = 0

    # Shift the aim point back by twice the current velocity
    return PlayOutput(target - (2 * pod.vel), thrust)
def test_get_best_action_works_straight(self):
    """
    A pod that is already looking straight at the first checkpoint is checked
    against an expectation of max thrust and 0 (presumably the turn angle).
    """
    checkpoints = [Vec2(5000, 5000), Vec2(1000, 1000)]
    board = PodBoard(checkpoints)

    # Pod shares the check's x coordinate and is looking straight at it
    start = Vec2(5000, 0)
    pod = PodState(start)
    pod.angle = math.pi / 2

    expected_thrust = Constants.max_thrust()
    self.__do_get_best_action_assert(board, pod, expected_thrust, 0)
def play_to_action(self, thrust: int, angle: float) -> int:
    """
    Given a legal play (angle/thrust), find the nearest discrete action.

    The thrust range [0, max_thrust] is mapped onto ``num_thrust`` indices and
    the angle range [-max_turn, max_turn] onto ``num_angle`` indices. Each
    value is rounded to the closest index; flooring would always pick the
    lower neighbour and could drop a whole step on tiny floating point
    errors. Indices are clamped so a slightly out-of-range play still maps
    to a valid action.

    :param thrust: Requested thrust
    :param angle: Requested turn angle
    :return: Action index, laid out as ``thrust_idx * num_angle + angle_idx``
    """
    thrust_pct = thrust / Constants.max_thrust()
    angle_pct = (angle + Constants.max_turn()) / (2 * Constants.max_turn())

    last_thrust_idx = self.num_thrust - 1
    last_angle_idx = self.num_angle - 1

    # round() with a single argument returns an int
    thrust_idx = round(thrust_pct * last_thrust_idx)
    angle_idx = round(angle_pct * last_angle_idx)

    # Keep both indices inside the table
    thrust_idx = max(0, min(last_thrust_idx, thrust_idx))
    angle_idx = max(0, min(last_angle_idx, angle_idx))

    return thrust_idx * self.num_angle + angle_idx
def __init__(self, num_thrust: int = 2, num_angle: int = 3):
    """
    Build the table of discrete actions.

    :param num_thrust: Number of thrust values, taken from 0 to max thrust
    :param num_angle: Number of angle values, taken from -max turn to max turn
    """
    self.num_thrust = num_thrust
    self.num_angle = num_angle
    self.num_actions = num_thrust * num_angle

    thrust_values = _arange(0, Constants.max_thrust(), num_thrust)
    turn_values = _arange(-Constants.max_turn(), Constants.max_turn(), num_angle)

    # One (thrust, angle) entry per action; angles vary fastest and thrusts
    # are stored as ints
    table = []
    for thrust_value in thrust_values:
        for turn_value in turn_values:
            table.append((int(thrust_value), turn_value))
    self._action_table = table
def step(self, pod: PodState, play: PlayOutput, output: PodState = None) -> PodState:
    """
    Advance one pod by a single turn using the given play.

    Game rules for a turn, as implemented below:
      1. Rotation: the pod turns toward the play's target point; the angle
         actually used is whatever legal_angle allows (per the game rules
         this is a maximum of 18 degrees per turn).
      2. Acceleration: the facing vector is scaled by the thrust (limited to
         [0, max thrust] and converted to an int) and added to the velocity.
      3. Movement: the new velocity is added to the position.
      4. Friction: the velocity is multiplied by the friction constant.
      5. Rounding: the position is rounded and the velocity is truncated.

    Afterwards the turn counter is incremented and, if the new position is
    within the radius of the pod's next checkpoint, the pod advances to the
    following checkpoint. Passing the last checkpoint wraps back to the
    first one and counts a lap.

    Collisions, boost and shield from the game rules are not handled here.
    Angles are absolute: 0 degrees faces EAST, 90 degrees faces SOUTH.

    :param pod: Initial state
    :param play: Action to play
    :param output: Output state to update (may be the same as input pod).
        If not given, a new one will be created.
    :return: The new pod state (same object as output if given)
    """
    if output is None:
        output = PodState()

    # 1. Rotation
    wanted_angle = (play.target - pod.pos).angle()
    new_angle = legal_angle(wanted_angle, pod.angle)
    output.angle = new_angle

    # 2. Acceleration
    heading = UNIT.rotate(new_angle)
    applied_thrust = int(within(play.thrust, 0, Constants.max_thrust()))
    moving_vel = pod.vel + (heading * applied_thrust)

    # 3. Movement
    moved_pos = pod.pos + moving_vel

    # 4. Friction
    slowed_vel = moving_vel * Constants.friction()

    # 5. Rounding
    output.pos = moved_pos.round()
    output.vel = slowed_vel.truncate()

    # Update progress
    output.turns = pod.turns + 1
    output.nextCheckId = pod.nextCheckId
    output.laps = pod.laps

    to_check = self.checkpoints[pod.nextCheckId] - output.pos
    if to_check.square_length() >= Constants.check_radius_sq():
        # Checkpoint not reached this turn
        return output

    output.nextCheckId += 1
    if output.nextCheckId >= len(self.checkpoints):
        # Went past the last checkpoint: start the next lap
        output.nextCheckId = 0
        output.laps += 1
    return output
def __init__(self, board: PodBoard):
    """
    Set up the action, observation and time-step specs for the given board.

    Actions are a dict with a scalar float 'angle' and a scalar int32
    'thrust'. Observations are a dict with three 'angles' and three
    'distances'.

    :param board: The board on which the pod will race
    """
    super().__init__()

    # Allow the agent to go beyond the bounds - due to the nature of
    # the rounding functions, it's unlikely the agent will ever give
    # us the actual min or max
    scaled_max_turn = Constants.max_turn() * 1.1
    scaled_max_thrust = Constants.max_thrust() + 2 * THRUST_PADDING

    # np.float64 is used instead of the np.float alias, which was removed
    # in NumPy 1.24; both resolve to the same 64-bit float dtype.
    angle_spec = array_spec.BoundedArraySpec(
        (), np.float64, minimum=-scaled_max_turn, maximum=scaled_max_turn)
    thrust_spec = array_spec.BoundedArraySpec(
        (), np.int32, minimum=0, maximum=scaled_max_thrust)
    self._action_spec = {
        'angle': angle_spec,
        'thrust': thrust_spec
    }

    angles_spec = array_spec.BoundedArraySpec(
        (3,), np.float64, minimum=-math.pi, maximum=math.pi)
    dist_spec = array_spec.BoundedArraySpec(
        (3,), np.float64, minimum=0, maximum=Constants.world_x() * 10)
    self._observation_spec = {
        'angles': angles_spec,
        'distances': dist_spec
    }

    self._time_step_spec = ts.TimeStep(
        step_type=array_spec.ArraySpec(shape=(), dtype=np.int32, name='step_type'),
        reward=array_spec.ArraySpec(shape=(), dtype=np.float32, name='reward'),
        discount=array_spec.ArraySpec(shape=(), dtype=np.float32, name='discount'),
        observation=self._observation_spec
    )

    self._board = board
    self._player = Player(AgentController())
    # Snapshot taken right after construction
    self._initial_state = self.get_state()
    self._episode_ended = False
def test_actions_produce_all_possible_combinations(self):
    """
    Every action index must decode to a distinct (thrust, angle) pair, and
    the decoded values must be the evenly spaced thrusts and angles.
    """
    discretizer = self.ad

    # First, decode every action and collect the unique values
    plays = [discretizer.action_to_play(action)
             for action in range(discretizer.num_actions)]
    outputs = {(thrust, angle) for thrust, angle in plays}
    angles = {angle for _, angle in plays}
    thrusts = {thrust for thrust, _ in plays}

    # Ensure that we have the correct number of each
    self.assertEqual(len(outputs), discretizer.num_actions)
    self.assertEqual(len(angles), discretizer.num_angle)
    self.assertEqual(len(thrusts), discretizer.num_thrust)

    # Ensure that each possibility is present
    thrust_step = Constants.max_thrust() / (discretizer.num_thrust - 1)
    for step_idx in range(discretizer.num_thrust):
        self.assertIn(step_idx * thrust_step, thrusts)

    angle_step = (Constants.max_turn() * 2) / (discretizer.num_angle - 1)
    for step_idx in range(discretizer.num_angle):
        self.assertIn(step_idx * angle_step - Constants.max_turn(), angles)
def play(self, pod: PodState) -> PlayOutput:
    """
    Produce a random play: a random target point scaled by the world
    dimensions and a random thrust scaled by the max thrust and rounded up.
    The pod's state is not consulted.

    :param pod: Current pod state (unused)
    :return: The randomly chosen play
    """
    # The random() calls stay in x, y, thrust order
    target_x = random() * Constants.world_x()
    target_y = random() * Constants.world_y()
    thrust = math.ceil(random() * Constants.max_thrust())
    return PlayOutput(Vec2(target_x, target_y), thrust)
def test_play_to_action_to_play_both_max(self):
    """Run the __do_p_a_p helper with maximum thrust and maximum turn."""
    thrust = Constants.max_thrust()
    angle = Constants.max_turn()
    self.__do_p_a_p(thrust, angle)
def test_play_to_action_to_play_max_thrust(self):
    """Run the __do_p_a_p helper with maximum thrust and an angle of 0."""
    thrust = Constants.max_thrust()
    self.__do_p_a_p(thrust, 0)