Example #1
    def grid(rows: int = 3,
             cols: int = 3,
             x_spacing: int = 4000,
             y_spacing: int = 3000) -> 'PodBoard':
        """
        Generate a board with checks in grid form:
        1 2 3
        4 5 6
        7 8 9
        """
        checks = []

        x_center = Constants.world_x() / 2
        y_center = Constants.world_y() / 2

        # 5 rows: -2, -1, 0, 1, 2
        # 4 rows: -1.5, -0.5, 0.5, 1.5
        # 3 rows: -1, 0, 1
        # => start at -(r-1)/2
        row_start = (1 - rows) / 2
        col_start = (1 - cols) / 2

        for row in range(rows):
            y_off = (row_start + row) * y_spacing
            for col in range(cols):
                x_off = (col_start + col) * x_spacing
                checks.append(Vec2(x_center + x_off, y_center + y_off))

        return PodBoard(checks)
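
A minimal usage sketch (assuming, as the return annotations suggest, that grid and the other board factories are static methods on PodBoard):

# Build a 3x4 grid of checkpoints centered on the board
board = PodBoard.grid(rows=3, cols=4)
assert len(board.checkpoints) == 12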
Example #2
def _prepare_size():
    plt.rcParams['figure.figsize'] = [
        Constants.world_x() / 1000,
        Constants.world_y() / 1000
    ]
    plt.rcParams['figure.dpi'] = 100
    matplotlib.rcParams['animation.embed_limit'] = 2**27
Example #3
    def tester() -> 'PodBoard':
        """
        Generate a board laid out to test as many situations as possible
        (start) -> 0 -> 1: straight line
        1 -> 2: 180° turn
        2 -> 3: 90° turn
        3 -> 4 -> 5 -> 6: curve around to the right
        6 -> 7 (start): curve to the left
        """
        checks = []
        start = Vec2(Constants.world_x() / 10, Constants.world_y() / 2)
        checks.append(start + Vec2(5000, 0))  # straight ahead
        checks.append(checks[-1] + Vec2(6000, 0))  # straight ahead

        checks.append(checks[-1] + Vec2(-3000, 0))  # straight back

        checks.append(checks[-1] + Vec2(0, 2500))  # turn 90°

        checks.append(checks[-1] + Vec2(-3000, 1500))  # curve around
        checks.append(checks[-1] + Vec2(-3000, -1500))  # curve around
        checks.append(checks[-1] + Vec2(0, -5500))  # curve around

        checks.append(start)  # turn other way

        return PodBoard(checks)
Example #4
def _get_field_artist() -> Rectangle:
    """
    Get an artist to draw the board
    """
    return Rectangle((0, 0),
                     Constants.world_x(),
                     Constants.world_y(),
                     ec="black",
                     fc="white")
Example #5
    def __init__(self, num_thrust: int = 2, num_angle: int = 3):
        self.num_thrust = num_thrust
        self.num_angle = num_angle
        self.num_actions = num_thrust * num_angle

        thrusts = _arange(0, Constants.max_thrust(), num_thrust)
        angs = _arange(-Constants.max_turn(), Constants.max_turn(), num_angle)
        self._action_table = [(int(thr), ang) for thr in thrusts
                              for ang in angs]
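
_arange is a project helper not shown in these examples; the assertions in Example #29 imply it produces num evenly spaced values that include both endpoints. A plausible sketch under that assumption:

from typing import List

def _arange(start: float, stop: float, num: int) -> List[float]:
    # num evenly spaced values from start to stop, inclusive of both
    # ends (assumes num >= 2)
    step = (stop - start) / (num - 1)
    return [start + i * step for i in range(num)]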
Example #6
 def play_to_action(self, thrust: int, angle: float) -> int:
     """
     Given a legal play (angle/thrust), find the nearest discrete action
     """
     thrust_pct = thrust / Constants.max_thrust()
     angle_pct = (angle + Constants.max_turn()) / (2 * Constants.max_turn())
     # Round to the nearest grid point; flooring would bias every play
     # toward the lower thrust/angle bucket
     thrust_idx = round(thrust_pct * (self.num_thrust - 1))
     angle_idx = round(angle_pct * (self.num_angle - 1))
     return thrust_idx * self.num_angle + angle_idx
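
A quick way to check the discretization is to round-trip every action through action_to_play (exercised in Example #29) and back: each grid point should map to itself. The class name below is a stand-in, since the tests only show the object as self.ad:

ad = ActionDiscretizer(num_thrust=3, num_angle=3)  # hypothetical class name
for action in range(ad.num_actions):
    thrust, angle = ad.action_to_play(action)
    assert ad.play_to_action(thrust, angle) == action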
Example #7
    def step(self,
             pod: PodState,
             play: PlayOutput,
             output: PodState = None) -> PodState:
        """
        For the given pod, implement the given play.
        On each turn the pods movements are computed this way:
            Rotation: the pod rotates to face the target point, with a maximum of 18 degrees (except for the 1rst round).
            Acceleration: the pod's facing vector is multiplied by the given thrust value. The result is added to the current speed vector.
            Movement: The speed vector is added to the position of the pod. If a collision would occur at this point, the pods rebound off each other.
            Friction: the current speed vector of each pod is multiplied by 0.85
            The speed's values are truncated and the position's values are rounded to the nearest integer.
        Collisions are elastic. The minimum impulse of a collision is 120.
        A boost is in fact an acceleration of 650.
        A shield multiplies the Pod mass by 10.
        The provided angle is absolute. 0° means facing EAST while 90° means facing SOUTH.
        :param pod: Initial state
        :param play: Action to play
        :param output: Output state to update (may be the same as input pod). If not given, a new one will be created.
        :return: The new pod state (same object as output if given)
        """
        if output is None:
            output = PodState()

        # 1. Rotation
        requested_angle = (play.target - pod.pos).angle()
        angle = legal_angle(requested_angle, pod.angle)
        output.angle = angle

        # 2. Acceleration
        direction = UNIT.rotate(angle)  # avoid shadowing the builtin `dir`
        thrust = int(within(play.thrust, 0, Constants.max_thrust()))
        output.vel = pod.vel + (direction * thrust)

        # 3. Movement
        output.pos = pod.pos + output.vel

        # 4. Friction
        output.vel = output.vel * Constants.friction()

        # 5. Rounding
        output.pos = output.pos.round()
        output.vel = output.vel.truncate()

        # Update progress
        output.turns = pod.turns + 1
        output.nextCheckId = pod.nextCheckId
        output.laps = pod.laps
        check = self.checkpoints[pod.nextCheckId]
        if (check - output.pos).square_length() < Constants.check_radius_sq():
            output.nextCheckId += 1
            if output.nextCheckId >= len(self.checkpoints):
                output.nextCheckId = 0
                output.laps += 1

        return output
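
A minimal sketch of a single simulation turn, assuming step is a method of PodBoard and using the PlayOutput(target, thrust) constructor seen in Example #13:

# Aim straight at the next check at full throttle for one turn
board = PodBoard.circle(num_points=3)
pod = PodState(Vec2(2000, 2000))
play = PlayOutput(board.checkpoints[pod.nextCheckId], Constants.max_thrust())
pod = board.step(pod, play)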
Example #8
 def circle(num_points: int = 3, radius: float = 4000) -> 'PodBoard':
     """
     Generate a PodBoard with checkpoints arranged in a circle around the
     center of the board
     """
     center = Vec2(Constants.world_x() / 2, Constants.world_y() / 2)
     angle_diff = 2 * math.pi / num_points
     v = UNIT * radius
     checks = [center + v.rotate(i * angle_diff) for i in range(num_points)]
     return PodBoard(checks)
Example #9
def legal_angle(req_angle: float, pod_angle: float) -> float:
    """
    Get the actual angle to apply, given the player's input
    :param req_angle: Angle that the player requested
    :param pod_angle: Angle in which the pod is facing
    :return: Angle to use for calculations (within [-pi, pi])
    """
    d_angle = within(clean_angle(req_angle - pod_angle), -Constants.max_turn(),
                     Constants.max_turn())
    return clean_angle(pod_angle + d_angle)
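
within and clean_angle are helpers defined elsewhere in the project. From how they are used here, plausible sketches look like this (assumptions, not the project's actual code):

import math

def within(value: float, lo: float, hi: float) -> float:
    # Clamp value to the closed interval [lo, hi]
    return max(lo, min(hi, value))

def clean_angle(angle: float) -> float:
    # Normalize an angle into [-pi, pi)
    return (angle + math.pi) % (2 * math.pi) - math.pi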
Example #10
def _to_state(board: PodBoard, pod: PodState) -> Tuple[int, int, int, int]:
    vel = pod.vel.rotate(-pod.angle)

    check1 = (board.get_check(pod.nextCheckId) - pod.pos).rotate(-pod.angle)

    return (
        _discretize(vel.x / Constants.max_vel(), 10),
        _discretize(vel.y / Constants.max_vel(), 10),
        _discretize(check1.x / MAX_DIST, 30),
        _discretize(check1.y / MAX_DIST, 30),
    )
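
_discretize is likewise not shown. One plausible sketch, assuming the second argument is the number of buckets on each side of zero:

def _discretize(value: float, buckets: int) -> int:
    # Clamp to [-1, 1], then map onto an integer bucket in [-buckets, buckets]
    clamped = max(-1.0, min(1.0, value))
    return round(clamped * buckets)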
Example #11
 def test_action_to_output_turn_right(self):
     action = self.ad.play_to_action(50, Constants.max_turn())
     pod_pos = Vec2(100, 100)
     po = self.ad.action_to_output(action, 1.23, pod_pos)
     # The thrust should not have changed
     self.assertEqual(po.thrust, 50)
     # The pod is at (100, 100), angle 1.23, requested turn max_turn...
     # If we undo the move and rotate, we should have a vector down the X-axis (i.e. angle 0)
     rel_target = (po.target - pod_pos).rotate(-1.23 - Constants.max_turn())
     self.assertAlmostEqual(rel_target.y, 0)
     self.assertGreater(rel_target.x, 1)
Example #12
    def __init__(self, board: PodBoard):
        super().__init__()

        # Allow the agent to go beyond the bounds - due to the nature of
        # the rounding functions, it's unlikely the agent will ever give
        # us the actual min or max
        scaled_max_turn = Constants.max_turn() * 1.1
        scaled_max_thrust = Constants.max_thrust() + 2 * THRUST_PADDING
        angle_spec = array_spec.BoundedArraySpec(
            (),
            np.float32,  # np.float was removed from NumPy; float32 matches the TimeStep dtypes
            minimum=-scaled_max_turn,
            maximum=scaled_max_turn)
        thrust_spec = array_spec.BoundedArraySpec(
            (),
            np.int32,
            minimum=0,
            maximum=scaled_max_thrust)
        self._action_spec = {
            'angle': angle_spec,
            'thrust': thrust_spec
        }

        angles_spec = array_spec.BoundedArraySpec(
            (3,),
            np.float32,
            minimum=-math.pi,
            maximum=math.pi)
        dist_spec = array_spec.BoundedArraySpec(
            (3,),
            np.float32,
            minimum=0,
            maximum=Constants.world_x() * 10)

        self._observation_spec = {
            'angles': angles_spec,
            'distances': dist_spec
        }

        self._time_step_spec = ts.TimeStep(
            step_type=array_spec.ArraySpec(shape=(), dtype=np.int32, name='step_type'),
            reward=array_spec.ArraySpec(shape=(), dtype=np.float32, name='reward'),
            discount=array_spec.ArraySpec(shape=(), dtype=np.float32, name='discount'),
            observation=self._observation_spec
        )

        self._board = board
        self._player = Player(AgentController())
        self._initial_state = self.get_state()
        self._episode_ended = False
Example #13
    def play(self, pod: PodState) -> PlayOutput:
        check1 = self.board.checkpoints[pod.nextCheckId]
        check2 = self.board.get_check(pod.nextCheckId + 1)
        c1_to_p = (pod.pos - check1)
        c1_to_p_len = c1_to_p.length()
        c1_to_c2 = (check2 - check1)
        c1_to_c2_len = c1_to_c2.length()

        # Normalize each leg before subtracting: this gives a direction that
        # bisects "toward the pod" and "away from the next check"
        midpoint = ((c1_to_p / c1_to_p_len) -
                    (c1_to_c2 / c1_to_c2_len)).normalize()
        target = check1

        if c1_to_p_len > Constants.max_vel() * 6:
            # Still far away. Aim for a point that will help us turn toward the next check
            target = target + (midpoint * Constants.check_radius() * 2)
        # else: We're getting close to the check. Stop fooling around and go to it.

        # OK, now we've got a target point. Do whatever it takes to get there.
        pod_to_target = target - pod.pos
        ang_diff_to_target = math.fabs(
            clean_angle(pod.angle - pod_to_target.angle()))

        if ang_diff_to_target < 2 * Constants.max_turn():
            thrust = Constants.max_thrust()
        elif ang_diff_to_target < 4 * Constants.max_turn():
            # Ramp linearly: full thrust at 2*max_turn, down to zero at 4*max_turn
            thrust = (4 * Constants.max_turn() - ang_diff_to_target) / (
                2 * Constants.max_turn()) * Constants.max_thrust()
        else:
            thrust = 0

        return PlayOutput(target - (2 * pod.vel), thrust)
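
A quick sanity check of the thrust ramp above, with a stand-in value for max_turn (illustrative only):

def _ramp(ang_diff: float, max_turn: float, max_thrust: float) -> float:
    # Same formula as in play(): full thrust at 2*max_turn, zero at 4*max_turn
    return (4 * max_turn - ang_diff) / (2 * max_turn) * max_thrust

assert _ramp(2 * 0.25, 0.25, 100) == 100.0
assert _ramp(4 * 0.25, 0.25, 100) == 0.0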
Example #14
    def test_get_best_action_works_right(self):
        board = PodBoard([Vec2(5000, 5000), Vec2(1000, 1000)])
        # Pod is directly below the check, but the check is behind and to its right
        pod = PodState(Vec2(5000, 0))
        pod.angle = math.pi * 1.25

        self.__do_get_best_action_assert(board, pod, 0, -Constants.max_turn())
Example #15
    def trainer(num_checks: int = 3) -> 'PodBoard':
        """
        Generate a board with the given number of checks.
        They are all in a row, but at varying distances.
        The goal is to use it with gen_pods to generate test data with varying distances to the next check.
        """
        checks = [
            Vec2(Constants.check_radius() * ((i + 1)**2),
                 Constants.world_y() / 2) for i in range(num_checks)
        ]

        # Shift the checks to center them
        width = checks[-1].x - checks[0].x
        x_start = (Constants.world_x() - width) / 2 - checks[0].x

        return PodBoard([check + Vec2(x_start, 0) for check in checks])
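
The (i + 1)**2 factor makes each gap wider than the last. For example, assuming a checkpoint radius of 600 (the real value comes from Constants.check_radius()):

# Pre-centering x positions for num_checks=3 with an assumed radius of 600
xs = [600 * (i + 1) ** 2 for i in range(3)]
print(xs)                                    # [600, 2400, 5400]
print([b - a for a, b in zip(xs, xs[1:])])   # gaps widen: [1800, 3000]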
Example #16
    def test_get_best_action_works_behind_right(self):
        board = PodBoard([Vec2(5000, 5000), Vec2(1000, 1000)])
        # Pod is directly right of check, but facing away (slightly to the right)
        pod = PodState(Vec2(7000, 5000))
        pod.angle = -0.000001

        self.__do_get_best_action_assert(board, pod, 0, -Constants.max_turn())
Example #17
    def train(self,
              num_episodes: int = 10,
              prob_rand_action: float = 0.5,
              max_turns: int = 50,
              learning_rate: float = 1.0,
              future_discount: float = 0.8) -> List[float]:
        """
        Train starting at a random point
        """
        max_reward_per_ep = []

        for episode in range(num_episodes):
            # The pod starts at a random (far) distance from the check,
            # pointing in a random direction
            pos_offset = UNIT.rotate(
                random.random() * 2 * math.pi) * Constants.check_radius() * (
                    16 * random.random() + 1)
            pod = PodState(pos=self.board.checkpoints[0] + pos_offset,
                           angle=2 * math.pi * random.random() - math.pi)

            max_reward_per_ep.append(
                self.__do_train(pod, max_turns, prob_rand_action,
                                learning_rate, future_discount))

        return max_reward_per_ep
Example #18
    def test_get_best_action_works_straight(self):
        board = PodBoard([Vec2(5000, 5000), Vec2(1000, 1000)])
        # Pod is directly below the check, but looking straight at it
        pod = PodState(Vec2(5000, 0))
        pod.angle = math.pi / 2

        self.__do_get_best_action_assert(board, pod, Constants.max_thrust(), 0)
Example #19
def re_dcat(board: PodBoard, pod: PodState) -> float:
    pod_to_check = board.checkpoints[pod.nextCheckId] - pod.pos

    # Scaled distance to next check
    dist_penalty = pod_to_check.length() / DIST_BASE

    # Bonus for each check hit. By making it 3 per check, we ensure that the reward
    # is always higher after hitting a check. (If left at 1, the dist_penalty could
    # be slightly greater than 1, leading to a DECREASE in reward for hitting a check.)
    checks_hit = len(board.checkpoints) * pod.laps + pod.nextCheckId

    # A tiny bit for the angle. This should really be tiny - its purpose is to serve as a
    # tie-breaker (to prevent the pod from going into orbit around a check).
    angle = math.fabs(clean_angle(pod_to_check.angle() - pod.angle))
    a_penalty = (angle / math.pi) / 10 if angle > Constants.max_turn() else 0

    # And finally: this can be important to prevent agents from doing nothing.
    # The reduction factor is slightly more than the number of turns it takes
    # (on average) to get from one check to another
    turn_penalty = pod.turns / 20

    return 3 * (checks_hit + 1) \
           - dist_penalty \
           - a_penalty \
           - turn_penalty
Example #20
    def to_vector(self, board: PodBoard, pod: PodState) -> List[float]:
        # Velocity is a free vector (no translation needed), so it only has to
        # be rotated into the pod's frame
        vel = pod.vel.rotate(-pod.angle) / Constants.max_vel()

        check1 = (board.get_check(pod.nextCheckId) -
                  pod.pos).rotate(-pod.angle) / MAX_DIST

        return [vel.x, vel.y, check1.x, check1.y]
Example #21
    def test_state_to_vector_works2(self):
        # A pod at (-100, -100) pointing up +Y, moving 45 degrees down-left
        pod = PodState(Vec2(-100, -100), Vec2(-3, -3), math.pi / 2)
        # The target checkpoint is directly in front
        board = PodBoard([Vec2(-100, 1000), ORIGIN])

        state = state_to_vector(pod, board)

        self.assertEqual(len(state), STATE_VECTOR_LEN)
        self.assertAlmostEqual(state[0],
                               -3 / Constants.max_vel(),
                               msg="velocity x")
        self.assertAlmostEqual(state[1],
                               3 / Constants.max_vel(),
                               msg="velocity y")
        self.assertAlmostEqual(state[2], 1100 / MAX_DIST, msg="check1 x")
        self.assertAlmostEqual(state[3], 0, msg="check1 y")
Example #22
    def __generate_random_checks(self):
        min_x = Constants.border_padding()
        min_y = Constants.border_padding()
        max_x = Constants.world_x() - Constants.border_padding()
        max_y = Constants.world_y() - Constants.border_padding()
        min_dist_sq = Constants.check_spacing() * Constants.check_spacing()
        self.checkpoints = []

        num_checks = random.randrange(Constants.min_checks(),
                                      Constants.max_checks())
        while len(self.checkpoints) < num_checks:
            check = Vec2(random.randrange(min_x, max_x),
                         random.randrange(min_y, max_y))
            too_close = any((x - check).square_length() < min_dist_sq
                            for x in self.checkpoints)
            if not too_close:
                self.checkpoints.append(check)
Example #23
def speed_reward(board: PodBoard, next_pod: PodState) -> float:
    """
    Indicates how much the speed is taking us toward the next check (scaled).
    """
    pod_to_check = board.checkpoints[next_pod.nextCheckId] - next_pod.pos
    dist_to_check = pod_to_check.length()

    # a*b = |a|*|b|*cos
    # Thus, vel*check / dist = how much the vel is taking us toward the check
    return (next_pod.vel * pod_to_check) / (dist_to_check * Constants.max_vel())
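
The identity in the comment means the result is the cosine of the angle between the velocity and the check direction, scaled by speed over max speed, so it always lies in [-1, 1]. A self-contained illustration with plain tuples standing in for Vec2:

max_vel, dist = 100.0, 5000.0
vel = (max_vel, 0.0)               # moving at full speed straight at the check
pod_to_check = (dist, 0.0)
dot = vel[0] * pod_to_check[0] + vel[1] * pod_to_check[1]
assert dot / (dist * max_vel) == 1.0   # full speed toward the check scores 1.0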
Example #24
def _pod_wedge_info(pod: PodState) -> Tuple[float, float, Vec2]:
    """
    Get info for drawing a wedge for a pod:
    angle from, angle to, center
    """
    angle_deg = math.degrees(pod.angle) + 180.0
    offset = Vec2(Constants.pod_radius() / 2,
                  0).rotate(math.radians(angle_deg))
    center = pod.pos - offset
    return angle_deg - 20, angle_deg + 20, center
Example #25
def _get_pod_artist(pod: PodState, color: Tuple[float, float, float]) -> Wedge:
    # Draw the wedge
    theta1, theta2, center = _pod_wedge_info(pod)
    wedge = Wedge((center.x, center.y),
                  Constants.pod_radius(),
                  theta1,
                  theta2,
                  color=color)
    wedge.set_zorder(10)
    return wedge
Example #26
    def gen_initial_state(self) -> PodState:
        """
        Generate a state at which to start a training episode
        """
        # The pod starts in a random position at a random distance from the check,
        # pointing in a random direction
        pos_offset = UNIT.rotate(random() * 2 * math.pi) * \
                     Constants.check_radius() * (15 * random() + 1)

        return PodState(pos=self.target.board.get_check(0) + pos_offset,
                        angle=2 * math.pi * random() - math.pi)
Example #27
def re_dca(board: PodBoard, pod: PodState) -> float:
    checks_hit = len(board.checkpoints) * pod.laps + pod.nextCheckId

    pod_to_check = board.checkpoints[pod.nextCheckId] - pod.pos

    angle = math.fabs(clean_angle(pod_to_check.angle() - pod.angle))
    a_penalty = (angle / math.pi) / 10 if angle > Constants.max_turn() else 0

    dist_penalty = pod_to_check.length() / DIST_BASE

    return 3 * (checks_hit + 1) - dist_penalty - a_penalty
Example #28
    def test_state_to_vector_works1(self):
        # A pod at (100, 100) pointing down -X, moving full speed +Y
        pod = PodState(Vec2(100, 100), Vec2(0, Constants.max_vel()), -math.pi)
        # The target checkpoint is directly behind it
        board = PodBoard([Vec2(100 + MAX_DIST, 100), ORIGIN])

        state = state_to_vector(pod, board)

        self.assertEqual(len(state), STATE_VECTOR_LEN)
        self.assertAlmostEqual(state[0], 0, msg="velocity x")
        self.assertAlmostEqual(state[1], -1, msg="velocity y")
        self.assertAlmostEqual(state[2], -1, msg="check1 x")
        self.assertAlmostEqual(state[3], 0, msg="check1 y")
Example #29
    def test_actions_produce_all_possible_combinations(self):
        # First, collect all unique values
        outputs = set()
        angles = set()
        thrusts = set()
        for action in range(0, self.ad.num_actions):
            thrust, angle = self.ad.action_to_play(action)
            outputs.add((thrust, angle))
            angles.add(angle)
            thrusts.add(thrust)

        # Ensure that we have the correct number of each
        self.assertEqual(len(outputs), self.ad.num_actions)
        self.assertEqual(len(angles), self.ad.num_angle)
        self.assertEqual(len(thrusts), self.ad.num_thrust)

        # Ensure that each possibility is present
        thrust_inc = Constants.max_thrust() / (self.ad.num_thrust - 1)
        for t in range(0, self.ad.num_thrust):
            self.assertIn(t * thrust_inc, thrusts)

        ang_inc = (Constants.max_turn() * 2) / (self.ad.num_angle - 1)
        for a in range(0, self.ad.num_angle):
            self.assertIn(a * ang_inc - Constants.max_turn(), angles)
Example #30
    def train_progressively(self,
                            dist_increment: int,
                            ep_per_dist: int,
                            num_incr: int,
                            prob_rand_action: float = 0.5,
                            learning_rate: float = 0.5,
                            future_discount: float = 0.8) -> List[float]:
        """
        Train by randomly generating pods close to the checkpoint, and gradually backing away
        :param dist_increment: Increment by which to increase the distance to the check
        :param ep_per_dist: Number of episodes to run at each increment
        :param num_incr: Number of distance increments to run
        :param prob_rand_action: Probability of taking a random (exploratory) action at each step
        :param learning_rate: Step size for the Q-value updates
        :param future_discount: Discount factor applied to future rewards
        :return: List of rewards for each episode
        """
        old_rew = self.reward_func
        self.reward_func = check_reward

        max_reward_per_ep = []

        for incr in range(1, num_incr + 1):
            for ep_inc in range(ep_per_dist):
                # Position is (check_radius + dist_increment * incr) away from the check
                pos_offset = UNIT.rotate(random.random() * 2 * math.pi) * \
                             (Constants.check_radius() + dist_increment * incr)
                pod = PodState(pos=self.board.checkpoints[0] + pos_offset,
                               angle=2 * math.pi * random.random() - math.pi)

                max_reward_per_ep.append(
                    self.__do_train(pod, 5 * incr, prob_rand_action,
                                    learning_rate, future_discount))

        self.reward_func = old_rew
        return max_reward_per_ep