Example #1
    def play(self, pod: PodState) -> PlayOutput:
        check1 = self.board.checkpoints[pod.nextCheckId]
        check2 = self.board.get_check(pod.nextCheckId + 1)
        c1_to_p = (pod.pos - check1)
        c1_to_p_len = c1_to_p.length()
        c1_to_c2 = (check2 - check1)
        c1_to_c2_len = c1_to_c2.length()

        # Bisector between the unit vector toward the pod and the unit vector
        # away from the next check (each leg normalized by its own length)
        midpoint = ((c1_to_p / c1_to_p_len) -
                    (c1_to_c2 / c1_to_c2_len)).normalize()
        target = check1

        if c1_to_p_len > Constants.max_vel() * 6:
            # Still far away. Aim for a point that will help us turn toward the next check
            target = target + (midpoint * Constants.check_radius() * 2)
        # else: We're getting close to the check. Stop fooling around and go to it.

        # OK, now we've got a target point. Do whatever it takes to get there.
        pod_to_target = target - pod.pos
        ang_diff_to_target = math.fabs(
            clean_angle(math.fabs(pod.angle - pod_to_target.angle())))

        if ang_diff_to_target < 2 * Constants.max_turn():
            thrust = Constants.max_thrust()
        elif ang_diff_to_target < 4 * Constants.max_turn():
            # Linear falloff: max_thrust at 2*max_turn, down to 0 at 4*max_turn
            thrust = (4 * Constants.max_turn() - ang_diff_to_target) / (
                2 * Constants.max_turn()) * Constants.max_thrust()
        else:
            thrust = 0

        return PlayOutput(target - (2 * pod.vel), thrust)
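Two details are easy to miss here: the returned target is shifted by -2 * pod.vel to cancel the pod's current drift before aiming, and the thrust schedule is a linear ramp between 2x and 4x the maximum turn angle. A standalone sketch of that ramp, using assumed CSB-style constants (the real values come from Constants):

import math

MAX_TURN = 0.314   # assumed: ~18 degrees per turn
MAX_THRUST = 100   # assumed maximum thrust

def thrust_ramp(ang_diff: float) -> float:
    # Full thrust when nearly aligned, zero when far off, linear in between
    if ang_diff < 2 * MAX_TURN:
        return MAX_THRUST
    if ang_diff < 4 * MAX_TURN:
        return (4 * MAX_TURN - ang_diff) / (2 * MAX_TURN) * MAX_THRUST
    return 0.0

assert thrust_ramp(0.0) == 100
assert math.isclose(thrust_ramp(3 * MAX_TURN), 50.0)  # halfway down the ramp
assert thrust_ramp(5 * MAX_TURN) == 0.0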
Example #2
    def play_to_action(self, thrust: int, angle: float) -> int:
        """
        Given a legal play (angle/thrust), find the nearest discrete action
        """
        thrust_pct = thrust / Constants.max_thrust()
        angle_pct = (angle + Constants.max_turn()) / (2 * Constants.max_turn())
        thrust_idx = math.floor(thrust_pct * (self.num_thrust - 1))
        angle_idx = math.floor(angle_pct * (self.num_angle - 1))
        return thrust_idx * self.num_angle + angle_idx
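The action id packs the two indices row-major: action = thrust_idx * num_angle + angle_idx. The inverse used elsewhere in this project, action_to_play (exercised by the tests below), is plausibly just a lookup into the _action_table built in Example #3; the index arithmetic itself inverts with divmod. A minimal sketch, with decode_action as a hypothetical name:

def decode_action(action: int, num_angle: int):
    # Hypothetical inverse of play_to_action's row-major packing
    thrust_idx, angle_idx = divmod(action, num_angle)
    return thrust_idx, angle_idx

# With num_thrust=2, num_angle=3: action 5 -> thrust index 1, angle index 2
assert decode_action(5, 3) == (1, 2)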
Example #3
    def __init__(self, num_thrust: int = 2, num_angle: int = 3):
        self.num_thrust = num_thrust
        self.num_angle = num_angle
        self.num_actions = num_thrust * num_angle

        thrusts = _arange(0, Constants.max_thrust(), num_thrust)
        angs = _arange(-Constants.max_turn(), Constants.max_turn(), num_angle)
        self._action_table = [(int(thr), ang) for thr in thrusts
                              for ang in angs]
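_arange is a project helper that doesn't appear in this listing. For the table to contain both endpoints, spaced by max / (n - 1) as the test in Example #11 expects, it plausibly behaves like numpy.linspace. A minimal sketch under that assumption:

def _arange(start: float, end: float, count: int):
    # Hypothetical: `count` evenly spaced values, both endpoints included,
    # i.e. numpy.linspace(start, end, count) as a plain list
    step = (end - start) / (count - 1)
    return [start + i * step for i in range(count)]

assert _arange(0, 100, 2) == [0.0, 100.0]
assert _arange(-1.0, 1.0, 3) == [-1.0, 0.0, 1.0]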
Example #4
File: util.py  Project: Kricket/tf-pods
def legal_angle(req_angle: float, pod_angle: float) -> float:
    """
    Get the actual angle to apply, given the player's input
    :param req_angle: Angle that the player requested
    :param pod_angle: Angle in which the pod is facing
    :return: Angle to use for calculations (within [-pi, pi])
    """
    d_angle = within(clean_angle(req_angle - pod_angle), -Constants.max_turn(),
                     Constants.max_turn())
    return clean_angle(pod_angle + d_angle)
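clean_angle and within are helpers from the same project that aren't shown in this listing. Judging from their use here (clean_angle keeps a result within [-pi, pi], within clamps a value to a range), plausible sketches would be:

import math

def clean_angle(angle: float) -> float:
    # Hypothetical: wrap any angle into [-pi, pi]
    return (angle + math.pi) % (2 * math.pi) - math.pi

def within(value: float, lo: float, hi: float) -> float:
    # Hypothetical: clamp value to the closed range [lo, hi]
    return max(lo, min(hi, value))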
Example #5
    def test_action_to_output_turn_right(self):
        action = self.ad.play_to_action(50, Constants.max_turn())
        pod_pos = Vec2(100, 100)
        po = self.ad.action_to_output(action, 1.23, pod_pos)
        # The thrust should not have changed
        self.assertEqual(po.thrust, 50)
        # The pod is at (100, 100), angle 1.23, requested turn max_turn...
        # If we undo the move and rotate, we should have a vector down the X-axis (i.e. angle 0)
        rel_target = (po.target - pod_pos).rotate(-1.23 - Constants.max_turn())
        self.assertAlmostEqual(rel_target.y, 0)
        self.assertGreater(rel_target.x, 1)
Example #6
File: rewards.py  Project: Kricket/tf-pods
def re_dcat(board: PodBoard, pod: PodState) -> float:
    pod_to_check = board.checkpoints[pod.nextCheckId] - pod.pos

    # Scaled distance to next check
    dist_penalty = pod_to_check.length() / DIST_BASE

    # Bonus for each check hit. Weighting it at 3 per check (see the return below)
    # ensures that the reward is always higher after hitting a check. (If left at 1,
    # the dist_penalty could be slightly greater than 1, leading to a DECREASE in
    # reward for hitting a check)
    checks_hit = len(board.checkpoints) * pod.laps + pod.nextCheckId

    # A tiny bit for the angle. This should really be tiny - its purpose is to serve as a
    # tie-breaker (to prevent the pod from going into orbit around a check).
    angle = math.fabs(clean_angle(pod_to_check.angle() - pod.angle))
    a_penalty = (angle / math.pi) / 10 if angle > Constants.max_turn() else 0

    # And finally: this can be important to prevent agents from doing nothing.
    # The reduction factor is slightly more than the number of turns it takes
    # (on average) to get from one check to another
    turn_penalty = pod.turns / 20

    return 3 * (checks_hit + 1) \
           - dist_penalty \
           - a_penalty \
           - turn_penalty
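A quick numeric check of the multiplier argument, with illustrative values (DIST_BASE and both distances are assumptions, not project constants): hitting a check adds 3 to the bonus while dist_penalty jumps by roughly one check-to-check distance over DIST_BASE, so the reward still rises:

DIST_BASE = 5000.0                       # assumed scaling constant
dist_before, dist_after = 400.0, 6000.0  # just before / just after hitting a check

reward_before = 3 * (4 + 1) - dist_before / DIST_BASE  # checks_hit = 4
reward_after = 3 * (5 + 1) - dist_after / DIST_BASE    # checks_hit = 5

# The +3 bonus outweighs the dist_penalty jump (1.2 - 0.08 = 1.12)
assert reward_after > reward_before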
Example #7
    def test_get_best_action_works_behind_right(self):
        board = PodBoard([Vec2(5000, 5000), Vec2(1000, 1000)])
        # Pod is directly right of check, but facing away (slightly to the right)
        pod = PodState(Vec2(7000, 5000))
        pod.angle = -0.000001

        self.__do_get_best_action_assert(board, pod, 0, -Constants.max_turn())
Example #8
    def test_get_best_action_works_right(self):
        board = PodBoard([Vec2(5000, 5000), Vec2(1000, 1000)])
        # Pod is directly below the check, but the check is behind and to its right
        pod = PodState(Vec2(5000, 0))
        pod.angle = math.pi * 1.25

        self.__do_get_best_action_assert(board, pod, 0, -Constants.max_turn())
Example #9
File: rewards.py  Project: Kricket/tf-pods
def re_dca(board: PodBoard, pod: PodState) -> float:
    checks_hit = len(board.checkpoints) * pod.laps + pod.nextCheckId

    pod_to_check = board.checkpoints[pod.nextCheckId] - pod.pos

    angle = math.fabs(clean_angle(pod_to_check.angle() - pod.angle))
    a_penalty = (angle / math.pi) / 10 if angle > Constants.max_turn() else 0

    dist_penalty = pod_to_check.length() / DIST_BASE

    return 3 * (checks_hit + 1) - dist_penalty - a_penalty
Example #10
File: podagent.py  Project: Kricket/tf-pods
    def __init__(self, board: PodBoard):
        super().__init__()

        # Allow the agent to go beyond the bounds - due to the nature of
        # the rounding functions, it's unlikely the agent will ever give
        # us the actual min or max
        scaled_max_turn = Constants.max_turn() * 1.1
        scaled_max_thrust = Constants.max_thrust() + 2 * THRUST_PADDING
        angle_spec = array_spec.BoundedArraySpec(
            (),
            np.float32,
            minimum=-scaled_max_turn,
            maximum=scaled_max_turn)
        thrust_spec = array_spec.BoundedArraySpec(
            (),
            np.int32,
            minimum=0,
            maximum=scaled_max_thrust)
        self._action_spec = {
            'angle': angle_spec,
            'thrust': thrust_spec
        }

        angles_spec = array_spec.BoundedArraySpec(
            (3,),
            np.float32,
            minimum=-math.pi,
            maximum=math.pi)
        dist_spec = array_spec.BoundedArraySpec(
            (3,),
            np.float32,
            minimum=0,
            maximum=Constants.world_x() * 10)

        self._observation_spec = {
            'angles': angles_spec,
            'distances': dist_spec
        }

        self._time_step_spec = ts.TimeStep(
            step_type=array_spec.ArraySpec(shape=(), dtype=np.int32, name='step_type'),
            reward=array_spec.ArraySpec(shape=(), dtype=np.float32, name='reward'),
            discount=array_spec.ArraySpec(shape=(), dtype=np.float32, name='discount'),
            observation=self._observation_spec
        )

        self._board = board
        self._player = Player(AgentController())
        self._initial_state = self.get_state()
        self._episode_ended = False
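The specs only declare dtypes, shapes, and bounds; they don't generate values. A minimal sketch of drawing a random action that respects the widened bounds from __init__, using plain numpy (the helper and its signature are hypothetical):

import numpy as np

def random_action(max_turn: float, max_thrust: int, thrust_padding: int):
    # Hypothetical: sample uniformly within the padded bounds declared above
    return {
        'angle': np.float32(np.random.uniform(-1.1 * max_turn, 1.1 * max_turn)),
        'thrust': np.int32(np.random.randint(0, max_thrust + 2 * thrust_padding + 1)),
    }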
Example #11
    def test_actions_produce_all_possible_combinations(self):
        # First, collect all unique values
        outputs = set()
        angles = set()
        thrusts = set()
        for action in range(0, self.ad.num_actions):
            thrust, angle = self.ad.action_to_play(action)
            outputs.add((thrust, angle))
            angles.add(angle)
            thrusts.add(thrust)

        # Ensure that we have the correct number of each
        self.assertEqual(len(outputs), self.ad.num_actions)
        self.assertEqual(len(angles), self.ad.num_angle)
        self.assertEqual(len(thrusts), self.ad.num_thrust)

        # Ensure that each possibility is present
        thrust_inc = Constants.max_thrust() / (self.ad.num_thrust - 1)
        for t in range(0, self.ad.num_thrust):
            self.assertIn(t * thrust_inc, thrusts)

        ang_inc = (Constants.max_turn() * 2) / (self.ad.num_angle - 1)
        for a in range(0, self.ad.num_angle):
            self.assertIn(a * ang_inc - Constants.max_turn(), angles)
Example #12
    def test_play_to_action_to_play_rounds_angle(self):
        action = self.ad.play_to_action(0, Constants.max_turn() * 0.001)
        thrust, angle = self.ad.action_to_play(action)
        self.assertEqual(thrust, 0)
        self.assertEqual(angle, 0)
Example #13
    def test_play_to_action_to_play_both_min(self):
        self.__do_p_a_p(0, -Constants.max_turn())
Example #14
    def test_play_to_action_to_play_both_max(self):
        self.__do_p_a_p(Constants.max_thrust(), Constants.max_turn())
Example #15
    def test_play_to_action_to_play_max_angle(self):
        self.__do_p_a_p(50, Constants.max_turn())