Code example #1
File: controller.py  Project: Kricket/tf-pods
    def play(self, pod: PodState) -> PlayOutput:
        check1 = self.board.get_check(pod.nextCheckId)
        check2 = self.board.get_check(pod.nextCheckId + 1)
        c1_to_p = (pod.pos - check1)
        c1_to_p_len = c1_to_p.length()
        c1_to_c2 = (check2 - check1)
        c1_to_c2_len = c1_to_c2.length()

        # Unit direction from check1 that leans toward the pod and away from
        # the next check (each leg is normalized before combining)
        midpoint = ((c1_to_p / c1_to_p_len) -
                    (c1_to_c2 / c1_to_c2_len)).normalize()
        target = check1

        if c1_to_p_len > Constants.max_vel() * 6:
            # Still far away. Aim for a point that will help us turn toward the next check
            target = target + (midpoint * Constants.check_radius() * 2)
        # else: We're getting close to the check. Stop fooling around and go to it.

        # OK, now we've got a target point. Do whatever it takes to get there.
        pod_to_target = target - pod.pos
        ang_diff_to_target = math.fabs(
            clean_angle(pod.angle - pod_to_target.angle()))

        if ang_diff_to_target < 2 * Constants.max_turn():
            thrust = Constants.max_thrust()
        elif ang_diff_to_target < 4 * Constants.max_turn():
            # Scale thrust linearly from max down to zero as the angle error
            # grows from 2 to 4 turn-steps
            thrust = (4 * Constants.max_turn() - ang_diff_to_target) / (
                2 * Constants.max_turn()) * Constants.max_thrust()
        else:
            thrust = 0

        # Compensate for inertia: aim at the target shifted against the
        # current velocity so the drift carries the pod into it
        return PlayOutput(target - (2 * pod.vel), thrust)
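
The thrust rule in play is a three-band schedule on the angle error: full thrust when the target lies within two turn-steps, a linear ramp between two and four turn-steps, and zero beyond that. A minimal standalone sketch of the same schedule, with illustrative constant values that are assumptions rather than the project's actual settings:

    import math

    MAX_TURN = math.radians(18)  # assumed turn limit per step
    MAX_THRUST = 100             # assumed thrust cap

    def thrust_for_angle(ang_diff: float) -> float:
        # Full thrust while nearly aligned with the target
        if ang_diff < 2 * MAX_TURN:
            return MAX_THRUST
        # Linear falloff from MAX_THRUST to 0 between 2 and 4 turn-steps
        if ang_diff < 4 * MAX_TURN:
            return (4 * MAX_TURN - ang_diff) / (2 * MAX_TURN) * MAX_THRUST
        # Facing too far away: cut thrust and let the rotation catch up
        return 0.0
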
Code example #2
    def train(self,
              num_episodes: int = 10,
              prob_rand_action: float = 0.5,
              max_turns: int = 50,
              learning_rate: float = 1.0,
              future_discount: float = 0.8) -> List[float]:
        """
        Train, starting each episode from a random position
        """
        max_reward_per_ep = []

        for episode in range(num_episodes):
            # The pod starts at a random position, 1 to 17 check radii away
            # from the check, pointing in a random direction
            pos_offset = UNIT.rotate(
                random.random() * 2 * math.pi) * Constants.check_radius() * (
                    16 * random.random() + 1)
            pod = PodState(pos=self.board.checkpoints[0] + pos_offset,
                           angle=2 * math.pi * random.random() - math.pi)

            max_reward_per_ep.append(
                self.__do_train(pod, max_turns, prob_rand_action,
                                learning_rate, future_discount))

        return max_reward_per_ep
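
A hedged usage sketch: assuming q_trainer is an already-constructed instance of the class that defines train (the variable name is a placeholder), the returned list can be summarized directly:

    # Hypothetical usage; `q_trainer` is an assumed instance name
    rewards = q_trainer.train(num_episodes=100, prob_rand_action=0.3)
    print(f"best episode reward: {max(rewards):.2f}")
    print(f"mean of last 20:     {sum(rewards[-20:]) / 20:.2f}")
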
Code example #3
    def gen_initial_state(self) -> PodState:
        """
        Generate a state at which to start a training episode
        """
        # The pod starts in a random position at a random distance from the check,
        # pointing in a random direction
        pos_offset = UNIT.rotate(random() * 2 * math.pi) * \
                     Constants.check_radius() * (15 * random() + 1)

        return PodState(pos=self.target.board.get_check(0) + pos_offset,
                        angle=2 * math.pi * random() - math.pi)
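
The offset construction amounts to choosing a uniformly random direction and a distance between 1 and 16 check radii. The same geometry without the project's Vec2 type, as a self-contained sketch (the radius value is an assumption):

    import math
    from random import random

    CHECK_RADIUS = 600  # assumed check radius

    def random_offset() -> tuple:
        # Uniform direction, distance in [R, 16R)
        angle = random() * 2 * math.pi
        dist = CHECK_RADIUS * (15 * random() + 1)
        return (dist * math.cos(angle), dist * math.sin(angle))
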
Code example #4
    @staticmethod
    def trainer(num_checks: int = 3) -> 'PodBoard':
        """
        Generate a board with the given number of checks.
        They are all in a row, but at varying distances.
        The goal is to use it with gen_pods to generate test data with varying distances to the next check.
        """
        checks = [
            Vec2(Constants.check_radius() * ((i + 1)**2),
                 Constants.world_y() / 2) for i in range(num_checks)
        ]

        # Shift the checks to center them
        width = checks[-1].x - checks[0].x
        x_start = (Constants.world_x() - width) / 2 - checks[0].x

        return PodBoard([check + Vec2(x_start, 0) for check in checks])
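
Because the raw x-coordinates grow as (i+1)², the gaps between consecutive checks grow linearly: with three checks of radius R they sit at R, 4R and 9R, giving gaps of 3R and 5R before centering. A quick numeric check (R = 600 is an assumed value):

    R = 600  # assumed check radius
    xs = [R * (i + 1) ** 2 for i in range(3)]
    print(xs)                                   # [600, 2400, 5400]
    print([b - a for a, b in zip(xs, xs[1:])])  # gaps: [1800, 3000]
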
Code example #5
    def train_progressively(self,
                            dist_increment: int,
                            ep_per_dist: int,
                            num_incr: int,
                            prob_rand_action: float = 0.5,
                            learning_rate: float = 0.5,
                            future_discount: float = 0.8) -> List[float]:
        """
        Train by randomly generating pods close to the checkpoint, and gradually backing away
        :param dist_increment: Increment by which to increase the distance to the check
        :param ep_per_dist: Number of episodes to run at each increment
        :param num_incr: Number of distance increments to run
        :param prob_rand_action: Probability of taking a random (exploratory) action
        :param learning_rate: Learning rate for the value updates
        :param future_discount: Discount factor applied to future rewards
        :return: List of rewards for each episode
        """
        old_rew = self.reward_func
        self.reward_func = check_reward

        max_reward_per_ep = []

        for incr in range(1, num_incr + 1):
            for ep_inc in range(ep_per_dist):
                # Spawn at distance (check_radius + dist_increment * incr) from the check
                pos_offset = UNIT.rotate(random.random() * 2 * math.pi) * \
                             (Constants.check_radius() + dist_increment * incr)
                pod = PodState(pos=self.board.checkpoints[0] + pos_offset,
                               angle=2 * math.pi * random.random() - math.pi)

                max_reward_per_ep.append(
                    self.__do_train(pod, 5 * incr, prob_rand_action,
                                    learning_rate, future_discount))

        self.reward_func = old_rew
        return max_reward_per_ep
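
The curriculum couples two schedules: the spawn distance grows linearly with the increment index, and the per-episode turn budget grows as 5 * incr, so harder starts also get more time. A small sketch that prints the schedule (the radius and increment values are assumptions):

    CHECK_RADIUS = 600    # assumed
    DIST_INCREMENT = 500  # assumed

    for incr in range(1, 5):
        dist = CHECK_RADIUS + DIST_INCREMENT * incr
        print(f"incr {incr}: spawn {dist} units out, budget {5 * incr} turns")
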
Code example #6
    def __draw_check(self, check: Vec2, idx: int) -> Circle:
        self.ax.annotate(str(idx),
                         xy=(check.x, check.y),
                         ha="center",
                         fontsize=14)
        return Circle((check.x, check.y), Constants.check_radius())
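
Circle here is matplotlib.patches.Circle, and the returned patch is not drawn until it is added to the axes. A minimal sketch of how the pieces fit together outside the class (coordinates and radius are made-up values):

    import matplotlib.pyplot as plt
    from matplotlib.patches import Circle

    fig, ax = plt.subplots()
    ax.annotate("0", xy=(1000, 1000), ha="center", fontsize=14)
    ax.add_patch(Circle((1000, 1000), 600, fill=False))
    ax.set_xlim(0, 2000)
    ax.set_ylim(0, 2000)
    ax.set_aspect("equal")
    plt.show()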