def play(self, pod: PodState) -> PlayOutput:
    """
    Choose a steering target and a thrust for the pod's next move.

    The target is the pod's next checkpoint. While the pod is still far from
    it (more than six times ``Constants.max_vel()``), the target is pushed two
    check radii away from the checkpoint centre, on the side facing the pod
    and away from the following checkpoint, so that the pod arrives already
    turned toward the following checkpoint.

    Thrust depends on how far the pod's heading is from the target direction:
    full thrust below two maximum turns, a linear ramp from full down to zero
    between two and four maximum turns, and zero beyond that.

    :param pod: Current state of the pod (reads pos, vel, angle, nextCheckId)
    :return: PlayOutput built from the target shifted back by twice the pod's
             velocity, and the chosen thrust
    """
    check1 = self.board.checkpoints[pod.nextCheckId]
    c1_to_p = pod.pos - check1
    c1_to_p_len = c1_to_p.length()

    target = check1
    if c1_to_p_len > Constants.max_vel() * 6:
        # Still far away. Aim for a point that will help us turn toward the
        # next check. The branch condition guarantees c1_to_p_len is positive
        # whenever the threshold is non-negative, so the division is safe.
        check2 = self.board.get_check(pod.nextCheckId + 1)
        c1_to_c2 = check2 - check1
        c1_to_c2_len = c1_to_c2.length()
        # Each vector is scaled by its OWN length so both are unit vectors
        # before being combined. Dividing both by the same length would be
        # cancelled out by normalize() and lose the intended weighting.
        # NOTE(review): assumes check2 differs from check1 and that the two
        # unit vectors are not identical - confirm Vec2.normalize() behaviour
        # for a zero vector.
        midpoint = ((c1_to_p / c1_to_p_len)
                    - (c1_to_c2 / c1_to_c2_len)).normalize()
        target = target + (midpoint * Constants.check_radius() * 2)
    # else: We're getting close to the check. Stop fooling around and go to it.

    # OK, now we've got a target point. Do whatever it takes to get there.
    pod_to_target = target - pod.pos
    ang_diff_to_target = math.fabs(
        clean_angle(math.fabs(pod.angle - pod_to_target.angle())))

    max_turn = Constants.max_turn()
    if ang_diff_to_target < 2 * max_turn:
        thrust = Constants.max_thrust()
    elif ang_diff_to_target < 4 * max_turn:
        # Linear ramp: full thrust at 2 * max_turn, falling to zero at
        # 4 * max_turn. The numerator is (upper bound - angle) so the result
        # stays non-negative throughout this branch.
        thrust = ((4 * max_turn) - ang_diff_to_target) / (
            2 * max_turn) * Constants.max_thrust()
    else:
        thrust = 0

    # Shift the aim point back by twice the current velocity
    # (presumably to compensate for drift - confirm with the simulator).
    return PlayOutput(target - (2 * pod.vel), thrust)
def train(self,
          num_episodes: int = 10,
          prob_rand_action: float = 0.5,
          max_turns: int = 50,
          learning_rate: float = 1.0,
          future_discount: float = 0.8) -> List[float]:
    """
    Run training episodes, each starting from a random pod state.

    Every episode places the pod around checkpoint 0, in a random direction
    and at a random distance of 1 to 17 check radii, with a random heading in
    [-pi, pi).

    :param num_episodes: Number of episodes to run
    :param prob_rand_action: Forwarded unchanged to the per-episode routine
    :param max_turns: Forwarded unchanged to the per-episode routine
    :param learning_rate: Forwarded unchanged to the per-episode routine
    :param future_discount: Forwarded unchanged to the per-episode routine
    :return: One value per episode, as returned by the per-episode routine
    """
    rewards = []
    for _ in range(num_episodes):
        # Random direction from the check ...
        bearing = random.random() * 2 * math.pi
        direction = UNIT.rotate(bearing)
        # ... scaled first by the check radius, then by a random factor
        # (same multiplication order as a single chained expression).
        one_radius = direction * Constants.check_radius()
        start_offset = one_radius * (16 * random.random() + 1)

        start_pos = self.board.checkpoints[0] + start_offset
        start_angle = 2 * math.pi * random.random() - math.pi
        start_pod = PodState(pos=start_pos, angle=start_angle)

        episode_reward = self.__do_train(start_pod,
                                         max_turns,
                                         prob_rand_action,
                                         learning_rate,
                                         future_discount)
        rewards.append(episode_reward)
    return rewards
def gen_initial_state(self) -> PodState:
    """
    Produce a randomised pod state from which a training episode can begin.

    The pod is placed around check 0 of the target's board, in a random
    direction and at a random distance of 1 to 16 check radii, with a random
    heading in [-pi, pi).

    :return: The freshly generated PodState
    """
    # Random direction from the check
    direction = UNIT.rotate(random() * 2 * math.pi)
    # Scale by the check radius first, then by the random distance factor
    one_radius = direction * Constants.check_radius()
    start_offset = one_radius * (15 * random() + 1)

    start_pos = self.target.board.get_check(0) + start_offset
    start_angle = 2 * math.pi * random() - math.pi
    return PodState(pos=start_pos, angle=start_angle)
def trainer(num_checks: int = 3) -> 'PodBoard':
    """
    Build a board whose checks all lie on one horizontal line.

    The k-th check (counting from 1) starts at x = check_radius * k**2, so the
    gaps between consecutive checks grow. The whole row is then shifted
    horizontally so that it is centred in the world. All checks share
    y = world_y / 2. Intended for use with gen_pods to generate test data
    with varying distances to the next check.

    :param num_checks: How many checks to place on the board
    :return: A PodBoard holding the centred row of checks
    """
    row = [
        Vec2(Constants.check_radius() * (order ** 2), Constants.world_y() / 2)
        for order in range(1, num_checks + 1)
    ]

    # Horizontal shift that puts the middle of the row at the middle of the world
    span = row[-1].x - row[0].x
    shift_x = (Constants.world_x() - span) / 2 - row[0].x

    centred = [point + Vec2(shift_x, 0) for point in row]
    return PodBoard(centred)
def train_progressively(self,
                        dist_increment: int,
                        ep_per_dist: int,
                        num_incr: int,
                        prob_rand_action: float = 0.5,
                        learning_rate: float = 0.5,
                        future_discount: float = 0.8) -> List[float]:
    """
    Train by randomly generating pods close to the checkpoint, and gradually
    backing away.

    While this runs, ``self.reward_func`` is temporarily replaced by
    ``check_reward``. The previous reward function is always restored
    afterwards, even if an episode raises.

    :param dist_increment: Increment by which to increase the distance to the check
    :param ep_per_dist: Number of episodes to run at each increment
    :param num_incr: Number of distance increments to run
    :param prob_rand_action: Forwarded unchanged to the per-episode routine
    :param learning_rate: Forwarded unchanged to the per-episode routine
    :param future_discount: Forwarded unchanged to the per-episode routine
    :return: List of rewards for each episode
    """
    old_rew = self.reward_func
    self.reward_func = check_reward
    max_reward_per_ep = []
    try:
        for incr in range(1, num_incr + 1):
            for _ in range(ep_per_dist):
                # Position is (radius + increment) distance from check,
                # in a random direction
                pos_offset = UNIT.rotate(random.random() * 2 * math.pi) * \
                    (Constants.check_radius() + dist_increment * incr)
                pod = PodState(pos=self.board.checkpoints[0] + pos_offset,
                               angle=2 * math.pi * random.random() - math.pi)
                # The turn budget grows with the distance: 5 turns per increment
                max_reward_per_ep.append(
                    self.__do_train(pod,
                                    5 * incr,
                                    prob_rand_action,
                                    learning_rate,
                                    future_discount))
    finally:
        # Put the original reward function back no matter how training ended
        self.reward_func = old_rew
    return max_reward_per_ep
def __draw_check(self, check: Vec2, idx: int) -> Circle:
    """
    Label a check on the axes and build the circle that represents it.

    The check's index is written at the check's position (horizontally
    centred, font size 14) on ``self.ax``. The returned circle is only
    constructed here; nothing in this method adds it to the axes.

    :param check: Position of the check
    :param idx: Index shown as the check's label
    :return: A Circle centred on the check with radius Constants.check_radius()
    """
    centre = (check.x, check.y)
    label = str(idx)
    self.ax.annotate(label, xy=centre, ha="center", fontsize=14)
    return Circle(centre, Constants.check_radius())