Beispiel #1
0
    def tester() -> 'PodBoard':
        """
        Build a fixed board that exercises as many manoeuvres as possible.

        Layout, numbering the checkpoints from 0:
        (start) -> 0 -> 1: straight line
        1 -> 2: 180° turn
        2 -> 3: 90° turn
        3 -> 4 -> 5 -> 6: curve around to the right
        6 -> 7 (start): curve to the left

        :return: A PodBoard with eight checkpoints, the last one being the
                 start position itself
        """
        start = Vec2(Constants.world_x() / 10, Constants.world_y() / 2)

        # Each entry is the step from the previous position to the next check
        steps = [
            Vec2(5000, 0),       # straight ahead
            Vec2(6000, 0),       # straight ahead
            Vec2(-3000, 0),      # straight back
            Vec2(0, 2500),       # turn 90°
            Vec2(-3000, 1500),   # curve around
            Vec2(-3000, -1500),  # curve around
            Vec2(0, -5500),      # curve around
        ]

        checks = []
        cursor = start
        for step in steps:
            cursor = cursor + step
            checks.append(cursor)

        # Finish on the start position, which forces a turn the other way
        checks.append(start)

        return PodBoard(checks)
Beispiel #2
0
def _prepare_size():
    """
    Configure matplotlib's global rcParams for drawing the world.

    The default figure size is set to the world dimensions divided by 1000,
    the figure DPI to 100, and the animation embed limit to 2**27.
    """
    width = Constants.world_x() / 1000
    height = Constants.world_y() / 1000

    plt.rcParams['figure.figsize'] = [width, height]
    plt.rcParams['figure.dpi'] = 100
    matplotlib.rcParams['animation.embed_limit'] = 2**27
Beispiel #3
0
    def grid(rows: int = 3,
             cols: int = 3,
             x_spacing: int = 4000,
             y_spacing: int = 3000) -> 'PodBoard':
        """
        Generate a board whose checks form a grid centered on the world,
        numbered row by row:
        1 2 3
        4 5 6
        7 8 9

        :param rows: Number of rows in the grid
        :param cols: Number of columns in the grid
        :param x_spacing: Horizontal distance between neighbouring columns
        :param y_spacing: Vertical distance between neighbouring rows
        :return: A PodBoard containing rows * cols checks
        """
        mid_x = Constants.world_x() / 2
        mid_y = Constants.world_y() / 2

        # Index of the first row/column relative to the center, so that the
        # grid is symmetric around it:
        #   5 rows: -2, -1, 0, 1, 2
        #   4 rows: -1.5, -0.5, 0.5, 1.5
        #   3 rows: -1, 0, 1
        # => start at -(r-1)/2
        first_row = (1 - rows) / 2
        first_col = (1 - cols) / 2

        checks = [
            Vec2(mid_x + (first_col + c) * x_spacing,
                 mid_y + (first_row + r) * y_spacing)
            for r in range(rows)
            for c in range(cols)
        ]

        return PodBoard(checks)
Beispiel #4
0
def _get_field_artist() -> Rectangle:
    """
    Build the artist used to draw the board: a white rectangle with a black
    edge, anchored at (0, 0) and spanning the full world width and height.
    """
    origin = (0, 0)
    width = Constants.world_x()
    height = Constants.world_y()
    return Rectangle(origin, width, height, ec="black", fc="white")
Beispiel #5
0
 def circle(num_points: int = 3, radius: float = 4000) -> 'PodBoard':
     """
     Generate a PodBoard whose checkpoints are evenly spaced on a circle
     around the center of the board.

     :param num_points: Number of checkpoints to place on the circle
     :param radius: Distance of every checkpoint from the board center
     :return: A PodBoard with num_points checkpoints
     """
     middle = Vec2(Constants.world_x() / 2, Constants.world_y() / 2)
     step = 2 * math.pi / num_points

     # Spoke for the first checkpoint; later ones are rotations of it
     spoke = UNIT * radius

     checks = []
     for index in range(num_points):
         checks.append(middle + spoke.rotate(index * step))
     return PodBoard(checks)
Beispiel #6
0
    def __init__(self, board: PodBoard):
        """
        Set up the environment's action, observation and time-step specs and
        create the player for the given board.

        Actions are a dict with:
          'angle':  scalar float in [-1.1 * max_turn, 1.1 * max_turn]
          'thrust': scalar int32 in [0, max_thrust + 2 * THRUST_PADDING]

        Observations are a dict with:
          'angles':    3 floats in [-pi, pi]
          'distances': 3 floats in [0, 10 * world_x]

        :param board: The board on which the agent will play
        """
        super().__init__()

        # Allow the agent to go beyond the bounds - due to the nature of
        # the rounding functions, it's unlikely the agent will ever give
        # us the actual min or max
        scaled_max_turn = Constants.max_turn() * 1.1
        scaled_max_thrust = Constants.max_thrust() + 2 * THRUST_PADDING

        # NOTE: np.float64 is used instead of the np.float alias, which was
        # deprecated in NumPy 1.20 and removed in NumPy 1.24. The alias was
        # the builtin float, so the resulting dtype is unchanged.
        angle_spec = array_spec.BoundedArraySpec(
            (),
            np.float64,
            minimum=-scaled_max_turn,
            maximum=scaled_max_turn)
        thrust_spec = array_spec.BoundedArraySpec(
            (),
            np.int32,
            minimum=0,
            maximum=scaled_max_thrust)
        self._action_spec = {
            'angle': angle_spec,
            'thrust': thrust_spec
        }

        angles_spec = array_spec.BoundedArraySpec(
            (3,),
            np.float64,
            minimum=-math.pi,
            maximum=math.pi)
        dist_spec = array_spec.BoundedArraySpec(
            (3,),
            np.float64,
            minimum=0,
            maximum=Constants.world_x() * 10)

        self._observation_spec = {
            'angles': angles_spec,
            'distances': dist_spec
        }

        self._time_step_spec = ts.TimeStep(
            step_type=array_spec.ArraySpec(shape=(), dtype=np.int32, name='step_type'),
            reward=array_spec.ArraySpec(shape=(), dtype=np.float32, name='reward'),
            discount=array_spec.ArraySpec(shape=(), dtype=np.float32, name='discount'),
            observation=self._observation_spec
        )

        self._board = board
        self._player = Player(AgentController())
        # Snapshot of the freshly created state, taken before any step
        self._initial_state = self.get_state()
        self._episode_ended = False
Beispiel #7
0
    def trainer(num_checks: int = 3) -> 'PodBoard':
        """
        Generate a board with the given number of checks, all on the
        horizontal line through the middle of the world but at growing
        distances from one another (the n-th check sits at n² check radii
        before centering).

        The goal is to use it with gen_pods to generate test data with
        varying distances to the next check.

        :param num_checks: Number of checks to place
        :return: A PodBoard whose row of checks is centered horizontally
        """
        raw = []
        for n in range(1, num_checks + 1):
            raw.append(
                Vec2(Constants.check_radius() * (n**2),
                     Constants.world_y() / 2))

        # Shift the checks to center them
        span = raw[-1].x - raw[0].x
        shift = Vec2((Constants.world_x() - span) / 2 - raw[0].x, 0)

        return PodBoard([point + shift for point in raw])
Beispiel #8
0
    def __generate_random_checks(self):
        """
        Replace self.checkpoints with a random set of checkpoints.

        The number of checkpoints is drawn from
        [Constants.min_checks(), Constants.max_checks()). Each checkpoint
        lies at least Constants.border_padding() away from every world edge,
        and candidates closer than Constants.check_spacing() to an already
        accepted checkpoint are rejected and redrawn.
        """
        pad = Constants.border_padding()
        x_limit = Constants.world_x() - pad
        y_limit = Constants.world_y() - pad

        # Compare squared distances so no square root is needed
        spacing = Constants.check_spacing()
        min_gap_sq = spacing * spacing

        self.checkpoints = []
        target_count = random.randrange(Constants.min_checks(),
                                        Constants.max_checks())

        while len(self.checkpoints) < target_count:
            candidate = Vec2(random.randrange(pad, x_limit),
                             random.randrange(pad, y_limit))
            crowded = any(
                (placed - candidate).square_length() < min_gap_sq
                for placed in self.checkpoints)
            if crowded:
                continue
            self.checkpoints.append(candidate)
Beispiel #9
0
 def play(self, pod: PodState) -> PlayOutput:
     """
     Produce a random move, ignoring the given pod state.

     The output is built from a point with random coordinates scaled by the
     world width and height, and a thrust of a random fraction of
     Constants.max_thrust(), rounded up.
     NOTE(review): assumes `random` is random.random (values in [0, 1)) -
     confirm against the file's imports.

     :param pod: Current state of the pod (unused)
     :return: A PlayOutput with the random point and thrust
     """
     point_x = random() * Constants.world_x()
     point_y = random() * Constants.world_y()
     thrust = math.ceil(random() * Constants.max_thrust())
     return PlayOutput(Vec2(point_x, point_y), thrust)
Beispiel #10
0
import math
from typing import Tuple, Callable, List

from pod.ai.ai_utils import MAX_DIST
from pod.board import PodBoard
from pod.constants import Constants
from pod.util import PodState, clean_angle

#################################################
# Reward functions: signature is
# func(board, state) -> float
#################################################

# Signature shared by all reward functions: (board, pod state) -> reward
RewardFunc = Callable[[PodBoard, PodState], float]
# Square root of the world's area, i.e. the geometric mean of the world's
# width and height
DIST_BASE = math.sqrt(Constants.world_x() * Constants.world_y())

def pgr(board: PodBoard, pod: PodState) -> float:
    """
    Pretty Good Reward

    Rewards every checkpoint hit with 2 points and subtracts the estimated
    remaining distance to the next checkpoint, expressed as a fraction of
    the estimated distance between the previous and the next checkpoint.
    Distances are estimated from the absolute x and y offsets, so no SQRT
    calculation is needed.

    :param board: Board providing the checkpoints
    :param pod: State of the pod to score
    :return: 2 * (checks hit so far) - (relative distance estimate) + 1
    """
    def _half_manhattan(vec) -> float:
        # Cheap stand-in for the vector's length: mean of |x| and |y|
        return (math.fabs(vec.x) + math.fabs(vec.y)) / 2

    target = board.checkpoints[pod.nextCheckId]
    previous = board.get_check(pod.nextCheckId - 1)

    remaining = _half_manhattan(target - pod.pos)
    leg_length = _half_manhattan(target - previous)
    dist_estimate = remaining / leg_length

    # Completed laps count as a full set of checkpoints each
    checks_hit = len(board.checkpoints) * pod.laps + pod.nextCheckId

    return 2*checks_hit - dist_estimate + 1
Beispiel #11
0
 def __prepare_for_world(self):
     """
     Create the figure and axes used to draw the world.

     The axes cover the whole world plus PADDING on every side, and the
     y axis is inverted. The results are stored in self.fig and self.ax.
     """
     x_range = (-PADDING, Constants.world_x() + PADDING)
     y_range = (-PADDING, Constants.world_y() + PADDING)

     self.fig = plt.figure()
     self.ax = plt.axes(xlim=x_range, ylim=y_range)
     self.ax.invert_yaxis()
Beispiel #12
0
 def random() -> 'PodState':
     """
     Create a PodState with a random position, velocity and angle.

     The position comes from Vec2.random with the world dimensions, the
     velocity points in a random direction with a magnitude of a random
     fraction of Constants.max_vel(), and the angle is a random fraction of
     a full turn (2*pi).
     """
     full_turn = 2 * math.pi

     # The random draws happen in the same order as the fields are listed
     position = Vec2.random(Constants.world_x(), Constants.world_y())
     direction = UNIT.rotate(full_turn * random())
     speed = random() * Constants.max_vel()
     facing = full_turn * random()

     return PodState(pos=position, vel=direction * speed, angle=facing)
Beispiel #13
0
 def _get_reward(self) -> np.ndarray:
     """
     Compute the reward for the player's current pod state.

     The reward starts at world_x * world_y, gains 10000 for every
     checkpoint index reached, and loses the squared distance between the
     pod and its next checkpoint.

     :return: The reward as a zero-dimensional float32 NumPy array (not a
              plain int, despite what the annotation previously claimed)
     """
     pod = self._player.pod
     next_check = self._world.checkpoints[pod.nextCheckId]

     reward = Constants.world_x() * Constants.world_y()
     reward += pod.nextCheckId * 10000
     reward -= (next_check - pod.pos).square_length()
     return np.asarray(reward, dtype=np.float32)
Beispiel #14
0
import math
from typing import List

import numpy as np
from pod.constants import Constants
from pod.controller import Controller
from pod.util import PodState, clean_angle
from vec2 import Vec2, UNIT

# Distance to use for scaling inputs: the length of the vector
# (world_x, world_y) - presumably the world's diagonal, assuming
# Vec2.length() is the Euclidean length
MAX_DIST = Vec2(Constants.world_x(), Constants.world_y()).length()


def gen_pods(checks: List[Vec2], pos_angles: List[float],
             pos_dists: List[float], angles: List[float],
             vel_angles: List[float], vel_mags: List[float]):
    """
    Generate pods in various states
    :param checks: Checkpoints around which to generate
    :param pos_angles: Angles from check to pod
    :param pos_dists: Distances from check to pod
    :param angles: Orientations of pods. This will be rotated so that 0 points toward the check!
    :param vel_angles: Angles of velocity. Also rotated so that 0 points toward the check.
    :param vel_mags: Magnitudes of velocity
    :return: One pod for each combination of parameters
    """
    relative_poss = [
        UNIT.rotate(ang) * dist for ang in pos_angles for dist in pos_dists
    ]
    relative_vels = [
        UNIT.rotate(ang) * mag for ang in vel_angles for mag in vel_mags