Example #1
def preprocess(state):
    env_type = Config.instance().ENV_TYPE
    if env_type in (Config.instance().CUSTOM, Config.instance().HIT_PRACTICE):
        return preprocess_custom(state)
    elif env_type == Config.instance().ATARI:
        return preprocess_gym(state)
    else:
        raise NotImplementedError
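A minimal usage sketch (hypothetical; env stands for whichever environment matches the active ENV_TYPE, as in the simulate_game example near the end):

# Hypothetical usage: the same call works for both environments because
# dispatch happens on Config.instance().ENV_TYPE.
raw_state = env.reset()
frame = preprocess(raw_state)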
Example #2
def load_sounds():
    if Config.instance().ENABLE_AUDIO:
        pygame.mixer.init()
        Pong.sounds = {}
        Pong.sounds["return"] = pygame.mixer.Sound(Config.instance().AUDIO_DIR + "return.ogg")
        Pong.sounds["score"] = pygame.mixer.Sound(Config.instance().AUDIO_DIR + "score.ogg")
        Pong.sounds["bounce"] = pygame.mixer.Sound(Config.instance().AUDIO_DIR + "bounce.ogg")
Example #3
def get_weight_image(model,
                     neuron=0,
                     layer=0,
                     size=(Config.instance().HEIGHT // 2,
                           Config.instance().WIDTH // 2)):
    # Incoming weights for one neuron of the chosen dense layer
    weights = model.get_weights()[layer][:, neuron]
    # Normalize to [-1, 1], then scale to the [0, 255] pixel range
    weights /= np.max(np.abs(weights))
    weights += 1
    weights *= 127.5
    image = weights.reshape(size).astype(np.uint8)
    return image
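A hedged usage sketch (assumes a Keras-style model whose layer-0 weight matrix has one row per input pixel; cv2 is imported in the Pong module excerpt below):

# Hypothetical usage: dump the receptive field of hidden neuron 5 to disk.
img = get_weight_image(model, neuron=5, layer=0)
cv2.imwrite("analytics/plots/neuron_5.png", img)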
Example #4
    def act(self, state):
        """
        Infer action from state
        :param state: ndarray representing game state
        :return: (action id, confidence vector)
        """
        state = state.reshape([1, state.shape[0]])
        self.last_hidden_activation = self.hl_model.predict(
            state, batch_size=1).squeeze()
        prob = self.model.predict(state, batch_size=1).flatten()
        self.last_output = prob
        action = np.random.choice(self.action_size, 1, p=prob)[0]
        state_ravel = state.reshape(Config.instance().CUSTOM_STATE_SHAPE)
        self.last_state = np.rot90(state_ravel, axes=(0, 1), k=1).flatten()

        return action, prob
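A minimal inference sketch (hypothetical; agent is a trained PGAgent and diff_state is a preprocessed frame difference, as built in the simulate_game example near the end):

# Hypothetical usage: sample one action from the policy for the current frame.
action_id, confidence = agent.act(diff_state.ravel())
print(action_id, confidence.max())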
Example #5
def setup_custom(up=True, angle=None, hit_practice=False):
    default_angle = 90 if up else -90
    cfg = Config()
    cfg.RANDOMIZE_START = False
    if hit_practice:
        cfg.ENV_TYPE = cfg.HIT_PRACTICE
    if angle is not None:
        cfg.BALL_START_ANGLES = [angle]
    else:
        cfg.BALL_START_ANGLES = [default_angle]
    env = Pong(config=cfg, hit_practice=hit_practice)
    env.reset()
    return env
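A hedged usage sketch (parameter values are illustrative only):

# Hypothetical usage: a deterministic environment where the ball always
# launches at a fixed 60 degree angle, e.g. for regression tests.
env = setup_custom(up=False, angle=60)
cfg = Config.instance()
state, reward, done = env.step(cfg.ACTIONS[0], cfg.ACTIONS[0], frames=cfg.AI_FRAME_INTERVAL)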
Example #6
    def __init__(self, hit_practice=False, config=None):
        """
        Initialize basic game state
        :param hit_practice: Trigger training mode with a single paddle and randomly spawned balls
                             See the Ball class's hit_practice method.
        :param config: Optional Config instance; defaults to the shared Config.instance() singleton
        """
        if Pong.sounds is None:
            Pong.load_sounds()

        if config is None:
            config = Config.instance()

        self.config = config

        # Holds last raw screen pixels for rendering
        self.last_screen = None
        self.hit_practice = hit_practice
        self.score_bottom = 0
        self.score_top = 0
        self.bottom = Pong.Paddle("bottom", config=config) if not self.hit_practice else None
        self.top = Pong.Paddle("top", config=config)
        self.ball = Pong.Ball(hit_practice=hit_practice, config=config)
        self.frames = 0
            self.state.publish("paddle2/action", {"action": str(action)})
            self.state.publish("paddle2/frame", {"frame": current_frame_id})

        model_activation = self.agent.get_activation_packet()
        self.state.publish("ai/activation", model_activation)

        if len(self.frame_diffs) > 1000:
            print(
                f"Frame distribution: mean {np.mean(self.frame_diffs)}, stdev {np.std(self.frame_diffs)} counts {np.unique(self.frame_diffs, return_counts=True)}"
            )
            self.frame_diffs = []

    def __init__(self, config, paddle1=True):
        self.config = config
        self.paddle1 = paddle1
        self.paddle2 = not self.paddle1
        self.agent = PGAgent(self.config.CUSTOM_STATE_SIZE,
                             self.config.CUSTOM_ACTION_SIZE)
        self.agent.load(AIDriver.MODEL)
        self.state = AISubscriber(
            self.config, trigger_event=lambda: self.publish_inference())
        self.last_frame_id = self.state.frame
        self.last_tick = time.time()
        self.frame_diffs = []
        self.state.start()


if __name__ == "__main__":
    config = Config.instance()
    instance = AIDriver(config)
Example #8
import os

from multiprocessing import Pool
from tqdm import tqdm

from exhibit.shared.config import Config
"""
This file is the driver for training a new DRL pong model.
It brings together the following elements:

* The environment simulator (either the custom one found in pong.py or the Atari emulator provided by OpenAI Gym)
  Both environments are wrapped by the interface in simulator.py
* The two agents (some combination of human-controlled, DRL, and hard-coded agents found in player.py)
The level of abstraction in this file is pretty high, and it really only exists to further abstract the training
process into a few environmental and training hyperparameters that are easy to experiment with and to provide
convenient monitoring and graphing of the training process.
"""

GAME_BATCH = 10
MODE = Config.instance().HIT_PRACTICE  # Config.instance().CUSTOM
LEARNING_RATE = 0.001
DENSE_STRUCTURE = (200, )
ALWAYS_FOLLOW = False
PARALLELIZE = False

if __name__ == "__main__":
    # Ensure directory safety
    os.makedirs("models/bottom", exist_ok=True)
    os.makedirs("models/top", exist_ok=True)
    os.makedirs("analytics", exist_ok=True)
    os.makedirs("analytics/plots", exist_ok=True)

    # Initialize for checks & scope
    start_index = None
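The main block is cut off above. As a hedged illustration only (not the file's actual continuation), the rollout phase of training might look roughly like this, reusing simulate_game and PGAgent from the other excerpts; the policy-update call itself does not appear anywhere in this excerpt, so it is omitted:

# Hypothetical sketch: collect one batch of hit-practice trajectories.
config = Config.instance()
right_agent = PGAgent(config.CUSTOM_STATE_SIZE, config.CUSTOM_ACTION_SIZE)
states, left_data, right_data, metadata = simulate_game(
    config, env_type=MODE, right=right_agent, batch=GAME_BATCH)
actions_r, probs_r, rewards_r = right_data  # inputs for a policy-gradient update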
Example #9
def play_sound(sound):
    if Config.instance().ENABLE_AUDIO and Pong.sounds is not None and sound in Pong.sounds:
        try:
            Pong.sounds[sound].play()
        except Exception as e:
            print(e)
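A tiny usage sketch (assumes load_sounds has already run):

# Hypothetical usage: fire the bounce cue when the ball hits a wall.
Pong.play_sound("bounce")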
Example #10
def random_action():
    return choice(Config.instance().ACTIONS)
Example #11
import numpy as np
import cv2
import math
import keyboard
import time
from random import choice, randint
from exhibit.shared.config import Config

if Config.instance().ENABLE_AUDIO:
    import pygame.mixer


class Pong:
    """
    This class captures all of the game logic for Pong.
    It was used instead of OpenAI Gym or various other publicly available alternatives
    in order to allow for complete flexibility.
    """
    sounds = None

    @staticmethod
    def read_key(up, down):
        """
        Converts keyboard state to internal action state
        :param up: key code for "up" control
        :param down: key code for "down" control
        :return: Action code: 0 for up, 1 for down, 2 for nothing
        """
        if keyboard.is_pressed(up):
            return 0
        elif keyboard.is_pressed(down):
            return 1
        else:
            return 2
Example #12
def simulate_game(config, env_type=Config.instance().CUSTOM, left=None, right=None, batch=1, visualizer=None):
    """
    Wraps both the OpenAI Gym Atari Pong environment and the custom
    Pong environment in a common interface, useful to test the same training setup
    against both environments
    """
    env = None
    state_size = None
    games_remaining = batch
    state_shape = config.CUSTOM_STATE_SHAPE

    if env_type == config.CUSTOM:
        env = Pong()
        state_size = config.CUSTOM_STATE_SIZE
        state_shape = config.CUSTOM_STATE_SHAPE
        if type(left) == BotPlayer: left.attach_env(env)
        if type(right) == BotPlayer: right.attach_env(env)
    elif env_type == config.HIT_PRACTICE:
        env = Pong(hit_practice=True)
        state_size = config.CUSTOM_STATE_SIZE
        state_shape = config.CUSTOM_STATE_SHAPE
        if type(right) == BotPlayer: right.attach_env(env)

    # Training data
    states = []
    states_flipped = []
    actions_l = []
    actions_r = []
    rewards_l = []
    rewards_r = []
    probs_l = []
    probs_r = []

    # Prepare to collect fun data for visualizations
    render_states = []
    model_states = []
    score_l = 0
    score_r = 0
    last_state = np.zeros(state_shape)
    state = env.reset()
    if visualizer is not None:
        visualizer.base_render(utils.preprocess_custom(state))
    i = 0
    while True:
        render_states.append(state.astype(np.uint8))
        current_state = utils.preprocess_custom(state)
        diff_state = current_state - last_state
        model_states.append(diff_state.astype(np.uint8))
        diff_state_rev = np.flip(diff_state, axis=1)
        last_state = current_state
        action_l, prob_l, action_r, prob_r = None, None, None, None
        x = diff_state.ravel()
        x_flip = diff_state_rev.ravel()
        if left is not None: action_l, prob_l = left.act(x_flip)
        if right is not None: action_r, prob_r = right.act(x)
        states.append(x)

        state, reward, done = None, None, None
        if env_type == config.HIT_PRACTICE:
            state, reward, done = env.step(None, config.ACTIONS[action_r], frames=config.AI_FRAME_INTERVAL)
        else:
            state, reward, done = env.step(config.ACTIONS[action_l], config.ACTIONS[action_r], frames=config.AI_FRAME_INTERVAL)

        reward_l = float(reward[0])
        reward_r = float(reward[1])

        # Save observations
        probs_l.append(prob_l)
        probs_r.append(prob_r)
        actions_l.append(action_l)
        actions_r.append(action_r)
        rewards_l.append(reward_l)
        rewards_r.append(reward_r)

        if reward_r < 0: score_l -= reward_r
        if reward_r > 0: score_r += reward_r

        if done:
            games_remaining -= 1
            print('Score: %f - %f.' % (score_l, score_r))
            utils.write(f'{score_l},{score_r}', f'analytics/scores.csv')
            if games_remaining == 0:
                metadata = (render_states, model_states, (score_l, score_r))
                return states, (actions_l, probs_l, rewards_l), (actions_r, probs_r, rewards_r), metadata
            else:
                score_l, score_r = 0, 0
                state = env.reset()
        i += 1
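A hedged usage sketch of the common interface (the BotPlayer constructor arguments are an assumption; only its attach_env and act usage appear in these excerpts):

# Hypothetical usage: pit a scripted bot (left) against a policy-gradient
# agent (right) for a batch of ten games in the custom environment.
config = Config.instance()
left = BotPlayer()
right = PGAgent(config.CUSTOM_STATE_SIZE, config.CUSTOM_ACTION_SIZE)
states, left_data, right_data, metadata = simulate_game(
    config, env_type=config.CUSTOM, left=left, right=right, batch=10)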