import numpy as np
from atari_py import list_games
from gym.envs.registration import registry

def get_last_atari_game_versions():
    """Return the highest-versioned NoFrameskip env id for each Atari game."""
    atari_games = []
    for game in list_games():
        # e.g. game 'ms_pacman' matches env ids like 'MsPacmanNoFrameskip-v4'
        versions = [game_id for game_id in registry.env_specs.keys()
                    if game.replace('_', '') + 'noframeskip-v' in game_id.lower()]
        if len(versions) > 0:
            atari_games.append(str(np.sort(versions)[-1]))
    return atari_games
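A quick usage sketch (assuming gym and atari_py are installed, so the Atari env ids are registered):

games = get_last_atari_game_versions()
print(games[:2])  # e.g. ['AdventureNoFrameskip-v4', 'AirRaidNoFrameskip-v4']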
Example #2
    def __init__(self, env_name):
        game_path = gym.make(env_name).env.game_path
        if not os.path.exists(game_path):
            raise IOError('Requested environment (%s) does not exist '
                          'in valid list of environments:\n%s'
                          % (env_name, ', '.join(sorted(atari_py.list_games()))))
        super(Rom, self).__init__(game_path)
Example #3
    def _args(self):
        parser = argparse.ArgumentParser()
        available_games = list((''.join(x.capitalize() or '_'
                                        for x in word.split('_'))
                                for word in atari_py.list_games()))
        parser.add_argument("-g",
                            "--game",
                            help="Choose from available games: " +
                            str(available_games) + ". Default is 'breakout'.",
                            default="Breakout")
        parser.add_argument(
            "-m",
            "--mode",
            help="Choose from available modes: ddqn_train, ddqn_test, ge_train, "
            "ge_test. Default is 'ddqn_training'.",
            default="ddqn_training")
        parser.add_argument(
            "-r",
            "--render",
            help="Choose if the game should be rendered. Default is 'False'.",
            default=False,
            # argparse's type=bool treats any non-empty string (even "False")
            # as True, so parse the value explicitly instead
            type=lambda s: str(s).lower() in ("true", "1"))
        parser.add_argument(
            "-tsl",
            "--total_step_limit",
            help="Choose how many total steps (frames visible by agent) "
            "should be performed. Default is '5000000'.",
            default=5000000,
            type=int)
        parser.add_argument(
            "-trl",
            "--total_run_limit",
            help="Choose after how many runs we should stop. "
            "Default is None (no limit).",
            default=None,
            type=int)
        parser.add_argument(
            "-c",
            "--clip",
            help="Choose whether we should clip rewards to (0, 1) range. "
            "Default is 'True'.",
            default=True,
            # same pitfall as --render: type=bool would turn "False" into True
            type=lambda s: str(s).lower() in ("true", "1"))
        args = parser.parse_args()
        game_mode = args.mode
        game_name = args.game
        render = args.render
        total_step_limit = args.total_step_limit
        total_run_limit = args.total_run_limit
        clip = args.clip

        print("Selected game: " + str(game_name))
        print("Selected mode: " + str(game_mode))
        print("Should render: " + str(render))
        print("Should clip: " + str(clip))
        print("Total step limit: " + str(total_step_limit))
        print("Total run limit: " + str(total_run_limit))

        return game_name, game_mode, render, total_step_limit, total_run_limit, clip
Example #4
def create_env(env_name, algo, n_envs):
    env = gym.make(env_name)
    # Wrap Atari environments with the DeepMind-style wrapper
    atari_env_list = atari_py.list_games()
    for atari_env in atari_env_list:
        # strip underscores so e.g. 'ms_pacman' matches 'MsPacmanNoFrameskip-v4'
        if atari_env.replace('_', '') in env_name.lower():
            env = wrap_deepmind(env)
            env = TransposeFrame(env)
            break
    # Parallelize the environment
    env = ParallelEnv(n_envs, env)
    return env
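A usage sketch; wrap_deepmind, TransposeFrame, and ParallelEnv are the surrounding project's helpers and are not defined in this snippet:

env = create_env('BreakoutNoFrameskip-v4', algo='a2c', n_envs=8)
obs = env.reset()  # the parallel wrapper presumably returns one observation per worker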
Example #5
import re

import atari_py as ap

def is_atari(environment):
    # Strip gym id decorations (ram/determinism/frameskip markers, version suffix)
    for field in [
            'ramDeterministic', 'ramNoFrameSkip', 'NoFrameskip',
            'Deterministic', 'ram'
    ]:
        environment = environment.replace(field, '')
    environment = re.sub(r'-v\d+', '', environment)
    environment = camel_to_snake_case(environment)
    return environment in ap.list_games()
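The snippet assumes a camel_to_snake_case helper that is not shown; a minimal sketch of one:

import re

def camel_to_snake_case(name):
    # insert an underscore before each interior capital, then lowercase:
    # 'MsPacman' -> 'ms_pacman'
    return re.sub(r'(?<!^)(?=[A-Z])', '_', name).lower()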
Example #6
    def start(self):
        import gym  # lazy-loading
        gym.logger.set_level(40)  # avoid annoying internal warn messages

        self._env = gym.make(self.id)

        try:
            import atari_py
            from reaver.envs.atari import AtariPreprocessing
        except ImportError:
            return

        # strip underscores so e.g. 'ms_pacman' matches 'MsPacmanNoFrameskip-v4'
        if any(game.replace('_', '') in self.id.lower() for game in atari_py.list_games()):
            self._env = AtariPreprocessing(self._env.env)
Example #7
def atari0():
    '''
        Getter for a list of all the -v0 Atari games provided in OpenAI gym

        Returns:
            The list of -v0 Atari games
    '''
    import atari_py as ap
    games = []
    for g in ap.list_games():
        # snake_case -> CamelCase, e.g. 'ms_pacman' -> 'MsPacman-v0'
        parts = [part.capitalize() for part in g.split("_")]
        games.append(''.join(parts) + "-v0")
    return games
Example #8
import pkgutil

import atari_py
import gym

def get_gym_submodules_and_environments():
    atari_games = atari_py.list_games()

    print('Searching gym.envs for submodules:')
    environments = gym.envs.registry.all()

    for importer, modname, ispkg in pkgutil.iter_modules(gym.envs.__path__):
        print('  Found submodule {} (Package: {})'.format(modname, ispkg))
        try:
            m = importer.find_module(modname).load_module(modname)
        except gym.error.DependencyNotInstalled:
            continue  # skip submodules whose backend dependency is missing
        if ispkg:
            for _, envname, _ in pkgutil.iter_modules(getattr(gym.envs, modname).__path__):
                print('    Found environment {}'.format(envname))
Example #9
    def start(self):
        import gym  # lazy-loading
        gym.logger.set_level(40)  # avoid annoying internal warn messages

        self._env = gym.make(self.id)

        try:
            import atari_py
            from reaver.envs.atari import AtariPreprocessing
        except ImportError:
            return

        # strip underscores so e.g. 'ms_pacman' matches 'MsPacmanNoFrameskip-v4'
        if any(game.replace('_', '') in self.id.lower() for game in atari_py.list_games()):
            self._env = AtariPreprocessing(self._env.env)

        self.make_specs(running=True)
Example #10
import atari_py

def get_games():
    not_in = [
        'adventure',
        'air_raid',
        'carnival',
        'elevator_action',
        'journey_escape',
        'kaboom',
        'pooyan',
    ]
    games = atari_py.list_games()

    games = list(set(games) - set(not_in))

    games.sort()
    games = [upper_1st(game) + "-v0" for game in games]
    return games
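upper_1st is not defined in this snippet; since gym's "-v0" ids are CamelCase, it presumably maps 'ms_pacman' to 'MsPacman'. A minimal sketch under that assumption:

def upper_1st(game):
    # assumed behaviour: 'ms_pacman' -> 'MsPacman'
    return ''.join(part.capitalize() for part in game.split('_'))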
Example #11
import argparse

import atari_py
import numpy as np

from tqdm import trange

from curl_rainbow.env import Env
from curl_rainbow.test import test
from curl_rainbow.agent import Agent
from curl_rainbow.memory import ReplayMemory

seed = np.random.randint(12345)
# Note that hyperparameters may originally be reported in ATARI game frames instead of agent steps
parser = argparse.ArgumentParser(description='Rainbow')
parser.add_argument('--id', type=str, default='default', help='Experiment ID')
parser.add_argument('--seed', type=int, default=seed, help='Random seed')
parser.add_argument('--disable-cuda', action='store_true', help='Disable CUDA')
parser.add_argument('--game', type=str, default='ms_pacman', choices=atari_py.list_games(), help='ATARI game')
parser.add_argument('--T-max', type=int, default=int(1e5), metavar='STEPS', help='Number of training steps (4x number of frames)')
parser.add_argument('--max-episode-length', type=int, default=int(108e3), metavar='LENGTH', help='Max episode length in game frames (0 to disable)')
parser.add_argument('--history-length', type=int, default=4, metavar='T', help='Number of consecutive states processed')
parser.add_argument('--architecture', type=str, default='data-efficient', choices=['canonical', 'data-efficient'], metavar='ARCH', help='Network architecture')
parser.add_argument('--hidden-size', type=int, default=256, metavar='SIZE', help='Network hidden size')
parser.add_argument('--noisy-std', type=float, default=0.1, metavar='σ', help='Initial standard deviation of noisy linear layers')
parser.add_argument('--atoms', type=int, default=51, metavar='C', help='Discretised size of value distribution')
parser.add_argument('--V-min', type=float, default=-10, metavar='V', help='Minimum of value distribution support')
parser.add_argument('--V-max', type=float, default=10, metavar='V', help='Maximum of value distribution support')
parser.add_argument('--model', type=str, metavar='PARAMS', help='Pretrained model (state dict)')
parser.add_argument('--memory-capacity', type=int, default=int(1e5), metavar='CAPACITY', help='Experience replay memory capacity')
parser.add_argument('--replay-frequency', type=int, default=1, metavar='k', help='Frequency of sampling from memory')
parser.add_argument('--priority-exponent', type=float, default=0.5, metavar='ω', help='Prioritised experience replay exponent (originally denoted α)')
parser.add_argument('--priority-weight', type=float, default=0.4, metavar='β', help='Initial prioritised experience replay importance sampling weight')
parser.add_argument('--multi-step', type=int, default=20, metavar='n', help='Number of steps for multi-step return')
Example #12
import argparse

import atari_py

def get_args():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--total-env-steps',
        type=int,
        default=10000000,
        help='Total number of env steps to train (default: 10000000)')
    parser.add_argument('--num-envs',
                        type=int,
                        default=64,
                        help='Number of parallel envs to run')
    parser.add_argument('--num-trainers',
                        type=int,
                        default=1,
                        help='Number of training workers')
    parser.add_argument('--num-reanalyze-envs',
                        type=int,
                        default=64,
                        help='Number of parallel reanalyze envs to run')
    parser.add_argument('--num-reanalyze-workers',
                        type=int,
                        default=1,
                        help='Number of reanalyze workers')
    parser.add_argument('--sync-envs', action='store_true')
    parser.add_argument('--debug-reanalyze', action='store_true')
    parser.add_argument('--fp16', action='store_true')
    parser.add_argument('--buffer-size', type=int, default=500000)
    parser.add_argument('--target-update-interval',
                        type=int,
                        default=100,
                        help="Number of gradient steps for each update to the "
                        "target network.  <=0 to disable target network.")
    parser.add_argument('--no-gpu-0-train', action='store_true')
    parser.add_argument('--input-priorities', action='store_true')
    parser.add_argument('--q-dirichlet', action='store_true')
    parser.add_argument(
        '--cpu-search',
        action='store_true',
        help="Put everything except MCTS inference calls on CPU")
    parser.add_argument('--epoch-steps',
                        type=int,
                        default=50,
                        help="Number of gradient steps between log outputs.")
    parser.add_argument(
        '--replay-ratio',
        type=float,
        default=-2.,
        help="Upper bound of async replay ratio.  -1 disables.")
    parser.add_argument('--seed',
                        type=int,
                        default=42,
                        help='Random seed to use')
    parser.add_argument('--game',
                        type=str,
                        default='space_invaders',
                        choices=atari_py.list_games(),
                        help='ATARI game')
    parser.add_argument(
        '--framestack',
        type=int,
        default=4,
        metavar='T',
        help='Number of consecutive frames stacked to form an observation')
    parser.add_argument('--grayscale', action='store_true')
    parser.add_argument(
        '--max-episode-length',
        type=int,
        default=int(108e3),
        metavar='LENGTH',
        help='Max episode length in game frames (0 to disable)')
    parser.add_argument('--discount', type=float, default=0.99)
    parser.add_argument(
        '--evaluation-episodes',
        type=int,
        default=8,
        help='Number of episodes to average over when evaluating')

    # MCTS arguments
    parser.add_argument('--num-simulations', type=int, default=10)
    parser.add_argument('--eval-simulations', type=int, default=25)
    parser.add_argument('--virtual-threads', type=int, default=3)
    parser.add_argument('--virtual-loss-c', type=float, default=1.)
    parser.add_argument('--c1',
                        type=float,
                        default=1.25,
                        help='UCB c1 constant')
    parser.add_argument('--dirichlet-alpha',
                        type=float,
                        default=0.25,
                        help='Root dirichlet alpha')
    parser.add_argument(
        '--visit-temp',
        type=float,
        default=0.5,
        help='Visit counts softmax temperature for sampling actions')

    # PiZero arguments
    parser.add_argument('--batch-size-per-worker',
                        type=int,
                        default=128,
                        help='Batch size per GPU to use during training')
    parser.add_argument('--learning-rate',
                        type=float,
                        default=0.0003,
                        metavar='η',
                        help='Learning rate')
    parser.add_argument('--optim',
                        type=str,
                        default='adam',
                        choices=["adam", "sgd"],
                        help='Optimizer')
    parser.add_argument('--lr-decay-steps',
                        type=float,
                        default=350.e3,
                        help='Learning rate decay time constant')
    parser.add_argument('--lr-decay',
                        type=float,
                        default=0.1,
                        help='Learning rate decay scale')
    parser.add_argument('--momentum',
                        type=float,
                        default=0.9,
                        help='SGD momentum')
    parser.add_argument('--grad-scale-factor',
                        type=float,
                        default=0.5,
                        help='Rollout gradient downscaling. 1 disables')
    parser.add_argument('--adam-eps',
                        type=float,
                        default=1e-4,
                        help='Adam epsilon')
    parser.add_argument('--weight-decay',
                        type=float,
                        default=1e-4,
                        help='Weight decay regularization constant')
    parser.add_argument('--hidden-size',
                        type=int,
                        default=256,
                        help='Hidden size of various MLPs')
    parser.add_argument('--dynamics-blocks',
                        type=int,
                        default=16,
                        help='# of resblocks in dynamics model')
    parser.add_argument('--multistep',
                        type=int,
                        default=1,
                        help='n-step for bootstrapping value targets')
    parser.add_argument('--training-start',
                        type=int,
                        default=100000,
                        help='env steps to wait before starting training')
    parser.add_argument(
        '--priority-exponent',
        type=float,
        default=1.,
        metavar='ω',
        help='Prioritised experience replay exponent (originally denoted α)')
    parser.add_argument(
        '--priority-weight',
        type=float,
        default=1.,
        metavar='β',
        help='Initial prioritised experience replay importance sampling weight'
    )
    parser.add_argument('--jumps', type=int, default=5, help='')
    parser.add_argument('--value-loss-weight', type=float, default=1.)
    parser.add_argument('--policy-loss-weight', type=float, default=1.)
    parser.add_argument('--reward-loss-weight', type=float, default=1.)
    parser.add_argument('--contrastive-loss-weight', type=float, default=1.)
    parser.add_argument('--entropy-loss-weight', type=float, default=0.0)
    parser.add_argument('--init-value-scale', type=float, default=1.)
    parser.add_argument('--film', action='store_true')
    parser.add_argument('--profile', action='store_true')
    parser.add_argument('--no-nce', action='store_true')
    parser.add_argument('--reanalyze', action='store_true')
    parser.add_argument('--use-all-targets', action='store_true')
    parser.add_argument('--q-learning', action='store_true')
    parser.add_argument('--local-target-net', action='store_true')
    parser.add_argument('--epsilon', type=float, default=0.01)
    parser.add_argument('--no-search-value-targets', action='store_true')
    parser.add_argument('--prioritized', action='store_true')
    parser.add_argument(
        '--evaluation-interval',
        type=int,
        default=80000,
        help='Evaluate after every {evaluation-interval} env steps')
    parser.add_argument(
        '--log-interval',
        type=int,
        default=3200,
        help='Log after every {log-interval} env steps')

    parser.add_argument('--wandb-proj', type=str, default='pizero')
    parser.add_argument('--name', type=str, default='')
    parser.add_argument('--savedir', type=str, default='./out')

    args = parser.parse_args()
    args.max_episode_length = int(108e3)
    args.batch_size = args.batch_size_per_worker * args.num_trainers
    return args
Example #13
import sys
for p in sys.path:
    print(p)
import atari_py
print(atari_py.list_games())
Example #14
import gym
import atari_py as ap

all_envs = gym.envs.registry.all()
env_ids = [env_spec.id for env_spec in all_envs]
env_ids = [x for x in env_ids if '-ram' not in x]
env_ids = [x for x in env_ids if 'Deterministic' not in x]
env_ids = [x for x in env_ids if 'NoFrameskip' in x]

atari_games = ap.list_games()
env_ids = [
    x for x in env_ids if any(
        game.replace('_', '') in x.lower() for game in atari_games)
]
env_ids = [x for x in env_ids if 'v4' in x]

#env_p = gym.make('PitfallNoFrameskip-v4')
#import pdb; pdb.set_trace()
action_meanings = [(x, gym.make(x).unwrapped.get_action_meanings())
                   for x in env_ids]
for pair in action_meanings:
    print(pair[0], *pair[1])

#print([game for game in atari_games if all(game.replace('_', '') not in x.lower() for x in env_ids)])
# ['kaboom', 'adventure', 'defender']  these games are missing from gym
Example #15
# # Collect all atari environments from gym

# In[2]:

envids = [spec.id for spec in gym.envs.registry.all()]
dir(gym.envs.registry)

envs_pixel = []
for envid in envids:
    if 'ram' not in envid and '0' in envid:
        envs_pixel.append(envid)

# In[3]:

# a = [gym.make(env).action_space for env in envs_pixel]
games = ap.list_games()
games.sort()

# In[4]:


def snake_to_camel(word):
    return ''.join(x.capitalize() or '_' for x in word.split('_'))


games = [snake_to_camel(game) + '-v0' for game in games]
envs = list(map(gym.make, [game for game in games if game in envs_pixel]))

# In[5]:

games = sorted([(game, gym.make(game).action_space.n)
                for game in games if game in envs_pixel])
Example #16
import os

import atari_py
import gym
import numpy as np
from gym import error, spaces, utils
from gym.utils import seeding
# ACTION_MEANING (action index -> name) is defined by gym's bundled Atari env
from gym.envs.atari.atari_env import ACTION_MEANING


class Environment(gym.Env, utils.EzPickle):
    """
    The Atari Environment
        (inherits from the openai gym environment: https://gym.openai.com/docs/)
    
    Environment(
        game="pong",                   # use Environment.available_games to see available games
        mode=None,                     # use Environment.available_modes_for(game) to see this list
        difficulty=None,
        obs_type="image",              # or "ram"
        frameskip=(2, 5),              # random skip in [2, 5), i.e. 2-4 frames
        repeat_action_probability=0.0, # 0 means deterministic
        full_action_space=False,
    )
    """
    metadata = {"render.modes": ["human", "rgb_array"]}
    available_games = atari_py.list_games()

    @classmethod
    def available_modes_for(cls, game):
        ale = atari_py.ALEInterface()
        # load up the game
        ale.setInt(b"random_seed", 0)
        ale.loadROM(atari_py.get_game_path(game))
        return ale.getAvailableModes()

    @staticmethod
    def _to_ram(ale):
        ram_size = ale.getRAMSize()
        ram = np.zeros(ram_size, dtype=np.uint8)
        ale.getRAM(ram)
        return ram

    def __init__(
        self,
        game="pong",
        mode=None,
        difficulty=None,
        obs_type="image",
        frameskip=(2, 5),
        repeat_action_probability=0.0,
        full_action_space=False,
    ):
        """
        Arguments:
            game: the name of the game ("pong", "Enduro", etc) dont add the "-v0"
            mode: different modes are available for different games.
            frameskip should be either a tuple (indicating a random range to choose from, with the top value exclude), or an int.
        """

        utils.EzPickle.__init__(self, game, mode, difficulty, obs_type,
                                frameskip, repeat_action_probability)
        assert obs_type in ("ram", "image")

        self.game = game
        self.game_path = atari_py.get_game_path(game)
        self.game_mode = mode
        self.game_difficulty = difficulty

        if not os.path.exists(self.game_path):
            msg = "You asked for game %s but path %s does not exist"
            raise IOError(msg % (game, self.game_path))
        self._obs_type = obs_type
        self.frameskip = frameskip
        self.ale = atari_py.ALEInterface()
        self.viewer = None

        # Tune (or disable) ALE's action repeat:
        # https://github.com/openai/gym/issues/349
        assert isinstance(
            repeat_action_probability,
            (float, int)), "Invalid repeat_action_probability: {!r}".format(
                repeat_action_probability)
        self.ale.setFloat("repeat_action_probability".encode("utf-8"),
                          repeat_action_probability)

        self.seed()

        self._action_set = (self.ale.getLegalActionSet() if full_action_space
                            else self.ale.getMinimalActionSet())
        self.action_space = spaces.Discrete(len(self._action_set))

        (screen_width, screen_height) = self.ale.getScreenDims()
        if self._obs_type == "ram":
            self.observation_space = spaces.Box(low=0,
                                                high=255,
                                                dtype=np.uint8,
                                                shape=(128, ))
        elif self._obs_type == "image":
            self.observation_space = spaces.Box(low=0,
                                                high=255,
                                                shape=(screen_height,
                                                       screen_width, 3),
                                                dtype=np.uint8)
        else:
            raise error.Error("Unrecognized observation type: {}".format(
                self._obs_type))

    def seed(self, seed=None):
        self.np_random, seed1 = seeding.np_random(seed)
        # Derive a random seed. This gets passed as a uint, but gets
        # checked as an int elsewhere, so we need to keep it below
        # 2**31.
        seed2 = seeding.hash_seed(seed1 + 1) % 2**31
        # Empirically, we need to seed before loading the ROM.
        self.ale.setInt(b"random_seed", seed2)
        self.ale.loadROM(self.game_path)

        if self.game_mode is not None:
            modes = self.ale.getAvailableModes()

            assert self.game_mode in modes, (
                'Invalid game mode "{}" for game {}.\nAvailable modes are: {}'
            ).format(self.game_mode, self.game, modes)
            self.ale.setMode(self.game_mode)

        if self.game_difficulty is not None:
            difficulties = self.ale.getAvailableDifficulties()

            assert self.game_difficulty in difficulties, (
                'Invalid game difficulty "{}" for game {}.\nAvailable difficulties are: {}'
            ).format(self.game_difficulty, self.game, difficulties)
            self.ale.setDifficulty(self.game_difficulty)

        return [seed1, seed2]

    def step(self, a):
        reward = 0.0
        action = self._action_set[a]

        if isinstance(self.frameskip, int):
            num_steps = self.frameskip
        else:
            num_steps = self.np_random.randint(self.frameskip[0],
                                               self.frameskip[1])
        for _ in range(num_steps):
            reward += self.ale.act(action)
        ob = self._get_obs()

        return ob, reward, self.ale.game_over(), {
            "ale.lives": self.ale.lives()
        }

    def _get_image(self):
        return self.ale.getScreenRGB2()

    def _get_ram(self):
        return Environment._to_ram(self.ale)

    @property
    def _n_actions(self):
        return len(self._action_set)

    def _get_obs(self):
        if self._obs_type == "ram":
            return self._get_ram()
        # obs_type was validated in __init__, so it must be "image" here
        return self._get_image()

    # return: the initial observation
    def reset(self):
        self.ale.reset_game()
        return self._get_obs()

    def render(self, mode="human"):
        img = self._get_image()
        if mode == "rgb_array":
            return img
        elif mode == "human":
            from gym.envs.classic_control import rendering

            if self.viewer is None:
                self.viewer = rendering.SimpleImageViewer()
            self.viewer.imshow(img)
            return self.viewer.isopen

    def close(self):
        if self.viewer is not None:
            self.viewer.close()
            self.viewer = None

    def get_action_meanings(self):
        return [ACTION_MEANING[i] for i in self._action_set]

    def get_keys_to_action(self):
        KEYWORD_TO_KEY = {
            "UP": ord("w"),
            "DOWN": ord("s"),
            "LEFT": ord("a"),
            "RIGHT": ord("d"),
            "FIRE": ord(" "),
        }

        keys_to_action = {}

        for action_id, action_meaning in enumerate(self.get_action_meanings()):
            keys = []
            for keyword, key in KEYWORD_TO_KEY.items():
                if keyword in action_meaning:
                    keys.append(key)
            keys = tuple(sorted(keys))

            assert keys not in keys_to_action
            keys_to_action[keys] = action_id

        return keys_to_action

    def clone_state(self):
        """Clone emulator state w/o system state. Restoring this state will
        *not* give an identical environment. For complete cloning and restoring
        of the full state, see `{clone,restore}_full_state()`."""
        state_ref = self.ale.cloneState()
        state = self.ale.encodeState(state_ref)
        self.ale.deleteState(state_ref)
        return state

    def restore_state(self, state):
        """Restore emulator state w/o system state."""
        state_ref = self.ale.decodeState(state)
        self.ale.restoreState(state_ref)
        self.ale.deleteState(state_ref)

    def clone_full_state(self):
        """Clone emulator state w/ system state including pseudorandomness.
        Restoring this state will give an identical environment."""
        state_ref = self.ale.cloneSystemState()
        state = self.ale.encodeState(state_ref)
        self.ale.deleteState(state_ref)
        return state

    def restore_full_state(self, state):
        """Restore emulator state w/ system state including pseudorandomness."""
        state_ref = self.ale.decodeState(state)
        self.ale.restoreSystemState(state_ref)
        self.ale.deleteState(state_ref)
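A usage sketch for the class above, following its own docstring (assumes the ROMs bundled with atari_py):

env = Environment(game="pong", obs_type="image", frameskip=4)
obs = env.reset()
obs, reward, done, info = env.step(env.action_space.sample())
env.close()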
Example #17
import numpy as np
import cv2
import gym
import torch
import torch.nn as nn
#colab install for universe
#!git clone https://github.com/openai/universe.git
#!cd universe
#!pip install -e .
#!pip install 'gym[atari]'
#!pip install universe
import universe # register the universe environments
import atari_py as ap #for list
from collections import defaultdict #for Q ie: state Q values store
from collections import namedtuple #for transitions store ie: memory, buffer, ReplayMemory

# list of the games
game_list = ap.list_games()
print(sorted(game_list))

# custom weights initialization
def weights_init(m):
    classname = m.__class__.__name__
    if classname.find('Conv') != -1:
        nn.init.normal_(m.weight.data, 0.0, 0.02)
    elif classname.find('BatchNorm') != -1:
        nn.init.normal_(m.weight.data, 1.0, 0.02)
        nn.init.constant_(m.bias.data, 0)

def conv(in_channels, out_channels, kernel_size, stride=2, padding=1, batch_norm=True, init_zero_weights=False):
    """Creates a convolutional layer, with optional batch normalization."""
    layers = [nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding, bias=False)]
    if init_zero_weights:
        layers[0].weight.data = torch.randn(out_channels, in_channels, kernel_size, kernel_size) * 0.001
    if batch_norm:
        layers.append(nn.BatchNorm2d(out_channels))
    return nn.Sequential(*layers)
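A quick check of the two helpers above (the layer sizes here are arbitrary):

encoder = conv(in_channels=4, out_channels=32, kernel_size=4)  # 4 stacked frames in, 32 feature maps out
encoder.apply(weights_init)  # apply the custom initialization defined above
x = torch.zeros(1, 4, 84, 84)  # dummy batch of one 84x84 Atari observation stack
print(encoder(x).shape)  # torch.Size([1, 32, 42, 42]) with the default stride of 2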
Example #18
import argparse

import numpy as np
import atari_py

from env import Env
from memory import ReplayMemory
from test import test
import procgen
import LocalProcEnv

seed = np.random.randint(12345)
# Note that hyperparameters may originally be reported in ATARI game frames instead of agent steps
parser = argparse.ArgumentParser(description='Rainbow')
parser.add_argument('--id', type=str, default='default', help='Experiment ID')
parser.add_argument('--seed', type=int, default=seed, help='Random seed')
parser.add_argument('--disable-cuda', action='store_true', help='Disable CUDA')
parser.add_argument('--game',
                    type=str,
                    default='ms_pacman',
                    choices=list(atari_py.list_games()) +
                    procgen.env.ENV_NAMES,
                    help='ATARI game')
parser.add_argument('--T-max',
                    type=int,
                    default=int(1e5),
                    metavar='STEPS',
                    help='Number of training steps (4x number of frames)')
parser.add_argument('--max-episode-length',
                    type=int,
                    default=int(108e3),
                    metavar='LENGTH',
                    help='Max episode length in game frames (0 to disable)')
parser.add_argument('--history-length',
                    type=int,
                    default=4,
                    metavar='T',
                    help='Number of consecutive states processed')
Example #19

import argparse

import atari_py

def parse_arguments():

    parser = argparse.ArgumentParser(description='Rainbow')
    parser.add_argument('--id',
                        type=str,
                        default='default',
                        help='Experiment ID')
    parser.add_argument('--seed', type=int, default=123, help='Random seed')
    parser.add_argument('--disable-cuda',
                        action='store_true',
                        help='Disable CUDA')
    parser.add_argument('--game',
                        type=str,
                        default='space_invaders',
                        choices=atari_py.list_games(),
                        help='ATARI game')
    parser.add_argument('--T-max',
                        type=int,
                        default=int(50e6),
                        metavar='STEPS',
                        help='Number of training steps (4x number of frames)')
    parser.add_argument(
        '--max-episode-length',
        type=int,
        default=int(108e3),
        metavar='LENGTH',
        help='Max episode length in game frames (0 to disable)')
    parser.add_argument('--history-length',
                        type=int,
                        default=4,
                        metavar='T',
                        help='Number of consecutive states processed')
    parser.add_argument('--hidden-size',
                        type=int,
                        default=512,
                        metavar='SIZE',
                        help='Network hidden size')
    parser.add_argument(
        '--noisy-std',
        type=float,
        default=0.1,
        metavar='σ',
        help='Initial standard deviation of noisy linear layers')
    parser.add_argument('--atoms',
                        type=int,
                        default=51,
                        metavar='C',
                        help='Discretised size of value distribution')
    parser.add_argument('--V-min',
                        type=float,
                        default=-10,
                        metavar='V',
                        help='Minimum of value distribution support')
    parser.add_argument('--V-max',
                        type=float,
                        default=10,
                        metavar='V',
                        help='Maximum of value distribution support')
    parser.add_argument('--model',
                        type=str,
                        metavar='PARAMS',
                        help='Pretrained model (state dict)')
    parser.add_argument('--memory-capacity',
                        type=int,
                        default=int(1e6),
                        metavar='CAPACITY',
                        help='Experience replay memory capacity')
    parser.add_argument('--replay-frequency',
                        type=int,
                        default=4,
                        metavar='k',
                        help='Frequency of sampling from memory')
    parser.add_argument(
        '--priority-exponent',
        type=float,
        default=0.5,
        metavar='ω',
        help='Prioritised experience replay exponent (originally denoted α)')
    parser.add_argument(
        '--priority-weight',
        type=float,
        default=0.4,
        metavar='β',
        help='Initial prioritised experience replay importance sampling weight'
    )
    parser.add_argument('--multi-step',
                        type=int,
                        default=3,
                        metavar='n',
                        help='Number of steps for multi-step return')
    parser.add_argument('--discount',
                        type=float,
                        default=0.99,
                        metavar='γ',
                        help='Discount factor')
    parser.add_argument(
        '--target-update',
        type=int,
        default=int(8e3),
        metavar='τ',
        help='Number of steps after which to update target network')
    parser.add_argument('--reward-clip',
                        type=int,
                        default=1,
                        metavar='VALUE',
                        help='Reward clipping (0 to disable)')
    parser.add_argument('--learning-rate',
                        type=float,
                        default=0.0000625,
                        metavar='η',
                        help='Learning rate')
    parser.add_argument('--adam-eps',
                        type=float,
                        default=1.5e-4,
                        metavar='ε',
                        help='Adam epsilon')
    parser.add_argument('--batch-size',
                        type=int,
                        default=32,
                        metavar='SIZE',
                        help='Batch size')
    parser.add_argument('--norm-clip',
                        type=float,
                        default=10,
                        metavar='NORM',
                        help='Max L2 norm for gradient clipping')
    parser.add_argument('--learn-start',
                        type=int,
                        default=int(20e3),
                        metavar='STEPS',
                        help='Number of steps before starting training')
    parser.add_argument('--evaluate',
                        action='store_true',
                        help='Evaluate only')
    parser.add_argument('--evaluation-interval',
                        type=int,
                        default=100000,
                        metavar='STEPS',
                        help='Number of training steps between evaluations')
    parser.add_argument('--evaluation-episodes',
                        type=int,
                        default=10,
                        metavar='N',
                        help='Number of evaluation episodes to average over')
    parser.add_argument('--evaluation-size',
                        type=int,
                        default=500,
                        metavar='N',
                        help='Number of transitions to use for validating Q')
    parser.add_argument('--render',
                        action='store_true',
                        help='Display screen (testing only)')
    parser.add_argument('--enable-cudnn',
                        action='store_true',
                        help='Enable cuDNN (faster but nondeterministic)')
    parser.add_argument(
        '--checkpoint-interval',
        type=int,
        default=0,
        help='How often to checkpoint the model, defaults to 0 (never checkpoint)')
    parser.add_argument('--memory', help='Path to save/load the memory from')
    parser.add_argument(
        '--disable-bzip-memory',
        action='store_true',
        help='Don\'t zip the memory file. Not recommended (zipping is a bit '
        'slower but the file is much, much smaller)')
    parser.add_argument('--tensorboard-dir',
                        type=str,
                        default=None,
                        help='tensorboard directory')
    parser.add_argument('--architecture',
                        type=str,
                        default='canonical',
                        choices=[
                            'canonical', 'depth_1', 'depth_2', 'depth_3',
                            'depth_4', 'width_1', 'width_2', 'width_3',
                            'width_4', 'data-efficient'
                        ],
                        metavar='ARCH',
                        help='Network architecture')

    args = parser.parse_args()

    return args
Example #20

import atari_py

def get_games_list():
    return atari_py.list_games()
Example #21
import itertools
import atari_py as atari
from procgen.env import ENV_NAMES

atari_envs = set(atari.list_games())
procgen_envs = set(ENV_NAMES)

suite_env = dict(
    atari=atari_envs,
    procgen=procgen_envs,
)

env2suite = dict(
    list(itertools.product(atari_envs, ['atari'])) +
    list(itertools.product(procgen_envs, ['procgen'])))


def is_atari(name):
    return env2suite.get(name) == 'atari'


def is_procgen(name):
    return env2suite.get(name) == 'procgen'


if __name__ == '__main__':
    for v in suite_env['procgen']:
        print(v)
Example #22
import argparse

import atari_py

def parse_args():
    parser = argparse.ArgumentParser(description="Rainbow")
    parser.add_argument("--seed", type=int, default=123, help="Random seed")
    parser.add_argument("--disable-cuda",
                        action="store_true",
                        help="Disable CUDA")
    parser.add_argument(
        "--game",
        type=str,
        default="space_invaders",
        choices=atari_py.list_games(),
        help="ATARI game",
    )
    parser.add_argument(
        "--max-timesteps",
        type=int,
        default=int(50e6),
        metavar="STEPS",
        help="Number of training steps (4x number of frames)",
    )
    parser.add_argument(
        "--max-episode-length",
        type=int,
        default=int(108e3),
        metavar="LENGTH",
        help="Max episode length in game frames (0 to disable)",
    )
    parser.add_argument(
        "--history-length",
        type=int,
        default=4,
        metavar="T",
        help="Number of consecutive states processed",
    )
    parser.add_argument(
        "--architecture",
        type=str,
        default="canonical",
        choices=["canonical", "data-efficient"],
        metavar="ARCH",
        help="Network architecture",
    )
    parser.add_argument(
        "--hidden-size",
        type=int,
        default=512,
        metavar="SIZE",
        help="Network hidden size",
    )
    parser.add_argument(
        "--noisy-std",
        type=float,
        default=0.1,
        metavar="σ",
        help="Initial standard deviation of noisy linear layers",
    )
    parser.add_argument(
        "--atoms",
        type=int,
        default=51,
        metavar="C",
        help="Discretised size of value distribution",
    )
    parser.add_argument(
        "--V-min",
        type=float,
        default=-10,
        metavar="V",
        help="Minimum of value distribution support",
    )
    parser.add_argument(
        "--V-max",
        type=float,
        default=10,
        metavar="V",
        help="Maximum of value distribution support",
    )
    parser.add_argument("--model",
                        type=str,
                        metavar="PARAMS",
                        help="Pretrained model (state dict)")
    parser.add_argument(
        "--memory-capacity",
        type=int,
        default=int(1e6),
        metavar="CAPACITY",
        help="Experience replay memory capacity",
    )
    parser.add_argument(
        "--replay-frequency",
        type=int,
        default=4,
        metavar="k",
        help="Frequency of sampling from memory",
    )
    parser.add_argument(
        "--priority-exponent",
        type=float,
        default=0.5,
        metavar="ω",
        help="Prioritised experience replay exponent (originally denoted α)",
    )
    parser.add_argument(
        "--priority-weight",
        type=float,
        default=0.4,
        metavar="β",
        help="Initial prioritised experience replay importance sampling weight",
    )
    parser.add_argument(
        "--multi-step",
        type=int,
        default=3,
        metavar="n",
        help="Number of steps for multi-step return",
    )
    parser.add_argument("--discount",
                        type=float,
                        default=0.99,
                        metavar="γ",
                        help="Discount factor")
    parser.add_argument(
        "--target-update",
        type=int,
        default=int(8e3),
        metavar="τ",
        help="Number of steps after which to update target network",
    )
    parser.add_argument(
        "--reward-clip",
        type=int,
        default=1,
        metavar="VALUE",
        help="Reward clipping (0 to disable)",
    )
    parser.add_argument("--lr",
                        type=float,
                        default=0.0000625,
                        metavar="η",
                        help="Learning rate")
    parser.add_argument("--adam-eps",
                        type=float,
                        default=1.5e-4,
                        metavar="ε",
                        help="Adam epsilon")
    parser.add_argument("--batch-size",
                        type=int,
                        default=32,
                        metavar="SIZE",
                        help="Batch size")
    parser.add_argument(
        "--learn-start",
        type=int,
        default=int(20e3),
        metavar="STEPS",
        help="Number of steps before starting training",
    )
    parser.add_argument("--evaluate",
                        action="store_true",
                        help="Evaluate only")
    parser.add_argument(
        "--evaluation-interval",
        type=int,
        default=100000,
        metavar="STEPS",
        help="Number of training steps between evaluations",
    )
    parser.add_argument(
        "--evaluation-episodes",
        type=int,
        default=10,
        metavar="N",
        help="Number of evaluation episodes to average over",
    )
    parser.add_argument(
        "--evaluation-size",
        type=int,
        default=500,
        metavar="N",
        help="Number of transitions to use for validating Q",
    )
    parser.add_argument("--render",
                        action="store_true",
                        help="Display screen (testing only)")
    parser.add_argument(
        "--enable-cudnn",
        action="store_true",
        help="Enable cuDNN (faster but nondeterministic)",
    )
    return parser.parse_args()
Example #23

import argparse

import atari_py
import numpy as np
import torch
from tqdm import trange

from agent import Agent
from env import Env
from memory import ReplayMemory
from test import test


# Note that hyperparameters may originally be reported in ATARI game frames instead of agent steps
parser = argparse.ArgumentParser(description='Rainbow')
parser.add_argument('--id', type=str, default='default', help='Experiment ID')
parser.add_argument('--seed', type=int, default=123, help='Random seed')
parser.add_argument('--disable-cuda', action='store_true', help='Disable CUDA')
parser.add_argument('--game', type=str, default='space_invaders', choices=atari_py.list_games(), help='ATARI game')
parser.add_argument('--T-max', type=int, default=int(50e6), metavar='STEPS', help='Number of training steps (4x number of frames)')
parser.add_argument('--max-episode-length', type=int, default=int(108e3), metavar='LENGTH', help='Max episode length in game frames (0 to disable)')
parser.add_argument('--history-length', type=int, default=4, metavar='T', help='Number of consecutive states processed')
parser.add_argument('--architecture', type=str, default='canonical', choices=['canonical', 'data-efficient'], metavar='ARCH', help='Network architecture')
parser.add_argument('--hidden-size', type=int, default=512, metavar='SIZE', help='Network hidden size')
parser.add_argument('--noisy-std', type=float, default=0.1, metavar='σ', help='Initial standard deviation of noisy linear layers')
parser.add_argument('--atoms', type=int, default=51, metavar='C', help='Discretised size of value distribution')
parser.add_argument('--V-min', type=float, default=-10, metavar='V', help='Minimum of value distribution support')
parser.add_argument('--V-max', type=float, default=10, metavar='V', help='Maximum of value distribution support')
parser.add_argument('--model', type=str, metavar='PARAMS', help='Pretrained model (state dict)')
parser.add_argument('--memory-capacity', type=int, default=int(1e6), metavar='CAPACITY', help='Experience replay memory capacity')
parser.add_argument('--replay-frequency', type=int, default=4, metavar='k', help='Frequency of sampling from memory')
parser.add_argument('--priority-exponent', type=float, default=0.5, metavar='ω', help='Prioritised experience replay exponent (originally denoted α)')
parser.add_argument('--priority-weight', type=float, default=0.4, metavar='β', help='Initial prioritised experience replay importance sampling weight')
parser.add_argument('--multi-step', type=int, default=3, metavar='n', help='Number of steps for multi-step return')
Example #24
import atari_py

print(atari_py.list_games())
print(len(atari_py.list_games()))
Example #25
import argparse

import atari_py

from test import ensemble_test
from sklearn.decomposition import PCA
from scipy.special import softmax

# Note that hyperparameters may originally be reported in ATARI game frames instead of agent steps
parser = argparse.ArgumentParser(description='Rainbow')
parser.add_argument('--id',
                    type=str,
                    default='boot_rainbow',
                    help='Experiment ID')
parser.add_argument('--seed', type=int, default=123, help='Random seed')
parser.add_argument('--disable-cuda', action='store_true', help='Disable CUDA')
parser.add_argument('--game',
                    type=str,
                    default='space_invaders',
                    choices=atari_py.list_games(),
                    help='ATARI game')
parser.add_argument('--T-max',
                    type=int,
                    default=int(50e6),
                    metavar='STEPS',
                    help='Number of training steps (4x number of frames)')
parser.add_argument('--max-episode-length',
                    type=int,
                    default=int(108e3),
                    metavar='LENGTH',
                    help='Max episode length in game frames (0 to disable)')
parser.add_argument('--history-length',
                    type=int,
                    default=4,
                    metavar='T',
                    help='Number of consecutive states processed')
Example #26
import atari_py
""" Available Games """
available_games = list((''.join(x.capitalize() or '_' for x in word.split('_'))
                        for word in atari_py.list_games()))

SELECTED_GAME = "Breakout"
SELECTED_MODEL = "ddqn"
SELECTED_MODE = "training"
RENDER_GAME = False
STEP_LIMIT = 5000000
RUN_LIMIT = None
CLIP_REWARD = True
CHANNEL_FIRST = True
""" Input Options """
FRAMES_IN_OBSERVATION = 4
FRAME_SIZE = 84
INPUT_SHAPE = (FRAMES_IN_OBSERVATION, FRAME_SIZE, FRAME_SIZE)
""" Agent Options """
GAMMA = 0.99
MEMORY_SIZE = 900000
BATCH_SIZE = 32
TRAINING_FREQUENCY = 4
TARGET_NETWORK_UPDATE_FREQUENCY = 40000
MODEL_PERSISTENCE_UPDATE_FREQUENCY = 10000
REPLAY_START_SIZE = 50000

EXPLORATION_MAX = 1.0
EXPLORATION_MIN = 0.1
EXPLORATION_TEST = 0.02
EXPLORATION_STEPS = 850000
EXPLORATION_DECAY = (EXPLORATION_MAX - EXPLORATION_MIN) / EXPLORATION_STEPS
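A sketch of how these exploration constants are typically consumed, i.e. a linear epsilon-annealing schedule (the function itself is hypothetical, not part of the original config):

def exploration_rate(step, testing=False):
    # anneal epsilon linearly from EXPLORATION_MAX to EXPLORATION_MIN over EXPLORATION_STEPS steps
    if testing:
        return EXPLORATION_TEST
    return max(EXPLORATION_MIN, EXPLORATION_MAX - EXPLORATION_DECAY * step)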