def get_last_atari_game_versions():
    atari_games = []
    for game in list_games():
        versions = [game_id for game_id in list(registry.env_specs.keys())
                    if game.replace('_', '') + 'noframeskip-v' in game_id.lower()]
        if len(versions) > 0:
            atari_games.append(str(np.sort(versions)[-1]))
    return atari_games
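# Hedged usage sketch for the helper above. It assumes `list_games` comes from atari_py,
# `registry` is gym's environment registry, and `np` is numpy -- none of these imports
# appear in the snippet itself, and `registry.env_specs` requires an older gym release.
import numpy as np  # assumed import
from atari_py import list_games  # assumed import
from gym.envs.registration import registry  # assumed import

latest_ids = get_last_atari_game_versions()
print(latest_ids[:5])  # e.g. the newest registered '<Game>NoFrameskip-v?' id per game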
def __init__(self, env_name):
    game_path = gym.make(env_name).env.game_path
    if not os.path.exists(game_path):
        raise IOError('Requested environment (%s) does not exist '
                      'in valid list of environments:\n%s'
                      % (env_name, ', '.join(sorted(atari_py.list_games()))))
    super(Rom, self).__init__(game_path)
def _args(self): parser = argparse.ArgumentParser() available_games = list((''.join(x.capitalize() or '_' for x in word.split('_')) for word in atari_py.list_games())) parser.add_argument("-g", "--game", help="Choose from available games: " + str(available_games) + ". Default is 'breakout'.", default="Breakout") parser.add_argument( "-m", "--mode", help= "Choose from available modes: ddqn_train, ddqn_test, ge_train, ge_test. Default is 'ddqn_training'.", default="ddqn_training") parser.add_argument( "-r", "--render", help="Choose if the game should be rendered. Default is 'False'.", default=False, type=bool) parser.add_argument( "-tsl", "--total_step_limit", help= "Choose how many total steps (frames visible by agent) should be performed. Default is '5000000'.", default=5000000, type=int) parser.add_argument( "-trl", "--total_run_limit", help= "Choose after how many runs we should stop. Default is None (no limit).", default=None, type=int) parser.add_argument( "-c", "--clip", help= "Choose whether we should clip rewards to (0, 1) range. Default is 'True'", default=True, type=bool) args = parser.parse_args() game_mode = args.mode game_name = args.game render = args.render total_step_limit = args.total_step_limit total_run_limit = args.total_run_limit clip = args.clip print("Selected game: " + str(game_name)) print("Selected mode: " + str(game_mode)) print("Should render: " + str(render)) print("Should clip: " + str(clip)) print("Total step limit: " + str(total_step_limit)) print("Total run limit: " + str(total_run_limit)) return game_name, game_mode, render, total_step_limit, total_run_limit, clip
def create_env(env_name, algo, n_envs):
    env = gym.make(env_name)

    # Wrap atari-environment with the deepmind-style wrapper
    atari_env_list = atari_py.list_games()
    for atari_env in atari_env_list:
        if atari_env in env_name.lower():
            env = wrap_deepmind(env)
            env = TransposeFrame(env)
            break

    # Parallelize the environment
    env = ParallelEnv(n_envs, env)
    return env
def is_atari(environment):
    for field in ['ramDeterministic', 'ramNoFrameSkip', 'NoFrameskip', 'Deterministic', 'ram']:
        environment = environment.replace(field, '')
    environment = re.sub(r'-v\d+', '', environment)
    environment = camel_to_snake_case(environment)
    return environment in ap.list_games()
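# A small, hedged example of how is_atari might be used. It assumes `re` is imported,
# `ap` is atari_py, and that a `camel_to_snake_case` helper exists in the same module;
# the one defined here is only an illustrative stand-in for the project's own helper.
import re  # assumed import
import atari_py as ap  # assumed import

def camel_to_snake_case(name):
    # illustrative stand-in, e.g. 'SpaceInvaders' -> 'space_invaders'
    return re.sub(r'(?<!^)(?=[A-Z])', '_', name).lower()

print(is_atari('SpaceInvadersNoFrameskip-v4'))  # expected: True
print(is_atari('CartPole-v1'))                  # expected: False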
def start(self):
    import gym  # lazy-loading
    gym.logger.set_level(40)  # avoid annoying internal warn messages
    self._env = gym.make(self.id)

    try:
        import atari_py
    except ImportError:
        return

    if any([env_name in self.id.lower() for env_name in atari_py.list_games()]):
        self._env = AtariPreprocessing(self._env.env)
def atari0():
    '''
    Getter for a list of all the -v0 atari games provided in openAI gym

    Returns:
        The list of -v0 atari games
    '''
    import atari_py as ap
    games = []
    for g in ap.list_games():
        gg = g.split("_")
        gg = [g.capitalize() for g in gg]
        games.append(''.join(gg) + "-v0")
    return games
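# Quick, hedged sanity check for atari0(); it only needs atari_py, which the function
# imports lazily itself. The exact contents depend on the installed atari_py ROMs.
games_v0 = atari0()
print(len(games_v0), games_v0[:3])  # e.g. 'AirRaid-v0', 'Alien-v0', 'Amidar-v0'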
def get_gym_submodules_and_environments():
    atari_games = atari_py.list_games()
    print('Searching gym.envs for submodules:')
    environments = gym.envs.registry.all()
    for importer, modname, ispkg in pkgutil.iter_modules(gym.envs.__path__):
        print(' Found submodule {} (Package: {})'.format(modname, ispkg))
        try:
            m = importer.find_module(modname).load_module(modname)
        except gym.error.DependencyNotInstalled:
            pass
        if ispkg:
            for importer, modname, ispkg in pkgutil.iter_modules(getattr(gym.envs, modname).__path__):
                print('  Found environment {}'.format(modname))
def start(self):
    import gym  # lazy-loading
    gym.logger.set_level(40)  # avoid annoying internal warn messages
    self._env = gym.make(self.id)

    try:
        import atari_py
        from reaver.envs.atari import AtariPreprocessing
    except ImportError:
        return

    if any([env_name in self.id.lower() for env_name in atari_py.list_games()]):
        self._env = AtariPreprocessing(self._env.env)
        self.make_specs(running=True)
def get_games():
    not_in = [
        'adventure',
        'air_raid',
        'carnival',
        'elevator_action',
        'journey_escape',
        'kaboom',
        'pooyan',
    ]
    games = atari_py.list_games()
    games = list(set(games) - set(not_in))
    games.sort()
    games = [upper_1st(game) + "-v0" for game in games]
    return games
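# Hedged usage sketch: `upper_1st` is not defined in the snippet above, so a plausible
# stand-in that capitalises each underscore-separated word is used here purely for
# illustration; atari_py is an assumed module-level import.
import atari_py  # assumed import

def upper_1st(game):
    # illustrative stand-in, e.g. 'space_invaders' -> 'SpaceInvaders'
    return ''.join(part.capitalize() for part in game.split('_'))

print(get_games()[:3])  # e.g. ['Alien-v0', 'Amidar-v0', 'Assault-v0']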
import argparse  # needed by the argument parser below
import atari_py  # needed for the --game choices below
import numpy as np
from tqdm import trange

from curl_rainbow.env import Env
from curl_rainbow.test import test
from curl_rainbow.agent import Agent
from curl_rainbow.memory import ReplayMemory

seed = np.random.randint(12345)

# Note that hyperparameters may originally be reported in ATARI game frames instead of agent steps
parser = argparse.ArgumentParser(description='Rainbow')
parser.add_argument('--id', type=str, default='default', help='Experiment ID')
parser.add_argument('--seed', type=int, default=seed, help='Random seed')
parser.add_argument('--disable-cuda', action='store_true', help='Disable CUDA')
parser.add_argument('--game', type=str, default='ms_pacman', choices=atari_py.list_games(), help='ATARI game')
parser.add_argument('--T-max', type=int, default=int(1e5), metavar='STEPS', help='Number of training steps (4x number of frames)')
parser.add_argument('--max-episode-length', type=int, default=int(108e3), metavar='LENGTH', help='Max episode length in game frames (0 to disable)')
parser.add_argument('--history-length', type=int, default=4, metavar='T', help='Number of consecutive states processed')
parser.add_argument('--architecture', type=str, default='data-efficient', choices=['canonical', 'data-efficient'], metavar='ARCH', help='Network architecture')
parser.add_argument('--hidden-size', type=int, default=256, metavar='SIZE', help='Network hidden size')
parser.add_argument('--noisy-std', type=float, default=0.1, metavar='σ', help='Initial standard deviation of noisy linear layers')
parser.add_argument('--atoms', type=int, default=51, metavar='C', help='Discretised size of value distribution')
parser.add_argument('--V-min', type=float, default=-10, metavar='V', help='Minimum of value distribution support')
parser.add_argument('--V-max', type=float, default=10, metavar='V', help='Maximum of value distribution support')
parser.add_argument('--model', type=str, metavar='PARAMS', help='Pretrained model (state dict)')
parser.add_argument('--memory-capacity', type=int, default=int(1e5), metavar='CAPACITY', help='Experience replay memory capacity')
parser.add_argument('--replay-frequency', type=int, default=1, metavar='k', help='Frequency of sampling from memory')
parser.add_argument('--priority-exponent', type=float, default=0.5, metavar='ω', help='Prioritised experience replay exponent (originally denoted α)')
parser.add_argument('--priority-weight', type=float, default=0.4, metavar='β', help='Initial prioritised experience replay importance sampling weight')
parser.add_argument('--multi-step', type=int, default=20, metavar='n', help='Number of steps for multi-step return')
def get_args():
    parser = argparse.ArgumentParser()
    parser.add_argument('--total-env-steps', type=int, default=10000000, help='Total number of env steps to train (default: 10000000)')
    parser.add_argument('--num-envs', type=int, default=64, help='Number of parallel envs to run')
    parser.add_argument('--num-trainers', type=int, default=1, help='Number of training workers')
    parser.add_argument('--num-reanalyze-envs', type=int, default=64, help='Number of parallel reanalyze envs to run')
    parser.add_argument('--num-reanalyze-workers', type=int, default=1, help='Number of reanalyze workers')
    parser.add_argument('--sync-envs', action='store_true')
    parser.add_argument('--debug-reanalyze', action='store_true')
    parser.add_argument('--fp16', action='store_true')
    parser.add_argument('--buffer-size', type=int, default=500000)
    parser.add_argument('--target-update-interval', type=int, default=100, help='Number of gradient steps for each update to the target network. <=0 to disable target network.')
    parser.add_argument('--no-gpu-0-train', action='store_true')
    parser.add_argument('--input-priorities', action='store_true')
    parser.add_argument('--q-dirichlet', action='store_true')
    parser.add_argument('--cpu-search', action='store_true', help='Put everything except MCTS inference calls on CPU')
    parser.add_argument('--epoch-steps', type=int, default=50, help='Number of gradient steps between loggings.')
    parser.add_argument('--replay-ratio', type=float, default=-2., help='Upper bound of async replay ratio. -1 disables.')
    parser.add_argument('--seed', type=int, default=42, help='Random seed to use')
    parser.add_argument('--game', type=str, default='space_invaders', choices=atari_py.list_games(), help='ATARI game')
    parser.add_argument('--framestack', type=int, default=4, metavar='T', help='Number of consecutive frames stacked to form an observation')
    parser.add_argument('--grayscale', action='store_true')
    parser.add_argument('--max-episode-length', type=int, default=int(108e3), metavar='LENGTH', help='Max episode length in game frames (0 to disable)')
    parser.add_argument('--discount', type=float, default=0.99)
    parser.add_argument('--evaluation-episodes', type=int, default=8, help='Number of episodes to average over when evaluating')

    # MCTS arguments
    parser.add_argument('--num-simulations', type=int, default=10)
    parser.add_argument('--eval-simulations', type=int, default=25)
    parser.add_argument('--virtual-threads', type=int, default=3)
    parser.add_argument('--virtual-loss-c', type=int, default=1.)
    parser.add_argument('--c1', type=float, default=1.25, help='UCB c1 constant')
    parser.add_argument('--dirichlet-alpha', type=float, default=0.25, help='Root dirichlet alpha')
    parser.add_argument('--visit-temp', type=float, default=0.5, help='Visit counts softmax temperature for sampling actions')

    # PiZero arguments
    parser.add_argument('--batch-size-per-worker', type=int, default=128, help='Batch size per GPU to use during training')
    parser.add_argument('--learning-rate', type=float, default=0.0003, metavar='η', help='Learning rate')
    parser.add_argument('--optim', type=str, default='adam', choices=["adam", "sgd"], help='Optimizer')
    parser.add_argument('--lr-decay-steps', type=float, default=350.e3, help='Learning rate decay time constant')
    parser.add_argument('--lr-decay', type=float, default=0.1, help='Learning rate decay scale')
    parser.add_argument('--momentum', type=float, default=0.9, help='SGD momentum')
    parser.add_argument('--grad-scale-factor', type=float, default=0.5, help='Rollout gradient downscaling. 1 disables')
    parser.add_argument('--adam-eps', type=float, default=1e-4, help='Adam epsilon')
    parser.add_argument('--weight-decay', type=float, default=1e-4, help='Weight decay regularization constant')
    parser.add_argument('--hidden-size', type=int, default=256, help='Hidden size of various MLPs')
    parser.add_argument('--dynamics-blocks', type=int, default=16, help='# of resblocks in dynamics model')
    parser.add_argument('--multistep', type=int, default=1, help='n-step for bootstrapping value targets')
    parser.add_argument('--training-start', type=int, default=100000, help='env steps to wait before starting training')
    parser.add_argument('--priority-exponent', type=float, default=1., metavar='ω', help='Prioritised experience replay exponent (originally denoted α)')
    parser.add_argument('--priority-weight', type=float, default=1., metavar='β', help='Initial prioritised experience replay importance sampling weight')
    parser.add_argument('--jumps', type=int, default=5, help='')
    parser.add_argument('--value-loss-weight', type=float, default=1.)
    parser.add_argument('--policy-loss-weight', type=float, default=1.)
    parser.add_argument('--reward-loss-weight', type=float, default=1.)
    parser.add_argument('--contrastive-loss-weight', type=float, default=1.)
    parser.add_argument('--entropy-loss-weight', type=float, default=0.0)
    parser.add_argument('--init-value-scale', type=float, default=1.)
    parser.add_argument('--film', action='store_true')
    parser.add_argument('--profile', action='store_true')
    parser.add_argument('--no-nce', action='store_true')
    parser.add_argument('--reanalyze', action='store_true')
    parser.add_argument('--use-all-targets', action='store_true')
    parser.add_argument('--q-learning', action='store_true')
    parser.add_argument('--local-target-net', action='store_true')
    parser.add_argument('--epsilon', type=float, default=0.01)
    parser.add_argument('--no-search-value-targets', action='store_true')
    parser.add_argument('--prioritized', action='store_true')
    parser.add_argument('--evaluation-interval', type=int, default=80000, help='Evaluate after every {evaluation-interval} env steps')
    parser.add_argument('--log-interval', type=int, default=3200, help='Log after every {log-interval} env steps')
    parser.add_argument('--wandb-proj', type=str, default='pizero')
    parser.add_argument('--name', type=str, default='')
    parser.add_argument('--savedir', type=str, default='./out')

    args = parser.parse_args()
    args.max_episode_length = int(108e3)
    args.batch_size = args.batch_size_per_worker * args.num_trainers
    return args
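# Hedged usage sketch: get_args() reads from sys.argv, so it is typically invoked from a
# training entry point, e.g. `python main.py --game ms_pacman --num-envs 32`. Both
# `argparse` and `atari_py` are assumed to be imported in that module.
if __name__ == '__main__':
    args = get_args()
    print(args.game, args.batch_size)  # batch_size = batch_size_per_worker * num_trainers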
import sys

for p in sys.path:
    print(p)

import atari_py

print(atari_py.list_games())
import gym
import atari_py as ap

all_envs = gym.envs.registry.all()
env_ids = [env_spec.id for env_spec in all_envs]
env_ids = [x for x in env_ids if '-ram' not in x]
env_ids = [x for x in env_ids if 'Deterministic' not in x]
env_ids = [x for x in env_ids if 'NoFrameskip' in x]

atari_games = ap.list_games()
env_ids = [x for x in env_ids
           if any(game.replace('_', '') in x.lower() for game in atari_games)]
env_ids = [x for x in env_ids if 'v4' in x]

# env_p = gym.make('PitfallNoFrameskip-v4')
# import pdb; pdb.set_trace()

action_meanings = [(x, gym.make(x).unwrapped.get_action_meanings()) for x in env_ids]
for pair in action_meanings:
    print(pair[0], *pair[1])

# print([game for game in atari_games if all(game.replace('_', '') not in x.lower() for x in env_ids)])
# ['kaboom', 'adventure', 'defender'] these games are missing from gym
# # Collect all atari environments from gym

# In[2]:

envids = [spec.id for spec in gym.envs.registry.all()]
dir(gym.envs.registry)
envs_pixel = []
for envid in envids:
    if 'ram' not in envid and '0' in envid:
        envs_pixel.append(envid)

# In[3]:

# a = [gym.make(env).action_space for env in envs_pixel]
games = ap.list_games()
games.sort()

# In[4]:

def snake_to_camel(word):
    return ''.join(x.capitalize() or '_' for x in word.split('_'))

games = [snake_to_camel(game) + '-v0' for game in games]
envs = list(map(gym.make, [game for game in games if game in envs_pixel]))

# In[5]:

games = sorted([(game, gym.make(game).action_space.n)
class Environment(gym.Env, utils.EzPickle):
    """
    The Atari Environment (inherits from the openai gym environment: https://gym.openai.com/docs/)

    Environment(
        game="pong",                    # use Environment.available_games to see available games
        mode=None,                      # use Environment.available_modes_for(game) to see this list
        difficulty=None,
        obs_type="image",               # or "ram"
        frameskip=(2, 5),               # random number between 2 and 5
        repeat_action_probability=0.0,  # 0 means deterministic
        full_action_space=False,
    )
    """

    metadata = {"render.modes": ["human", "rgb_array"]}
    available_games = atari_py.list_games()

    @classmethod
    def available_modes_for(cls, game):
        ale = atari_py.ALEInterface()
        # load up the game
        ale.setInt(b"random_seed", 0)
        ale.loadROM(atari_py.get_game_path(game))
        return ale.getAvailableModes()

    @classmethod
    def _to_ram(cls, ale):
        ram_size = ale.getRAMSize()
        ram = np.zeros((ram_size), dtype=np.uint8)
        ale.getRAM(ram)
        return ram

    def __init__(
        self,
        game="pong",
        mode=None,
        difficulty=None,
        obs_type="image",
        frameskip=(2, 5),
        repeat_action_probability=0.0,
        full_action_space=False,
    ):
        """
        Arguments:
            game: the name of the game ("pong", "Enduro", etc); don't add the "-v0"
            mode: different modes are available for different games.
            frameskip: should be either a tuple (indicating a random range to choose from,
                with the top value excluded), or an int.
        """
        utils.EzPickle.__init__(self, game, mode, difficulty, obs_type, frameskip, repeat_action_probability)
        assert obs_type in ("ram", "image")

        self.game = game
        self.game_path = atari_py.get_game_path(game)
        self.game_mode = mode
        self.game_difficulty = difficulty

        if not os.path.exists(self.game_path):
            msg = "You asked for game %s but path %s does not exist"
            raise IOError(msg % (game, self.game_path))
        self._obs_type = obs_type
        self.frameskip = frameskip
        self.ale = atari_py.ALEInterface()
        self.viewer = None

        # Tune (or disable) ALE's action repeat:
        # https://github.com/openai/gym/issues/349
        assert isinstance(repeat_action_probability, (float, int)), \
            "Invalid repeat_action_probability: {!r}".format(repeat_action_probability)
        self.ale.setFloat("repeat_action_probability".encode("utf-8"), repeat_action_probability)

        self.seed()

        self._action_set = (self.ale.getLegalActionSet()
                            if full_action_space else self.ale.getMinimalActionSet())
        self.action_space = spaces.Discrete(len(self._action_set))

        (screen_width, screen_height) = self.ale.getScreenDims()
        if self._obs_type == "ram":
            self.observation_space = spaces.Box(low=0, high=255, dtype=np.uint8, shape=(128,))
        elif self._obs_type == "image":
            self.observation_space = spaces.Box(low=0, high=255, shape=(screen_height, screen_width, 3), dtype=np.uint8)
        else:
            raise error.Error("Unrecognized observation type: {}".format(self._obs_type))

    def seed(self, seed=None):
        self.np_random, seed1 = seeding.np_random(seed)
        # Derive a random seed. This gets passed as a uint, but gets
        # checked as an int elsewhere, so we need to keep it below
        # 2**31.
        seed2 = seeding.hash_seed(seed1 + 1) % 2**31
        # Empirically, we need to seed before loading the ROM.
        self.ale.setInt(b"random_seed", seed2)
        self.ale.loadROM(self.game_path)

        if self.game_mode is not None:
            modes = self.ale.getAvailableModes()
            assert self.game_mode in modes, (
                'Invalid game mode "{}" for game {}.\nAvailable modes are: {}'
            ).format(self.game_mode, self.game, modes)
            self.ale.setMode(self.game_mode)

        if self.game_difficulty is not None:
            difficulties = self.ale.getAvailableDifficulties()
            assert self.game_difficulty in difficulties, (
                'Invalid game difficulty "{}" for game {}.\nAvailable difficulties are: {}'
            ).format(self.game_difficulty, self.game, difficulties)
            self.ale.setDifficulty(self.game_difficulty)

        return [seed1, seed2]

    def step(self, a):
        reward = 0.0
        action = self._action_set[a]

        if isinstance(self.frameskip, int):
            num_steps = self.frameskip
        else:
            num_steps = self.np_random.randint(self.frameskip[0], self.frameskip[1])
        for _ in range(num_steps):
            reward += self.ale.act(action)
        ob = self._get_obs()

        return ob, reward, self.ale.game_over(), {"ale.lives": self.ale.lives()}

    def _get_image(self):
        return self.ale.getScreenRGB2()

    def _get_ram(self):
        return Environment._to_ram(self.ale)

    @property
    def _n_actions(self):
        return len(self._action_set)

    def _get_obs(self):
        if self._obs_type == "ram":
            return self._get_ram()
        elif self._obs_type == "image":
            img = self._get_image()
            return img

    # return: (states, observations)
    def reset(self):
        self.ale.reset_game()
        return self._get_obs()

    def render(self, mode="human"):
        img = self._get_image()
        if mode == "rgb_array":
            return img
        elif mode == "human":
            from gym.envs.classic_control import rendering

            if self.viewer is None:
                self.viewer = rendering.SimpleImageViewer()
            self.viewer.imshow(img)
            return self.viewer.isopen

    def close(self):
        if self.viewer is not None:
            self.viewer.close()
            self.viewer = None

    def get_action_meanings(self):
        return [ACTION_MEANING[i] for i in self._action_set]

    def get_keys_to_action(self):
        KEYWORD_TO_KEY = {
            "UP": ord("w"),
            "DOWN": ord("s"),
            "LEFT": ord("a"),
            "RIGHT": ord("d"),
            "FIRE": ord(" "),
        }

        keys_to_action = {}
        for action_id, action_meaning in enumerate(self.get_action_meanings()):
            keys = []
            for keyword, key in KEYWORD_TO_KEY.items():
                if keyword in action_meaning:
                    keys.append(key)
            keys = tuple(sorted(keys))

            assert keys not in keys_to_action
            keys_to_action[keys] = action_id

        return keys_to_action

    def clone_state(self):
        """Clone emulator state w/o system state. Restoring this state will
        *not* give an identical environment. For complete cloning and restoring
        of the full state, see `{clone,restore}_full_state()`."""
        state_ref = self.ale.cloneState()
        state = self.ale.encodeState(state_ref)
        self.ale.deleteState(state_ref)
        return state

    def restore_state(self, state):
        """Restore emulator state w/o system state."""
        state_ref = self.ale.decodeState(state)
        self.ale.restoreState(state_ref)
        self.ale.deleteState(state_ref)

    def clone_full_state(self):
        """Clone emulator state w/ system state including pseudorandomness.
        Restoring this state will give an identical environment."""
        state_ref = self.ale.cloneSystemState()
        state = self.ale.encodeState(state_ref)
        self.ale.deleteState(state_ref)
        return state

    def restore_full_state(self, state):
        """Restore emulator state w/ system state including pseudorandomness."""
        state_ref = self.ale.decodeState(state)
        self.ale.restoreSystemState(state_ref)
        self.ale.deleteState(state_ref)
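# Hedged usage sketch for the Environment class above. It assumes the module-level
# imports of the original file (gym, atari_py, numpy as np, os, gym's utils, spaces,
# error, seeding, and an ACTION_MEANING table) are present and that atari_py ROMs are
# installed.
env = Environment(game="pong", obs_type="image", frameskip=4)
obs = env.reset()
total_reward = 0.0
for _ in range(100):
    obs, reward, done, info = env.step(env.action_space.sample())
    total_reward += reward
    if done:
        obs = env.reset()
env.close()
print("frame shape:", obs.shape, "reward over 100 steps:", total_reward)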
import numpy as np
import cv2
import gym
import torch.nn as nn  # needed by weights_init below

# colab install for universe
# !git clone https://github.com/openai/universe.git
# !cd universe
# !pip install -e .
# !pip install 'gym[atari]'
# !pip install universe
import universe  # register the universe environments
import atari_py as ap  # for list

from collections import defaultdict  # for Q ie: state Q values store
from collections import namedtuple  # for transitions store ie: memory, buffer, ReplayMemory

# list of the games
game_list = ap.list_games()
print(sorted(game_list))


# custom weights initialization
def weights_init(m):
    classname = m.__class__.__name__
    if classname.find('Conv') != -1:
        nn.init.normal_(m.weight.data, 0.0, 0.02)
    elif classname.find('BatchNorm') != -1:
        nn.init.normal_(m.weight.data, 1.0, 0.02)
        nn.init.constant_(m.bias.data, 0)


def conv(in_channels, out_channels, kernel_size, stride=2, padding=1, batch_norm=True, init_zero_weights=False):
    """Creates a convolutional layer, with optional batch normalization."""
    layers = []
import argparse  # needed by the argument parser below
import atari_py  # needed for the --game choices below
import numpy as np

from env import Env
from memory import ReplayMemory
from test import test
import procgen
import LocalProcEnv

seed = np.random.randint(12345)

# Note that hyperparameters may originally be reported in ATARI game frames instead of agent steps
parser = argparse.ArgumentParser(description='Rainbow')
parser.add_argument('--id', type=str, default='default', help='Experiment ID')
parser.add_argument('--seed', type=int, default=seed, help='Random seed')
parser.add_argument('--disable-cuda', action='store_true', help='Disable CUDA')
parser.add_argument('--game', type=str, default='ms_pacman', choices=list(atari_py.list_games()) + procgen.env.ENV_NAMES, help='ATARI game')
parser.add_argument('--T-max', type=int, default=int(1e5), metavar='STEPS', help='Number of training steps (4x number of frames)')
parser.add_argument('--max-episode-length', type=int, default=int(108e3), metavar='LENGTH', help='Max episode length in game frames (0 to disable)')
parser.add_argument('--history-length', type=int, default=4,
def parse_arguments():
    parser = argparse.ArgumentParser(description='Rainbow')
    parser.add_argument('--id', type=str, default='default', help='Experiment ID')
    parser.add_argument('--seed', type=int, default=123, help='Random seed')
    parser.add_argument('--disable-cuda', action='store_true', help='Disable CUDA')
    parser.add_argument('--game', type=str, default='space_invaders', choices=atari_py.list_games(), help='ATARI game')
    parser.add_argument('--T-max', type=int, default=int(50e6), metavar='STEPS', help='Number of training steps (4x number of frames)')
    parser.add_argument('--max-episode-length', type=int, default=int(108e3), metavar='LENGTH', help='Max episode length in game frames (0 to disable)')
    parser.add_argument('--history-length', type=int, default=4, metavar='T', help='Number of consecutive states processed')
    parser.add_argument('--hidden-size', type=int, default=512, metavar='SIZE', help='Network hidden size')
    parser.add_argument('--noisy-std', type=float, default=0.1, metavar='σ', help='Initial standard deviation of noisy linear layers')
    parser.add_argument('--atoms', type=int, default=51, metavar='C', help='Discretised size of value distribution')
    parser.add_argument('--V-min', type=float, default=-10, metavar='V', help='Minimum of value distribution support')
    parser.add_argument('--V-max', type=float, default=10, metavar='V', help='Maximum of value distribution support')
    parser.add_argument('--model', type=str, metavar='PARAMS', help='Pretrained model (state dict)')
    parser.add_argument('--memory-capacity', type=int, default=int(1e6), metavar='CAPACITY', help='Experience replay memory capacity')
    parser.add_argument('--replay-frequency', type=int, default=4, metavar='k', help='Frequency of sampling from memory')
    parser.add_argument('--priority-exponent', type=float, default=0.5, metavar='ω', help='Prioritised experience replay exponent (originally denoted α)')
    parser.add_argument('--priority-weight', type=float, default=0.4, metavar='β', help='Initial prioritised experience replay importance sampling weight')
    parser.add_argument('--multi-step', type=int, default=3, metavar='n', help='Number of steps for multi-step return')
    parser.add_argument('--discount', type=float, default=0.99, metavar='γ', help='Discount factor')
    parser.add_argument('--target-update', type=int, default=int(8e3), metavar='τ', help='Number of steps after which to update target network')
    parser.add_argument('--reward-clip', type=int, default=1, metavar='VALUE', help='Reward clipping (0 to disable)')
    parser.add_argument('--learning-rate', type=float, default=0.0000625, metavar='η', help='Learning rate')
    parser.add_argument('--adam-eps', type=float, default=1.5e-4, metavar='ε', help='Adam epsilon')
    parser.add_argument('--batch-size', type=int, default=32, metavar='SIZE', help='Batch size')
    parser.add_argument('--norm-clip', type=float, default=10, metavar='NORM', help='Max L2 norm for gradient clipping')
    parser.add_argument('--learn-start', type=int, default=int(20e3), metavar='STEPS', help='Number of steps before starting training')
    parser.add_argument('--evaluate', action='store_true', help='Evaluate only')
    parser.add_argument('--evaluation-interval', type=int, default=100000, metavar='STEPS', help='Number of training steps between evaluations')
    parser.add_argument('--evaluation-episodes', type=int, default=10, metavar='N', help='Number of evaluation episodes to average over')
    parser.add_argument('--evaluation-size', type=int, default=500, metavar='N', help='Number of transitions to use for validating Q')
    parser.add_argument('--render', action='store_true', help='Display screen (testing only)')
    parser.add_argument('--enable-cudnn', action='store_true', help='Enable cuDNN (faster but nondeterministic)')
    parser.add_argument('--checkpoint-interval', default=0, help='How often to checkpoint the model, defaults to 0 (never checkpoint)')
    parser.add_argument('--memory', help='Path to save/load the memory from')
    parser.add_argument('--disable-bzip-memory', action='store_true', help='Don\'t zip the memory file. Not recommended (zipping is a bit slower and much, much smaller)')
    parser.add_argument('--tensorboard-dir', type=str, default=None, help='tensorboard directory')
    parser.add_argument('--architecture', type=str, default='canonical', choices=['canonical', 'depth_1', 'depth_2', 'depth_3', 'depth_4', 'width_1', 'width_2', 'width_3', 'width_4', 'data-efficient'], metavar='ARCH', help='Network architecture')

    args = parser.parse_args()
    return args
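# Hedged usage sketch: parse_arguments() reads sys.argv, so it is normally called from
# the training script's entry point; `argparse` and `atari_py` are assumed to be
# imported in that module. argparse exposes '--T-max' as args.T_max.
if __name__ == '__main__':
    args = parse_arguments()
    print('Training {} for {} steps with architecture {}'.format(args.game, args.T_max, args.architecture))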
def get_games_list():
    return atari_py.list_games()
import itertools

import atari_py as atari
from procgen.env import ENV_NAMES

atari_envs = set(atari.list_games())
procgen_envs = set(ENV_NAMES)

suite_env = dict(
    atari=atari_envs,
    procgen=procgen_envs,
)

env2suite = dict(
    list(itertools.product(atari_envs, ['atari'])) +
    list(itertools.product(procgen_envs, ['procgen'])))


def is_atari(name):
    return env2suite.get(name) == 'atari'


def is_procgen(name):
    return env2suite.get(name) == 'procgen'


if __name__ == '__main__':
    for v in suite_env['procgen']:
        print(v)
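# Small, hedged example of the suite lookup above; exact membership depends on the
# installed atari_py and procgen versions.
print(is_atari('pong'))        # expected: True
print(is_procgen('coinrun'))   # expected: True
print(is_atari('coinrun'))     # expected: False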
def parse_args():
    parser = argparse.ArgumentParser(description="Rainbow")
    parser.add_argument("--seed", type=int, default=123, help="Random seed")
    parser.add_argument("--disable-cuda", action="store_true", help="Disable CUDA")
    parser.add_argument("--game", type=str, default="space_invaders", choices=atari_py.list_games(), help="ATARI game")
    parser.add_argument("--max-timesteps", type=int, default=int(50e6), metavar="STEPS", help="Number of training steps (4x number of frames)")
    parser.add_argument("--max-episode-length", type=int, default=int(108e3), metavar="LENGTH", help="Max episode length in game frames (0 to disable)")
    parser.add_argument("--history-length", type=int, default=4, metavar="T", help="Number of consecutive states processed")
    parser.add_argument("--architecture", type=str, default="canonical", choices=["canonical", "data-efficient"], metavar="ARCH", help="Network architecture")
    parser.add_argument("--hidden-size", type=int, default=512, metavar="SIZE", help="Network hidden size")
    parser.add_argument("--noisy-std", type=float, default=0.1, metavar="σ", help="Initial standard deviation of noisy linear layers")
    parser.add_argument("--atoms", type=int, default=51, metavar="C", help="Discretised size of value distribution")
    parser.add_argument("--V-min", type=float, default=-10, metavar="V", help="Minimum of value distribution support")
    parser.add_argument("--V-max", type=float, default=10, metavar="V", help="Maximum of value distribution support")
    parser.add_argument("--model", type=str, metavar="PARAMS", help="Pretrained model (state dict)")
    parser.add_argument("--memory-capacity", type=int, default=int(1e6), metavar="CAPACITY", help="Experience replay memory capacity")
    parser.add_argument("--replay-frequency", type=int, default=4, metavar="k", help="Frequency of sampling from memory")
    parser.add_argument("--priority-exponent", type=float, default=0.5, metavar="ω", help="Prioritised experience replay exponent (originally denoted α)")
    parser.add_argument("--priority-weight", type=float, default=0.4, metavar="β", help="Initial prioritised experience replay importance sampling weight")
    parser.add_argument("--multi-step", type=int, default=3, metavar="n", help="Number of steps for multi-step return")
    parser.add_argument("--discount", type=float, default=0.99, metavar="γ", help="Discount factor")
    parser.add_argument("--target-update", type=int, default=int(8e3), metavar="τ", help="Number of steps after which to update target network")
    parser.add_argument("--reward-clip", type=int, default=1, metavar="VALUE", help="Reward clipping (0 to disable)")
    parser.add_argument("--lr", type=float, default=0.0000625, metavar="η", help="Learning rate")
    parser.add_argument("--adam-eps", type=float, default=1.5e-4, metavar="ε", help="Adam epsilon")
    parser.add_argument("--batch-size", type=int, default=32, metavar="SIZE", help="Batch size")
    parser.add_argument("--learn-start", type=int, default=int(20e3), metavar="STEPS", help="Number of steps before starting training")
    parser.add_argument("--evaluate", action="store_true", help="Evaluate only")
    parser.add_argument("--evaluation-interval", type=int, default=100000, metavar="STEPS", help="Number of training steps between evaluations")
    parser.add_argument("--evaluation-episodes", type=int, default=10, metavar="N", help="Number of evaluation episodes to average over")
    parser.add_argument("--evaluation-size", type=int, default=500, metavar="N", help="Number of transitions to use for validating Q")
    parser.add_argument("--render", action="store_true", help="Display screen (testing only)")
    parser.add_argument("--enable-cudnn", action="store_true", help="Enable cuDNN (faster but nondeterministic)")
    return parser.parse_args()
import argparse  # needed by the argument parser below
import atari_py  # needed for the --game choices below
import numpy as np
import torch
from tqdm import trange

from agent import Agent
from env import Env
from memory import ReplayMemory
from test import test

# Note that hyperparameters may originally be reported in ATARI game frames instead of agent steps
parser = argparse.ArgumentParser(description='Rainbow')
parser.add_argument('--id', type=str, default='default', help='Experiment ID')
parser.add_argument('--seed', type=int, default=123, help='Random seed')
parser.add_argument('--disable-cuda', action='store_true', help='Disable CUDA')
parser.add_argument('--game', type=str, default='space_invaders', choices=atari_py.list_games(), help='ATARI game')
parser.add_argument('--T-max', type=int, default=int(50e6), metavar='STEPS', help='Number of training steps (4x number of frames)')
parser.add_argument('--max-episode-length', type=int, default=int(108e3), metavar='LENGTH', help='Max episode length in game frames (0 to disable)')
parser.add_argument('--history-length', type=int, default=4, metavar='T', help='Number of consecutive states processed')
parser.add_argument('--architecture', type=str, default='canonical', choices=['canonical', 'data-efficient'], metavar='ARCH', help='Network architecture')
parser.add_argument('--hidden-size', type=int, default=512, metavar='SIZE', help='Network hidden size')
parser.add_argument('--noisy-std', type=float, default=0.1, metavar='σ', help='Initial standard deviation of noisy linear layers')
parser.add_argument('--atoms', type=int, default=51, metavar='C', help='Discretised size of value distribution')
parser.add_argument('--V-min', type=float, default=-10, metavar='V', help='Minimum of value distribution support')
parser.add_argument('--V-max', type=float, default=10, metavar='V', help='Maximum of value distribution support')
parser.add_argument('--model', type=str, metavar='PARAMS', help='Pretrained model (state dict)')
parser.add_argument('--memory-capacity', type=int, default=int(1e6), metavar='CAPACITY', help='Experience replay memory capacity')
parser.add_argument('--replay-frequency', type=int, default=4, metavar='k', help='Frequency of sampling from memory')
parser.add_argument('--priority-exponent', type=float, default=0.5, metavar='ω', help='Prioritised experience replay exponent (originally denoted α)')
parser.add_argument('--priority-weight', type=float, default=0.4, metavar='β', help='Initial prioritised experience replay importance sampling weight')
parser.add_argument('--multi-step', type=int, default=3, metavar='n', help='Number of steps for multi-step return')
import atari_py

print(atari_py.list_games())
print(len(atari_py.list_games()))
import argparse  # needed by the argument parser below
import atari_py  # needed for the --game choices below

from test import ensemble_test
from sklearn.decomposition import PCA
from scipy.special import softmax

# Note that hyperparameters may originally be reported in ATARI game frames instead of agent steps
parser = argparse.ArgumentParser(description='Rainbow')
parser.add_argument('--id', type=str, default='boot_rainbow', help='Experiment ID')
parser.add_argument('--seed', type=int, default=123, help='Random seed')
parser.add_argument('--disable-cuda', action='store_true', help='Disable CUDA')
parser.add_argument('--game', type=str, default='space_invaders', choices=atari_py.list_games(), help='ATARI game')
parser.add_argument('--T-max', type=int, default=int(50e6), metavar='STEPS', help='Number of training steps (4x number of frames)')
parser.add_argument('--max-episode-length', type=int, default=int(108e3), metavar='LENGTH', help='Max episode length in game frames (0 to disable)')
parser.add_argument('--history-length', type=int, default=4, metavar='T',
import atari_py

""" Available Games """
available_games = list((''.join(x.capitalize() or '_' for x in word.split('_'))
                        for word in atari_py.list_games()))

SELECTED_GAME = "Breakout"
SELECTED_MODEL = "ddqn"
SELECTED_MODE = "training"
RENDER_GAME = False
STEP_LIMIT = 5000000
RUN_LIMIT = None
CLIP_REWARD = True
CHANNEL_FIRST = True

""" Input Options """
FRAMES_IN_OBSERVATION = 4
FRAME_SIZE = 84
INPUT_SHAPE = (FRAMES_IN_OBSERVATION, FRAME_SIZE, FRAME_SIZE)

""" Agent Options """
GAMMA = 0.99
MEMORY_SIZE = 900000
BATCH_SIZE = 32
TRAINING_FREQUENCY = 4
TARGET_NETWORK_UPDATE_FREQUENCY = 40000
MODEL_PERSISTENCE_UPDATE_FREQUENCY = 10000
REPLAY_START_SIZE = 50000
EXPLORATION_MAX = 1.0
EXPLORATION_MIN = 0.1
EXPLORATION_TEST = 0.02
EXPLORATION_STEPS = 850000
EXPLORATION_DECAY = (EXPLORATION_MAX - EXPLORATION_MIN) / EXPLORATION_STEPS
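# Hedged sketch of how the exploration constants above are typically combined into a
# linear epsilon-greedy schedule; the update rule shown here is an illustration of the
# arithmetic, not necessarily the exact rule used by the agent that consumes this config.
epsilon = EXPLORATION_MAX
for step in range(EXPLORATION_STEPS):
    epsilon = max(EXPLORATION_MIN, epsilon - EXPLORATION_DECAY)
print(round(epsilon, 3))  # reaches EXPLORATION_MIN (0.1) after EXPLORATION_STEPS steps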