def __init__(self, rom_file, random_seed, frame_skip, repeat_action_probability, minimum_actions, use_sdl, test_mode, image_processing=None): ALEInterface.__init__(self) # Set USE_SDL to true to display the screen. ALE must be compilied # with SDL enabled for this to work. On OSX, pygame init is used to # proxy-call SDL_main. if use_sdl: if sys.platform == 'darwin': import pygame pygame.init() self.setBool('sound', False) # Sound doesn't work on OSX elif sys.platform.startswith('linux'): self.setBool('sound', True) self.setBool('display_screen', True) self.setFloat('repeat_action_probability', repeat_action_probability) self.setInt('frame_skip', frame_skip) self.random_seed = random_seed self.frame_skip = frame_skip # trolololo self.minimum_actions = minimum_actions self.test_mode = test_mode self.image_processing = image_processing self.num_actions = 0 self.legal_actions = [] self.queue = deque() self.height = -1 self.width = -1 self.loadROM(rom_file) height, width = self.getScreenDims() logging.info('Screen resolution is %dx%d' % (height, width))
class Emulator(object):
    """Minimal ALE wrapper exposing reset/act/image/terminal for an agent loop."""

    FRAME_SKIP = 4
    SCREEN_WIDTH = 84
    SCREEN_HEIGHT = 84

    def __init__(self, rom):
        """Load ``roms/<rom>`` into a fresh ALE instance.

        :param rom: ROM file name relative to the ``roms/`` directory
        """
        self.ale = ALEInterface()
        # ALE's own getInt('max_num_frames_per_episode') was unreliable here,
        # so the cap is hard-coded instead.
        self.max_num_frames_per_episode = 100000
        self.ale.setInt('frame_skip', self.FRAME_SKIP)
        self.ale.loadROM('roms/' + rom)
        self.actions = self.ale.getMinimalActionSet()

    def reset(self):
        """Start a new episode."""
        self.ale.reset_game()

    def image(self):
        """Return the current screen as an (84, 84) grayscale array.

        Fix: cv2.resize takes ``dsize`` as (width, height); the original
        passed (SCREEN_HEIGHT, SCREEN_WIDTH), which only worked because
        both constants are 84. Pass them in the documented order so the
        code stays correct if the constants ever diverge.
        """
        screen = self.ale.getScreenGrayscale()
        screen = cv2.resize(screen, (self.SCREEN_WIDTH, self.SCREEN_HEIGHT))
        return np.reshape(screen, (self.SCREEN_HEIGHT, self.SCREEN_WIDTH))

    def act(self, action):
        """Apply ``action`` (ALE repeats it FRAME_SKIP frames); return the reward."""
        return self.ale.act(action)

    def terminal(self):
        """Return True when the current game is over."""
        return self.ale.game_over()
def __init__(self, rom_file, viz=0, height_range=(None,None),
             frame_skip=4, image_shape=(84, 84),
             nullop_start=30, live_lost_as_eoe=True):
    """
    :param rom_file: path to the rom
    :param frame_skip: skip every k frames and repeat the action
    :param image_shape: (w, h)
    :param height_range: (h1, h2) to cut
    :param viz: visualization to be done. Set to 0 to disable. Set to
        a positive number to be the delay between frames to show. Set to
        a string to be a directory to store frames.
    :param nullop_start: start with random number of null ops
    :param live_lost_as_eoe: consider loss of lives as end of episode.
        Useful for training.
    """
    super(AtariPlayer, self).__init__()
    self.ale = ALEInterface()
    self.rng = get_rng(self)
    self.ale.setInt("random_seed", self.rng.randint(0, 10000))
    self.ale.setBool("showinfo", False)
    # setLoggerMode only exists in newer ALE releases; tolerate old ones.
    try:
        ALEInterface.setLoggerMode(ALEInterface.Logger.Warning)
    except AttributeError:
        log_once()
    # Frame skipping is done manually in this wrapper, not by ALE.
    self.ale.setInt("frame_skip", 1)
    self.ale.setBool('color_averaging', False)
    # manual.pdf suggests otherwise. may need to check
    self.ale.setFloat('repeat_action_probability', 0.0)

    # viz setup: a string means "record frames to this directory",
    # a number means "show the screen with this inter-frame delay".
    if isinstance(viz, six.string_types):
        assert os.path.isdir(viz), viz
        self.ale.setString('record_screen_dir', viz)
        viz = 0
    if isinstance(viz, int):
        viz = float(viz)
    self.viz = viz
    if self.viz and isinstance(self.viz, float):
        self.windowname = os.path.basename(rom_file)
        cv2.startWindowThread()
        cv2.namedWindow(self.windowname)

    # Options above must precede loadROM() to take effect.
    self.ale.loadROM(rom_file)
    self.width, self.height = self.ale.getScreenDims()
    self.actions = self.ale.getMinimalActionSet()

    self.live_lost_as_eoe = live_lost_as_eoe
    self.frame_skip = frame_skip
    self.nullop_start = nullop_start
    self.height_range = height_range
    self.image_shape = image_shape
    self.current_episode_score = StatCounter()
    self.restart_episode()
def map_game_to_ALE(game_name, interactive):
    """Create an ALEInterface with the named game's ROM loaded.

    :param game_name: game name without the ``.bin`` suffix
    :param interactive: when truthy, call setup_display() so the game renders
    :returns: the configured ALEInterface instance
    """
    game_path = '/cvgl/u/nishith/MultiTaskRL/libs/DQN_ale/roms/' \
        + game_name + '.bin'
    # Function-call form: identical output under Python 2 for a single
    # argument, and keeps the module parseable under Python 3 (the
    # original `print game_path` statement is a py3 syntax error).
    print(game_path)
    game = ALEInterface()
    if interactive:
        setup_display(game)
    game.loadROM(game_path)
    return game
def act_with_frame_skip(self, a):
    """Repeat action ``a`` for ``frame_skip`` frames, accumulating reward.

    Episode end is game over, or (outside test mode) the loss of a life.

    :param a: index into self.legal_actions
    :returns: (total_reward, game_over) tuple
    """
    reward = 0
    game_over = False
    lives = ALEInterface.lives(self)
    for _ in xrange(self.frame_skip):
        reward += ALEInterface.act(self, self.legal_actions[a])
        if ALEInterface.game_over(self) or (not self.test_mode and ALEInterface.lives(self) < lives):
            game_over = True
            # Fix: stop acting once the episode has ended; the original
            # kept stepping the emulator for the remaining skip frames.
            break
    return reward, game_over
def peekActionSize(rom):
    """Return the number of actions the configured environment exposes.

    Uses the gym action space when --use_gym is set, otherwise loads the
    ROM into a throwaway ALE instance and counts its minimal action set.
    """
    if args.use_gym:
        import gym
        return gym.make(args.gym_env).action_space.n
    from ale_python_interface import ALEInterface
    emulator = ALEInterface()
    emulator.loadROM(rom.encode('ascii'))
    return len(emulator.getMinimalActionSet())
def loadROM(self, rom_file):
    """Load a ROM and refresh the cached action set, seed and screen size.

    :param rom_file: path to the Atari ROM (.bin)
    """
    ALEInterface.loadROM(self, rom_file)
    if self.minimum_actions:
        self.legal_actions = self.getMinimalActionSet()
    else:
        self.legal_actions = self.getLegalActionSet()
    self.num_actions = len(self.legal_actions)
    # loadROM resets ALE options, so re-apply frame skip and seed.
    self.setInt('frame_skip', self.frame_skip)
    if self.random_seed is not None:
        self.setInt('random_seed', self.random_seed)
    self.height, self.width = self.getScreenDims()
    # NOTE(review): ALE's getScreenDims() returns (width, height); this
    # unpack order looks swapped (matches the same pattern in __init__) --
    # confirm before using self.height/self.width for array shapes.
class AtariMDP(MDP, Serializable):
    """MDP view of an Atari game, with RAM- or image-based observations."""

    def __init__(self, rom_path, obs_type=OBS_RAM, frame_skip=4):
        """
        :param rom_path: path to the Atari ROM (.bin)
        :param obs_type: OBS_RAM or OBS_IMAGE observation mode
        :param frame_skip: number of emulator frames per step()
        """
        Serializable.__init__(self, rom_path, obs_type, frame_skip)
        self.options = (rom_path, obs_type, frame_skip)
        self.ale = ALEInterface()
        self.ale.loadROM(rom_path)
        self._rom_path = rom_path
        self._obs_type = obs_type
        self._action_set = self.ale.getMinimalActionSet()
        self.frame_skip = frame_skip

    def get_image(self):
        # RGB frame of the current screen (via project helper).
        return to_rgb(self.ale)

    def get_ram(self):
        # Raw emulator RAM (via project helper).
        return to_ram(self.ale)

    def game_over(self):
        return self.ale.game_over()

    def reset_game(self):
        return self.ale.reset_game()

    @property
    def n_actions(self):
        """Size of the minimal action set."""
        return len(self.action_set)

    def get_obs(self):
        """Current observation with a leading batch axis of size 1."""
        if self._obs_type == OBS_RAM:
            return self.get_ram()[None,:]
        else:
            assert self._obs_type == OBS_IMAGE
            return self.get_image()[None,:,:,:]

    def step(self, a):
        """Apply action index ``a`` for frame_skip frames.

        :returns: (flattened observation, reward array, game_over flag)
        """
        reward = 0.0
        action = self.action_set[a]
        for _ in xrange(self.frame_skip):
            reward += self.ale.act(action)
        ob = self.get_obs().reshape(1,-1)
        return ob, np.array([reward]), self.ale.game_over()

    # return: (states, observations)
    def reset(self):
        """Restart the episode and return the initial observation."""
        self.ale.reset_game()
        return self.get_obs()

    @property
    def action_set(self):
        return self._action_set

    def plot(self):
        """Show the current frame in an OpenCV window (debugging aid)."""
        import cv2
        cv2.imshow("atarigame",self.get_image()) #pylint: disable=E1101
        cv2.waitKey(10) #pylint: disable=E1101
def init():
    """Create an ALE instance with Space Invaders loaded and return it."""
    pygame.init()
    rom_path = '/Users/maciej/Development/atari-roms'
    ale = ALEInterface()
    # All ALE options must be set before loadROM() to take effect; the
    # original set repeat_action_probability after loading, where it was
    # silently ignored.
    ale.setInt('random_seed', 123)
    # Fix: frame_skip is an integer option -- the original used setBool.
    ale.setInt('frame_skip', 1)
    ale.setFloat("repeat_action_probability", 0)
    ale.loadROM(rom_path + '/space_invaders.bin')
    return ale
def __init__(self, game, args):
    """Configure ALE from parsed CLI args and load ``./roms/<game>.bin``.

    :param game: ROM base name (without the .bin suffix)
    :param args: namespace providing frame_skip, repeat_action_probability,
        color_averaging and random_seed
    """
    self.game = game
    self.ale = ALEInterface()
    # Options must be set before loadROM() to take effect.
    self.ale.setInt('frame_skip', args.frame_skip)
    self.ale.setFloat('repeat_action_probability', args.repeat_action_probability)
    self.ale.setBool('color_averaging', args.color_averaging)
    self.ale.setInt('random_seed', args.random_seed)
    rom_file = "./roms/%s.bin" % game
    if not os.path.exists(rom_file):
        print "not found rom file:", rom_file
        sys.exit(-1)
    self.ale.loadROM(rom_file)
    self.actions = self.ale.getMinimalActionSet()
def __init__(self, rom_name, rng, display_screen = False):
    """Set up ALE plus the frame-merge buffer used for max-pooling frames.

    :param rom_name: ROM file name, resolved relative to ../rom/
    :param rng: numpy-style RandomState used for seeding
    :param display_screen: render the game window when True
    """
    self.api = ALEInterface()
    self.api.setInt('random_seed', rng.randint(333))
    self.api.setBool('display_screen', display_screen)
    self.api.setFloat('repeat_action_probability', 0.0)
    self.rom_name = rom_name
    self.display_screen = display_screen
    self.rng = rng
    # Episode bookkeeping comes from the Environment class constants.
    self.repeat = Environment.FRAMES_SKIP
    self.buffer_len = Environment.BUFFER_LEN
    self.height = Environment.FRAME_HEIGHT
    self.width = Environment.FRAME_WIDTH
    self.episode_steps = Environment.EPISODE_FRAMES / Environment.FRAMES_SKIP
    self.merge_id = 0
    self.max_reward = Environment.MAX_REWARD
    self.eval_eps = Environment.EVAL_EPS
    self.log_dir = ''
    self.network_dir = ''
    self.api.loadROM('../rom/' + self.rom_name)
    self.minimal_actions = self.api.getMinimalActionSet()
    # Ring buffer of raw frames at the emulator's native resolution,
    # merged (e.g. max-pooled) downstream into observations.
    original_width, original_height = self.api.getScreenDims()
    self.merge_frame = np.zeros((self.buffer_len,
                                 original_height,
                                 original_width),
                                dtype = np.uint8)
def __init__(self, rng, rom="ale/breakout.bin", frame_skip=4,
             ale_options=[{"key": "random_seed", "value": 0},
                          {"key": "color_averaging", "value": True},
                          {"key": "repeat_action_probability", "value": 0.}]):
    """Build an ALE-backed environment with generic option dispatch.

    :param rng: random state stored for later use
    :param rom: path to the ROM file
    :param frame_skip: frames per action; values below 1 are clamped to 1
    :param ale_options: list of {"key", "value"} dicts routed to the typed
        ALE setter matching the value's exact Python type
        (NOTE: mutable default argument -- safe only because it is never
        mutated here)
    """
    self._mode = -1
    self._modeScore = 0.0
    self._modeEpisodeCount = 0
    self._frameSkip = frame_skip if frame_skip >= 1 else 1
    self._randomState = rng
    self._ale = ALEInterface()
    for option in ale_options:
        # Exact-type check on purpose: `type(True) is int` is False, so
        # bools are not mis-routed to setInt (isinstance would do that).
        t = type(option["value"])
        if t is int:
            self._ale.setInt(option["key"], option["value"])
        elif t is float:
            self._ale.setFloat(option["key"], option["value"])
        elif t is bool:
            self._ale.setBool(option["key"], option["value"])
        else:
            raise ValueError("Option {} ({}) is not an int, bool or float.".format(option["key"], t))
    self._ale.loadROM(rom)
    # Scratch buffers: native-resolution screen and the 84x84 downsample.
    w, h = self._ale.getScreenDims()
    self._screen = np.empty((h, w), dtype=np.uint8)
    self._reducedScreen = np.empty((84, 84), dtype=np.uint8)
    self._actions = self._ale.getMinimalActionSet()
def __init__(self): self.ale = ALEInterface() # turn off the sound self.ale.setBool('sound', False) self.ale.setBool('display_screen', EMULATOR_DISPLAY) self.ale.setInt('frame_skip', FRAME_SKIP) self.ale.setFloat('repeat_action_probability', REPEAT_ACTION_PROBABILITY) self.ale.setBool('color_averaging', COLOR_AVERAGING) self.ale.setInt('random_seed', RANDOM_SEED) if RECORD_SCENE_PATH: self.ale.setString('record_screen_dir', RECORD_SCENE_PATH) self.ale.loadROM(ROM_PATH) self.actions = self.ale.getMinimalActionSet() logger.info("Actions: " + str(self.actions)) self.dims = DIMS
def __init__(self, rand_seed, display=False):
    """Load the ROM and build the initial 4-frame stacked state s_t.

    :param rand_seed: ALE random seed
    :param display: when True, open a display via _setup_display()
    """
    self.ale = ALEInterface()
    self.ale.setInt('random_seed', rand_seed)
    if display:
        self._setup_display()
    self.ale.loadROM(ROM)
    # height=210, width=160
    self.screen = np.empty((210, 160, 1), dtype=np.uint8)
    # Take one no-op step so there is a frame to read.
    no_action = 0
    self.reward = self.ale.act(no_action)
    self.terminal = self.ale.game_over()
    # screen has shape (210, 160, 1)
    self.ale.getScreenGrayscale(self.screen)
    # reshape to (210, 160)
    reshaped_screen = np.reshape(self.screen, (210, 160))
    # resize to height=110, width=84 (cv2 dsize is (width, height))
    resized_screen = cv2.resize(reshaped_screen, (84, 110))
    # Crop the playing field to 84x84 and scale pixels to [0, 1].
    x_t = resized_screen[18:102,:]
    x_t = x_t.astype(np.float32)
    x_t *= (1.0/255.0)
    # Initial state: the same frame repeated 4 times along the last axis.
    self.s_t = np.stack((x_t, x_t, x_t, x_t), axis = 2)
    # Keep only the actions actually used by this game.
    self.real_actions = self.ale.getMinimalActionSet()
def __init__(self, args): ''' Initialize Atari environment ''' # Parameters self.buffer_length = args.buffer_length self.screen_dims = args.screen_dims self.frame_skip = args.frame_skip self.blend_method = args.blend_method self.reward_processing = args.reward_processing self.max_start_wait = args.max_start_wait self.history_length = args.history_length self.start_frames_needed = self.buffer_length - 1 + ((args.history_length - 1) * self.frame_skip) #Initialize ALE instance self.ale = ALEInterface() self.ale.setFloat(b'repeat_action_probability', 0.0) if args.watch: self.ale.setBool(b'sound', True) self.ale.setBool(b'display_screen', True) self.ale.loadROM(str.encode(args.rom_path + '/' + args.game + '.bin')) self.buffer = np.empty((self.buffer_length, 210, 160)) self.current = 0 self.action_set = self.ale.getMinimalActionSet() self.lives = self.ale.lives() self.reset()
def __init__(self, rom_path, num_frames=4, live=False, skip_frame=0, mode='normal'):
    """Load a ROM and pre-fill the frame history with random steps.

    :param rom_path: path to the Atari ROM
    :param num_frames: frames kept in the history window
    :param live: when True, display the game via SDL
    :param skip_frame: frames to skip between agent steps
    :param mode: 'small' uses 16x16 max-pooled frames, otherwise 84x84
    """
    self.ale = ALEInterface()
    if live:
        # SDL display; on OSX pygame.init() proxy-calls SDL_main.
        USE_SDL = True
        if USE_SDL:
            if sys.platform == 'darwin':
                import pygame
                pygame.init()
                self.ale.setBool('sound', False)  # Sound doesn't work on OSX
            elif sys.platform.startswith('linux'):
                self.ale.setBool('sound', True)
            self.ale.setBool('display_screen', True)
    self.mode = mode
    self.live = live
    self.ale.loadROM(rom_path)
    self.num_frames = num_frames
    self.frames = []
    self.frame_id = 0
    self.cum_reward = 0
    self.skip_frame = skip_frame
    if mode == 'small':
        # Theano 4x4 max-pool used to shrink frames down to 16x16.
        img = T.matrix('img')
        self.max_pool = theano.function([img], max_pool_2d(img, [4, 4]))
        self.img_shape = (16, 16)
    else:
        self.img_shape = (84, 84)  # image shape according to DQN Nature paper.
    # Warm up: take random actions until the frame history is full.
    while len(self.frames) < 4:
        self.step(choice(self.valid_actions, 1)[0])
    self.reset()
def __init__(self, rom_file, display_screen=False, frame_skip=4,
             screen_height=84, screen_width=84, repeat_action_probability=0,
             color_averaging=True, random_seed=0,
             record_screen_path='screen_pics', record_sound_filename=None,
             minimal_action_set=True):
    """Configure ALE and load a ROM.

    :param rom_file: path to the Atari ROM
    :param display_screen: render the game via SDL when True
    :param random_seed: ALE seed; 0 means "leave ALE's default" here
    NOTE(review): record_screen_path and record_sound_filename are
    accepted but never used in this constructor -- either wire them to
    setString('record_screen_dir'/'record_sound_filename') or drop them.
    """
    self.ale = ALEInterface()
    if display_screen:
        if sys.platform == 'darwin':
            import pygame
            pygame.init()
            self.ale.setBool('sound', False)  # Sound doesn't work on OSX
        elif sys.platform.startswith('linux'):
            self.ale.setBool('sound', True)
        self.ale.setBool('display_screen', True)
    # Options must precede loadROM() to take effect.
    self.ale.setInt('frame_skip', frame_skip)
    self.ale.setFloat('repeat_action_probability', repeat_action_probability)
    self.ale.setBool('color_averaging', color_averaging)
    if random_seed:
        self.ale.setInt('random_seed', random_seed)
    self.ale.loadROM(rom_file)
    if minimal_action_set:
        self.actions = self.ale.getMinimalActionSet()
    else:
        self.actions = self.ale.getLegalActionSet()
    # Target (width, height) for downsampled frames.
    self.dims = (screen_width, screen_height)
def __init__(self, rom_file, frame_skip=1, viz=0):
    """
    :param rom_file: path to the rom
    :param frame_skip: skip every k frames
    :param viz: the delay. visualize the game while running. 0 to disable
    """
    self.ale = ALEInterface()
    self.rng = get_rng(self)
    # Fix: the original seeded with rng.randint(rng.randint(0, 1000)),
    # nesting two draws and strongly biasing the seed toward small
    # values; a single uniform draw is clearly what was intended.
    self.ale.setInt("random_seed", self.rng.randint(0, 1000))
    self.ale.setInt("frame_skip", frame_skip)
    self.ale.loadROM(rom_file)
    self.width, self.height = self.ale.getScreenDims()
    self.actions = self.ale.getMinimalActionSet()

    # viz > 0 means "show the screen with this inter-frame delay".
    if isinstance(viz, int):
        viz = float(viz)
    self.viz = viz
    self.romname = os.path.basename(rom_file)
    if self.viz and isinstance(self.viz, float):
        cv2.startWindowThread()
        cv2.namedWindow(self.romname)

    self._reset()
    self.last_image = self._grab_raw_image()
    self.framenum = 0
def __init__(self):
    """Create the emulator, silence all output, load Breakout, seed state."""
    emulator = ALEInterface()
    emulator.setInt('random_seed', 123)
    for switch in ("display_screen", "sound"):
        emulator.setBool(switch, False)
    emulator.loadROM("%s/breakout.bin" % rom_directory)
    self.ale = emulator
    # Two identical frames so consumers always have a (previous, current) pair.
    self.current_state = [emulator.getScreenRGB(), emulator.getScreenRGB()]
def __init__(self):
    """Boot ALE with the Enduro ROM and wire up controller/extractor helpers."""
    ale = ALEInterface()
    # Deterministic, non-sticky, non-averaged emulation.
    ale.setInt('random_seed', 123)
    ale.setFloat('repeat_action_probability', 0.0)
    ale.setBool('color_averaging', False)
    ale.loadROM('roms/enduro.bin')
    self._ale = ale
    self._controller = Controller(ale)
    self._extractor = StateExtractor(ale)
    self._image = None
def __init__(self, settings):
    """Build an ALE instance from a settings dict (rom_name, frame_skip,
    screen_width, screen_height)."""
    emulator = ALEInterface()
    emulator.setInt('frame_skip', settings['frame_skip'])
    # Fresh seed per environment instance.
    emulator.setInt('random_seed', np.random.RandomState().randint(1000))
    emulator.setBool('color_averaging', False)
    emulator.loadROM('roms/' + settings['rom_name'])
    self.ale = emulator
    self.actions = emulator.getMinimalActionSet()
    self.width = settings['screen_width']
    self.height = settings['screen_height']
def __init__(self, rom_path, obs_type=OBS_RAM, frame_skip=4):
    """Load the ROM and record construction options for serialization.

    :param rom_path: path to the Atari ROM
    :param obs_type: OBS_RAM or OBS_IMAGE observation mode
    :param frame_skip: emulator frames per environment step
    """
    Serializable.__init__(self, rom_path, obs_type, frame_skip)
    self.options = (rom_path, obs_type, frame_skip)
    emulator = ALEInterface()
    emulator.loadROM(rom_path)
    self.ale = emulator
    self._action_set = emulator.getMinimalActionSet()
    self._rom_path = rom_path
    self._obs_type = obs_type
    self.frame_skip = frame_skip
def __init__(self, show_screen, history_length):
    """Prepare ALE, optionally show the screen, and allocate frame storage.

    :param show_screen: open a display window when truthy
    :param history_length: number of past frames the agent observes
    """
    self.ale = ALEInterface()
    self.ale.setInt('frame_skip', 4)
    self.history = None
    self.history_length = history_length
    if show_screen:
        self.display_screen()
    self.load_game()
    screen_width, screen_height = self.ale.getScreenDims()
    # Raw 210x160 grayscale screen buffer filled by ALE.
    self.screen_data = np.empty((screen_height, screen_width, 1), dtype=np.uint8)
    # Network input resolution.
    self.dims = (84, 84)
    # noop, left, right, fire
    self.actions = [3, 0, 1, 4]
def __init__(self, processing_cls, game_rom=None, encoder_model=None,
             encoder_weights=None, NFQ_model=None, NFQ_weights=None):
    """Wire together the ALE game, state encoder and NFQ value network.

    :param processing_cls: class instantiated as the frame processor
    :param game_rom: path to the Atari ROM (required)
    :param encoder_model/encoder_weights: optional paths to a pretrained encoder
    :param NFQ_model/NFQ_weights: optional paths to a pretrained NFQ network
    """
    assert game_rom is not None
    self.game = ALEInterface()
    # Encoder: load pretrained weights when both paths are given.
    if encoder_weights is not None and encoder_model is not None:
        self.encoder = Encoder(path_to_model=encoder_model,
                               path_to_weights=encoder_weights)
    else:
        self.encoder = Encoder()
    self.processor = processing_cls()

    # Get & Set the desired settings
    self.game.setInt('random_seed', 0)
    self.game.setInt('frame_skip', 4)

    # Set USE_SDL to true to display the screen. ALE must be compiled
    # with SDL enabled for this to work. On OSX, pygame init is used to
    # proxy-call SDL_main.
    USE_SDL = True
    if USE_SDL:
        if sys.platform == 'darwin':
            pygame.init()
            self.game.setBool('sound', False)  # Sound doesn't work on OSX
        elif sys.platform.startswith('linux'):
            self.game.setBool('sound', False)  # no sound
        self.game.setBool('display_screen', True)

    # Load the ROM file
    self.game.loadROM(game_rom)

    # Get the list of legal actions
    self.legal_actions = self.game.getLegalActionSet()
    # Get actions applicable in current game
    self.minimal_actions = self.game.getMinimalActionSet()

    # NFQ: load pretrained weights when both paths are given.
    if NFQ_model is not None and NFQ_weights is not None:
        self.NFQ = NFQ(
            self.encoder.out_dim,
            len(self.minimal_actions),
            model_path=NFQ_model,
            weights_path=NFQ_weights
        )
    else:
        self.NFQ = NFQ(self.encoder.out_dim, len(self.minimal_actions))

    # Native-resolution grayscale screen buffer.
    (self.screen_width, self.screen_height) = self.game.getScreenDims()
    self.screen_data = np.zeros(
        (self.screen_height, self.screen_width),
        dtype=np.uint8
    )
def __init__(self, rom_name):
    """Load ``./<rom_name>`` into ALE and index its minimal action set.

    :param rom_name: ROM file name, resolved relative to the CWD
    """
    self.ale = ALEInterface()
    self.max_frames_per_episode = self.ale.getInt("max_num_frames_per_episode")
    self.ale.setInt("random_seed", 123)
    self.ale.setInt("frame_skip", 4)
    self.ale.loadROM('./' + rom_name)
    self.screen_width, self.screen_height = self.ale.getScreenDims()
    self.legal_actions = self.ale.getMinimalActionSet()
    # Map each ALE action id back to its index in legal_actions
    # (idiomatic replacement for the original range(len(...)) loop).
    self.action_map = {action: i for i, action in enumerate(self.legal_actions)}
    self.windowname = rom_name
def __init__(self, rom_file, sdl=False):
    """Create an ALE emulator, optionally rendering through SDL."""
    emulator = ALEInterface()
    self.ale = emulator
    if sdl:
        platform = sys.platform
        if platform == 'darwin':
            # OSX needs pygame to proxy SDL_main; sound is broken there.
            import pygame
            pygame.init()
            emulator.setBool(b'sound', False)
        elif platform.startswith('linux'):
            emulator.setBool(b'sound', True)
        emulator.setBool(b'display_screen', True)
    emulator.loadROM(str.encode(rom_file))
def init(self, rom_file, ale_frame_skip):
    """Load a ROM and snapshot the emulator RAM as the initial state.

    NOTE(review): this uses a generic ``self.ale.set(...)`` rather than the
    typed setInt/setBool setters of ale_python_interface, and the option
    name "disable_color_averaging" does not match ALE's documented
    "color_averaging" key -- confirm against the ALE build in use.
    """
    self.ale = ALEInterface()
    self.max_frames_per_episode = self.ale.getInt("max_num_frames_per_episode");
    self.ale.set("random_seed", 123)
    self.ale.set("disable_color_averaging", 1)
    self.ale.set("frame_skip", ale_frame_skip)
    self.ale.loadROM(rom_file)
    self.legal_actions = self.ale.getMinimalActionSet()
    ram_size = self.ale.getRAMSize()
    self.ram = np.zeros((ram_size), dtype=np.uint8)
    self.ale.getRAM(self.ram)
    # NOTE(review): getRAM(buffer) fills the buffer in place; if it
    # returns None in this ALE version, self.state ends up None rather
    # than a RAM snapshot -- verify which binding is installed.
    self.state = self.ale.getRAM(self.ram)
def __init__(self, rom_path, rom_name, visualize, actor_id, rseed, single_life_episodes = False):
    """Set up a per-actor ALE emulator for parallel training.

    :param rom_path: directory containing the ROMs
    :param rom_name: ROM base name (without .bin)
    :param visualize: open an OpenCV window showing raw frames
    :param actor_id: worker index; combined with rseed for a unique seed
    :param rseed: base random seed
    :param single_life_episodes: treat a lost life as episode end
    """
    self.ale = ALEInterface()
    # Distinct seed per actor so parallel workers diverge.
    self.ale.setInt("random_seed", rseed * (actor_id +1))
    # For fuller control on explicit action repeat (>= ALE 0.5.0)
    self.ale.setFloat("repeat_action_probability", 0.0)
    # Disable frame_skip and color_averaging
    # See: http://is.gd/tYzVpj
    self.ale.setInt("frame_skip", 1)
    self.ale.setBool("color_averaging", False)
    self.ale.loadROM(rom_path + "/" + rom_name + ".bin")
    self.legal_actions = self.ale.getMinimalActionSet()
    self.screen_width, self.screen_height = self.ale.getScreenDims()

    # Processed historical frames that will be fed in to the network
    # (i.e., four 84x84 images)
    self.screen_images_processed = np.zeros((IMG_SIZE_X, IMG_SIZE_Y, NR_IMAGES))
    self.rgb_screen = np.zeros((self.screen_height, self.screen_width, 3), dtype=np.uint8)
    self.gray_screen = np.zeros((self.screen_height, self.screen_width, 1), dtype=np.uint8)
    # Two-slot buffer used to max-pool consecutive raw frames.
    self.frame_pool = np.empty((2, self.screen_height, self.screen_width))
    self.current = 0
    self.lives = self.ale.lives()

    self.visualize = visualize
    self.visualize_processed = False
    self.windowname = rom_name + ' ' + str(actor_id)
    if self.visualize:
        logger.debug("Opening emulator window...")
        cv2.startWindowThread()
        cv2.namedWindow(self.windowname)
        logger.debug("Emulator window opened")
    if self.visualize_processed:
        logger.debug("Opening processed frame window...")
        cv2.startWindowThread()
        logger.debug("Processed frame window opened")
        cv2.namedWindow(self.windowname + "_processed")

    self.single_life_episodes = single_life_episodes
def __init__(self, rand_seed, display=False, no_op_max=7):
    """Seed ALE, optionally open a display, load the ROM and reset.

    :param rand_seed: ALE random seed
    :param display: when True, call _setup_display() before loading
    :param no_op_max: maximum number of initial no-op actions
    """
    emulator = ALEInterface()
    emulator.setInt('random_seed', rand_seed)
    self.ale = emulator
    self._no_op_max = no_op_max
    if display:
        self._setup_display()
    emulator.loadROM(ROM)
    # Only the actions this game actually uses.
    self.real_actions = emulator.getMinimalActionSet()
    # Native grayscale screen buffer: height=210, width=160.
    self._screen = np.empty((210, 160, 1), dtype=np.uint8)
    self.reset()
def __init__(self, rom_file, args):
    """Configure ALE from parsed args, with optional display and recording.

    :param rom_file: path to the Atari ROM
    :param args: namespace providing display_screen, frame_skip,
        repeat_action_probability, color_averaging, random_seed,
        record_screen_path, record_sound_filename, minimal_action_set,
        screen_width and screen_height
    """
    from ale_python_interface import ALEInterface
    self.ale = ALEInterface()
    if args.display_screen:
        if sys.platform == 'darwin':
            import pygame
            pygame.init()
            self.ale.setBool('sound', False)  # Sound doesn't work on OSX
        elif sys.platform.startswith('linux'):
            self.ale.setBool('sound', True)
        self.ale.setBool('display_screen', True)

    # All options below must be set before loadROM() to take effect.
    self.ale.setInt('frame_skip', args.frame_skip)
    self.ale.setFloat('repeat_action_probability', args.repeat_action_probability)
    self.ale.setBool('color_averaging', args.color_averaging)

    if args.random_seed:
        self.ale.setInt('random_seed', args.random_seed)

    if args.record_screen_path:
        if not os.path.exists(args.record_screen_path):
            logger.info("Creating folder %s" % args.record_screen_path)
            os.makedirs(args.record_screen_path)
        logger.info("Recording screens to %s", args.record_screen_path)
        self.ale.setString('record_screen_dir', args.record_screen_path)

    if args.record_sound_filename:
        logger.info("Recording sound to %s", args.record_sound_filename)
        # Sound recording requires sound to be enabled.
        self.ale.setBool('sound', True)
        self.ale.setString('record_sound_filename', args.record_sound_filename)

    self.ale.loadROM(rom_file)

    if args.minimal_action_set:
        self.actions = self.ale.getMinimalActionSet()
        logger.info("Using minimal action set with size %d" % len(self.actions))
    else:
        self.actions = self.ale.getLegalActionSet()
        logger.info("Using full action set with size %d" % len(self.actions))
    logger.debug("Actions: " + str(self.actions))

    self.screen_width = args.screen_width
    self.screen_height = args.screen_height
    # Tracks whether a life was lost during the last act() call.
    self.life_lost = False
#!/usr/bin/env python # python_example.py # Author: Ben Goodrich # # This is a direct port to python of the shared library example from # ALE provided in doc/examples/sharedLibraryInterfaceExample.cpp import sys from random import randrange from ale_python_interface import ALEInterface if len(sys.argv) < 2: print 'Usage:', sys.argv[0], 'rom_file' sys.exit() ale = ALEInterface() # Get & Set the desired settings ale.setInt('random_seed', 123) # Set USE_SDL to true to display the screen. ALE must be compilied # with SDL enabled for this to work. On OSX, pygame init is used to # proxy-call SDL_main. USE_SDL = False if USE_SDL: if sys.platform == 'darwin': import pygame pygame.init() ale.setBool('sound', False) # Sound doesn't work on OSX elif sys.platform.startswith('linux'): ale.setBool('sound', True) ale.setBool('display_screen', True)
class ALEEnvironment(Environment):
    """Environment backed by the Arcade Learning Environment emulator."""

    def __init__(self, rom_file, args):
        """Configure ALE from parsed args, with optional display/recording.

        :param rom_file: path to the Atari ROM
        :param args: namespace providing display_screen, frame_skip,
            repeat_action_probability, color_averaging, random_seed,
            record_screen_path, record_sound_filename, minimal_action_set,
            screen_width and screen_height
        """
        from ale_python_interface import ALEInterface
        self.ale = ALEInterface()
        if args.display_screen:
            if sys.platform == 'darwin':
                import pygame
                pygame.init()
                self.ale.setBool('sound', False)  # Sound doesn't work on OSX
            elif sys.platform.startswith('linux'):
                self.ale.setBool('sound', True)
            self.ale.setBool('display_screen', True)

        # Options below must be set before loadROM() to take effect.
        self.ale.setInt('frame_skip', args.frame_skip)
        self.ale.setFloat('repeat_action_probability', args.repeat_action_probability)
        self.ale.setBool('color_averaging', args.color_averaging)

        if args.random_seed:
            self.ale.setInt('random_seed', args.random_seed)

        if args.record_screen_path:
            if not os.path.exists(args.record_screen_path):
                logger.info("Creating folder %s" % args.record_screen_path)
                os.makedirs(args.record_screen_path)
            logger.info("Recording screens to %s", args.record_screen_path)
            self.ale.setString('record_screen_dir', args.record_screen_path)

        if args.record_sound_filename:
            logger.info("Recording sound to %s", args.record_sound_filename)
            # Sound recording requires sound to be enabled.
            self.ale.setBool('sound', True)
            self.ale.setString('record_sound_filename', args.record_sound_filename)

        self.ale.loadROM(rom_file)

        if args.minimal_action_set:
            self.actions = self.ale.getMinimalActionSet()
            logger.info("Using minimal action set with size %d" % len(self.actions))
        else:
            self.actions = self.ale.getLegalActionSet()
            logger.info("Using full action set with size %d" % len(self.actions))
        logger.debug("Actions: " + str(self.actions))

        self.screen_width = args.screen_width
        self.screen_height = args.screen_height

    def numActions(self):
        """Number of actions exposed to the agent."""
        return len(self.actions)

    def restart(self):
        """Start a new episode."""
        self.ale.reset_game()

    def act(self, action):
        """Apply the action at the given index; return the reward."""
        reward = self.ale.act(self.actions[action])
        return reward

    def getScreen(self):
        """Return the grayscale screen resized to (screen_width, screen_height)."""
        screen = self.ale.getScreenGrayscale()
        resized = cv2.resize(screen, (self.screen_width, self.screen_height))
        return resized

    def isTerminal(self):
        """True when the current game is over."""
        return self.ale.game_over()
# random_boxing.py
# Two random agents playing against each other Boxing with different frame skips
# to evaluate which impact frame skip has on the game.
# Author: Jens Roewekamp
#
import sys
import os

# Add ALE_PATH to the module search path when the variable is set.
# Fix: the original indexed os.environ['ALE_PATH'] directly, which raises
# KeyError when the variable is absent -- the opposite of the intended
# "check if it exists"; .get() makes the check safe.
if os.environ.get('ALE_PATH'):
    sys.path.append(os.environ['ALE_PATH'])

from random import randrange
from ale_python_interface import ALEInterface

ale = ALEInterface()

# Get & Set the desired settings
ale.setInt('random_seed', 123)

# Set USE_SDL to true to display the screen. ALE must be compiled
# with SDL enabled for this to work. On OSX, pygame init is used to
# proxy-call SDL_main.
USE_SDL = False
if USE_SDL:
    if sys.platform == 'darwin':
        import pygame
        pygame.init()
        ale.setBool('sound', False)  # Sound doesn't work on OSX
    elif sys.platform.startswith('linux'):
        ale.setBool('sound', True)
class AtariPlayer(RLEnvironment):
    """
    A wrapper for atari emulator.
    """
    def __init__(self, rom_file, viz=0, height_range=(None,None),
                 frame_skip=4, image_shape=(84, 84),
                 nullop_start=30, live_lost_as_eoe=True):
        """
        :param rom_file: path to the rom
        :param frame_skip: skip every k frames and repeat the action
        :param image_shape: (w, h)
        :param height_range: (h1, h2) to cut
        :param viz: visualization to be done. Set to 0 to disable. Set to
            a positive number to be the delay between frames to show. Set
            to a string to be a directory to store frames.
        :param nullop_start: start with random number of null ops
        :param live_lost_as_eoe: consider loss of lives as end of episode.
            Useful for training.
        """
        super(AtariPlayer, self).__init__()
        self.ale = ALEInterface()
        self.rng = get_rng(self)
        self.ale.setInt("random_seed", self.rng.randint(0, 10000))
        self.ale.setBool("showinfo", False)
        # setLoggerMode only exists in newer ALE releases; tolerate old ones.
        try:
            ALEInterface.setLoggerMode(ALEInterface.Logger.Warning)
        except AttributeError:
            log_once()
        # Frame skipping is handled manually in action(), not by ALE.
        self.ale.setInt("frame_skip", 1)
        self.ale.setBool('color_averaging', False)
        # manual.pdf suggests otherwise. may need to check
        self.ale.setFloat('repeat_action_probability', 0.0)

        # viz setup: a string means "record frames to this directory",
        # a number means "show the screen with this inter-frame delay".
        if isinstance(viz, six.string_types):
            assert os.path.isdir(viz), viz
            self.ale.setString('record_screen_dir', viz)
            viz = 0
        if isinstance(viz, int):
            viz = float(viz)
        self.viz = viz
        if self.viz and isinstance(self.viz, float):
            self.windowname = os.path.basename(rom_file)
            cv2.startWindowThread()
            cv2.namedWindow(self.windowname)

        # Options above must precede loadROM() to take effect.
        self.ale.loadROM(rom_file)
        self.width, self.height = self.ale.getScreenDims()
        self.actions = self.ale.getMinimalActionSet()

        self.live_lost_as_eoe = live_lost_as_eoe
        self.frame_skip = frame_skip
        self.nullop_start = nullop_start
        self.height_range = height_range
        self.image_shape = image_shape
        self.current_episode_score = StatCounter()
        self.restart_episode()

    def _grab_raw_image(self):
        """
        :returns: the current 3-channel image
        """
        m = self.ale.getScreenRGB()
        return m.reshape((self.height, self.width, 3))

    def current_state(self):
        """
        :returns: a gray-scale (h, w, 1) image
        """
        ret = self._grab_raw_image()
        # max-pooled over the last screen
        ret = np.maximum(ret, self.last_raw_screen)
        if self.viz:
            if isinstance(self.viz, float):
                cv2.imshow(self.windowname, ret)
                time.sleep(self.viz)
        # Crop to the configured vertical range before grayscale/resize.
        ret = ret[self.height_range[0]:self.height_range[1],:]
        # 0.299,0.587.0.114. same as rgb2y in torch/image
        ret = cv2.cvtColor(ret, cv2.COLOR_RGB2GRAY)
        ret = cv2.resize(ret, self.image_shape)
        ret = np.expand_dims(ret, axis=2)
        return ret

    def get_num_actions(self):
        """
        :returns: the number of legal actions
        """
        return len(self.actions)

    def restart_episode(self):
        """Record the finished episode's score and restart the game."""
        if self.current_episode_score.count > 0:
            self.stats['score'].append(self.current_episode_score.sum)
        self.current_episode_score.reset()
        self.ale.reset_game()

        # random null-ops start to de-correlate initial states
        n = self.rng.randint(self.nullop_start)
        self.last_raw_screen = self._grab_raw_image()
        for k in range(n):
            if k == n - 1:
                # Keep the frame just before the last no-op for max-pooling.
                self.last_raw_screen = self._grab_raw_image()
            self.ale.act(0)

    def action(self, act):
        """
        :param act: an index of the action
        :returns: (reward, isOver)
        """
        oldlives = self.ale.lives()
        r = 0
        for k in range(self.frame_skip):
            if k == self.frame_skip - 1:
                # Snapshot the penultimate frame for flicker max-pooling.
                self.last_raw_screen = self._grab_raw_image()
            r += self.ale.act(self.actions[act])
            newlives = self.ale.lives()
            if self.ale.game_over() or \
                    (self.live_lost_as_eoe and newlives < oldlives):
                break

        self.current_episode_score.feed(r)
        isOver = self.ale.game_over()
        if isOver:
            self.restart_episode()
        if self.live_lost_as_eoe:
            # A lost life also counts as episode end during training.
            isOver = isOver or newlives < oldlives
        return (r, isOver)

    def get_stat(self):
        """Return aggregate score statistics ({} until a score exists)."""
        try:
            return {'avg_score': np.mean(self.stats['score']),
                    'max_score': float(np.max(self.stats['score']))}
        except ValueError:
            return {}
class Environment:
    """ALE environment producing 84x84x4 stacked grayscale observations."""

    def __init__(self, render=False):
        """
        :param render: when True, display the game screen
        """
        self.ale = ALEInterface()
        # Deterministic emulation; ALE itself applies the 4-frame skip.
        self.ale.setInt(b'random_seed', 0)
        self.ale.setFloat(b'repeat_action_probability', 0.0)
        self.ale.setBool(b'color_averaging', True)
        self.ale.setInt(b'frame_skip', 4)
        self.ale.setBool(b'display_screen', render)
        self.ale.loadROM(ENV.encode('ascii'))
        # Native grayscale screen buffer (210x160x1).
        self._screen = np.empty((210, 160, 1), dtype=np.uint8)
        self._no_op_max = 7

    def set_render(self, render):
        # Can only turn rendering off after construction.
        if not render:
            self.ale.setBool(b'display_screen', render)

    def reset(self):
        """Restart the game; return the initial stacked frame buffer."""
        self.ale.reset_game()

        # randomize initial state with up to _no_op_max no-op actions
        if self._no_op_max > 0:
            no_op = np.random.randint(0, self._no_op_max + 1)
            for _ in range(no_op):
                self.ale.act(0)

        self.ale.getScreenGrayscale(self._screen)
        screen = np.reshape(self._screen, (210, 160))
        # Resize to 84x110, crop the 84x84 playing field, scale to [0, 1].
        screen = cv2.resize(screen, (84, 110))
        screen = screen[18:102, :]
        screen = screen.astype(np.float32)
        screen /= 255.0
        # Initial state: the same frame repeated 4 times.
        self.frame_buffer = np.stack((screen, screen, screen, screen), axis=2)
        return self.frame_buffer

    def act(self, action):
        """Step the emulator; return (state, reward, done, info).

        NOTE(review): the ``4 + action`` offset maps the agent's action
        index onto a fixed slice of ALE's action ids -- confirm it matches
        the intended action subset for the loaded ROM.
        """
        reward = self.ale.act(4 + action)
        done = self.ale.game_over()

        self.ale.getScreenGrayscale(self._screen)
        screen = np.reshape(self._screen, (210, 160))
        screen = cv2.resize(screen, (84, 110))
        screen = np.reshape(screen[18:102, :], (84, 84, 1))
        screen = screen.astype(np.float32)
        screen *= (1 / 255.0)
        # Slide the 4-frame window: drop the oldest, append the newest.
        self.frame_buffer = np.append(self.frame_buffer[:, :, 1:], screen, axis=2)
        return self.frame_buffer, reward, done, ""

    def close(self):
        self.ale.setBool(b'display_screen', False)
class AtariEnvironment:
    """ALE wrapper maintaining a short frame buffer (for max-pooling over
    consecutive emulator frames) and a stacked frame history for the agent.

    All configuration is read from the module-level ``cfg`` object.
    """

    def __init__(self, frame_shape, frame_postprocess=lambda x: x):
        """
        Args:
            frame_shape: shape of one postprocessed frame kept in the
                history stack.
            frame_postprocess: callable applied to each max-pooled
                (H, W, 1) frame, e.g. resize/crop. Defaults to identity.
        """
        self.ale = ALEInterface()

        self.ale.setBool(b"display_screen", cfg.display_screen)
        # Frame skipping is handled in act(); the emulator runs every frame.
        self.ale.setInt(b"frame_skip", 1)
        self.ale.setBool(b"color_averaging", False)
        self.ale.setInt(b"random_seed", cfg.random_seed)
        self.ale.setFloat(b"repeat_action_probability", cfg.sticky_prob)

        self.ale.loadROM(str.encode(cfg.rom))
        self.ale.setMode(cfg.mode)
        self.ale.setDifficulty(cfg.difficulty)

        self.action_set = self.ale.getLegalActionSet()
        # self.action_set = self.ale.getMinimalActionSet()
        assert len(self.action_set) == cfg.num_actions

        # getScreenDims() returns (w, h); buffers are stored as (h, w, 1).
        screen_dims = tuple(reversed(self.ale.getScreenDims())) + (1, )
        self._frame_buffer = CircularBuffer(cfg.frame_buffer_size,
                                            screen_dims, np.uint8)
        self._frame_stack = CircularBuffer(cfg.frame_history_size,
                                           frame_shape, np.uint8)
        self._frame_postprocess = frame_postprocess

        self._episode_count = 0
        self.reset(inc_episode_count=False)

    def _is_terminal(self):
        # End of episode is the emulator's own game-over flag.
        return self.ale.game_over()

    def _get_single_frame(self):
        """Max-pool the buffered grayscale frames into one (H, W, 1) frame
        and apply the user-supplied postprocess function."""
        stacked_frames = np.concatenate(self._frame_buffer, axis=2)
        maxed_frame = np.amax(stacked_frames, axis=2)
        # BUGFIX: `maxed_frame` is 2-D here, so the new channel axis must be
        # axis 2; the previous `np.expand_dims(maxed_frame, 3)` raises
        # AxisError on NumPy >= 1.18 (out-of-range axis for a 2-D array).
        expanded_frame = np.expand_dims(maxed_frame, axis=2)
        frame = self._frame_postprocess(expanded_frame)
        return frame

    def reset(self, inc_episode_count=True):
        """Start a new episode and prime the frame buffer and stack."""
        self._episode_frames = 0
        self._episode_reward = 0
        if inc_episode_count:
            self._episode_count += 1
        self.ale.reset_game()

        for _ in range(cfg.frame_buffer_size):
            self._frame_buffer.append(self.ale.getScreenGrayscale())
        for _ in range(cfg.frame_history_size):
            self._frame_stack.append(self._get_single_frame())

    def act(self, action):
        """Repeat `action` for cfg.frame_skip emulator frames.

        Returns:
            (reward clipped to [-1, 1], next state, terminal flag).
            Unclipped rewards accumulate into episode_reward.
        """
        assert not self._is_terminal()
        cum_reward = 0
        for _ in range(cfg.frame_skip):
            cum_reward += self.ale.act(self.action_set[action])
            self._frame_buffer.append(self.ale.getScreenGrayscale())
        self._frame_stack.append(self._get_single_frame())

        self._episode_frames += cfg.frame_skip
        self._episode_reward += cum_reward
        cum_reward = np.clip(cum_reward, -1, 1)

        return cum_reward, self.state, self._is_terminal()

    @property
    def state(self):
        """Current observation: the frame history stacked on the last axis."""
        assert len(self._frame_buffer) == cfg.frame_buffer_size
        assert len(self._frame_stack) == cfg.frame_history_size
        return np.concatenate(self._frame_stack, axis=-1)

    @property
    def episode_reward(self):
        return self._episode_reward

    @property
    def episode_frames(self):
        return self._episode_frames

    @property
    def episode_steps(self):
        return self._episode_frames // cfg.frame_skip

    @property
    def episode_count(self):
        return self._episode_count
class Agent(object):
    """Abstract base class for an Enduro-playing agent.

    Owns the ALE emulator, a Controller for issuing actions and a
    StateExtractor that turns raw frames into (road, cars, grid) state.
    Subclasses implement initialise/act/sense/learn/callback.
    """

    def __init__(self):
        self._ale = ALEInterface()
        self._ale.setInt('random_seed', 123)
        self._ale.setFloat('repeat_action_probability', 0.0)
        self._ale.setBool('color_averaging', False)
        self._ale.loadROM('roms/enduro.bin')
        self._controller = Controller(self._ale)
        self._extractor = StateExtractor(self._ale)
        self._image = None  # last rendered frame, updated inside run()
        # Relative speed is tracked in [-_speed_range, _speed_range].
        self._speed_range = 50

    def run(self, learn, episodes=1, draw=False):
        """ Implements the playing/learning loop.

        Args:
            learn(bool): Whether the self.learn() function should be called.
            episodes (int): The number of episodes to run the agent for.
            draw (bool): Whether to overlay the environment state on the frame.

        Returns:
            None
        """
        for e in range(episodes):
            # Each episode starts at the most negative relative speed.
            self._relative_speed = -self._speed_range

            # Observe the environment to set the initial state
            (road, cars, grid, self._image) = self._extractor.run(draw=draw, scale=4.0)
            self.initialise(road, cars, self._relative_speed, grid)

            num_frames = self._ale.getFrameNumber()

            # Each episode lasts 6500 frames
            while self._ale.getFrameNumber() - num_frames < 6500:
                # Take an action
                self.act()

                # Update the environment grid
                (road, cars, grid, self._image) = self._extractor.run(draw=draw, scale=4.0)

                # A collision resets the relative speed to its minimum.
                if self.collision(cars):
                    self._relative_speed = -self._speed_range

                self.sense(road, cars, self._relative_speed, grid)

                # Perform learning if required
                if learn:
                    self.learn()

                self.callback(learn, e + 1, self._ale.getFrameNumber() - num_frames)
            self._ale.reset_game()

    def collision(self, cars):
        """Return True when the nearest opponent is within 18 px of the agent
        and lies roughly ahead of it (angle in (0.1*pi, 0.9*pi))."""
        if not cars['others']:
            return False

        x, y, _, _ = cars['self']
        min_dist = sys.float_info.max
        min_angle = 0.
        # Find the nearest opponent and the angle towards it.
        for c in cars['others']:
            cx, cy, _, _ = c
            dist = np.sqrt((cx - x)**2 + (cy - y)**2)
            if dist < min_dist:
                min_dist = dist
                min_angle = np.arctan2(y - cy, cx - x)
        return min_dist < 18. and 0.1 * np.pi < min_angle and min_angle < 0.9 * np.pi

    def getActionsSet(self):
        """ Returns the set of all possible actions
        """
        return [Action.ACCELERATE, Action.RIGHT, Action.LEFT, Action.BRAKE]

    def move(self, action):
        """ Executes the action and advances the game to the next state.

        Args:
            action (int): The action which should executed. Make sure to use
                          the constants returned by self.getActionsSet()

        Returns:
           int: The obtained reward after executing the action
        """
        # Track the relative speed locally; ACCELERATE/BRAKE move it one step
        # inside [-_speed_range, _speed_range], other actions leave it alone.
        if action == Action.ACCELERATE:
            self._relative_speed = min(self._relative_speed + 1, self._speed_range)
        elif action == Action.BRAKE:
            self._relative_speed = max(self._relative_speed - 1, -self._speed_range)
        return self._controller.move(action)

    def initialise(self, road, cars, speed, grid):
        """ Called at the beginning of each episode, mainly used for state
        initialisation.

        For more information on the arguments have a look at the README.md

        Args:
            road: 2-dimensional array containing [x, y] points
                  in pixel coordinates of the road grid
            cars: dictionary which contains the location and the size of
                  the agent and the opponents in pixel coordinates
            speed: the relative speed of the agent with respect to the others
            grid: 2-dimensional numpy array containing the latest grid
                  representation of the environment

        Returns:
            None
        """
        raise NotImplementedError

    def act(self):
        """ Called at each loop iteration to choose and execute an action.

        Returns:
            None
        """
        raise NotImplementedError

    def sense(self, road, cars, speed, grid):
        """ Called at each loop iteration to construct the new state from
        the updated environment grid.

        For more information on the arguments have a look at the README.md

        Args:
            road: 2-dimensional array containing [x, y] points
                  in pixel coordinates of the road grid
            cars: dictionary which contains the location and the size of
                  the agent and the opponents in pixel coordinates
            speed: the relative speed of the agent with respect to the others
            grid: 2-dimensional numpy array containing the latest grid
                  representation of the environment

        Returns:
            None
        """
        raise NotImplementedError

    def learn(self):
        """ Called at each loop iteration when the agent is learning. It should
        implement the learning procedure.

        Returns:
            None
        """
        raise NotImplementedError

    def callback(self, learn, episode, iteration):
        """ Called at each loop iteration mainly for reporting purposes.

        Args:
            learn (bool): Indicates whether the agent is learning or not.
            episode (int): The number of the current episode.
            iteration (int): The number of the current iteration.

        Returns:
            None
        """
        raise NotImplementedError
SEE_SCREEN = True if sys.argv[3] == 'set_screen' else False # Generate future positions of Bert dpeending on current position and action FUTURE_POS = gen_future_locs(BOX_LOCS) # Learning hyperparameters episodes = 400 # how many episodes to wait before moving the weights max_time = 10000 gamma = 0.99 # discount factor for reward lr = 1e-4 NUM_FEATURES = 31 weights = [rd.random() for _ in range(NUM_FEATURES)] e = 0.15 if USE_OPTIMISTIC_PRIOR == False else 0.00 # Initialize learning environment ale = ALEInterface() ale.setBool('sound', False) ale.setBool('display_screen', SEE_SCREEN) ale.setInt('frame_skip', 1) ale.setInt('random_seed', SEED) rd.seed(SEED) ale.loadROM("qbert.bin") ELPASED_FRAME = 0 # Possible positions of Bert in the RAM right beforetaking any action MEM_POS = [[69, 77], [92, 77], [120, 65], [97, 65], [147, 53], [124, 53], [152, 41], [175, 41], [180, 29], [203, 29], [231, 16], [231, 41], [175, 65], [180, 53], [203, 53], [147, 77], [120, 93], [152, 65], [231, 65], [175, 93], [97, 93], [180, 77], [231, 93], [180, 105], [147, 105], [203, 77], [175, 77], [175, 117], [231, 117], [203, 129], [203, 105], [180, 129], [231, 141],
class ALE(Environment):
    def __init__(self, rom, frame_skip=1, reward_clipping=None,
                 repeat_action_probability=0.0,
                 loss_of_life_termination=False,
                 loss_of_life_reward=0, display_screen=False,
                 seed=None):
        """
        Initialize ALE.

        Args:
            rom: Rom filename and directory.
            frame_skip: Repeat action for n frames. Default 1.
            reward_clipping: Clip rewards between (low, high). Can be None. Default None.
            repeat_action_probability: Repeats last action with given probability. Default 0.
            loss_of_life_termination: Signals a terminal state on loss of life. Default False.
            loss_of_life_reward: Reward/Penalty on loss of life (negative values are a penalty). Default 0.
            display_screen: Displays the emulator screen. Default False.
            seed: numpy RandomState used to seed the emulator. Default None,
                which creates a fresh RandomState per instance.
        """
        # BUGFIX: the signature previously used `seed=np.random.RandomState()`.
        # Default arguments are evaluated once at import time, so all ALE
        # instances shared a single RandomState (a classic mutable-default
        # pitfall). A None sentinel keeps the call signature compatible.
        if seed is None:
            seed = np.random.RandomState()
        self.ale = ALEInterface()
        self.rom = rom
        self.ale.setBool(b'display_screen', display_screen)
        self.ale.setInt(b'random_seed', seed.randint(0, 9999))
        self.ale.setFloat(b'repeat_action_probability', repeat_action_probability)
        self.ale.setBool(b'color_averaging', False)
        self.ale.setInt(b'frame_skip', frame_skip)

        # all set commands must be done before loading the ROM
        self.ale.loadROM(rom.encode())

        # setup gamescreen object; ALE reports (w, h), the RGB buffer is (h, w, 3)
        width, height = self.ale.getScreenDims()
        self.gamescreen = np.empty((height, width, 3), dtype=np.uint8)

        self.frame_skip = frame_skip

        # setup action converter
        # ALE returns legal action indexes, convert these to just numbers
        self.action_inds = self.ale.getMinimalActionSet()

        # setup lives
        self.loss_of_life_reward = loss_of_life_reward
        self.cur_lives = self.ale.lives()
        self.loss_of_life_termination = loss_of_life_termination
        self.life_lost = False

        # reward clipping
        self.reward_clipping = reward_clipping

    def __str__(self):
        return 'ALE({})'.format(self.rom)

    def close(self):
        # Drop the emulator reference; the instance is unusable afterwards.
        self.ale = None

    def reset(self):
        """Reset the game and return the first observation."""
        self.ale.reset_game()
        self.cur_lives = self.ale.lives()
        self.life_lost = False
        # reallocate the gamescreen buffer (contents are overwritten by
        # getScreenRGB on the next current_state access)
        self.gamescreen = np.empty(self.gamescreen.shape, dtype=np.uint8)
        return self.current_state

    def execute(self, action):
        """Execute one action index; return (next_state, reward, terminal)."""
        # convert action to ale action
        ale_action = self.action_inds[action]

        # get reward and process terminal & next state
        rew = self.ale.act(ale_action)
        if self.loss_of_life_termination or self.loss_of_life_reward != 0:
            new_lives = self.ale.lives()
            if new_lives < self.cur_lives:
                self.cur_lives = new_lives
                self.life_lost = True
                # life-loss penalty is added on top of the game reward
                rew += self.loss_of_life_reward

        if self.reward_clipping is not None:
            rew = np.clip(rew, self.reward_clipping[0], self.reward_clipping[1])

        terminal = self.is_terminal
        state_tp1 = self.current_state
        return state_tp1, rew, terminal

    @property
    def states(self):
        return dict(shape=self.gamescreen.shape, type=float)

    @property
    def actions(self):
        return dict(continuous=False,
                    num_actions=len(self.action_inds),
                    names=self.action_names)

    @property
    def current_state(self):
        # getScreenRGB fills the preallocated buffer; copy so callers can
        # hold on to the frame safely.
        self.gamescreen = self.ale.getScreenRGB(self.gamescreen)
        return np.copy(self.gamescreen)

    @property
    def is_terminal(self):
        if self.loss_of_life_termination and self.life_lost:
            return True
        else:
            return self.ale.game_over()

    @property
    def action_names(self):
        action_names = [
            'No-Op', 'Fire', 'Up', 'Right', 'Left', 'Down', 'Up Right',
            'Up Left', 'Down Right', 'Down Left', 'Up Fire', 'Right Fire',
            'Left Fire', 'Down Fire', 'Up Right Fire', 'Up Left Fire',
            'Down Right Fire', 'Down Left Fire'
        ]
        return np.asarray(action_names)[self.action_inds]
# Experiment configuration and hyperparameters.
dataPath = "data/"
# Epsilon-greedy exploration is annealed between these two rates.
initialExplorationRate = 1.0
finalExplorationRate = 0.1
SEED = None
np.random.seed(SEED)
loadModel = False
saveData = False
saveModel = False
gamma = .99  # discount factor for future rewards
learning_rate = 0.00025
display_screen = False
frameSkip = 4

# Initialize the Arcade Learning Environment.
ale = ALEInterface()
# ale.setInt('random_seed', 0)
ale.setInt("frame_skip", frameSkip)

# Set USE_SDL to true to display the screen. ALE must be compiled with SDL
# enabled for this to work. On OSX, pygame init is used to proxy-call
# SDL_main.
USE_SDL = True
if USE_SDL:
    if sys.platform == 'darwin':
        import pygame
        pygame.init()
        ale.setBool('sound', False)  # Sound doesn't work on OSX
    elif sys.platform.startswith('linux'):
        ale.setBool('sound', False)
    ale.setBool('display_screen', display_screen)

ale.loadROM("rom/Breakout.A26")
legal_actions = ale.getMinimalActionSet()
class ALE_Environment(EnvironmentBase):
    """
    Environment Specifications:
    Number of Actions = 18
    Original Frame Dimensions = 210 x 160
    Frame Dimensions = 84 x 84
    Frame Data Type = np.uint8
    Reward = Game Score

    Summary Name: frames_per_episode
    """

    def __init__(self, config, games_directory=None, rom_filename=None, summary=None):
        super().__init__()
        """ Parameters:
        Name:                       Type    Default:    Description(omitted when self-explanatory):
        display_screen              bool    False       Display game screen
        agent_render                bool    False       Display current frame the way the agent sees it
        frame_skip                  int     4           See ALE Documentation
        repeat_action_probability   float   0.25        in [0,1], see ALE Documentation
        max_num_frames              int     18000       Max number of frames per episode
        color_averaging             bool    True        If true, it averages over the skipped frames.
                                                        Otherwise, it takes the maximum over the skipped frames.
        frame_stack                 int     4           Stack of frames for agent, see Mnih et. al. (2015)
        save_summary                bool    False       Save the summary of the environment
        """
        assert isinstance(config, Config)
        self.display_screen = check_attribute_else_default(
            config, 'display_screen', False)
        self.agent_render = check_attribute_else_default(
            config, 'agent_render', False)
        self.frame_skip = check_attribute_else_default(config, 'frame_skip', 4)
        self.repeat_action_probability = check_attribute_else_default(
            config, 'repeat_action_probability', 0.25)
        max_num_frames = check_attribute_else_default(config, 'max_num_frames',
                                                      18000)
        self.color_averaging = check_attribute_else_default(
            config, 'color_averaging', True)
        # Aggregation over the frame_skip history: mean when color averaging,
        # otherwise pixel-wise maximum (as in Mnih et al., 2015).
        if self.color_averaging:
            self.aggregate_func = np.average
        else:
            self.aggregate_func = np.amax
        self.frame_stack = check_attribute_else_default(
            config, 'frame_stack', 4)
        self.save_summary = check_attribute_else_default(
            config, 'save_summary', False)
        if self.save_summary:
            assert isinstance(summary, dict)
            self.summary = summary
            check_dict_else_default(self.summary, "frames_per_episode", [])

        " Environment variables"
        # Frame skipping, sticky actions, and frame aggregation are emulated
        # in update(), so the emulator itself runs with frame_skip=1,
        # repeat probability 0 and no color averaging.
        self.env = ALEInterface()
        self.env.setInt(b'frame_skip', 1)
        self.env.setInt(b'random_seed', 0)
        self.env.setFloat(b'repeat_action_probability', 0)
        self.env.setInt(b"max_num_frames_per_episode", max_num_frames)
        self.env.setBool(b"color_averaging", False)
        self.env.setBool(b'display_screen', self.display_screen)
        self.rom_file = str.encode(games_directory + rom_filename)
        self.frame_count = 0

        " Loading ROM "
        self.env.loadROM(self.rom_file)

        """ Fixed Parameters:
        Frame Format: "NCHW" (batch_size, channels, height, width). Decided to adopt this
        format because it's the fastest to process in tensorflow with a gpu.
        Frame Height and Width: 84, the default value in the literature.
        """

        " Inner state of the environment "
        self.height = 84
        self.width = 84
        self.current_state = np.zeros(
            [self.frame_stack, self.height, self.width], dtype=np.uint8)
        self.original_height = 210
        self.original_width = 160
        self.history = np.zeros(
            [self.frame_skip, self.original_height, self.original_width],
            np.uint8)
        self.reset()
        self.observations_dimensions = self.current_state.shape
        self.frame_dims = self.current_state[0].shape
        self.actions = self.env.getLegalActionSet()
        self.previous_action = 0

    def reset(self):
        """Start a new episode: log the finished episode's frame count (if
        summaries are enabled), reset the emulator, and seed the frame
        history/state from the first screen."""
        if self.save_summary and (self.frame_count != 0):
            self.summary['frames_per_episode'].append(self.frame_count)
        self.env.reset_game()
        self.frame_count = 0
        original_frame = np.squeeze(self.env.getScreenGrayscale())
        self.history[-1] = original_frame
        fixed_state = self.fix_state()
        self.current_state[-1] = fixed_state
        self.previous_action = 0
        # self.agent_state_display()   # For debugging purposes

    def add_frame(self, frame):
        # Shift the frame stack one slot and append the newest frame last.
        self.current_state[:-1] = self.current_state[1:]
        self.current_state[-1] = frame

    def update(self, action):
        """Apply `action` for frame_skip emulator steps (with sticky actions
        emulated via repeat_action_probability) and return
        (state, cumulative reward, terminal)."""
        reward = 0
        for _ in range(self.frame_skip):
            if not self.env.game_over():
                p = np.random.rand()
                # Sticky actions: repeat the previous action with
                # probability repeat_action_probability.
                current_action = self.previous_action if p <= self.repeat_action_probability else action
                reward += self.env.act(current_action)
                self.history[:-1] = self.history[1:]
                self.history[-1] = np.squeeze(self.env.getScreenGrayscale())
                self.frame_count += 1
        new_frame = self.fix_state()
        self.add_frame(new_frame)
        terminal = self.env.game_over()
        self.previous_action = action
        # self.agent_state_display()   # For debugging purposes only
        return self.current_state, reward, terminal

    def fix_state(self):
        """Aggregate the frame_skip history into one frame and downscale it
        to a (height, width) uint8 image."""
        agg_state = self.aggregate_func(self.history, axis=0)
        fixed_agg_state = resize(agg_state, (self.height, self.width),
                                 mode='constant', preserve_range=True)
        fixed_agg_state = np.array(fixed_agg_state, dtype=np.uint8)
        return fixed_agg_state

    def agent_state_display(self):
        # Debug helper: show the newest agent frame with matplotlib.
        if self.agent_render:
            state = self.current_state[-1]
            plt.imshow(state)
            plt.pause(0.05)

    " Getters "
    def get_current_state(self):
        return self.current_state

    def get_state_for_er_buffer(self):
        # Experience-replay buffers store single frames, not the whole stack.
        return self.current_state[-1]

    def get_num_actions(self):
        # Size of the full legal action set (see class docstring).
        return 18

    " Setters "
    def set_render(self, display_screen=False):
        self.env.setBool(b'display_screen', display_screen)
        # The setting only takes effect after reloading the ROM.
        self.env.loadROM(self.rom_file)
def __init__(self, config, games_directory=None, rom_filename=None, summary=None):
    super().__init__()
    """ Parameters:
    Name:                       Type    Default:    Description(omitted when self-explanatory):
    display_screen              bool    False       Display game screen
    agent_render                bool    False       Display current frame the way the agent sees it
    frame_skip                  int     4           See ALE Documentation
    repeat_action_probability   float   0.25        in [0,1], see ALE Documentation
    max_num_frames              int     18000       Max number of frames per episode
    color_averaging             bool    True        If true, it averages over the skipped frames.
                                                    Otherwise, it takes the maximum over the skipped frames.
    frame_stack                 int     4           Stack of frames for agent, see Mnih et. al. (2015)
    save_summary                bool    False       Save the summary of the environment
    """
    assert isinstance(config, Config)
    self.display_screen = check_attribute_else_default(
        config, 'display_screen', False)
    self.agent_render = check_attribute_else_default(
        config, 'agent_render', False)
    self.frame_skip = check_attribute_else_default(config, 'frame_skip', 4)
    self.repeat_action_probability = check_attribute_else_default(
        config, 'repeat_action_probability', 0.25)
    max_num_frames = check_attribute_else_default(config, 'max_num_frames',
                                                  18000)
    self.color_averaging = check_attribute_else_default(
        config, 'color_averaging', True)
    # Aggregation over the frame_skip history: mean when color averaging,
    # otherwise pixel-wise maximum.
    if self.color_averaging:
        self.aggregate_func = np.average
    else:
        self.aggregate_func = np.amax
    self.frame_stack = check_attribute_else_default(
        config, 'frame_stack', 4)
    self.save_summary = check_attribute_else_default(
        config, 'save_summary', False)
    if self.save_summary:
        assert isinstance(summary, dict)
        self.summary = summary
        check_dict_else_default(self.summary, "frames_per_episode", [])

    " Environment variables"
    # Frame skipping and sticky actions are emulated by the wrapper, so the
    # emulator runs with frame_skip=1 and repeat probability 0.
    self.env = ALEInterface()
    self.env.setInt(b'frame_skip', 1)
    self.env.setInt(b'random_seed', 0)
    self.env.setFloat(b'repeat_action_probability', 0)
    self.env.setInt(b"max_num_frames_per_episode", max_num_frames)
    self.env.setBool(b"color_averaging", False)
    self.env.setBool(b'display_screen', self.display_screen)
    self.rom_file = str.encode(games_directory + rom_filename)
    self.frame_count = 0

    " Loading ROM "
    self.env.loadROM(self.rom_file)

    """ Fixed Parameters:
    Frame Format: "NCHW" (batch_size, channels, height, width). Decided to adopt this
    format because it's the fastest to process in tensorflow with a gpu.
    Frame Height and Width: 84, the default value in the literature.
    """

    " Inner state of the environment "
    self.height = 84
    self.width = 84
    self.current_state = np.zeros(
        [self.frame_stack, self.height, self.width], dtype=np.uint8)
    self.original_height = 210
    self.original_width = 160
    self.history = np.zeros(
        [self.frame_skip, self.original_height, self.original_width],
        np.uint8)
    self.reset()
    self.observations_dimensions = self.current_state.shape
    self.frame_dims = self.current_state[0].shape
    self.actions = self.env.getLegalActionSet()
    self.previous_action = 0
import sys
from random import randrange
from ale_python_interface import ALEInterface
import numpy as np
# NOTE(review): `sys` is imported twice; the duplicate below is harmless
# but should be removed.
import sys
import os
sys.path.append(os.path.abspath('../lib'))
from ImgProc.PongProcessing import PongProcessing as proc
from Autoencoder.Encoder import Encoder
import matplotlib.pyplot as plt

# Frame preprocessor and pretrained autoencoder for Pong screens.
processor = proc()
encoder = Encoder(path_to_model='encoder_v2_model.json',
                  path_to_weights='encoder_v2_weights.h5')

ale = ALEInterface()

# Get & Set the desired settings
ale.setInt('random_seed', 123)

# Set USE_SDL to true to display the screen. ALE must be compilied
# with SDL enabled for this to work. On OSX, pygame init is used to
# proxy-call SDL_main.
USE_SDL = True
if USE_SDL:
    if sys.platform == 'darwin':
        import pygame
        pygame.init()
        ale.setBool('sound', False)  # Sound doesn't work on OSX
    elif sys.platform.startswith('linux'):
        ale.setBool('sound', False)
class Environment:
    """ALE wrapper producing 4-frame 84x84 grayscale observations in [0, 1],
    with optional RGB frame capture for GIF export.

    The rom path comes from the module-level ``ENV`` constant; raw screens
    are 250x160 grayscale.
    """

    def __init__(self, render=False):
        ale = ALEInterface()
        ale.setInt(b'random_seed', 0)
        ale.setFloat(b'repeat_action_probability', 0.0)
        ale.setBool(b'color_averaging', True)
        ale.setInt(b'frame_skip', 4)
        ale.setBool(b'display_screen', render)
        ale.loadROM(ENV.encode('ascii'))
        self.ale = ale
        # Reusable raw grayscale screen buffer for getScreenGrayscale().
        self._screen = np.empty((250, 160, 1), dtype=np.uint8)
        # Upper bound on random no-op actions at episode start.
        self._no_op_max = 7
        # RGB frames collected for save_gif().
        self.img_buffer = []

    def set_render(self, render):
        # Rendering can only be switched off after construction.
        if not render:
            self.ale.setBool(b'display_screen', render)

    def reset(self):
        """Reset the game and return the initial 4-frame observation."""
        self.ale.reset_game()

        # Randomize the initial state with up to _no_op_max no-op actions.
        if self._no_op_max > 0:
            for _ in range(np.random.randint(0, self._no_op_max + 1)):
                self.ale.act(0)

        # Restart GIF capture with the first frame of the episode.
        self.img_buffer = [self.ale.getScreenRGB()]

        self.ale.getScreenGrayscale(self._screen)
        frame = cv2.resize(np.reshape(self._screen, (250, 160)), (84, 90))
        frame = frame[5:89, :].astype(np.float32)
        frame /= 255.0
        # The initial observation repeats the first frame four times.
        self.frame_buffer = np.stack((frame, frame, frame, frame), axis=2)
        return self.frame_buffer

    def process(self, action, gif=False):
        """Execute one (frame-skipped) action; optionally record the RGB
        frame for GIF export.

        Returns (observation, reward, done, info) like a gym environment.
        """
        # Agent actions are offset by 1 into ALE's legal action set.
        reward = self.ale.act(1 + action)
        done = self.ale.game_over()
        if gif:
            self.img_buffer.append(self.ale.getScreenRGB())

        self.ale.getScreenGrayscale(self._screen)
        frame = cv2.resize(np.reshape(self._screen, (250, 160)), (84, 90))
        frame = np.reshape(frame[5:89, :], (84, 84, 1)).astype(np.float32)
        frame *= (1/255.0)
        # Slide the 4-frame window: drop the oldest, append the newest.
        self.frame_buffer = np.append(self.frame_buffer[:, :, 1:], frame, axis=2)
        return self.frame_buffer, reward, done, ""

    def save_gif(self, path):
        """Write the captured RGB frames to `path` and clear the capture
        buffer, creating parent directories as needed."""
        os.makedirs(os.path.dirname(path), exist_ok=True)
        imageio.mimsave(path, self.img_buffer, duration=0.001)
        self.img_buffer = []

    def close(self):
        self.ale.setBool(b'display_screen', False)
def ale_load_from_rom(rom_path, display_screen):
    """Create and configure an ALEInterface for the ROM at `rom_path`.

    Args:
        rom_path: filesystem path to the Atari ROM.
        display_screen: if True, enable the SDL display (with the pygame
            proxy-init workaround on OSX, where sound is also disabled).

    Returns:
        A configured ALEInterface with the ROM loaded.

    Raises:
        ImportError: if the ale_python_interface package is unavailable.
    """
    rng = get_numpy_rng()
    try:
        from ale_python_interface import ALEInterface
    except ImportError as e:
        # BUGFIX: the adjacent string literals previously joined as
        # "...for someinstallation guidance" (missing space); also chain the
        # original exception so the real import failure is preserved.
        raise ImportError('Unable to import the python package of Arcade Learning Environment. '
                          'ALE may not have been installed correctly. Refer to '
                          '`https://github.com/mgbellemare/Arcade-Learning-Environment` for some '
                          'installation guidance') from e

    ale = ALEInterface()
    ale.setInt(b'random_seed', rng.randint(1000))
    if display_screen:
        import sys
        if sys.platform == 'darwin':
            # On OSX pygame must initialize SDL before ALE can display.
            import pygame
            pygame.init()
            ale.setBool(b'sound', False)  # Sound doesn't work on OSX
        ale.setBool(b'display_screen', True)
    else:
        ale.setBool(b'display_screen', False)
    ale.setFloat(b'repeat_action_probability', 0)
    ale.loadROM(str.encode(rom_path))
    return ale
class AtariPlayer(RLEnvironment):
    """
    A wrapper for atari emulator.
    Will automatically restart when a real episode ends (isOver might be just
    lost of lives but not game over).
    """

    def __init__(self, rom_file, viz=0, height_range=(None, None),
                 frame_skip=4, image_shape=(84, 84),
                 nullop_start=30, live_lost_as_eoe=True):
        """
        :param rom_file: path to the rom
        :param frame_skip: skip every k frames and repeat the action
        :param image_shape: (w, h)
        :param height_range: (h1, h2) to cut
        :param viz: visualization to be done.
            Set to 0 to disable.
            Set to a positive number to be the delay between frames to show.
            Set to a string to be a directory to store frames.
        :param nullop_start: start with random number of null ops
        :param live_lost_as_eoe: consider lost of lives as end of episode. useful for training.
        """
        super(AtariPlayer, self).__init__()
        # Resolve bare rom names against the dataset directory.
        if not os.path.isfile(rom_file) and '/' not in rom_file:
            rom_file = get_dataset_path('atari_rom', rom_file)
        assert os.path.isfile(rom_file), \
            "rom {} not found. Please download at {}".format(rom_file, ROM_URL)

        try:
            ALEInterface.setLoggerMode(ALEInterface.Logger.Error)
        except AttributeError:
            if execute_only_once():
                logger.warn("You're not using latest ALE")

        # avoid simulator bugs: https://github.com/mgbellemare/Arcade-Learning-Environment/issues/86
        with _ALE_LOCK:
            self.ale = ALEInterface()
            self.rng = get_rng(self)
            self.ale.setInt(b"random_seed", self.rng.randint(0, 30000))
            self.ale.setBool(b"showinfo", False)

            # Frame skipping is handled by action(); the emulator runs every frame.
            self.ale.setInt(b"frame_skip", 1)
            self.ale.setBool(b'color_averaging', False)
            # manual.pdf suggests otherwise.
            self.ale.setFloat(b'repeat_action_probability', 0.0)

            # viz setup
            if isinstance(viz, six.string_types):
                # A directory: record screens there instead of showing them.
                assert os.path.isdir(viz), viz
                self.ale.setString(b'record_screen_dir', viz)
                viz = 0
            if isinstance(viz, int):
                viz = float(viz)
            self.viz = viz
            if self.viz and isinstance(self.viz, float):
                self.windowname = os.path.basename(rom_file)
                cv2.startWindowThread()
                cv2.namedWindow(self.windowname)

            self.ale.loadROM(rom_file.encode('utf-8'))
        self.width, self.height = self.ale.getScreenDims()
        self.actions = self.ale.getMinimalActionSet()

        self.live_lost_as_eoe = live_lost_as_eoe
        self.frame_skip = frame_skip
        self.nullop_start = nullop_start
        self.height_range = height_range
        self.image_shape = image_shape

        self.current_episode_score = StatCounter()
        self.restart_episode()

    def _grab_raw_image(self):
        """
        :returns: the current 3-channel image
        """
        m = self.ale.getScreenRGB()
        return m.reshape((self.height, self.width, 3))

    def current_state(self):
        """
        :returns: a gray-scale (h, w) uint8 image
        """
        ret = self._grab_raw_image()
        # max-pooled over the last screen
        ret = np.maximum(ret, self.last_raw_screen)
        if self.viz:
            if isinstance(self.viz, float):
                cv2.imshow(self.windowname, ret)
                time.sleep(self.viz)
        # Crop vertically, then convert to grayscale and resize.
        ret = ret[self.height_range[0]:self.height_range[1], :].astype('float32')
        # 0.299,0.587.0.114. same as rgb2y in torch/image
        ret = cv2.cvtColor(ret, cv2.COLOR_RGB2GRAY)
        ret = cv2.resize(ret, self.image_shape)
        return ret.astype('uint8')  # to save some memory

    def get_action_space(self):
        return DiscreteActionSpace(len(self.actions))

    def finish_episode(self):
        # Record the finished episode's total score.
        self.stats['score'].append(self.current_episode_score.sum)

    def restart_episode(self):
        self.current_episode_score.reset()
        with _ALE_LOCK:
            self.ale.reset_game()

        # random null-ops start
        n = self.rng.randint(self.nullop_start)
        self.last_raw_screen = self._grab_raw_image()
        for k in range(n):
            # Keep the screen just before the last null-op for max-pooling.
            if k == n - 1:
                self.last_raw_screen = self._grab_raw_image()
            self.ale.act(0)

    def action(self, act):
        """
        :param act: an index of the action
        :returns: (reward, isOver)
        """
        oldlives = self.ale.lives()
        r = 0
        for k in range(self.frame_skip):
            # Save the second-to-last screen so current_state can max-pool.
            if k == self.frame_skip - 1:
                self.last_raw_screen = self._grab_raw_image()
            r += self.ale.act(self.actions[act])
            newlives = self.ale.lives()
            if self.ale.game_over() or \
                    (self.live_lost_as_eoe and newlives < oldlives):
                break

        self.current_episode_score.feed(r)
        isOver = self.ale.game_over()
        if self.live_lost_as_eoe:
            # A lost life also counts as episode end (for training).
            isOver = isOver or newlives < oldlives
        if isOver:
            self.finish_episode()
        # Only a real game-over restarts the emulator.
        if self.ale.game_over():
            self.restart_episode()
        return (r, isOver)
def __init__(self, rom_file, viz=0,
             frame_skip=4, nullop_start=30,
             live_lost_as_eoe=True, max_num_frames=0):
    """
    Args:
        rom_file: path to the rom
        frame_skip: skip every k frames and repeat the action
        viz: visualization to be done.
            Set to 0 to disable.
            Set to a positive number to be the delay between frames to show.
            Set to a string to be a directory to store frames.
        nullop_start: start with random number of null ops.
        live_lost_as_eoe: consider lost of lives as end of episode. Useful for training.
        max_num_frames: maximum number of frames per episode.
    """
    super(AtariPlayer, self).__init__()
    # Resolve bare rom names against the dataset directory.
    if not os.path.isfile(rom_file) and '/' not in rom_file:
        rom_file = get_dataset_path('atari_rom', rom_file)
    assert os.path.isfile(rom_file), \
        "rom {} not found. Please download at {}".format(rom_file, ROM_URL)

    try:
        ALEInterface.setLoggerMode(ALEInterface.Logger.Error)
    except AttributeError:
        if execute_only_once():
            logger.warn("You're not using latest ALE")

    # avoid simulator bugs: https://github.com/mgbellemare/Arcade-Learning-Environment/issues/86
    with _ALE_LOCK:
        self.ale = ALEInterface()
        self.rng = get_rng(self)
        self.ale.setInt(b"random_seed", self.rng.randint(0, 30000))
        self.ale.setInt(b"max_num_frames_per_episode", max_num_frames)
        self.ale.setBool(b"showinfo", False)

        # Frame skipping is handled by the wrapper; the emulator runs every frame.
        self.ale.setInt(b"frame_skip", 1)
        self.ale.setBool(b'color_averaging', False)
        # manual.pdf suggests otherwise.
        self.ale.setFloat(b'repeat_action_probability', 0.0)

        # viz setup
        if isinstance(viz, six.string_types):
            # A directory: record screens there instead of showing them.
            assert os.path.isdir(viz), viz
            self.ale.setString(b'record_screen_dir', viz)
            viz = 0
        if isinstance(viz, int):
            viz = float(viz)
        self.viz = viz
        if self.viz and isinstance(self.viz, float):
            self.windowname = os.path.basename(rom_file)
            cv2.namedWindow(self.windowname)

        self.ale.loadROM(rom_file.encode('utf-8'))
    self.width, self.height = self.ale.getScreenDims()
    self.actions = self.ale.getMinimalActionSet()

    self.live_lost_as_eoe = live_lost_as_eoe
    self.frame_skip = frame_skip
    self.nullop_start = nullop_start

    # gym-style action/observation spaces over the raw grayscale screen.
    self.action_space = spaces.Discrete(len(self.actions))
    self.observation_space = spaces.Box(low=0, high=255,
                                        shape=(self.height, self.width),
                                        dtype=np.uint8)
    self._restart_episode()
session.run(targetNet.b2.assign(trainNet.b2)) session.run(targetNet.b3.assign(trainNet.b3)) session.run(targetNet.b4.assign(trainNet.b4)) # def printDict(dict): # print 'Options:\n' # for i in dict.keys(): # print " ",i,"=",dict[i] # # print '' # initialization np.random.seed(SEED) ale = ALEInterface() if SEED == None: ale.setInt('random_seed', 0) else: ale.setInt('random_seed', SEED) ale.setInt("frame_skip", frameSkip) ale.setBool('color_averaging', True) ale.setBool('sound', False) ale.setBool('display_screen', False) ale.setFloat("repeat_action_probability", 0.0) ale.loadROM(romPath) legal_actions = ale.getMinimalActionSet() n_actions = len(legal_actions) opt.n_actions = n_actions explorationRateDelta = (initialExplorationRate - finalExplorationRate) / (
class AtariPlayer(gym.Env):
    """
    A wrapper for ALE emulator, with configurations to mimic DeepMind DQN settings.

    Info:
        score: the accumulated reward in the current game
        gameOver: True when the current game is Over
    """

    def __init__(self, rom_file, viz=0, frame_skip=4,
                 nullop_start=30, live_lost_as_eoe=True, max_num_frames=0):
        """
        Args:
            rom_file: path to the rom
            frame_skip: skip every k frames and repeat the action
            viz: visualization to be done.
                Set to 0 to disable.
                Set to a positive number to be the delay between frames to show.
                Set to a string to be a directory to store frames.
            nullop_start: start with random number of null ops.
            live_lost_as_eoe: consider lost of lives as end of episode. Useful for training.
            max_num_frames: maximum number of frames per episode.
        """
        super(AtariPlayer, self).__init__()
        # Resolve a bare rom name against the shared dataset directory.
        if not os.path.isfile(rom_file) and '/' not in rom_file:
            rom_file = get_dataset_path('atari_rom', rom_file)
        assert os.path.isfile(rom_file), \
            "rom {} not found. Please download at {}".format(rom_file, ROM_URL)

        try:
            ALEInterface.setLoggerMode(ALEInterface.Logger.Error)
        except AttributeError:
            # Older ALE builds have no setLoggerMode; complain once only.
            # NOTE(review): logger.warn is deprecated; prefer logger.warning.
            if execute_only_once():
                logger.warn("You're not using latest ALE")

        # avoid simulator bugs: https://github.com/mgbellemare/Arcade-Learning-Environment/issues/86
        with _ALE_LOCK:
            self.ale = ALEInterface()
            self.rng = get_rng(self)
            self.ale.setInt(b"random_seed", self.rng.randint(0, 30000))
            self.ale.setInt(b"max_num_frames_per_episode", max_num_frames)
            self.ale.setBool(b"showinfo", False)

            # Frame skipping is done by this wrapper (see step), so disable it in ALE.
            self.ale.setInt(b"frame_skip", 1)
            self.ale.setBool(b'color_averaging', False)
            # manual.pdf suggests otherwise.
            self.ale.setFloat(b'repeat_action_probability', 0.0)

            # viz setup
            if isinstance(viz, six.string_types):
                # A directory: have ALE record raw screens there instead of showing them.
                assert os.path.isdir(viz), viz
                self.ale.setString(b'record_screen_dir', viz)
                viz = 0
            if isinstance(viz, int):
                viz = float(viz)
            self.viz = viz
            if self.viz and isinstance(self.viz, float):
                self.windowname = os.path.basename(rom_file)
                cv2.namedWindow(self.windowname)

            self.ale.loadROM(rom_file.encode('utf-8'))
        self.width, self.height = self.ale.getScreenDims()
        self.actions = self.ale.getMinimalActionSet()

        self.live_lost_as_eoe = live_lost_as_eoe
        self.frame_skip = frame_skip
        self.nullop_start = nullop_start

        self.action_space = spaces.Discrete(len(self.actions))
        self.observation_space = spaces.Box(
            low=0, high=255, shape=(self.height, self.width), dtype=np.uint8)
        self._restart_episode()

    def get_action_meanings(self):
        """Return the human-readable meaning of each minimal action."""
        return [ACTION_MEANING[i] for i in self.actions]

    def _grab_raw_image(self):
        """
        :returns: the current 3-channel image
        """
        m = self.ale.getScreenRGB()
        return m.reshape((self.height, self.width, 3))

    def _current_state(self):
        """
        :returns: a gray-scale (h, w) uint8 image
        """
        ret = self._grab_raw_image()
        # max-pooled over the last screen, to remove Atari sprite flickering
        ret = np.maximum(ret, self.last_raw_screen)
        if self.viz:
            if isinstance(self.viz, float):
                cv2.imshow(self.windowname, ret)
                cv2.waitKey(int(self.viz * 1000))
        ret = ret.astype('float32')
        # 0.299, 0.587, 0.114. same as rgb2y in torch/image
        ret = cv2.cvtColor(ret, cv2.COLOR_RGB2GRAY)
        return ret.astype('uint8')  # to save some memory

    def _restart_episode(self):
        """Reset the emulator and perform a random number of null-ops."""
        with _ALE_LOCK:
            self.ale.reset_game()

        # random null-ops start
        n = self.rng.randint(self.nullop_start)
        self.last_raw_screen = self._grab_raw_image()
        for k in range(n):
            if k == n - 1:
                # Keep the screen just before the final null-op, so the first
                # observation can still be max-pooled over two frames.
                self.last_raw_screen = self._grab_raw_image()
            self.ale.act(0)

    def reset(self):
        """gym-style reset. Only restarts the emulator when the previous game
        actually ended -- an episode may have ended on life loss without a
        full game over, in which case play simply continues."""
        if self.ale.game_over():
            self._restart_episode()
        return self._current_state()

    def step(self, act):
        """gym-style step.

        Repeats the chosen action frame_skip times (stopping early on game
        over / life loss) and accumulates the reward.

        :param act: index into self.actions (the minimal action set)
        :returns: (observation, reward, isOver, info) with
            info['ale.lives'] = remaining lives
        """
        oldlives = self.ale.lives()
        r = 0
        for k in range(self.frame_skip):
            if k == self.frame_skip - 1:
                # Remember the second-to-last screen for flicker max-pooling.
                self.last_raw_screen = self._grab_raw_image()
            r += self.ale.act(self.actions[act])
            newlives = self.ale.lives()
            if self.ale.game_over() or \
                    (self.live_lost_as_eoe and newlives < oldlives):
                break

        isOver = self.ale.game_over()
        if self.live_lost_as_eoe:
            # Losing a life counts as end-of-episode (useful for training).
            isOver = isOver or newlives < oldlives

        info = {'ale.lives': newlives}
        return self._current_state(), r, isOver, info
# Epsilon-greedy exploration schedule endpoints.
initialExplorationRate = 1.0
finalExplorationRate = 0.1

SEED = 123
np.random.seed(SEED)

# Checkpoint settings. With loadModel = -1 the path becomes "model/-1.tfmodel";
# presumably -1 means "do not load" -- TODO confirm against the loading code.
loadModel = -1
loadModelPath = "model/" + "%02d" % loadModel + ".tfmodel"
saveData = False
saveModel = True

gamma = .99                 # reward discount factor
learning_rate = 0.00025     # optimizer step size
display_screen = False
frameSkip = 4               # ALE repeats each action this many frames

ale = ALEInterface()
ale.setInt('random_seed', SEED)
ale.setInt("frame_skip", frameSkip)

# Set USE_SDL to true to display the screen. On OSX, pygame init is used to
# proxy-call SDL_main.
USE_SDL = True
if USE_SDL:
    if sys.platform == 'darwin':
        import pygame
        pygame.init()
        ale.setBool('sound', False)  # Sound doesn't work on OSX
    elif sys.platform.startswith('linux'):
        ale.setBool('sound', False)
    ale.setBool('display_screen', display_screen)

ale.loadROM("rom/Breakout.A26")
legal_actions = ale.getMinimalActionSet()
def __init__(self, rom_file, viz=0, height_range=(None, None),
             frame_skip=4, image_shape=(84, 84),
             nullop_start=30, live_lost_as_eoe=True):
    """
    :param rom_file: path to the rom
    :param frame_skip: skip every k frames and repeat the action
    :param image_shape: (w, h)
    :param height_range: (h1, h2) to cut
    :param viz: visualization to be done.
        Set to 0 to disable.
        Set to a positive number to be the delay between frames to show.
        Set to a string to be a directory to store frames.
    :param nullop_start: start with random number of null ops
    :param live_lost_as_eoe: consider lost of lives as end of episode. useful for training.
    """
    super(AtariPlayer, self).__init__()
    # Resolve a bare rom name against the shared dataset directory.
    if not os.path.isfile(rom_file) and '/' not in rom_file:
        rom_file = get_dataset_path('atari_rom', rom_file)
    assert os.path.isfile(rom_file), \
        "rom {} not found. Please download at {}".format(rom_file, ROM_URL)

    try:
        ALEInterface.setLoggerMode(ALEInterface.Logger.Warning)
    except AttributeError:
        # Older ALE builds have no setLoggerMode; warn once only.
        log_once()

    # avoid simulator bugs: https://github.com/mgbellemare/Arcade-Learning-Environment/issues/86
    with _ALE_LOCK:
        self.ale = ALEInterface()
        self.rng = get_rng(self)

        self.ale.setInt(b"random_seed", self.rng.randint(0, 30000))
        self.ale.setBool(b"showinfo", False)

        # ALE's own frame skipping is disabled; this wrapper stores frame_skip
        # and presumably applies it itself -- see the stepping code.
        self.ale.setInt(b"frame_skip", 1)
        self.ale.setBool(b'color_averaging', False)
        # manual.pdf suggests otherwise.
        self.ale.setFloat(b'repeat_action_probability', 0.0)

        # viz setup
        if isinstance(viz, six.string_types):
            # A directory: have ALE record raw screens there instead of showing them.
            assert os.path.isdir(viz), viz
            self.ale.setString(b'record_screen_dir', viz)
            viz = 0
        if isinstance(viz, int):
            viz = float(viz)
        self.viz = viz
        if self.viz and isinstance(self.viz, float):
            self.windowname = os.path.basename(rom_file)
            cv2.startWindowThread()
            cv2.namedWindow(self.windowname)

        self.ale.loadROM(rom_file.encode('utf-8'))
    self.width, self.height = self.ale.getScreenDims()
    self.actions = self.ale.getMinimalActionSet()

    self.live_lost_as_eoe = live_lost_as_eoe
    self.frame_skip = frame_skip
    self.nullop_start = nullop_start
    self.height_range = height_range
    self.image_shape = image_shape

    # Running statistics of per-episode score.
    self.current_episode_score = StatCounter()
    self.restart_episode()