class Environment:
    def __init__(self, rom_file, args):
        self.ale = ALEInterface()
        if args.display_screen:
            if sys.platform == 'darwin':
                import pygame
                pygame.init()
                self.ale.setBool('sound', False)  # Sound doesn't work on OSX
            elif sys.platform.startswith('linux'):
                self.ale.setBool('sound', True)
            self.ale.setBool('display_screen', True)
        self.ale.setInt('frame_skip', args.frame_skip)
        self.ale.setFloat('repeat_action_probability', args.repeat_action_probability)
        self.ale.setBool('color_averaging', args.color_averaging)
        if args.random_seed:
            self.ale.setInt('random_seed', args.random_seed)
        if args.record_screen_path:
            if not os.path.exists(args.record_screen_path):
                logger.info("Creating folder %s" % args.record_screen_path)
                os.makedirs(args.record_screen_path)
            logger.info("Recording screens to %s", args.record_screen_path)
            self.ale.setString('record_screen_dir', args.record_screen_path)
        if args.record_sound_filename:
            logger.info("Recording sound to %s", args.record_sound_filename)
            self.ale.setBool('sound', True)
            self.ale.setString('record_sound_filename', args.record_sound_filename)
        self.ale.loadROM(rom_file)
        if args.minimal_action_set:
            self.actions = self.ale.getMinimalActionSet()
            logger.info("Using minimal action set with size %d" % len(self.actions))
        else:
            self.actions = self.ale.getLegalActionSet()
            logger.info("Using full action set with size %d" % len(self.actions))
        logger.debug("Actions: " + str(self.actions))
        # cv2.resize expects (width, height); the original stored
        # (height, width) here, which transposes non-square targets
        self.dims = (args.screen_width, args.screen_height)

    def numActions(self):
        return len(self.actions)

    def restart(self):
        self.ale.reset_game()

    def act(self, action):
        return self.ale.act(self.actions[action])

    def getScreen(self):
        screen = self.ale.getScreenGrayscale()
        return cv2.resize(screen, self.dims)

    def isTerminal(self):
        return self.ale.game_over()
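# A minimal usage sketch for the Environment wrapper above. The argparse
# namespace, field values, and ROM path are hypothetical; any object exposing
# these attributes works.
import argparse

args = argparse.Namespace(
    display_screen=False, frame_skip=4, repeat_action_probability=0.0,
    color_averaging=True, random_seed=123, record_screen_path=None,
    record_sound_filename=None, minimal_action_set=True,
    screen_width=84, screen_height=84)
env = Environment('breakout.bin', args)  # hypothetical ROM path
env.restart()
while not env.isTerminal():
    reward = env.act(0)      # act() takes an index into env.actions
    frame = env.getScreen()  # (84, 84) grayscale frame resized with cv2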
class Emulate:
    def __init__(self, rom_file, display_screen=False, frame_skip=4,
                 screen_height=84, screen_width=84, repeat_action_probability=0,
                 color_averaging=True, random_seed=0,
                 record_screen_path='screen_pics', record_sound_filename=None,
                 minimal_action_set=True):
        self.ale = ALEInterface()
        if display_screen:
            if sys.platform == 'darwin':
                import pygame
                pygame.init()
                self.ale.setBool('sound', False)  # Sound doesn't work on OSX
            elif sys.platform.startswith('linux'):
                self.ale.setBool('sound', True)
            self.ale.setBool('display_screen', True)
        self.ale.setInt('frame_skip', frame_skip)
        self.ale.setFloat('repeat_action_probability', repeat_action_probability)
        self.ale.setBool('color_averaging', color_averaging)
        if random_seed:
            self.ale.setInt('random_seed', random_seed)
        self.ale.loadROM(rom_file)
        if minimal_action_set:
            self.actions = self.ale.getMinimalActionSet()
        else:
            self.actions = self.ale.getLegalActionSet()
        self.dims = (screen_width, screen_height)

    def numActions(self):
        return len(self.actions)

    def getActions(self):
        return self.actions

    def restart(self):
        self.ale.reset_game()

    def act(self, action):
        return self.ale.act(self.actions[action])

    def getScreen(self):
        screen = self.ale.getScreenGrayscale()
        return cv2.resize(screen, self.dims)

    def getScreenGray(self):
        screen = self.ale.getScreenGrayscale()
        resized = cv2.resize(screen, self.dims)
        return np.rot90(resized, k=1)

    def getScreenColor(self):
        screen = self.ale.getScreenRGB()
        resized = cv2.resize(screen, self.dims)
        return np.rot90(resized, k=1)

    def isTerminal(self):
        return self.ale.game_over()
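# A short random-agent loop against the Emulate wrapper, as a sketch; the ROM
# path is hypothetical. Note that act() takes an index into the (minimal)
# action set, and getScreenGray() returns a 90-degree-rotated frame.
import random

emu = Emulate('breakout.bin')  # hypothetical ROM path
total = 0
while not emu.isTerminal():
    total += emu.act(random.randrange(emu.numActions()))
gray = emu.getScreenGray()  # rotated 84x84 grayscale frame
emu.restart()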
import random

import docopt
import numpy as np
import pygame
from ale_python_interface import ALEInterface


def main():
    arguments = docopt.docopt(__doc__, version='ALE Demo Version 1.0')
    pygame.init()
    ale = ALEInterface()
    ale.setInt(b'random_seed', 123)
    ale.setBool(b'display_screen', True)
    ale.loadROM(str.encode(arguments['<rom_file>']))
    legal_actions = ale.getLegalActionSet()
    width, height = ale.getScreenDims()
    print(width, height)
    frame = ale.getScreenRGB()
    frame = np.array(frame, dtype=float)
    rewards, num_episodes = [], int(arguments['--iters'] or 5)
    for episode in range(num_episodes):
        total_reward = 0
        while not ale.game_over():
            total_reward += ale.act(random.choice(legal_actions))
        print('Episode %d reward %d.' % (episode, total_reward))
        rewards.append(total_reward)
        ale.reset_game()
    average = sum(rewards) / len(rewards)
    # the average is a float, so don't truncate it with %d
    print('Average for %d episodes: %.2f' % (num_episodes, average))
class Atari:
    def __init__(self, rom_dir):
        self.ale = ALEInterface()

        # Set settings
        self.ale.setInt("random_seed", 123)
        self.frame_skip = 4
        self.ale.setInt("frame_skip", self.frame_skip)
        self.ale.setBool("display_screen", False)
        self.ale.setBool("sound", True)
        self.record_sound_for_user = True
        self.ale.setBool("record_sound_for_user", self.record_sound_for_user)
        # NOTE: recording audio to file still works, but if both file
        # recording and record_sound_for_user are enabled, only the latter
        # is done.
        # self.ale.setString("record_sound_filename", "")

        # Get settings
        self.ale.loadROM(rom_dir)
        self.screen_width, self.screen_height = self.ale.getScreenDims()
        self.legal_actions = self.ale.getLegalActionSet()

        # Action count across all episodes
        self.action_count = 0
        self.start_time = time.time()
        self.reset()

    def reset(self):
        self.ale.reset_game()

    def take_action(self):
        action = self.legal_actions[np.random.randint(self.legal_actions.size)]
        self.ale.act(action)
        self.action_count += 1

    def print_fps(self, delta_t=500):
        if self.action_count % delta_t == 0:
            print('[atari.py] Frames/second: %f' % (
                self.action_count / (time.time() - self.start_time)))
            # the original referenced a module-level `atari` instance here;
            # use self instead
            print('[atari.py] Overall game frame count:',
                  self.action_count * self.frame_skip)
            print('---------')

    def get_image_and_audio(self):
        np_data_image = np.zeros(self.screen_width * self.screen_height * 3,
                                 dtype=np.uint8)
        if self.record_sound_for_user:
            np_data_audio = np.zeros(self.ale.getAudioSize(), dtype=np.uint8)
            self.ale.getScreenRGBAndAudio(np_data_image, np_data_audio)
            # Also supports independent audio queries if the user desires:
            # self.ale.getAudio(np_data_audio)
        else:
            np_data_audio = 0
            self.ale.getScreenRGB(np_data_image)
        return (np.reshape(np_data_image,
                           (self.screen_height, self.screen_width, 3)),
                np.asarray(np_data_audio))
from random import randrange

import matplotlib.pyplot as plt
from ale_python_interface import ALEInterface


def train():
    ale = ALEInterface()
    ale.setInt('random_seed', 123)
    ale.loadROM('roms/breakout.bin')
    legal_actions = ale.getLegalActionSet()
    total_reward = 0
    while not ale.game_over():
        a = legal_actions[randrange(len(legal_actions))]
        reward = ale.act(a)
        screen = ale.getScreenRGB()
        print(screen)
        plt.imshow(screen)
        plt.show()
        total_reward += reward
        print(total_reward)
    print('Episode end!')
def _init_ale(self):
    ale = ALEInterface()
    ale.setBool('sound', self.play_sound)
    ale.setBool('display_screen', self.display_screen)
    ale.setInt('random_seed', self.random_seed)

    # Frame skip is implemented separately
    ale.setInt('frame_skip', 1)
    ale.setBool('color_averaging', False)
    ale.setFloat('repeat_action_probability', 0.0)
    # repeat_action_probability has an unexpected effect on games: the larger
    # this value is, the more frames games take to restart, and when it is
    # 1.0, games hang completely. We set the default value of 0.0 here,
    # expecting it to have no effect since frame_skip == 1. Action repetition
    # is the agent's concern, so we do not implement an equivalent in our
    # wrapper.

    if self.record_screen_path:
        _LG.info('Recording screens: %s', self.record_screen_path)
        if not os.path.exists(self.record_screen_path):
            os.makedirs(self.record_screen_path)
        ale.setString('record_screen_dir', self.record_screen_path)

    if self.record_sound_filename:
        _LG.info('Recording sound: %s', self.record_sound_filename)
        record_sound_dir = os.path.dirname(self.record_sound_filename)
        # guard against an empty dirname when the filename has no directory part
        if record_sound_dir and not os.path.exists(record_sound_dir):
            os.makedirs(record_sound_dir)
        ale.setBool('sound', True)
        ale.setString('record_sound_filename', self.record_sound_filename)

    ale.loadROM(self.rom_path)
    self._ale = ale
    self._actions = (ale.getMinimalActionSet()
                     if self.minimal_action_set else
                     ale.getLegalActionSet())
class Game():
    """Wrapper around the ALEInterface class."""

    def __init__(self, rom_file, sdl=False):
        self.ale = ALEInterface()
        # Setup SDL
        if sdl:
            if sys.platform == 'darwin':
                import pygame
                pygame.init()
                self.ale.setBool(b'sound', False)  # Sound doesn't work on OSX
            elif sys.platform.startswith('linux'):
                self.ale.setBool(b'sound', True)
            self.ale.setBool(b'display_screen', True)
        # Load rom
        self.ale.loadROM(str.encode(rom_file))

    def get_action_set(self):
        return self.ale.getLegalActionSet()

    def get_minimal_action_set(self):
        return self.ale.getMinimalActionSet()

    def game_over(self):
        return self.ale.game_over()

    def act(self, action):
        return self.ale.act(action)

    def reset_game(self):
        self.ale.reset_game()

    def get_frame(self):
        return self.ale.getScreenRGB()
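# Sketch of driving the Game wrapper with its minimal action set. Unlike the
# index-based wrappers above, Game.act() takes a raw ALE action value, so we
# sample directly from the set it returns. The ROM path is hypothetical.
import random

game = Game('breakout.bin')
actions = game.get_minimal_action_set()
score = 0
while not game.game_over():
    score += game.act(random.choice(list(actions)))
game.reset_game()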
import sys
from random import randrange

from ale_python_interface import ALEInterface


def get_random_baseline(gamepath):
    ale = ALEInterface()
    ale.setInt('random_seed', 42)
    recordings_dir = './recordings/breakout/'
    USE_SDL = True
    if USE_SDL:
        if sys.platform == 'darwin':
            import pygame
            pygame.init()
            ale.setBool('sound', False)  # Sound doesn't work on OSX
            # ale.setString("record_screen_dir", recordings_dir)
        elif sys.platform.startswith('linux'):
            ale.setBool('sound', True)
        ale.setBool('display_screen', True)

    # Load the ROM file
    ale.loadROM(gamepath)

    # Get the list of legal actions
    legal_actions = ale.getLegalActionSet()

    # Play 10 episodes of random actions
    rewards = []
    for episode in range(10):
        total_reward = 0
        while not ale.game_over():
            a = legal_actions[randrange(len(legal_actions))]
            total_reward += ale.act(a)
        rewards.append(total_reward)
        # print('Episode', episode, 'ended with score:', total_reward)
        ale.reset_game()

    avg_reward = sum(rewards) / float(len(rewards))
    return avg_reward
class AtariGame(Task):
    '''RL task based on Arcade Game.'''

    def __init__(self, rom_path, num_frames=4, live=False, skip_frame=0,
                 mode='normal'):
        self.ale = ALEInterface()
        if live:
            USE_SDL = True
            if USE_SDL:
                if sys.platform == 'darwin':
                    import pygame
                    pygame.init()
                    self.ale.setBool('sound', False)  # Sound doesn't work on OSX
                elif sys.platform.startswith('linux'):
                    self.ale.setBool('sound', True)
                self.ale.setBool('display_screen', True)
        self.mode = mode
        self.live = live
        self.ale.loadROM(rom_path)
        self.num_frames = num_frames
        self.frames = []
        self.frame_id = 0
        self.cum_reward = 0
        self.skip_frame = skip_frame
        if mode == 'small':
            img = T.matrix('img')
            self.max_pool = theano.function([img], max_pool_2d(img, [4, 4]))
            self.img_shape = (16, 16)
        else:
            self.img_shape = (84, 84)  # image shape according to DQN Nature paper.
        while len(self.frames) < 4:
            self.step(choice(self.valid_actions, 1)[0])
        self.reset()

    def copy(self):
        import dill as pickle
        return pickle.loads(pickle.dumps(self))

    def reset(self):
        self.ale.reset_game()
        self.frame_id = 0
        self.cum_reward = 0
        if self.skip_frame:
            for frame_i in range(self.skip_frame):
                self.step(choice(self.valid_actions, 1)[0])

    @property
    def _curr_frame(self):
        img = self.ale.getScreenRGB()
        img = rgb2yuv(img)[:, :, 0]  # get Y channel, according to Nature paper.
        # print('RAM', self.ale.getRAM())
        if self.mode == 'small':
            img = self.max_pool(img)
        img = imresize(img, self.img_shape, interp='bicubic')
        return img

    @property
    def curr_state(self):
        '''return raw pixels.'''
        return np.array(self.frames, dtype=floatX) / floatX(255.)  # normalize

    @property
    def state_shape(self):
        return self.curr_state.shape

    @property
    def num_actions(self):
        return len(self.valid_actions)

    @property
    def valid_actions(self):
        return self.ale.getLegalActionSet()

    def step(self, action):
        reward = self.ale.act(action)
        if len(self.frames) == self.num_frames:
            self.frames = self.frames[1:]
        self.frames.append(self._curr_frame)
        self.frame_id += 1
        # print('frame_id', self.frame_id)
        self.cum_reward += reward
        return reward  # TODO: scale the gradient up.

    def is_end(self):
        if np.abs(self.cum_reward) > 0:
            return True
        return self.ale.game_over()

    def visualize(self, fig=1, fname=None, format='png'):
        import matplotlib.pyplot as plt
        fig = plt.figure(fig, figsize=(5, 5))
        plt.clf()
        plt.axis('off')
        # res = plt.imshow(self.ale.getScreenRGB())
        res = plt.imshow(self._curr_frame, interpolation='none')
        if fname:
            plt.savefig(fname, format=format)
        else:
            plt.show()
        return res
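# Sketch: stepping the AtariGame task and reading its stacked state. The ROM
# path is hypothetical; curr_state stacks the last num_frames Y-channel
# frames, normalized to [0, 1], matching the class above.
from numpy.random import choice

task = AtariGame('roms/breakout.bin', num_frames=4)
r = task.step(choice(task.valid_actions, 1)[0])
print(task.state_shape)  # e.g. (4, 84, 84) in 'normal' mode
if task.is_end():
    task.reset()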
class aleForET:
    def __init__(self, rom_file, screen, rndseed, resume_state_file=None):
        # You might pass None for `screen` when you are not interested in
        # running any function that displays graphics, for example when you
        # only run proceed_one_step__fast__no_scr_support(). The other
        # functions use self.screen and would raise a RuntimeError.
        if screen is not None:
            pygame.init()
            self.screen = screen
        GAME_W, GAME_H = 160, 210
        self.size = GAME_W * V.xSCALE, GAME_H * V.ySCALE

        # Get & Set the desired settings
        self.ale = ALEInterface()
        self.ale.setInt("random_seed", rndseed)
        self.ale.setBool('sound', False)
        self.ale.setBool('display_screen', False)
        self.ale.setBool('color_averaging', COLOR_AVG)
        self.ale.setFloat('repeat_action_probability', 0.0)

        # Load the ROM file
        self.ale.loadROM(rom_file)
        self.gamename = os.path.basename(rom_file).split('.')[0]
        self.clock = pygame.time.Clock()
        self._last_time = time.time()
        self.score = 0
        self.episode = 0
        self.frame_cnt = 0

        # Get the list of legal actions
        self.legal_actions = self.ale.getLegalActionSet()
        if resume_state_file:
            self.loadALEState(resume_state_file)

    def saveALEState(self, fname):
        basedir = os.path.dirname(fname)
        if not os.path.exists(basedir):
            os.makedirs(basedir)
        # cloneSystemState() actually returns an int: a memory address
        # pointing to a C++ ALEState object
        pALEState = self.ale.cloneSystemState()
        # encodeState() actually takes that pointer
        serialized_np = self.ale.encodeState(pALEState)
        np.savez(fname, state=serialized_np, score=self.score,
                 episode=self.episode)

    def loadALEState(self, fname):
        npzfile = np.load(fname)
        serialized_np = npzfile['state']
        self.score = npzfile['score']
        self.episode = npzfile['episode']
        # decodeState() actually returns an int: a memory address pointing to
        # a C++ ALEState object; restoreSystemState() takes that pointer
        pALEState = self.ale.decodeState(serialized_np)
        self.ale.restoreSystemState(pALEState)

    def proceed_one_step(self, action, refresh_screen=False, fps_limit=0,
                         model_gaze_output=None, gc_window_drawer_func=None):
        self.clock.tick(fps_limit)  # control FPS; fps_limit == 0 means no limit
        self.frame_cnt += 1

        # Display FPS
        diff_time = time.time() - self._last_time
        if diff_time > 1.0:
            print('FPS: %.1f' % self.clock.get_fps())
            self._last_time = time.time()

        # Show game image
        cur_frame_np = self.ale.getScreenRGB()
        if refresh_screen:
            cur_frame_Surface = pygame.surfarray.make_surface(cur_frame_np)
            cur_frame_Surface = pygame.transform.flip(cur_frame_Surface, True, False)
            cur_frame_Surface = pygame.transform.rotate(cur_frame_Surface, 90)
            # Perform scaling directly on screen, leaving cur_frame_Surface
            # unscaled. Slightly faster than scaling cur_frame_Surface and
            # then transferring it to screen.
            pygame.transform.scale(cur_frame_Surface, self.size, self.screen)
            if gc_window_drawer_func is not None and model_gaze_output:
                gc_window_drawer_func(self.screen, model_gaze_output)
            pygame.display.flip()

        # Apply an action and get the resulting reward
        reward = self.ale.act(action)
        self.score += reward
        return cur_frame_np, reward, self.check_episode_end_and_if_true_reset_game()

    def proceed_one_step__fast__no_scr_support(self, action):
        self.frame_cnt += 1
        cur_frame_np = self.ale.getScreenRGB()
        reward = self.ale.act(action)
        self.score += reward
        return cur_frame_np, reward, self.check_episode_end_and_if_true_reset_game()

    def check_episode_end_and_if_true_reset_game(self):
        end = self.ale.game_over()
        if end:
            print('Episode', self.episode, 'ended with score:', self.score)
            self.score = 0
            self.episode += 1
            self.ale.reset_game()
        # after reset_game(), ale.game_over() returns False again
        return end

    def run(self, gc_window_drawer_func=None, save_screen_func=None,
            event_handler_func=None, record_a_and_r_func=None):
        self.run_start_time = time.time()  # used in alerecord_main.py
        while True:
            self.check_episode_end_and_if_true_reset_game()
            self.clock.tick(FRAME_RATE)  # control FPS
            self.frame_cnt += 1
            key = pygame.key.get_pressed()
            if event_handler_func is not None:
                stop, eyelink_err_code, bool_drawgc = event_handler_func(key, self)
                if stop:
                    return eyelink_err_code

            # Display FPS
            diff_time = time.time() - self._last_time
            if diff_time > 1.0:
                print('FPS: %.1f' % self.clock.get_fps())
                self._last_time = time.time()

            # Show game image
            cur_frame_np = self.ale.getScreenRGB()
            cur_frame_Surface = pygame.surfarray.make_surface(cur_frame_np)
            cur_frame_Surface = pygame.transform.flip(cur_frame_Surface, True, False)
            cur_frame_Surface = pygame.transform.rotate(cur_frame_Surface, 90)
            # Perform scaling directly on screen, leaving cur_frame_Surface
            # unscaled. Slightly faster than scaling cur_frame_Surface and
            # then transferring it to screen.
            pygame.transform.scale(cur_frame_Surface, self.size, self.screen)
            if gc_window_drawer_func is not None and bool_drawgc:
                gc_window_drawer_func(self.screen)
            pygame.display.flip()

            # Save frame to disk (160*210, i.e. not scaled, because this is faster)
            if save_screen_func is not None:
                save_screen_func(cur_frame_Surface, self.frame_cnt)

            # Apply an action and get the resulting reward
            a_index = aenum.action_map(key, self.gamename)
            a = self.legal_actions[a_index]
            reward = self.ale.act(a)
            self.score += reward
            if record_a_and_r_func is not None:
                record_a_and_r_func(a, reward, self.episode, self.score)

            pygame.event.pump()  # need this line to get newly pressed keys
        assert False, "Returning should only happen in the while True loop"

    def run_in_step_by_step_mode(self, gc_window_drawer_func=None,
                                 save_screen_func=None, event_handler_func=None,
                                 record_a_and_r_func=None):
        bool_drawgc = False
        self.run_start_time = time.time()  # used in alerecord_main.py
        while True:
            self.check_episode_end_and_if_true_reset_game()

            # Get game image
            cur_frame_np = self.ale.getScreenRGB()
            cur_frame_Surface = pygame.surfarray.make_surface(cur_frame_np)
            cur_frame_Surface = pygame.transform.flip(cur_frame_Surface, True, False)
            cur_frame_Surface = pygame.transform.rotate(cur_frame_Surface, 90)
            self.frame_cnt += 1

            # Save frame to disk (160*210, i.e. not scaled, because this is faster)
            if save_screen_func is not None:
                save_screen_func(cur_frame_Surface, self.frame_cnt)

            key, draw_next_game_frame = None, False
            while not draw_next_game_frame:
                self.clock.tick(FRAME_RATE)  # control FPS
                key = pygame.key.get_pressed()
                if event_handler_func is not None:
                    stop, eyelink_err_code, bool_drawgc = event_handler_func(key, self)
                    if stop:
                        return eyelink_err_code
                a_index = aenum.action_map(key, self.gamename)
                # action_map returning "NO OP" does not always mean the real
                # action is "NO OP"; only when the human presses "TAB" is the
                # real action "NO OP".
                if (a_index == aenum.PLAYER_A_NOOP and key[pygame.K_TAB]) \
                        or a_index != aenum.PLAYER_A_NOOP:
                    draw_next_game_frame = True

                # Draw the image onto screen. Perform scaling directly on
                # screen, leaving cur_frame_Surface unscaled.
                pygame.transform.scale(cur_frame_Surface, self.size, self.screen)
                if gc_window_drawer_func is not None and bool_drawgc:
                    gc_window_drawer_func(self.screen)
                pygame.display.flip()
                pygame.event.pump()  # need this line to get newly pressed keys

            # Apply an action and get the resulting reward
            a = self.legal_actions[a_index]
            reward = self.ale.act(a)
            self.score += reward
            if record_a_and_r_func is not None:
                record_a_and_r_func(a, reward, self.episode, self.score)
        assert False, "Returning should only happen in the while True loop"
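# Sketch of the ALE state snapshot round trip used by aleForET: save the
# emulator state plus score/episode to an .npz, then resume from it later.
# Paths and the rndseed value are hypothetical; screen=None is allowed when
# only the no-graphics stepping function is used.
env = aleForET('breakout.bin', screen=None, rndseed=1)
frame, reward, ended = env.proceed_one_step__fast__no_scr_support(0)
env.saveALEState('checkpoints/run0.npz')
resumed = aleForET('breakout.bin', screen=None, rndseed=1,
                   resume_state_file='checkpoints/run0.npz')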
class ALEEnvironment(Environment):
    def __init__(self, rom_file, args):
        from ale_python_interface import ALEInterface
        self.ale = ALEInterface()

        # Set ALE configuration
        self.ale.setInt(b'frame_skip', args.frame_skip)
        self.ale.setFloat(b'repeat_action_probability',
                          args.repeat_action_probability)
        self.ale.setBool(b'color_averaging', args.color_averaging)
        if args.random_seed:
            self.ale.setInt(b'random_seed', args.random_seed)
        if args.record_screen_path:
            if not os.path.exists(args.record_screen_path):
                os.makedirs(args.record_screen_path)
            self.ale.setString(b'record_screen_dir',
                               args.record_screen_path.encode())
        if args.record_sound_filename:
            self.ale.setBool(b'sound', True)
            self.ale.setString(b'record_sound_filename',
                               args.record_sound_filename.encode())

        # Load ROM
        self.ale.loadROM(rom_file.encode())

        # Set game difficulty and mode (after loading)
        self.ale.setDifficulty(args.game_difficulty)
        self.ale.setMode(args.game_mode)

        # Whether to use the minimal or the full action set
        if args.minimal_action_set:
            self.actions = self.ale.getMinimalActionSet()
        else:
            self.actions = self.ale.getLegalActionSet()

        # Life lost control
        self.life_lost = False

        # Initialize base class
        super(ALEEnvironment, self).__init__(args)

    def action_dim(self):
        return len(self.actions)

    def reset(self):
        # In test mode, the game is simply initialized. In train mode, if the
        # game is in a terminal state due to a life loss but not yet game
        # over, then only the life loss flag is reset so that the next game
        # starts from the current state. Otherwise, the game is simply
        # initialized.
        if (self.mode == 'test'
                or not self.life_lost  # `reset` called in the middle of an episode
                or self.ale.game_over()):  # all lives are lost
            self.ale.reset_game()
        self.life_lost = False
        screen = self._get_state(self.ale.getScreenRGB())
        return screen

    def step(self, action, action_b=0, ignore_screen=False):
        lives = self.ale.lives()
        # Act on the environment (player B actions are offset by 18 in ALE's
        # action enumeration)
        reward = self.ale.act(self.actions[action], self.actions[action_b] + 18)
        # Check if a life was lost
        self.life_lost = (not lives == self.ale.lives())
        # Check terminal state
        terminal = (self.ale.game_over() or self.life_lost
                    ) if self.mode == 'train' else self.ale.game_over()
        # Skip the screen if requested (in case of RobotEnvironment)
        if ignore_screen:
            screen = None
        else:
            # Get screen from ALE
            screen = self._get_state(self.ale.getScreenRGB())
        # Wait for the next frame to start
        self.fps_control.wait_next_frame()
        return screen, reward, terminal
class GameEnvironment:
    def __init__(self, settings):
        self.ale = ALEInterface()
        self.ale.setBool('display_screen', settings['DISPLAY_SCREEN'])
        self.ale.setBool('sound', settings['SOUND'])
        self.ale.setBool('color_averaging', settings['COLOR_AVERAGING'])
        self.ale.setInt('random_seed', settings['RANDOM_SEED'])
        self.ale.setInt('frame_skip', settings['FRAME_SKIP'])
        self.ale.setFloat('repeat_action_probability',
                          settings['REPEAT_ACTION_PROB'])

        roms_dir = settings['ROMS_DIR']
        rom_name = settings['ROM_NAME']
        if rom_name.endswith('.bin'):
            self.name = rom_name[:-4]
            ROM = rom_name
        else:
            self.name = rom_name
            ROM = rom_name + '.bin'
        self.ale.loadROM(os.path.join(roms_dir, ROM))

        self.random_starts = settings['RANDOM_STARTS']
        self.rng = settings['RNG']

        if settings['MINIMAL_ACTION_SET']:
            self.actions = self.ale.getMinimalActionSet()
        else:
            self.actions = self.ale.getLegalActionSet()
        self.n_actions = len(self.actions)

        self.width, self.height = self.ale.getScreenDims()
        self.observation = np.zeros((self.height, self.width), dtype='uint8')
        self.reward = None
        self.game_over = None
        self.terminal = None
        self.total_lives = None
        self.init()

    def init(self):
        self.restartGame()
        self.reward = 0
        self.game_over = self.gameOver()
        self.terminal = self.game_over
        self.total_lives = self.lives()
        self.step(0)

    def getState(self):
        return self.observation, self.reward, self.terminal, self.game_over

    def step(self, action, training=False):
        self.reward = self.act(action)
        self.paint()
        lives = self.lives()
        self.game_over = self.gameOver()
        self.terminal = self.game_over
        if training and (lives < self.total_lives):
            self.terminal = True
        self.total_lives = lives
        return self.getState()

    def newGame(self):
        self.init()
        for i in range(self.rng.randint(1, self.random_starts)):
            self.act(0)
            terminal = self.gameOver()
            if terminal:
                print("Warning: terminal during random init")
        return self.step(0)

    def newTestGame(self):
        self.init()
        return self.getState()

    def paint(self):
        self.ale.getScreenGrayscale(self.observation)

    def getScreenRGB(self):
        return self.ale.getScreenRGB()

    def act(self, action):
        assert (action >= 0) and (action < self.n_actions)
        return self.ale.act(self.actions[action])

    def lives(self):
        return self.ale.lives()

    def restartGame(self):
        self.ale.reset_game()

    def gameOver(self):
        return self.ale.game_over()
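# GameEnvironment is driven entirely by a settings dict. A sketch of one such
# dict with hypothetical values; RNG needs a .randint() method (e.g. a numpy
# RandomState), since newGame() calls rng.randint().
import numpy as np

settings = {
    'DISPLAY_SCREEN': False, 'SOUND': False, 'COLOR_AVERAGING': True,
    'RANDOM_SEED': 123, 'FRAME_SKIP': 4, 'REPEAT_ACTION_PROB': 0.0,
    'ROMS_DIR': './roms', 'ROM_NAME': 'breakout', 'RANDOM_STARTS': 30,
    'RNG': np.random.RandomState(123), 'MINIMAL_ACTION_SET': True,
}
env = GameEnvironment(settings)
obs, reward, terminal, game_over = env.newGame()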
def __init__(self, random_seed, frame_skip, repeat_action_probability, sound,
             display_screen, block_state_repr=None, enemy_state_repr=None,
             friendly_state_repr=None):
    ale = ALEInterface()

    # Get & Set the desired settings
    if random_seed is not None:
        ale.setInt('random_seed', random_seed)
    ale.setInt('frame_skip', frame_skip)
    ale.setFloat('repeat_action_probability', repeat_action_probability)
    if display_screen:
        if sys.platform == 'darwin':
            import pygame
            pygame.init()
    ale.setBool('sound', sound)
    ale.setBool('display_screen', display_screen)

    # Load the ROM file
    ale.loadROM('qbert.bin')

    # Get the list of legal actions
    legal_actions = ale.getLegalActionSet()
    minimal_actions = ale.getMinimalActionSet()
    logging.debug('Legal actions: {}'.format(
        [action_number_to_name(a) for a in legal_actions]))
    logging.debug('Minimal actions: {}'.format(
        [action_number_to_name(a) for a in minimal_actions]))

    width, height = ale.getScreenDims()
    rgb_screen = np.empty([height, width, 3], dtype=np.uint8)
    ram_size = ale.getRAMSize()
    ram = np.zeros(ram_size, dtype=np.uint8)

    # ALE components
    self.ale = ale
    self.lives = ale.lives()
    self.rgb_screen = rgb_screen
    self.ram_size = ale.getRAMSize()
    self.ram = ram

    # Verbose state representation
    self.desired_color = COLOR_YELLOW
    self.block_colors = INITIAL_COLORS
    self.enemies = INITIAL_ENEMY_POSITIONS
    self.friendlies = INITIAL_FRIENDLY_POSITIONS
    self.discs = INITIAL_DISCS
    self.current_row, self.current_col = 0, 0
    self.level = 1
    self.enemy_present = False
    self.friendly_present = False
    self.block_state_repr = block_state_repr
    self.enemy_state_repr = enemy_state_repr
    self.friendly_state_repr = friendly_state_repr
    self.num_colored_blocks = 0
class ALEEnvironment(Environment):
    def __init__(self, rom_file, args):
        from ale_python_interface import ALEInterface
        self.ale = ALEInterface()
        if args.display_screen:
            if sys.platform == 'darwin':
                import pygame
                pygame.init()
                self.ale.setBool('sound', False)  # Sound doesn't work on OSX
            elif sys.platform.startswith('linux'):
                self.ale.setBool('sound', True)
            self.ale.setBool('display_screen', True)
        self.ale.setInt('frame_skip', args.frame_skip)
        self.ale.setFloat('repeat_action_probability', args.repeat_action_probability)
        self.ale.setBool('color_averaging', args.color_averaging)
        if args.random_seed:
            self.ale.setInt('random_seed', args.random_seed)
        if args.record_screen_path:
            if not os.path.exists(args.record_screen_path):
                logger.info("Creating folder %s" % args.record_screen_path)
                os.makedirs(args.record_screen_path)
            logger.info("Recording screens to %s", args.record_screen_path)
            self.ale.setString('record_screen_dir', args.record_screen_path)
        if args.record_sound_filename:
            logger.info("Recording sound to %s", args.record_sound_filename)
            self.ale.setBool('sound', True)
            self.ale.setString('record_sound_filename', args.record_sound_filename)
        self.ale.loadROM(rom_file)
        if args.minimal_action_set:
            self.actions = self.ale.getMinimalActionSet()
            logger.info("Using minimal action set with size %d" % len(self.actions))
        else:
            self.actions = self.ale.getLegalActionSet()
            logger.info("Using full action set with size %d" % len(self.actions))
        logger.debug("Actions: " + str(self.actions))
        # OpenCV expects width as first and height as second
        self.dims = (args.screen_width, args.screen_height)

    def numActions(self):
        return len(self.actions)

    def restart(self):
        self.ale.reset_game()

    def act(self, action):
        return self.ale.act(self.actions[action])

    def getScreen(self):
        screen = self.ale.getScreenGrayscale()
        return cv2.resize(screen, self.dims)

    def isTerminal(self):
        return self.ale.game_over()
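# Why the (width, height) ordering matters: cv2.resize takes its dsize
# argument as (width, height), while the numpy arrays ALE returns are indexed
# (height, width). A quick self-contained check:
import cv2
import numpy as np

screen = np.zeros((210, 160), dtype=np.uint8)  # ALE frames are 210x160 (h, w)
resized = cv2.resize(screen, (84, 110))        # dsize is (width, height)
assert resized.shape == (110, 84)              # numpy shape is (height, width)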
class AtariSimulator(object):
    def __init__(self, settings):
        '''Initiate the Arcade Learning Environment (ALE) using the Python
        interface: https://github.com/bbitmaster/ale_python_interface/wiki

        - Set number of frames to be skipped, random seed, ROM and title for
          display.
        - Retrieve a set of legal actions and their number.
        - Retrieve dimensions of the original screen (width/height), and set
          the dimensions of the cropped screen, together with the padding
          used to crop the screen rectangle.
        - Set dimensions of the pygame display that will show visualization
          of the simulation. (May be cropped --- showing what the learner
          sees, or not --- showing the full Atari screen.)
        - Allocate memory for generated grayscale screenshots. Accepts dims
          in (height/width) format.
        '''
        self.ale = ALEInterface()
        self.ale.setInt("frame_skip", settings["frame_skip"])
        self.ale.setInt("random_seed", settings["seed_simulator"])
        self.ale.loadROM(settings["rom_dir"] + '/' + settings["rom"])

        self.title = "ALE Simulator: " + str(settings["rom"])
        self.actions = self.ale.getLegalActionSet()
        self.n_actions = self.actions.size

        self.screen_dims = self.ale.getScreenDims()
        self.model_dims = settings['model_dims']
        self.pad = settings['pad']
        print("Original screen width/height: " + str(self.screen_dims[0]) +
              "/" + str(self.screen_dims[1]))
        print("Cropped screen width/height: " + str(self.model_dims[0]) +
              "/" + str(self.model_dims[1]))

        self.viz_cropped = settings['viz_cropped']
        if self.viz_cropped:
            self.display_dims = (int(self.model_dims[0] * 2),
                                 int(self.model_dims[1] * 2))
        else:
            self.display_dims = (int(self.screen_dims[0] * 2),
                                 int(self.screen_dims[1] * 2))

        # preallocate an array to accept ALE screen data (height/width)!
        self.screen_data = np.empty(
            (self.screen_dims[1], self.screen_dims[0]), dtype=np.uint8)

    def get_screenshot(self):
        '''Return a cropped snapshot of the simulator:
        - store grayscale values in a preallocated array
        - cut out a square from the rectangle, using the provided padding value
        - downsample to the desired size and transpose from (height/width)
          to (width/height)
        '''
        self.ale.getScreenGrayscale(self.screen_data)
        self.tmp = self.screen_data[
            (self.screen_dims[1] - self.screen_dims[0] - self.pad):
            (self.screen_dims[1] - self.pad), :]
        self.frame = spm.imresize(self.tmp, self.model_dims[::-1],
                                  interp='nearest').T
        return self.frame

    def act(self, action_index):
        '''Transition the simulator from s to s' using the provided action.
        The action is provided as an index; the simulator translates the
        index into an actual action.'''
        self.last_reward = self.ale.act(self.actions[action_index])

    def reward(self):
        '''Return the reward -- has to be called after the "act" function.'''
        return self.last_reward

    def episode_over(self):
        '''Return a boolean indicating whether the game is still running.'''
        return self.ale.game_over()

    def reset_episode(self):
        '''Reset the game that ended.'''
        self.ale.reset_game()

    def init_viz_display(self):
        '''Initialize the display that will show the visualization.'''
        pygame.init()
        self.screen = pygame.display.set_mode(self.display_dims)
        if self.title:
            pygame.display.set_caption(self.title)

    def refresh_viz_display(self):
        '''If the display is shut down, shut the game down; else move the
        current simulator frame (cropped or not) into the pygame display,
        after expanding it 2x along the x and y dimensions.'''
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                exit()  # the original referenced `exit` without calling it
        if self.viz_cropped:
            # has already been transposed
            self.surface = pygame.surfarray.make_surface(self.frame)
        else:
            self.surface = pygame.surfarray.make_surface(self.screen_data.T)
        self.screen.blit(pygame.transform.scale2x(self.surface), (0, 0))
        pygame.display.flip()
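# Sketch of one episode with AtariSimulator; the settings values are
# hypothetical but cover every key the constructor reads.
settings = {
    'frame_skip': 4, 'seed_simulator': 123, 'rom_dir': './roms',
    'rom': 'breakout.bin', 'model_dims': (84, 84), 'pad': 15,
    'viz_cropped': False,
}
sim = AtariSimulator(settings)
while not sim.episode_over():
    frame = sim.get_screenshot()  # cropped, downsampled, transposed
    sim.act(0)                    # index into sim.actions
    r = sim.reward()              # only valid after act()
sim.reset_episode()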
class ALEEnvironment(Environment):
    def __init__(self, rom_file, args):
        from ale_python_interface import ALEInterface
        self.ale = ALEInterface()
        if args.display_screen:
            self.ale.setBool('sound', True)
            self.ale.setBool('display_screen', True)
        self.ale.setInt('frame_skip', args.frame_skip)
        self.ale.setFloat('repeat_action_probability', args.repeat_action_probability)
        self.ale.setBool('color_averaging', args.color_averaging)
        if args.random_seed:
            self.ale.setInt('random_seed', args.random_seed)
        if args.record_screen_path:
            if not os.path.exists(args.record_screen_path):
                logger.info("Creating folder %s" % args.record_screen_path)
                os.makedirs(args.record_screen_path)
            logger.info("Recording screens to %s", args.record_screen_path)
            self.ale.setString('record_screen_dir', args.record_screen_path)
        if args.record_sound_filename:
            logger.info("Recording sound to %s", args.record_sound_filename)
            self.ale.setBool('sound', True)
            self.ale.setString('record_sound_filename', args.record_sound_filename)
        self.ale.loadROM(rom_file)
        if args.minimal_action_set:
            self.actions = self.ale.getMinimalActionSet()
            logger.info("Using minimal action set with size %d" % len(self.actions))
        else:
            self.actions = self.ale.getLegalActionSet()
            logger.info("Using full action set with size %d" % len(self.actions))
        logger.debug("Actions: " + str(self.actions))
        self.screen_width = args.screen_width
        self.screen_height = args.screen_height
        self.life_lost = False
        # restart()/isTerminal() read a mode attribute that the original
        # never initialized; default it here (assumed 'train' or 'test')
        self.mode = 'train'

    def numActions(self):
        return len(self.actions)

    def restart(self):
        # In test mode, the game is simply initialized. In train mode, if the
        # game is in a terminal state due to a life loss but not yet game
        # over, then only the life loss flag is reset so that the next game
        # starts from the current state. Otherwise, the game is simply
        # initialized.
        if (self.mode == 'test'
                or not self.life_lost  # `reset` called in the middle of an episode
                or self.ale.game_over()):  # all lives are lost
            self.ale.reset_game()
        self.life_lost = False

    def act(self, action):
        lives = self.ale.lives()
        reward = self.ale.act(self.actions[action])
        self.life_lost = (not lives == self.ale.lives())
        return reward

    def getScreen(self):
        screen = self.ale.getScreenGrayscale()
        return cv2.resize(screen, (self.screen_width, self.screen_height))

    def isTerminal(self):
        if self.mode == 'train':
            return self.ale.game_over() or self.life_lost
        return self.ale.game_over()
class ALEEnvironment():
    def __init__(self, rom_file, args):
        self.ale = ALEInterface()
        self.histLen = 4

        if args.display_screen:
            if sys.platform == 'darwin':
                import pygame
                pygame.init()
                self.ale.setBool('sound', False)  # Sound doesn't work on OSX
            elif sys.platform.startswith('linux'):
                self.ale.setBool('sound', True)
            self.ale.setBool('display_screen', True)

        self.ale.setInt('frame_skip', args.frame_skip)
        # self.ale.setFloat('repeat_action_probability', args.repeat_action_probability)
        self.ale.setBool('color_averaging', args.color_averaging)
        if args.random_seed:
            self.ale.setInt('random_seed', args.random_seed)
        self.ale.loadROM(rom_file)

        if args.minimal_action_set:
            self.actions = self.ale.getMinimalActionSet()
            logger.info("Using minimal action set with size %d" % len(self.actions))
        else:
            self.actions = self.ale.getLegalActionSet()
            logger.info("Using full action set with size %d" % len(self.actions))
        logger.debug("Actions: " + str(self.actions))

        self.screen_width = args.screen_width
        self.screen_height = args.screen_height
        self.mode = "train"
        self.life_lost = False
        self.initSrcreen = self.getScreen()
        self.goalSet = []
        self.goalSet.append([[70, 65], [74, 71]])  # lower right ladder 4
        self.goalSet.append([[11, 58], [15, 66]])  # lower left ladder 3
        self.goalSet.append([[7, 41], [11, 45]])   # key 5
        self.goalCenterLoc = []
        for goal in self.goalSet:
            goalCenter = [float(goal[0][0] + goal[1][0]) / 2,
                          float(goal[0][1] + goal[1][1]) / 2]
            self.goalCenterLoc.append(goalCenter)
        self.agentOriginLoc = [42, 33]
        self.agentLastX = 42
        self.agentLastY = 33
        self.reachedGoal = [0, 0, 0]
        self.histState = self.initializeHistState()

    def initializeHistState(self):
        histState = np.concatenate((self.getState(), self.getState()), axis=2)
        histState = np.concatenate((histState, self.getState()), axis=2)
        histState = np.concatenate((histState, self.getState()), axis=2)
        return histState

    def numActions(self):
        return len(self.actions)

    def resetGoalReach(self):
        self.reachedGoal = [0, 0, 0]

    def restart(self):
        # In test mode, the game is simply initialized. In train mode, if the
        # game is in a terminal state due to a life loss but not yet game
        # over, then only the life loss flag is reset so that the next game
        # starts from the current state. Otherwise, the game is simply
        # initialized.
        if (self.mode == 'test'
                or not self.life_lost  # `reset` called in the middle of an episode
                or self.ale.game_over()):  # all lives are lost
            self.ale.reset_game()
        self.life_lost = False
        self.reachedGoal = [0, 0, 0]
        for i in range(19):
            self.act(0)  # wait for initialization
        self.histState = self.initializeHistState()
        self.agentLastX = self.agentOriginLoc[0]
        self.agentLastY = self.agentOriginLoc[1]

    def beginNextLife(self):
        self.life_lost = False
        self.reachedGoal = [0, 0, 0]
        for i in range(19):
            self.act(0)  # wait for initialization
        self.histState = self.initializeHistState()
        self.agentLastX = self.agentOriginLoc[0]
        self.agentLastY = self.agentOriginLoc[1]

    def act(self, action):
        lives = self.ale.lives()
        reward = self.ale.act(self.actions[action])
        self.life_lost = (not lives == self.ale.lives())
        currState = self.getState()
        self.histState = np.concatenate((self.histState[:, :, 1:], currState), axis=2)
        return reward

    def getScreen(self):
        screen = self.ale.getScreenGrayscale()
        return cv2.resize(screen, (self.screen_width, self.screen_height))

    def getScreenRGB(self):
        screen = self.ale.getScreenRGB()
        return cv2.resize(screen, (self.screen_width, self.screen_height))

    def getAgentLoc(self):
        img = self.getScreenRGB()
        man = [200, 72, 72]  # the agent's RGB color
        mask = np.zeros(np.shape(img))
        mask[:, :, 0] = man[0]
        mask[:, :, 1] = man[1]
        mask[:, :, 2] = man[2]
        diff = img - mask
        indxs = np.where(diff == 0)
        diff[np.where(diff < 0)] = 0
        diff[np.where(diff > 0)] = 0
        diff[indxs] = 255
        if np.shape(indxs[0])[0] == 0:
            mean_x = self.agentLastX
            mean_y = self.agentLastY
        else:
            mean_y = np.sum(indxs[0]) / np.shape(indxs[0])[0]
            mean_x = np.sum(indxs[1]) / np.shape(indxs[1])[0]
        self.agentLastX = mean_x
        self.agentLastY = mean_y
        return (mean_x, mean_y)

    def distanceReward(self, lastGoal, goal):
        if lastGoal == -1:
            lastGoalCenter = self.agentOriginLoc
        else:
            lastGoalCenter = self.goalCenterLoc[lastGoal]
        goalCenter = self.goalCenterLoc[goal]
        agentX, agentY = self.getAgentLoc()
        dis = np.sqrt((goalCenter[0] - agentX) ** 2 +
                      (goalCenter[1] - agentY) ** 2)
        disLast = np.sqrt((lastGoalCenter[0] - agentX) ** 2 +
                          (lastGoalCenter[1] - agentY) ** 2)
        disGoals = np.sqrt((goalCenter[0] - lastGoalCenter[0]) ** 2 +
                           (goalCenter[1] - lastGoalCenter[1]) ** 2)
        return 0.001 * (disLast - dis) / disGoals

    # add color channel for input of network
    def getState(self):
        screen = self.ale.getScreenGrayscale()
        resized = cv2.resize(screen, (self.screen_width, self.screen_height))
        return np.reshape(resized, (84, 84, 1))

    def getStackedState(self):
        return self.histState

    def isTerminal(self):
        if self.mode == 'train':
            return self.ale.game_over() or self.life_lost
        return self.ale.game_over()

    def isGameOver(self):
        return self.ale.game_over()

    def isLifeLost(self):
        return self.life_lost

    def reset(self):
        self.ale.reset_game()
        self.life_lost = False

    def goalReached(self, goal):
        goalPosition = self.goalSet[goal]
        goalScreen = self.initSrcreen
        stateScreen = self.getScreen()
        count = 0
        for y in range(goalPosition[0][0], goalPosition[1][0]):
            for x in range(goalPosition[0][1], goalPosition[1][1]):
                if goalScreen[x][y] != stateScreen[x][y]:
                    count = count + 1
        # 30 is the total number of pixels of the agent
        if float(count) / 30 > 0.3:
            self.reachedGoal[goal] = 1
            return True
        return False

    def goalNotReachedBefore(self, goal):
        return self.reachedGoal[goal] != 1
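# Sketch of how the goal plumbing above is typically consumed by a
# hierarchical agent: act toward one subgoal at a time, using goalReached()
# for intrinsic termination and distanceReward() for reward shaping. The
# args object and the stand-in policy are hypothetical.
env = ALEEnvironment('montezuma_revenge.bin', args)
env.restart()
lastGoal, goal = -1, 0
while not env.isTerminal() and goal < len(env.goalSet):
    action = 0  # stand-in for a goal-conditioned policy
    extrinsic = env.act(action)
    shaped = extrinsic + env.distanceReward(lastGoal, goal)
    if env.goalReached(goal):
        lastGoal, goal = goal, goal + 1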
def make_dataset(extension='.png'):
    if len(sys.argv) < 3:
        print('Usage: %s rom_file num_games' % sys.argv[0])
        sys.exit()

    ale = ALEInterface()

    # set seed for reproducibility
    ale.setInt(b'random_seed', 123)

    # Set USE_SDL to true to display the screen. ALE must be compiled
    # with SDL enabled for this to work. On OSX, pygame init is used to
    # proxy-call SDL_main.
    USE_SDL = False
    if USE_SDL:
        if sys.platform == 'darwin':
            import pygame
            pygame.init()
            ale.setBool('sound', False)  # Sound doesn't work on OSX
        elif sys.platform.startswith('linux'):
            ale.setBool('sound', True)
        ale.setBool('display_screen', True)

    # load the ROM file
    rom_file = str.encode(sys.argv[1])
    ale.loadROM(rom_file)

    # get number of runs
    num_games = int(sys.argv[2])

    # set RGB flag (default to grayscale when the argument is absent;
    # the original left `rgb` undefined in that case)
    rgb = False
    if len(sys.argv) == 4:
        rgb = bool(sys.argv[3])

    # get the list of legal actions
    legal_actions = ale.getLegalActionSet()

    # make recording directories
    import os
    if not os.path.exists('./record/'):
        os.makedirs('./record/')
    if not os.path.exists('./record/train/'):
        os.makedirs('./record/train/')
    if not os.path.exists('./record/test/'):
        os.makedirs('./record/test/')

    # initialise frame counter (renamed from `iter`, which shadows the builtin)
    frame_count = 0

    # play game
    for episode in range(num_games):
        total_reward = 0
        while not ale.game_over():
            if np.mod(frame_count, 2) == 0:
                screenshot_odd = ale.getScreenRGB()
            else:
                # take current screenshot as the maximum of the last two
                screenshot = np.maximum(ale.getScreenRGB(), screenshot_odd)
                # pre-process image
                screenshot = __pre_process(screenshot, rgb=rgb)
                # save screenshot in the appropriate directory
                __save_image(screenshot, frame_count // 2, extension=extension)
            # select random action
            a = legal_actions[randrange(len(legal_actions))]
            # apply an action and get the resulting reward
            reward = ale.act(a)
            total_reward += reward
            frame_count += 1
        print('Episode %d ended with score: %d' % (episode, total_reward))
        ale.reset_game()
class Atari:
    def __init__(self, rom_dir):
        self.ale = ALEInterface()

        # Set settings
        self.ale.setInt("random_seed", 123)
        self.frame_skip = 1
        self.ale.setInt("frame_skip", self.frame_skip)
        self.ale.setBool("display_screen", False)
        self.ale.setBool("sound", True)
        self.record_sound_for_user = True
        self.ale.setBool("record_sound_for_user", self.record_sound_for_user)
        # NOTE recording audio to file still works. But if both file
        # recording and record_sound_for_user are enabled, then only the
        # latter is done.
        # self.ale.setString("record_sound_filename", "")

        # Get settings
        self.ale.loadROM(rom_dir)
        self.action_count = 0
        self.screen_width, self.screen_height = self.ale.getScreenDims()
        self.legal_actions = self.ale.getLegalActionSet()

        self.framerate = 60  # Should read from ALE settings technically
        self.samples_per_frame = 512  # Should read from ALE SoundExporter class technically
        self.audio_freq = self.framerate * self.samples_per_frame  # /self.frame_skip
        self.all_audio = np.zeros((0,), dtype=np.uint8)

        # Saving audio/video to disk for verification.
        self.save_to_file = True  # NOTE set to False to test actual screen/audio query speed!
        if self.save_to_file:
            # Save png sequence and audio wav file here
            self.save_dir_av = './logs_av_seq_Example'
            self.save_dir_movies = './log_movies_Example'
            self.save_image_prefix = 'image_frames'
            self.save_audio_filename = 'audio_user_recorder.wav'
            self.create_save_dir(self.save_dir_av)

    def take_action(self):
        action = self.legal_actions[np.random.randint(self.legal_actions.size)]
        self.ale.act(action)

    def create_save_dir(self, directory):
        # Remove previous img/audio image logs
        if os.path.exists(directory):
            shutil.rmtree(directory)
        os.makedirs(directory)

    def get_image_and_audio(self):
        np_data_image = np.zeros(self.screen_width * self.screen_height * 3,
                                 dtype=np.uint8)
        if self.record_sound_for_user:
            np_data_audio = np.zeros(self.ale.getAudioSize(), dtype=np.uint8)
            self.ale.getScreenRGBAndAudio(np_data_image, np_data_audio)
            # Also supports independent audio queries if user desires:
            # self.ale.getAudio(np_data_audio)
        else:
            # np_data_audio = 0
            np_data_audio = np.zeros(self.ale.getAudioSize(), dtype=np.uint8)
            self.ale.getAudio(np_data_audio)
            self.ale.getScreenRGB(np_data_image)
        return (np.reshape(np_data_image,
                           (self.screen_height, self.screen_width, 3)),
                np.asarray(np_data_audio))

    def audio_to_mfcc(self, audio):
        mfcc_data = mfcc(signal=audio, samplerate=self.audio_freq,
                         winlen=0.002, winstep=0.0006)
        mfcc_data = np.swapaxes(mfcc_data, 0, 1)  # Time on x-axis
        # Normalization
        min_data = np.min(mfcc_data.flatten())
        max_data = np.max(mfcc_data.flatten())
        mfcc_data = (mfcc_data - min_data) / (max_data - min_data)
        return mfcc_data

    def save_image(self, image):
        number = str(self.action_count).zfill(6)
        scipy.misc.imsave(
            os.path.join(self.save_dir_av,
                         self.save_image_prefix + number + '.png'), image)

    def save_audio(self, audio):
        wavfile.write(os.path.join(self.save_dir_av, self.save_audio_filename),
                      self.audio_freq, audio)

    def save_movie(self, movie_name):
        # Use ffmpeg to convert the saved img sequences and audio to mp4
        # Video recording
        command = [
            "ffmpeg",
            '-y',  # overwrite output file if it exists
            '-r', str(self.framerate),  # frames per second
            # Video input comes from pngs
            '-i', os.path.join(self.save_dir_av,
                               self.save_image_prefix + '%6d.png'),
        ]
        # Audio if available
        if self.record_sound_for_user:
            # Audio input comes from wav
            command.extend(['-i', os.path.join(self.save_dir_av,
                                               self.save_audio_filename)])
        # Codecs and output
        command.extend([
            '-c:v', 'libx264',  # Video codec
            '-c:a', 'mp3',      # Audio codec
            os.path.join(self.save_dir_movies, movie_name + '.mp4'),  # Output dir
        ])
        # Make movie dir and write the mp4
        if not os.path.exists(self.save_dir_movies):
            os.makedirs(self.save_dir_movies)
        sp.call(command)  # NOTE: needs ffmpeg! Will throw 'dir doesn't exist err' otherwise.

    def concat_image_audio(self, image, audio_mfcc):
        # Concatenates image and audio to test sync'ing in saved .mp4
        # Resize MFCC image to be same size as screen image
        audio_mfcc = scipy.misc.imresize(audio_mfcc, np.shape(image))
        cmap = plt.get_cmap('viridis')  # Apply a colormap to spectrogram
        # Gray MFCC -> 4 channel colormap -> 3 channel colormap
        audio_mfcc = (np.delete(cmap(audio_mfcc), 3, 2) * 255.).astype(np.uint8)
        # Concat screen image and MFCC image
        image = np.concatenate((image, audio_mfcc), axis=1)
        return image

    def plot_mfcc(self, audio_mfcc):
        plt.clf()
        plt.imshow(audio_mfcc, interpolation='bilinear',
                   cmap=plt.get_cmap('viridis'))
        plt.pause(0.001)
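# Sketch tying the audio-enabled Atari wrapper together: step the emulator,
# accumulate the per-frame audio samples, and write out a synced movie at
# the end. The step count and movie name are hypothetical; note that this
# class's take_action() does not advance action_count, so the loop does it
# (save_image() uses it for frame numbering).
atari = Atari('./roms/breakout.bin')
for _ in range(1000):
    atari.take_action()
    image, audio = atari.get_image_and_audio()
    atari.all_audio = np.concatenate((atari.all_audio, audio))
    if atari.save_to_file:
        atari.save_image(image)
    atari.action_count += 1
if atari.save_to_file:
    atari.save_audio(atari.all_audio)
    atari.save_movie('breakout_demo')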
class ALEEnvironment(Environment):
    """An environment wrapper for the ALE environment."""

    def __init__(self, rom_name, visible=True):
        super().__init__('Arcade Learning Environment')
        frame_skip = 20
        self._ale = ALEInterface()
        self._ale_sampler = ALEInterface()
        self._ale.setBool(b'display_screen', visible)
        # self._ale.setInt(b'frame_skip', frame_skip)
        # self._ale_sampler.setBool(b'display_screen', True)
        # self._ale_sampler.setInt(b'frame_skip', frame_skip)
        self._ale.loadROM(rom_name.encode('ascii'))
        self._ale_sampler.loadROM(rom_name.encode('ascii'))
        self._action_space = self._ale.getLegalActionSet()
        self._current_score = 0

    def evaluate_rollout(self, solution, discount_factor=0):
        # Copy the main emulator's state into the sampler emulator, so the
        # rollout does not disturb the real game
        # temp_state = self._ale.cloneState()
        temp_ale = self._ale.encodeState(self._ale.cloneState())
        temp_state = self._ale_sampler.decodeState(temp_ale)
        self._ale_sampler.restoreState(temp_state)
        prev_lives = self._ale.lives()
        total_rollout_reward = 0
        discount = 1
        for action in solution:
            rollout_reward = self._ale_sampler.act(action)
            if discount_factor is not None:
                rollout_reward *= discount
                discount *= discount_factor
            total_rollout_reward += rollout_reward
            if self._ale_sampler.game_over():
                break
        score_delta = total_rollout_reward + (self._ale_sampler.lives() - prev_lives)
        # self._ale.restoreState(temp_state)
        return score_delta

    def perform_action(self, action):
        reward = self._ale.act(action)
        self._current_score += reward

    def get_current_score(self):
        return self._current_score

    def get_current_lives(self):
        return self._ale.lives()

    def get_random_action(self):
        return np.random.choice(self._action_space)

    def is_game_over(self):
        return self._ale.game_over()
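# Sketch of using the two-emulator wrapper above for random rollouts: sample
# candidate action sequences, score each with evaluate_rollout() on the
# sampler ALE (leaving the main ALE untouched), then commit the first action
# of the best sequence. Horizon and candidate count are hypothetical.
import numpy as np

env = ALEEnvironment('breakout.bin', visible=False)
candidates = [[env.get_random_action() for _ in range(10)] for _ in range(8)]
scores = [env.evaluate_rollout(c, discount_factor=0.95) for c in candidates]
best = candidates[int(np.argmax(scores))]
env.perform_action(best[0])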
def main():
    if len(sys.argv) < 2:
        dir_rom = '/Users/lguan/Documents/Study/Research/Atari-2600-Roms/K-P/ms_pacman.bin'
    else:
        dir_rom = sys.argv[1]

    ale = ALEInterface()

    # Get & Set the desired settings
    ale.setInt(b'random_seed', 123)

    # Set USE_SDL to true to display the screen. ALE must be compiled
    # with SDL enabled for this to work. On OSX, pygame init is used to
    # proxy-call SDL_main.
    USE_SDL = False
    if USE_SDL:
        # mac OS
        if sys.platform == 'darwin':
            pygame.init()
            ale.setBool('sound', False)  # Sound doesn't work on OSX
        elif sys.platform.startswith('linux'):
            ale.setBool('sound', True)
        ale.setBool('display_screen', True)

    # Load the ROM file
    rom_file = str.encode(dir_rom)
    print('- Loading ROM - %s' % dir_rom)
    ale.loadROM(rom_file)
    print('- Complete loading ROM')

    (game_surface_width, game_surface_height) = ale.getScreenDims()
    print("game surface width/height: " + str(game_surface_width) + "/" +
          str(game_surface_height))

    (display_width, display_height) = (800, 640)
    print('display width/height', (display_width, display_height))

    available_action = ale.getLegalActionSet()
    print(available_action)

    # init pygame
    pygame.init()
    display_screen = pygame.display.set_mode((display_width, display_height))
    pygame.display.set_caption("Arcade Learning Environment Player Agent Display")

    # init clock
    clock = pygame.time.Clock()
    is_exit = False

    # Play 10 episodes
    for episode in range(10):
        if is_exit:
            break
        total_reward = 0
        while not ale.game_over() and not is_exit:
            a = getActionFromKeyboard()
            # Apply an action and get the resulting reward
            reward = ale.act(a)
            total_reward += reward

            # clear screen
            display_screen.fill((0, 0, 0))
            # render game surface
            renderGameSurface(ale, display_screen,
                              (game_surface_width, game_surface_height))
            # display related info
            displayRelatedInfo(display_screen, a, total_reward)
            pygame.display.flip()

            # process pygame event queue
            for event in pygame.event.get():
                if event.type == pygame.QUIT:
                    is_exit = True
                    break
                if event.type == pygame.KEYDOWN and event.key == pygame.K_q:
                    is_exit = True
                    break

            # delay to 60fps
            clock.tick(60.)
        print('Episode %d ended with score: %d' % (episode, total_reward))
        ale.reset_game()
class GameManager(object):
    """This class takes care of the interactions between an agent and a game
    across episodes, as well as overall logging of performance.
    """

    def __init__(self, game_name, agent, results_dir, n_epochs=1,
                 n_episodes=None, n_frames=None, remove_old_results_dir=False,
                 use_minimal_action_set=True, min_time_between_frames=0):
        """game_name is one of the supported games (there are many), as a
        string: "space_invaders.bin"
        agent is an instance of a subclass of the Agent interface
        results_dir is a string representing a directory in which results
        and logs are placed. If it does not exist, it is created.
        use_minimal_action_set determines whether the agent is offered all
        possible actions, or only those (minimal) that are applicable to
        the specific game.
        min_time_between_frames is the minimum required time in seconds
        between frames. If 0, the game is unrestricted.
        """
        self.game_name = game_name
        self.agent = agent
        self.use_minimal_action_set = use_minimal_action_set
        self.min_time_between_frames = min_time_between_frames
        self.n_epochs = n_epochs
        self.n_episodes = n_episodes
        self.n_frames = n_frames
        if ((n_episodes is None and n_frames is None)
                or (n_episodes is not None and n_frames is not None)):
            raise ValueError("Exactly one of n_episodes and n_frames "
                             "must be defined")
        self.initialize_results_dir(results_dir, remove_old_results_dir)

        self.log = util.logging.Logger(
            ("settings", "step", "episode", "epoch", "overall"),
            "settings",
            os.path.join(self.results_dir, "GameManager.log"))
        self.stats = util.logging.CSVLogger(
            os.path.join(self.results_dir, "stats.log"),
            header="epoch,episode,total_reward,n_frames,wall_time",
            print_items=True)

        self._object_cache = dict()

        self.initialize_ale()
        self.initialize_agent()
        self.dump_settings()

    def initialize_results_dir(self, results_dir, remove_existing=False):
        """Creates the whole path of directories if they do not exist.
        If they do exist, raises an error unless remove_existing is True,
        in which case the existing directory is deleted.
        """
        now = datetime.now().strftime("%Y%m%d-%H-%M")
        # drop .bin, append current time down to the minute
        results_dir = os.path.join(results_dir, self.game_name[:-4] + now)
        if remove_existing:
            if os.path.exists(results_dir):
                shutil.rmtree(results_dir)
        # Should raise an error if directory exists
        os.makedirs(results_dir)
        self.results_dir = results_dir

    def initialize_ale(self):
        self.ale = ALEInterface()
        self.ale.loadROM(os.path.join(ROM_RELATIVE_LOCATION, self.game_name))

    def initialize_agent(self):
        RSC = namedtuple("RawStateCallbacks", ["raw", "grey", "rgb", "ram"])
        raw_state_callbacks = RSC(self.get_screen,
                                  self.get_screen_grayscale,
                                  self.get_screen_RGB,
                                  self.get_RAM)
        self.agent.set_raw_state_callbacks(raw_state_callbacks)
        self.agent.set_results_dir(self.results_dir)
        if self.use_minimal_action_set:
            actions = self.ale.getMinimalActionSet()
        else:
            actions = self.ale.getLegalActionSet()
        self.agent.set_available_actions(actions)

    def rest(self, already_elapsed):
        rest_time = self.min_time_between_frames - already_elapsed
        if rest_time > 0:
            sleep(rest_time)

    def run(self):
        """Runs self.n_epochs epochs, where the agent's learning is reset
        for each new epoch. Each epoch lasts self.n_episodes or
        self.n_frames, whichever is defined.
        """
        self.log.overall("Starting run")
        run_start = time()
        for epoch in range(self.n_epochs):
            self.agent.reset()
            self.n_epoch = epoch
            self._run_epoch()
        self.log.overall("End of run ({:.2f} s)".format(time() - run_start))

    def _run_epoch(self):
        self.n_episode = 0
        start = time()
        while not self._stop_condition_met():
            self._run_episode()
            self.n_episode += 1
        wall_time = time() - start
        frames = self.ale.getFrameNumber()
        self.log.epoch("Finished epoch after {:.2f} seconds".format(wall_time))

    def _run_episode(self):
        self.ale.reset_game()
        self.agent.on_episode_start()

        total_reward = 0
        episode_start = time()

        while (not self.ale.game_over()) and (not self._stop_condition_met()):
            timestep_start = time()
            action = self.agent.select_action()
            reward = self.ale.act(action)
            self.agent.receive_reward(reward)
            total_reward += reward
            self.rest(time() - timestep_start)

        wall_time = time() - episode_start
        self.agent.on_episode_end()

        # Stats format: CSV with epoch, episode, total_reward, n_frames, wall_time
        self.stats.write(self.n_epoch, self.n_episode, total_reward,
                         self.ale.getEpisodeFrameNumber(),
                         "{:.2f}".format(wall_time))

    def _stop_condition_met(self):
        if self.n_episodes:
            return self.n_episode >= self.n_episodes
        return self.ale.getFrameNumber() >= self.n_frames

    # Methods for state perception
    def get_screen(self):
        """Returns a matrix containing the current game screen in raw pixel
        data, i.e. before conversion to RGB. Handles reuse of the np.array
        object, so it will overwrite what is in the old object."""
        return self._cached("raw", self.ale.getScreen)

    def get_screen_grayscale(self):
        """Returns an np.array with the screen grayscale colours. Handles
        reuse of the np.array object, so it will overwrite what is in the
        old object.
        """
        return self._cached("gray", self.ale.getScreenGrayscale)

    def get_screen_RGB(self):
        """Returns a numpy array with the screen's RGB colours. The first
        positions contain the red colours, followed by the green colours
        and then the blue colours."""
        return self._cached("rgb", self.ale.getScreenRGB)

    def get_RAM(self):
        """Returns a vector containing current RAM content (byte-level).
        Handles reuse of the np.array object, so it will overwrite what is
        in the old object."""
        return self._cached("ram", self.ale.getRAM)

    def _cached(self, key, func):
        if key in self._object_cache:
            func(self._object_cache[key])
        else:
            self._object_cache[key] = func()
        return self._object_cache[key]

    def dump_settings(self):
        import json
        settings = self.get_settings()
        path = os.path.join(self.results_dir, "settings")
        with open(path, "w") as f:
            json.dump(settings, f, indent=4)

    def get_settings(self):
        """Returns a dict representing the settings needed to reproduce
        this object and its subobjects.
        """
        return {
            "game_name": self.game_name,
            "n_epochs": self.n_epochs,
            "n_episodes": self.n_episodes,
            "n_frames": self.n_frames,
            "agent": self.agent.get_settings(),
            "results_dir": self.results_dir,
            "use_minimal_action_set": self.use_minimal_action_set,
        }
def forward(input, all=False):
    actionValues = sess.run(y, feed_dict={x: input})
    if all:
        return actionValues
    actionValue_max = np.max(actionValues)
    index = np.argmax(actionValues, axis=1)
    return [index, actionValue_max]


ale = ALEInterface()
ale.loadROM("Breakout.A26")
legal_actions = ale.getLegalActionSet()
img = ale.getScreen()
actionIndex, _ = forward(img)
# legal_actions is an array, so index into it (the original called it
# like a function: legal_actions(actionIndex))
reward = ale.act(legal_actions[int(actionIndex[0])])

# Get & Set the desired settings
# NOTE: ALE settings normally need to be set *before* loadROM() to take effect
ale.setInt('random_seed', 123)
ale.setInt("frame_skip", frameSkip)

# Set USE_SDL to true to display the screen. ALE must be compiled
# with SDL enabled for this to work. On OSX, pygame init is used to
# proxy-call SDL_main.
USE_SDL = True
if USE_SDL:
    if sys.platform == 'darwin':
class ALEEnvironment():
    def __init__(self, rom_file, args):
        self.ale = ALEInterface()
        self.histLen = 4
        if args.display_screen:
            if sys.platform == 'darwin':
                import pygame
                pygame.init()
                self.ale.setBool('sound', False)  # Sound doesn't work on OSX
            elif sys.platform.startswith('linux'):
                self.ale.setBool('sound', True)
            self.ale.setBool('display_screen', True)

        self.ale.setInt('frame_skip', args.frame_skip)
        self.ale.setFloat('repeat_action_probability', 0.0)
        self.ale.setBool('color_averaging', args.color_averaging)
        # Fix the random seed across all environments (instead of seeding
        # from args.random_seed) so experiments are repeatable.
        self.ale.setInt('random_seed', 0)
        self.ale.loadROM(rom_file)

        if args.minimal_action_set:
            self.actions = self.ale.getMinimalActionSet()
            logger.info("Using minimal action set with size %d" % len(self.actions))
        else:
            self.actions = self.ale.getLegalActionSet()
            logger.info("Using full action set with size %d" % len(self.actions))
        logger.debug("Actions: " + str(self.actions))

        self.screen_width = args.screen_width
        self.screen_height = args.screen_height
        self.mode = "train"
        self.life_lost = False

        self.initScreen = self.getScreen()
        print("size of screen is:", self.initScreen.shape)
        im = Image.fromarray(self.initScreen)
        im.save('initial_screen.jpeg')

        # Bounding boxes used to detect whether the agent has reached a subgoal.
        # Obtained with this tool: http://nicodjimenez.github.io/boxLabel/annotate.html
        self.goalSet = []
        # goal 0
        self.goalSet.append([[69, 68], [73, 71]])  # Lower right ladder: box for detecting the first subgoal
        # goal 2
        self.goalSet.append([[7, 41], [11, 45]])   # Key: the second subgoal
        self.goalSet.append([[11, 68], [15, 71]])  # Lower left ladder
        # goal 4
        self.goalSet.append([[69, 68], [73, 71]])  # Lower right ladder again: the third subgoal
        # goal 6
        self.goalSet.append([[70, 20], [73, 35]])  # Right door: the fourth subgoal

        self.goalCenterLoc = []
        for goal in self.goalSet:
            goalCenter = [float(goal[0][0] + goal[1][0]) / 2,
                          float(goal[0][1] + goal[1][1]) / 2]
            self.goalCenterLoc.append(goalCenter)

        self.agentOriginLoc = [42, 33]
        self.agentLastX = 42
        self.agentLastY = 33
        self.devilLastX = 0
        self.devilLastY = 0
        self.reachedGoal = [0, 0, 0, 0, 0, 0, 0]
        self.histState = self.initializeHistState()

    def initializeHistState(self):
        histState = np.concatenate((self.getState(), self.getState()), axis=2)
        histState = np.concatenate((histState, self.getState()), axis=2)
        histState = np.concatenate((histState, self.getState()), axis=2)
        return histState

    def numActions(self):
        return len(self.actions)

    def resetGoalReach(self):
        self.reachedGoal = [0, 0, 0, 0, 0, 0, 0]

    def restart(self):
        self.ale.reset_game()
        self.life_lost = False
        self.reachedGoal = [0, 0, 0, 0, 0, 0, 0]
        for i in range(19):
            self.act(0)  # wait for initialization
        self.histState = self.initializeHistState()
        self.agentLastX = self.agentOriginLoc[0]
        self.agentLastY = self.agentOriginLoc[1]

    def beginNextLife(self):
        self.life_lost = False
        self.reachedGoal = [0, 0, 0, 0, 0, 0, 0]
        for i in range(19):
            self.act(0)  # wait for initialization
        self.histState = self.initializeHistState()
        self.agentLastX = self.agentOriginLoc[0]
        self.agentLastY = self.agentOriginLoc[1]

    def act(self, action):
        lives = self.ale.lives()
        reward = self.ale.act(self.actions[action])
        self.life_lost = (lives != self.ale.lives())
        currState = self.getState()
        self.histState = np.concatenate((self.histState[:, :, 1:], currState), axis=2)
        return reward

    def getScreen(self):
        screen = self.ale.getScreenGrayscale()
        resized = cv2.resize(screen, (self.screen_width, self.screen_height))
        return resized

    def getScreenRGB(self):
        screen = self.ale.getScreenRGB()
        resized = cv2.resize(screen, (self.screen_width, self.screen_height))
        return resized

    def getAgentLoc(self, img):
        man = [200, 72, 72]  # the agent's RGB colour
        mask = np.zeros(np.shape(img))
        mask[:, :, 0] = man[0]
        mask[:, :, 1] = man[1]
        mask[:, :, 2] = man[2]
        diff = img - mask
        indxs = np.where(diff == 0)
        diff[np.where(diff < 0)] = 0
        diff[np.where(diff > 0)] = 0
        diff[indxs] = 255
        if np.shape(indxs[0])[0] == 0:
            mean_x = self.agentLastX
            mean_y = self.agentLastY
        else:
            mean_y = np.sum(indxs[0]) / np.shape(indxs[0])[0]
            mean_x = np.sum(indxs[1]) / np.shape(indxs[1])[0]
        self.agentLastX = mean_x
        self.agentLastY = mean_y
        return (mean_x, mean_y)

    def getDevilLoc(self, img):
        devilColor = [236, 236, 236]
        mask = np.zeros(np.shape(img))
        mask[:, :, 0] = devilColor[0]
        mask[:, :, 1] = devilColor[1]
        mask[:, :, 2] = devilColor[2]
        diff = img - mask
        indxs = np.where(diff == 0)
        diff[np.where(diff < 0)] = 0
        diff[np.where(diff > 0)] = 0
        diff[indxs] = 255
        if np.shape(indxs[0])[0] == 0:
            mean_x = self.devilLastX
            mean_y = self.devilLastY
        else:
            mean_y = np.sum(indxs[0]) / np.shape(indxs[0])[0]
            mean_x = np.sum(indxs[1]) / np.shape(indxs[1])[0]
        self.devilLastX = mean_x
        self.devilLastY = mean_y
        return (mean_x, mean_y)

    def distanceReward(self, lastGoal, goal):
        if lastGoal == -1:
            lastGoalCenter = self.agentOriginLoc
        else:
            lastGoalCenter = self.goalCenterLoc[lastGoal]
        goalCenter = self.goalCenterLoc[goal]
        # getAgentLoc needs the current RGB frame
        agentX, agentY = self.getAgentLoc(self.getScreenRGB())
        dis = np.sqrt((goalCenter[0] - agentX) ** 2 + (goalCenter[1] - agentY) ** 2)
        disLast = np.sqrt((lastGoalCenter[0] - agentX) ** 2 + (lastGoalCenter[1] - agentY) ** 2)
        disGoals = np.sqrt((goalCenter[0] - lastGoalCenter[0]) ** 2 + (goalCenter[1] - lastGoalCenter[1]) ** 2)
        return 0.001 * (disLast - dis) / disGoals

    # add a colour channel so the frame matches the network's input shape
    def getState(self):
        screen = self.ale.getScreenGrayscale()
        resized = cv2.resize(screen, (self.screen_width, self.screen_height))
        return np.reshape(resized, (self.screen_height, self.screen_width, 1))

    def getStackedState(self):
        return self.histState

    def isTerminal(self):
        if self.mode == 'train':
            return self.ale.game_over() or self.life_lost
        return self.ale.game_over()

    def isGameOver(self):
        return self.ale.game_over()

    def isLifeLost(self):
        return self.life_lost

    def reset(self):
        self.ale.reset_game()
        self.life_lost = False

    def goalReached(self, goal):
        # These are the original tasks where bounding boxes are used to
        # detect the agent's location.
        subset = [0, 2, 3, 4, 6]
        if goal in subset:
            goal_index = subset.index(goal)
            goalPosition = self.goalSet[goal_index]
            goalScreen = self.initScreen
            stateScreen = self.getScreen()
            count = 0
            for y in range(goalPosition[0][0], goalPosition[1][0]):
                for x in range(goalPosition[0][1], goalPosition[1][1]):
                    if goalScreen[x][y] != stateScreen[x][y]:
                        count = count + 1
            # 30 is the total number of pixels of the agent
            if float(count) / 30 > 0.3:
                self.reachedGoal[goal] = 1
                return True
        if goal == 1:
            # previously detected whether the agent is left of the devil
            # (agent_left_devil); now detects the lower left ladder
            return self.detect_left_ladder()
        # (a modified variant detected goal 4 with detect_right_ladder instead)
        if goal == 5:
            # detect if the agent is back at its original location
            return self.original_location_reached()
        return False

    def detect_right_ladder(self):
        return self._ladder_reached(self.goalSet[0])

    def detect_left_ladder(self):
        return self._ladder_reached(self.goalSet[2])

    def _ladder_reached(self, goalPosition):
        # shared pixel-diff test for the two ladder detectors; both mark
        # goal 5 as reached on success
        goalScreen = self.initScreen
        stateScreen = self.getScreen()
        count = 0
        for y in range(goalPosition[0][0], goalPosition[1][0]):
            for x in range(goalPosition[0][1], goalPosition[1][1]):
                if goalScreen[x][y] != stateScreen[x][y]:
                    count = count + 1
        # 30 is the total number of pixels of the agent
        if float(count) / 30 > 0.3:
            self.reachedGoal[5] = 1
            return True
        return False

    def original_location_reached(self):
        img = self.getScreenRGB()
        (x, y) = self.getAgentLoc(img)
        return abs(x - 42) <= 2 and abs(y - 33) <= 2

    def pause(self):
        os.system('read -s -n 1 -p "Press any key to continue...\n"')

    def agent_left_devil(self):
        # use the resized frame so coordinates match getAgentLoc/getDevilLoc
        img = self.getScreenRGB()
        (x, y) = self.getAgentLoc(img)
        (a, b) = self.getDevilLoc(img)
        return (a - x > 40) and (abs(y - b) <= 40)

    def agent_right_devil(self):
        img = self.getScreenRGB()
        (x, y) = self.getAgentLoc(img)
        (a, b) = self.getDevilLoc(img)
        return (x - a > 40) and (abs(y - b) <= 40)

    def goalNotReachedBefore(self, goal):
        return self.reachedGoal[goal] != 1
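# Illustrative subgoal-following sketch for the environment above; the ROM
# path and pick_action() (a hypothetical trained low-level policy) are
# assumptions, not part of the original code.
env = ALEEnvironment('montezuma_revenge.bin', args)
env.restart()
for goal in range(7):                      # subgoals 0..6 tracked in reachedGoal
    while not env.isTerminal() and not env.goalReached(goal):
        state = env.getStackedState()      # four stacked 84x84 grayscale frames
        env.act(pick_action(state, goal))
    if env.isTerminal():
        break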
class aleForET:
    def __init__(self, rom_file, screen):
        self.screen = screen
        pygame.init()
        self.ale = ALEInterface()
        GAME_W, GAME_H = 160, 210
        # Setting up the pygame screen Surface
        self.size = GAME_W * V.xSCALE, GAME_H * V.ySCALE
        # Get & Set the desired settings
        self.ale.setInt('random_seed', randint(0, 200))
        self.ale.setBool('sound', False)
        self.ale.setBool('display_screen', False)
        self.ale.setBool('color_averaging', False)
        self.ale.setFloat('repeat_action_probability', 0.0)
        # Load the ROM file
        self.ale.loadROM(rom_file)
        self.gamename = os.path.basename(rom_file).split('.')[0]
        # Get the list of legal actions
        self.legal_actions = self.ale.getLegalActionSet()

    def run(self, gc_window_drawer_func=None, save_screen_func=None,
            event_handler_func=None, record_a_and_r_func=None):
        last_time = time.time()
        frame_cnt = 0
        bool_drawgc = False  # default until event_handler_func updates it
        clock = pygame.time.Clock()
        # Play EPISODES episodes
        for episode in xrange(EPISODES):
            total_reward = 0
            while not self.ale.game_over():
                clock.tick(FRAME_RATE)  # control FPS
                frame_cnt += 1
                key = pygame.key.get_pressed()
                if event_handler_func is not None:
                    stop, eyelink_err_code, bool_drawgc = event_handler_func(key)
                    if stop:
                        return eyelink_err_code
                # Display FPS
                diff_time = time.time() - last_time
                if diff_time > 1.0:
                    print 'FPS: %.1f' % clock.get_fps()
                    last_time = time.time()
                # Show game image
                cur_frame_np = self.ale.getScreenRGB()
                cur_frame_Surface = pygame.surfarray.make_surface(cur_frame_np)
                cur_frame_Surface = pygame.transform.flip(cur_frame_Surface, True, False)
                cur_frame_Surface = pygame.transform.rotate(cur_frame_Surface, 90)
                # Perform scaling directly on screen, leaving cur_frame_Surface unscaled.
                # Slightly faster than scaling cur_frame_Surface and then transferring to screen.
                pygame.transform.scale(cur_frame_Surface, self.size, self.screen)
                if gc_window_drawer_func is not None and bool_drawgc:
                    gc_window_drawer_func(self.screen)
                pygame.display.flip()
                # Save frame to disk (160*210, i.e. not scaled, because this is faster)
                if save_screen_func is not None:
                    save_screen_func(cur_frame_Surface, frame_cnt)
                # Apply an action and get the resulting reward
                a_index = aenum.action_map(key, self.gamename)
                a = self.legal_actions[a_index]
                reward = self.ale.act(a)
                total_reward += reward
                if record_a_and_r_func is not None:
                    record_a_and_r_func(a, reward)
                pygame.event.pump()  # need this line to get newly pressed keys
            print 'Episode', episode, 'ended with score:', total_reward
            self.ale.reset_game()
        TRIAL_OK = 0  # copied from EyeLink's constants
        return TRIAL_OK

    def run_in_step_by_step_mode(self, gc_window_drawer_func=None, save_screen_func=None,
                                 event_handler_func=None, record_a_and_r_func=None):
        frame_cnt = 0
        bool_drawgc = False
        clock = pygame.time.Clock()
        # Play 10 episodes
        for episode in xrange(10):
            total_reward = 0
            while not self.ale.game_over():
                # Get game image
                cur_frame_np = self.ale.getScreenRGB()
                cur_frame_Surface = pygame.surfarray.make_surface(cur_frame_np)
                cur_frame_Surface = pygame.transform.flip(cur_frame_Surface, True, False)
                cur_frame_Surface = pygame.transform.rotate(cur_frame_Surface, 90)
                frame_cnt += 1
                # Save frame to disk (160*210, i.e. not scaled, because this is faster)
                if save_screen_func is not None:
                    save_screen_func(cur_frame_Surface, frame_cnt)
                key, draw_next_game_frame = None, False
                while not draw_next_game_frame:
                    clock.tick(FRAME_RATE)  # control FPS
                    key = pygame.key.get_pressed()
                    if event_handler_func is not None:
                        stop, eyelink_err_code, bool_drawgc = event_handler_func(key)
                        if stop:
                            return eyelink_err_code
                    a_index = aenum.action_map(key, self.gamename)
                    # action_map returning "NO OP" only counts as a real "NO OP"
                    # when the human pressed TAB.
                    if (a_index == aenum.PLAYER_A_NOOP and key[pygame.K_TAB]) \
                            or a_index != aenum.PLAYER_A_NOOP:
                        draw_next_game_frame = True
                    # Draw the image onto screen.
                    # Perform scaling directly on screen, leaving cur_frame_Surface unscaled.
                    pygame.transform.scale(cur_frame_Surface, self.size, self.screen)
                    if gc_window_drawer_func is not None and bool_drawgc:
                        gc_window_drawer_func(self.screen)
                    pygame.display.flip()
                    pygame.event.pump()  # need this line to get newly pressed keys
                # Apply an action and get the resulting reward
                a = self.legal_actions[a_index]
                reward = self.ale.act(a)
                print 'step reward:', reward
                total_reward += reward
                if record_a_and_r_func is not None:
                    record_a_and_r_func(a, reward)
            print 'Episode', episode, 'ended with score:', total_reward
            self.ale.reset_game()
        TRIAL_OK = 0  # copied from EyeLink's constants
        return TRIAL_OK
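# Hedged sketch of the callback contract expected by run() and
# run_in_step_by_step_mode() above: the handler receives the pygame key
# state and returns (stop, eyelink_err_code, bool_drawgc). The handler
# below is an illustrative stand-in, not part of the original code.
def example_event_handler(key):
    stop = bool(key[pygame.K_ESCAPE])      # abort the trial on ESC
    eyelink_err_code = 0                   # 0 == TRIAL_OK above
    bool_drawgc = bool(key[pygame.K_F1])   # hold F1 to draw the gaze-contingent window
    return stop, eyelink_err_code, bool_drawgc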
class AleAgent:
    ##
    # @param processing_cls Class for processing game visual input
    def __init__(self, processing_cls, game_rom=None, encoder_model=None,
                 encoder_weights=None, NFQ_model=None, NFQ_weights=None):
        assert game_rom is not None
        self.game = ALEInterface()
        if encoder_weights is not None and encoder_model is not None:
            self.encoder = Encoder(path_to_model=encoder_model, path_to_weights=encoder_weights)
        else:
            self.encoder = Encoder()

        self.processor = processing_cls()

        # Get & Set the desired settings
        self.game.setInt('random_seed', 0)
        self.game.setInt('frame_skip', 4)

        # Set USE_SDL to true to display the screen. ALE must be compiled
        # with SDL enabled for this to work. On OSX, pygame init is used to
        # proxy-call SDL_main.
        USE_SDL = True
        if USE_SDL:
            if sys.platform == 'darwin':
                pygame.init()
                self.game.setBool('sound', False)  # Sound doesn't work on OSX
            elif sys.platform.startswith('linux'):
                self.game.setBool('sound', False)  # no sound
            self.game.setBool('display_screen', True)

        # Load the ROM file
        self.game.loadROM(game_rom)

        # Get the list of legal actions
        self.legal_actions = self.game.getLegalActionSet()
        # Get actions applicable in the current game
        self.minimal_actions = self.game.getMinimalActionSet()

        if NFQ_model is not None and NFQ_weights is not None:
            self.NFQ = NFQ(self.encoder.out_dim, len(self.minimal_actions),
                           model_path=NFQ_model, weights_path=NFQ_weights)
        else:
            self.NFQ = NFQ(self.encoder.out_dim, len(self.minimal_actions))

        (self.screen_width, self.screen_height) = self.game.getScreenDims()
        self.screen_data = np.zeros((self.screen_height, self.screen_width), dtype=np.uint8)

    ##
    # Train the agent with reinforcement learning
    def train(self, num_of_episodes=1500, eps=0.995, key_binding=None):
        pygame.init()
        for episode in xrange(num_of_episodes):
            total_reward = 0
            moves = 0
            hits = 0
            print 'Starting episode: ', episode + 1

            if key_binding:
                eps = 0.05
            else:
                eps -= 2.0 / num_of_episodes  # float division (integer division would always give 0)

            self.game.getScreenGrayscale(self.screen_data)
            pooled_data = self.processor.process(self.screen_data)
            next_state = self.encoder.encode(pooled_data)

            while not self.game.game_over():
                current_state = next_state
                x = None
                if key_binding:
                    key_pressed = pygame.key.get_pressed()
                    x = key_binding(key_pressed)
                if x is None:
                    r = np.random.rand()
                    if r < eps:
                        x = np.random.randint(self.minimal_actions.size)
                    else:
                        x = self.NFQ.predict_action(current_state)
                a = self.minimal_actions[x]
                # Apply an action and get the resulting reward
                reward = self.game.act(a)

                # record only every 3 frames
                # if not moves % 3:
                self.game.getScreenGrayscale(self.screen_data)
                pooled_data = self.processor.process(self.screen_data)
                next_state = self.encoder.encode(pooled_data)

                transition = np.append(current_state, x)
                transition = np.append(transition, next_state)
                transition = np.append(transition, reward)
                self.NFQ.add_transition(transition)

                total_reward += reward
                if reward > 0:
                    hits += 1
                moves += 1
                if eps > 0.1:
                    eps -= 0.00001
            # end while
            print 'Epsilon: ', eps
            print 'Episode', episode + 1, 'ended with score:', total_reward
            print 'Hits: ', hits
            self.game.reset_game()
            self.NFQ.train()
            hits = 0
            moves = 0
            self.NFQ.save_net()
        # end for

    ##
    # Play the game!
    def play(self):
        total_reward = 0
        moves = 1
        while not self.game.game_over():
            self.game.getScreenGrayscale(self.screen_data)
            pooled_data = self.processor.process(self.screen_data)
            current_state = self.encoder.encode(pooled_data)
            x = self.NFQ.predict_action(current_state)
            a = self.minimal_actions[x]
            reward = self.game.act(a)
            total_reward += reward
            moves += 1
        print 'The game ended with score:', total_reward, ' after: ', moves, ' moves'
class GameManager(object):
    """This class takes care of the interactions between an agent and
    a game across episodes, as well as overall logging of performance.
    """

    def __init__(self, game_name, agent, results_dir, n_epochs=1,
                 n_episodes=None, n_frames=None, remove_old_results_dir=False,
                 use_minimal_action_set=True, min_time_between_frames=0):
        """game_name is one of the supported games (there are many), as a string: "space_invaders.bin"
        agent is an instance of a subclass of the Agent interface
        results_dir is a string representing a directory in which results and logs are placed
            If it does not exist, it is created.
        use_minimal_action_set determines whether the agent is offered all possible actions,
            or only those (minimal) that are applicable to the specific game.
        min_time_between_frames is the minimum required time in seconds between
            frames. If 0, the game is unrestricted.
        """
        self.game_name = game_name
        self.agent = agent
        self.use_minimal_action_set = use_minimal_action_set
        self.min_time_between_frames = min_time_between_frames
        self.n_epochs = n_epochs
        self.n_episodes = n_episodes
        self.n_frames = n_frames
        if ((n_episodes is None and n_frames is None) or
                (n_episodes is not None and n_frames is not None)):
            raise ValueError("Exactly one of n_episodes and n_frames "
                             "must be defined")

        self.initialize_results_dir(results_dir, remove_old_results_dir)

        self.log = util.logging.Logger(
            ('settings', 'step', 'episode', 'epoch', 'overall'),
            'settings',
            os.path.join(self.results_dir, 'GameManager.log'))
        self.stats = util.logging.CSVLogger(
            os.path.join(self.results_dir, 'stats.log'),
            header='epoch,episode,total_reward,n_frames,wall_time',
            print_items=True)

        self._object_cache = dict()

        self.initialize_ale()
        self.initialize_agent()
        self.dump_settings()

    def initialize_results_dir(self, results_dir, remove_existing=False):
        """Creates the whole path of directories if they do not exist.
        If they do exist, raises an error unless remove_existing is True,
        in which case the existing directory is deleted.
        """
        now = datetime.now().strftime('%Y%m%d-%H-%M')
        # drop .bin, append current time down to the minute
        results_dir = os.path.join(results_dir, self.game_name[:-4] + now)
        if remove_existing:
            if os.path.exists(results_dir):
                shutil.rmtree(results_dir)
        # Should raise an error if the directory exists
        os.makedirs(results_dir)
        self.results_dir = results_dir

    def initialize_ale(self):
        self.ale = ALEInterface()
        self.ale.loadROM(os.path.join(ROM_RELATIVE_LOCATION, self.game_name))

    def initialize_agent(self):
        RSC = namedtuple('RawStateCallbacks', ['raw', 'grey', 'rgb', 'ram'])
        raw_state_callbacks = RSC(self.get_screen,
                                  self.get_screen_grayscale,
                                  self.get_screen_RGB,
                                  self.get_RAM)
        self.agent.set_raw_state_callbacks(raw_state_callbacks)
        self.agent.set_results_dir(self.results_dir)
        if self.use_minimal_action_set:
            actions = self.ale.getMinimalActionSet()
        else:
            actions = self.ale.getLegalActionSet()
        self.agent.set_available_actions(actions)

    def rest(self, already_elapsed):
        rest_time = self.min_time_between_frames - already_elapsed
        if rest_time > 0:
            sleep(rest_time)

    def run(self):
        """Runs self.n_epochs epochs, where the agent's learning is reset
        for each new epoch. Each epoch lasts self.n_episodes or self.n_frames,
        whichever is defined.
""" self.log.overall('Starting run') run_start = time() for epoch in xrange(self.n_epochs): self.agent.reset() self.n_epoch = epoch self._run_epoch() self.log.overall('End of run ({:.2f} s)'.format(time() - run_start)) def _run_epoch(self): self.n_episode = 0 start = time() while not self._stop_condition_met(): self._run_episode() self.n_episode += 1 wall_time = (time() - start) frames = self.ale.getFrameNumber() self.log.epoch("Finished epoch after {:.2f} seconds".format(wall_time)) def _run_episode(self): self.ale.reset_game() self.agent.on_episode_start() total_reward = 0 episode_start = time() while (not self.ale.game_over()) and (not self._stop_condition_met()): timestep_start = time() action = self.agent.select_action() reward = self.ale.act(action) self.agent.receive_reward(reward) total_reward += reward self.rest(time() - timestep_start) wall_time = time() - episode_start self.agent.on_episode_end() # Stats format: CSV with epoch, episode, total_reward, n_frames, wall_time self.stats.write(self.n_epoch, self.n_episode, total_reward, self.ale.getEpisodeFrameNumber(), '{:.2f}'.format(wall_time)) def _stop_condition_met(self): if self.n_episodes: return self.n_episode >= self.n_episodes return self.ale.getFrameNumber() >= self.n_frames # Methods for state perception def get_screen(self): """Returns a matrix containing the current game screen in raw pixel data, i.e. before conversion to RGB. Handles reuse of np.array object, so it will overwrite what is in the old object""" return self._cached('raw', self.ale.getScreen) def get_screen_grayscale(self): """Returns an np.array with the screen grayscale colours. Handles reuse of np.array object, so it will overwrite what is in the old object. """ return self._cached('gray', self.ale.getScreenGrayscale) def get_screen_RGB(self): """Returns a numpy array with the screen's RGB colours. The first positions contain the red colours, followed by the green colours and then the blue colours""" return self._cached('rgb', self.ale.getScreenRGB) def get_RAM(self): """Returns a vector containing current RAM content (byte-level). Handles reuse of np.array object, so it will overwrite what is in the old object""" return self._cached('ram', self.ale.getRAM) def _cached(self, key, func): if key in self._object_cache: func(self._object_cache[key]) else: self._object_cache[key] = func() return self._object_cache[key] def dump_settings(self): import json settings = self.get_settings() path = os.path.join(self.results_dir, 'settings') with open(path, 'w') as f: json.dump(settings, f, indent=4) def get_settings(self): """Returns a dict representing the settings needed to reproduce this object and its subobjects """ return { "game_name": self.game_name, "n_epochs": self.n_epochs, "n_episodes": self.n_episodes, "n_frames": self.n_frames, "agent": self.agent.get_settings(), "results_dir": self.results_dir, "use_minimal_action_set": self.use_minimal_action_set, }
class ALEEnvironment(Environment):
    def __init__(self, rom_file, args):
        from ale_python_interface import ALEInterface
        self.ale = ALEInterface()
        if args.display_screen:
            if sys.platform == 'darwin':
                import pygame
                pygame.init()
                self.ale.setBool('sound', False)  # Sound doesn't work on OSX
            elif sys.platform.startswith('linux'):
                self.ale.setBool('sound', True)
            self.ale.setBool('display_screen', True)

        self.ale.setInt('frame_skip', args.frame_skip)
        self.ale.setFloat('repeat_action_probability', args.repeat_action_probability)
        self.ale.setBool('color_averaging', args.color_averaging)

        if args.random_seed:
            self.ale.setInt('random_seed', args.random_seed)

        if args.record_screen_path:
            if not os.path.exists(args.record_screen_path):
                logger.info("Creating folder %s" % args.record_screen_path)
                os.makedirs(args.record_screen_path)
            logger.info("Recording screens to %s", args.record_screen_path)
            self.ale.setString('record_screen_dir', args.record_screen_path)

        if args.record_sound_filename:
            logger.info("Recording sound to %s", args.record_sound_filename)
            self.ale.setBool('sound', True)
            self.ale.setString('record_sound_filename', args.record_sound_filename)

        self.ale.loadROM(rom_file)

        if args.minimal_action_set:
            self.actions = self.ale.getMinimalActionSet()
            logger.info("Using minimal action set with size %d" % len(self.actions))
        else:
            self.actions = self.ale.getLegalActionSet()
            logger.info("Using full action set with size %d" % len(self.actions))
        logger.debug("Actions: " + str(self.actions))

        self.screen_width = args.screen_width
        self.screen_height = args.screen_height
        self.mode = 'train'  # assumed default; restart() and isTerminal() switch on this flag
        self.life_lost = False

    def numActions(self):
        return len(self.actions)

    def restart(self):
        # In test mode, the game is simply initialized. In train mode, if the game
        # is in a terminal state due to a life loss but not yet game over, then only
        # the life-loss flag is reset so that the next game starts from the current
        # state. Otherwise, the game is simply initialized.
        if (
            self.mode == 'test' or
            not self.life_lost or     # `reset` called in the middle of an episode
            self.ale.game_over()      # all lives are lost
        ):
            self.ale.reset_game()
        self.life_lost = False

    def act(self, action):
        lives = self.ale.lives()
        reward = self.ale.act(self.actions[action])
        self.life_lost = (lives != self.ale.lives())
        return reward

    def getScreen(self):
        screen = self.ale.getScreenGrayscale()
        resized = cv2.resize(screen, (self.screen_width, self.screen_height))
        return resized

    def isTerminal(self):
        if self.mode == 'train':
            return self.ale.game_over() or self.life_lost
        return self.ale.game_over()
class AleEnv(object): '''ALE wrapper for RL training game_over_conditions={'points':(-1, 1)}: dict that describes all desired game over conditions each key corresponds to a condition that is checked; the first condition met produces a game over points: int or tuple of integers int: if x < 0, game ends when score is <= x if x >= 0, game ends when score is >= x tuple: game ends if score <= x[0] or score >= x[1] lives: int that ends game when lives <= x frames: int that ends game when total number of frames >= x episodes: int that ends game when num of episodes >= x Use max_num_frames_per_episode to set max episode length ''' # will include timing and hidden functionality in future iterations def __init__(self, rom_file, display_screen=False, sound=False, random_seed=0, game_over_conditions={}, frame_skip=1, repeat_action_probability=0.25, max_num_frames_per_episode=0, min_action_set=False, screen_color='gray', fps=60, output_buffer_size=1, reduce_screen=False): # ALE instance and setup self.ale = ALEInterface() #TODO: check if rom file exists; will crash jupyter kernel otherwise self.ale.loadROM(str.encode(rom_file)) self.ale.setBool(b'sound', sound) self.ale.setBool(b'display_screen', display_screen) if min_action_set: self.legal_actions = self.ale.getMinimalActionSet() else: self.legal_actions = self.ale.getLegalActionSet() self.ale.setInt(b'random_seed', random_seed) self.ale.setInt(b'frame_skip', frame_skip) self.frame_skip = frame_skip self.ale.setFloat(b'repeat_action_probability', repeat_action_probability) self.ale.setInt(b'max_num_frames_per_episode', max_num_frames_per_episode) self.ale.loadROM(str.encode(rom_file)) self.game_over_conditions = game_over_conditions self.screen_color = screen_color self.reduce_screen = reduce_screen self.d_frame = (fps**-1) * self.frame_skip # set up output buffer self.output_buffer_size = output_buffer_size self.queue_size = self.output_buffer_size self._reset_params() def observe(self, flatten=False, expand_dim=False): if flatten is True: out = np.stack(self.output_queue[i] for i in range(self.output_buffer_size)).flatten() if expand_dim is True: return np.expand_dims(np.expand_dims(out, axis=0), axis=1) else: return out else: out = np.stack(self.output_queue[i] for i in range(self.output_buffer_size)) out = np.squeeze(out) if expand_dim is True: return np.expand_dims(np.expand_dims(out, axis=0), axis=1) else: return out @property def width(self): return self.game_screen.shape[1] @property def height(self): return self.game_screen.shape[0] @property def game_over(self): return self._game_over() @property def actions(self): return self.legal_actions @property def lives(self): return self.ale.lives() def _reset_params(self): self.total_points = 0 self.total_frames = 0 self.curr_episode = 1 self.prev_ep_frame_num = -float("inf") if self.screen_color == 'gray' or self.screen_color == 'grey': self.game_screen = np.squeeze(self.ale.getScreenGrayscale()) if self.reduce_screen: self.game_screen = resize(self.game_screen, output_shape=(110, 84)) self.game_screen = self.game_screen[0 + 21:-1 - 4, :] elif self.screen_color == 'rgb' or self.screen_color == 'color': self.game_screen = self.ale.getScreenRGB() if self.reduce_screen: self.game_screen = resize(self.game_screen, output_shape=(110, 84, 3)) self.game_screen = self.game_screen[0 + 21:-1 - 4, :, :] self.output_queue = deque( np.zeros(shape=(self.queue_size - 1, self.height, self.width)), self.queue_size) self.output_queue.appendleft(self.game_screen) def reset(self): self.ale.reset_game() 
self._reset_params() def act(self, action): reward = self.ale.act(self.legal_actions[action]) if self.screen_color == 'gray' or self.screen_color == 'grey': self.game_screen = np.squeeze(self.ale.getScreenGrayscale()) if self.reduce_screen: self.game_screen = resize(self.game_screen, output_shape=(110, 84)) self.game_screen = self.game_screen[0 + 21:-1 - 4, :] elif self.screen_color == 'rgb' or self.screen_color == 'color': self.game_screen = self.ale.getScreenRGB() if self.reduce_screen: self.game_screen = resize(self.game_screen, output_shape=(110, 84, 3)) self.game_screen = self.game_screen[0 + 21:-1 - 4, :, :] self.output_queue.pop() self.output_queue.appendleft(self.game_screen) self.total_points += reward self.total_frames += self.frame_skip if self.ale.getEpisodeFrameNumber() <= self.prev_ep_frame_num: self.curr_episode += 1 self.prev_ep_frame_num = self.ale.getEpisodeFrameNumber() return reward, self.d_frame, self.game_over def _game_over(self): if self.ale.game_over(): return True for cond in self.game_over_conditions: if cond == 'points': if isinstance(self.game_over_conditions[cond], int): if self.total_points >= self.game_over_conditions[cond]: return True elif isinstance(self.game_over_conditions[cond], tuple): if (self.total_points <= self.game_over_conditions[cond][0] or self.total_points >= self.game_over_conditions[cond][1]): return True elif cond == 'lives': if self.lives <= self.game_over_conditions[cond]: return True elif cond == 'frames': if self.total_frames >= self.game_over_conditions[cond]: return True elif cond == 'episodes': if self.curr_episode >= self.game_over_conditions[cond]: return True else: raise RuntimeError("ERROR: Invalid game over condition") return False
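# Hedged usage sketch for AleEnv; the ROM path and thresholds are
# illustrative. The tuple form of 'points' ends the game when the score
# drops to -1 or reaches +1, and 'frames' caps the total frame count.
import numpy as np

env = AleEnv('breakout.bin',
             game_over_conditions={'points': (-1, 1), 'frames': 10000},
             frame_skip=4,
             output_buffer_size=4,     # observe() stacks the last 4 screens
             reduce_screen=True)
while not env.game_over:
    reward, d_frame, done = env.act(np.random.randint(len(env.actions)))
obs = env.observe(flatten=True)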
class ALEEnvironment():
    def __init__(self, config):
        self.history = History3D(config)
        self.history_length = config.history_length
        self.mode = config.mode
        self.life_lost = False
        self.terminal = False
        self.score = 0
        from ale_python_interface import ALEInterface
        self.ale = ALEInterface()
        if config.display_screen:
            if sys.platform == 'darwin':
                import pygame
                pygame.init()
                self.ale.setBool('sound', False)  # Sound doesn't work on OSX
            elif sys.platform.startswith('linux'):
                self.ale.setBool('sound', False)
            self.ale.setBool('display_screen', True)

        self.ale.setInt('frame_skip', config.frame_skip)  # whether to skip frames
        self.ale.setBool('color_averaging', config.color_averaging)
        if config.random_seed:  # random seed for repeatable experiments
            self.ale.setInt('random_seed', config.random_seed)

        if config.record_screen_path:
            if not os.path.exists(config.record_screen_path):
                os.makedirs(config.record_screen_path)
            self.ale.setString('record_screen_dir', config.record_screen_path)

        if config.record_sound_filename:
            self.ale.setBool('sound', True)
            self.ale.setString('record_sound_filename', config.record_sound_filename)

        self.ale.loadROM(config.rom_file)

        if config.minimal_action_set:
            self.actions = self.ale.getMinimalActionSet()
        else:
            self.actions = self.ale.getLegalActionSet()

        self.screen_width = config.screen_width
        self.screen_height = config.screen_height

    def numActions(self):
        return len(self.actions)

    def new_game(self):
        state, terminal = self.reset()
        for _ in range(self.history_length + 1):
            self.history.add(state)
        return state, terminal, list(range(len(self.actions)))

    def reset(self):
        # In test mode, the game is simply initialized. In train mode, if the game
        # is in a terminal state due to a life loss but not yet game over, then only
        # the life-loss flag is reset so that the next game starts from the current
        # state. Otherwise, the game is simply initialized.
        if (self.mode == 'test' or
                not self.life_lost or        # `reset` called in the middle of an episode
                self.ale.game_over()):       # all lives are lost
            self.ale.reset_game()
        self.life_lost = False
        return self.getScreen(), self.isTerminal()

    def step(self, action):
        lives = self.ale.lives()
        reward = self.ale.act(self.actions[action])
        self.life_lost = (lives != self.ale.lives())
        self.score += reward
        self.current_state = self.getScreen()
        self.history.add(self.current_state)
        self.terminal = self.isTerminal()
        return reward, self.history.get(), self.terminal

    def getScreen(self):
        screen = self.ale.getScreenGrayscale()
        resized = cv2.resize(screen / 255., (self.screen_width, self.screen_height))
        return resized

    def isTerminal(self):
        if self.mode == 'train':
            return self.ale.game_over() or self.life_lost
        return self.ale.game_over()
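# Hedged stepping sketch for the config-driven environment above; the
# `config` namespace (and its History3D companion) come from the project
# this class belongs to and are assumed to be set up elsewhere.
import random

env = ALEEnvironment(config)
state, terminal, actions = env.new_game()
while not terminal:
    action = random.choice(actions)
    reward, stacked_state, terminal = env.step(action)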
# doc/examples/sharedLibraryInterfaceExample.cpp
import sys
from ale_python_interface import ALEInterface
import numpy as np

if len(sys.argv) < 2:
    print("Usage: ./ale_python_test1.py <ROM_FILE_NAME>")
    sys.exit()

ale = ALEInterface()

max_frames_per_episode = ale.getInt("max_num_frames_per_episode")
# the interface exposes typed setters (setInt/setBool/setFloat/setString)
ale.setInt("random_seed", 123)
random_seed = ale.getInt("random_seed")
print("random_seed: " + str(random_seed))

ale.loadROM(sys.argv[1])
legal_actions = ale.getLegalActionSet()

for episode in range(10):
    total_reward = 0.0
    while not ale.game_over():
        a = legal_actions[np.random.randint(legal_actions.size)]
        reward = ale.act(a)
        total_reward += reward
    print("Episode " + str(episode) + " ended with score: " + str(total_reward))
    ale.reset_game()
class ArcadeLearningEnvironment(Environment):
    """
    [Arcade Learning Environment](https://github.com/mgbellemare/Arcade-Learning-Environment)
    adapter (specification key: `ale`, `arcade_learning_environment`).

    May require:
    ```bash
    sudo apt-get install libsdl1.2-dev libsdl-gfx1.2-dev libsdl-image1.2-dev cmake

    git clone https://github.com/mgbellemare/Arcade-Learning-Environment.git
    cd Arcade-Learning-Environment

    mkdir build && cd build
    cmake -DUSE_SDL=ON -DUSE_RLGLUE=OFF -DBUILD_EXAMPLES=ON ..
    make -j 4
    cd ..

    pip3 install .
    ```

    Args:
        level (string): ALE rom file
            (<span style="color:#C00000"><b>required</b></span>).
        life_loss_terminal (bool): Whether loss of life signals a terminal state
            (<span style="color:#00C000"><b>default</b></span>: false).
        life_loss_punishment (float): Positive value subtracted from the reward on loss of life
            (<span style="color:#00C000"><b>default</b></span>: 0.0).
        repeat_action_probability (float): Repeats last action with given probability
            (<span style="color:#00C000"><b>default</b></span>: 0.0).
        visualize (bool): Whether to visualize interaction
            (<span style="color:#00C000"><b>default</b></span>: false).
        frame_skip (int > 0): Number of times to repeat an action without observing
            (<span style="color:#00C000"><b>default</b></span>: 1).
        seed (int): Random seed
            (<span style="color:#00C000"><b>default</b></span>: none).
    """

    def __init__(self, level, life_loss_terminal=False, life_loss_punishment=0.0,
                 repeat_action_probability=0.0, visualize=False, frame_skip=1, seed=None):
        from ale_python_interface import ALEInterface

        self.environment = ALEInterface()
        self.rom_file = level

        self.life_loss_terminal = life_loss_terminal
        self.life_loss_punishment = life_loss_punishment

        self.environment.setFloat(b'repeat_action_probability', repeat_action_probability)
        self.environment.setBool(b'display_screen', visualize)
        self.environment.setInt(b'frame_skip', frame_skip)
        if seed is not None:
            self.environment.setInt(b'random_seed', seed)

        # All set commands must be done before loading the ROM.
        self.environment.loadROM(rom_file=self.rom_file.encode())
        self.available_actions = tuple(self.environment.getLegalActionSet())

        # Full list of actions:
        # No-Op, Fire, Up, Right, Left, Down, Up Right, Up Left, Down Right, Down Left,
        # Up Fire, Right Fire, Left Fire, Down Fire, Up Right Fire, Up Left Fire,
        # Down Right Fire, Down Left Fire

    def __str__(self):
        return super().__str__() + '({})'.format(self.rom_file)

    def states(self):
        width, height = self.environment.getScreenDims()
        return dict(type='float', shape=(height, width, 3))

    def actions(self):
        return dict(type='int', num_values=len(self.available_actions))

    def close(self):
        self.environment.__del__()
        self.environment = None

    def get_states(self):
        screen = np.copy(self.environment.getScreenRGB(screen_data=self.screen))
        screen = screen.astype(dtype=np.float32) / 255.0
        return screen

    def reset(self):
        self.environment.reset_game()
        width, height = self.environment.getScreenDims()
        self.screen = np.empty((height, width, 3), dtype=np.uint8)
        self.lives = self.environment.lives()
        return self.get_states()

    def execute(self, actions):
        reward = self.environment.act(action=self.available_actions[actions])
        terminal = self.environment.game_over()
        states = self.get_states()

        next_lives = self.environment.lives()
        if next_lives < self.lives:
            if self.life_loss_terminal:
                terminal = True
            elif self.life_loss_punishment > 0.0:
                reward -= self.life_loss_punishment
        self.lives = next_lives

        return states, terminal, reward
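# Hedged usage sketch for the adapter above (the ROM path is an example):
import numpy as np

env = ArcadeLearningEnvironment(level='breakout.bin', frame_skip=4,
                                life_loss_terminal=True)
states = env.reset()
terminal, total = False, 0.0
while not terminal:
    action = np.random.randint(len(env.available_actions))
    states, terminal, reward = env.execute(action)
    total += reward
env.close()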
def main():
    pygame.init()
    ale = ALEInterface()
    ale.setInt(b'random_seed', 123)
    ale.setBool(b'display_screen', True)
    ale.setInt(b'frame_skip', 4)
    # ale.setFloat(b'repeat_action_probability', .7)
    # ale.setBool(b'color_averaging', True)

    game = 'breakout'  # ACKTR tasks: 'space_invaders', 'seaquest', 'qbert', 'pong', 'beam_rider', 'breakout'
    rom = home + '/Documents/ALE/roms/supported/' + game + '.bin'
    ale.loadROM(str.encode(rom))

    legal_actions = ale.getLegalActionSet()
    rewards, num_episodes = [], 5
    config = []
    agent = DQN_agent(config)
    for episode in range(num_episodes):
        total_reward = 0
        exp_state = []
        exp_action = 0
        exp_reward = 0
        exp_next_state = []
        while not ale.game_over():
            # Save frame
            frame = ale.getScreenGrayscale()
            frame = cv2.resize(frame, (84, 84))
            exp_next_state.append(frame)
            # Make action
            action = random.choice(legal_actions)
            reward = ale.act(action)
            total_reward += reward
            exp_reward += reward  # accumulate the actual reward for the 4-frame experience
            # Make experience
            if len(exp_next_state) == 4:
                state_ready = np.reshape(np.stack(exp_next_state), [4 * 84, 84])
                exp_action = action
                if len(exp_state) == 0:
                    exp_state = exp_next_state
                else:
                    # Note: the assembled experience is never stored anywhere.
                    experience = [exp_state, exp_action, exp_reward, exp_next_state]
                    exp_reward = 0
                    exp_state = exp_next_state
                exp_next_state = []
        print('Episode %d reward %d.' % (episode, total_reward))
        rewards.append(total_reward)
        ale.reset_game()

    average = sum(rewards) / len(rewards)
    print('Average for %d episodes: %d' % (num_episodes, average))
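# The experience assembled in main() is never stored; a minimal replay
# buffer (an illustrative assumption, not part of the snippet above)
# could look like this:
from collections import deque
import numpy as np

replay_buffer = deque(maxlen=100000)       # drops the oldest experience when full

def store(experience):
    replay_buffer.append(experience)       # [state, action, reward, next_state]

def sample(batch_size=32):
    idx = np.random.choice(len(replay_buffer), batch_size, replace=False)
    return [replay_buffer[i] for i in idx]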
class AtariEnvironment: num_actions = 18 # Use full action set def __init__(self, frame_shape, frame_postprocess=lambda x: x): self.ale = ALEInterface() self.ale.setBool(b"display_screen", cfg.display_screen) self.ale.setInt(b"frame_skip", 1) self.ale.setBool(b"color_averaging", False) self.ale.setInt(b"random_seed", cfg.random_seed) self.ale.setFloat(b"repeat_action_probability", cfg.sticky_prob) self.ale.loadROM(str.encode(cfg.rom)) self.ale.setMode(cfg.mode) self.ale.setDifficulty(cfg.difficulty) self.action_set = self.ale.getLegalActionSet() assert len(self.action_set) == AtariEnvironment.num_actions screen_dims = tuple(reversed(self.ale.getScreenDims())) + (1,) self._frame_buffer = CircularBuffer( cfg.frame_buffer_size, screen_dims, np.uint8 ) self._frame_stack = CircularBuffer( cfg.frame_history_size, frame_shape, np.uint8 ) self._frame_postprocess = frame_postprocess self._episode_count = 0 self.reset(inc_episode_count=False) def _is_terminal(self): return self.ale.game_over() def _get_single_frame(self): stacked_frames = np.concatenate(self._frame_buffer, axis=2) maxed_frame = np.amax(stacked_frames, axis=2) expanded_frame = np.expand_dims(maxed_frame, 3) frame = self._frame_postprocess(expanded_frame) return frame def reset(self, inc_episode_count=True): self._episode_frames = 0 self._episode_reward = 0 if inc_episode_count: self._episode_count += 1 self.ale.reset_game() for _ in range(cfg.frame_buffer_size): self._frame_buffer.append(self.ale.getScreenGrayscale()) for _ in range(cfg.frame_history_size): self._frame_stack.append(self._get_single_frame()) def act(self, action): assert not self._is_terminal() cum_reward = 0 for _ in range(cfg.frame_skip): cum_reward += self.ale.act(self.action_set[action]) self._frame_buffer.append(self.ale.getScreenGrayscale()) self._frame_stack.append(self._get_single_frame()) self._episode_frames += cfg.frame_skip self._episode_reward += cum_reward cum_reward = np.clip(cum_reward, -1, 1) return cum_reward, self.state, self._is_terminal() @property def state(self): assert len(self._frame_buffer) == cfg.frame_buffer_size assert len(self._frame_stack) == cfg.frame_history_size return np.concatenate(self._frame_stack, axis=-1) @property def episode_reward(self): return self._episode_reward @property def episode_frames(self): return self._episode_frames @property def episode_steps(self): return self._episode_frames // cfg.frame_skip @property def episode_count(self): return self._episode_count
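# _get_single_frame() above removes Atari sprite flicker by taking a
# pixel-wise maximum over the buffered grayscale screens. The same idea in
# isolation, with synthetic frames:
import numpy as np

frame_a = np.zeros((210, 160, 1), dtype=np.uint8)
frame_b = np.zeros((210, 160, 1), dtype=np.uint8)
frame_a[50, 50] = 255                      # sprite visible only on frame A
frame_b[60, 60] = 255                      # sprite visible only on frame B

stacked = np.concatenate([frame_a, frame_b], axis=2)
maxed = np.amax(stacked, axis=2)           # both sprites survive the merge
assert maxed[50, 50] == 255 and maxed[60, 60] == 255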
class AtariEnvironment: def __init__(self, seed=1, record=False): self.ale = ALEInterface() self.ale.setBool(b'display_screen', FLAGS.display_screen or record) self.ale.setInt(b'frame_skip', 1) self.ale.setBool(b'color_averaging', False) self.ale.setInt(b'random_seed', seed) self.ale.setFloat(b'repeat_action_probability', FLAGS.sticky_prob) self.ale.setInt(b'max_num_frames_per_episode', FLAGS.max_num_frames_per_episode) if record: if not tf.gfile.Exists(FLAGS.record_dir): tf.gfile.MakeDirs(FLAGS.record_dir) self.ale.setBool(b'sound', True) self.ale.setString(b'record_screen_dir', str.encode(FLAGS.record_dir)) self.ale.setString(b'record_sound_filename', str.encode(FLAGS.record_dir + '/sound.wav')) self.ale.setInt(b'fragsize', 64) self.ale.loadROM(str.encode(FLAGS.rom)) self.ale.setMode(FLAGS.mode) self.ale.setDifficulty(FLAGS.difficulty) self.action_set = self.ale.getLegalActionSet() screen_dims = tuple(reversed(self.ale.getScreenDims())) + (1,) self._frame_buffer = CircularBuffer(FLAGS.frame_buffer_size, screen_dims, np.uint8) self.reset() def _is_terminal(self): return self.ale.game_over() def _get_single_frame(self): stacked_frames = np.concatenate(self._frame_buffer, axis=2) maxed_frame = np.amax(stacked_frames, axis=2) expanded_frame = np.expand_dims(maxed_frame, 3) return expanded_frame def reset(self): self._episode_frames = 0 self._episode_reward = 0 self.ale.reset_game() for _ in range(FLAGS.frame_buffer_size): self._frame_buffer.append(self.ale.getScreenGrayscale()) def act(self, action): assert not self._is_terminal() cum_reward = 0 for _ in range(FLAGS.frame_skip): cum_reward += self.ale.act(self.action_set[action]) self._frame_buffer.append(self.ale.getScreenGrayscale()) self._episode_frames += FLAGS.frame_skip self._episode_reward += cum_reward cum_reward = np.clip(cum_reward, -1, 1) return cum_reward, self._get_single_frame(), self._is_terminal() def state(self): assert len(self._frame_buffer) == FLAGS.frame_buffer_size return self._get_single_frame() def num_actions(self): return len(self.action_set) def episode_reward(self): return self._episode_reward def episode_frames(self): return self._episode_frames def frame_skip(self): return FLAGS.frame_skip
class KungFuMaster(object): def __init__( self, rom='/home/josema/AI/ALE/Arcade-Learning-Environment/Roms/kung_fu_master.bin', trainsessionname='test'): self.agent = None self.isAuto = True self.gui_visible = False self.userquit = False self.optimalPolicyUser = False # optimal policy set by user self.trainsessionname = trainsessionname self.elapsedtime = 0 # elapsed time for this experiment self.keys = 0 # Configuration self.pause = False # game is paused self.debug = False self.sleeptime = 0.0 self.command = 0 self.iteration = 0 self.cumreward = 0 self.cumreward100 = 0 # cum reward for statistics self.cumscore100 = 0 self.ngoalreached = 0 self.max_level = 1 self.hiscore = 0 self.hireward = -1000000 self.resfile = open("data/" + self.trainsessionname + ".dat", "a+") self.legal_actions = 0 self.rom = rom self.key_status = [] def init(self, agent): # init after creation (uses args set from cli) self.ale = ALEInterface() self.ale.setInt('random_seed', 123) ram_size = self.ale.getRAMSize() self.ram = np.zeros((ram_size), dtype=np.uint8) if (self.gui_visible): os.environ['SDL_VIDEO_CENTERED'] = '1' if sys.platform == 'darwin': pygame.init() self.ale.setBool('sound', False) # Sound doesn't work on OSX elif sys.platform.startswith('linux'): pygame.init() self.ale.setBool('sound', True) self.ale.setBool('display_screen', False) self.ale.loadROM(self.rom) self.legal_actions = self.ale.getLegalActionSet() if (self.gui_visible): (self.screen_width, self.screen_height) = self.ale.getScreenDims() print("width/height: " + str(self.screen_width) + "/" + str(self.screen_height)) (display_width, display_height) = (1024, 420) self.screen = pygame.display.set_mode( (display_width, display_height)) pygame.display.set_caption( "Reinforcement Learning - Sapienza - Jose M Salas") self.numpy_surface = np.zeros( (self.screen_height, self.screen_width, 3), dtype=np.uint8) self.game_surface = pygame.Surface( (self.screen_width, self.screen_height)) pygame.display.flip() #init clock self.clock = pygame.time.Clock() self.agent = agent self.nactions = len( self.legal_actions ) # 0: not moving, 1: left, 2: right, 3: up, 4: down for i in range(self.nactions): self.key_status.append(False) print(self.nactions) # ns = 89999 # Number of statuses if we use enemy type ram info without level number #FINAL ns = 489999 # Number of statuses if we use enemy type ram info ns = 4899999 # Number of statuses if we use enemy type ram info # ns = 48999 print('Number of states: %d' % ns) self.agent.init(ns, self.nactions) # 1 for RA not used here def initScreen(self): if (self.gui_visible): if sys.platform == 'darwin': pygame.init() self.ale.setBool('sound', False) # Sound doesn't work on OSX elif sys.platform.startswith('linux'): pygame.init() self.ale.setBool('sound', True) self.ale.setBool('display_screen', False) if (self.gui_visible): (self.screen_width, self.screen_height) = self.ale.getScreenDims() print("width/height: " + str(self.screen_width) + "/" + str(self.screen_height)) (display_width, display_height) = (1024, 420) self.screen = pygame.display.set_mode( (display_width, display_height)) pygame.display.set_caption( "Reinforcement Learning - Sapienza - Jose M Salas") self.numpy_surface = np.zeros( (self.screen_height, self.screen_width, 3), dtype=np.uint8) self.game_surface = pygame.Surface( (self.screen_width, self.screen_height)) pygame.display.flip() #init clock self.clock = pygame.time.Clock() def reset(self): self.pos_x = 0 self.pos_y = 0 # Kung fu master observations self.enemy_pos = 0 self.n_enemies = 0 self.my_pos = 0 
self.danger_pos = 0 self.danger_type = 0 self.enemy_type = 0 # 0, 1, 2, 3, 80, 81, 82, 40 self.blocked = 0 self.prev_blocked = 0 self.hold_hit = 0 self.time_left1 = 0 self.time_left2 = 0 self.my_energy = 39 self.previous_my_energy = 39 self.lifes = 3 self.previous_lifes = 3 self.got_hit = 0 self.got_blocked = 0 self.got_unblocked = 0 self.still_blocked = False self.starting_pos = 0 self.level = 1 self.score = 0 self.cumreward = 0 self.cumscore = 0 self.action_reward = 0 self.current_reward = 0 # accumulate reward over all events happened during this action until next different state self.prev_state = None # previous state self.firstAction = True # first action of the episode self.finished = False # episode finished self.newstate = True # new state reached self.numactions = 0 # number of actions in this episode self.iteration += 1 self.agent.optimal = self.optimalPolicyUser or ( self.iteration % 100 ) == 0 # False #(random.random() < 0.5) # choose greedy action selection for the entire episode def pair_function(self): # Combine the number of enemies, player blocked and danger type information into 7 different states if self.n_enemies > 0: self.danger_type = 0 # print (str(self.n_enemies) + " - " + str(self.danger_type) + ' - ' + str(self.blocked)) pair = (int)( (0.5 * (self.n_enemies + self.danger_type) * (self.n_enemies + self.danger_type + 1) + self.danger_type + 1) * (1 - (self.blocked / 128))) if pair > 8: return 5 #game not started yet else: return pair def enemy_type_s(self): if self.enemy_type > 127: return (self.enemy_type - 128 + 4) elif self.enemy_type == 64: return 8 else: return self.enemy_type def getstate(self): # print ('enemy type: ' + str(self.enemy_type_s()) + 'level: ' + str(self.level -1) ) x = (int)((self.level - 1) * 1000000 + self.pair_function() * 100000 + (self.enemy_type_s() * 10000) + np.rint(self.my_pos / 32) * 1000 + np.rint(self.enemy_pos / 32) * 100 + np.rint(self.danger_pos / 32) * 10 + np.rint(self.hold_hit / 16)) #3FINAL x = (int)((self.enemy_type_s()*1000) + (self.level-1)*100000 + self.pair_function()*10000 + np.rint(self.enemy_pos/32)*100 + np.rint(self.danger_pos/32)*10 + np.rint(self.hold_hit/16)) #2NO LEVEL x = (int)((self.enemy_type_s()*1000) + self.pair_function()*10000 + np.rint(self.enemy_pos/32)*100 + np.rint(self.danger_pos/32)*10 + np.rint(self.hold_hit/16)) #1NO ENEMY TYPE x = (int)((self.level-1)*10000 + self.pair_function()*1000 + np.rint(self.enemy_pos/32)*100 + np.rint(self.danger_pos/32)*10 + np.rint(self.hold_hit/16)) return x def goal_reached(self): #return (self.my_energy>0 and self.time_left1==0 and self.time_left2<5) #and self.my_energy==39) return (self.level == 5) def update(self, a): self.command = a # Update RAM self.ale.getRAM(self.ram) # Get info from RAM self.enemy_pos = self.ram[72] self.n_enemies = self.ram[91] self.danger_pos = self.ram[73] self.my_pos = self.ram[74] self.hold_hit = self.ram[77] self.enemy_type = self.ram[54] if self.level < self.ram[31]: self.starting_pos = self.ram[74] self.level = self.ram[31] self.max_level = max(self.level, self.max_level) # Danger/Enemy position: # 49 = no danger # 50 = danger approaching from left # 208 = danger approaching from right # ram[96] = 6, danger comes from top # ram[96] = 29, danger comes from bottom # ram[96] = 188, none if self.ram[96] == 6: self.danger_type = 0 elif self.ram[96] == 29: self.danger_type = 1 else: self.danger_type = 2 self.time_left1 = self.ram[27] self.time_left2 = self.ram[28] self.previous_my_energy = self.my_energy self.my_energy = self.ram[75] if 
        if (self.my_energy < self.previous_my_energy and not self.still_blocked
                and self.ram[34] == 0):
            self.got_hit = STATES['GotHit']
        else:
            self.got_hit = 0
        self.previous_lifes = self.lifes
        self.lifes = self.ram[29]
        self.prev_blocked = self.blocked
        self.blocked = self.ram[61]
        if self.blocked > self.prev_blocked and not self.still_blocked:
            self.got_blocked = STATES['GotBlocked']
            self.still_blocked = True
            self.got_unblocked = 0
        elif self.blocked < self.prev_blocked and self.still_blocked:
            self.got_unblocked = STATES['GotUnblocked']
            self.still_blocked = False
            self.got_blocked = 0
        else:
            self.got_blocked = 0
            self.got_unblocked = 0
        # print('enemy_pos=' + str(self.enemy_pos) + ' - danger_pos=' + str(self.danger_pos)
        #       + ' - my_position=' + str(self.my_pos) + ' - my_energy=' + str(self.my_energy)
        #       + ' - blocked=' + str(self.blocked) + ' - danger_type=' + str(self.danger_type))
        self.prev_state = self.getstate()  # remember previous state
        # print(" == Update start ", self.prev_state, " action", self.command)
        self.current_reward = 0  # accumulate reward over all events during this action until the next different state
        self.numactions += 1  # total number of actions executed in this episode
        # while (self.prev_state == self.getstate()):
        if self.firstAction:
            self.starting_pos = self.ram[74]
            self.firstAction = False
        self.current_reward = self.ale.act(a)
        if self.ram[34] == 0:  # only when playing
            if (a == 3 and self.starting_pos < self.my_pos) or \
               (a == 4 and self.starting_pos > self.my_pos):
                self.action_reward = STATES['MoveFW']
            elif (a == 3 and self.starting_pos > self.my_pos) or \
                 (a == 4 and self.starting_pos < self.my_pos):
                self.action_reward = STATES['MoveBW']
            else:
                self.action_reward = STATES['NotMoving']
        self.score += self.current_reward
        self.current_reward += self.action_reward
        # print('score=' + str(self.score) + ' current reward=' + str(np.rint(self.current_reward))
        #       + ' - energy=' + str(self.my_energy / 39.0) + ' - got_hit=' + str(self.got_hit)
        #       + ' - got_blocked=' + str(self.got_blocked) + ' - got_unblocked=' + str(self.got_unblocked))

        # check if the episode terminated
        if self.goal_reached():
            self.current_reward += STATES['Alive']
            self.ngoalreached += 1
            # self.ale.reset_game()
            self.finished = True
        if self.ale.game_over():
            self.current_reward += STATES['Dead']
            if self.level > 1:
                print('game over in level ' + str(self.level))
            if self.my_energy > 0 and self.lifes == 3:
                print('Game over alive????')
            self.ale.reset_game()
            self.finished = True
        if self.level > 2:
            if not self.gui_visible:
                self.gui_visible = True
                self.initScreen()
        # print(" ** Update end ", self.getstate(), " prev ", self.prev_state)

    def input(self):
        self.isPressed = False
        if self.gui_visible:
            for event in pygame.event.get():
                if event.type == pygame.QUIT:
                    return False
                if event.type == pygame.KEYDOWN:
                    if event.key == pygame.K_SPACE:
                        self.pause = not self.pause
                        print("Game paused: ", self.pause)
                    elif event.key == pygame.K_a:
                        self.isAuto = not self.isAuto
                        self.sleeptime = int(self.isAuto) * 0.07
                    elif event.key == pygame.K_s:
                        self.sleeptime = 1.0
                        self.agent.debug = False
                    elif event.key == pygame.K_d:
                        self.sleeptime = 0.07
                        self.agent.debug = False
                    elif event.key == pygame.K_f:
                        self.sleeptime = 0.005
                        self.agent.debug = False
                    elif event.key == pygame.K_g:
                        self.sleeptime = 0.0
                        self.agent.debug = False
                    elif event.key == pygame.K_o:
                        self.optimalPolicyUser = not self.optimalPolicyUser
                        print("Best policy: ", self.optimalPolicyUser)
                    elif event.key == pygame.K_q:
= True print "User quit !!!" else: pressed = pygame.key.get_pressed() self.keys = 0 self.keys |= pressed[pygame.K_UP] self.keys |= pressed[pygame.K_DOWN] << 1 self.keys |= pressed[pygame.K_LEFT] << 2 self.keys |= pressed[pygame.K_RIGHT] << 3 self.keys |= pressed[pygame.K_z] << 4 self.command = key_action_tform_table[self.keys] self.key_status[self.command] = True if event.type == pygame.KEYUP: pressed = pygame.key.get_pressed() self.keys = 0 self.keys |= pressed[pygame.K_UP] self.keys |= pressed[pygame.K_DOWN] << 1 self.keys |= pressed[pygame.K_LEFT] << 2 self.keys |= pressed[pygame.K_RIGHT] << 3 self.keys |= pressed[pygame.K_z] << 4 self.command = key_action_tform_table[self.keys] self.key_status[self.command] = False if not (True in self.key_status): self.command = 0 return True def getUserAction(self): return self.command def getreward(self): r = np.rint( self.current_reward ) + self.got_hit + self.got_blocked + self.got_unblocked - np.rint( self.blocked / 128) self.cumreward += r return r def print_report(self, printall=False): toprint = printall ch = ' ' if (self.agent.optimal): ch = '*' toprint = True s = 'Iter %6d, sc: %3d, l: %d, na: %4d, r: %5d %c' % ( self.iteration, self.score, self.level, self.numactions, self.cumreward, ch) if self.score > self.hiscore: self.hiscore = self.score s += ' HISCORE ' toprint = True if self.cumreward > self.hireward: self.hireward = self.cumreward s += ' HIREWARD ' toprint = True if (toprint): print(s) self.cumreward100 += self.cumreward self.cumscore100 += self.score numiter = 100 if (self.iteration % numiter == 0): #self.doSave() pgoal = float(self.ngoalreached * 100) / numiter print( '----------------------------------------------------------------------------------------------------------------------' ) print( "%s %6d avg last 100: reward %d | score %.2f | level %d | p goals %.1f %%" % (self.trainsessionname, self.iteration, self.cumreward100 / 100, float(self.cumscore100) / 100, self.max_level, pgoal)) print( '----------------------------------------------------------------------------------------------------------------------' ) self.cumreward100 = 0 self.cumscore100 = 0 self.ngoalreached = 0 sys.stdout.flush() self.resfile.write( "%d,%d,%d,%d\n" % (self.score, self.cumreward, self.goal_reached(), self.numactions)) self.resfile.flush() def draw(self): if self.gui_visible: self.screen.fill((0, 0, 0)) self.ale.getScreenRGB(self.numpy_surface) pygame.surfarray.blit_array( self.game_surface, np.transpose(self.numpy_surface, (1, 0, 2))) # pygame.pixelcopy.array_to_surface(self.game_surface, np.transpose(self.numpy_surface,(1,0,2))) self.screen.blit( pygame.transform.scale2x( pygame.transform.scale( self.game_surface, (self.screen_height, self.screen_height))), (0, 0)) #Display ram bytes font = pygame.font.SysFont("Ubuntu Mono", 32) text = font.render("RAM: ", 1, (255, 208, 208)) self.screen.blit(text, (430, 10)) font = pygame.font.SysFont("Ubuntu Mono", 25) height = font.get_height() * 1.2 line_pos = 40 ram_pos = 0 while (ram_pos < 128): ram_string = ''.join([ "%02X " % self.ram[x] for x in range(ram_pos, min(ram_pos + 16, 128)) ]) text = font.render(ram_string, 1, (255, 255, 255)) self.screen.blit(text, (440, line_pos)) line_pos += height ram_pos += 16 #display current action font = pygame.font.SysFont("Ubuntu Mono", 32) text = font.render("Current Action: " + str(self.command), 1, (208, 208, 255)) height = font.get_height() * 1.2 self.screen.blit(text, (430, line_pos)) line_pos += height #display reward font = pygame.font.SysFont("Ubuntu Mono", 
            font = pygame.font.SysFont("Ubuntu Mono", 30)
            text = font.render("Total Reward: " + str(self.cumreward), 1, (208, 255, 255))
            self.screen.blit(text, (430, line_pos))
            pygame.display.flip()
            # clock.tick(60.)
        else:
            return 0

    def quit(self):
        self.resfile.close()
        pygame.quit()
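
The hard-coded RAM map in update() (bytes 72, 73, 74, 91, 31, 54, 77 and 96 of the Kung Fu Master cartridge) can be probed outside the class. Below is a minimal stand-alone sketch; the ale_python_interface import path and the ROM path are assumptions, since the imports are not shown in this file.

import numpy as np
from ale_python_interface import ALEInterface  # assumed import path for the classic ALE bindings

ale = ALEInterface()
ale.setInt('random_seed', 123)
ale.loadROM('kung_fu_master.bin')  # placeholder ROM path
ram = np.zeros(ale.getRAMSize(), dtype=np.uint8)  # same buffer shape as in KungFuMaster.init()

ale.act(0)       # noop, advances one frame
ale.getRAM(ram)  # fills the buffer in place, exactly as update() does
print({
    'enemy_pos': ram[72],
    'danger_pos': ram[73],
    'my_pos': ram[74],
    'n_enemies': ram[91],
    'level': ram[31],
    'danger_from_top': ram[96] == 6,  # 29 = from the bottom, 188 = none
})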
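
pair_function() above is the classic Cantor pairing of n_enemies and danger_type, shifted by one and zeroed while the player is blocked. This hypothetical helper shows the pairing step in isolation and checks that each combination gets a distinct id:

def cantor_pair(a, b):
    # Cantor pairing: a bijection between pairs of naturals and single naturals
    return (a + b) * (a + b + 1) // 2 + b

seen = {}
for n_enemies in range(4):
    for danger_type in range(3):
        p = cantor_pair(n_enemies, danger_type) + 1  # pair_function() adds 1
        assert p not in seen  # every combination maps to its own id
        seen[p] = (n_enemies, danger_type)
print(sorted(seen))  # [1, 2, 3, 4, 5, 6, 7, 8, 9, 12, 13, 18]

In the class only a handful of these values survive: danger_type is forced to 0 whenever enemies are present, a blocked player zeroes the product, and ids above 8 collapse to 5.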
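
getstate() packs the discretized features into the decimal digits of a single integer; the active encoding is what the ns = 4899999 table size in init() is sized against. A stand-alone sketch of the same packing with illustrative values (pack_state is a hypothetical name):

import numpy as np

def pack_state(level, pair, enemy_type, my_pos, enemy_pos, danger_pos, hold_hit):
    # one decimal digit per feature; byte-valued positions are coarsened
    # to 0..8 by dividing by 32 and rounding
    return int((level - 1) * 1000000
               + pair * 100000
               + enemy_type * 10000
               + np.rint(my_pos / 32.0) * 1000
               + np.rint(enemy_pos / 32.0) * 100
               + np.rint(danger_pos / 32.0) * 10
               + np.rint(hold_hit / 16.0))

print(pack_state(level=2, pair=3, enemy_type=1,
                 my_pos=96, enemy_pos=64, danger_pos=50, hold_hit=0))  # 1313220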
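
For manual play, input() folds the arrow keys and the fire key (z) into a 5-bit mask that indexes key_action_tform_table, which is defined elsewhere in this project. A hypothetical helper showing just the mask construction:

def key_mask(up=False, down=False, left=False, right=False, fire=False):
    # bit 0 = up, bit 1 = down, bit 2 = left, bit 3 = right, bit 4 = fire (z)
    return (int(up)
            | (int(down) << 1)
            | (int(left) << 2)
            | (int(right) << 3)
            | (int(fire) << 4))

print(key_mask(up=True, fire=True))  # 17 == 0b10001, the index into key_action_tform_table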