class emulator:
    """Thin wrapper around an ALE game: loads a ROM, exposes frame grabs,
    episode stepping, and an optional OpenCV preview window."""

    def __init__(self, rom_name, vis):
        """rom_name: ROM filename under 'roms/'; vis: if True, open a cv2 preview window."""
        self.ale = ALEInterface()
        # BUGFIX: option key was misspelled "max_mum_frames_per_episode", so this
        # silently queried a non-existent setting ("max_num_..." is the ALE name).
        self.max_frames_per_episode = self.ale.getInt("max_num_frames_per_episode")
        self.ale.setInt("random_seed", 123)
        self.ale.setInt("frame_skip", 4)
        self.ale.loadROM('roms/' + rom_name)
        self.legal_actions = self.ale.getMinimalActionSet()
        # Map each ALE action code to its index in the minimal action set.
        self.action_map = {action: i for i, action in enumerate(self.legal_actions)}
        # BUGFIX: was a Python-2 print statement; the rest of this class uses print().
        print(self.legal_actions)
        self.screen_width, self.screen_height = self.ale.getScreenDims()
        print("width/height: " + str(self.screen_width) + "/" + str(self.screen_height))
        self.vis = vis
        if vis:
            cv2.startWindowThread()
            cv2.namedWindow("preview")

    def get_image(self):
        """Return the current screen as a (screen_height, screen_width, 3) RGB array."""
        image = self.ale.getScreenRGB()
        image = np.reshape(image, (self.screen_height, self.screen_width, 3))
        return image

    def newGame(self):
        """Reset the episode; return (first frame, reward 0, terminal False)."""
        self.ale.reset_game()
        return self.get_image(), 0, False

    def next(self, action_indx):
        """Apply one ALE action code; return (next frame, reward, game_over)."""
        reward = self.ale.act(action_indx)
        nextstate = self.get_image()
        if self.vis:
            cv2.imshow('preview', nextstate)
        return nextstate, reward, self.ale.game_over()

    def train(self):
        """Play 10 episodes with uniformly random actions, showing each frame
        via OpenCV (blocks on keypress) and dumping it to a PNG."""
        for episode in range(10):
            total_reward = 0
            frame_number = 0
            while not self.ale.game_over():
                a = self.legal_actions[random.randrange(len(self.legal_actions))]
                # Apply an action and get the resulting reward
                reward = self.ale.act(a)
                total_reward += reward
                screen = self.ale.getScreenRGB()
                screen = np.array(screen).reshape([self.screen_height, self.screen_width, -1])
                frame_number = self.ale.getEpisodeFrameNumber()
                cv2.imshow("screen", screen / 255.0)
                cv2.waitKey(0)  # blocks until a key is pressed
                self.ale.saveScreenPNG("test_" + str(frame_number) + ".png")
            print('Episode %d ended with score: %d' % (episode, total_reward))
            print('Frame number is : ', frame_number)
            self.ale.reset_game()
class AleEnv(object):
    '''ALE wrapper for RL training

    game_over_conditions={'points':(-1, 1)}: dict that describes all desired
    game over conditions; each key corresponds to a condition that is checked;
    the first condition met produces a game over
        points: int or tuple of integers
            int: if x < 0, game ends when score is <= x
                 if x >= 0, game ends when score is >= x
            tuple: game ends if score <= x[0] or score >= x[1]
        lives: int that ends game when lives <= x
        frames: int that ends game when total number of frames >= x
        episodes: int that ends game when num of episodes >= x

    Use max_num_frames_per_episode to set max episode length
    '''
    # will include timing and hidden functionality in future iterations

    def __init__(self, rom_file, display_screen=False, sound=False,
                 random_seed=0, game_over_conditions=None, frame_skip=1,
                 repeat_action_probability=0.25, max_num_frames_per_episode=0,
                 min_action_set=False, screen_color='gray', fps=60,
                 output_buffer_size=1, reduce_screen=False):
        # ALE instance and setup
        self.ale = ALEInterface()
        # TODO: check if rom file exists; will crash jupyter kernel otherwise
        # First load: a ROM must be loaded before the action set can be queried.
        self.ale.loadROM(str.encode(rom_file))

        self.ale.setBool(b'sound', sound)
        self.ale.setBool(b'display_screen', display_screen)

        if min_action_set:
            self.legal_actions = self.ale.getMinimalActionSet()
        else:
            self.legal_actions = self.ale.getLegalActionSet()

        self.ale.setInt(b'random_seed', random_seed)
        self.ale.setInt(b'frame_skip', frame_skip)
        self.frame_skip = frame_skip
        self.ale.setFloat(b'repeat_action_probability', repeat_action_probability)
        self.ale.setInt(b'max_num_frames_per_episode', max_num_frames_per_episode)
        # Second load: ALE settings only take effect on a (re)load.
        self.ale.loadROM(str.encode(rom_file))

        # BUGFIX: use a None sentinel instead of a mutable {} default argument.
        self.game_over_conditions = {} if game_over_conditions is None else game_over_conditions
        self.screen_color = screen_color
        self.reduce_screen = reduce_screen
        # Wall-clock seconds represented by one environment step.
        self.d_frame = (fps**-1) * self.frame_skip

        # set up output buffer
        self.output_buffer_size = output_buffer_size
        self.queue_size = self.output_buffer_size
        self._reset_params()

    def observe(self, flatten=False, expand_dim=False):
        """Return the buffered screens; optionally flattened and/or with two
        leading singleton dimensions prepended."""
        # BUGFIX: np.stack() requires a sequence of arrays; the original passed
        # a generator, which modern NumPy rejects.
        frames = [self.output_queue[i] for i in range(self.output_buffer_size)]
        if flatten is True:
            out = np.stack(frames).flatten()
        else:
            out = np.squeeze(np.stack(frames))
        if expand_dim is True:
            return np.expand_dims(np.expand_dims(out, axis=0), axis=1)
        return out

    @property
    def width(self):
        return self.game_screen.shape[1]

    @property
    def height(self):
        return self.game_screen.shape[0]

    @property
    def game_over(self):
        return self._game_over()

    @property
    def actions(self):
        return self.legal_actions

    @property
    def lives(self):
        return self.ale.lives()

    def _grab_screen(self):
        """Fetch the current screen per self.screen_color into self.game_screen,
        optionally downsampled to 110x84 and cropped (rows 21..-5) to 84x84."""
        if self.screen_color == 'gray' or self.screen_color == 'grey':
            screen = np.squeeze(self.ale.getScreenGrayscale())
            if self.reduce_screen:
                screen = resize(screen, output_shape=(110, 84))
                screen = screen[0 + 21:-1 - 4, :]
        elif self.screen_color == 'rgb' or self.screen_color == 'color':
            screen = self.ale.getScreenRGB()
            if self.reduce_screen:
                screen = resize(screen, output_shape=(110, 84, 3))
                screen = screen[0 + 21:-1 - 4, :, :]
        self.game_screen = screen

    def _reset_params(self):
        """Reset bookkeeping counters and refill the output buffer with zero
        frames plus the current screen."""
        self.total_points = 0
        self.total_frames = 0
        self.curr_episode = 1
        self.prev_ep_frame_num = -float("inf")
        self._grab_screen()
        self.output_queue = deque(
            np.zeros(shape=(self.queue_size - 1, self.height, self.width)),
            self.queue_size)
        self.output_queue.appendleft(self.game_screen)

    def reset(self):
        self.ale.reset_game()
        self._reset_params()

    def act(self, action):
        """Apply legal_actions[action]; return (reward, d_frame, game_over)."""
        reward = self.ale.act(self.legal_actions[action])
        self._grab_screen()
        self.output_queue.pop()
        self.output_queue.appendleft(self.game_screen)
        self.total_points += reward
        self.total_frames += self.frame_skip
        # The per-episode frame counter resets on a new episode; detect that
        # wraparound to count episodes.
        if self.ale.getEpisodeFrameNumber() <= self.prev_ep_frame_num:
            self.curr_episode += 1
        self.prev_ep_frame_num = self.ale.getEpisodeFrameNumber()
        return reward, self.d_frame, self.game_over

    def _game_over(self):
        """True when ALE reports game over or any configured condition fires."""
        if self.ale.game_over():
            return True
        for cond in self.game_over_conditions:
            if cond == 'points':
                threshold = self.game_over_conditions[cond]
                if isinstance(threshold, int):
                    # BUGFIX: honour the documented semantics for negative
                    # thresholds (end when score <= x for x < 0); the old code
                    # used >= unconditionally, which fired immediately at 0.
                    if (threshold < 0 and self.total_points <= threshold) or \
                       (threshold >= 0 and self.total_points >= threshold):
                        return True
                elif isinstance(threshold, tuple):
                    if (self.total_points <= threshold[0] or
                            self.total_points >= threshold[1]):
                        return True
            elif cond == 'lives':
                if self.lives <= self.game_over_conditions[cond]:
                    return True
            elif cond == 'frames':
                if self.total_frames >= self.game_over_conditions[cond]:
                    return True
            elif cond == 'episodes':
                if self.curr_episode >= self.game_over_conditions[cond]:
                    return True
            else:
                raise RuntimeError("ERROR: Invalid game over condition")
        return False
class AtariEnvironment:
    # Wraps an ALEInterface game with a fixed 4-frame action repeat, a
    # flicker-removing pixel max over the last two frames, and optional
    # periodic PNG screen capture.

    def __init__(self, args, outputDir):
        # args: must provide .screen_capture_freq (games between captures) and
        #       .rom (value accepted by ale.loadROM) -- assumed from usage here.
        # outputDir: root directory for screen captures.
        self.outputDir = outputDir
        self.screenCaptureFrequency = args.screen_capture_freq

        self.ale = ALEInterface()
        self.ale.setInt(b'random_seed', 123456)
        random.seed(123456)
        # Fix https://groups.google.com/forum/#!topic/deep-q-learning/p4FAIaabwlo
        self.ale.setFloat(b'repeat_action_probability', 0.0)

        # Load the ROM file
        self.ale.loadROM(args.rom)
        self.actionSet = self.ale.getMinimalActionSet()
        self.gameNumber = 0
        self.stepNumber = 0
        self.resetGame()

    # --- simple accessors ---
    def getNumActions(self):
        return len(self.actionSet)

    def getState(self):
        return self.state

    def getGameNumber(self):
        return self.gameNumber

    def getFrameNumber(self):
        return self.ale.getFrameNumber()

    def getEpisodeFrameNumber(self):
        return self.ale.getEpisodeFrameNumber()

    def getEpisodeStepNumber(self):
        return self.episodeStepNumber

    def getStepNumber(self):
        return self.stepNumber

    def getGameScore(self):
        return self.gameScore

    def isGameOver(self):
        return self.ale.game_over()

    def step(self, action):
        # Applies actionSet[action] for up to 4 consecutive ALE frames.
        # Returns (reward, state, isTerminal); isTerminal is 1 when a life was
        # lost or the game ended during the repeat.
        previousLives = self.ale.lives()
        reward = 0
        isTerminal = 0
        self.stepNumber += 1
        self.episodeStepNumber += 1

        for i in range(4):
            prevScreenRGB = self.ale.getScreenRGB()
            reward += self.ale.act(self.actionSet[action])
            screenRGB = self.ale.getScreenRGB()

            # Detect end of episode, I don't think I'm handling this right in terms
            # of the overall game loop (??)
            if self.ale.lives() < previousLives or self.ale.game_over():
                isTerminal = 1
                break

            # Periodic PNG capture. NOTE(review): raises ZeroDivisionError if
            # screen_capture_freq is 0 -- assumed nonzero.
            if self.gameNumber % self.screenCaptureFrequency == 0:
                # NOTE: local name 'dir' shadows the builtin.
                dir = self.outputDir + '/screen_cap/game-%06d' % (
                    self.gameNumber)
                if not os.path.isdir(dir):
                    os.makedirs(dir)
                self.ale.saveScreenPNG(dir + '/frame-%06d.png' % (self.getEpisodeFrameNumber()))

        # Pixel-wise max of the last two frames removes Atari sprite flicker.
        maxedScreen = np.maximum(screenRGB, prevScreenRGB)
        self.state = self.state.stateByAddingScreen(maxedScreen, self.ale.getFrameNumber())
        self.gameScore += reward
        return reward, self.state, isTerminal

    def resetGame(self):
        # Start a fresh episode; only count a new game when ALE reports the
        # previous one actually ended.
        if self.ale.game_over():
            self.gameNumber += 1
        self.ale.reset_game()
        self.state = State().stateByAddingScreen(self.ale.getScreenRGB(),
                                                 self.ale.getFrameNumber())
        self.gameScore = 0
        self.episodeStepNumber = 0  # environment steps vs ALE frames. Will probably be 4*frame number
class AtariEnvironment:
    """ALE wrapper: repeats each agent action for 4 emulator frames, merges
    the final two frames with a pixel max (sprite-flicker removal), and
    periodically dumps screenshots to disk."""

    def __init__(self, args, outputDir):
        self.outputDir = outputDir
        self.screenCaptureFrequency = args.screen_capture_freq

        self.ale = ALEInterface()
        self.ale.setInt(b'random_seed', 123456)
        random.seed(123456)
        # Fix https://groups.google.com/forum/#!topic/deep-q-learning/p4FAIaabwlo
        self.ale.setFloat(b'repeat_action_probability', 0.0)

        # Load the ROM file
        self.ale.loadROM(args.rom)
        self.actionSet = self.ale.getMinimalActionSet()

        self.gameNumber = 0
        self.stepNumber = 0
        self.resetGame()

    # Simple accessors over internal counters and the underlying ALE.
    def getNumActions(self):
        return len(self.actionSet)

    def getState(self):
        return self.state

    def getGameNumber(self):
        return self.gameNumber

    def getFrameNumber(self):
        return self.ale.getFrameNumber()

    def getEpisodeFrameNumber(self):
        return self.ale.getEpisodeFrameNumber()

    def getEpisodeStepNumber(self):
        return self.episodeStepNumber

    def getStepNumber(self):
        return self.stepNumber

    def getGameScore(self):
        return self.gameScore

    def isGameOver(self):
        return self.ale.game_over()

    def step(self, action):
        """Repeat `action` for up to 4 ALE frames; return (reward, state, isTerminal)."""
        livesBefore = self.ale.lives()
        stepReward = 0
        terminal = 0
        self.stepNumber += 1
        self.episodeStepNumber += 1

        for _ in range(4):
            beforeRGB = self.ale.getScreenRGB()
            stepReward += self.ale.act(self.actionSet[action])
            afterRGB = self.ale.getScreenRGB()

            # A lost life (or game over) ends the agent-visible episode.
            if self.ale.lives() < livesBefore or self.ale.game_over():
                terminal = 1
                break

            # Capture every Nth game's frames as PNGs.
            if self.gameNumber % self.screenCaptureFrequency == 0:
                captureDir = self.outputDir + '/screen_cap/game-%06d' % (self.gameNumber)
                if not os.path.isdir(captureDir):
                    os.makedirs(captureDir)
                self.ale.saveScreenPNG(captureDir + '/frame-%06d.png' % (self.getEpisodeFrameNumber()))

        # Merge the last two frames to undo Atari sprite flicker.
        mergedScreen = np.maximum(afterRGB, beforeRGB)
        self.state = self.state.stateByAddingScreen(mergedScreen, self.ale.getFrameNumber())
        self.gameScore += stepReward
        return stepReward, self.state, terminal

    def resetGame(self):
        """Begin a new episode, counting a new game only after a real game over."""
        if self.ale.game_over():
            self.gameNumber += 1
        self.ale.reset_game()
        self.state = State().stateByAddingScreen(self.ale.getScreenRGB(),
                                                 self.ale.getFrameNumber())
        self.gameScore = 0
        self.episodeStepNumber = 0  # environment steps vs ALE frames. Will probably be 4*frame number
# Finally time to load the ROM file ale.loadROM("roms/frogger.bin") # Now it's time to write some code actions = ale.getMinimalActionSet() a = actions[0] count = 0 loc = [12, 9] p_guide = [] time_map = [] for episode in range(1): total_reward = 0 p_guide = set_guide(ale.getScreenRGB(), loc) while not ale.game_over(): screen = ale.getScreenRGB() if not (ale.getEpisodeFrameNumber() % 5) and ( ale.getEpisodeFrameNumber() > 450): #print("icarus: pre-frameNumber: %d" % ale.getEpisodeFrameNumber()) count += 1 guide = set_guide(screen, loc) loc = guide.pop() time_entry = [ale.getEpisodeFrameNumber(), guide] time_map.append(time_entry) # print("icarus: guide: " + str(guide)) for i in range(len(guide)): if not np.all(p_guide[i] == guide[i]): #print('icarus: difference in row %d' % i) p_guide[i] = guide[i] #raw_input("Holding...") #print("icarus: post-frameNumber: %d" % ale.getEpisodeFrameNumber()) #if ale.getEpisodeFrameNumber() > 1000:
# NOTE(review): this chunk is the tail of a render/step loop whose header is
# outside this view; total_reward, line_pos, screen, pressed, logger, clock,
# ale and episode are defined earlier in that loop.
font = pygame.font.SysFont("Ubuntu Mono", 30)
text = font.render("Total Reward: " + str(total_reward), 1, (208, 255, 255))
screen.blit(text, (330, line_pos))
pygame.display.flip()

# process pygame event queue
exit = False  # NOTE: shadows the builtin exit()
for event in pygame.event.get():
    if event.type == pygame.QUIT:
        exit = True
        break;
if (pressed[pygame.K_q]):
    exit = True
if (exit):
    logger.close();
    break  # leaves the enclosing (off-screen) loop

# delay to 60fps
clock.tick(60.)

if (ale.game_over()):
    episode_frame_number = ale.getEpisodeFrameNumber()
    frame_number = ale.getFrameNumber()
    print("Frame Number: " + str(frame_number) + " Episode Frame Number: " + str(episode_frame_number))
    print("Episode " + str(episode) + " ended with score: " + str(total_reward))
    ale.reset_game()
    total_reward = 0.0
    episode = episode + 1
class GameManager(object):
    """This class takes care of the interactions between an agent and a game
    across episodes, as well as overall logging of performance.
    """

    def __init__(
        self,
        game_name,
        agent,
        results_dir,
        n_epochs=1,
        n_episodes=None,
        n_frames=None,
        remove_old_results_dir=False,
        use_minimal_action_set=True,
        min_time_between_frames=0,
    ):
        """game_name is one of the supported games (there are many), as a string:
            "space_invaders.bin"
        agent is an an instance of a subclass of the Agent interface
        results_dir is a string representing a directory in which results and
            logs are placed. If it does not exist, it is created.
        use_minimal_action_set determines whether the agent is offered all
            possible actions, or only those (minimal) that are applicable to
            the specific game.
        min_time_between_frames is the minimum required time in seconds between
            frames. If 0, the game is unrestricted.
        """
        self.game_name = game_name
        self.agent = agent
        self.use_minimal_action_set = use_minimal_action_set
        self.min_time_between_frames = min_time_between_frames
        self.n_epochs = n_epochs
        self.n_episodes = n_episodes
        self.n_frames = n_frames
        # Exactly one stopping criterion (episodes or frames) must be given.
        if (n_episodes is None and n_frames is None) or (n_episodes is not None and n_frames is not None):
            raise ValueError("Extacly one of n_episodes and n_frames " "must be defined")
        self.initialize_results_dir(results_dir, remove_old_results_dir)
        self.log = util.logging.Logger(
            ("settings", "step", "episode", "epoch", "overall"),
            "settings",
            os.path.join(self.results_dir, "GameManager.log"),
        )
        self.stats = util.logging.CSVLogger(
            os.path.join(self.results_dir, "stats.log"),
            header="epoch,episode,total_reward,n_frames,wall_time",
            print_items=True,
        )
        self._object_cache = dict()
        self.initialize_ale()
        self.initialize_agent()
        self.dump_settings()

    def initialize_results_dir(self, results_dir, remove_existing=False):
        """Creates the whole path of directories if they do no exist.
        If they do exist, raises an error unless remove_existing is True,
        in which case the existing directory is deleted.
        """
        now = datetime.now().strftime("%Y%m%d-%H-%M")
        # drop .bin, append current time down to the minute
        results_dir = os.path.join(results_dir, self.game_name[:-4] + now)
        if remove_existing:
            if os.path.exists(results_dir):
                shutil.rmtree(results_dir)
        # Should raise an error if directory exists
        os.makedirs(results_dir)
        self.results_dir = results_dir

    def initialize_ale(self):
        # ROM path is resolved relative to the module-level ROM_RELATIVE_LOCATION.
        self.ale = ALEInterface()
        self.ale.loadROM(os.path.join(ROM_RELATIVE_LOCATION, self.game_name))

    def initialize_agent(self):
        # Hand the agent lazy screen/RAM accessor callbacks instead of the raw ALE.
        RSC = namedtuple("RawStateCallbacks", ["raw", "grey", "rgb", "ram"])
        raw_state_callbacks = RSC(self.get_screen, self.get_screen_grayscale, self.get_screen_RGB, self.get_RAM)
        self.agent.set_raw_state_callbacks(raw_state_callbacks)
        self.agent.set_results_dir(self.results_dir)
        if self.use_minimal_action_set:
            actions = self.ale.getMinimalActionSet()
        else:
            actions = self.ale.getLegalActionSet()
        self.agent.set_available_actions(actions)

    def rest(self, already_elapsed):
        # Sleep out the remainder of the per-frame time budget, if any.
        rest_time = self.min_time_between_frames - already_elapsed
        if rest_time > 0:
            sleep(rest_time)

    def run(self):
        """Runs self.n_epochs epochs, where the agent's learning is reset
        for each new epoch. Each epoch lasts self.n_episodes or self.n_frames,
        whichever is defined.
        """
        self.log.overall("Starting run")
        run_start = time()
        # NOTE(review): xrange implies this module targets Python 2.
        for epoch in xrange(self.n_epochs):
            self.agent.reset()
            self.n_epoch = epoch
            self._run_epoch()
        self.log.overall("End of run ({:.2f} s)".format(time() - run_start))

    def _run_epoch(self):
        self.n_episode = 0
        start = time()
        while not self._stop_condition_met():
            self._run_episode()
            self.n_episode += 1
        wall_time = time() - start
        frames = self.ale.getFrameNumber()  # NOTE(review): unused local
        self.log.epoch("Finished epoch after {:.2f} seconds".format(wall_time))

    def _run_episode(self):
        # One episode: the agent picks actions until game over or global stop.
        self.ale.reset_game()
        self.agent.on_episode_start()
        total_reward = 0
        episode_start = time()
        while (not self.ale.game_over()) and (not self._stop_condition_met()):
            timestep_start = time()
            action = self.agent.select_action()
            reward = self.ale.act(action)
            self.agent.receive_reward(reward)
            total_reward += reward
            self.rest(time() - timestep_start)
        wall_time = time() - episode_start
        self.agent.on_episode_end()
        # Stats format: CSV with epoch, episode, total_reward, n_frames, wall_time
        self.stats.write(
            self.n_epoch,
            self.n_episode,
            total_reward,
            self.ale.getEpisodeFrameNumber(),
            "{:.2f}".format(wall_time)
        )

    def _stop_condition_met(self):
        # NOTE(review): n_episodes == 0 would fall through to the frame check
        # and compare against n_frames=None.
        if self.n_episodes:
            return self.n_episode >= self.n_episodes
        return self.ale.getFrameNumber() >= self.n_frames

    # Methods for state perception
    def get_screen(self):
        """Returns a matrix containing the current game screen in raw pixel data,
        i.e. before conversion to RGB. Handles reuse of np.array object, so it
        will overwrite what is in the old object"""
        return self._cached("raw", self.ale.getScreen)

    def get_screen_grayscale(self):
        """Returns an np.array with the screen grayscale colours. Handles
        reuse of np.array object, so it will overwrite what is in the old object.
        """
        return self._cached("gray", self.ale.getScreenGrayscale)

    def get_screen_RGB(self):
        """Returns a numpy array with the screen's RGB colours.
        The first positions contain the red colours, followed by the green
        colours and then the blue colours"""
        return self._cached("rgb", self.ale.getScreenRGB)

    def get_RAM(self):
        """Returns a vector containing current RAM content (byte-level).
        Handles reuse of np.array object, so it will overwrite what is in the
        old object"""
        return self._cached("ram", self.ale.getRAM)

    def _cached(self, key, func):
        # On a cache hit, pass the cached buffer to the ALE getter so it is
        # filled in place; on a miss, let the getter allocate it.
        if key in self._object_cache:
            func(self._object_cache[key])
        else:
            self._object_cache[key] = func()
        return self._object_cache[key]

    def dump_settings(self):
        import json

        settings = self.get_settings()
        path = os.path.join(self.results_dir, "settings")
        with open(path, "w") as f:
            json.dump(settings, f, indent=4)

    def get_settings(self):
        """Returns a dict representing the settings needed to reproduce this
        object and its subobjects
        """
        return {
            "game_name": self.game_name,
            "n_epochs": self.n_epochs,
            "n_episodes": self.n_episodes,
            "n_frames": self.n_frames,
            "agent": self.agent.get_settings(),
            "results_dir": self.results_dir,
            "use_minimal_action_set": self.use_minimal_action_set,
        }
class Agent():
    # Owns the ALE emulator, the replay memory and a DQN-family model for one
    # game, and drives training / evaluation episodes.

    def __init__(self, game, agent_type, display, load_model, record, test):
        # game: ROM base name under ./roms (e.g. 'space_invaders')
        # agent_type: 'dqn', 'double', or anything else -> dueling
        # display / record: ALE screen display and frame-recording switches
        # load_model: resume from saved weights and replay memory
        # test: appends '_test' to the run name
        self.name = game
        self.agent_type = agent_type
        self.ale = ALEInterface()
        self.ale.setInt(str.encode('random_seed'), np.random.randint(100))
        self.ale.setBool(str.encode('display_screen'), display or record)
        if record:
            self.ale.setString(str.encode('record_screen_dir'),
                               str.encode('./data/recordings/{}/{}/tmp/'.format(game, agent_type)))

        self.ale.loadROM(str.encode('./roms/{}.bin'.format(self.name)))
        self.action_list = list(self.ale.getMinimalActionSet())
        self.frame_shape = np.squeeze(self.ale.getScreenGrayscale()).shape
        if test:
            self.name += '_test'

        if 'space_invaders' in self.name:
            # Account for blinking bullets
            self.frameskip = 2
        else:
            self.frameskip = 3

        # Rolling window of the last 4 grayscale frames (the model input).
        self.frame_buffer = deque(maxlen=4)
        if load_model and not record:
            self.load_replaymemory()
        else:
            self.replay_memory = ReplayMemory(500000, 32)

        model_input_shape = self.frame_shape + (4,)
        model_output_shape = len(self.action_list)

        if agent_type == 'dqn':
            self.model = DeepQN(
                model_input_shape,
                model_output_shape,
                self.action_list,
                self.replay_memory,
                self.name,
                load_model
            )
        elif agent_type == 'double':
            self.model = DoubleDQN(
                model_input_shape,
                model_output_shape,
                self.action_list,
                self.replay_memory,
                self.name,
                load_model
            )
        else:
            self.model = DuelingDQN(
                model_input_shape,
                model_output_shape,
                self.action_list,
                self.replay_memory,
                self.name,
                load_model
            )

        print('{} Loaded!'.format(' '.join(self.name.split('_')).title()))
        print('Displaying: ', display)
        print('Frame Shape: ', self.frame_shape)
        print('Frame Skip: ', self.frameskip)
        print('Action Set: ', self.action_list)
        print('Model Input Shape: ', model_input_shape)
        print('Model Output Shape: ', model_output_shape)
        print('Agent: ', agent_type)

    def training(self, steps):
        '''
        Trains the agent for :steps number of weight updates.

        Returns the average model loss
        '''
        loss = []

        # Initialize frame buffer. np.squeeze removes empty dimensions e.g. if shape=(210,160,__)
        self.frame_buffer.append(np.squeeze(self.ale.getScreenGrayscale()))
        self.frame_buffer.append(np.squeeze(self.ale.getScreenGrayscale()))
        self.frame_buffer.append(np.squeeze(self.ale.getScreenGrayscale()))
        self.frame_buffer.append(np.squeeze(self.ale.getScreenGrayscale()))

        try:
            for step in range(steps):
                gameover = False
                initial_state = np.stack(self.frame_buffer, axis=-1)
                action = self.model.predict_action(initial_state)

                # Backup data
                if step % 5000 == 0:
                    self.model.save_model()
                    self.model.save_hyperparams()
                    self.save_replaymemory()

                # If using a target model check for weight updates
                if hasattr(self.model, 'tau'):
                    if self.model.tau == 0:
                        self.model.update_target_model()
                        self.model.tau = 10000
                    else:
                        self.model.tau -= 1

                # Frame skipping technique https://danieltakeshi.github.io/2016/11/25/frame-skipping-and-preprocessing-for-deep-q-networks-on-atari-2600-games/
                # NOTE(review): only the reward of the final act() is kept;
                # rewards earned during the skipped frames are discarded.
                lives_before = self.ale.lives()
                for _ in range(self.frameskip):
                    self.ale.act(action)
                reward = self.ale.act(action)
                self.frame_buffer.append(np.squeeze(self.ale.getScreenGrayscale()))
                lives_after = self.ale.lives()

                if lives_after < lives_before:
                    gameover = True
                    # Taking advice from dude on reddit
                    reward = -1

                if self.ale.game_over():
                    gameover = True
                    reward = -1
                    self.ale.reset_game()

                new_state = np.stack(self.frame_buffer, axis=-1)

                # Experiment with clipping rewards for stability purposes
                reward = np.clip(reward, -1, 1)
                self.replay_memory.add(
                    initial_state,
                    action,
                    reward,
                    gameover,
                    new_state
                )
                # NOTE(review): assumes replay_train() returns a list so that
                # += extends `loss`.
                loss += self.model.replay_train()
        except:
            # WARNING(review): bare except catches everything; state is saved,
            # then KeyboardInterrupt is raised regardless of the original
            # exception type -- the real error is lost.
            self.model.save_model()
            self.model.save_hyperparams()
            self.save_replaymemory()
            raise KeyboardInterrupt

        return np.mean(loss, axis=0)

    def simulate_random(self):
        # Plays one episode with uniformly random actions; returns
        # (total_reward, frames_survived).
        print('Simulating game randomly')
        done = False
        total_reward = 0
        while not done:
            action = np.random.choice(self.ale.getMinimalActionSet())
            reward = self.ale.act(action)
            total_reward += reward

            if self.ale.game_over():
                reward = -1
                done = True

            reward = np.clip(reward, -1, 1)
            if reward != 0:
                print(reward)

        frames_survived = self.ale.getEpisodeFrameNumber()
        self.ale.reset_game()
        return total_reward, frames_survived

    def simulate_intelligent(self, evaluating=False):
        # Plays one episode using the model's policy; returns
        # (total_score, frames_survived).
        done = False
        total_score = 0

        # Prime the frame buffer with the current screen.
        self.frame_buffer.append(np.squeeze(self.ale.getScreenGrayscale()))
        self.frame_buffer.append(np.squeeze(self.ale.getScreenGrayscale()))
        self.frame_buffer.append(np.squeeze(self.ale.getScreenGrayscale()))
        self.frame_buffer.append(np.squeeze(self.ale.getScreenGrayscale()))

        while not done:
            state = np.stack(self.frame_buffer, axis=-1)
            action = self.model.predict_action(state, evaluating)

            for _ in range(self.frameskip):
                self.ale.act(action)

            # Remember, ale.act returns the increase in game score with this action
            total_score += self.ale.act(action)
            # Pushes oldest frame out
            self.frame_buffer.append(np.squeeze(self.ale.getScreenGrayscale()))
            if self.ale.game_over():
                done = True

        frames_survived = self.ale.getEpisodeFrameNumber()
        print(' Game Over')
        print(' Frames Survived: ', frames_survived)
        print(' Score: ', total_score)
        print('===========================')
        self.ale.reset_game()
        return total_score, frames_survived

    def save_replaymemory(self):
        # bz2-compressed pickle keeps the large replay memory small on disk.
        with bz2.BZ2File('./data/{}/{}_replaymem.obj'.format(self.agent_type, self.name), 'wb') as f:
            pickle.dump(self.replay_memory, f, protocol=pickle.HIGHEST_PROTOCOL)
            print('Saved replay memory at ', datetime.now())

    def load_replaymemory(self):
        try:
            with bz2.BZ2File('./data/{}/{}_replaymem.obj'.format(self.agent_type, self.name), 'rb') as f:
                self.replay_memory = pickle.load(f)
                print('Loaded replay memory at ', datetime.now())
        except FileNotFoundError:
            print('No replay memory file found')
            raise KeyboardInterrupt
class GameManager(object):
    """This class takes care of the interactions between an agent and a game
    across episodes, as well as overall logging of performance.
    """

    def __init__(self, game_name, agent, results_dir,
                 n_epochs=1, n_episodes=None, n_frames=None,
                 remove_old_results_dir=False, use_minimal_action_set=True,
                 min_time_between_frames=0):
        """game_name is one of the supported games (there are many), as a string:
            "space_invaders.bin"
        agent is an an instance of a subclass of the Agent interface
        results_dir is a string representing a directory in which results and
            logs are placed. If it does not exist, it is created.
        use_minimal_action_set determines whether the agent is offered all
            possible actions, or only those (minimal) that are applicable to
            the specific game.
        min_time_between_frames is the minimum required time in seconds between
            frames. If 0, the game is unrestricted.
        """
        self.game_name = game_name
        self.agent = agent
        self.use_minimal_action_set = use_minimal_action_set
        self.min_time_between_frames = min_time_between_frames
        self.n_epochs = n_epochs
        self.n_episodes = n_episodes
        self.n_frames = n_frames

        # Exactly one stopping criterion (episodes or frames) must be given.
        if ((n_episodes is None and n_frames is None) or
                (n_episodes is not None and n_frames is not None)):
            # BUGFIX: corrected misspelled 'Extacly' in the error message.
            raise ValueError("Exactly one of n_episodes and n_frames "
                             "must be defined")

        self.initialize_results_dir(results_dir, remove_old_results_dir)

        self.log = util.logging.Logger(
            ('settings', 'step', 'episode', 'epoch', 'overall'),
            'settings',
            os.path.join(self.results_dir, 'GameManager.log'))
        self.stats = util.logging.CSVLogger(
            os.path.join(self.results_dir, 'stats.log'),
            header='epoch,episode,total_reward,n_frames,wall_time',
            print_items=True)

        self._object_cache = dict()

        self.initialize_ale()
        self.initialize_agent()

        self.dump_settings()

    def initialize_results_dir(self, results_dir, remove_existing=False):
        """Creates the whole path of directories if they do no exist.
        If they do exist, raises an error unless remove_existing is True,
        in which case the existing directory is deleted.
        """
        now = datetime.now().strftime('%Y%m%d-%H-%M')
        # drop .bin, append current time down to the minute
        results_dir = os.path.join(results_dir, self.game_name[:-4] + now)

        if remove_existing:
            if os.path.exists(results_dir):
                shutil.rmtree(results_dir)

        # Should raise an error if directory exists
        os.makedirs(results_dir)
        self.results_dir = results_dir

    def initialize_ale(self):
        # ROM path is resolved relative to the module-level ROM_RELATIVE_LOCATION.
        self.ale = ALEInterface()
        self.ale.loadROM(os.path.join(ROM_RELATIVE_LOCATION, self.game_name))

    def initialize_agent(self):
        # Hand the agent lazy screen/RAM accessor callbacks instead of the raw ALE.
        RSC = namedtuple('RawStateCallbacks', ['raw', 'grey', 'rgb', 'ram'])
        raw_state_callbacks = RSC(self.get_screen,
                                  self.get_screen_grayscale,
                                  self.get_screen_RGB,
                                  self.get_RAM)
        self.agent.set_raw_state_callbacks(raw_state_callbacks)
        self.agent.set_results_dir(self.results_dir)
        if self.use_minimal_action_set:
            actions = self.ale.getMinimalActionSet()
        else:
            actions = self.ale.getLegalActionSet()
        self.agent.set_available_actions(actions)

    def rest(self, already_elapsed):
        # Sleep out the remainder of the per-frame time budget, if any.
        rest_time = self.min_time_between_frames - already_elapsed
        if rest_time > 0:
            sleep(rest_time)

    def run(self):
        """Runs self.n_epochs epochs, where the agent's learning is reset
        for each new epoch. Each epoch lasts self.n_episodes or
        self.n_frames, whichever is defined.
        """
        self.log.overall('Starting run')
        run_start = time()
        # BUGFIX: range instead of Python-2-only xrange (identical iteration
        # on both Python 2 and 3).
        for epoch in range(self.n_epochs):
            self.agent.reset()
            self.n_epoch = epoch
            self._run_epoch()
        self.log.overall('End of run ({:.2f} s)'.format(time() - run_start))

    def _run_epoch(self):
        self.n_episode = 0
        start = time()
        while not self._stop_condition_met():
            self._run_episode()
            self.n_episode += 1
        wall_time = (time() - start)
        # (removed an unused local that fetched the frame number)
        self.log.epoch("Finished epoch after {:.2f} seconds".format(wall_time))

    def _run_episode(self):
        # One episode: the agent picks actions until game over or global stop.
        self.ale.reset_game()
        self.agent.on_episode_start()

        total_reward = 0
        episode_start = time()

        while (not self.ale.game_over()) and (not self._stop_condition_met()):
            timestep_start = time()
            action = self.agent.select_action()
            reward = self.ale.act(action)
            self.agent.receive_reward(reward)
            total_reward += reward
            self.rest(time() - timestep_start)

        wall_time = time() - episode_start
        self.agent.on_episode_end()

        # Stats format: CSV with epoch, episode, total_reward, n_frames, wall_time
        self.stats.write(self.n_epoch,
                         self.n_episode,
                         total_reward,
                         self.ale.getEpisodeFrameNumber(),
                         '{:.2f}'.format(wall_time))

    def _stop_condition_met(self):
        # BUGFIX: compare against None so n_episodes == 0 is still treated as
        # an episode-based stop instead of falling through and comparing the
        # frame count with n_frames=None.
        if self.n_episodes is not None:
            return self.n_episode >= self.n_episodes
        return self.ale.getFrameNumber() >= self.n_frames

    # Methods for state perception
    def get_screen(self):
        """Returns a matrix containing the current game screen in raw pixel data,
        i.e. before conversion to RGB. Handles reuse of np.array object, so it
        will overwrite what is in the old object"""
        return self._cached('raw', self.ale.getScreen)

    def get_screen_grayscale(self):
        """Returns an np.array with the screen grayscale colours. Handles
        reuse of np.array object, so it will overwrite what is in the old object.
        """
        return self._cached('gray', self.ale.getScreenGrayscale)

    def get_screen_RGB(self):
        """Returns a numpy array with the screen's RGB colours.
        The first positions contain the red colours, followed by the green
        colours and then the blue colours"""
        return self._cached('rgb', self.ale.getScreenRGB)

    def get_RAM(self):
        """Returns a vector containing current RAM content (byte-level).
        Handles reuse of np.array object, so it will overwrite what is in the
        old object"""
        return self._cached('ram', self.ale.getRAM)

    def _cached(self, key, func):
        # On a cache hit, pass the cached buffer to the ALE getter so it is
        # filled in place; on a miss, let the getter allocate it.
        if key in self._object_cache:
            func(self._object_cache[key])
        else:
            self._object_cache[key] = func()
        return self._object_cache[key]

    def dump_settings(self):
        import json

        settings = self.get_settings()
        path = os.path.join(self.results_dir, 'settings')
        with open(path, 'w') as f:
            json.dump(settings, f, indent=4)

    def get_settings(self):
        """Returns a dict representing the settings needed to reproduce this
        object and its subobjects
        """
        return {
            "game_name": self.game_name,
            "n_epochs": self.n_epochs,
            "n_episodes": self.n_episodes,
            "n_frames": self.n_frames,
            "agent": self.agent.get_settings(),
            "results_dir": self.results_dir,
            "use_minimal_action_set": self.use_minimal_action_set,
        }
# NOTE(review): this chunk assumes pygame, ale, legal_actions and screen
# (a pygame surface) are set up earlier in the script.
pygame.display.flip()

episode = 0
total_reward = 0.0
while (episode < 10):
    # Allow the window to be closed cleanly.
    exit = False  # NOTE: shadows the builtin exit()
    for event in pygame.event.get():
        if event.type == pygame.QUIT:
            exit = True
            break
    if (exit):
        break

    # Uniformly random action from the minimal action set.
    a = legal_actions[np.random.randint(legal_actions.size)]
    reward = ale.act(a)
    total_reward += reward

    # Blit the ALE screen straight into the pygame surface's pixel buffer.
    # NOTE(review): assumes a 32-bit surface whose int32 view getScreenRGB
    # can fill in place -- confirm the surface format matches.
    numpy_surface = np.frombuffer(screen.get_buffer(), dtype=np.int32)
    ale.getScreenRGB(numpy_surface)
    pygame.display.flip()

    if (ale.game_over()):
        episode_frame_number = ale.getEpisodeFrameNumber()
        frame_number = ale.getFrameNumber()
        print("Frame Number: " + str(frame_number) + " Episode Frame Number: " + str(episode_frame_number))
        print("Episode " + str(episode) + " ended with score: " + str(total_reward))
        ale.reset_game()
        total_reward = 0.0
        episode = episode + 1
class LearningEnvironment:
    """Interactive pygame front-end that runs a reinforcement-learning (or
    TAMER) agent against an Atari game via the Arcade Learning Environment.

    Each step the environment (at most every ``sample_rate`` frames) extracts
    a state from the RGB screen, asks the agent for an action, applies it,
    records an experience, and renders the game plus bookkeeping info in a
    pygame window.
    """

    def __init__(self, rom_path=None, agent=None, episodes=10, fps=60,
                 display_width=800, display_height=640, sample_rate=10):
        """
        rom_path: path to the Atari ROM file; when None, falls back to a
            hard-coded Ms. Pac-Man ROM path.
        agent: instance inheriting from PythonReinforcementAgent (required).
        episodes: number of episodes start_game() will play.
        fps: pygame clock rate used to throttle the main loop.
        display_width/display_height: pygame window size in pixels.
        sample_rate: sample experience every sample_rate (or sample_rate+1)
            frames.

        Raises ValueError if no agent is given, TypeError if the agent has
        the wrong base class.
        """
        # if not specified, use the default game rom: pacman
        if rom_path is None:
            rom_path = '/Users/lguan/Documents/Study/Research/Atari-2600-Roms/K-P/ms_pacman.bin'

        # setup agent
        if agent is None:
            raise ValueError('Learning Environment - No specified agent')
        elif not isinstance(agent, PythonReinforcementAgent):
            raise TypeError(
                'The agent should be inherited from PythonReinforcementAgent')
        self.agent = agent

        # initialize parameters
        self.episodes = episodes
        self.fps = fps
        self.display_width = display_width
        self.display_height = display_height
        self.game_surface_width = 0   # the value will be set after loading the rom
        self.game_surface_height = 0  # the value will be set after loading the rom
        self.sample_rate = sample_rate

        # initialize some useful variables
        self.total_reward = 0
        self.last_sample_frame = 0
        # by switching between 1 and -1 to sample from even frames and odd frames
        self.sample_from_odd_frame = 1

        # setup ALE
        self.ale = ALEInterface()
        self.setup_ale(rom_path)

    def setup_ale(self, rom_path):
        """Configure the ALE instance, load the ROM, and cache the game
        surface dimensions on self."""
        # use current ale
        ale = self.ale

        # Get & Set the desired settings
        ale.setInt(b'random_seed', 123)

        # Set USE_SDL to true to display the screen. ALE must be compiled
        # with SDL enabled for this to work. On OSX, pygame init is used to
        # proxy-call SDL_main.
        USE_SDL = False
        if USE_SDL:
            # mac OS
            if sys.platform == 'darwin':
                pygame.init()
                ale.setBool('sound', False)  # Sound doesn't work on OSX
            elif sys.platform.startswith('linux'):
                ale.setBool('sound', True)
            ale.setBool('display_screen', True)

        # Load the ROM file
        rom_file = str.encode(rom_path)
        print('- Loading ROM - %s' % rom_path)
        ale.loadROM(rom_file)
        print('- Complete loading ROM')

        (game_surface_width, game_surface_height) = ale.getScreenDims()
        print("game surface width/height: " + str(game_surface_width) + "/" +
              str(game_surface_height))
        self.game_surface_height = game_surface_height
        self.game_surface_width = game_surface_width

        available_action = ale.getMinimalActionSet()
        print("available action set: %s" % available_action)

    def start_game(self):
        """ use this function to start the game """
        # get the ALE and agent
        ale = self.ale
        agent = self.agent

        # init pygame
        pygame.init()
        display_screen = pygame.display.set_mode(
            (self.display_width, self.display_height))
        pygame.display.set_caption(
            "Arcade Learning Environment Player Agent Display")

        # init clock
        clock = pygame.time.Clock()
        is_exit = False

        # start episodes
        for episode in range(self.episodes):
            if is_exit:
                break
            self.start_episode()
            state = None
            action = None
            sub_episode_reward = 0

            while not ale.game_over() and not is_exit:
                # get new sample according to the sample_rate (the odd/even
                # offset alternates so both frame parities get sampled)
                if self.last_sample_frame + self.sample_rate \
                        < (ale.getEpisodeFrameNumber() + self.sample_from_odd_frame) \
                        or state is None or action is None:
                    # FIX: buffer must be uint8, not int8 — ALE writes RGB
                    # byte values in [0, 255], which overflow a signed int8
                    # buffer (renderGameSurface below correctly uses uint8).
                    np_game_surface = np.zeros(
                        shape=(self.game_surface_height,
                               self.game_surface_width, 3),
                        dtype=np.uint8)
                    ale.getScreenRGB(np_game_surface)
                    game_rgb = utils.copyBuffer(np_game_surface)

                    # get new state based on current game state
                    state = self.agent.extract_state(game_rgb)

                    # update info
                    self.last_sample_frame = ale.getEpisodeFrameNumber()
                    self.sample_from_odd_frame = -self.sample_from_odd_frame

                    # get the action from the agent
                    action = agent.getAction(state)

                    # record the reward accumulated since the previous sample
                    current_time = time.time()
                    experience = {
                        'time': current_time,
                        'reward': sub_episode_reward,
                        'state': state
                    }
                    sub_episode_reward = 0
                    agent.addExperience(experience)
                    if IS_DEBUG:
                        print('ALE - sample from frame %s'
                              % ale.getEpisodeFrameNumber())

                # apply an action and get the resulting reward
                reward = ale.act(action)
                self.total_reward += reward
                sub_episode_reward += reward

                # if current agent is Tamer agent, then receive
                # the human feedback signal from the keyboard
                if isinstance(agent, BasicTamerAgent):
                    h = self.getHumanSignal()
                    agent.receiveHumanSignal(signal=h)

                # clear screen
                display_screen.fill((0, 0, 0))

                # render game surface
                self.renderGameSurface(ale, display_screen,
                                       self.game_surface_width,
                                       self.game_surface_height)

                # display related info
                self.displayRelatedInfo(display_screen, action,
                                        self.total_reward)

                pygame.display.flip()

                # process pygame event queue: quit on window close or 'q'
                for event in pygame.event.get():
                    if event.type == pygame.QUIT:
                        is_exit = True
                        break
                    if event.type == pygame.KEYDOWN and event.key == pygame.K_q:
                        is_exit = True
                        break

                # delay (default: 60fps)
                clock.tick(self.fps)

            print('Episode %d ended with score: %d'
                  % (episode, self.total_reward))
            self.end_episode()

        # finalize the game
        self.final()

    def final(self):
        """ This function will be at the every end of this program """
        self.agent.final()

    def start_episode(self):
        """ This function will be at the beginning of each episode """
        self.total_reward = 0
        self.last_sample_frame = self.ale.getEpisodeFrameNumber()
        # by switching between 1 and -1 to sample from even frames and odd frames
        self.sample_from_odd_frame = 1
        self.agent.startEpisode()

    def end_episode(self):
        """ This function will be at the end of each episode """
        self.ale.reset_game()
        self.agent.stopEpisode()

    @staticmethod
    def renderGameSurface(ale, screen, game_surface_width, game_surface_height):
        """ render game surface with specified width and height """
        # clear screen
        screen.fill((0, 0, 0))

        # get atari screen pixels and blit them
        numpy_surface = np.zeros(shape=(game_surface_height,
                                        game_surface_width, 3),
                                 dtype=np.uint8)
        ale.getScreenRGB(numpy_surface)
        # pygame expects (width, height) ordering, so swap the first two axes
        numpy_surface = np.swapaxes(numpy_surface, 0, 1)
        surf = pygame.pixelcopy.make_surface(numpy_surface)
        screen.blit(pygame.transform.scale2x(surf), (5, 5))

    @staticmethod
    def displayRelatedInfo(screen, action, total_reward):
        """ display related information like reward and action on the screen """
        line_pos = 20

        # display current action
        font = pygame.font.SysFont("Ubuntu Mono", 32)
        text = font.render("Current Action: " + str(action), 1, (208, 208, 255))
        height = font.get_height() * 1.2
        screen.blit(text, (380, line_pos))
        line_pos += height

        # display reward
        font = pygame.font.SysFont("Ubuntu Mono", 30)
        text = font.render("Total Reward: " + str(total_reward), 1,
                           (208, 255, 255))
        screen.blit(text, (380, line_pos))

    @staticmethod
    def getHumanSignal():
        """Map the current keyboard state to a scalar human-feedback signal:
        +1 for UP, -1 for DOWN, 0 otherwise."""
        pressed = pygame.key.get_pressed()
        # positive signal
        if pressed[pygame.K_UP]:
            return 1
        elif pressed[pygame.K_DOWN]:
            return -1
        else:
            return 0