class emulator:
    """Thin wrapper around a single-ROM ALE instance with an optional
    cv2 preview window. Episodes start from saved checkpoints rather
    than a cold reset.
    """

    def __init__(self, rom_name, vis, windowname='preview'):
        self.ale = ALEInterface()
        # Read the episode cap, then configure the emulator before loading.
        self.max_frames_per_episode = self.ale.getInt(
            "max_num_frames_per_episode")
        self.ale.setInt("random_seed", 123)
        self.ale.setInt("frame_skip", 4)
        self.ale.loadROM('roms/' + rom_name)
        self.legal_actions = self.ale.getMinimalActionSet()
        self.windowname = windowname
        # Map each ALE action id to its index within the minimal set.
        self.action_map = {action: idx
                           for idx, action in enumerate(self.legal_actions)}
        self.init_frame_number = 0
        self.screen_width, self.screen_height = self.ale.getScreenDims()
        print("width/height: " + str(self.screen_width) + "/" +
              str(self.screen_height))
        self.vis = vis
        if vis:
            cv2.startWindowThread()
            cv2.namedWindow(self.windowname)

    def get_image(self):
        """Return the current emulator screen as an (H, W, 3) uint8 array."""
        buf = np.zeros(self.screen_height * self.screen_width * 3,
                       dtype=np.uint8)
        self.ale.getScreenRGB(buf)
        return np.reshape(buf, (self.screen_height, self.screen_width, 3))

    def newGame(self):
        """Restore a randomly chosen checkpoint and return its first frame.

        Instead of resetting the game, we load one of the 100 module-level
        `checkpoints` and start from there.
        """
        self.ale.restoreState(
            self.ale.decodeState(checkpoints[random.randint(
                0, 99)].astype('uint8')))
        self.init_frame_number = self.ale.getFrameNumber()
        return self.get_image()

    def next(self, action_indx):
        """Apply one action; return (next_frame, reward, game_over)."""
        reward = self.ale.act(action_indx)
        frame = self.get_image()
        if self.vis:
            cv2.imshow(self.windowname, frame)
        return frame, reward, self.ale.game_over()

    def get_frame_number(self):
        """Frames elapsed since the last checkpoint restore."""
        return self.ale.getFrameNumber() - self.init_frame_number
def __init__(self, game, seed=None, use_sdl=False, n_last_screens=4,
             frame_skip=4, treat_life_lost_as_terminal=True,
             crop_or_scale='scale', max_start_nullops=30,
             record_screen_dir=None):
    """Configure an ALE instance and load the requested game.

    Args:
        game (str): game name resolved via atari_py.get_game_path.
        seed (int or None): ALE random seed in [0, 2**16); if None, one is
            drawn from numpy's global random state.
        use_sdl (bool): display the game screen through SDL (needs DISPLAY).
        n_last_screens (int): number of recent screens in the observation.
        frame_skip (int): emulator frames per environment step
            (stored; consumed by the stepping logic defined elsewhere).
        treat_life_lost_as_terminal (bool): stored flag — presumably makes
            a life loss end the episode; consumed elsewhere.
        crop_or_scale (str): screen preprocessing mode; consumed elsewhere.
        max_start_nullops (int): cap on initial no-ops; consumed elsewhere.
        record_screen_dir (str or None): if set, ALE records every screen
            into this directory.
    """
    self.n_last_screens = n_last_screens
    self.treat_life_lost_as_terminal = treat_life_lost_as_terminal
    self.crop_or_scale = crop_or_scale
    self.max_start_nullops = max_start_nullops

    # atari_py is used only to provide rom files. atari_py has its own
    # ale_python_interface, but it is obsolete.
    game_path = atari_py.get_game_path(game)

    ale = ALEInterface()
    if seed is not None:
        assert seed >= 0 and seed < 2 ** 16, \
            "ALE's random seed must be represented by unsigned int"
    else:
        # Use numpy's random state
        seed = np.random.randint(0, 2**16)
    # All ALE settings must be applied before loadROM() below.
    ale.setInt(b'random_seed', seed)
    ale.setFloat(b'repeat_action_probability', 0.0)
    ale.setBool(b'color_averaging', False)
    if record_screen_dir is not None:
        ale.setString(b'record_screen_dir',
                      str.encode(str(record_screen_dir)))
    self.frame_skip = frame_skip
    if use_sdl:
        if 'DISPLAY' not in os.environ:
            raise RuntimeError(
                'Please set DISPLAY environment variable for use_sdl=True')
        # SDL settings below are from the ALE python example
        if sys.platform == 'darwin':
            import pygame
            pygame.init()
            ale.setBool(b'sound', False)  # Sound doesn't work on OSX
        elif sys.platform.startswith('linux'):
            ale.setBool(b'sound', True)
        ale.setBool(b'display_screen', True)
    ale.loadROM(str.encode(str(game_path)))
    # ALE should report a fresh emulator right after loading the ROM.
    assert ale.getFrameNumber() == 0
    self.ale = ale
    self.legal_actions = ale.getMinimalActionSet()
    self.initialize()
    # Gym-style spaces: discrete actions, tuple of n last 84x84 screens.
    self.action_space = spaces.Discrete(len(self.legal_actions))
    one_screen_observation_space = spaces.Box(low=0, high=255,
                                              shape=(84, 84))
    self.observation_space = spaces.Tuple(
        [one_screen_observation_space] * n_last_screens)
class emulator:
    """ALE-backed game emulator. Loads a ROM, exposes frame stepping, and
    restores random checkpoints for episode starts; can mirror frames to a
    cv2 window when `vis` is true.
    """

    def __init__(self, rom_name, vis, windowname='preview'):
        self.ale = ALEInterface()
        self.max_frames_per_episode = self.ale.getInt(
            "max_num_frames_per_episode")
        # Seed and frame skip are fixed before the ROM is loaded.
        self.ale.setInt("random_seed", 123)
        self.ale.setInt("frame_skip", 4)
        self.ale.loadROM('roms/' + rom_name)
        self.legal_actions = self.ale.getMinimalActionSet()
        self.action_map = dict()
        self.windowname = windowname
        for idx, act in enumerate(self.legal_actions):
            # action id -> position in the minimal action set
            self.action_map[act] = idx
        self.init_frame_number = 0
        self.screen_width, self.screen_height = self.ale.getScreenDims()
        print("width/height: " + str(self.screen_width) + "/" +
              str(self.screen_height))
        self.vis = vis
        if vis:
            cv2.startWindowThread()
            cv2.namedWindow(self.windowname)

    def get_image(self):
        """Grab the current screen into a fresh (H, W, 3) uint8 array."""
        flat = np.zeros(self.screen_height * self.screen_width * 3,
                        dtype=np.uint8)
        self.ale.getScreenRGB(flat)
        image = np.reshape(flat, (self.screen_height, self.screen_width, 3))
        return image

    def newGame(self):
        """Begin an episode from one of 100 saved checkpoints.

        Instead of resetting the game, we load a checkpoint and start
        from there; the frame counter origin is recorded.
        """
        chosen = checkpoints[random.randint(0, 99)]
        self.ale.restoreState(self.ale.decodeState(chosen.astype('uint8')))
        self.init_frame_number = self.ale.getFrameNumber()
        return self.get_image()

    def next(self, action_indx):
        """Step once with `action_indx`; return (frame, reward, done)."""
        reward = self.ale.act(action_indx)
        nextstate = self.get_image()
        if self.vis:
            cv2.imshow(self.windowname, nextstate)
        return nextstate, reward, self.ale.game_over()

    def get_frame_number(self):
        """Number of frames played since the checkpoint restore."""
        return self.ale.getFrameNumber() - self.init_frame_number
def __init__(self, rom_filename, seed=None, use_sdl=False, n_last_screens=4,
             frame_skip=4, treat_life_lost_as_terminal=True,
             crop_or_scale='scale', max_start_nullops=30,
             record_screen_dir=None):
    """Configure an ALE instance and load the ROM at `rom_filename`.

    Args:
        rom_filename (str): path to the ROM file to load.
        seed (int or None): ALE random seed in [0, 2**16); if None, one is
            drawn from numpy's global random state.
        use_sdl (bool): display the game screen through SDL (needs DISPLAY).
        n_last_screens (int): number of recent screens kept as observation
            (stored; consumed elsewhere).
        frame_skip (int): emulator frames per environment step
            (stored; consumed elsewhere).
        treat_life_lost_as_terminal (bool): stored flag — presumably makes
            a life loss end the episode; consumed elsewhere.
        crop_or_scale (str): screen preprocessing mode; consumed elsewhere.
        max_start_nullops (int): cap on initial no-ops; consumed elsewhere.
        record_screen_dir (str or None): if set, ALE records every screen
            into this directory.
    """
    self.n_last_screens = n_last_screens
    self.treat_life_lost_as_terminal = treat_life_lost_as_terminal
    self.crop_or_scale = crop_or_scale
    self.max_start_nullops = max_start_nullops

    ale = ALEInterface()
    if seed is not None:
        assert seed >= 0 and seed < 2 ** 16, \
            "ALE's random seed must be represented by unsigned int"
    else:
        # Use numpy's random state
        seed = np.random.randint(0, 2 ** 16)
    # All ALE settings must be applied before loadROM() below.
    ale.setInt(b'random_seed', seed)
    ale.setFloat(b'repeat_action_probability', 0.0)
    ale.setBool(b'color_averaging', False)
    if record_screen_dir is not None:
        ale.setString(b'record_screen_dir', str.encode(record_screen_dir))
    self.frame_skip = frame_skip
    if use_sdl:
        if 'DISPLAY' not in os.environ:
            raise RuntimeError(
                'Please set DISPLAY environment variable for use_sdl=True')
        # SDL settings below are from the ALE python example
        if sys.platform == 'darwin':
            import pygame
            pygame.init()
            ale.setBool(b'sound', False)  # Sound doesn't work on OSX
        elif sys.platform.startswith('linux'):
            ale.setBool(b'sound', True)
        ale.setBool(b'display_screen', True)
    ale.loadROM(str.encode(rom_filename))
    # ALE should report a fresh emulator right after loading the ROM.
    assert ale.getFrameNumber() == 0
    self.ale = ale
    self.legal_actions = ale.getMinimalActionSet()
    self.initialize()
screen_data = None #initialize the state image = ale.getScreenGrayscale(screen_data) image = impre(name_of_the_game, image) state = tc.stack((image, image, image, image), dim=0).unsqueeze(0).type(cpu_dtype) del image memory_buffer = [] # zeros = tc.zeros_like(image) # state_m = tc.zeros(sample_num, 4, 84, 84).type(gpu_dtype) epi_num = 0 epi_reward = 0 frame_num = ale.getFrameNumber() action = None # iteration loop while frame_num < 1e7: # reset_game if the game is over if ale.game_over() == True: epi_num = epi_num + 1 f = open('/home/juna/atari_project/plot/list.txt', 'a') f.write(str(epi_num) + ',' + str(int(epi_reward)) + '\n') f.close() print('=====epi_reward : ' + str(epi_reward) + '=====') epi_reward = 0 ale.reset_game()
class AtariEnvironment:
    """ALE wrapper for DQN-style training: 4-frame action repeat,
    flicker removal by max-pooling consecutive screens, life-loss
    terminal detection, and periodic screen capture to disk.
    """

    def __init__(self, args, outputDir):
        # `args` must provide .screen_capture_freq and .rom (ROM path).
        self.outputDir = outputDir
        self.screenCaptureFrequency = args.screen_capture_freq

        self.ale = ALEInterface()
        self.ale.setInt(b'random_seed', 123456)
        random.seed(123456)
        # Fix https://groups.google.com/forum/#!topic/deep-q-learning/p4FAIaabwlo
        self.ale.setFloat(b'repeat_action_probability', 0.0)

        # Load the ROM file
        self.ale.loadROM(args.rom)
        self.actionSet = self.ale.getMinimalActionSet()
        self.gameNumber = 0
        self.stepNumber = 0
        self.resetGame()

    def getNumActions(self):
        # Size of the minimal action set for the loaded ROM.
        return len(self.actionSet)

    def getState(self):
        return self.state

    def getGameNumber(self):
        return self.gameNumber

    def getFrameNumber(self):
        # Total ALE frames since the emulator was created.
        return self.ale.getFrameNumber()

    def getEpisodeFrameNumber(self):
        return self.ale.getEpisodeFrameNumber()

    def getEpisodeStepNumber(self):
        return self.episodeStepNumber

    def getStepNumber(self):
        return self.stepNumber

    def getGameScore(self):
        return self.gameScore

    def isGameOver(self):
        return self.ale.game_over()

    def step(self, action):
        """Apply `action` (index into actionSet) for up to 4 ALE frames.

        Returns (reward, state, isTerminal). The step is terminal when a
        life is lost or ALE reports game over.
        """
        previousLives = self.ale.lives()
        reward = 0
        isTerminal = 0

        self.stepNumber += 1
        self.episodeStepNumber += 1

        for i in range(4):
            prevScreenRGB = self.ale.getScreenRGB()
            reward += self.ale.act(self.actionSet[action])
            screenRGB = self.ale.getScreenRGB()

            # Detect end of episode, I don't think I'm handling this right in terms
            # of the overall game loop (??)
            if self.ale.lives() < previousLives or self.ale.game_over():
                isTerminal = 1
                break

            if self.gameNumber % self.screenCaptureFrequency == 0:
                dir = self.outputDir + '/screen_cap/game-%06d' % (
                    self.gameNumber)
                if not os.path.isdir(dir):
                    os.makedirs(dir)
                self.ale.saveScreenPNG(dir + '/frame-%06d.png' %
                                       (self.getEpisodeFrameNumber()))

        # Max over the last two raw frames to remove Atari sprite flicker.
        maxedScreen = np.maximum(screenRGB, prevScreenRGB)
        self.state = self.state.stateByAddingScreen(maxedScreen,
                                                    self.ale.getFrameNumber())
        self.gameScore += reward
        return reward, self.state, isTerminal

    def resetGame(self):
        # Only count a new game when ALE actually reports game over.
        if self.ale.game_over():
            self.gameNumber += 1
        self.ale.reset_game()
        self.state = State().stateByAddingScreen(self.ale.getScreenRGB(),
                                                 self.ale.getFrameNumber())
        self.gameScore = 0
        self.episodeStepNumber = 0  # environment steps vs ALE frames. Will probably be 4*frame number
class AtariEnvironment:
    """ALE wrapper for DQN-style training: 4-frame action repeat,
    flicker removal by max-pooling consecutive screens, life-loss
    terminal detection, and periodic screen capture to disk.
    (Duplicate variant of the class above.)
    """

    def __init__(self, args, outputDir):
        # `args` must provide .screen_capture_freq and .rom (ROM path).
        self.outputDir = outputDir
        self.screenCaptureFrequency = args.screen_capture_freq

        self.ale = ALEInterface()
        self.ale.setInt(b'random_seed', 123456)
        random.seed(123456)
        # Fix https://groups.google.com/forum/#!topic/deep-q-learning/p4FAIaabwlo
        self.ale.setFloat(b'repeat_action_probability', 0.0)

        # Load the ROM file
        self.ale.loadROM(args.rom)
        self.actionSet = self.ale.getMinimalActionSet()
        self.gameNumber = 0
        self.stepNumber = 0
        self.resetGame()

    def getNumActions(self):
        # Size of the minimal action set for the loaded ROM.
        return len(self.actionSet)

    def getState(self):
        return self.state

    def getGameNumber(self):
        return self.gameNumber

    def getFrameNumber(self):
        # Total ALE frames since the emulator was created.
        return self.ale.getFrameNumber()

    def getEpisodeFrameNumber(self):
        return self.ale.getEpisodeFrameNumber()

    def getEpisodeStepNumber(self):
        return self.episodeStepNumber

    def getStepNumber(self):
        return self.stepNumber

    def getGameScore(self):
        return self.gameScore

    def isGameOver(self):
        return self.ale.game_over()

    def step(self, action):
        """Apply `action` (index into actionSet) for up to 4 ALE frames.

        Returns (reward, state, isTerminal). The step is terminal when a
        life is lost or ALE reports game over.
        """
        previousLives = self.ale.lives()
        reward = 0
        isTerminal = 0

        self.stepNumber += 1
        self.episodeStepNumber += 1

        for i in range(4):
            prevScreenRGB = self.ale.getScreenRGB()
            reward += self.ale.act(self.actionSet[action])
            screenRGB = self.ale.getScreenRGB()

            # Detect end of episode, I don't think I'm handling this right in terms
            # of the overall game loop (??)
            if self.ale.lives() < previousLives or self.ale.game_over():
                isTerminal = 1
                break

            if self.gameNumber % self.screenCaptureFrequency == 0:
                dir = self.outputDir + '/screen_cap/game-%06d' % (self.gameNumber)
                if not os.path.isdir(dir):
                    os.makedirs(dir)
                self.ale.saveScreenPNG(dir + '/frame-%06d.png' %
                                       (self.getEpisodeFrameNumber()))

        # Max over the last two raw frames to remove Atari sprite flicker.
        maxedScreen = np.maximum(screenRGB, prevScreenRGB)
        self.state = self.state.stateByAddingScreen(maxedScreen,
                                                    self.ale.getFrameNumber())
        self.gameScore += reward
        return reward, self.state, isTerminal

    def resetGame(self):
        # Only count a new game when ALE actually reports game over.
        if self.ale.game_over():
            self.gameNumber += 1
        self.ale.reset_game()
        self.state = State().stateByAddingScreen(self.ale.getScreenRGB(),
                                                 self.ale.getFrameNumber())
        self.gameScore = 0
        self.episodeStepNumber = 0  # environment steps vs ALE frames. Will probably be 4*frame number
class Agent(object):
    """Abstract base class for Enduro agents.

    Owns the ALE emulator, a Controller, and a StateExtractor; run()
    drives fixed-length (6500-frame) episodes. Subclasses implement
    initialise/act/sense/learn/callback.
    """

    def __init__(self):
        # Emulator settings are applied before the ROM is loaded.
        self._ale = ALEInterface()
        self._ale.setInt('random_seed', 123)
        self._ale.setFloat('repeat_action_probability', 0.0)
        self._ale.setBool('color_averaging', False)
        self._ale.loadROM('roms/enduro.bin')
        self._controller = Controller(self._ale)
        self._extractor = StateExtractor(self._ale)
        self._image = None

    def run(self, learn, episodes=1, draw=False):
        """Play (and optionally learn over) a number of episodes.

        Args:
            learn(bool): Whether the self.learn() function should be called.
            episodes (int): The number of episodes to run the agent for.
            draw (bool): Whether to overlay the environment state on the frame.

        Returns:
            None
        """
        for episode_idx in range(episodes):
            # First observation initialises the subclass state.
            grid, self._image = self._extractor.run(draw=draw, scale=4.0)
            self.initialise(grid)

            episode_start_frame = self._ale.getFrameNumber()

            # Each episode lasts 6500 frames
            while self._ale.getFrameNumber() - episode_start_frame < 6500:
                self.act()
                grid, self._image = self._extractor.run(draw=draw, scale=4.0)
                self.sense(grid)
                if learn:
                    self.learn()
                self.callback(learn, episode_idx + 1,
                              self._ale.getFrameNumber() - episode_start_frame)
            self._ale.reset_game()

    def getActionsSet(self):
        """Return the list of all actions available to the agent."""
        return [Action.ACCELERATE, Action.RIGHT, Action.LEFT, Action.BRAKE]

    def move(self, action):
        """Execute `action` (one of getActionsSet()) and advance the game.

        Returns:
            int: The obtained reward after executing the action
        """
        return self._controller.move(action)

    def initialise(self, grid):
        """Initialise subclass state at the start of each episode.

        Args:
            grid (np.ndarray): 11x10 array with the initial environment grid.
        """
        raise NotImplementedError

    def act(self):
        """Choose and execute an action; called once per loop iteration."""
        raise NotImplementedError

    def sense(self, grid):
        """Build the new state from the updated environment grid."""
        raise NotImplementedError

    def learn(self):
        """Perform one learning update; called only when learning is on."""
        raise NotImplementedError

    def callback(self, learn, episode, iteration):
        """Reporting hook called once per loop iteration.

        Args:
            learn (bool): Indicates whether the agent is learning or not.
            episode (int): The number of the current episode.
            iteration (int): The number of the current iteration.
        """
        raise NotImplementedError
font = pygame.font.SysFont("Ubuntu Mono",30) text = font.render("Total Reward: " + str(total_reward) ,1,(208,255,255)) screen.blit(text,(330,line_pos)) pygame.display.flip() #process pygame event queue exit=False for event in pygame.event.get(): if event.type == pygame.QUIT: exit=True break; if(pressed[pygame.K_q]): exit = True if(exit): logger.close(); break #delay to 60fps clock.tick(60.) if(ale.game_over()): episode_frame_number = ale.getEpisodeFrameNumber() frame_number = ale.getFrameNumber() print("Frame Number: " + str(frame_number) + " Episode Frame Number: " + str(episode_frame_number)) print("Episode " + str(episode) + " ended with score: " + str(total_reward)) ale.reset_game() total_reward = 0.0 episode = episode + 1
class GameManager(object):
    """This class takes care of the interactions between an agent and a
    game across episodes, as well as overall logging of performance.
    """

    def __init__(
        self,
        game_name,
        agent,
        results_dir,
        n_epochs=1,
        n_episodes=None,
        n_frames=None,
        remove_old_results_dir=False,
        use_minimal_action_set=True,
        min_time_between_frames=0,
    ):
        """game_name is one of the supported games (there are many), as a string: "space_invaders.bin"
        agent is an instance of a subclass of the Agent interface
        results_dir is a string representing a directory in which results and logs are placed
            If it does not exist, it is created.
        use_minimal_action_set determines whether the agent is offered all possible actions,
            or only those (minimal) that are applicable to the specific game.
        min_time_between_frames is the minimum required time in seconds between
            frames. If 0, the game is unrestricted.
        """
        self.game_name = game_name
        self.agent = agent
        self.use_minimal_action_set = use_minimal_action_set
        self.min_time_between_frames = min_time_between_frames
        self.n_epochs = n_epochs
        self.n_episodes = n_episodes
        self.n_frames = n_frames
        # Exactly one stopping criterion (episodes or frames) is required.
        if (n_episodes is None and n_frames is None) or (n_episodes is not None and n_frames is not None):
            # FIX: error message typo "Extacly" -> "Exactly".
            raise ValueError("Exactly one of n_episodes and n_frames "
                             "must be defined")
        self.initialize_results_dir(results_dir, remove_old_results_dir)
        self.log = util.logging.Logger(
            ("settings", "step", "episode", "epoch", "overall"),
            "settings",
            os.path.join(self.results_dir, "GameManager.log"),
        )
        self.stats = util.logging.CSVLogger(
            os.path.join(self.results_dir, "stats.log"),
            header="epoch,episode,total_reward,n_frames,wall_time",
            print_items=True,
        )
        self._object_cache = dict()
        self.initialize_ale()
        self.initialize_agent()
        self.dump_settings()

    def initialize_results_dir(self, results_dir, remove_existing=False):
        """Creates the whole path of directories if they do not exist.
        If they do exist, raises an error unless remove_existing is True,
        in which case the existing directory is deleted.
        """
        now = datetime.now().strftime("%Y%m%d-%H-%M")
        # drop .bin, append current time down to the minute
        results_dir = os.path.join(results_dir, self.game_name[:-4] + now)
        if remove_existing:
            if os.path.exists(results_dir):
                shutil.rmtree(results_dir)
        # Should raise an error if directory exists
        os.makedirs(results_dir)
        self.results_dir = results_dir

    def initialize_ale(self):
        self.ale = ALEInterface()
        self.ale.loadROM(os.path.join(ROM_RELATIVE_LOCATION, self.game_name))

    def initialize_agent(self):
        # Hand the agent callbacks for raw screen/RAM access plus the
        # action set it is allowed to use.
        RSC = namedtuple("RawStateCallbacks", ["raw", "grey", "rgb", "ram"])
        raw_state_callbacks = RSC(self.get_screen,
                                  self.get_screen_grayscale,
                                  self.get_screen_RGB,
                                  self.get_RAM)
        self.agent.set_raw_state_callbacks(raw_state_callbacks)
        self.agent.set_results_dir(self.results_dir)
        if self.use_minimal_action_set:
            actions = self.ale.getMinimalActionSet()
        else:
            actions = self.ale.getLegalActionSet()
        self.agent.set_available_actions(actions)

    def rest(self, already_elapsed):
        # Sleep just long enough to honour min_time_between_frames.
        rest_time = self.min_time_between_frames - already_elapsed
        if rest_time > 0:
            sleep(rest_time)

    def run(self):
        """Runs self.n_epochs epochs, where the agent's learning is
        reset for each new epoch.
        Each epoch lasts self.n_episodes or self.n_frames, whichever is
        defined.
        """
        self.log.overall("Starting run")
        run_start = time()
        # FIX: was `xrange`, which does not exist on Python 3.
        for epoch in range(self.n_epochs):
            self.agent.reset()
            self.n_epoch = epoch
            self._run_epoch()
        self.log.overall("End of run ({:.2f} s)".format(time() - run_start))

    def _run_epoch(self):
        self.n_episode = 0
        start = time()
        while not self._stop_condition_met():
            self._run_episode()
            self.n_episode += 1
        wall_time = time() - start
        # NOTE: removed an unused `frames = self.ale.getFrameNumber()` local.
        self.log.epoch("Finished epoch after {:.2f} seconds".format(wall_time))

    def _run_episode(self):
        """Play one episode, throttling frames and logging stats at the end."""
        self.ale.reset_game()
        self.agent.on_episode_start()
        total_reward = 0
        episode_start = time()
        while (not self.ale.game_over()) and (not self._stop_condition_met()):
            timestep_start = time()
            action = self.agent.select_action()
            reward = self.ale.act(action)
            self.agent.receive_reward(reward)
            total_reward += reward
            self.rest(time() - timestep_start)
        wall_time = time() - episode_start
        self.agent.on_episode_end()
        # Stats format: CSV with epoch, episode, total_reward, n_frames, wall_time
        self.stats.write(
            self.n_epoch,
            self.n_episode,
            total_reward,
            self.ale.getEpisodeFrameNumber(),
            "{:.2f}".format(wall_time)
        )

    def _stop_condition_met(self):
        # Episode-count criterion takes precedence when it is set.
        if self.n_episodes:
            return self.n_episode >= self.n_episodes
        return self.ale.getFrameNumber() >= self.n_frames

    # Methods for state perception
    def get_screen(self):
        """Returns a matrix containing the current game screen in raw pixel data,
        i.e. before conversion to RGB. Handles reuse of np.array object, so it will
        overwrite what is in the old object"""
        return self._cached("raw", self.ale.getScreen)

    def get_screen_grayscale(self):
        """Returns an np.array with the screen grayscale colours. Handles reuse of
        np.array object, so it will overwrite what is in the old object.
        """
        return self._cached("gray", self.ale.getScreenGrayscale)

    def get_screen_RGB(self):
        """Returns a numpy array with the screen's RGB colours.
        The first positions contain the red colours, followed by
        the green colours and then the blue colours"""
        return self._cached("rgb", self.ale.getScreenRGB)

    def get_RAM(self):
        """Returns a vector containing current RAM content (byte-level).
        Handles reuse of np.array object, so it will overwrite what is in the old
        object"""
        return self._cached("ram", self.ale.getRAM)

    def _cached(self, key, func):
        # First call allocates via func(); later calls let func refill
        # the cached array in place.
        if key in self._object_cache:
            func(self._object_cache[key])
        else:
            self._object_cache[key] = func()
        return self._object_cache[key]

    def dump_settings(self):
        import json

        settings = self.get_settings()
        path = os.path.join(self.results_dir, "settings")
        with open(path, "w") as f:
            json.dump(settings, f, indent=4)

    def get_settings(self):
        """Returns a dict representing the settings needed to
        reproduce this object and its subobjects
        """
        return {
            "game_name": self.game_name,
            "n_epochs": self.n_epochs,
            "n_episodes": self.n_episodes,
            "n_frames": self.n_frames,
            "agent": self.agent.get_settings(),
            "results_dir": self.results_dir,
            "use_minimal_action_set": self.use_minimal_action_set,
        }
class GameManager(object):
    """This class takes care of the interactions between an agent and a
    game across episodes, as well as overall logging of performance.
    """

    def __init__(self, game_name, agent, results_dir,
                 n_epochs=1, n_episodes=None, n_frames=None,
                 remove_old_results_dir=False, use_minimal_action_set=True,
                 min_time_between_frames=0):
        """game_name is one of the supported games (there are many), as a string: "space_invaders.bin"
        agent is an instance of a subclass of the Agent interface
        results_dir is a string representing a directory in which results and logs are placed
            If it does not exist, it is created.
        use_minimal_action_set determines whether the agent is offered all possible actions,
            or only those (minimal) that are applicable to the specific game.
        min_time_between_frames is the minimum required time in seconds between
            frames. If 0, the game is unrestricted.
        """
        self.game_name = game_name
        self.agent = agent
        self.use_minimal_action_set = use_minimal_action_set
        self.min_time_between_frames = min_time_between_frames
        self.n_epochs = n_epochs
        self.n_episodes = n_episodes
        self.n_frames = n_frames
        # Exactly one stopping criterion (episodes or frames) is required.
        if ((n_episodes is None and n_frames is None) or
                (n_episodes is not None and n_frames is not None)):
            # FIX: error message typo "Extacly" -> "Exactly".
            raise ValueError("Exactly one of n_episodes and n_frames "
                             "must be defined")
        self.initialize_results_dir(results_dir, remove_old_results_dir)
        self.log = util.logging.Logger(
            ('settings', 'step', 'episode', 'epoch', 'overall'),
            'settings',
            os.path.join(self.results_dir, 'GameManager.log'))
        self.stats = util.logging.CSVLogger(
            os.path.join(self.results_dir, 'stats.log'),
            header='epoch,episode,total_reward,n_frames,wall_time',
            print_items=True)
        self._object_cache = dict()
        self.initialize_ale()
        self.initialize_agent()
        self.dump_settings()

    def initialize_results_dir(self, results_dir, remove_existing=False):
        """Creates the whole path of directories if they do not exist.
        If they do exist, raises an error unless remove_existing is True,
        in which case the existing directory is deleted.
        """
        now = datetime.now().strftime('%Y%m%d-%H-%M')
        # drop .bin, append current time down to the minute
        results_dir = os.path.join(results_dir, self.game_name[:-4] + now)
        if remove_existing:
            if os.path.exists(results_dir):
                shutil.rmtree(results_dir)
        # Should raise an error if directory exists
        os.makedirs(results_dir)
        self.results_dir = results_dir

    def initialize_ale(self):
        self.ale = ALEInterface()
        self.ale.loadROM(os.path.join(ROM_RELATIVE_LOCATION, self.game_name))

    def initialize_agent(self):
        # Hand the agent callbacks for raw screen/RAM access plus the
        # action set it is allowed to use.
        RSC = namedtuple('RawStateCallbacks', ['raw', 'grey', 'rgb', 'ram'])
        raw_state_callbacks = RSC(self.get_screen,
                                  self.get_screen_grayscale,
                                  self.get_screen_RGB,
                                  self.get_RAM)
        self.agent.set_raw_state_callbacks(raw_state_callbacks)
        self.agent.set_results_dir(self.results_dir)
        if self.use_minimal_action_set:
            actions = self.ale.getMinimalActionSet()
        else:
            actions = self.ale.getLegalActionSet()
        self.agent.set_available_actions(actions)

    def rest(self, already_elapsed):
        # Sleep just long enough to honour min_time_between_frames.
        rest_time = self.min_time_between_frames - already_elapsed
        if rest_time > 0:
            sleep(rest_time)

    def run(self):
        """Runs self.n_epochs epochs, where the agent's learning is
        reset for each new epoch.
        Each epoch lasts self.n_episodes or self.n_frames, whichever is
        defined.
        """
        self.log.overall('Starting run')
        run_start = time()
        # FIX: was `xrange`, which does not exist on Python 3.
        for epoch in range(self.n_epochs):
            self.agent.reset()
            self.n_epoch = epoch
            self._run_epoch()
        self.log.overall('End of run ({:.2f} s)'.format(time() - run_start))

    def _run_epoch(self):
        self.n_episode = 0
        start = time()
        while not self._stop_condition_met():
            self._run_episode()
            self.n_episode += 1
        wall_time = (time() - start)
        # NOTE: removed an unused `frames = self.ale.getFrameNumber()` local.
        self.log.epoch("Finished epoch after {:.2f} seconds".format(wall_time))

    def _run_episode(self):
        """Play one episode, throttling frames and logging stats at the end."""
        self.ale.reset_game()
        self.agent.on_episode_start()
        total_reward = 0
        episode_start = time()
        while (not self.ale.game_over()) and (not self._stop_condition_met()):
            timestep_start = time()
            action = self.agent.select_action()
            reward = self.ale.act(action)
            self.agent.receive_reward(reward)
            total_reward += reward
            self.rest(time() - timestep_start)
        wall_time = time() - episode_start
        self.agent.on_episode_end()
        # Stats format: CSV with epoch, episode, total_reward, n_frames, wall_time
        self.stats.write(self.n_epoch, self.n_episode, total_reward,
                         self.ale.getEpisodeFrameNumber(),
                         '{:.2f}'.format(wall_time))

    def _stop_condition_met(self):
        # Episode-count criterion takes precedence when it is set.
        if self.n_episodes:
            return self.n_episode >= self.n_episodes
        return self.ale.getFrameNumber() >= self.n_frames

    # Methods for state perception
    def get_screen(self):
        """Returns a matrix containing the current game screen in raw pixel data,
        i.e. before conversion to RGB. Handles reuse of np.array object, so it will
        overwrite what is in the old object"""
        return self._cached('raw', self.ale.getScreen)

    def get_screen_grayscale(self):
        """Returns an np.array with the screen grayscale colours. Handles reuse of
        np.array object, so it will overwrite what is in the old object.
        """
        return self._cached('gray', self.ale.getScreenGrayscale)

    def get_screen_RGB(self):
        """Returns a numpy array with the screen's RGB colours.
        The first positions contain the red colours, followed by
        the green colours and then the blue colours"""
        return self._cached('rgb', self.ale.getScreenRGB)

    def get_RAM(self):
        """Returns a vector containing current RAM content (byte-level).
        Handles reuse of np.array object, so it will overwrite what is in the old
        object"""
        return self._cached('ram', self.ale.getRAM)

    def _cached(self, key, func):
        # First call allocates via func(); later calls let func refill
        # the cached array in place.
        if key in self._object_cache:
            func(self._object_cache[key])
        else:
            self._object_cache[key] = func()
        return self._object_cache[key]

    def dump_settings(self):
        import json

        settings = self.get_settings()
        path = os.path.join(self.results_dir, 'settings')
        with open(path, 'w') as f:
            json.dump(settings, f, indent=4)

    def get_settings(self):
        """Returns a dict representing the settings needed to
        reproduce this object and its subobjects
        """
        return {
            "game_name": self.game_name,
            "n_epochs": self.n_epochs,
            "n_episodes": self.n_episodes,
            "n_frames": self.n_frames,
            "agent": self.agent.get_settings(),
            "results_dir": self.results_dir,
            "use_minimal_action_set": self.use_minimal_action_set,
        }
class Agent(object):
    """Abstract base class for Enduro agents: owns the ALE emulator, a
    Controller and a StateExtractor, and drives 6500-frame episodes.
    Subclasses implement initialise/act/sense/learn/callback.
    """

    def __init__(self):
        # Emulator settings are applied before the ROM is loaded.
        self._ale = ALEInterface()
        self._ale.setInt('random_seed', 123)
        self._ale.setFloat('repeat_action_probability', 0.0)
        self._ale.setBool('color_averaging', False)
        self._ale.loadROM('roms/enduro.bin')
        self._controller = Controller(self._ale)
        self._extractor = StateExtractor(self._ale)
        self._image = None

    def run(self, learn, episodes=1, draw=False):
        """ Implements the playing/learning loop.

        Args:
            learn(bool): Whether the self.learn() function should be called.
            episodes (int): The number of episodes to run the agent for.
            draw (bool): Whether to overlay the environment state on the frame.

        Returns:
            None
        """
        for e in range(episodes):
            # Observe the environment to set the initial state
            (grid, self._image) = self._extractor.run(draw=draw, scale=4.0)
            self.initialise(grid)
            num_frames = self._ale.getFrameNumber()

            # Each episode lasts 6500 frames
            while self._ale.getFrameNumber() - num_frames < 6500:
                # Take an action
                self.act()

                # Update the environment grid
                (grid, self._image) = self._extractor.run(draw=draw, scale=4.0)
                self.sense(grid)

                # Perform learning if required
                if learn:
                    self.learn()

                self.callback(learn, e + 1,
                              self._ale.getFrameNumber() - num_frames)
            self._ale.reset_game()

    def getActionsSet(self):
        """ Returns the set of all possible actions
        """
        # NOTE(review): the other Agent variants in this file return
        # Action.BRAKE here; confirm that Action.BREAK exists in the enum
        # version this variant targets, otherwise this raises AttributeError.
        return [Action.ACCELERATE, Action.RIGHT, Action.LEFT, Action.BREAK]

    def move(self, action):
        """ Executes the action and advances the game to the next state.

        Args:
            action (int): The action which should executed. Make sure to use
                          the constants returned by self.getActionsSet()

        Returns:
           int: The obtained reward after executing the action
        """
        return self._controller.move(action)

    def initialise(self, grid):
        """ Called at the beginning of each episode, mainly used
        for state initialisation.

        Args:
            grid (np.ndarray): 11x10 array with the initial environment grid.

        Returns:
            None
        """
        raise NotImplementedError

    def act(self):
        """ Called at each loop iteration to choose and execute an action.

        Returns:
            None
        """
        raise NotImplementedError

    def sense(self, grid):
        """ Called at each loop iteration to construct the new state from
        the updated environment grid.

        Returns:
            None
        """
        raise NotImplementedError

    def learn(self):
        """ Called at each loop iteration when the agent is learning. It should
        implement the learning procedure.

        Returns:
            None
        """
        raise NotImplementedError

    def callback(self, learn, episode, iteration):
        """ Called at each loop iteration mainly for reporting purposes.

        Args:
            learn (bool): Indicates whether the agent is learning or not.
            episode (int): The number of the current episode.
            iteration (int): The number of the current iteration.

        Returns:
            None
        """
        raise NotImplementedError
class Agent(object):
    """Abstract base class for Enduro agents with a modelled relative
    speed: the agent tracks its speed vs. opponents, detects collisions
    from extracted car positions, and resets speed on collision.
    Subclasses implement initialise/act/sense/learn/callback.
    """

    def __init__(self):
        # Emulator settings are applied before the ROM is loaded.
        self._ale = ALEInterface()
        self._ale.setInt('random_seed', 123)
        self._ale.setFloat('repeat_action_probability', 0.0)
        self._ale.setBool('color_averaging', False)
        self._ale.loadROM('roms/enduro.bin')
        self._controller = Controller(self._ale)
        self._extractor = StateExtractor(self._ale)
        self._image = None
        # Relative speed is clamped to [-_speed_range, _speed_range].
        self._speed_range = 50

    def run(self, learn, episodes=1, draw=False):
        """ Implements the playing/learning loop.

        Args:
            learn(bool): Whether the self.learn() function should be called.
            episodes (int): The number of episodes to run the agent for.
            draw (bool): Whether to overlay the environment state on the frame.

        Returns:
            None
        """
        for e in range(episodes):
            # Each episode starts at minimum relative speed.
            self._relative_speed = -self._speed_range

            # Observe the environment to set the initial state
            (road, cars, grid, self._image) = self._extractor.run(draw=draw, scale=4.0)
            self.initialise(road, cars, self._relative_speed, grid)
            num_frames = self._ale.getFrameNumber()

            # Each episode lasts 6500 frames
            while self._ale.getFrameNumber() - num_frames < 6500:
                # Take an action
                self.act()

                # Update the environment grid
                (road, cars, grid, self._image) = self._extractor.run(draw=draw, scale=4.0)

                # A collision knocks the relative speed back to minimum.
                if self.collision(cars):
                    self._relative_speed = -self._speed_range

                self.sense(road, cars, self._relative_speed, grid)

                # Perform learning if required
                if learn:
                    self.learn()

                self.callback(learn, e + 1,
                              self._ale.getFrameNumber() - num_frames)
            self._ale.reset_game()

    def collision(self, cars):
        """Return True when the nearest opponent is within 18 px and at an
        angle in (0.1*pi, 0.9*pi) relative to the agent — presumably the
        sector directly ahead; TODO confirm against StateExtractor's
        coordinate convention.
        """
        if not cars['others']:
            return False

        x, y, _, _ = cars['self']
        min_dist = sys.float_info.max
        min_angle = 0.
        # Find the closest opponent and the angle towards it.
        for c in cars['others']:
            cx, cy, _, _ = c
            dist = np.sqrt((cx - x)**2 + (cy - y)**2)
            if dist < min_dist:
                min_dist = dist
                min_angle = np.arctan2(y - cy, cx - x)
        return min_dist < 18. \
            and 0.1 * np.pi < min_angle and min_angle < 0.9 * np.pi

    def getActionsSet(self):
        """ Returns the set of all possible actions
        """
        return [Action.ACCELERATE, Action.RIGHT, Action.LEFT, Action.BRAKE]

    def move(self, action):
        """ Executes the action and advances the game to the next state.

        Args:
            action (int): The action which should executed. Make sure to use
                          the constants returned by self.getActionsSet()

        Returns:
           int: The obtained reward after executing the action
        """
        # Accelerating/braking adjusts the modelled relative speed within
        # the clamped range before the controller applies the action.
        if action == Action.ACCELERATE:
            self._relative_speed = min(self._relative_speed + 1, self._speed_range)
        elif action == Action.BRAKE:
            self._relative_speed = max(self._relative_speed - 1, -self._speed_range)
        return self._controller.move(action)

    def initialise(self, road, cars, speed, grid):
        """ Called at the beginning of each episode, mainly used
        for state initialisation.

        For more information on the arguments have a look at the README.md

        Args:
            road: 2-dimensional array containing [x, y] points
                  in pixel coordinates of the road grid
            cars: dictionary which contains the location and the size of
                  the agent and the opponents in pixel coordinates
            speed: the relative speed of the agent with respect the others
            grid: 2-dimensional numpy array containing the latest grid
                  representation of the environment

        Returns:
            None
        """
        raise NotImplementedError

    def act(self):
        """ Called at each loop iteration to choose and execute an action.

        Returns:
            None
        """
        raise NotImplementedError

    def sense(self, road, cars, speed, grid):
        """ Called at each loop iteration to construct the new state from
        the updated environment grid.

        For more information on the arguments have a look at the README.md

        Args:
            road: 2-dimensional array containing [x, y] points
                  in pixel coordinates of the road grid
            cars: dictionary which contains the location and the size of
                  the agent and the opponents in pixel coordinates
            speed: the relative speed of the agent with respect the others
            grid: 2-dimensional numpy array containing the latest grid
                  representation of the environment

        Returns:
            None
        """
        raise NotImplementedError

    def learn(self):
        """ Called at each loop iteration when the agent is learning. It should
        implement the learning procedure.

        Returns:
            None
        """
        raise NotImplementedError

    def callback(self, learn, episode, iteration):
        """ Called at each loop iteration mainly for reporting purposes.

        Args:
            learn (bool): Indicates whether the agent is learning or not.
            episode (int): The number of the current episode.
            iteration (int): The number of the current iteration.

        Returns:
            None
        """
        raise NotImplementedError