def __init__(self, rom, frame_skip=4, show_rom=False):
    """
    Create the emulator, configure its frame skip, load the ROM and allocate the screen buffer.

    Parameters
    ----------
    rom : byte string
        Path of the rom to load, passed unchanged to ``ALEInterface.loadROM``
    frame_skip : int
        Default 4. Value written to the emulator's ``frame_skip`` setting
    show_rom : boolean
        Default False. Passed unchanged to the ``ALEInterface`` constructor
    """
    # the frame_skip setting is written before the rom is loaded, same order as always
    self.ale = emulator = ALEInterface(show_rom)
    emulator.setInt(b'frame_skip', frame_skip)
    emulator.loadROM(rom)

    # one reusable (height, width, 1) uint8 buffer instead of a fresh array per screen grab
    screen_w, screen_h = emulator.getScreenDims()
    buffer_shape = (screen_h, screen_w, 1)
    self.gamescreen = np.empty(buffer_shape, dtype=np.uint8)
def __init__(self, rom, show_rom, skip_frame, learner=None):
    """
    Create the emulator, load the ROM and optionally hand the minimal action set to a learner.

    Parameters
    ----------
    rom : byte string
        Path of the rom to load, passed unchanged to ``ALEInterface.loadROM``
    show_rom : boolean
        Passed unchanged to the ``ALEInterface`` constructor
    skip_frame : int
        Stored as ``self.skipFrame``
    learner : object with a ``set_legal_actions`` method
        Default None. When given, ``learner.set_legal_actions`` is called with the minimal action set
    """
    # set up emulator
    self.ale = ALEInterface(show_rom)
    self.ale.loadROM(rom)
    (self.screen_width, self.screen_height) = self.ale.getScreenDims()
    legal_actions = self.ale.getMinimalActionSet()

    # set up vars
    self.skipFrame = skip_frame
    # compare against None explicitly: a learner object that happens to be falsy
    # (e.g. it defines __len__ or __bool__) must still receive its legal actions
    if learner is not None:
        learner.set_legal_actions(legal_actions)
    self.total_frame_count = 0
def main():
    """
    Play one episode with random actions and print the RAM columns that could hold the lives counter.

    The RAM array is recorded before every action. Afterwards every column that holds a value greater than 3
    at any recorded step is zeroed, the result is plotted, and the unique indices of the columns that still
    contain a non-zero value are printed.
    """
    import time

    import numpy as np
    from learningALE.libs.ale_python_interface import ALEInterface

    # this script is used to try and find what ram index stores the number of lives for a game
    emulator = ALEInterface(True)
    emulator.loadROM(b'D:\\_code\\beam_rider.bin')
    # queried as before; the dimensions themselves are not used below
    (screen_width, screen_height) = emulator.getScreenDims()
    actions = emulator.getLegalActionSet()

    frames_played = 0
    ram_history = []
    started = time.time()
    for episode in range(1):
        score = 0.0
        while not emulator.game_over():
            choice = actions[np.random.randint(actions.size)]
            # snapshot the RAM before acting
            ram_history.append(emulator.getRAM())
            score += emulator.act(choice)
            frames_played += 1
        print("Episode ", episode, " ended with score: ", score, sep="")
        emulator.reset_game()

    elapsed = time.time() - started
    print(elapsed, frames_played / elapsed)

    import matplotlib.pyplot as plt

    ram_matrix = np.asarray(ram_history)
    # np.nonzero(cond) is exactly what single-argument np.where(cond) returns; [1] is the column index
    oversized_cols = np.nonzero(ram_matrix > 3)[1]
    ram_matrix[:, oversized_cols] = 0
    plt.plot(ram_matrix)

    candidate_cols = np.unique(np.nonzero(ram_matrix != 0)[1])
    print(candidate_cols)
def main():
    """
    Run a pickled AlloEggoCnn on Breakout for 100 episodes and live-plot the score of each episode.

    For every decision step ``skipFrame`` screens are captured, reduced to their first channel, resized by 0.5
    and divided by 255. The last action of the action handler is repeated for each of those frames. A positive
    emulator reward counts as +1 and a change of RAM byte 57 (compared with the tracked ``lives`` value) counts
    as -1. The stacked frames are then fed to the network and its output is handed to the action handler.
    """
    import os
    import pickle
    import time

    import lasagne
    import matplotlib.pyplot as plt
    import numpy as np
    from learners.nns import AlloEggoCnn
    from scipy.misc import imresize

    from learningALE.handlers import ActionHandler, ActionPolicy
    from learningALE.libs.ale_python_interface import ALEInterface

    dtype = np.float16
    plt.ion()

    # set up emulator
    ale = ALEInterface(True)
    ale.loadROM(b'D:\\_code\\breakout.bin')
    (screen_width, screen_height) = ale.getScreenDims()
    legal_actions = ale.getMinimalActionSet()
    lives = 5

    # set up vars
    skipFrame = 4
    actionHandler = ActionHandler(ActionPolicy.eGreedy, (0.1, 0.1, 2), legal_actions)
    scoreList = list()

    cnn = AlloEggoCnn()
    # os.path.join replaces a hand-written backslash path whose \s and \c were invalid escape sequences;
    # on Windows the resulting path is unchanged, elsewhere the native separator is used
    params_path = os.path.join(os.getcwd(), 'saves', 'cnnbestalloego.pkl')
    # NOTE: pickle.load can execute arbitrary code from the file; only load parameter files you trust
    with open(params_path, 'rb') as fin:
        parms = pickle.load(fin)
    lasagne.layers.set_all_param_values(cnn.a_out, parms)

    frameCount = 0
    st = time.time()
    for episode in range(100):
        total_reward = 0.0
        while not ale.game_over():
            # get frames
            frames = list()
            reward = 0
            for _ in range(skipFrame):
                gamescreen = ale.getScreenRGB()
                # view the screen as bytes, keep channel 0, halve the size, scale by 1/255
                first_channel = gamescreen.view(np.uint8).reshape(screen_height, screen_width, 4)[:, :, 0]
                processedImg = np.asarray(imresize(first_channel, 0.5, interp='nearest'), dtype=dtype) / 255
                frames.append(processedImg)

                # repeat the handler's last action for every skipped frame
                performedAction, _ = actionHandler.getLastAction()
                rew = ale.act(performedAction)
                if rew > 0:
                    reward += 1

                # RAM byte 57 is compared with the tracked lives value; any change is penalised
                ram = ale.getRAM()
                if ram[57] != lives:
                    reward -= 1
                    lives = ram[57]

            frames = np.asarray(frames)
            actionVect = cnn.get_output(frames.reshape((1, skipFrame, 105, 80)))[0]
            actionHandler.setAction(actionVect)
            total_reward += reward
            frameCount += skipFrame

        ale.reset_game()
        actionHandler.anneal()
        scoreList.append(total_reward)
        lives = 5
        print("Episode " + str(episode) + " ended with score: " + str(total_reward))

        et = time.time()
        print('Total Time:', et - st, 'Frame Count:', frameCount, 'FPS:', frameCount / (et - st))

        plt.clf()
        plt.plot(scoreList, '.')
        plt.pause(0.01)
    plt.ioff()
__author__ = 'Ben' import time import numpy as np from learningALE.libs.ale_python_interface import ALEInterface """ This example is meant for those wanting to play around with GameHandler, or implement their own ALE interface. For people that want a plug and play interface use :class:`handlers.GameHandler` """ # start up the python ale interface ale = ALEInterface() # load a rom ale.loadROM(b'd:\_code\_reinforcementlearning\\breakout.bin') # screen dimensions and legal actions (screen_width, screen_height) = ale.getScreenDims() legal_actions = ale.getLegalActionSet() frameCount = 0 st = time.time() for episode in range(1): total_reward = 0.0 while not ale.game_over(): # get a random action a = legal_actions[np.random.randint(legal_actions.size)] # get gamescreen and convert to usable format (Height x Width x Channels) gamescreen = ale.getScreenRGB()
12, #10100 fire left 15, #10101 fire up/left 17, #10110 fire down/left 15, #10111 fire up/down/left (invalid) 11, #11000 fire right 14, #11001 fire up/right 16, #11010 fire down/right 14, #11011 fire up/down/right (invalid) 11, #11100 fire left/right (invalid) 14, #11101 fire left/right/up (invalid) 16, #11110 fire left/right/down (invalid) 14 #11111 fire up/down/left/right (invalid) ) ale = ALEInterface(False) rom = b'D:\\_code\\montezuma_revenge.bin' ale.loadROM(rom) legal_actions = ale.getMinimalActionSet() print(legal_actions) (screen_width,screen_height) = ale.getScreenDims() print("width/height: " +str(screen_width) + "/" + str(screen_height)) (display_width,display_height) = (1024,420) #init pygame pygame.init() screen = pygame.display.set_mode((display_width,display_height)) pygame.display.set_caption("Arcade Learning Environment Player Agent Display")
class GameHandler:
    """
    The :class:`GameHandler` class takes care of the interface between the ALE and the learner.

    Currently supported is the ability to display the ALE screen when running and to skip x number of frames by
    repeating the last action. Frames are handed to the learner as returned by ``imresize``; this class performs
    no dtype conversion.

    Parameters
    ----------
    rom : byte string
        Specifies the directory to load the rom from. Must be a byte string: b'dir_for_rom/rom.bin'
    show_rom : boolean
        Whether or not to show the game being played or not. True takes longer to run but can be fun to watch
    skip_frame : int
        Number of frames to skip using the last action chosen
    learner : :class:`learners.learner`
        Default None. The learner, on construction GameHandler will call set_legal_actions. If none then
        set_legal_actions needs to be called
    """
    def __init__(self, rom, show_rom, skip_frame, learner=None):
        # set up emulator
        self.ale = ALEInterface(show_rom)
        self.ale.loadROM(rom)
        (self.screen_width, self.screen_height) = self.ale.getScreenDims()
        legal_actions = self.ale.getMinimalActionSet()

        # set up vars
        self.skipFrame = skip_frame
        # compare against None explicitly: a learner object that happens to be falsy
        # (e.g. it defines __len__ or __bool__) must still receive its legal actions
        if learner is not None:
            learner.set_legal_actions(legal_actions)
        self.total_frame_count = 0

    def run_one_game(self, learner, neg_reward=False, early_return=False, clip=True, max_episode_frame=np.inf):
        """
        Resets the game and runs it until ale.game_over() is true or the episode frame number reaches
        max_episode_frame.

        Parameters
        ----------
        learner : :class:`learners.learner`
            Will call get_game_action and frames_processed. get_game_action must return a valid ALE action ind.
            frames_processed can be a pass.
        neg_reward : bool
            Default False. Whether or not to use negative rewards, received when the agent loses a life.
        early_return : bool
            Default False. If true, returns as soon as the reward accumulated over one group of skipped frames
            is negative (with neg_reward set this is how a lost life ends the episode).
        clip : bool
            Default True. Whether or not to clip positive rewards to 1
        max_episode_frame : int
            Default np.inf. The maximum number of frames to run per episode

        Returns
        -------
        float
            Total reward from game. Can be negative if neg_reward is true.
        """
        total_reward = 0.0
        gamescreen = None
        self.ale.reset_game()
        cur_lives = self.ale.lives()
        # initially set at zero because we start the game before asking the learner
        action_to_perform = 0
        while not self.ale.game_over() and self.ale.getEpisodeFrameNumber() < max_episode_frame:
            # get frames
            frames = list()
            reward = 0

            # loop over skip frame
            for _ in range(self.skipFrame):
                # the previous buffer is passed back in (None on the very first call)
                gamescreen = self.ale.getScreenGrayscale(gamescreen)

                # crop rows 33..-16 of the first channel and downscale by 0.525 (nearest neighbour)
                processedImg = imresize(gamescreen[33:-16, :, 0], 0.525, interp='nearest')
                frames.append(processedImg)

                # act on the action to perform, should be ALE compatible action ind
                rew = self.ale.act(action_to_perform)

                # clip positive rewards to 1
                if rew > 0 and clip:
                    reward += 1
                else:
                    reward += rew

                # if allowing negative rewards, see if lives has decreased
                if neg_reward:
                    new_lives = self.ale.lives()
                    if new_lives < cur_lives:
                        reward -= 1  # losing a life is a negative 1 reward
                    # track the count on every frame so a life that is gained and later lost is still penalised
                    cur_lives = new_lives
            # end frame skip loop

            total_reward += reward
            frames = np.asarray(frames)

            # frames_processed must be here before action_to_perform gets overwritten.
            learner.frames_processed(frames, action_to_perform, reward)

            action_to_perform = learner.get_game_action()

            self.total_frame_count += self.skipFrame

            # if doing early return, end game on first negative step reward
            if reward < 0 and early_return:
                return total_reward

        # end of game
        return total_reward

    def set_legal_actions(self, learner):
        """Calls learner.set_legal_actions with the emulator's minimal action set."""
        learner.set_legal_actions(self.ale.getMinimalActionSet())

    def get_legal_actions(self):
        """Returns the emulator's minimal action set."""
        return self.ale.getMinimalActionSet()
10, #10011 fire up/down (invalid) 12, #10100 fire left 15, #10101 fire up/left 17, #10110 fire down/left 15, #10111 fire up/down/left (invalid) 11, #11000 fire right 14, #11001 fire up/right 16, #11010 fire down/right 14, #11011 fire up/down/right (invalid) 11, #11100 fire left/right (invalid) 14, #11101 fire left/right/up (invalid) 16, #11110 fire left/right/down (invalid) 14 #11111 fire up/down/left/right (invalid) ) ale = ALEInterface(False) rom = b'D:\\_code\\montezuma_revenge.bin' ale.loadROM(rom) legal_actions = ale.getMinimalActionSet() print(legal_actions) (screen_width, screen_height) = ale.getScreenDims() print("width/height: " + str(screen_width) + "/" + str(screen_height)) (display_width, display_height) = (1024, 420) #init pygame pygame.init() screen = pygame.display.set_mode((display_width, display_height)) pygame.display.set_caption("Arcade Learning Environment Player Agent Display")
def main():
    """
    Play one episode with random actions and print the RAM columns that could hold the lives counter.

    The RAM array is recorded before every action. Afterwards every column that holds a value greater than 3
    at any recorded step is zeroed, the result is plotted, and the unique indices of the columns that still
    contain a non-zero value are printed.
    """
    import time

    import numpy as np
    from learningALE.libs.ale_python_interface import ALEInterface

    # this script is used to try and find what ram index stores the number of lives for a game
    emulator = ALEInterface(True)
    emulator.loadROM(b'D:\\_code\\beam_rider.bin')
    # queried as before; the dimensions themselves are not used below
    (screen_width, screen_height) = emulator.getScreenDims()
    actions = emulator.getLegalActionSet()

    frames_played = 0
    ram_history = []
    started = time.time()
    for episode in range(1):
        score = 0.0
        while not emulator.game_over():
            choice = actions[np.random.randint(actions.size)]
            # snapshot the RAM before acting
            ram_history.append(emulator.getRAM())
            score += emulator.act(choice)
            frames_played += 1
        print("Episode ", episode, " ended with score: ", score, sep="")
        emulator.reset_game()

    elapsed = time.time() - started
    print(elapsed, frames_played / elapsed)

    import matplotlib.pyplot as plt

    ram_matrix = np.asarray(ram_history)
    # np.nonzero(cond) is exactly what single-argument np.where(cond) returns; [1] is the column index
    oversized_cols = np.nonzero(ram_matrix > 3)[1]
    ram_matrix[:, oversized_cols] = 0
    plt.plot(ram_matrix)

    candidate_cols = np.unique(np.nonzero(ram_matrix != 0)[1])
    print(candidate_cols)
def main():
    """
    Run a pickled CNN on Space Invaders for 100 episodes, then print and plot the episode rewards.

    For every decision step ``skipFrame`` screens are captured, cropped to rows 25..-12 of their first channel,
    thresholded (every value above 1 becomes 255), resized by 0.5 and divided by 255. The last action of the
    action handler is repeated for each of those frames and positive emulator rewards are clipped to 1. The
    stacked frames are then fed to the network and its output is handed to the action handler.
    """
    import os
    import pickle

    import matplotlib.pyplot as plt
    from learningALE.learners.nns import CNN
    from scipy.misc import imresize
    from learningALE.handlers.actionhandler import ActionHandler, ActionPolicy
    from learningALE.libs.ale_python_interface import ALEInterface
    import lasagne
    import numpy as np

    # plt.ion()
    skipFrame = 3
    cnn = CNN((None, skipFrame, 86, 80), 6, .1, stride=(4, 2))
    # os.path.join replaces a hand-written backslash path whose \d was an invalid escape sequence;
    # on Windows the resulting path is unchanged, elsewhere the native separator is used
    params_path = os.path.join(os.getcwd(), 'datasets', 'spccnn.pkl')
    # NOTE: pickle.load can execute arbitrary code from the file; only load parameter files you trust
    with open(params_path, 'rb') as infile:
        parms = pickle.load(infile)
    lasagne.layers.set_all_param_values(cnn.l_out, parms)

    # rom = b'D:\\_code\\breakout.bin'
    rom = b'D:\\_code\\space_invaders.bin'

    ale = ALEInterface(True)
    ale.loadROM(rom)
    (screen_width, screen_height) = ale.getScreenDims()
    legal_actions = ale.getMinimalActionSet()

    # get labels (only used by the commented-out plotting code below)
    labels = ['noop', 'fire', 'up', 'right', 'left', 'down', 'upright', 'upleft', 'downright', 'downleft',
              'upfire', 'rightfire', 'leftfire', 'downfire', 'uprightfire', 'upleftfire', 'downrightfire',
              'downleftfire']
    labels = np.asarray(labels)[legal_actions]

    # set up vars
    actionHandler = ActionHandler(ActionPolicy.eGreedy, (.1, .1, 2), legal_actions)
    rewList = list()
    for ep in range(100):
        total_reward = 0.0
        ale.reset_game()
        while not ale.game_over():
            # get frames
            frames = list()
            reward = 0
            for _ in range(skipFrame):
                gamescreen = ale.getScreenRGB()
                # view the screen as bytes, keep the cropped first channel as float32
                processedImg = np.asarray(
                    gamescreen.view(np.uint8).reshape(screen_height, screen_width, 4)[25:-12, :, 0],
                    dtype=np.float32)
                # binarise: everything brighter than 1 becomes 255
                processedImg[processedImg > 1] = 255
                processedImg = imresize(processedImg, 0.5, interp='nearest') / 255
                frames.append(processedImg)

                # repeat the handler's last action for every skipped frame
                performedAction, _ = actionHandler.getLastAction()
                rew = ale.act(performedAction)
                # clip positive rewards to 1
                if rew > 0:
                    rew = 1
                reward += rew

            total_reward += reward
            frames = np.asarray(frames, dtype=np.float32)

            actionVect = cnn.get_output(frames.reshape((1, skipFrame, frames.shape[1], 80)))[0]
            actionHandler.setAction(actionVect)
            # hid1_act = cnn.get_hid1_act(frames.reshape((1, skip_frame, frames.shape[1], 80)))
            # hid2_act = cnn.get_hid2_act(frames.reshape((1, skip_frame, frames.shape[1], 80)))
            # for x in range(hid1_act.shape[1]):
            #     plt.subplot(4,4,x+1)
            #     plt.imshow(hid1_act[0,x], cmap=plt.cm.gray)
            # for x in range(hid2_act.shape[1]):
            #     plt.subplot(6,6,x+1)
            #     plt.imshow(hid2_act[0,x], cmap=plt.cm.gray)
            # plt.show()
            # plt.clf()
            # plt.plot(actionVect)
            # plt.xticks(range(len(labels)), labels)
            # plt.pause(0.001)
        rewList.append(total_reward)
        print(ep, total_reward)

    print(np.mean(rewList), np.std(rewList), np.max(rewList), np.min(rewList))
    print(np.unique(rewList, return_counts=True))
    plt.plot(rewList)
    plt.show()
def main():
    """
    Run a pickled AlloEggoCnn on Breakout for 100 episodes and live-plot the score of each episode.

    For every decision step ``skipFrame`` screens are captured, reduced to their first channel, resized by 0.5
    and divided by 255. The last action of the action handler is repeated for each of those frames. A positive
    emulator reward counts as +1 and a change of RAM byte 57 (compared with the tracked ``lives`` value) counts
    as -1. The stacked frames are then fed to the network and its output is handed to the action handler.
    """
    import os
    import pickle
    import time

    import lasagne
    import matplotlib.pyplot as plt
    import numpy as np
    from learners.nns import AlloEggoCnn
    from scipy.misc import imresize

    from learningALE.handlers import ActionHandler, ActionPolicy
    from learningALE.libs.ale_python_interface import ALEInterface

    dtype = np.float16
    plt.ion()

    # set up emulator
    ale = ALEInterface(True)
    ale.loadROM(b'D:\\_code\\breakout.bin')
    (screen_width, screen_height) = ale.getScreenDims()
    legal_actions = ale.getMinimalActionSet()
    lives = 5

    # set up vars
    skipFrame = 4
    actionHandler = ActionHandler(ActionPolicy.eGreedy, (0.1, 0.1, 2), legal_actions)
    scoreList = list()

    cnn = AlloEggoCnn()
    # os.path.join replaces a hand-written backslash path whose \s and \c were invalid escape sequences;
    # on Windows the resulting path is unchanged, elsewhere the native separator is used
    params_path = os.path.join(os.getcwd(), 'saves', 'cnnbestalloego.pkl')
    # NOTE: pickle.load can execute arbitrary code from the file; only load parameter files you trust
    with open(params_path, 'rb') as fin:
        parms = pickle.load(fin)
    lasagne.layers.set_all_param_values(cnn.a_out, parms)

    frameCount = 0
    st = time.time()
    for episode in range(100):
        total_reward = 0.0
        while not ale.game_over():
            # get frames
            frames = list()
            reward = 0
            for _ in range(skipFrame):
                gamescreen = ale.getScreenRGB()
                # view the screen as bytes, keep channel 0, halve the size, scale by 1/255
                first_channel = gamescreen.view(np.uint8).reshape(screen_height, screen_width, 4)[:, :, 0]
                processedImg = np.asarray(imresize(first_channel, 0.5, interp='nearest'), dtype=dtype) / 255
                frames.append(processedImg)

                # repeat the handler's last action for every skipped frame
                performedAction, _ = actionHandler.getLastAction()
                rew = ale.act(performedAction)
                if rew > 0:
                    reward += 1

                # RAM byte 57 is compared with the tracked lives value; any change is penalised
                ram = ale.getRAM()
                if ram[57] != lives:
                    reward -= 1
                    lives = ram[57]

            frames = np.asarray(frames)
            actionVect = cnn.get_output(frames.reshape((1, skipFrame, 105, 80)))[0]
            actionHandler.setAction(actionVect)
            total_reward += reward
            frameCount += skipFrame

        ale.reset_game()
        actionHandler.anneal()
        scoreList.append(total_reward)
        lives = 5
        print("Episode " + str(episode) + " ended with score: " + str(total_reward))

        et = time.time()
        print('Total Time:', et - st, 'Frame Count:', frameCount, 'FPS:', frameCount / (et - st))

        plt.clf()
        plt.plot(scoreList, '.')
        plt.pause(0.01)
    plt.ioff()
__author__ = 'Ben' import time import numpy as np from learningALE.libs.ale_python_interface import ALEInterface """ This example is meant for those wanting to play around with GameHandler, or implement their own ALE interface. For people that want a plug and play interface use :class:`handlers.GameHandler` """ # start up the python ale interface ale = ALEInterface() # load a rom ale.loadROM(b'd:\_code\_reinforcementlearning\\breakout.bin') # screen dimensions and legal actions (screen_width, screen_height) = ale.getScreenDims() legal_actions = ale.getLegalActionSet() frameCount = 0 st = time.time() for episode in range(1): total_reward = 0.0 while not ale.game_over(): # get a random action a = legal_actions[np.random.randint(legal_actions.size)]
def main():
    """
    Run a pickled CNN on Space Invaders for 100 episodes, then print and plot the episode rewards.

    For every decision step ``skipFrame`` screens are captured, cropped to rows 25..-12 of their first channel,
    thresholded (every value above 1 becomes 255), resized by 0.5 and divided by 255. The last action of the
    action handler is repeated for each of those frames and positive emulator rewards are clipped to 1. The
    stacked frames are then fed to the network and its output is handed to the action handler.
    """
    import os
    import pickle

    import matplotlib.pyplot as plt
    from learningALE.learners.nns import CNN
    from scipy.misc import imresize
    from learningALE.handlers.actionhandler import ActionHandler, ActionPolicy
    from learningALE.libs.ale_python_interface import ALEInterface
    import lasagne
    import numpy as np

    # plt.ion()
    skipFrame = 3
    cnn = CNN((None, skipFrame, 86, 80), 6, .1, stride=(4, 2))
    # os.path.join replaces a hand-written backslash path whose \d was an invalid escape sequence;
    # on Windows the resulting path is unchanged, elsewhere the native separator is used
    params_path = os.path.join(os.getcwd(), 'datasets', 'spccnn.pkl')
    # NOTE: pickle.load can execute arbitrary code from the file; only load parameter files you trust
    with open(params_path, 'rb') as infile:
        parms = pickle.load(infile)
    lasagne.layers.set_all_param_values(cnn.l_out, parms)

    # rom = b'D:\\_code\\breakout.bin'
    rom = b'D:\\_code\\space_invaders.bin'

    ale = ALEInterface(True)
    ale.loadROM(rom)
    (screen_width, screen_height) = ale.getScreenDims()
    legal_actions = ale.getMinimalActionSet()

    # get labels (only used by the commented-out plotting code below)
    labels = [
        'noop', 'fire', 'up', 'right', 'left', 'down', 'upright', 'upleft',
        'downright', 'downleft', 'upfire', 'rightfire', 'leftfire', 'downfire',
        'uprightfire', 'upleftfire', 'downrightfire', 'downleftfire'
    ]
    labels = np.asarray(labels)[legal_actions]

    # set up vars
    actionHandler = ActionHandler(ActionPolicy.eGreedy, (.1, .1, 2), legal_actions)
    rewList = list()
    for ep in range(100):
        total_reward = 0.0
        ale.reset_game()
        while not ale.game_over():
            # get frames
            frames = list()
            reward = 0
            for _ in range(skipFrame):
                gamescreen = ale.getScreenRGB()
                # view the screen as bytes, keep the cropped first channel as float32
                processedImg = np.asarray(
                    gamescreen.view(np.uint8).reshape(screen_height, screen_width, 4)[25:-12, :, 0],
                    dtype=np.float32)
                # binarise: everything brighter than 1 becomes 255
                processedImg[processedImg > 1] = 255
                processedImg = imresize(processedImg, 0.5, interp='nearest') / 255
                frames.append(processedImg)

                # repeat the handler's last action for every skipped frame
                performedAction, _ = actionHandler.getLastAction()
                rew = ale.act(performedAction)
                # clip positive rewards to 1
                if rew > 0:
                    rew = 1
                reward += rew

            total_reward += reward
            frames = np.asarray(frames, dtype=np.float32)

            actionVect = cnn.get_output(frames.reshape((1, skipFrame, frames.shape[1], 80)))[0]
            actionHandler.setAction(actionVect)
            # hid1_act = cnn.get_hid1_act(frames.reshape((1, skip_frame, frames.shape[1], 80)))
            # hid2_act = cnn.get_hid2_act(frames.reshape((1, skip_frame, frames.shape[1], 80)))
            # for x in range(hid1_act.shape[1]):
            #     plt.subplot(4,4,x+1)
            #     plt.imshow(hid1_act[0,x], cmap=plt.cm.gray)
            # for x in range(hid2_act.shape[1]):
            #     plt.subplot(6,6,x+1)
            #     plt.imshow(hid2_act[0,x], cmap=plt.cm.gray)
            # plt.show()
            # plt.clf()
            # plt.plot(actionVect)
            # plt.xticks(range(len(labels)), labels)
            # plt.pause(0.001)
        rewList.append(total_reward)
        print(ep, total_reward)

    print(np.mean(rewList), np.std(rewList), np.max(rewList), np.min(rewList))
    print(np.unique(rewList, return_counts=True))
    plt.plot(rewList)
    plt.show()
class GameHandler:
    """
    The :class:`GameHandler` class takes care of the interface between the ALE and the learner.

    Currently supported is the ability to display the ALE screen when running and to skip x number of frames by
    repeating the last action. Frames are handed to the learner as returned by ``imresize``; this class performs
    no dtype conversion.

    Parameters
    ----------
    rom : byte string
        Specifies the directory to load the rom from. Must be a byte string: b'dir_for_rom/rom.bin'
    show_rom : boolean
        Whether or not to show the game being played or not. True takes longer to run but can be fun to watch
    skip_frame : int
        Number of frames to skip using the last action chosen
    learner : :class:`learners.learner`
        Default None. The learner, on construction GameHandler will call set_legal_actions. If none then
        set_legal_actions needs to be called
    """
    def __init__(self, rom, show_rom, skip_frame, learner=None):
        # set up emulator
        self.ale = ALEInterface(show_rom)
        self.ale.loadROM(rom)
        (self.screen_width, self.screen_height) = self.ale.getScreenDims()
        legal_actions = self.ale.getMinimalActionSet()

        # set up vars
        self.skipFrame = skip_frame
        # compare against None explicitly: a learner object that happens to be falsy
        # (e.g. it defines __len__ or __bool__) must still receive its legal actions
        if learner is not None:
            learner.set_legal_actions(legal_actions)
        self.total_frame_count = 0

    def run_one_game(self, learner, neg_reward=False, early_return=False, clip=True, max_episode_frame=np.inf):
        """
        Resets the game and runs it until ale.game_over() is true or the episode frame number reaches
        max_episode_frame.

        Parameters
        ----------
        learner : :class:`learners.learner`
            Will call get_game_action and frames_processed. get_game_action must return a valid ALE action ind.
            frames_processed can be a pass.
        neg_reward : bool
            Default False. Whether or not to use negative rewards, received when the agent loses a life.
        early_return : bool
            Default False. If true, returns as soon as the reward accumulated over one group of skipped frames
            is negative (with neg_reward set this is how a lost life ends the episode).
        clip : bool
            Default True. Whether or not to clip positive rewards to 1
        max_episode_frame : int
            Default np.inf. The maximum number of frames to run per episode

        Returns
        -------
        float
            Total reward from game. Can be negative if neg_reward is true.
        """
        total_reward = 0.0
        gamescreen = None
        self.ale.reset_game()
        cur_lives = self.ale.lives()
        # initially set at zero because we start the game before asking the learner
        action_to_perform = 0
        while not self.ale.game_over() and self.ale.getEpisodeFrameNumber() < max_episode_frame:
            # get frames
            frames = list()
            reward = 0

            # loop over skip frame
            for _ in range(self.skipFrame):
                # the previous buffer is passed back in (None on the very first call)
                gamescreen = self.ale.getScreenGrayscale(gamescreen)

                # crop rows 33..-16 of the first channel and downscale by 0.525 (nearest neighbour)
                processedImg = imresize(gamescreen[33:-16, :, 0], 0.525, interp='nearest')
                frames.append(processedImg)

                # act on the action to perform, should be ALE compatible action ind
                rew = self.ale.act(action_to_perform)

                # clip positive rewards to 1
                if rew > 0 and clip:
                    reward += 1
                else:
                    reward += rew

                # if allowing negative rewards, see if lives has decreased
                if neg_reward:
                    new_lives = self.ale.lives()
                    if new_lives < cur_lives:
                        reward -= 1  # losing a life is a negative 1 reward
                    # track the count on every frame so a life that is gained and later lost is still penalised
                    cur_lives = new_lives
            # end frame skip loop

            total_reward += reward
            frames = np.asarray(frames)

            # frames_processed must be here before action_to_perform gets overwritten.
            learner.frames_processed(frames, action_to_perform, reward)

            action_to_perform = learner.get_game_action()

            self.total_frame_count += self.skipFrame

            # if doing early return, end game on first negative step reward
            if reward < 0 and early_return:
                return total_reward

        # end of game
        return total_reward

    def set_legal_actions(self, learner):
        """Calls learner.set_legal_actions with the emulator's minimal action set."""
        learner.set_legal_actions(self.ale.getMinimalActionSet())

    def get_legal_actions(self):
        """Returns the emulator's minimal action set."""
        return self.ale.getMinimalActionSet()
class MinimalGameHandler:
    """
    A deliberately thin layer over the ALE for users who want to drive every step themselves.

    Most methods simply forward to the underlying ``ALEInterface`` under a more pythonic name.

    Parameters
    ----------
    rom : byte string
        Specifies the directory to load the rom from. Must be a byte string: b'dir_for_rom/rom.bin'
    frame_skip : int
        Default 4. Number of frames to skip between actions; written to the emulator's frame_skip setting
    show_rom : boolean
        Default False. Whether or not to show the game. True takes longer to run but can be fun to watch
    """
    def __init__(self, rom, frame_skip=4, show_rom=False):
        # the frame_skip setting is written before the rom is loaded
        self.ale = emulator = ALEInterface(show_rom)
        emulator.setInt(b'frame_skip', frame_skip)
        emulator.loadROM(rom)

        # one reusable (height, width, 1) uint8 buffer instead of a fresh array per screen grab
        screen_w, screen_h = emulator.getScreenDims()
        self.gamescreen = np.empty((screen_h, screen_w, 1), dtype=np.uint8)

    def reset(self):
        """Resets the emulator to the start of a game."""
        self.ale.reset_game()

    def step(self, action, clip=None):
        """
        Performs ``action`` and returns the reward.

        When ``clip`` is not None the reward is limited to the range [0, clip] with ``np.clip``.
        """
        raw_reward = self.ale.act(action)
        if clip is None:
            return raw_reward
        return np.clip(raw_reward, 0, clip)

    def get_gamescreen(self, converted=True):
        """
        Refreshes the internal grayscale buffer and returns the current screen.

        With ``converted`` true the result is rows 33..-16 of the first channel resized by 0.525 (nearest
        neighbour); otherwise a copy of the raw buffer is returned.
        """
        self.gamescreen = self.ale.getScreenGrayscale(self.gamescreen)
        if not converted:
            # hand out a copy so callers (e.g. experience replay) never alias the reused buffer
            return np.copy(self.gamescreen)
        cropped = self.gamescreen[33:-16, :, 0]
        return imresize(cropped, 0.525, interp='nearest')

    def get_game_over(self):
        """Returns what the emulator reports for game_over()."""
        return self.ale.game_over()

    def get_legal_actions(self):
        """Returns the emulator's minimal action set."""
        return self.ale.getMinimalActionSet()