def __init__(self, rom, frame_skip=4, show_rom=False):
    # set up emulator
    self.ale = ALEInterface(show_rom)
    self.ale.setInt(b'frame_skip', frame_skip)
    self.ale.loadROM(rom)

    # set up the gamescreen buffer once; reusing it avoids allocating a new empty array every frame
    width, height = self.ale.getScreenDims()
    self.gamescreen = np.empty((height, width, 1), dtype=np.uint8)
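# A minimal sketch (not part of the original class) of how the preallocated gamescreen
# buffer above might be reused each frame. The helper name grab_grayscale is hypothetical,
# and it assumes this fork's getScreenGrayscale can fill a preallocated
# (height, width, 1) uint8 buffer, which is the shape allocated in __init__ above.
import numpy as np


def grab_grayscale(ale, buffer=None):
    """Fetch the current grayscale screen, filling a reusable buffer when one is given."""
    if buffer is None:
        width, height = ale.getScreenDims()
        buffer = np.empty((height, width, 1), dtype=np.uint8)
    ale.getScreenGrayscale(buffer)  # assumed to write into the supplied buffer in place
    return buffer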
def __init__(self, rom, show_rom, skip_frame, learner=None):
    # set up emulator
    self.ale = ALEInterface(show_rom)
    self.ale.loadROM(rom)
    (self.screen_width, self.screen_height) = self.ale.getScreenDims()
    legal_actions = self.ale.getMinimalActionSet()

    # set up vars
    self.skipFrame = skip_frame

    # tell the learner which actions this game supports
    if learner:
        learner.set_legal_actions(legal_actions)

    self.total_frame_count = 0
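# A hedged usage sketch for the constructor above. The class name GameHandler is taken
# from the docstring elsewhere in this repo (:class:`handlers.GameHandler`); the
# RandomLearner stub and its get_action method are hypothetical and only illustrate
# what set_legal_actions receives.
import numpy as np


class RandomLearner:
    def set_legal_actions(self, legal_actions):
        self.legal_actions = np.asarray(legal_actions)

    def get_action(self):
        return self.legal_actions[np.random.randint(self.legal_actions.size)]


# learner = RandomLearner()
# handler = GameHandler(b'D:\\_code\\breakout.bin', False, 4, learner)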
def main():
    import time

    import numpy as np

    from learningALE.libs.ale_python_interface import ALEInterface

    # this script is used to try and find which RAM index stores the number of lives for a game
    ale = ALEInterface(True)
    ale.loadROM(b'D:\\_code\\beam_rider.bin')
    (screen_width, screen_height) = ale.getScreenDims()
    legal_actions = ale.getLegalActionSet()

    frameCount = 0
    ramlist = list()
    st = time.time()
    for episode in range(1):
        total_reward = 0.0
        while not ale.game_over():
            a = legal_actions[np.random.randint(legal_actions.size)]
            ram = ale.getRAM()
            ramlist.append(ram)
            reward = ale.act(a)
            total_reward += reward
            frameCount += 1
        print("Episode " + str(episode) + " ended with score: " + str(total_reward))
        ale.reset_game()

    et = time.time()
    print(et - st, frameCount / (et - st))

    import matplotlib.pyplot as plt
    ramarray = np.asarray(ramlist)
    # zero out every RAM byte that ever exceeds 3; a lives counter should stay small
    w = np.where(ramarray > 3)[1]
    ramarray[:, w] = 0
    plt.plot(ramarray)

    # the columns that are still nonzero are the lives-counter candidates
    notZ = np.where(ramarray != 0)[1]
    unqNZ = np.unique(notZ)
    print(unqNZ)
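# A hypothetical follow-up helper (not in the original script): once unqNZ lists the
# candidate indices, plotting one byte's raw values across the episode makes it easy
# to confirm it steps down once per life lost.
def plot_ram_candidate(ramlist, candidate):
    import numpy as np
    import matplotlib.pyplot as plt

    raw = np.asarray(ramlist)  # the getRAM() snapshots collected by main() above
    plt.figure()
    plt.plot(raw[:, candidate])
    plt.title('RAM[' + str(candidate) + '] over one random episode')
    plt.show()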
def main():
    import os
    import pickle
    import time

    import lasagne
    import matplotlib.pyplot as plt
    import numpy as np
    from learners.nns import AlloEggoCnn
    from scipy.misc import imresize

    from learningALE.handlers import ActionHandler, ActionPolicy
    from learningALE.libs.ale_python_interface import ALEInterface

    dtype = np.float16
    plt.ion()

    # set up emulator
    ale = ALEInterface(True)
    ale.loadROM(b'D:\\_code\\breakout.bin')
    (screen_width, screen_height) = ale.getScreenDims()
    legal_actions = ale.getMinimalActionSet()
    lives = 5

    # set up vars
    skipFrame = 4
    actionHandler = ActionHandler(ActionPolicy.eGreedy, (0.1, 0.1, 2), legal_actions)
    scoreList = list()

    # load pretrained network parameters
    cnn = AlloEggoCnn()
    with open(os.getcwd() + '\saves\cnnbestalloego.pkl', 'rb') as fin:
        parms = pickle.load(fin)
    lasagne.layers.set_all_param_values(cnn.a_out, parms)

    frameCount = 0
    st = time.time()
    for episode in range(100):
        total_reward = 0.0
        while not ale.game_over():
            # get frames
            frames = list()
            reward = 0
            for frame in range(skipFrame):
                gamescreen = ale.getScreenRGB()
                processedImg = np.asarray(
                    imresize(gamescreen.view(np.uint8).reshape(screen_height, screen_width, 4)[:, :, 0],
                             0.5, interp='nearest'),
                    dtype=dtype) / 255
                frames.append(processedImg)

                performedAction, actionInd = actionHandler.getLastAction()
                rew = ale.act(performedAction)
                if rew > 0:
                    reward += 1

                # the script treats RAM byte 57 as the lives counter and penalizes a lost life
                ram = ale.getRAM()
                if ram[57] != lives:
                    reward -= 1
                    lives = ram[57]

            frames = np.asarray(frames)
            actionVect = cnn.get_output(frames.reshape((1, skipFrame, 105, 80)))[0]
            actionHandler.setAction(actionVect)

            total_reward += reward
            frameCount += 1 * skipFrame

        ale.reset_game()
        actionHandler.anneal()
        scoreList.append(total_reward)
        lives = 5
        print("Episode " + str(episode) + " ended with score: " + str(total_reward))

    et = time.time()
    print('Total Time:', et - st, 'Frame Count:', frameCount, 'FPS:', frameCount / (et - st))

    plt.clf()
    plt.plot(scoreList, '.')
    plt.pause(0.01)
    plt.ioff()
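# A standalone version of the preprocessing used in the loop above; the helper name
# preprocess_frame is hypothetical. It assumes getScreenRGB() returns the packed screen
# that the script reinterprets as (height, width, 4) uint8; the first channel is taken,
# halved from 210x160 to 105x80, and scaled to [0, 1].
import numpy as np
from scipy.misc import imresize


def preprocess_frame(gamescreen, screen_width, screen_height, dtype=np.float16):
    channel = gamescreen.view(np.uint8).reshape(screen_height, screen_width, 4)[:, :, 0]
    return np.asarray(imresize(channel, 0.5, interp='nearest'), dtype=dtype) / 255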
__author__ = 'Ben'
import time

import numpy as np

from learningALE.libs.ale_python_interface import ALEInterface

"""
This example is meant for those wanting to play around with GameHandler, or implement their own ALE interface.
For people who want a plug-and-play interface, use :class:`handlers.GameHandler`
"""

# start up the python ale interface
ale = ALEInterface()

# load a rom
ale.loadROM(b'd:\_code\_reinforcementlearning\\breakout.bin')

# screen dimensions and legal actions
(screen_width, screen_height) = ale.getScreenDims()
legal_actions = ale.getLegalActionSet()

frameCount = 0
st = time.time()
for episode in range(1):
    total_reward = 0.0
    while not ale.game_over():
        # get a random action
        a = legal_actions[np.random.randint(legal_actions.size)]

        # get gamescreen and convert to usable format (Height x Width x Channels)
        gamescreen = ale.getScreenRGB()
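        # The original example is truncated here; the lines below are a hedged sketch of how
        # it likely continues, based on the other scripts in this collection: view the packed
        # screen as (Height x Width x 4) uint8, act, and accumulate the reward.
        gamescreen = gamescreen.view(np.uint8).reshape(screen_height, screen_width, 4)

        reward = ale.act(a)
        total_reward += reward
        frameCount += 1

    print("Episode " + str(episode) + " ended with score: " + str(total_reward))
    ale.reset_game()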
    10,  # 10011 fire up/down (invalid)
    12,  # 10100 fire left
    15,  # 10101 fire up/left
    17,  # 10110 fire down/left
    15,  # 10111 fire up/down/left (invalid)
    11,  # 11000 fire right
    14,  # 11001 fire up/right
    16,  # 11010 fire down/right
    14,  # 11011 fire up/down/right (invalid)
    11,  # 11100 fire left/right (invalid)
    14,  # 11101 fire left/right/up (invalid)
    16,  # 11110 fire left/right/down (invalid)
    14   # 11111 fire up/down/left/right (invalid)
)

ale = ALEInterface(False)

rom = b'D:\\_code\\montezuma_revenge.bin'
ale.loadROM(rom)
legal_actions = ale.getMinimalActionSet()
print(legal_actions)

(screen_width, screen_height) = ale.getScreenDims()
print("width/height: " + str(screen_width) + "/" + str(screen_height))

(display_width, display_height) = (1024, 420)

# init pygame
pygame.init()
screen = pygame.display.set_mode((display_width, display_height))
pygame.display.set_caption("Arcade Learning Environment Player Agent Display")
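# A hedged sketch of how a bitmask-to-action table like the tuple above is typically
# consumed in an ALE pygame player agent: poll the keyboard, pack the pressed keys into
# a 5-bit index (bit 0 = up, 1 = down, 2 = left, 3 = right, 4 = fire), and look up the
# ALE action. The helper name poll_keyboard_action and the choice of SPACE as the fire
# key are assumptions; the real mapping table in this file may use different bindings.
def poll_keyboard_action(action_map):
    keys = pygame.key.get_pressed()
    bitmask = 0
    bitmask |= keys[pygame.K_UP]            # bit 0: up
    bitmask |= keys[pygame.K_DOWN] << 1     # bit 1: down
    bitmask |= keys[pygame.K_LEFT] << 2     # bit 2: left
    bitmask |= keys[pygame.K_RIGHT] << 3    # bit 3: right
    bitmask |= keys[pygame.K_SPACE] << 4    # bit 4: fire
    return action_map[bitmask]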
def main():
    import os
    import pickle

    import lasagne
    import matplotlib.pyplot as plt
    import numpy as np
    from scipy.misc import imresize

    from learningALE.learners.nns import CNN
    from learningALE.handlers.actionhandler import ActionHandler, ActionPolicy
    from learningALE.libs.ale_python_interface import ALEInterface

    # plt.ion()

    # load pretrained network parameters
    skipFrame = 3
    cnn = CNN((None, skipFrame, 86, 80), 6, .1, stride=(4, 2))
    with open(os.getcwd() + '\datasets\\spccnn.pkl', 'rb') as infile:
        parms = pickle.load(infile)
    lasagne.layers.set_all_param_values(cnn.l_out, parms)

    # set up emulator
    # rom = b'D:\\_code\\breakout.bin'
    rom = b'D:\\_code\\space_invaders.bin'
    ale = ALEInterface(True)
    ale.loadROM(rom)
    (screen_width, screen_height) = ale.getScreenDims()
    legal_actions = ale.getMinimalActionSet()

    # get labels for the actions this game actually uses
    labels = ['noop', 'fire', 'up', 'right', 'left', 'down', 'upright', 'upleft', 'downright',
              'downleft', 'upfire', 'rightfire', 'leftfire', 'downfire', 'uprightfire',
              'upleftfire', 'downrightfire', 'downleftfire']
    labels = np.asarray(labels)[legal_actions]

    # set up vars
    actionHandler = ActionHandler(ActionPolicy.eGreedy, (.1, .1, 2), legal_actions)

    rewList = list()
    for ep in range(100):
        total_reward = 0.0
        trainCount = 0
        ale.reset_game()
        while not ale.game_over():
            # get frames
            frames = list()
            reward = 0
            for frame in range(skipFrame):
                gamescreen = ale.getScreenRGB()
                processedImg = np.asarray(
                    gamescreen.view(np.uint8).reshape(screen_height, screen_width, 4)[25:-12, :, 0],
                    dtype=np.float32)
                processedImg[processedImg > 1] = 255
                processedImg = imresize(processedImg, 0.5, interp='nearest') / 255
                frames.append(processedImg)

                performedAction, actionInd = actionHandler.getLastAction()
                rew = ale.act(performedAction)
                if rew > 0:
                    rew = 1
                reward += rew

            total_reward += reward
            frames = np.asarray(frames, dtype=np.float32)
            actionVect = cnn.get_output(frames.reshape((1, skipFrame, frames.shape[1], 80)))[0]
            actionHandler.setAction(actionVect)

            # hid1_act = cnn.get_hid1_act(frames.reshape((1, skip_frame, frames.shape[1], 80)))
            # hid2_act = cnn.get_hid2_act(frames.reshape((1, skip_frame, frames.shape[1], 80)))
            # for x in range(hid1_act.shape[1]):
            #     plt.subplot(4, 4, x + 1)
            #     plt.imshow(hid1_act[0, x], cmap=plt.cm.gray)
            # for x in range(hid2_act.shape[1]):
            #     plt.subplot(6, 6, x + 1)
            #     plt.imshow(hid2_act[0, x], cmap=plt.cm.gray)
            # plt.show()

            # plt.clf()
            # plt.plot(actionVect)
            # plt.xticks(range(len(labels)), labels)
            # plt.pause(0.001)

        rewList.append(total_reward)
        print(ep, total_reward)

    print(np.mean(rewList), np.std(rewList), np.max(rewList), np.min(rewList))
    print(np.unique(rewList, return_counts=True))
    plt.plot(rewList)
    plt.show()
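# A small hedged sketch (not in the original script) showing one way the labels array
# above can be used: print the human-readable name of the network's greedy action for a
# given output vector. It assumes actionVect (cnn.get_output(...)[0]) is ordered the same
# way as the minimal action set used to filter labels.
def print_greedy_action(actionVect, labels):
    import numpy as np

    greedy = int(np.argmax(actionVect))
    print('greedy action:', labels[greedy], 'score:', float(actionVect[greedy]))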