def main():
    """Search for the RAM index that stores the number of lives in a game.

    Plays one episode of beam_rider with uniformly random actions, keeping
    the RAM array returned by the emulator before every action. Afterwards
    every RAM index whose value ever exceeded 3 is zeroed for all frames,
    the remaining traces are plotted, and the indices that still hold a
    non-zero value somewhere are printed as candidates.
    """
    import time
    import numpy as np
    from learningALE.libs.ale_python_interface import ALEInterface

    emulator = ALEInterface(True)
    emulator.loadROM(b'D:\\_code\\beam_rider.bin')
    screen_width, screen_height = emulator.getScreenDims()
    actions = emulator.getLegalActionSet()

    frames_played = 0
    ram_snapshots = []
    started = time.time()
    for episode in range(1):
        score = 0.0
        while not emulator.game_over():
            # Choose the action first, snapshot RAM, then step the emulator.
            choice = np.random.randint(actions.size)
            ram_snapshots.append(emulator.getRAM())
            score += emulator.act(actions[choice])
            frames_played += 1
        print("Episode " + str(episode) + " ended with score: " + str(score))
        emulator.reset_game()
    elapsed = time.time() - started
    # Total wall-clock seconds and frames per second.
    print(elapsed, frames_played / elapsed)

    import matplotlib.pyplot as plt
    # Rows are frames, columns are RAM indices.
    ram_history = np.asarray(ram_snapshots)
    # Columns that ever hold a value above 3 are discarded (presumably too
    # large to be a lives counter -- confirm the bound for the game at hand).
    too_large_cols = np.where(ram_history > 3)[1]
    ram_history[:, too_large_cols] = 0
    plt.plot(ram_history)
    candidate_cols = np.unique(np.where(ram_history != 0)[1])
    print(candidate_cols)
def main():
    """Search for the RAM index that stores the number of lives in a game.

    Plays one episode of beam_rider with uniformly random actions, keeping
    the RAM array returned by the emulator before every action. Afterwards
    every RAM index whose value ever exceeded 3 is zeroed for all frames,
    the remaining traces are plotted, and the indices that still hold a
    non-zero value somewhere are printed as candidates.
    """
    import time
    import numpy as np
    from learningALE.libs.ale_python_interface import ALEInterface

    emulator = ALEInterface(True)
    emulator.loadROM(b'D:\\_code\\beam_rider.bin')
    screen_width, screen_height = emulator.getScreenDims()
    actions = emulator.getLegalActionSet()

    frames_played = 0
    ram_snapshots = []
    started = time.time()
    for episode in range(1):
        score = 0.0
        while not emulator.game_over():
            # Choose the action first, snapshot RAM, then step the emulator.
            choice = np.random.randint(actions.size)
            ram_snapshots.append(emulator.getRAM())
            score += emulator.act(actions[choice])
            frames_played += 1
        print("Episode " + str(episode) + " ended with score: " + str(score))
        emulator.reset_game()
    elapsed = time.time() - started
    # Total wall-clock seconds and frames per second.
    print(elapsed, frames_played / elapsed)

    import matplotlib.pyplot as plt
    # Rows are frames, columns are RAM indices.
    ram_history = np.asarray(ram_snapshots)
    # Columns that ever hold a value above 3 are discarded (presumably too
    # large to be a lives counter -- confirm the bound for the game at hand).
    too_large_cols = np.where(ram_history > 3)[1]
    ram_history[:, too_large_cols] = 0
    plt.plot(ram_history)
    candidate_cols = np.unique(np.where(ram_history != 0)[1])
    print(candidate_cols)
def main():
    """Run a saved AlloEggoCnn on Breakout for 100 episodes and plot scores.

    The network parameters are loaded from ``saves/cnnbestalloego.pkl``
    under the current working directory. Each decision step gathers
    ``skipFrame`` half-size, single-channel screens scaled into [0, 1],
    repeats the action handler's last action while gathering them, and then
    feeds the stacked frames to the network to pick the next action.

    The per-step reward is clipped: +1 for any frame with a positive
    emulator reward, -1 whenever RAM byte 57 differs from the tracked
    ``lives`` value. After every episode the score is printed and the
    running score plot is refreshed.
    """
    import os
    import pickle
    import time

    import lasagne
    import matplotlib.pyplot as plt
    import numpy as np
    from learners.nns import AlloEggoCnn
    from scipy.misc import imresize

    from learningALE.handlers import ActionHandler, ActionPolicy
    from learningALE.libs.ale_python_interface import ALEInterface

    dtype = np.float16
    plt.ion()

    # set up emulator
    ale = ALEInterface(True)
    ale.loadROM(b'D:\\_code\\breakout.bin')
    (screen_width, screen_height) = ale.getScreenDims()
    legal_actions = ale.getMinimalActionSet()
    lives = 5

    # set up vars
    skipFrame = 4
    actionHandler = ActionHandler(ActionPolicy.eGreedy, (0.1, 0.1, 2),
                                  legal_actions)
    scoreList = list()

    cnn = AlloEggoCnn()
    # Build the path with os.path.join: the previous '\saves\...' literal
    # depended on the invalid escape sequences '\s' and '\c' and only worked
    # with the Windows path separator.
    params_path = os.path.join(os.getcwd(), 'saves', 'cnnbestalloego.pkl')
    # NOTE: pickle.load can execute arbitrary code; only load parameter
    # files that come from a trusted source.
    with open(params_path, 'rb') as fin:
        parms = pickle.load(fin)
    lasagne.layers.set_all_param_values(cnn.a_out, parms)

    frameCount = 0
    st = time.time()
    for episode in range(100):
        total_reward = 0.0
        while not ale.game_over():
            # get frames
            frames = list()
            reward = 0
            for _ in range(skipFrame):
                gamescreen = ale.getScreenRGB()
                # Keep channel 0 only, halve the resolution, scale to [0, 1].
                first_channel = gamescreen.view(np.uint8).reshape(
                    screen_height, screen_width, 4)[:, :, 0]
                processedImg = np.asarray(
                    imresize(first_channel, 0.5, interp='nearest'),
                    dtype=dtype) / 255
                frames.append(processedImg)

                # Repeat the most recently chosen action on every skipped frame.
                performedAction, _ = actionHandler.getLastAction()
                rew = ale.act(performedAction)
                if rew > 0:
                    reward += 1

                # RAM byte 57 is compared against the tracked lives count
                # (presumably the Breakout lives counter -- confirm).
                ram = ale.getRAM()
                if ram[57] != lives:
                    reward -= 1
                    lives = ram[57]

            frames = np.asarray(frames)
            # The network input is fixed at (1, skipFrame, 105, 80).
            actionVect = cnn.get_output(
                frames.reshape((1, skipFrame, 105, 80)))[0]
            actionHandler.setAction(actionVect)

            total_reward += reward
            frameCount += 1 * skipFrame

        ale.reset_game()
        actionHandler.anneal()
        scoreList.append(total_reward)
        lives = 5
        print("Episode " + str(episode) + " ended with score: " +
              str(total_reward))

        et = time.time()
        print('Total Time:', et - st, 'Frame Count:', frameCount, 'FPS:',
              frameCount / (et - st))

        # Redraw the score history after each episode.
        plt.clf()
        plt.plot(scoreList, '.')
        plt.pause(0.01)
    plt.ioff()
# Play one episode with uniformly random actions and report the elapsed time
# and the achieved frames per second.
frameCount = 0
st = time.time()
for episode in range(1):
    total_reward = 0.0
    while not ale.game_over():
        # get a random action
        a = legal_actions[np.random.randint(legal_actions.size)]
        # get gamescreen and keep only its first channel, giving a
        # Height x Width float array
        gamescreen = ale.getScreenRGB()
        # The builtin float replaces np.float, which was only an alias for it
        # and was removed in NumPy 1.24.
        gamescreen = np.asarray(
            gamescreen.view(np.uint8).reshape(
                screen_height, screen_width, 4)[:, :, 0],
            dtype=float)
        # get ram
        ram = ale.getRAM()
        # take the action and get the reward
        reward = ale.act(a)
        total_reward += reward
        frameCount += 1
    print("Episode " + str(episode) + " ended with score: " +
          str(total_reward))
    # game over man game over, reset
    ale.reset_game()
# end time count and print total time and FPS
et = time.time()
print(et - st, frameCount / (et - st))
# NOTE(review): this fragment is cut off at both ends in this view. The
# enclosing loops/function are not visible, and the final `while` loop below
# is missing the rest of its body (nothing here advances `ram_pos`).

# Step the emulator with action `a` and accumulate the returned reward.
reward += ale.act(a)
total_reward += reward
currrewards.append(reward)
# Store the current frame data rescaled by a factor of 0.525
# (nearest-neighbour interpolation).
currstates.append(imresize(np.asarray(frames), 0.525, interp='nearest'))
# Swap axes 0 and 2 before building the pygame surface.
frames = np.swapaxes(np.asarray(frames),0,2)
from pygame import surfarray
frames = surfarray.make_surface(frames)
# Draw the frame at twice the emulator screen size in the top-left corner.
screen.blit(pygame.transform.scale(frames, (screen_width*2, screen_height*2)),(0,0))
#get RAM
ram_size = ale.getRAMSize()
ram = np.zeros((ram_size),dtype=np.uint8)
# presumably fills `ram` in place -- confirm against the ALE interface
ale.getRAM(ram)
#Display ram bytes
font = pygame.font.SysFont("Ubuntu Mono",32)
text = font.render("RAM: " ,1,(255,208,208))
screen.blit(text,(330,10))
font = pygame.font.SysFont("Ubuntu Mono",25)
height = font.get_height()*1.2
line_pos = 40
ram_pos = 0
# Render the first 128 RAM bytes as rows of up to 16 two-digit hex values.
while(ram_pos < 128):
    ram_string = ''.join(["%02X "%ram[x] for x in range(ram_pos,min(ram_pos+16,128))])
    text = font.render(ram_string,1,(255,255,255))
# NOTE(review): this fragment is cut off at both ends in this view. The
# enclosing loops/function are not visible, and the final `while` loop below
# is missing the rest of its body (nothing here advances `ram_pos`).

currrewards.append(reward)
# Store the current frame data rescaled by a factor of 0.525
# (nearest-neighbour interpolation).
currstates.append(imresize(np.asarray(frames), 0.525, interp='nearest'))
# Swap axes 0 and 2 before building the pygame surface.
frames = np.swapaxes(np.asarray(frames), 0, 2)
from pygame import surfarray
frames = surfarray.make_surface(frames)
# Draw the frame at twice the emulator screen size in the top-left corner.
screen.blit(
    pygame.transform.scale(frames, (screen_width * 2, screen_height * 2)),
    (0, 0))
#get RAM
ram_size = ale.getRAMSize()
ram = np.zeros((ram_size), dtype=np.uint8)
# presumably fills `ram` in place -- confirm against the ALE interface
ale.getRAM(ram)
#Display ram bytes
font = pygame.font.SysFont("Ubuntu Mono", 32)
text = font.render("RAM: ", 1, (255, 208, 208))
screen.blit(text, (330, 10))
font = pygame.font.SysFont("Ubuntu Mono", 25)
height = font.get_height() * 1.2
line_pos = 40
ram_pos = 0
# Render the first 128 RAM bytes as rows of up to 16 two-digit hex values.
while (ram_pos < 128):
    ram_string = ''.join(
        ["%02X " % ram[x] for x in range(ram_pos, min(ram_pos + 16, 128))])
    text = font.render(ram_string, 1, (255, 255, 255))
def main():
    """Run a saved AlloEggoCnn on Breakout for 100 episodes and plot scores.

    The network parameters are loaded from ``saves/cnnbestalloego.pkl``
    under the current working directory. Each decision step gathers
    ``skipFrame`` half-size, single-channel screens scaled into [0, 1],
    repeats the action handler's last action while gathering them, and then
    feeds the stacked frames to the network to pick the next action.

    The per-step reward is clipped: +1 for any frame with a positive
    emulator reward, -1 whenever RAM byte 57 differs from the tracked
    ``lives`` value. After every episode the score is printed and the
    running score plot is refreshed.
    """
    import os
    import pickle
    import time

    import lasagne
    import matplotlib.pyplot as plt
    import numpy as np
    from learners.nns import AlloEggoCnn
    from scipy.misc import imresize

    from learningALE.handlers import ActionHandler, ActionPolicy
    from learningALE.libs.ale_python_interface import ALEInterface

    dtype = np.float16
    plt.ion()

    # set up emulator
    ale = ALEInterface(True)
    ale.loadROM(b'D:\\_code\\breakout.bin')
    (screen_width, screen_height) = ale.getScreenDims()
    legal_actions = ale.getMinimalActionSet()
    lives = 5

    # set up vars
    skipFrame = 4
    actionHandler = ActionHandler(ActionPolicy.eGreedy, (0.1, 0.1, 2),
                                  legal_actions)
    scoreList = list()

    cnn = AlloEggoCnn()
    # Build the path with os.path.join: the previous '\saves\...' literal
    # depended on the invalid escape sequences '\s' and '\c' and only worked
    # with the Windows path separator.
    params_path = os.path.join(os.getcwd(), 'saves', 'cnnbestalloego.pkl')
    # NOTE: pickle.load can execute arbitrary code; only load parameter
    # files that come from a trusted source.
    with open(params_path, 'rb') as fin:
        parms = pickle.load(fin)
    lasagne.layers.set_all_param_values(cnn.a_out, parms)

    frameCount = 0
    st = time.time()
    for episode in range(100):
        total_reward = 0.0
        while not ale.game_over():
            # get frames
            frames = list()
            reward = 0
            for _ in range(skipFrame):
                gamescreen = ale.getScreenRGB()
                # Keep channel 0 only, halve the resolution, scale to [0, 1].
                first_channel = gamescreen.view(np.uint8).reshape(
                    screen_height, screen_width, 4)[:, :, 0]
                processedImg = np.asarray(
                    imresize(first_channel, 0.5, interp='nearest'),
                    dtype=dtype) / 255
                frames.append(processedImg)

                # Repeat the most recently chosen action on every skipped frame.
                performedAction, _ = actionHandler.getLastAction()
                rew = ale.act(performedAction)
                if rew > 0:
                    reward += 1

                # RAM byte 57 is compared against the tracked lives count
                # (presumably the Breakout lives counter -- confirm).
                ram = ale.getRAM()
                if ram[57] != lives:
                    reward -= 1
                    lives = ram[57]

            frames = np.asarray(frames)
            # The network input is fixed at (1, skipFrame, 105, 80).
            actionVect = cnn.get_output(
                frames.reshape((1, skipFrame, 105, 80)))[0]
            actionHandler.setAction(actionVect)

            total_reward += reward
            frameCount += 1 * skipFrame

        ale.reset_game()
        actionHandler.anneal()
        scoreList.append(total_reward)
        lives = 5
        print("Episode " + str(episode) + " ended with score: " +
              str(total_reward))

        et = time.time()
        print('Total Time:', et - st, 'Frame Count:', frameCount, 'FPS:',
              frameCount / (et - st))

        # Redraw the score history after each episode.
        plt.clf()
        plt.plot(scoreList, '.')
        plt.pause(0.01)
    plt.ioff()
# Play one episode with uniformly random actions drawn from the emulator's
# legal action set and report the elapsed time and frames per second.
legal_actions = ale.getLegalActionSet()

frameCount = 0
st = time.time()
for episode in range(1):
    total_reward = 0.0
    while not ale.game_over():
        # get a random action
        a = legal_actions[np.random.randint(legal_actions.size)]
        # get gamescreen and keep only its first channel, giving a
        # Height x Width float array
        gamescreen = ale.getScreenRGB()
        # The builtin float replaces np.float, which was only an alias for it
        # and was removed in NumPy 1.24.
        gamescreen = np.asarray(
            gamescreen.view(np.uint8).reshape(
                screen_height, screen_width, 4)[:, :, 0],
            dtype=float)
        # get ram
        ram = ale.getRAM()
        # take the action and get the reward
        reward = ale.act(a)
        total_reward += reward
        frameCount += 1
    print("Episode " + str(episode) + " ended with score: " +
          str(total_reward))
    # game over man game over, reset
    ale.reset_game()
# end time count and print total time and FPS
et = time.time()
print(et - st, frameCount / (et - st))