class emulator:
    """Thin wrapper around one ALE instance playing a single ROM.

    The wrapper seeds ALE, enables a frame skip of 4, loads the ROM from the
    ``roms/`` directory and exposes helpers that return the current screen as
    a ``(screen_height, screen_width, 3)`` array.
    """

    def __init__(self, rom_name, vis):
        """Create the ALE interface, load the ROM and cache its metadata.

        Args:
            rom_name: File name of the ROM; it is appended to ``'roms/'``.
            vis: When truthy, an OpenCV window named "preview" is created and
                ``next`` draws every new frame into it.
        """
        self.ale = ALEInterface()
        # The ALE setting is spelled "max_num_frames_per_episode"; the earlier
        # "max_mum_..." spelling did not name any existing setting.
        self.max_frames_per_episode = self.ale.getInt("max_num_frames_per_episode")
        self.ale.setInt("random_seed", 123)
        self.ale.setInt("frame_skip", 4)
        self.ale.loadROM('roms/' + rom_name)
        self.legal_actions = self.ale.getMinimalActionSet()
        # Reverse lookup: ALE action code -> its position in legal_actions.
        self.action_map = {
            action: index for index, action in enumerate(self.legal_actions)
        }
        print(self.legal_actions)
        self.screen_width, self.screen_height = self.ale.getScreenDims()
        print("width/height: "+ str(self.screen_width) + "/" + str(self.screen_height))
        self.vis = vis
        if vis:
            cv2.startWindowThread()
            cv2.namedWindow("preview")

    def get_image(self):
        """Return the current screen reshaped to (height, width, 3)."""
        image = self.ale.getScreenRGB()
        return np.reshape(image, (self.screen_height, self.screen_width, 3))

    def newGame(self):
        """Reset the game and return ``(first_frame, 0, False)``.

        The tuple has the same layout as the one returned by ``next``:
        frame, reward, game-over flag.
        """
        self.ale.reset_game()
        return self.get_image(), 0, False

    def next(self, action_indx):
        """Apply one action and return ``(frame, reward, game_over)``.

        Args:
            action_indx: Value handed to ``ALEInterface.act`` unchanged.
                NOTE(review): despite the name, no lookup through
                ``legal_actions`` happens here, so callers presumably pass an
                ALE action code rather than an index - confirm against callers.
        """
        reward = self.ale.act(action_indx)
        nextstate = self.get_image()
        if self.vis:
            cv2.imshow('preview', nextstate)
        return nextstate, reward, self.ale.game_over()

    def train(self):
        """Play 10 episodes with random legal actions (manual debugging aid).

        Every frame is shown in an OpenCV window and saved as
        ``test_<episode frame number>.png`` in the working directory.
        ``cv2.waitKey(0)`` is called after each frame, so the loop pauses
        until a key is pressed.
        """
        for episode in range(10):
            total_reward = 0
            frame_number = 0
            while not self.ale.game_over():
                a = self.legal_actions[random.randrange(len(self.legal_actions))]
                # Apply an action and get the resulting reward
                reward = self.ale.act(a)
                total_reward += reward
                screen = self.ale.getScreenRGB()
                screen = np.array(screen).reshape(
                    [self.screen_height, self.screen_width, -1])
                frame_number = self.ale.getEpisodeFrameNumber()
                # Scale to [0, 1] floats for display.
                cv2.imshow("screen", screen/255.0)
                cv2.waitKey(0)
                self.ale.saveScreenPNG("test_"+str(frame_number)+".png")
            print('Episode %d ended with score: %d' % (episode, total_reward))
            print('Frame number is : ', frame_number)
            self.ale.reset_game()
class AtariEnvironment:
    """Game environment that drives ALE with a fixed action repeat.

    Each call to ``step`` applies the chosen action for up to
    ``ACTION_REPEAT`` ALE frames, accumulates the reward, and pushes the
    element-wise maximum of the last two captured screens into the state.

    NOTE(review): a second ``AtariEnvironment`` class is defined later in this
    file and rebinds the name; consider removing one of the two definitions.
    """

    # Number of consecutive ALE frames a single step() applies its action to.
    ACTION_REPEAT = 4

    def __init__(self, args, outputDir):
        """Configure ALE, load the ROM and start the first game.

        Args:
            args: Options object; ``args.screen_capture_freq`` and
                ``args.rom`` are read.
            outputDir: Directory under which ``screen_cap/game-NNNNNN``
                folders are created for captured frames.
        """
        self.outputDir = outputDir
        # Frames are captured for games whose number is a multiple of this
        # value; 0 (or None) turns capturing off.
        self.screenCaptureFrequency = args.screen_capture_freq

        self.ale = ALEInterface()
        self.ale.setInt(b'random_seed', 123456)
        random.seed(123456)
        # Fix https://groups.google.com/forum/#!topic/deep-q-learning/p4FAIaabwlo
        self.ale.setFloat(b'repeat_action_probability', 0.0)

        # Load the ROM file
        self.ale.loadROM(args.rom)

        self.actionSet = self.ale.getMinimalActionSet()

        self.gameNumber = 0
        self.stepNumber = 0
        self.resetGame()

    def getNumActions(self):
        """Return the size of the minimal action set."""
        return len(self.actionSet)

    def getState(self):
        """Return the current state object."""
        return self.state

    def getGameNumber(self):
        """Return how many finished games have been reset so far."""
        return self.gameNumber

    def getFrameNumber(self):
        """Return ALE's frame counter."""
        return self.ale.getFrameNumber()

    def getEpisodeFrameNumber(self):
        """Return ALE's per-episode frame counter."""
        return self.ale.getEpisodeFrameNumber()

    def getEpisodeStepNumber(self):
        """Return the number of step() calls since the last resetGame()."""
        return self.episodeStepNumber

    def getStepNumber(self):
        """Return the number of step() calls since construction."""
        return self.stepNumber

    def getGameScore(self):
        """Return the reward accumulated since the last resetGame()."""
        return self.gameScore

    def isGameOver(self):
        """Return ALE's game-over flag."""
        return self.ale.game_over()

    def step(self, action):
        """Apply an action for up to ACTION_REPEAT frames.

        Args:
            action: Index into the minimal action set.

        Returns:
            Tuple ``(reward, state, isTerminal)`` where ``reward`` is the sum
            of the per-frame rewards, ``state`` is the updated state object
            and ``isTerminal`` is 1 when a life was lost or the game ended
            during this step, otherwise 0.
        """
        previousLives = self.ale.lives()
        reward = 0
        isTerminal = 0
        self.stepNumber += 1
        self.episodeStepNumber += 1

        # The game number cannot change inside the loop, so decide once.
        captureThisGame = self._shouldCaptureScreens()

        for _ in range(self.ACTION_REPEAT):
            prevScreenRGB = self.ale.getScreenRGB()
            reward += self.ale.act(self.actionSet[action])
            screenRGB = self.ale.getScreenRGB()

            # Detect end of episode, I don't think I'm handling this right in terms
            # of the overall game loop (??)
            if self.ale.lives() < previousLives or self.ale.game_over():
                isTerminal = 1
                break

            if captureThisGame:
                self._saveScreenCapture()

        # Combine the screens captured before and after the last act() call.
        maxedScreen = np.maximum(screenRGB, prevScreenRGB)
        self.state = self.state.stateByAddingScreen(maxedScreen, self.ale.getFrameNumber())
        self.gameScore += reward
        return reward, self.state, isTerminal

    def resetGame(self):
        """Reset ALE and start a fresh state, score and episode step count.

        The game number only advances when the previous game actually ended;
        a reset after a lost life keeps the same game number.
        """
        if self.ale.game_over():
            self.gameNumber += 1
        self.ale.reset_game()
        self.state = State().stateByAddingScreen(self.ale.getScreenRGB(), self.ale.getFrameNumber())
        self.gameScore = 0
        self.episodeStepNumber = 0  # environment steps vs ALE frames.  Will probably be 4*frame number

    def _shouldCaptureScreens(self):
        """Return True when frames of the current game should be saved."""
        frequency = self.screenCaptureFrequency
        if not frequency:
            # A frequency of 0 would otherwise raise ZeroDivisionError below.
            return False
        return self.gameNumber % frequency == 0

    def _saveScreenCapture(self):
        """Save the current screen as a PNG in this game's capture folder."""
        captureDir = self.outputDir + '/screen_cap/game-%06d' % (self.gameNumber)
        if not os.path.isdir(captureDir):
            os.makedirs(captureDir)
        self.ale.saveScreenPNG(captureDir + '/frame-%06d.png' % (self.getEpisodeFrameNumber()))
class AtariEnvironment:
    """Game environment that drives ALE with a fixed action repeat.

    Each call to ``step`` applies the chosen action for up to
    ``ACTION_REPEAT`` ALE frames, accumulates the reward, and pushes the
    element-wise maximum of the last two captured screens into the state.

    NOTE(review): an ``AtariEnvironment`` class is also defined earlier in
    this file; this later definition is the one the name ends up bound to.
    Consider removing one of the two definitions.
    """

    # Number of consecutive ALE frames a single step() applies its action to.
    ACTION_REPEAT = 4

    def __init__(self, args, outputDir):
        """Configure ALE, load the ROM and start the first game.

        Args:
            args: Options object; ``args.screen_capture_freq`` and
                ``args.rom`` are read.
            outputDir: Directory under which ``screen_cap/game-NNNNNN``
                folders are created for captured frames.
        """
        self.outputDir = outputDir
        # Frames are captured for games whose number is a multiple of this
        # value; 0 (or None) turns capturing off.
        self.screenCaptureFrequency = args.screen_capture_freq

        self.ale = ALEInterface()
        self.ale.setInt(b'random_seed', 123456)
        random.seed(123456)
        # Fix https://groups.google.com/forum/#!topic/deep-q-learning/p4FAIaabwlo
        self.ale.setFloat(b'repeat_action_probability', 0.0)

        # Load the ROM file
        self.ale.loadROM(args.rom)

        self.actionSet = self.ale.getMinimalActionSet()

        self.gameNumber = 0
        self.stepNumber = 0
        self.resetGame()

    def getNumActions(self):
        """Return the size of the minimal action set."""
        return len(self.actionSet)

    def getState(self):
        """Return the current state object."""
        return self.state

    def getGameNumber(self):
        """Return how many finished games have been reset so far."""
        return self.gameNumber

    def getFrameNumber(self):
        """Return ALE's frame counter."""
        return self.ale.getFrameNumber()

    def getEpisodeFrameNumber(self):
        """Return ALE's per-episode frame counter."""
        return self.ale.getEpisodeFrameNumber()

    def getEpisodeStepNumber(self):
        """Return the number of step() calls since the last resetGame()."""
        return self.episodeStepNumber

    def getStepNumber(self):
        """Return the number of step() calls since construction."""
        return self.stepNumber

    def getGameScore(self):
        """Return the reward accumulated since the last resetGame()."""
        return self.gameScore

    def isGameOver(self):
        """Return ALE's game-over flag."""
        return self.ale.game_over()

    def step(self, action):
        """Apply an action for up to ACTION_REPEAT frames.

        Args:
            action: Index into the minimal action set.

        Returns:
            Tuple ``(reward, state, isTerminal)`` where ``reward`` is the sum
            of the per-frame rewards, ``state`` is the updated state object
            and ``isTerminal`` is 1 when a life was lost or the game ended
            during this step, otherwise 0.
        """
        previousLives = self.ale.lives()
        reward = 0
        isTerminal = 0
        self.stepNumber += 1
        self.episodeStepNumber += 1

        # The game number cannot change inside the loop, so decide once.
        captureThisGame = self._shouldCaptureScreens()

        for _ in range(self.ACTION_REPEAT):
            prevScreenRGB = self.ale.getScreenRGB()
            reward += self.ale.act(self.actionSet[action])
            screenRGB = self.ale.getScreenRGB()

            # Detect end of episode, I don't think I'm handling this right in terms
            # of the overall game loop (??)
            if self.ale.lives() < previousLives or self.ale.game_over():
                isTerminal = 1
                break

            if captureThisGame:
                self._saveScreenCapture()

        # Combine the screens captured before and after the last act() call.
        maxedScreen = np.maximum(screenRGB, prevScreenRGB)
        self.state = self.state.stateByAddingScreen(maxedScreen, self.ale.getFrameNumber())
        self.gameScore += reward
        return reward, self.state, isTerminal

    def resetGame(self):
        """Reset ALE and start a fresh state, score and episode step count.

        The game number only advances when the previous game actually ended;
        a reset after a lost life keeps the same game number.
        """
        if self.ale.game_over():
            self.gameNumber += 1
        self.ale.reset_game()
        self.state = State().stateByAddingScreen(self.ale.getScreenRGB(), self.ale.getFrameNumber())
        self.gameScore = 0
        self.episodeStepNumber = 0  # environment steps vs ALE frames.  Will probably be 4*frame number

    def _shouldCaptureScreens(self):
        """Return True when frames of the current game should be saved."""
        frequency = self.screenCaptureFrequency
        if not frequency:
            # A frequency of 0 would otherwise raise ZeroDivisionError below.
            return False
        return self.gameNumber % frequency == 0

    def _saveScreenCapture(self):
        """Save the current screen as a PNG in this game's capture folder."""
        captureDir = self.outputDir + '/screen_cap/game-%06d' % (self.gameNumber)
        if not os.path.isdir(captureDir):
            os.makedirs(captureDir)
        self.ale.saveScreenPNG(captureDir + '/frame-%06d.png' % (self.getEpisodeFrameNumber()))