class game(object):
    def __init__(self, display):
        self.ale = ALEInterface()

        # Get & Set the desired settings
        self.ale.setInt('random_seed', 123)

        # Set USE_SDL to true to display the screen. ALE must be compiled
        # with SDL enabled for this to work. On OSX, pygame init is used to
        # proxy-call SDL_main.
        USE_SDL = display
        if USE_SDL:
            if sys.platform == 'darwin':
                import pygame
                pygame.init()
                self.ale.setBool('sound', False)  # Sound doesn't work on OSX
            elif sys.platform.startswith('linux'):
                self.ale.setBool('sound', True)
            self.ale.setBool('display_screen', True)

        # Load the ROM file
        self.ale.loadROM("ms_pacman.bin")

    def act(self, action):
        return self.ale.act(action)

    def getState(self):
        return get_feature(self.ale.getScreen())

    def getScreen(self):
        return self.ale.getScreen()

    def reset_game(self):
        self.ale.reset_game()

    def lives(self):
        return self.ale.lives()

    def game_over(self):
        return self.ale.game_over()
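get_feature above is external to this snippet. A minimal sketch of one possible implementation is given below, coarsely downsampling the 210x160 pixel-index screen into a hashable tuple; the block size, return type, and the body itself are assumptions, not the original code.

import numpy as np

def get_feature(screen):
    # Reshape the flat pixel-index buffer to 210x160 and keep every
    # 10th pixel in each direction, giving a coarse 21x16 state that
    # can be hashed or compared cheaply. (Sketch; not the original.)
    img = np.reshape(screen, (210, 160))
    return tuple(img[::10, ::10].flatten())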
# ale.setBool('display_screen', True)

# Load the ROM file
ale.loadROM('Breakout.bin')

# Get the list of legal actions
# legal_actions = ale.getLegalActionSet()
legal_actions = ale.getMinimalActionSet()
print legal_actions

# (screen_width, screen_height) = ale.getScreenDims()
# screen_data = np.zeros(screen_width * screen_height, dtype=np.uint32)
# ale.getScreenRGB(screen_data)
(screen_width, screen_height) = ale.getScreenDims()
screen_data = np.zeros(screen_width * screen_height, dtype=np.uint8)
print type(ale.getScreen(screen_data))

# Play 10 episodes
for episode in xrange(10):
    total_reward = 0
    while not ale.game_over():
        a = legal_actions[randrange(len(legal_actions))]
        # Apply an action and get the resulting reward
        reward = ale.act(a)
        print reward
        total_reward += reward
    print 'Episode', episode, 'ended with score:', total_reward
    ale.reset_game()
cv2.imshow("ImageGray", observation) cv2.waitKey(10) while frameCount is not maxFrame: ale.reset_game() score = 0 cost_average = 0.0 frameCountLast = frameCount t0 = time.time() t1s = t2s = t3s = t4s = t5s = t6s = t7s = t8s = t9s = 0 while not ale.game_over(): t00 = time.time() imgBinary = Scale(ale.getScreen()) t1 = time.time() t1s += t1 - t00 if np.random.rand(1) > explorationRate: [actionIndex, actionValue] = forward([imgBinary], Q_train, all=False) else: actionIndex = randrange(len(legal_actions)) # get action t2 = time.time() t2s += t2 - t1 reward = ale.act(legal_actions[actionIndex]) # reward
State1 = np.zeros([batchSize, network_size])
Action0 = np.zeros([batchSize])
Reward0 = np.zeros([batchSize])

for episode in xrange(maxEpisode):
    ale.reset_game()
    score = 0
    cost_average = 0.0
    frameCountLast = frameCount
    t0 = time.time()
    while not ale.game_over():
        imgBinary = Scale(ale.getScreen())
        if np.random.rand(1) > explorationRate:
            [actionIndex, actionValue] = forward([imgBinary], Q_train, all=False)
        else:
            actionIndex = randrange(len(legal_actions))  # get action
        reward = ale.act(legal_actions[actionIndex])  # reward
        memory.append([imgBinary, actionIndex, reward])
        score += reward
        if frameCount >= startLearningFrame - 1:
            index = np.random.permutation(len(memory) - 1)[0:batchSize]
            for i in xrange(batchSize):
                State0[i, :] = memory[index[i]][0]
                State1[i, :] = memory[index[i] + 1][0]
                Action0[i] = memory[index[i]][1]
def forward(input, all=False):
    actionValues = sess.run(y, feed_dict={x: input})
    if all is True:
        return actionValues
    actionValue_max = np.max(actionValues)
    index = np.argmax(actionValues, axis=1)
    return [index, actionValue_max]

ale = ALEInterface()
ale.loadROM("Breakout.A26")
legal_actions = ale.getLegalActionSet()
img = ale.getScreen()
[actionIndex, actionValue] = forward([img])  # forward returns an [index, value] pair
reward = ale.act(legal_actions[actionIndex])  # index with [], not call with ()

# Get & Set the desired settings. Note: ALE only applies these when the
# ROM is loaded, so they should be set before loadROM (see below).
ale.setInt('random_seed', 123)
ale.setInt("frame_skip", frameSkip)

# Set USE_SDL to true to display the screen. ALE must be compiled
# with SDL enabled for this to work. On OSX, pygame init is used to
# proxy-call SDL_main.
USE_SDL = True
if USE_SDL:
    if sys.platform == 'darwin':
        import pygame
# The original snippet begins mid-function; the enclosing def (name assumed)
# is restored here so that 'return bg' is valid.
def load_background(file_name):
    f = open(file_name, "r")
    bg = []
    while True:
        c = f.read(1)
        if not c:
            break
        if c == '*':
            bg.append(0)
        if c == ';':
            bg.append(144)
        if c == '|':
            bg.append(74)
    f.close()
    return bg

for episode in xrange(200):
    total_reward = 0
    k = 0
    while not ale.game_over():
        a = legal_actions[randrange(len(legal_actions))]
        # Apply an action and get the resulting reward
        reward = ale.act(a)
        if reward > 10:
            print "happened"
            screen = ale.getScreen()
            printScreen4(screen)
        total_reward += reward
    print 'Episode', episode, 'ended with score:', total_reward
    ale.reset_game()
terminal = 1
# t0s = t1s = t2s = t3s = t4s = t5s = t6s = t7s = 0
ale.reset_game()
trainThread = threading.Thread(target=train)
trainThread.start()
for frameCount in xrange(maxFrame):
    t00 = time.time()
    lives = ale.lives()
    observe = Scale(ale.getScreen())  # 0.08s
    if terminal:
        actionIndex = 1
        # # a random start
        # ale.act(1)
        # for i in xrange(np.random.randint(0, maxRandomStartFrame)):
        #     ale.act(np.random.randint(len(legal_actions)))
        # ale.act(0)
        # actionIndex = 0
    else:
        if np.random.rand(1) > explorationRate:
            [actionIndex, actionValue] = forward(
                [np.transpose(memory.History, [1, 2, 0])], Q_train, all=False)
        else:
            actionIndex = np.random.randint(len(legal_actions))  # get action
saver.restore(sess, loadModelPath)

def forward(input, all=False):
    actionValues = sess.run(y, feed_dict={x: input})
    if all is True:
        return actionValues
    actionValue_max = np.max(actionValues)
    index = np.argmax(actionValues, axis=1)
    return [index, actionValue_max]

ale = ALEInterface()
ale.loadROM("Breakout.A26")
legal_actions = ale.getLegalActionSet()
img = ale.getScreen()
[actionIndex, actionValue] = forward([img])  # forward returns an [index, value] pair
reward = ale.act(legal_actions[actionIndex])  # index with [], not call with ()

# Get & Set the desired settings. Note: ALE only applies these when the
# ROM is loaded, so they should be set before loadROM (see below).
ale.setInt('random_seed', 123)
ale.setInt("frame_skip", frameSkip)

# Set USE_SDL to true to display the screen. ALE must be compiled
# with SDL enabled for this to work. On OSX, pygame init is used to
# proxy-call SDL_main.
USE_SDL = True
if USE_SDL:
    if sys.platform == 'darwin':
        import pygame
        pygame.init()
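In both of the snippets above, loadROM is called before the setInt calls, but ALE only applies settings at loadROM time, so the seed and frame skip never take effect. A corrected ordering would look like this; frameSkip is whatever value the surrounding script defines.

ale = ALEInterface()
ale.setInt('random_seed', 123)
ale.setInt('frame_skip', frameSkip)
ale.loadROM("Breakout.A26")  # settings above take effect here
legal_actions = ale.getLegalActionSet()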
scoreEpisode = 0.0
cost_average = 0.0
frameCount = 0
frameCountLast = frameCount
terminal = 1
t0s = t1s = t2s = t3s = t4s = t5s = t6s = t7s = 0
ale.reset_game()
for frameCount in xrange(maxFrame):
    # t00 = time.time()
    lives = ale.lives()
    observe = Scale(ale.getScreen())  # 0.08s
    # t1 = time.time()
    # t1s += t1 - t00
    if terminal:
        actionIndex = 1
    else:
        if np.random.rand(1) > explorationRate:
            [actionIndex, actionValue] = forward(
                [np.transpose(memory.History, [1, 2, 0])], Q_train, all=False)
        else:
            actionIndex = np.random.randint(len(legal_actions))  # get action
    # t2 = time.time()
    # t2s += t2 - t1
class Game:
    def __init__(self, state_height, state_width, display_screen=False):
        self.ale = ALEInterface()
        self.ale.setInt("frame_skip", 4)
        self.ale.setInt("random_seed", 123)
        self.ale.setBool("display_screen", display_screen)
        self.ale.loadROM("roms/breakout.bin")
        self.actions = self.ale.getMinimalActionSet()
        self.score = 0
        self.actions_len = len(self.actions)
        self.screen_width, self.screen_height = self.ale.getScreenDims()
        self.state_width = state_width
        self.state_height = state_height
        self.state_len = self.state_width * self.state_height
        self.make_move(self.actions[0])
        self.make_move(self.actions[1])

    def get_state(self):
        screen_data = np.zeros(self.screen_width * self.screen_height,
                               dtype=np.uint8)
        self.ale.getScreen(screen_data)
        screen_data_2D = np.reshape(
            screen_data, (self.screen_height, self.screen_width))
        resized_screen_data_2D = imresize(
            screen_data_2D, (self.state_height, self.state_width))
        resized_screen_data = np.reshape(
            resized_screen_data_2D, self.state_width * self.state_height)
        return resized_screen_data.astype(dtype=np.float32) / 255.0

    def get_state_dims(self):
        return (self.state_width, self.state_height, 1)

    def save_state_to_img(self, fn):
        screen_data = np.zeros(self.screen_width * self.screen_height,
                               dtype=np.uint8)
        self.ale.getScreen(screen_data)
        screen_data_2D = np.reshape(
            screen_data, (self.screen_height, self.screen_width))
        resized_screen_data_2D = imresize(
            screen_data_2D, (self.state_height, self.state_width))
        imsave(fn, resized_screen_data_2D)

    def make_move(self, action):
        r = self.ale.act(action)
        self.score += r
        return r

    def reset_game(self):
        self.ale.reset_game()
        self.score = 0
        self.make_move(self.actions[0])

    def game_over(self):
        return self.ale.game_over()

    def play(self):
        while True:
            while not self.game_over():
                self.make_move(
                    self.actions[np.random.randint(0, len(self.actions))])
            print("Game Over! Score: %s" % self.score)
            self.reset_game()

    def play_interactive(self):
        """Play using 0, 1, 2, 3; save and quit using 8."""
        buf = []
        while True:
            S = self.get_state()
            a = int(raw_input())
            if a == 8:
                with open("data.pickle", "wb") as f:  # binary mode for pickle
                    pickle.dump(buf, f)
                break
            if a > 3 or a < 0:  # was 'a is None', which int() never returns
                continue
            r = self.make_move(self.actions[a])
            S_ = self.get_state()
            terminal = self.game_over()
            if terminal:
                self.reset_game()
            buf.append((S, a, r, S_, terminal))
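A minimal usage sketch for the Game class above; the 84x84 state size is an assumption (any size accepted by imresize works), and the roms/breakout.bin path is hard-coded in __init__.

game = Game(84, 84, display_screen=False)
print game.get_state_dims()  # (84, 84, 1)
game.play()                  # random-play loop; interrupt to stop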
def train_agent(gamepath, agent, n_episodes, display_screen, record_weights,
                reduce_exploration_prob_amount, n_frames_to_skip):
    """
    :description: trains an agent to play a game

    :type gamepath: string
    :param gamepath: path to the binary of the game to be played

    :type agent: subclass of RLAlgorithm
    :param agent: the algorithm/agent that learns to play the game

    :type n_episodes: int
    :param n_episodes: number of episodes of the game on which to train
    """
    # load the ALE interface to interact with
    ale = ALEInterface()
    ale.setInt('random_seed', 42)
    # note: ALE applies settings at loadROM time, so set frame_skip first
    ale.setInt("frame_skip", n_frames_to_skip)

    # display/recording settings; recording doesn't seem to work currently
    # recordings_dir = './recordings/breakout/'
    # previously "USE_SDL"
    if display_screen:
        if sys.platform == 'darwin':
            print 'darwin'
            import pygame
            pygame.init()
            ale.setBool('sound', False)  # Sound doesn't work on OSX
            # ale.setString("record_screen_dir", recordings_dir)
        elif sys.platform.startswith('linux'):
            ale.setBool('sound', True)
        ale.setBool('display_screen', True)

    ale.loadROM(gamepath)

    screen_preprocessor = screen_utils.RGBScreenPreprocessor()
    screen_dims = ale.getScreenDims()
    print screen_dims

    rewards = []
    best_reward = 0
    print('starting training...')
    for episode in xrange(n_episodes):
        action = 0
        reward = 0
        newAction = None
        total_reward = 0
        counter = 0
        lives = ale.lives()

        screen = np.zeros((160 * 210), dtype=np.int8)  # np.zeros((32, 32, 3), dtype=np.int8)
        state = {
            "screen": screen,
            # "objects": None,
            # "prev_objects": None,
            # "prev_action": 0,
            "action": 0
        }

        if episode != 0 and episode % RECORD_WEIGHTS_PERIOD == 0 and record_weights:
            video = cv2.VideoWriter(
                'video/episode-{}-{}-video.avi'.format(episode, agent.name),
                cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), 24, screen_dims)

        start = time.time()
        while not ale.game_over():
            # if newAction is None we are training an off-policy algorithm;
            # otherwise, we are training an on-policy algorithm
            if newAction is None:
                action = agent.getAction(state)
            else:
                action = newAction

            reward += ale.act(action)
            if ale.lives() < lives:
                lives = ale.lives()
                # reward -= 1
            total_reward += reward

            new_screen = ale.getScreen()  # getScreenRGB()
            # print screen.shape, new_screen.shape
            if episode != 0 and episode % RECORD_WEIGHTS_PERIOD == 0 and record_weights:
                video.write(ale.getScreenRGB())
            # new_screen = screen_preprocessor.preprocess(new_screen)
            new_state = {
                "screen": new_screen,
                # "objects": None,
                # "prev_objects": state["objects"],
                # "prev_action": state["action"],
                "action": action
            }
            if counter % (n_frames_to_skip + 1) == 0:
                newAction = agent.incorporateFeedback(
                    state, action, reward, new_state)
                state = new_state
                reward = 0
            counter += 1
        end = time.time()

        rewards.append(total_reward)
        if agent.explorationProb > MINIMUM_EXPLORATION_EPSILON:
            agent.explorationProb -= reduce_exploration_prob_amount

        print('episode: {}, score: {}, number of frames: {}, time: {:.4f}m'.
              format(episode, total_reward, counter, (end - start) / 60))
        if total_reward > best_reward and record_weights:
            best_reward = total_reward
            print("Best reward: {}".format(total_reward))

        if episode % PRINT_TRAINING_INFO_PERIOD == 0:
            print '\n############################'
            print '### training information ###'
            print("Average reward: {}".format(np.mean(rewards)))
            print("Last 50: {}".format(
                np.mean(rewards[-NUM_EPISODES_AVERAGE_REWARD_OVER:])))
            print("Exploration probability: {}".format(agent.explorationProb))
            # print('action: {}'.format(action))
            print('size of weights dict: {}'.format(len(agent.weights)))
            # print('current objects: {}'.format(state['objects']))
            # print('previous objects: {}'.format(state['prev_objects']))
            weights = [v for k, v in agent.weights.iteritems()]
            min_feat_weight = min(weights)
            max_feat_weight = max(weights)
            avg_feat_weight = np.mean(weights)
            print('min feature weight: {}'.format(min_feat_weight))
            print('max feature weight: {}'.format(max_feat_weight))
            print('average feature weight: {}'.format(avg_feat_weight))
            print '############################'
            print '############################\n'

        if episode != 0 and episode % RECORD_WEIGHTS_PERIOD == 0 and record_weights:
            file_utils.save_rewards(
                rewards, filename='{}-rewards'.format(agent.name))
            file_utils.save_weights(
                agent.weights,
                filename='episode-{}-{}-weights'.format(episode, agent.name))
            video.release()

        ale.reset_game()
    return rewards
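A hypothetical call to train_agent; QLearningAgent is a stand-in name for whatever RLAlgorithm subclass the surrounding project defines, and the hyperparameter values are illustrative only.

agent = QLearningAgent()  # hypothetical RLAlgorithm subclass
rewards = train_agent('roms/breakout.bin', agent,
                      n_episodes=1000,
                      display_screen=False,
                      record_weights=True,
                      reduce_exploration_prob_amount=1e-5,
                      n_frames_to_skip=4)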
class MsPacManGame(object):
    """Ms. Pac-Man Arcade Learning Environment wrapper class."""

    def __init__(self, seed, display):
        """Constructs a MsPacManGame.

        Args:
            seed: Initial random seed, randomized when None.
            display: Whether to display onto the screen or not.
        """
        self._ale = ALEInterface()

        if seed is None:
            seed = random.randint(0, 255)
        self._ale.setInt("random_seed", seed)

        if display:
            if sys.platform == "darwin":
                # Use PyGame in macOS.
                import pygame
                pygame.init()
                # Sound doesn't work on macOS.
                self._ale.setBool("sound", False)
            elif sys.platform.startswith("linux"):
                self._ale.setBool("sound", True)
            self._ale.setBool("display_screen", True)

        self._ale.loadROM("MS_PACMAN.BIN")

        self._reward = 0
        self._raw_ms_pacman_position = (0, 0)

        self.__screen = self._ale.getScreen()
        self.__ram = self._ale.getRAM()

        self._lives = self._ale.lives()

        self._update_state()
        self._go_to((94, 98), 3)

    @property
    def lives(self):
        """Current lives remaining."""
        return self._lives

    @property
    def reward(self):
        """Current total reward."""
        return self._reward

    @property
    def map(self):
        """Current game map."""
        return self._map

    @property
    def sliced_map(self):
        """Current sliced game map."""
        return self._sliced_map

    @property
    def ms_pacman_position(self):
        """Ms. Pac-Man's position as a map index."""
        return self._ms_pacman_position

    @property
    def fruit(self):
        """Fruit."""
        return self._fruit

    @property
    def ghosts(self):
        """List of ghosts."""
        return self._ghosts

    def available_actions(self):
        """Returns a list of available actions to consider."""
        actions = []
        for action, move in [
            (2, (-1, 0)),  # up
            (3, (0, 1)),   # right
            (4, (0, -1)),  # left
            (5, (1, 0))    # down
        ]:
            new_pos = self.get_next_position(self._ms_pacman_position, move)
            if 0 <= new_pos[0] < GameMap.HEIGHT:
                if self._map.map[new_pos] != GameMapObjects.WALL:
                    actions.append(action)
        return actions

    def action_to_move(self, action):
        return [(-1, 0), (0, 1), (0, -1), (1, 0)][action - 2]

    def get_next_position(self, curr_position, move):
        new_pos = (curr_position[0] + move[0], curr_position[1] + move[1])
        if new_pos[1] < 0:
            new_pos = (new_pos[0], new_pos[1] + GameMap.WIDTH)
        elif new_pos[1] >= GameMap.WIDTH:
            new_pos = (new_pos[0], new_pos[1] - GameMap.WIDTH)
        return new_pos

    def act(self, action):
        """Plays a given action in the game.

        Args:
            action: Action to play.

        Returns:
            Partial reward gained since last action.
        """
        m = self.action_to_move(action)
        next_pos = self.get_next_position(self._ms_pacman_position, m)
        old_reward = self._reward
        old_lives = self._lives
        expected_reward = GameMapObjects.to_reward(self._map.map[next_pos])

        MAX_ACTION_COUNT = 20
        for _ in range(MAX_ACTION_COUNT):
            if expected_reward <= 0:
                if self._ms_pacman_position == next_pos:
                    break
            elif self._reward != old_reward:
                break

            if self.game_over() or self._lives < old_lives:
                return GameMapObjects.to_reward(GameMapObjects.BAD_GHOST)

            self._reward += self._ale.act(action)
            self._update_state()

        self._update_map()
        return self._reward - old_reward

    def _go_to(self, raw_pos, action):
        """Goes to a given position."""
        while (abs(self._raw_ms_pacman_position[0] - raw_pos[0]) > 1 or
               abs(self._raw_ms_pacman_position[1] - raw_pos[1]) > 1):
            self._ale.act(action)
            self._update_state()
            self._update_map()

    def game_over(self):
        """Returns whether the game reached a terminal state or not."""
        return self._ale.game_over()

    def reset_game(self):
        """Resets the game to the initial state."""
        self._reward = 0
        return self._ale.reset_game()

    def _to_map_position(self, pos):
        """Converts a RAM coordinate into a map coordinate.

        Args:
            pos: (x, y) coordinates from RAM.
        Returns:
            Map index coordinate.
        """
        x, y = pos
        i = round((y - 2) / 12.0)
        if x < 83:
            j = round((x - 18) / 8.0 + 1)
        elif 93 < x < 169:
            j = round((x - 22) / 8.0 + 1)
        elif x > 169:
            j = 0
        elif x < 88:
            j = 9
        else:
            j = 10
        return i, j

    def _to_raw_position(self, pos):
        i, j = pos
        y = i * 12 + 2
        if j == 0:
            x = 12
        elif j <= 9:
            x = (j - 1) * 8 + 18
        else:
            x = (j - 1) * 8 + 22
        return x, y

    def _update_state(self):
        """Updates the internal state of the game."""
        # Get new states from RAM.
        self._ale.getRAM(self.__ram)
        new_ms_pacman_position = (int(self.__ram[10]), int(self.__ram[16]))
        new_ghosts_ram = [
            ((int(self.__ram[6]), int(self.__ram[12])), int(self.__ram[1])),
            ((int(self.__ram[7]), int(self.__ram[13])), int(self.__ram[2])),
            ((int(self.__ram[8]), int(self.__ram[14])), int(self.__ram[3])),
            ((int(self.__ram[9]), int(self.__ram[15])), int(self.__ram[4]))
        ]
        fruit = (int(self.__ram[11]), int(self.__ram[17])), int(self.__ram[5])
        self._fruit = Fruit.from_ram(self._to_map_position(fruit[0]), fruit[1],
                                     fruit[0][0] != 0)

        # Update positions.
        self._raw_ms_pacman_position = new_ms_pacman_position
        self._ms_pacman_position = self._to_map_position(
            new_ms_pacman_position)
        self._ghosts = [
            Ghost.from_ram(self._to_map_position(pos), ram)
            for pos, ram in new_ghosts_ram
        ]

        # Update lives.
        self._lives = self._ale.lives()

    def _update_map(self):
        # Get new map from screen.
        self._ale.getScreen(self.__screen)
        self._map = GameMap(self.__screen.reshape(210, 160))
        self._blank_map = GameMap.from_map(self._map.map.copy())
        self._map.map[self._ms_pacman_position] = GameMapObjects.MS_PACMAN

        if self._fruit.exists:
            self._map.map[self._fruit.position] = GameMapObjects.FRUIT

        for ghost in self._ghosts:
            if ghost.state == Ghost.GOOD:
                self._map.map[ghost.position] = GameMapObjects.GOOD_GHOST
            elif ghost.state == Ghost.BAD:
                self._map.map[ghost.position] = GameMapObjects.BAD_GHOST

        self._sliced_map = SlicedGameMap(self._map,
                                         self._ms_pacman_position)
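A short random-walk driver for MsPacManGame, sketched from the public methods above; the MS_PACMAN.BIN path is baked into the constructor, and random is assumed to be imported as in the class body.

game = MsPacManGame(seed=None, display=False)
while not game.game_over():
    actions = game.available_actions()
    if not actions:
        break  # no legal move from this tile
    game.act(random.choice(actions))
print "Final score:", game.reward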
ram_size = ale.getRAMSize()
ram = np.zeros((ram_size), dtype=np.uint8)
ale.getRAM(ram)
print ram[54:108]

(screen_width, screen_height) = ale.getScreenDims()
# getScreen fills a uint8 buffer of pixel indices (was np.uint32)
screen_data = np.zeros(screen_width * screen_height, dtype=np.uint8)
legal_actions = ale.getLegalActionSet()

# Play 10 episodes
for episode in xrange(10):
    total_reward = 0
    a = 4
    while not ale.game_over():
        time.sleep(0.1)
        a = legal_actions[randrange(len(legal_actions))]
        reward = ale.act(a)
        total_reward += reward
        temp = [i for i in ram]
        ale.getRAM(ram)
        # print ram
        # print [temp[i] - ram[i] for i in range(len(ram))]
        ale.getScreen(screen_data)
        print screen_data
    print 'Episode', episode, 'ended with score:', total_reward
    ale.reset_game()
class agent(object):
    def __init__(self):
        self.ale = ALEInterface()

        # Get & Set the desired settings
        self.ale.setInt('random_seed', 123)

        # Set USE_SDL to true to display the screen. ALE must be compiled
        # with SDL enabled for this to work. On OSX, pygame init is used to
        # proxy-call SDL_main.
        USE_SDL = False
        if USE_SDL:
            if sys.platform == 'darwin':
                import pygame
                pygame.init()
                self.ale.setBool('sound', False)  # Sound doesn't work on OSX
            elif sys.platform.startswith('linux'):
                self.ale.setBool('sound', True)
            self.ale.setBool('display_screen', True)

        # Load the ROM file
        self.ale.loadROM("ms_pacman.bin")

        # persistent:
        self.tetas = []
        # a table of action values indexed by state and action, initially zero
        self.Q = self.txtToMap('qvalues.txt')
        # a table of frequencies for state-action pairs, initially zero
        self.N = self.txtToMap('nvalues.txt')
        # the previous state, action, and reward, initially null
        self.s = None
        self.a = None
        self.r = 0
        self.actions = self.ale.getMinimalActionSet()
        print self.actions

    def Q_LEARNING_AGENT(self, state, reward):
        if self.ale.game_over():
            self.updateQ(self.s, None, reward)
        if self.s is not None:
            self.incrementN(self.s, self.a)
            val = self.computeNewQ(self.s, self.a, self.r, state)
            self.updateQ(self.s, self.a, val)
        self.s = state
        self.a = self.chooseAct(state)
        self.r = reward
        return self.a

    def computeNewQ(self, s, a, reward, state):
        qsa = self.getQ(s, a)
        maxQ = self.getQ(state, self.actions[0])
        for act in self.actions:
            val = self.getQ(state, act)
            if val > maxQ:
                maxQ = val
        n = self.getN(s, a)
        alp = self.alpha(n)
        v = qsa + alp * (reward + 0.9 * maxQ - qsa)
        return v

    def chooseAct(self, state):
        v = randrange(10)
        if v == 5:  # explore with probability 1/10
            return self.actions[randrange(len(self.actions))]
        a = self.actions[0]
        maxQ = self.getQ(state, self.actions[0])
        for act in self.actions:
            val = self.getQ(state, act)
            if val > maxQ:
                maxQ = val
                a = act
        return a

    def alpha(self, Nsa):
        return 0.9

    def updateQ(self, s, a, value):
        self.Q[str(s) + "/" + str(a)] = value

    def getQ(self, s, a):
        return self.Q.get(str(s) + "/" + str(a), 0)

    def incrementN(self, s, a):
        self.N[str(s) + "/" + str(a)] = self.getN(s, a) + 1

    def getN(self, s, a):
        return self.N.get(str(s) + "/" + str(a), 0)

    def play(self, number):
        for episode in xrange(number):
            total_reward = 0
            self.s = None
            self.a = None
            reward = 0
            while not self.ale.game_over():
                state = hash(get_feature(self.ale.getScreen()))
                action = self.Q_LEARNING_AGENT(state, reward)
                # Apply an action and get the resulting reward
                reward = self.ale.act(action)
                total_reward += reward
            print 'Episode', episode, 'ended with score:', total_reward
            self.ale.reset_game()
        self.mapToTxt(self.Q, 'qvalues.txt')
        self.mapToTxt(self.N, 'nvalues.txt')

    def mapToTxt(self, hMap, filepath):
        f = open(filepath, 'w')  # was 'r+', which leaves stale trailing data
        for elem in hMap.keys():
            toWrite = str(elem) + " " + str(hMap[elem]) + "\n"
            f.write(toWrite)
        f.close()

    def txtToMap(self, filepath):
        newMap = {}
        f = open(filepath)
        while True:
            string = f.readline()
            if not string:
                break
            tmp = self.stringSplitter(string)
            newMap[tmp[0]] = float(tmp[1])
        f.close()
        return newMap

    def stringSplitter(self, string):
        i = string.find(' ')
        head = string[:i]
        rest = string[i + 1:len(string) - 1]  # getting rid of the trailing \n
        return (head, rest)
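For reference, computeNewQ above is the standard one-step Q-learning update with discount factor 0.9 and a constant learning rate alpha(n) = 0.9; a worked example of the arithmetic:

# Q(s, a) <- Q(s, a) + alpha * (r + gamma * max_a' Q(s', a') - Q(s, a))
# With Q(s, a) = 1.0, r = 10, gamma = 0.9, alpha = 0.9, max_a' Q(s', a') = 2.0:
#   1.0 + 0.9 * (10 + 0.9 * 2.0 - 1.0) = 10.72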
if sys.platform == 'darwin':
    import pygame
    pygame.init()
    ale.setBool('sound', False)  # Sound doesn't work on OSX
elif sys.platform.startswith('linux'):
    ale.setBool('sound', True)
ale.setBool('display_screen', True)

# Load the ROM file
ale.loadROM(sys.argv[1])

# Get the list of legal actions
legal_actions = ale.getLegalActionSet()

# Play episodes (just one here)
screen = np.reshape(ale.getScreen(), (210, -1))
maze = detect_maze(screen)
image = pacman_image(maze)
# print_maze(maze)
for episode in xrange(1):
    total_reward = 0
    step = 1
    while not ale.game_over():
        # if step == 500:
        screen = np.reshape(ale.getScreen(), (210, -1))
        if step % 3 == 0:
            image.new_image(screen)
        a = legal_actions[randrange(len(legal_actions))]
        step += 1
        # Apply an action and get the resulting reward