def getAction(self, state):
    """Return the next action in the path chosen earlier (in
    registerInitialState); returns gameover if no more actions to take.

    state -- a GameState object (TetrisSearch.py)
    """
    key_actions = {
        'ESCAPE': state.quit,
        'LEFT': lambda: state.move(-1),
        'RIGHT': lambda: state.move(+1),
        'DOWN': lambda: state.drop(True),
        'UP': state.rotate_stone,
        'p': state.toggle_pause,
        'SPACE': state.start_game,
        'RETURN': state.insta_drop
    }
    # NOTE(review): actionIndex is reset to 0 on every call, so i is always
    # 0 and the increment never advances through self.actions across calls
    # -- confirm whether the reset belongs in registerInitialState instead.
    self.actionIndex = 0
    i = self.actionIndex
    self.actionIndex += 1
    # FIX: was `pygame.eveng.get()` -- a typo that raises AttributeError;
    # the pygame module is `pygame.event`.
    for event in pygame.event.get():
        if event.type == pygame.USEREVENT + 1:
            self.drop(False)
        elif event.type == pygame.QUIT:
            self.quit()
        elif i < len(self.actions):
            # NOTE(review): eval(key) resolves names like LEFT/RIGHT from
            # the enclosing module's namespace; it raises NameError unless
            # those constants are defined there -- verify against this
            # class's module.
            for key in key_actions:
                if self.actions[i] == eval(key):
                    return key_actions[key]()
            # Reaching here means event.type matched neither branch above
            # (those were already handled), so when no key matched we fall
            # through to the gameover return.
            if event.type == pygame.USEREVENT + 1:
                self.drop(False)
            elif event.type == pygame.QUIT:
                self.quit()
            else:
                # NOTE(review): unbound call on the class, not on self --
                # confirm TetrisApp.quit is usable without an instance.
                return TetrisApp.quit()
class QLearningAgent(TetrisApp):
    """Q-learning agent that drives an embedded TetrisApp instance.

    States are (board_state, stone) pairs and actions are (rotation,
    column) pairs. Q-values live in a util.Counter keyed by a hash of the
    stringified (state, action) pair, because the raw tuples contain
    unhashable lists and cannot be dict keys directly.
    """

    def __init__(self, alpha=0.01, gamma=.5, epsilon=1):
        """alpha: learning rate; gamma: discount; epsilon: exploration rate."""
        self.qval = util.Counter()
        self.alpha = alpha
        self.epsilon = epsilon
        self.discount = gamma
        self.Tetris = TetrisApp()
        self.boardprev = 0.
        # FIX: observeTransition reads self.episodeRewards, but it was
        # never initialized, so the first call raised AttributeError.
        self.episodeRewards = 0.0

    def observeTransition(self, state, action, nextState, deltaReward):
        """Accumulate the episode reward and fold the transition into Q."""
        self.episodeRewards += deltaReward
        self.update(state, action, nextState, deltaReward)

    def getQValue(self, state, action):
        """Return Q(state, action), 0.0 for never-seen pairs.

        util.Counter already yields 0.0 for missing keys, so the explicit
        seeding the old code did was redundant and has been dropped;
        behavior is unchanged.
        """
        return self.qval[hash(str((state, action)))]

    def computeValueFromQValues(self, state):
        """Return max_a Q(state, a), or 0.0 when no legal action exists."""
        action = self.computeActionFromQValues(state)
        if action is None:
            return 0.0
        return self.getQValue(state, action)

    def computeActionFromQValues(self, state):
        """Return the greedy action for state, or None if none is legal."""
        # Hoisted: the old code called get_legal_actions twice.
        legalActions = self.Tetris.get_legal_actions(state[1])
        finalaction = None
        # FIX: the old sentinel (-999999) would reject every action whose
        # Q-value fell below it; negative infinity cannot be missed.
        maxval = float('-inf')
        for action in legalActions:
            Qval = self.getQValue(state, action)
            if Qval >= maxval:
                maxval = Qval
                finalaction = action
        return finalaction

    def helperfunction(self, lst, legalactions):
        """Score one candidate placement plus its best follow-up.

        lst is a (value, action, new_board) triple from ideal_place_2;
        returns a (combined_value, action) pair.
        """
        value, action, new_board = lst
        followup = max(self.ideal_place_2(new_board, legalactions, True))[0]
        return (value + followup, action)

    def getAction(self, state):
        """Choose an action for state.

        With probability epsilon, use the two-ply heuristic lookahead
        (ideal_place_2); otherwise act greedily on the Q-table. Returns
        None when there is no legal action.
        """
        legalActions = self.Tetris.get_legal_actions(state[1])
        action = None
        if len(legalActions) != 0:
            if util.flipCoin(self.epsilon):
                actionlist = self.ideal_place_2(self.Tetris.board, legalActions, False)
                valuelist = map(lambda x: self.helperfunction(x, legalActions), actionlist)
                return max(valuelist)[1]
            else:
                action = self.computeActionFromQValues(state)
        return action

    def update(self, state, action, nextState, reward):
        """Standard Q-learning update:
        Q(s,a) += alpha * (r + gamma * V(s') - Q(s,a))."""
        self.qval[hash(str((state, action)))] += self.alpha * (
            reward
            + self.discount * self.computeValueFromQValues(nextState)
            - self.getQValue(state, action))

    def getPolicy(self, state):
        """Greedy policy: the best known action for state."""
        return self.computeActionFromQValues(state)

    def getValue(self, state):
        """Value of state under the greedy policy."""
        return self.computeValueFromQValues(state)

    def run(self, n):
        """Play one training episode (episode number n), updating the
        Q-table after each placed piece and redrawing the board via pygame.
        """
        key_actions = {
            'ESCAPE': self.Tetris.quit,
            'LEFT': lambda: self.Tetris.move(-1),
            'RIGHT': lambda: self.Tetris.move(+1),
            'DOWN': lambda: self.Tetris.drop(True),
            'UP': self.Tetris.rotate_stone,
            # FIX: 'SPACE' appeared twice in this dict, so the
            # toggle_pause binding was silently overwritten by start_game.
            # Pause now lives on 'p', matching the base class's key map.
            'p': self.Tetris.toggle_pause,
            'SPACE': self.Tetris.start_game,
            'RETURN': self.Tetris.insta_drop
        }
        self.Tetris.board = tetris.new_board()
        self.boardprev = self.Tetris.board
        # Pure exploration for the first value_iter_rounds episodes, then
        # decay epsilon logarithmically with the episode number.
        if n < value_iter_rounds:
            self.epsilon = 1
        else:
            self.epsilon = 1 / (15. * math.log(float(n) + 1))
        self.Tetris.gameover = False
        self.Tetris.paused = False
        rot, col = self.getAction(
            (self.Tetris.get_board_state(self.Tetris.board), self.Tetris.stone))
        prevboard = self.Tetris.board
        n += 1
        # (Unused locals dont_burn_my_cpu, piece, legalactions removed.)
        while not self.Tetris.gameover:
            # Learn from the previous placement before choosing the next.
            self.update(
                (prevboard, self.Tetris.stone),
                (rot, col),
                (self.Tetris.get_board_state(self.Tetris.board), self.Tetris.stone),
                self.Tetris.heuristic(self.Tetris.board))
            prevboard = tetris.deepishcopy(self.Tetris.board)
            rot, col = self.getAction(
                (self.Tetris.get_board_state(self.Tetris.board), self.Tetris.stone))
            i = 1
            while i == 1:
                self.Tetris.screen.fill((0, 0, 0))
                if self.Tetris.gameover:
                    self.Tetris.center_msg("""Game Over!\nYour score: %d
Press space to continue""" % self.Tetris.score)
                    # Restart automatically until enough episodes have been
                    # played, then shut the app down.
                    if n < 10000:
                        self.Tetris.start_game()
                    else:
                        self.Tetris.quit()
                else:
                    if self.Tetris.paused:
                        self.Tetris.center_msg("Paused")
                    else:
                        pygame.draw.line(
                            self.Tetris.screen, (255, 255, 255),
                            (self.Tetris.rlim + 1, 0),
                            (self.Tetris.rlim + 1, self.Tetris.height - 1))
                        self.Tetris.disp_msg(
                            "Next:", (self.Tetris.rlim + cell_size, 2))
                        self.Tetris.disp_msg(
                            "Score: %d\n\nLevel: %d\
\nLines: %d" % (self.Tetris.score, self.Tetris.level, self.Tetris.lines),
                            (self.Tetris.rlim + cell_size, cell_size * 5))
                        self.Tetris.draw_matrix(self.Tetris.bground_grid, (0, 0))
                        self.Tetris.draw_matrix(self.Tetris.board, (0, 0))
                        self.Tetris.draw_matrix(
                            self.Tetris.stone,
                            (self.Tetris.stone_x, self.Tetris.stone_y))
                        self.Tetris.draw_matrix(self.Tetris.next_stone, (cols + 1, 2))
                pygame.display.update()
                self.Tetris.place_brick(rot, col)
                i = 0
                for event in pygame.event.get():
                    if event.type == pygame.USEREVENT + 1:
                        pass
                    elif event.type == pygame.QUIT:
                        self.Tetris.quit()
                    elif event.type == pygame.KEYDOWN:
                        for key in key_actions:
                            if event.key == eval("pygame.K_" + key):
                                key_actions[key]()