def updateQ(self, game):
     """Fold the most recent entry of ``game.history`` into the Q table.

     The last history entry supplies the board before the move (``s_prev``),
     the move itself, the board after it (``s``) and a ``score``.  Both
     boards are turned into table keys with ``getkey`` and the move is
     flattened to the single index ``3 * move[0] + move[1]``.

     If ``game.done`` is set, the entry for (previous board, move) is
     overwritten with the recorded score.  Otherwise it is blended, using
     ``self.learning_rate``, with ``self.decay_rate`` times the largest
     (``game.turn == 1``) or smallest (``game.turn == 0``) value stored for
     the new board.
     """
     last = game.history[-1]
     prev_key = getkey(last['s_prev'])
     played = last['move']
     next_key = getkey(last['s'])
     score = last['score']
     cell = 3 * played[0] + played[1]

     if game.done:
         # Finished game: store the recorded score as-is.
         self.Q[prev_key][cell] = score
         return

     # Turn 1 takes the maximum over the new state's row, turn 0 the minimum.
     pick = {0: min, 1: max}[game.turn]
     expected = self.decay_rate * pick(self.Q[next_key])
     # Two in-place steps: shrink the old estimate, then add the weighted target.
     self.Q[prev_key][cell] *= (1 - self.learning_rate)
     self.Q[prev_key][cell] += self.learning_rate * expected
Example #2
0
def learned_move(game, training):
    """Return the currently possible move with the best stored Q value.

    The moves from ``game.allpossible()`` are flattened to indices
    ``3 * row + col``.  Only those indices of the Q row for the current
    state (``training.Q[getkey(game.state)]``) are considered.  The entry
    with the smallest value is chosen when ``game.turn`` is 0 and the
    largest when it is 1; the winning index is returned as ``(row, col)``.
    """
    legal = [3 * pos[0] + pos[1] for pos in game.allpossible()]
    q_row = training.Q[getkey(game.state)]
    # Keep (index, value) pairs in row order so ties resolve to the lowest index.
    candidates = [(idx, value) for idx, value in enumerate(q_row) if idx in legal]
    chooser = {0: min, 1: max}[game.turn]
    best_idx, _ = chooser(candidates, key=lambda pair: pair[1])
    return divmod(best_idx, 3)
 def simulate_game(self):
     """Play one game on a fresh ``TTTBoard``, updating the Q table each step.

     Moves come from ``self.action``.  A move accepted by ``board.possible``
     advances the board via ``board.update`` and is then passed to
     ``self.updateQ``.  A rejected move leaves the board unchanged and
     writes ``.5 - board.turn`` into the Q entry for that (state, move),
     after which another move is requested.
     """
     board = TTTBoard()
     while not board.done:
         xy = self.action(board)
         if board.possible(xy):
             board = board.update(xy)
             self.updateQ(board)
             continue
         # Rejected move: record .5 - turn for it (-0.5 when turn is 1,
         # +0.5 when turn is 0) and try again from the same board.
         self.Q[getkey(board.state)][3 * xy[0] + xy[1]] = .5 - board.turn
 def action(self, board):
     """Pick a move for ``board``, either at random or from the Q table.

     When ``np.random.rand() <= self.random_rate`` a uniformly random entry
     of ``board.allpossible()`` is returned.  Otherwise the Q row for the
     current state is consulted: the index of its largest value when
     ``board.turn == 1``, of its smallest value otherwise, converted to
     ``(index // 3, index % 3)``.  The table-driven choice is not checked
     against ``board.allpossible()`` here.
     """
     if np.random.rand() <= self.random_rate:
         options = board.allpossible()
         return options[np.random.randint(len(options))]

     q_row = self.Q[getkey(board.state)]
     pick = np.argmax if board.turn == 1 else np.argmin
     return divmod(pick(q_row), 3)
 def recursive_train(self, game, move):
     """Compute, store and return the value of playing ``move`` on ``game``.

     The move is applied with ``game.update``.  If the resulting board is
     done, its ``score()`` is the value.  Otherwise every move from
     ``allpossible()`` on the resulting board is evaluated recursively, and
     the value is ``self.decay_rate`` times the maximum of those results
     when ``game.turn`` is 0, or the minimum when it is 1.  The value is
     written to the Q entry for (``game.state``, ``move``) before returning.
     """
     child = game.update(move)
     if child.done:
         value = child.score()
     else:
         # Keyed on the turn of the board *before* the move was applied.
         select = {0: max, 1: min}[game.turn]
         value = self.decay_rate * select(
             self.recursive_train(child, reply)
             for reply in child.allpossible()
         )
     cell = move[0] * 3 + move[1]
     self.Q[getkey(game.state)][cell] = value
     return value
 def train(self):
     """Fill the Q table by evaluating every opening move on a new board.

     For each move from ``allpossible()`` on a fresh ``TTTBoard``,
     ``recursive_train`` is run and its return value is stored in the Q
     entry for (initial state, move).
     """
     root = TTTBoard()
     for move in root.allpossible():
         value = self.recursive_train(root, move)
         self.Q[getkey(root.state)][move[0] * 3 + move[1]] = value