Esempio n. 1
0
    def test_get_move(self):
        """Two RandomPlayers built with the same seed must pick matching moves.

        The players are queried in both orders (first/second, then
        second/first) against the same blank board, and the ``board``
        attribute of the returned moves is compared each time.
        """
        board = TTTBoard(TestTTTBoard.blank_board)
        first_player = RandomPlayer(1, seed=42)
        second_player = RandomPlayer(1, seed=42)

        # Same pair of players, queried once in each order.
        orderings = (
            (first_player, second_player),
            (second_player, first_player),
        )
        for leader, follower in orderings:
            leader_move = leader.get_move(board)
            follower_move = follower.get_move(board)
            self.assertEqual(leader_move.board, follower_move.board)
Esempio n. 2
0
 def get_move(self, board):
     """Pick a move: take a win if one exists, otherwise block, otherwise defer.

     Every available move is checked with ``THandPlayer.next_move_winner``
     (presumably "does playing this move win for the given mark" -- confirm
     against its definition). A move that passes the check for ``self.mark``
     is returned immediately. If none does, the first move that passes the
     check for ``self.opponent_mark`` is returned. If neither exists, the
     choice is delegated to ``RandomPlayer.get_move(board)``.

     Args:
         board: Object exposing ``available_moves()``.

     Returns:
         The chosen move, or ``None`` when ``available_moves()`` is empty.
     """
     moves = board.available_moves()
     if not moves:
         # No legal move: keep the original implicit ``None`` result.
         return None

     # Remember the first blocking move but keep scanning, so that a winning
     # move later in the list is never pre-empted by an earlier block.
     blocking_move = None
     for move in moves:
         if THandPlayer.next_move_winner(board, move, self.mark):
             return move
         # Compare against None explicitly: a move may itself be falsy
         # (for example index 0).
         if blocking_move is None and THandPlayer.next_move_winner(
                 board, move, self.opponent_mark):
             blocking_move = move

     if blocking_move is not None:
         return blocking_move

     # Nothing to win and nothing to block.
     return RandomPlayer.get_move(board)
Esempio n. 3
0
    def get_move(self, board):
        """Choose a move using an epsilon-greedy policy over ``self.Q``.

        With probability ``self.epsilon`` the move is delegated to
        ``RandomPlayer.get_move(board)``. Otherwise the entry of ``self.Q``
        for the current state key is looked up and passed to
        ``QPlayer.stochastic_argminmax`` with ``max`` when ``self.mark`` is
        ``"X"`` and ``min`` when it is ``"O"``.

        The state key, the looked-up ``self.Q`` entry and the chosen move are
        printed for debugging.

        Args:
            board: Current board, forwarded to the helpers.

        Returns:
            The selected move, or ``None`` if ``self.mark`` is neither
            ``"X"`` nor ``"O"``.
        """
        # With probability epsilon, choose a move at random
        # ("epsilon-greedy" exploration).
        if np.random.uniform() < self.epsilon:
            return RandomPlayer.get_move(board)

        state_key = QPlayer.make_and_maybe_add_key(board, self.mark, self.Q)
        print(state_key)
        Qs = self.Q[state_key]
        print(Qs)

        if self.mark == "X":
            pick = max
        elif self.mark == "O":
            pick = min
        else:
            # Unknown mark: keep the original fall-through result.
            return None

        # Evaluate the selection exactly once, so the printed move is always
        # the one returned (the helper may break ties randomly -- confirm).
        move = QPlayer.stochastic_argminmax(Qs, pick)
        print(move)
        return move