Example #1
    def test_available_moves(self):
        size = 3
        board = clean_board(size)

        # An empty 3x3 board has 9 legal moves
        self.assertEqual(len(available_moves(board)), 9)

        # Occupying one cell removes exactly one legal move
        apply_move_inplace(board, (0, 0), 1)
        self.assertEqual(len(available_moves(board)), 8)

        # A fully occupied board has no legal moves
        # (np.int was deprecated and removed from NumPy; plain int works)
        board = np.ones((size, size), dtype=int)
        self.assertEqual(len(available_moves(board)), 0)
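For reference, a minimal sketch of the helpers this test exercises. The names come from the example; the implementations are assumptions consistent with the assertions, not the original code:

    import numpy as np

    def clean_board(size):
        # Empty board: 0 marks a free cell
        return np.zeros((size, size), dtype=int)

    def available_moves(board):
        # Every empty cell is a legal move, returned as (row, col) tuples
        return list(zip(*np.where(board == 0)))

    def apply_move_inplace(board, move, side):
        # Mark the cell for `side` (e.g. 1 or -1), mutating the board
        board[move] = side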
Example #2
    def add_board(self, board):
        board_hash = hash_board(board)
        if self.q_table.get(board_hash) is None:
            # Optimistic initialisation: every legal move starts at Q = 1.0,
            # which encourages the agent to try unexplored moves
            legal_moves = available_moves(board)
            self.q_table[board_hash] = {move: 1.0 for move in legal_moves}

        return board_hash
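hash_board is not shown in these examples; any stable, hashable encoding of the array will do as a dictionary key. A possible sketch (an assumption, not the source's implementation):

    def hash_board(board):
        # NumPy arrays are not hashable, but their raw bytes are
        return board.tobytes()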
Example #3
    def get_move(self, board):
        legal_moves = available_moves(board)
        # Evaluate the board that would result from each legal move
        new_boards = np.array([apply_move(board, move, self.side_to_play) for move in legal_moves])
        # Flatten each 3x3 board into 9 features for the model
        evaluations = self.model.predict(new_boards.reshape(len(legal_moves), 9))
        return legal_moves[self.min_max_best_move(evaluations)]
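min_max_best_move is not shown either; it presumably returns the index of the best evaluation for the side to play. A hypothetical sketch under that assumption:

    def min_max_best_move(self, evaluations):
        # Side 1 maximises the model's evaluation, side -1 minimises it
        if self.side_to_play == 1:
            return int(np.argmax(evaluations))
        return int(np.argmin(evaluations))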
Example #4
    def learn_q(self, board, move):
        board_hash = self.add_board(board)
        new_board = apply_move(board, move, self.side)
        new_board_hash = self.add_board(new_board)

        reward = self.calculate_reward(new_board)

        # Terminal state: a non-zero reward (win/loss) or a full board (draw).
        # Note: available_moves expects the board itself, not its hash.
        if reward != 0 or len(available_moves(new_board)) == 0:
            expected = reward
        else:
            expected_rewards = self.q_table[new_board_hash]
            # Discounted best Q-value of the successor state (gamma = 0.9)
            expected = reward + (0.9 * max(expected_rewards.values()))

        # Temporal-difference update with learning rate alpha = 0.3
        change = 0.3 * (expected - self.q_table[board_hash][move])
        self.q_table[board_hash][move] += change
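This is the standard tabular Q-learning update, Q(s, a) <- Q(s, a) + alpha * (r + gamma * max_a' Q(s', a') - Q(s, a)), with learning rate alpha = 0.3 and discount factor gamma = 0.9; on terminal states the target collapses to the raw reward.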
Example #5
    def get_move(self, board, side):
        # Baseline player: choose uniformly among the legal moves
        # (requires `import random` at module level)
        return random.choice(available_moves(board))