Example 1
def __init__(self, board_size, n_history, n_simul):
    self.env_simul = GomokuEnv(board_size, n_history, display=False)
    self.board_size = board_size
    self.n_simul = n_simul
    self.tree = None
    self.root = None
    self.state = None
    self.board = None
    # used for backup
    self.key_memory = deque()
    self.action_memory = deque()
    self.reset_tree()
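
This constructor is the same one that appears in the full MCTS classes of Examples 5 and 6. A minimal instantiation sketch, assuming that class and the GomokuEnv shown in the other examples (the argument values are placeholders, not the project's defaults):

env = GomokuEnv(9, 2)                    # board_size, n_history
mcts = MCTS(board_size=9, n_history=2, n_simul=800)
state, board = env.reset()
action = mcts.get_action(state, board)   # runs n_simul rollouts, then picks a move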
Example 2
def main():
    env = GomokuEnv(BOARD_SIZE, HISTORY)
    manager = HumanUI()
    result = {'Black': 0, 'White': 0, 'Draw': 0}
    for g in range(GAME):
        print('#####  Game: {}  #####'.format(g + 1))
        state, board = env.reset()
        done = False
        idx = 0
        while not done:
            env.render()
            # start simulations
            action = manager.get_action(state, board, idx)
            state, board, z, done = env.step(action)
            #print(board)
            idx += 1
        if done:
            if z == 1:
                result['Black'] += 1
            elif z == -1:
                result['White'] += 1
            else:
                result['Draw'] += 1
            # render & reset tree
            env.render()
            manager.ai.reset_tree()
        # result
        print('')
        print('=' * 20, " {}  Game End  ".format(g + 1), '=' * 20)
        blw, whw, drw = result['Black'], result['White'], result['Draw']
        stat = (
            'Black Win: {}  White Win: {}  Draw: {}  Winrate: {:0.1f}%'.format(
                blw, whw, drw,
                1 / (1 + np.exp(whw / (g + 1)) / np.exp(blw / (g + 1))) * 100))
        print(stat, '\n')
Example 3
def play():
    env = GomokuEnv(BOARD_SIZE, HISTORY)
    mcts = MCTS(BOARD_SIZE, HISTORY, N_SIMUL)
    result = {'Black': 0, 'White': 0, 'Draw': 0}
    for g in range(GAME):
        print('#' * (BOARD_SIZE - 4), ' GAME: {} '.format(g + 1),
              '#' * (BOARD_SIZE - 4))
        # reset state
        state, board = env.reset()
        done = False
        while not done:
            env.render()
            # start simulations
            action = mcts.get_action(state, board)
            state, board, z, done = env.step(action)
        if done:
            if z == 1:
                result['Black'] += 1
            elif z == -1:
                result['White'] += 1
            else:
                result['Draw'] += 1
            # render & reset tree
            env.render()
            mcts.reset_tree()
        # result
        print('')
        print('=' * 20, " {}  Game End  ".format(g + 1), '=' * 20)
        blw, whw, drw = result['Black'], result['White'], result['Draw']
        stat = (
            'Black Win: {}  White Win: {}  Draw: {}  Winrate: {:0.1f}%'.format(
                blw, whw, drw,
                1 / (1 + np.exp(whw / (g + 1)) / np.exp(blw / (g + 1))) * 100))
        print(stat, '\n')
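
The "Winrate" figure printed in Examples 2 and 3 is a logistic sigmoid of the per-game win margin: np.exp(whw / (g + 1)) / np.exp(blw / (g + 1)) collapses to np.exp((whw - blw) / (g + 1)), so the whole expression equals sigmoid((blw - whw) / (g + 1)) * 100. A small standalone check of that equivalence (the helper name winrate is only for illustration):

import numpy as np

def winrate(blw, whw, games):
    # Expression used in the examples above.
    raw = 1 / (1 + np.exp(whw / games) / np.exp(blw / games)) * 100
    # Equivalent logistic-sigmoid form of the per-game win margin.
    sigmoid = 1 / (1 + np.exp(-(blw - whw) / games)) * 100
    assert np.isclose(raw, sigmoid)
    return raw

print(winrate(blw=6, whw=3, games=10))  # ~57.4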
Example 4
from random import choice
from gomoku_env import GomokuEnv
from gameloop import Game

try:  # python 2/3 compatibility
    input = raw_input
except NameError:
    pass


def player1_turn(state):
    return choice(state.available_turns())


def player1_human_turn(state):
    _ = input("Type your turn as x,y: ")
    x, y = _.split(",")
    x, y = int(x.strip()), int(y.strip())
    return x, y


def player2_turn(state):
    return choice(state.available_turns())


env = GomokuEnv(board_size=15, win_len=5)
game = Game(env, player1_turn, player2_turn)
game.loop()
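
player1_human_turn is defined above but never wired in. To play as player 1 yourself against the random policy, pass it to Game in place of player1_turn; a usage sketch with the same constructor shown above:

# Human plays as player 1, the random policy plays as player 2.
game = Game(env, player1_human_turn, player2_turn)
game.loop()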
Example 5
class MCTS:
    def __init__(self, board_size, n_history, n_simul):
        self.env_simul = GomokuEnv(board_size, n_history, display=False)
        self.board_size = board_size
        self.n_simul = n_simul
        self.tree = None
        self.root = None
        self.state = None
        self.board = None
        self.legal_move = None
        self.no_legal_move = None
        self.ucb = None

        # used for backup
        self.key_memory = None
        self.action_memory = None

        # init
        self._reset()
        self.reset_tree()

    def _reset(self):
        self.key_memory = deque(maxlen=self.board_size**2)
        self.action_memory = deque(maxlen=self.board_size**2)

    def reset_tree(self):
        self.tree = defaultdict(lambda: zeros(
            (self.board_size**2, 2), 'float'))

    def get_action(self, state, board):
        self.root = state.copy()
        self._simulation(state)
        # init root board after simulations
        self.board = board
        board_fill = self.board[CURRENT] + self.board[OPPONENT]
        self.legal_move = argwhere(board_fill == 0).flatten()
        self.no_legal_move = argwhere(board_fill != 0).flatten()
        # root state's key
        root_key = hash(self.root.tostring())
        # argmax Q
        action = self._selection(root_key, c_ucb=0)
        print('')
        print(
            self.ucb.reshape(self.board_size,
                             self.board_size).round(decimals=4))
        return action

    def _simulation(self, state):
        start = time.time()
        finish = 0
        for sim in range(self.n_simul):
            print('\rsimulation: {}'.format(sim + 1), end='')
            sys.stdout.flush()
            # reset state
            self.state, self.board = self.env_simul.reset(state)
            done = False
            n_selection = 0
            n_expansion = 0
            while not done:
                board_fill = self.board[CURRENT] + self.board[OPPONENT]
                self.legal_move = argwhere(board_fill == 0).flatten()
                self.no_legal_move = argwhere(board_fill != 0).flatten()
                key = hash(self.state.tostring())
                # search my tree
                if key in self.tree:
                    # selection
                    action = self._selection(key, c_ucb=1)
                    self.action_memory.appendleft(action)
                    self.key_memory.appendleft(key)
                    n_selection += 1
                elif n_expansion == 0:
                    # expansion
                    action = self._expansion(key)
                    self.action_memory.appendleft(action)
                    self.key_memory.appendleft(key)
                    n_expansion += 1
                else:
                    # rollout
                    action = random.choice(self.legal_move)
                self.state, self.board, reward, done = \
                    self.env_simul.step(action)
            if done:
                # backup & reset memory
                self._backup(reward, n_selection + n_expansion)
                self._reset()
                finish = time.time() - start
                # if finish >= self.think_time:
                #     break
        print('\r{} simulations end ({:0.0f}s)'.format(sim + 1, finish))

    def _selection(self, key, c_ucb):
        edges = self.tree[key]
        ucb = self._get_ucb(edges, c_ucb)
        self.ucb = ucb
        if self.board[COLOR][0] == WHITE:
            # black's choice
            action = argwhere(ucb == ucb.max()).flatten()
        else:
            # white's choice
            action = argwhere(ucb == ucb.min()).flatten()
        action = action[random.choice(len(action))]
        return action

    def _expansion(self, key):
        # only select once for rollout
        action = self._selection(key, c_ucb=1)
        return action

    def _get_ucb(self, edges, c_ucb):
        total_N = 0
        ucb = zeros((self.board_size**2), 'float')
        for i in range(self.board_size**2):
            total_N += edges[i][N]
        # black's ucb
        if self.board[COLOR][0] == WHITE:
            for move in self.legal_move:
                if edges[move][N] != 0:
                    ucb[move] = edges[move][Q] + c_ucb * \
                        sqrt(2 * log(total_N) / edges[move][N])
                else:
                    ucb[move] = np.inf
            for move in self.no_legal_move:
                ucb[move] = -np.inf
        # white's ucb
        else:
            for move in self.legal_move:
                if edges[move][N] != 0:
                    ucb[move] = edges[move][Q] - c_ucb * \
                        sqrt(2 * log(total_N) / edges[move][N])
                else:
                    ucb[move] = -np.inf
            for move in self.no_legal_move:
                ucb[move] = np.inf
        return ucb

    def _backup(self, reward, steps):
        # steps is n_selection + n_expansion
        # update edges in my tree
        for i in range(steps):
            edges = self.tree[self.key_memory[i]]
            action = self.action_memory[i]
            edges[action][N] += 1
            edges[action][Q] += (reward - edges[action][Q]) / edges[action][N]
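
The exploration term in _get_ucb above is plain UCB1: for the side to move, an edge scores Q plus/minus c * sqrt(2 * ln(total_N) / n), unvisited edges are forced to +/-inf so they are tried first, and occupied squares are pushed to the opposite infinity. A minimal standalone sketch of the per-edge score (the helper ucb1 and its arguments are illustrative only, not part of the class):

import numpy as np

def ucb1(q, n, total_n, c=1.0, maximizing=True):
    # q: mean value of the edge, n: its visit count,
    # total_n: visits summed over all edges of the node.
    if n == 0:
        # unvisited edges are always preferred, as in _get_ucb
        return np.inf if maximizing else -np.inf
    bonus = c * np.sqrt(2 * np.log(total_n) / n)
    return q + bonus if maximizing else q - bonus

# An edge visited 3 times out of 20, mean value 0.4:
print(ucb1(q=0.4, n=3, total_n=20))  # ~1.81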
Example 6
class MCTS:
    def __init__(self, board_size, n_history, n_simul):
        self.env_simul = GomokuEnv(board_size, n_history, display=False)
        self.board_size = board_size
        self.n_simul = n_simul
        self.tree = None
        self.root = None
        self.state = None
        self.board = None
        # used for backup
        self.key_memory = deque()
        self.action_memory = deque()
        self.reset_tree()

    def reset_tree(self):
        self.tree = defaultdict(lambda: zeros((self.board_size**2, 2)))

    def get_action(self, state, board):
        self.root = state.copy()
        self._simulation(state)
        # init root board after simulations
        self.board = board
        # root state's key
        root_key = hash(self.root.tostring())
        # argmax Q or argmin Q
        action = self._selection(root_key, c_pucb=0)
        return action

    def _simulation(self, state):
        start = time.time()
        finish = 0
        for sim in range(self.n_simul):
            print('\rsimulation: {}'.format(sim + 1), end='')
            sys.stdout.flush()
            # reset state
            self.state, self.board = self.env_simul.reset(state)
            done = False
            is_expansion = True

            while not done:
                key = hash(self.state.tostring())
                # search my tree
                if key in self.tree:
                    # selection
                    action = self._selection(key, c_pucb=5)
                    self.action_memory.appendleft(action)
                    self.key_memory.appendleft(key)
                else:
                    # expansion
                    legal_move, _ = self._get_legal_move(self.board)
                    action = random.choice(legal_move)
                    if is_expansion:
                        self.action_memory.appendleft(action)
                        self.key_memory.appendleft(key)
                        is_expansion = False

                self.state, self.board, reward, done = self.env_simul.step(
                    action)

            if done:
                # backup & reset memory
                self._backup(reward)
                finish = time.time() - start
                # if finish >= self.think_time:
                #     break
        print('\r{} simulations end ({:0.0f}s)'.format(sim + 1, finish))

    def _get_legal_move(self, board):
        board_fill = board[CURRENT] + board[OPPONENT]
        legal_move = argwhere(board_fill != 1).flatten()
        return legal_move, board_fill

    def _selection(self, key, c_pucb):
        edges = self.tree[key]
        pucb = self._get_pucb(edges, c_pucb)

        if c_pucb == 0:
            visit = edges[:, N]
            print('\nvisit count')
            print(visit.reshape(self.board_size, self.board_size).round())
            action = argwhere(visit == visit.max()).flatten()
            action = action[random.choice(len(action))]
            return action

        if self.board[COLOR][0] == WHITE:
            # black's choice
            action = argwhere(pucb == pucb.max()).flatten()
        else:
            # white's choice
            action = argwhere(pucb == pucb.min()).flatten()
        action = action[random.choice(len(action))]
        return action

    def _get_pucb(self, edges, c_pucb):
        legal_move, no_legal_loc = self._get_legal_move(self.board)
        prior = 1 / len(legal_move)
        total_N = edges.sum(0)[N]
        # black's pucb
        if self.board[COLOR][0] == WHITE:
            no_legal_loc *= -9999
            pucb = edges[:, Q] + \
                c_pucb * prior * sqrt(total_N) / (edges[:, N] + 1) + no_legal_loc
        # white's pucb
        else:
            no_legal_loc *= 9999
            pucb = edges[:, Q] - \
                c_pucb * prior * sqrt(total_N) / (edges[:, N] + 1) + no_legal_loc
        return pucb

    def _backup(self, reward):
        # update edges in my tree
        while self.action_memory:
            key = self.key_memory.popleft()
            action = self.action_memory.popleft()
            edges = self.tree[key]
            edges[action][N] += 1
            edges[action][Q] += (reward - edges[action][Q]) / edges[action][N]
        return 0
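
Example 6 swaps UCB1 for a PUCT-style score, Q plus/minus c_pucb * P * sqrt(total_N) / (n + 1), with a uniform prior P = 1 / (number of legal moves) and a +/-9999 offset that rules out occupied squares; the backup is the same incremental-mean update as in Example 5, Q += (reward - Q) / N. A minimal standalone sketch of the per-edge score (the helper pucb and its arguments are illustrative only, not part of the class):

import numpy as np

def pucb(q, n, total_n, n_legal, c_pucb=5.0, maximizing=True):
    # Uniform prior over legal moves, as in _get_pucb above.
    prior = 1.0 / n_legal
    bonus = c_pucb * prior * np.sqrt(total_n) / (n + 1)
    return q + bonus if maximizing else q - bonus

# 81 legal moves, edge visited twice out of 50, mean value 0.1:
print(pucb(q=0.1, n=2, total_n=50, n_legal=81))  # ~0.25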