def expand_all(self, leaf):
        """Create one child of `leaf` for every move legal in its state."""
        game = Game(leaf.state)
        for move in game.allowed_moves():
            # each child node records the resulting state and the move that led to it
            leaf.children.append(self.createNode(game.nextstate(move), move, parent=leaf))
Beispiel #2
0
    def back_prop_terminal(self, leaf_terminal):
        """Back-propagate the exact outcome of a terminal leaf up to the root.

        A terminal leaf is always a draw (reward 0) or a win for the player
        that played the move (reward 1); the reward's sign alternates at each
        level while walking towards the root.
        """
        game = Game(leaf_terminal.state)
        _, winner = game.gameover()

        # a terminal leaf is always a draw or reward 1 (for the player that played the move)
        new_reward = 0 if winner == 0 else 1

        # update the terminal leaf itself
        leaf_terminal.W += new_reward
        leaf_terminal.N += 1
        leaf_terminal.Q = leaf_terminal.W / leaf_terminal.N

        # then walk up, flipping the reward sign at every step
        current = leaf_terminal
        sign = -1
        while current.parent is not None:
            parent = current.parent
            parent.N += 1
            parent.W += sign * new_reward
            parent.Q = parent.W / parent.N
            current = parent
            sign = -sign
Beispiel #3
0
def getcountermove(currentnode, tree):
    """Advance `currentnode` when the position is critical.

    If an immediate win exists, play it; otherwise, if the opponent threatens
    an immediate win, counter it. Returns the (possibly advanced) node and a
    flag telling whether such a critical move existed.
    """
    existcounter = False
    game = Game(currentnode.state)
    can_win, where_win, can_be_lost, where_lose = game.iscritical()

    if can_win == 1 or can_be_lost == 1:
        # take the win if possible, otherwise counter the threatened loss
        candidates = where_win if can_win == 1 else where_lose
        move = candidates[int(random.random() * len(candidates))]
        # must expand since not done in mcts sims in that case
        tree.expand_all(currentnode)
        col = game.convert_move_to_col_index(move)
        for child in currentnode.children:
            if game.convert_move_to_col_index(child.move) == col:
                currentnode = child
        existcounter = True

    return currentnode, existcounter
    def eval_leaf(self, leaf):
        """Evaluate `leaf` and refresh its W/N/Q statistics and children priors.

        Non-terminal leaf: query the neural network for a value and a prior
        distribution over children; the prior may be Dirichlet-noised at the
        root and masked to legal columns. Terminal leaf: use the true game
        outcome instead of the network's value estimate.
        """
        self.player.eval()  # put the network in inference mode
        np.random.seed()

        if leaf.isterminal() == 0:

            game = Game(leaf.state)
            flat = game.state_flattener(leaf.state)

            #NN call
            reward, P = self.player.forward(flat)
            proba_children = P.detach().numpy()[0]
            NN_q_value = reward.detach().numpy()[0][0]


            # Dirichlet noise is mixed into the prior only at the root
            # (leaf.parent is None), to diversify self-play exploration
            if self.use_dirichlet and leaf.parent is None :
                probs = np.copy(proba_children)
                alpha = config.alpha_dir
                epsilon = config.epsilon_dir

                dirichlet_input = [alpha for _ in range(config.L)]
                dirichlet_list = np.random.dirichlet(dirichlet_input)
                proba_children = (1 - epsilon) * probs + epsilon * dirichlet_list

            # W accumulates the NEGATED network value — presumably because the
            # value is from the point of view of the player to move at the
            # child; NOTE(review): confirm the sign convention against backprop
            leaf.W = leaf.W  - NN_q_value
            leaf.N += 1
            leaf.Q = leaf.W / leaf.N

            if config.maskinmcts:
                # zero the prior on columns that have no corresponding child
                # (i.e. full columns), then renormalize
                mask = np.zeros(config.L)
                for child in leaf.children:
                    child_col=game.convert_move_to_col_index(child.move)
                    mask[child_col] = 1

                maskit = np.multiply(proba_children, mask)

                # for possible bug (when proba given by NN is strictly one for a full column)
                if np.sum(maskit) == 0:
                    print('happening') #actually never happens -> no overflow in softmax -> good
                    epsilon =0.01
                    proba_children = (proba_children + epsilon)
                    proba_children = proba_children/ np.sum(proba_children)
                    maskit = np.multiply(proba_children, mask)

                leaf.proba_children = maskit / np.sum(maskit)
            else:
                leaf.proba_children = proba_children

        else:
            # seems reasonnable to use the true value and not NN value
            game = Game(leaf.state)
            _, winner = game.gameover()
            truereward = np.abs(winner)

            #to be fair it should include the long_game_factor if used, but it doesnt change much
            leaf.W = leaf.W + truereward

            leaf.N += +1
            leaf.Q = leaf.W / leaf.N
Beispiel #5
0
def printstates(player):
    """Evaluate the reference boards from config and print the net's opinion.

    Returns 1 when the model meets the break criterion on every early-turn
    board (central-column probability at least 92% and lowest Q-value on the
    optimal move), 0 otherwise.
    """
    part_states = config.particular_states()
    # knowledge based on http://connect4.gamesolver.org
    # for instance for turn 5 : http://connect4.gamesolver.org/?pos=44444
    print('')
    print(
        '(probs should be max for optimal play, ie [0,0,0,1,0,0,0] from turn 0 to 4 included ; turn 5 flat prob, turn 6, [0,0,.5,0,0.5,0,0]'
    )
    print(
        'Q-values of the board should be minimal for the corresponding optimal move)'
    )
    print('')
    getbreak = 1

    for idx in range(len(part_states)):

        state = config.getstate(idx)
        game = Game(state)

        # fresh tree per board; no Dirichlet noise when only printing
        tree = MCTS_NN(player, False)
        rootnode = tree.createNode(game.state)
        tree.expand_all(rootnode)
        tree.eval_leaf(rootnode)
        pchild = [int(1000 * p) / 10 for p in rootnode.proba_children]

        for child in rootnode.children:
            tree.expand_all(child)
            tree.eval_leaf(child)

        Qs = [-int(100 * child.Q) / 100 for child in rootnode.children]
        Qchilds = [-child.Q for child in rootnode.children]

        # the turn number is the total number of stones on the board
        turn = str(bin(state[0])).count('1') + str(bin(state[1])).count('1')
        print('turn', int(turn), 'Qval of this board',
              -int(1000 * rootnode.Q) / 1000)
        print('children probs', pchild, 'and of corresponding Q-val', Qs)
        time.sleep(0.01)

        #for automatic break of the main loop when the model is good enough
        #we require probabilities for central column to be at least 92%
        if int(turn) <= 4:
            if pchild[3] < 92:
                getbreak = 0
            # and lowest Q-value for the optimal move
            if any(Qchilds[3] > Qchilds[k] for k in (0, 1, 2, 4, 5, 6)):
                getbreak = 0

        #and, in the main program, an ELO of at least 1800 (see main)
    return getbreak
    def superselect(self,current,cpuct):
        """Select a child of `current`: play an immediate win when one
        exists, otherwise counter an immediate loss, otherwise take the
        child with the highest PUCT score (ties broken at random)."""
        game = Game(current.state)
        can_win, wherewin, can_lose, wherelose = game.iscritical()

        if can_win or can_lose:
            # superselection rule : take the win or counter the lose
            pool = wherewin if can_win else wherelose
            target_col = pool[int(random.random() * len(pool))]
            for child in current.children:
                if game.convert_move_to_col_index(child.move) == target_col:
                    current = child

        else:
            scores = [self.PUCT(child, cpuct) for child in current.children]
            best = max(scores)
            best_positions = [k for k, s in enumerate(scores) if s == best]

            # single best child: no random draw needed (keeps the RNG stream
            # identical to a plain argmax)
            if len(best_positions) == 1:
                current = current.children[best_positions[0]]
            else:
                pick = best_positions[int(random.random() * len(best_positions))]
                current = current.children[pick]

        return current
Beispiel #7
0
def onevsonegame(budget1, random1, counter1, usecounter_in_rollout_1, budget2,
                 random2, counter2, usecounter_in_rollout_2, whostarts, index):
    """Play one full game between two pure-MCTS (or random) players and
    pickle the result dict to ./data/game<index>.txt.

    Each side is configured by: a simulation budget, a random-play flag, a
    take-win/counter-loss flag, and a use-counter-in-rollout flag.
    `whostarts` is 'budget1' or 'budget2' and decides which configuration
    moves on odd turns.
    """
    import random
    random.seed()
    np.random.seed()

    # modulo decides which side plays when turn % 2 == modulo
    if whostarts == 'budget1':
        modulo = 1
    elif whostarts == 'budget2':
        modulo = 0

    # init tree, root, game
    tree = MCTS()
    c_uct = 1
    game = Game()
    turn = 0
    gameover = 0
    rootnode = tree.createNode(game.state)
    currentnode = rootnode

    # main loop
    while gameover == 0:

        turn = turn + 1

        # pick the configuration of the side to move this turn
        if turn % 2 == modulo:
            #player = 'player1'
            sim_number = budget1
            usecounterinrollout = usecounter_in_rollout_1
            counter = counter1
            rd = random1

        else:
            #player = 'player2'
            sim_number = budget2
            usecounterinrollout = usecounter_in_rollout_2
            counter = counter2
            rd = random2

        if rd:  #completely random play / or random + counter
            if counter:
                # critical moves (win/counter) take priority over random play
                currentnode, existscounter = getcountermove(currentnode, tree)
                if existscounter == False:
                    if len(currentnode.children) == 0:
                        tree.expand_all(currentnode)
                    randindex = int(random.random() *
                                    (len(currentnode.children)))
                    currentnode = currentnode.children[randindex]

            else:
                if len(currentnode.children) == 0:
                    tree.expand_all(currentnode)
                randindex = int(random.random() * (len(currentnode.children)))
                currentnode = currentnode.children[randindex]

        else:
            if counter:
                currentnode, existscounter = getcountermove(currentnode, tree)
                if existscounter == False:
                    # no critical move: run the MCTS simulations, then play
                    # the most-visited child (random tie break)
                    for sims in range(0, sim_number):
                        tree.simulate(currentnode, UCT_simu, c_uct,
                                      usecounterinrollout)

                    visits = np.array(
                        [child.N for child in currentnode.children])
                    max_visits = np.where(visits == np.max(visits))[0]
                    imax = max_visits[int(random.random() * len(max_visits))]
                    currentnode = currentnode.children[imax]

            else:

                for sims in range(0, sim_number):
                    tree.simulate(currentnode, UCT_simu, c_uct,
                                  usecounterinrollout)

                visits = np.array([child.N for child in currentnode.children])
                max_visits = np.where(visits == np.max(visits))[0]
                imax = max_visits[int(random.random() * len(max_visits))]
                currentnode = currentnode.children[imax]

        # then reinit tree
        game = Game(currentnode.state)
        tree = MCTS()
        rootnode = tree.createNode(game.state)
        currentnode = rootnode
        gameover, winner = game.gameover()

    #print('end of game')
    # winner == 1 means the side that moved first won; map back to budgets
    if winner == 0:
        toreturn = 'draw'

    elif winner == 1:
        if whostarts == 'budget1':
            toreturn = 'budget1'
        else:
            toreturn = 'budget2'

    elif winner == -1:
        if whostarts == 'budget1':
            toreturn = 'budget2'
        else:
            toreturn = 'budget1'

    monresult = {'result': toreturn}
    filename = './data/game' + str(index) + '.txt'
    with open(filename, 'wb') as file:
        pickle.dump(monresult, file)
    file.close()
Beispiel #8
0
    def default_rollout_policy(self, node, usecounter):
        """Play the game out to the end from `node` and return the winner.

        If `usecounter` is truthy, an immediately winning move is always
        taken and an opponent's immediate threat is always countered;
        otherwise a uniformly random legal move is played each turn.

        Returns the winner code from Game.gameover() (0 for a draw).

        Refactor note: the original duplicated the
        takestep / allowed_moves / gameover update in all four branches;
        the move choice and the step are now separated, with the same
        random.random() call pattern per iteration.
        """
        gameloc = Game(node.state)

        if node.isterminal() == 0:
            #init
            allowedmoves = gameloc.allowed_moves()
            gameover = 0

            # completely random rollout / or random rollout but take the win
            # or counter the lose, if usecounter
            while gameover == 0:

                if usecounter:
                    can_win, where_win, can_lose, where_lose = gameloc.iscritical()
                    if can_win:
                        move = where_win[int(random.random() * len(where_win))]
                    elif can_lose:
                        move = where_lose[int(random.random() * len(where_lose))]
                    else:
                        move = allowedmoves[int(random.random() * len(allowedmoves))]
                else:
                    move = allowedmoves[int(random.random() * len(allowedmoves))]

                # single step + state refresh shared by every branch
                gameloc.takestep(move)
                allowedmoves = gameloc.allowed_moves()
                gameover, _ = gameloc.gameover()

        _, winner = gameloc.gameover()

        return winner
Beispiel #9
0
 def expand_all(self, node):
     """Append one child to `node` for every legal move from its state."""
     game = Game(node.state)
     for mv in game.allowed_moves():
         node.children.append(self.createNode(game.nextstate(mv), mv, parent=node))
Beispiel #10
0
 def isterminal(self):
     """Return 1 when this node's state is a finished game, else 0."""
     over, _ = Game(self.state).gameover()
     return over
Beispiel #11
0
def onevsonehuman(budget, whostarts):
    """Interactive game: the best saved ResNet (guided by MCTS_NN with
    `budget` simulations per move) against a human entering moves on stdin.

    `whostarts` is 'computer' or anything else (human starts). Returns
    'draw', 'budget1' (computer won) or 'budget2' (human won).
    """
    # modulo decides which side plays when turn % 2 == modulo
    if whostarts == 'computer':
        modulo = 1
    else:
        modulo = 0

    file_path_resnet = './best_model_resnet.pth'
    best_player_so_far = ResNet.resnet18()
    best_player_so_far.load_state_dict(torch.load(file_path_resnet))

    game = Game()
    tree = MCTS_NN(best_player_so_far, use_dirichlet=False)
    rootnode = tree.createNode(game.state)
    currentnode = rootnode

    turn = 0
    isterminal = 0

    while isterminal == 0:

        turn = turn + 1

        if turn % 2 == modulo:
            player = 'computer'
            sim_number = budget
        else:
            player = 'human'

        if player == 'computer':

            print('===============IA playing================')
            for sims in range(0, sim_number):
                tree.simulate(currentnode, cpuct=1)

            # side display: raw network opinion on the current board
            # (a throwaway tree, independent of the search above)
            treefordisplay = MCTS_NN(best_player_so_far, False)
            rootnodedisplay = treefordisplay.createNode(game.state)
            treefordisplay.expand_all(rootnodedisplay)
            tree.eval_leaf(rootnodedisplay)
            pchild = rootnodedisplay.proba_children
            pchild = [int(1000 * x) / 10 for x in pchild]
            for child in rootnodedisplay.children:
                treefordisplay.eval_leaf(child)
            Qs = [
                int(100 * child.Q) / 100 for child in rootnodedisplay.children
            ]
            print('NN thoughts', pchild, Qs)
            visits_after_all_simulations = []

            for child in currentnode.children:
                visits_after_all_simulations.append(child.N)

            # play the most visited child (random tie break)
            print('result visits', visits_after_all_simulations)
            values = np.asarray(visits_after_all_simulations)
            imax = np.random.choice(np.where(values == np.max(values))[0])
            print('choice made', imax)
            currentnode = currentnode.children[imax]

        else:  #human player
            print('=============== your turn =====================')
            game = Game(currentnode.state)
            game.display_it()
            moves = game.allowed_moves()
            print(
                'chose a move from 0 to 6 -- beware of full columns! (not taken into account : e.g. if column three is full, enter 5 instead of 6 to play in the last column)'
            )
            # the human picks an index into the list of legal moves
            human_choice = int(input())
            game.takestep(moves[human_choice])
            currentnode = Node(game.state, moves[human_choice])

        # reinit tree
        game = Game(currentnode.state)
        tree = MCTS_NN(best_player_so_far, use_dirichlet=False)
        rootnode = tree.createNode(game.state)

        currentnode = rootnode

        isterminal = currentnode.isterminal()

    game = Game(currentnode.state)
    game.display_it()
    gameover, winner = game.gameover()

    #print('end of game')
    # winner == 1 means the side that moved first won; map back to players
    if winner == 0:
        toreturn = 'draw'
        print('draw')

    elif winner == 1:
        if whostarts == 'computer':
            print('computer wins')
            toreturn = 'budget1'

        else:
            print('you win')
            toreturn = 'budget2'

    elif winner == -1:
        if whostarts == 'computer':
            print(' you win')
            toreturn = 'budget2'

        else:
            print('computer wins')
            toreturn = 'budget1'

    return toreturn
Beispiel #12
0
def NN_against_mcts(player_NN, budget_NN, budget_MCTS, whostarts, c_uct, cpuct,
                    tau, tau_zero, use_dirichlet, index):
    """Play one game of NN-guided MCTS against pure MCTS and pickle the stats.

    `whostarts` is 'player_nn' or 'player_mcts'. The NN side samples its
    move from the visit distribution (temperature `tau`) before turn
    `tau_zero` and plays greedily afterwards; the MCTS side always plays
    the most visited child. Writes ./data/nn_against_mcts<index>.txt with
    the array [wp1, wp2, draw, w_nn_start, w_nn_second].

    Fixes vs original: the local variable shadowing the builtin `max` is
    renamed, and the redundant file.close() after the `with` block is gone.
    """
    random.seed()
    np.random.seed()

    # modulo decides which side plays when turn % 2 == modulo
    if whostarts == 'player_nn':
        modulo = 1
    elif whostarts == 'player_mcts':
        modulo = 0

    w_nn_start = 0
    w_nn_second = 0
    gameover = 0
    turn = 0

    while gameover == 0:

        turn = turn + 1

        if turn % 2 == modulo:
            player = 'player_nn'
            sim_number = budget_NN

        else:
            player = 'player_mcts'
            sim_number = budget_MCTS

        #init tree for NN or MCTS
        if turn == 1:
            if player == 'player_nn':
                game = Game()
                tree = MCTS_NN(player_NN, use_dirichlet)
                rootnode = tree.createNode(game.state)
                currentnode = rootnode
            else:
                game = Game()
                tree = MCTS()
                rootnode = tree.createNode(game.state)
                currentnode = rootnode

        if player == 'player_nn':

            for sims in range(0, sim_number):
                tree.simulate(currentnode, cpuct)

            # temperature-weighted visit counts
            visits_after_all_simulations = []

            for child in currentnode.children:
                visits_after_all_simulations.append(child.N**(1 / tau))

            all_visits = np.asarray(visits_after_all_simulations)
            probvisit = all_visits / np.sum(all_visits)

            # take a step: sample early in the game, play greedily later
            if turn < tau_zero:
                currentnode = np.random.choice(currentnode.children,
                                               p=probvisit)
            else:
                imax = np.random.choice(
                    np.where(all_visits == np.max(all_visits))[0])
                currentnode = currentnode.children[imax]

            # reinit tree for next player : mcts
            game = Game(currentnode.state)
            tree = MCTS()
            rootnode = tree.createNode(game.state)
            currentnode = rootnode
            gameover = currentnode.isterminal()

        if player == 'player_mcts':
            for sims in range(0, sim_number):
                tree.simulate(currentnode, UCT_simu, c_uct,
                              config.use_counter_in_pure_mcts)

            visits_after_all_simulations = []

            for child in currentnode.children:
                visits_after_all_simulations.append(child.N)

            # most visited child (random tie break)
            values = np.asarray(visits_after_all_simulations)
            imax = np.random.choice(np.where(values == np.max(values))[0])
            currentnode = currentnode.children[imax]

            # reinit tree for next player : neural net
            game = Game(currentnode.state)
            tree = MCTS_NN(player_NN, use_dirichlet)
            rootnode = tree.createNode(game.state)
            currentnode = rootnode
            gameover = currentnode.isterminal()

    game = Game(currentnode.state)
    gameover, winner = game.gameover()

    # winner stats: winner == 1 means the side that moved first won
    wp1 = 0
    wp2 = 0
    draw = 0

    if winner == 0:
        draw = 1

    elif winner == 1:
        if whostarts == 'player_nn':
            wp1 = 1
            w_nn_start = 1

        else:
            wp2 = 1

    elif winner == -1:
        if whostarts == 'player_nn':
            wp2 = 1
        else:
            wp1 = 1
            w_nn_second = 1

    save_dic = {}
    save_dic['data'] = np.asarray([wp1, wp2, draw, w_nn_start, w_nn_second])
    filename = './data/nn_against_mcts' + str(index) + '.txt'
    with open(filename, 'wb') as file:
        pickle.dump(save_dic, file)
Beispiel #13
0
def onevsonegame(player1, budget1, player2, budget2, whostarts, cpuct, tau,
                 tau_zero, use_dirichlet, index):
    """Self-play one game between two neural-net players and dump the
    training data (state, visit-probabilities, reward z) plus the winner
    stats to ./data/createdata<index>.txt.

    NOTE(review): the budget1/budget2 arguments are immediately overwritten
    from config (SIM_NUMBER / sim_number_defense) depending on `whostarts`;
    they are kept in the signature for caller compatibility.

    Fixes vs original: the local variable shadowing the builtin `max` is
    renamed, and the redundant file.close() after the `with` block is gone.
    """
    #not sure if required but safety first!
    random.seed()
    np.random.seed()

    # one row per turn: flattened state (3*L*H), the pi vector (L) and z
    new_data_for_the_game = np.zeros((3 * config.L * config.H + config.L + 1))

    if whostarts == 'player1':
        modulo = 1
        budget1 = config.SIM_NUMBER
        budget2 = config.sim_number_defense

    elif whostarts == 'player2':
        modulo = 0
        budget2 = config.SIM_NUMBER
        budget1 = config.sim_number_defense

    gameover = 0
    turn = 0

    while gameover == 0:
        turn = turn + 1

        if turn % 2 == modulo:
            player = 'player1'
            sim_number = budget1
            who_plays = player1
        else:
            player = 'player2'
            sim_number = budget2
            who_plays = player2

        #init tree
        if turn == 1:
            game = Game()
            tree = MCTS_NN(who_plays, use_dirichlet)
            rootnode = tree.createNode(game.state)
            currentnode = rootnode

        for sims in range(0, sim_number):
            tree.simulate(currentnode, cpuct)

        # temperature-weighted visit counts and the moves they belong to
        visits_after_all_simulations = []
        childmoves = []

        for child in currentnode.children:
            visits_after_all_simulations.append(child.N**(1 / tau))
            childmoves.append(child.move)

        all_visits = np.asarray(visits_after_all_simulations)
        probvisit = all_visits / np.sum(all_visits)
        child_col = [
            game.convert_move_to_col_index(move) for move in childmoves
        ]

        #store the data created: scatter the visit probabilities into a
        #full-width (L) pi vector indexed by column
        child_col = np.asarray(child_col, dtype=int)
        unmask_pi = np.zeros(config.L)
        unmask_pi[child_col] = probvisit
        flatten_state = game.state_flattener(currentnode.state)

        #init z to zero ; z is the actual reward from the current's player point of view, see below
        this_turn_data = np.hstack((flatten_state, unmask_pi, 0))
        new_data_for_the_game = np.vstack(
            (new_data_for_the_game, this_turn_data))

        #then take a step: sample early in the game, play greedily later
        if turn < tau_zero:
            currentnode = np.random.choice(currentnode.children, p=probvisit)
        else:
            best = np.random.choice(
                np.where(all_visits == np.max(all_visits))[0])
            currentnode = currentnode.children[best]

        # reinit tree for next turn: the other player searches next
        game = Game(currentnode.state)
        if player == 'player1':
            tree = MCTS_NN(player2, use_dirichlet)
        else:
            tree = MCTS_NN(player1, use_dirichlet)

        rootnode = tree.createNode(game.state)
        currentnode = rootnode

        gameover = currentnode.isterminal()

    # game has terminated. Then, exit while, and drop the zero seed row:
    new_data_for_the_game = np.delete(new_data_for_the_game, 0, 0)

    game = Game(currentnode.state)
    gameover, winner = game.gameover()

    if config.use_z_last:
        #include last winning move? unclear because there we don't have probabilities => put uniform prob
        # default : don't use z_last
        flatten_state = game.state_flattener(currentnode.state)
        unmask_pi = np.ones(config.L) / config.L
        this_turn_data = np.hstack((flatten_state, unmask_pi, 0))
        new_data_for_the_game = np.vstack(
            (new_data_for_the_game, this_turn_data))

    #update the z's and winner stats
    wp1 = 0  # win player 1, etc
    wp2 = 0
    winstart = 0
    winsecond = 0
    draw = 0

    # backfill the z such as it becomes the actual reward from the current's player point of view:
    history_size = new_data_for_the_game.shape[0]

    if winner == 0:
        z = 0
        draw = 1

    elif winner == 1:
        if config.favorlonggames:
            z = 1 - config.long_game_factor * history_size / 42  #the reward is bigger for shorter games
        else:
            z = 1
        winstart += 1
        if whostarts == 'player1':
            wp1 = 1
        else:
            wp2 = 1

    elif winner == -1:
        winsecond += 1
        if config.favorlonggames:
            z = -1 + config.long_game_factor * history_size / 42  #the reward is less negative for long games
        else:
            z = -1
        if whostarts == 'player1':
            wp2 = 1
        else:
            wp1 = 1

    # alternate the sign of z so each row is from its mover's point of view
    z_vec = np.zeros(history_size)

    for i in range(history_size):
        z_vec[i] = ((-1)**i) * z

    new_data_for_the_game[:, -1] = z_vec

    #data extension using parity along the x axis
    board_size = config.L * config.H

    if config.data_extension:
        extend_data = np.zeros(
            (new_data_for_the_game.shape[0], new_data_for_the_game.shape[1]))

        for i in range(extend_data.shape[0]):
            board = np.copy(new_data_for_the_game[i,
                                                  0:3 * board_size]).reshape(
                                                      (3, config.H, config.L))
            yellowboard = board[0]
            redboard = board[1]
            player_turn = board[2]

            #parity operation on array for both yellow and red boards
            flip_yellow = np.fliplr(yellowboard)
            flip_red = np.fliplr(redboard)

            extend_data[i, 0:board_size] = flip_yellow.flatten()
            extend_data[i, board_size:2 * board_size] = flip_red.flatten()
            extend_data[i,
                        2 * board_size:3 * board_size] = player_turn.flatten()

            # parity operation on the Pi's
            pi_s = np.copy(
                new_data_for_the_game[i, 3 * board_size:3 * board_size +
                                      config.L])
            flip_pi = np.flip(pi_s, axis=0)

            extend_data[i, 3 * board_size:3 * board_size + config.L] = flip_pi
            extend_data[i, -1] = np.copy(new_data_for_the_game[i, -1])

        #stack the mirrored copies under the originals
        new_data_for_the_game = np.vstack((new_data_for_the_game, extend_data))

    #save data of self play in a file indexed by the CPU used.
    mydata = {
        'data': [
            new_data_for_the_game, wp1, wp2, draw, winstart, winsecond,
            history_size
        ]
    }
    filename = './data/createdata' + str(index) + '.txt'
    with open(filename, 'wb') as file:
        pickle.dump(mydata, file)
Beispiel #14
0
 def PUCT(self, child, cpuct):
     """Polynomial upper-confidence score of `child` for tree selection."""
     col = Game().convert_move_to_col_index(child.move)
     prior = child.parent.proba_children[col]
     # exploitation (Q) plus prior-weighted exploration bonus
     return child.Q + cpuct * prior * np.sqrt(child.parent.N) / (1 + child.N)