Code example #1
def __init__(self, number_actions=9, policy_value_model=None):
    # MCTS statistics, keyed by board state:
    # Q holds mean action values, N visit counts, P the network's prior policy
    self.Q = {}
    self.N = {}
    self.P = {}
    self.number_actions = number_actions
    self.tictactoe_functions = tictactoe_methods()
    self.model = model_wrapper(policy_value_model=policy_value_model)
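For context, these three dictionaries are the usual MCTS node statistics: Q holds mean action values, N visit counts, and P the prior policy from the network. Below is a minimal sketch of how they might feed a PUCT-style selection step; the method name, the c_puct constant, and the board_key argument are illustrative assumptions, not part of the snippet above.

import math
import numpy as np

def select_action(self, board_key, c_puct=1.0):
    # Hypothetical helper: pick the action maximizing Q + U, where U favours
    # actions with a high prior P and a low visit count N.
    q = np.asarray(self.Q.get(board_key, np.zeros(self.number_actions)))
    n = np.asarray(self.N.get(board_key, np.zeros(self.number_actions)))
    p = np.asarray(self.P.get(board_key, np.ones(self.number_actions) / self.number_actions))
    u = c_puct * p * math.sqrt(n.sum() + 1) / (1 + n)
    return int(np.argmax(q + u))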
Code example #2
File: train.py  Project: aiatuci/AlphaZero
def pit(old_model, new_model, number_of_games=25):
    # test the models against each other and accept the new model
    # if it wins 55% of the non-tie games
    tictactoe_functions = tictactoe_methods()
    winners = {0: 0, 1: 0, 2: 0}
    # winners = {'tie':0,'old_model':0,'new_model':0}
    # check the two models are different
    for game in range(number_of_games):
        board = tictactoe_functions.get_initial_board()
        # reset the turn marker so player 1 always opens each game
        turn = 1
        if(game < number_of_games // 2):
            # old_model goes first
            player1 = old_model
            player2 = new_model

        else:
            # new model goes first
            player1 = new_model 
            player2 = old_model

        for player_turn in range(9):

            if(player_turn % 2 == 0):
                # get player 1's action
                action_list = get_model_action(player1,board,turn)
                action = np.argmax(action_list) 
            else:
                action_list = get_model_action(player2,board,turn)
                action = np.argmax(action_list) 
                
            if(player_turn == 0):
                # first move completely random
                action = np.random.choice(9,1)[0]

            board = tictactoe_functions.get_next_board(board, action, turn)
            winner = tictactoe_functions.get_winner(board)
            if(winner != -1):
                # update the winners dict
                if(game >= number_of_games // 2):
                    # flip the winner so it corresponds with the correct model
                    if(winner == 2):
                        winner = 1
                    elif(winner == 1):
                        winner = 2

                winners[winner] += 1
                # clear both players' MCTS trees
                player1.clear_tree()
                player2.clear_tree()
                break
            turn = 2 if turn == 1 else 1
    return winners
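The comment at the top of pit mentions accepting the new model only when it wins 55% of the non-tie games; here is a minimal sketch of how the returned winners dictionary could be used for that check. The helper name and explicit threshold parameter are my additions; only the 55% figure comes from the comment.

def should_accept_new_model(winners, threshold=0.55):
    # winners maps 0 -> ties, 1 -> old_model wins, 2 -> new_model wins
    decisive_games = winners[1] + winners[2]
    if decisive_games == 0:
        # every game was a tie, so keep the old model
        return False
    return winners[2] / decisive_games >= threshold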
Code example #3
File: train.py  Project: AndrewLaird/alpha_tictactoe
def run_game(mcts_model, temp=1.0):

    tictactoe_functions = tictactoe_methods()
    board = tictactoe_functions.get_initial_board()
    experience = []
    turn = 1
    for game_step in range(10):

        # temp controls how much the search explores:
        # 1 for exploration during self-play,
        # 0 for the pit, always take the best action
        action_probs = get_game_action_probs(mcts_model,
                                             board,
                                             turn,
                                             temp=temp)

        # choose an action from these probabilities
        action = np.random.choice(9, 1, p=action_probs)[0]

        if (game_step == 0):
            # truly random for first step
            action = np.random.choice(9, 1)[0]

        # initially use a placeholder value for value
        # each experience entry is [obs, action_probs, value]

        # quadruple our experience by rotating the board
        # if turn == 2 we have to flip the board for training
        training_board = board
        if (turn == 2):
            training_board = tictactoe_functions.flip_board(board)
        #experience.append([training_board,action_probs,-9999])

        # rotating boards to get more info
        rotated_boards = tictactoe_functions.get_rotated_boards(training_board)
        rotated_action_probs = tictactoe_functions.get_rotated_boards(
            action_probs)
        for i in range(4):
            experience.append(
                [rotated_boards[i], rotated_action_probs[i], -9999])

        board = tictactoe_functions.get_next_board(board, action, turn)

        winner = tictactoe_functions.get_winner(board)
        if (winner != -1):
            return winner, experience
        turn = 2 if turn == 1 else 1
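The -9999 entries are placeholders for the value target; the sketch below shows one way they could be back-filled with the final result once run_game returns. The +1/-1/0 convention, the assumption of four rotated entries per move, and player 1 moving on even steps are my own assumptions, not taken from the project.

def assign_value_targets(winner, experience):
    # Replace the placeholder with the outcome from the perspective of the
    # player who made the move: +1 win, -1 loss, 0 tie.
    labelled = []
    for i, (obs, action_probs, _) in enumerate(experience):
        mover = 1 if (i // 4) % 2 == 0 else 2
        if winner == 0:
            value = 0.0
        elif winner == mover:
            value = 1.0
        else:
            value = -1.0
        labelled.append([obs, action_probs, value])
    return labelled

# e.g. winner, experience = run_game(mcts_model)
#      experience = assign_value_targets(winner, experience)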
Code example #4
def run_game_with_human(mcts_model, human_player_pos):

    tictactoe_functions = tictactoe_methods()
    board = tictactoe_functions.get_initial_board()
    turn = 1
    for game_steps in range(11):
        print("Turn #%d:" % turn)
        tictactoe_functions.pretty_print(board)
        if (turn % 2 == human_player_pos):
            # robot turn
            # run MCTS simulations from the current position
            simulation_steps = 300
            for i in range(simulation_steps):
                #print('sim:',i)
                mcts_model.simulate_step(board, turn)

            # getting the actions from the mcts tree
            searched_board = board
            if (turn == 2):
                searched_board = tictactoe_functions.flip_board(board)
            actions_list = [n for n in mcts_model.get_N(searched_board)]

            action = np.argmax(actions_list)
            print("Visit counts:", actions_list)
            print("value of each next state:",
                  mcts_model.get_Q(searched_board))
            print("policy:", mcts_model.get_P(searched_board))
        else:
            # player turn
            x, y = [int(x) for x in input("input[0-2] x y: ").split()]
            action = y * 3 + x

        board = tictactoe_functions.get_next_board(board, action, turn)

        winner = tictactoe_functions.get_winner(board)
        if (winner != -1):
            tictactoe_functions.pretty_print(board)
            return winner
        turn = 2 if turn == 1 else 1
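Taking the argmax of the visit counts here corresponds to the temp -> 0 behaviour described in example #3. The following is a minimal sketch of how visit counts might be turned into the temperature-controlled distribution that get_game_action_probs is expected to produce; it illustrates the idea only and is not the project's actual implementation.

import numpy as np

def visit_counts_to_probs(visit_counts, temp=1.0):
    # temp = 1 samples in proportion to visit counts;
    # temp -> 0 collapses onto the most-visited action (the argmax above).
    counts = np.asarray(visit_counts, dtype=np.float64)
    if temp == 0:
        probs = np.zeros_like(counts)
        probs[np.argmax(counts)] = 1.0
        return probs
    scaled = counts ** (1.0 / temp)
    if scaled.sum() == 0:
        # unvisited node: fall back to a uniform distribution
        return np.full_like(counts, 1.0 / len(counts))
    return scaled / scaled.sum()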