def board_config_test(model_cls):
    """
    Test the results obtained by a model in pre-defined board configurations
    and return a percentage of the success.

    model : Neural Network Model

    """

    game = Connect4Env(None)
    success = 0
    model = model_cls
    for elem in BOARD_CONFIGS:
        board = BOARD_CONFIGS[elem][0]
        act_to_do = BOARD_CONFIGS[elem][1]

        game.reset()
        game.board = board

        input_dict = {"obs": {}}
        # if model.use_conv:
        #     reshaped_board = np.expand_dims(board, axis=(0,-1))

        # else:
        #     reshaped_board = np.reshape(board, (1, board.shape[0] * board.shape[1]))
        action_mask = game.get_moves(True)
        input_dict["obs"]["state"] = board  #reshaped_board
        input_dict["obs"]["action_mask"] = action_mask
        action_logits, _ = model.forward(input_dict, None, None)
        act = np.argmax(action_logits)

        if act in act_to_do:
            success += 1

    num_of_configs = len(BOARD_CONFIGS)
    success_rate = success / num_of_configs
    return success_rate, success, num_of_configs
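
For reference, BOARD_CONFIGS is defined outside this listing. Judging from how it is indexed above (entry[0] is a board array, entry[1] the accepted actions), a minimal hypothetical sketch of its shape and of a call to board_config_test:

import numpy as np

# Hypothetical sketch only: the real BOARD_CONFIGS lives outside this listing.
# Each entry maps a config name to (board, accepted_actions).
example_board = np.zeros((6, 7), dtype=np.float32)  # board shape is an assumption
BOARD_CONFIGS_SKETCH = {
    "win_in_1_example": (example_board, [3]),  # playing column 3 counts as a success
}

# usage: the model must expose forward(input_dict, state, seq_lens)
# success_rate, success, num_of_configs = board_config_test(trained_model)
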
def player_vs_AI(screen, model, lstm_model, weights, close_window=True):
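    """
    Interactive pygame match: a human (player 1, mouse input) plays against
    a model-driven player 2. Sliding windows of (flattened board + action)
    rows are fed to lstm_model to estimate the human's level, which sets the
    global `agent` used to pick the opponent weights for the next game.
    """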
    
    global agent
    game = Connect4Env(None,width=Connect3Config.WIDTH,
        height=Connect3Config.HEIGHT,
        n_actions=Connect3Config.N_ACTIONS,
        connect=Connect3Config.CONNECT,
    )
    board = game.board    
    board_to_print = np.transpose(board)
    draw_board(board_to_print,screen)
    
    myfont = pygame.font.SysFont("monospace", 55)

    full_game = []
    timestep = 0
    game_over = False
    actions = {}
    print("printing agent = " + str(agent))
    if agent is not None:
        print("Agent chosen using the model")
        w2_indx = lvl_to_indx(int(agent))
        w2_key = list(weights.keys())[w2_indx]
        w2 = weights[w2_key]
        lvl = indx_to_lvl(w2_indx)
        # lvl = weights.keys().index(w2)
    else:
        print("Agent chosen randomly...")
        w2_indx = np.random.choice(range(len(weights)))
        w2_key = list(weights.keys())[w2_indx]
        w2 = weights[w2_key]
        lvl = indx_to_lvl(w2_indx)
    
    if randomize:
        starting_player = random.choice([player1_ID, player2_ID])
    else:
        starting_player = player1_ID
    game.reset(starting_player=starting_player,randomize=False)
    
    board_plus_action_total = []
    done = {}
    done["__all__"] = False
    
    
    print("You are now playing against an agent of level " + str(lvl) )
    
    while not game_over:
        timestep += 1
        actual_player = game.current_player
        board = game.board
        board_p2 = game.board_p2
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                pygame.quit()
                sys.exit()
    
            if event.type == pygame.MOUSEMOTION:
                pygame.draw.rect(screen, BLACK, (0,0, width, SQUARESIZE))
                posx = event.pos[0]
                if actual_player == player1_ID:
                    pygame.draw.circle(screen, RED, (posx, int(SQUARESIZE/2)), RADIUS)
    
            pygame.display.update()
    
            if event.type == pygame.MOUSEBUTTONDOWN:
                pygame.draw.rect(screen, BLACK, (0,0, width, SQUARESIZE))
                #print(event.pos)
                # Ask for Player 1 Input                
                if actual_player == player1_ID:
                    action_mask = game.get_moves(False) 
                    posx = event.pos[0]
                    act = int(math.floor(posx/SQUARESIZE))
                    print("player 1 move: " + str(act))
                    if act in action_mask:
                        flattened_board = np.ndarray.flatten(board)
                        board_plus_actions = np.append(flattened_board,float(act))
                        board_plus_action_total.append([board_plus_actions])
                
                        actions[player1] = act                
                        _, rew, done, _ = game.step(actions)
    
                        #print_board(board)
                        board = game.board
                        board_to_print = np.transpose(board)
                        draw_board(board_to_print,screen)
    
    
        # Ask for Player 2 Input
        if actual_player == player2_ID:
            input_dict = {"obs": {}}
            action_mask = game.get_moves(True)
            input_dict["obs"]["state"] = board_p2 #reshaped_board
            input_dict["obs"]["action_mask"] = action_mask
            action_logits, _ = model.forward(input_dict, None, None)
            if timestep > number_of_stochastic_moves:
                act = np.argmax(action_logits[0])
            else:
                action_prob = [np.exp(single_log)/sum(np.exp(action_logits[0])) for single_log in action_logits[0]]
                act = np.random.choice([0,1,2,3,4],1,p=action_prob)[0]  
                
            actions[player2] = act
            pygame.time.wait(1000)
            _, rew, done, _ = game.step(actions)
            print(game)
            #game.render()    
            board = game.board
            board_to_print = np.transpose(board)
            draw_board(board_to_print,screen)
                
                
        if done["__all__"]:
            # game finished: report the outcome
            print(rew)
            if rew["player1"] == 1.0:
                print("Player 1 won!!")
                label = myfont.render("Player 1 won!!", 1, RED)
                screen.blit(label, (40, 10))
                pygame.display.update()
                pygame.time.wait(1000)

            elif rew["player1"] == -1.0:
                print("Player 2 won!!")
                label = myfont.render("Player 2 won!!", 1, YELLOW)
                screen.blit(label, (40,10))
                pygame.display.update()
                pygame.time.wait(1000)
                
            elif rew["player1"] == 0.0:
                print("Draw")
                label = myfont.render("Draw!!", 1, WHITE)
                screen.blit(label, (40,10))
                pygame.display.update()
                pygame.time.wait(1000)
                
            if len(board_plus_action_total) < sequence_len:
                print("Game finished too early, restarting...")
                timestep = 0
                game.reset(randomize=True)
                game_over = False
                continue
            
            game_over = True
            board_plus_action_and_outcome = board_plus_action_total
            
            # build every overlapping window of length sequence_len
            for j in range(len(board_plus_action_and_outcome)-(sequence_len-1)):
                full_game.append([])
                full_game[-1].append(board_plus_action_and_outcome[j:j+sequence_len])
        

        if game_over:
            full_game = np.asarray(full_game)
            full_game = np.squeeze(full_game)
            if len(full_game.shape) == 2:
                full_game = np.expand_dims(full_game, axis=0)
            full_game = full_game.astype("float32")
            y = lstm_model(full_game,training=False)
                    
            predicted_values = tf.math.reduce_mean(y,axis=0)
            predicted_indx = tf.math.argmax(predicted_values) 
            agent = predicted_indx
        
            print("Model output probability: " + str(predicted_values.numpy()))
            pygame.time.wait(3000)
            
    if close_window:
        pygame.display.quit()
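
The window construction above turns the recorded rows of (flattened board + action) into every overlapping slice of length sequence_len before calling the LSTM. A standalone sketch of the same idea, with hypothetical board and window sizes:

import numpy as np

# Standalone sketch of the sliding-window encoding used in player_vs_AI
# (the board size and sequence_len used here are hypothetical).
sequence_len = 3
rows = [np.append(np.zeros(15, dtype=np.float32), float(a)) for a in [0, 2, 1, 4, 3]]

windows = [rows[j:j + sequence_len] for j in range(len(rows) - (sequence_len - 1))]
batch = np.asarray(windows, dtype=np.float32)
print(batch.shape)  # (3, 3, 16): [num_windows, sequence_len, features]
# batch would then be passed to lstm_model(batch, training=False)
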
# generate a fake input to define the model structure and then load the weights
# shape: [batch, timestep, features]
random_input = np.random.rand(1, lstm_timesteps, features_len)
random_input = random_input.astype('float32')
lstm_model(random_input)
lstm_model.set_weights(lstm_weights[()])

randomize = True
player1_ID = Connect3Config.PLAYER1_ID
player2_ID = Connect3Config.PLAYER2_ID
player1 = Connect3Config.PLAYER1
player2 = Connect3Config.PLAYER2
game = Connect4Env(None, width=Connect3Config.WIDTH,
    height=Connect3Config.HEIGHT,
    n_actions=Connect3Config.N_ACTIONS,
    connect=Connect3Config.CONNECT,
)


# =============================================================================
# PYGAME
# =============================================================================
# pygame.init()
# #pygame.display.update()
# screen = pygame.display.set_mode(size)
# pygame.display.set_caption("Connect 3")
# player_vs_AI(screen, model, lstm_model)

app = tkinterApp(model, lstm_model, weights)
app.mainloop()
Example #4
def model_vs_model_stochastic(model1,
                              model2,
                              number_of_games,
                              discount_rate,
                              randomize=True):
    """
    Compute the results of different games between 2 models. Instead of picking
    the best action, we pick an action with probability equals to the action 
    probability

    Parameters
    ----------
    model1 : Neural Network model
    model2 : Neural Network model
    number_of_games : int
        number of games to play 
    discount_rate : float
        scale the final reward 
    randomize : Bool
        decide if the first player to move is decided randomly

    Returns
    -------
    """
    game = Connect4Env(None)
    # accumulated final rewards (player 2's, discounted) and player 1 wins
    final_value = 0
    p1_win = 0
    for i in range(number_of_games):
        timestep = 0
        reward = 0
        game_over = False
        actions = {}
        if randomize:
            starting_player = random.choice([player1_ID, player2_ID])
        else:
            starting_player = player1_ID
        game.reset(starting_player=starting_player)
        while not game_over:
            timestep += 1
            actual_player = game.current_player
            board = game.board
            board_p2 = game.board_p2
            if actual_player == player1_ID:
                input_dict = {"obs": {}}
                # if model1.use_conv:
                #     reshaped_board = np.expand_dims(board, axis=(0,-1))

                # else:
                #     reshaped_board = np.reshape(board, (1, board.shape[0] * board.shape[1]))
                action_mask = game.get_moves(True)
                input_dict["obs"]["state"] = board  #reshaped_board
                input_dict["obs"]["action_mask"] = action_mask
                action_logits, _ = model1.forward(input_dict, None, None)
                action_prob = [
                    np.exp(single_log) / sum(np.exp(action_logits[0]))
                    for single_log in action_logits[0]
                ]
                act = np.random.choice([0, 1, 2, 3, 4, 5, 6], 1,
                                       p=action_prob)[0]
                #act = np.argmax(action_logits[0])

                actions[player1] = act
                _, rew, done, _ = game.step(actions)
            elif actual_player == player2_ID:
                input_dict = {"obs": {}}
                # if model2.use_conv:
                #     reshaped_board = np.expand_dims(board_p2, axis=(0,-1))

                # else:
                #     reshaped_board = np.reshape(board_p2, (1, board_p2.shape[0] * board_p2.shape[1]))
                action_mask = game.get_moves(True)
                input_dict["obs"]["state"] = board_p2  #reshaped_board
                input_dict["obs"]["action_mask"] = action_mask
                action_logits, _ = model2.forward(input_dict, None, None)
                action_prob = [
                    np.exp(single_log) / sum(np.exp(action_logits[0]))
                    for single_log in action_logits[0]
                ]
                act = np.random.choice([0, 1, 2, 3, 4, 5, 6], 1,
                                       p=action_prob)[0]
                actions[player2] = act
                _, rew, done, _ = game.step(actions)
            else:
                raise ValueError("Player index is not valid, should be 0 or 1")

            if done["__all__"]:
                game_over = True
                reward = rew["player2"]
                if rew["player1"] == 1.0:
                    p1_win += 1.0

                if discount_rate == 1:
                    final_value += reward
                else:
                    final_value += (discount_rate**timestep) * reward

    final_value = final_value / number_of_games
    p1_win_rate = p1_win / number_of_games

    return final_value, p1_win_rate
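
The softmax-from-logits list comprehension above recurs throughout these examples. A numerically stable equivalent that could replace it (a sketch, not part of the original code):

import numpy as np

def softmax_probs(logits):
    """Numerically stable softmax over a 1-D array of action logits.

    Equivalent to the list comprehension used above, but shifting by the
    max logit first avoids overflow in np.exp for large logits.
    """
    logits = np.asarray(logits, dtype=np.float64)
    exp = np.exp(logits - np.max(logits))
    return exp / exp.sum()

# usage sketch:
# action_prob = softmax_probs(action_logits[0])
# act = np.random.choice(len(action_prob), p=action_prob)
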
Example #5
def model_vs_model_connect3_generate_data(model1,
                                          model2,
                                          number_of_games,
                                          discount_rate,
                                          randomize=True,
                                          number_of_stochastic_moves=0):
    """ 
    generates game data from games with the board seen by model1 pov

    """
    game = Connect4Env(
        None,
        width=Connect3Config.WIDTH,
        height=Connect3Config.HEIGHT,
        n_actions=Connect3Config.N_ACTIONS,
        connect=Connect3Config.CONNECT,
    )
    # accumulated final rewards (player 2's, discounted) and player 1 wins
    final_value = 0
    p1_win = 0
    # each game encoded as a string; duplicates are counted instead of stored
    games_list = []
    number_of_equal_games = 0

    for i in range(number_of_games):
        timestep = 0
        reward = 0
        game_over = False
        actions = {}
        encoded_game = []
        if randomize:
            starting_player = random.choice([player1_ID, player2_ID])
            if starting_player == player1_ID:
                encoded_game.append("p1_")
            elif starting_player == player2_ID:
                encoded_game.append("p2_")
        else:
            starting_player = player1_ID
            encoded_game.append("p1_")
        game.reset(starting_player=starting_player, randomize=False)
        while not game_over:
            timestep += 1
            actual_player = game.current_player
            board = game.board
            board_p2 = game.board_p2

            if actual_player == player1_ID:
                input_dict = {"obs": {}}
                action_mask = game.get_moves(True)
                input_dict["obs"]["state"] = board
                input_dict["obs"]["action_mask"] = action_mask
                action_logits, _ = model1.forward(input_dict, None, None)
                if timestep > number_of_stochastic_moves:
                    act = np.argmax(action_logits[0])
                else:
                    action_prob = [
                        np.exp(single_log) / sum(np.exp(action_logits[0]))
                        for single_log in action_logits[0]
                    ]
                    act = np.random.choice([0, 1, 2, 3, 4], 1,
                                           p=action_prob)[0]

                encoded_game.append(str(act))
                actions[player1] = act
                _, rew, done, _ = game.step(actions)

            elif actual_player == player2_ID:
                input_dict = {"obs": {}}
                action_mask = game.get_moves(True)
                input_dict["obs"]["state"] = board_p2  #reshaped_board
                input_dict["obs"]["action_mask"] = action_mask
                action_logits, _ = model2.forward(input_dict, None, None)
                if timestep > number_of_stochastic_moves:
                    act = np.argmax(action_logits[0])
                else:
                    action_prob = [
                        np.exp(single_log) / sum(np.exp(action_logits[0]))
                        for single_log in action_logits[0]
                    ]
                    act = np.random.choice([0, 1, 2, 3, 4], 1,
                                           p=action_prob)[0]

                encoded_game.append(str(act))
                actions[player2] = act
                _, rew, done, _ = game.step(actions)

            else:
                raise ValueError("Player index is not valid, should be 0 or 1")

            if done["__all__"]:
                game_over = True
                game_str = ''.join(encoded_game)
                # add the encoded game to the list if it is new
                if game_str in games_list:
                    number_of_equal_games += 1
                else:
                    games_list.append(game_str)

                reward = rew["player2"]
                if rew["player1"] == 1.0:
                    p1_win += 1.0

                if discount_rate == 1:
                    final_value += reward
                else:
                    final_value += (discount_rate**timestep) * reward
    print("The number of equal games is: " + str(number_of_equal_games))
    final_value = final_value / number_of_games
    p1_win_rate = p1_win / number_of_games

    return final_value, p1_win_rate, games_list
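
Each game string produced above is the starting-player marker ("p1_" or "p2_") followed by one digit per action. A small sketch of how such a string could be decoded again (the helper name is hypothetical):

def decode_game(game_str):
    """Split an encoded game such as 'p1_23140' back into its parts.

    Matches the format built in model_vs_model_connect3_generate_data:
    returns the starting player ('p1' or 'p2') and the action list.
    """
    starting_player, moves = game_str.split("_", 1)
    return starting_player, [int(c) for c in moves]

# decode_game("p2_01234") -> ("p2", [0, 1, 2, 3, 4])
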
Example #6
def model_vs_model_connect3(model1,
                            model2,
                            number_of_games,
                            discount_rate,
                            randomize=True,
                            number_of_stochastic_moves=0):
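    """
    Play number_of_games games between model1 and model2 on the Connect 3
    board. The first number_of_stochastic_moves moves of each game are
    sampled from the softmax over the logits; afterwards the argmax action
    is taken. Returns the average (discounted) final reward of player 2
    and player 1's win rate.
    """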

    game = Connect4Env(
        None,
        width=Connect3Config.WIDTH,
        height=Connect3Config.HEIGHT,
        n_actions=Connect3Config.N_ACTIONS,
        connect=Connect3Config.CONNECT,
    )
    # accumulated final rewards (player 2's, discounted) and player 1 wins
    final_value = 0
    p1_win = 0
    for i in range(number_of_games):
        timestep = 0
        reward = 0
        game_over = False
        actions = {}
        if randomize:
            starting_player = random.choice([player1_ID, player2_ID])
        else:
            starting_player = player1_ID
        game.reset(starting_player=starting_player)
        while not game_over:
            timestep += 1
            actual_player = game.current_player
            board = game.board
            board_p2 = game.board_p2
            if actual_player == player1_ID:
                input_dict = {"obs": {}}
                action_mask = game.get_moves(True)
                input_dict["obs"]["state"] = board
                input_dict["obs"]["action_mask"] = action_mask
                action_logits, _ = model1.forward(input_dict, None, None)
                if timestep > number_of_stochastic_moves:
                    act = np.argmax(action_logits[0])
                else:
                    action_prob = [
                        np.exp(single_log) / sum(np.exp(action_logits[0]))
                        for single_log in action_logits[0]
                    ]
                    act = np.random.choice([0, 1, 2, 3, 4], 1,
                                           p=action_prob)[0]
                actions[player1] = act
                _, rew, done, _ = game.step(actions)
            elif actual_player == player2_ID:
                input_dict = {"obs": {}}
                action_mask = game.get_moves(True)
                input_dict["obs"]["state"] = board_p2  #reshaped_board
                input_dict["obs"]["action_mask"] = action_mask
                action_logits, _ = model2.forward(input_dict, None, None)
                if timestep > number_of_stochastic_moves:
                    act = np.argmax(action_logits[0])
                else:
                    action_prob = [
                        np.exp(single_log) / sum(np.exp(action_logits[0]))
                        for single_log in action_logits[0]
                    ]
                    act = np.random.choice([0, 1, 2, 3, 4], 1,
                                           p=action_prob)[0]
                actions[player2] = act
                _, rew, done, _ = game.step(actions)

            else:
                raise ValueError("Player index is not valid, should be 0 or 1")
            if done["__all__"]:
                game_over = True
                reward = rew["player2"]
                if rew["player1"] == 1.0:
                    p1_win += 1.0

                if discount_rate == 1:
                    final_value += reward
                else:
                    final_value += (discount_rate**timestep) * reward

    final_value = final_value / number_of_games
    p1_win_rate = p1_win / number_of_games

    return final_value, p1_win_rate
Example #7
def model_vs_minimax_connect3_stochastic(model,
                                         depth,
                                         number_of_games,
                                         checkpoint=None,
                                         logger=None,
                                         randomize=True):
    """
        used to evaluate the model against the minimax algorithm.
        The action chosen by the model is picked following the distribution
        of the action probabilities. 
    
    """

    game = Connect4Env(
        None,
        width=Connect3Config.WIDTH,
        height=Connect3Config.HEIGHT,
        n_actions=Connect3Config.N_ACTIONS,
        connect=Connect3Config.CONNECT,
    )
    model_name = model.name
    if logger:
        logger.info("**********" + str(model_name) +
                    "_(X) VS (O)_MINIMAX_depth_" + str(depth) + "**********")
    print("Starting Evaluation")
    for i in range(number_of_games):
        game_over = False
        actions = {}
        if randomize:
            starting_player = random.choice([player1_ID, player2_ID])
        else:
            starting_player = player1_ID
        game.reset(starting_player=starting_player, randomize=False)
        while not game_over:
            actual_player = game.current_player
            board = game.board
            if actual_player == player1_ID:
                input_dict = {"obs": {}}
                # if model.use_conv:
                #     reshaped_board = np.expand_dims(board, axis=(0,-1))

                # else:
                #     reshaped_board = np.reshape(board, (1, board.shape[0] * board.shape[1]))
                action_mask = game.get_moves(True)
                input_dict["obs"]["state"] = board  #reshaped_board
                input_dict["obs"]["action_mask"] = action_mask
                action_logits, _ = model.forward(input_dict, None, None)
                action_prob = [
                    np.exp(single_log) / sum(np.exp(action_logits[0]))
                    for single_log in action_logits[0]
                ]
                act = np.random.choice([0, 1, 2, 3, 4], 1, p=action_prob)[0]
                actions[player1] = act
                _, _, done, _ = game.step(actions)
            elif actual_player == player2_ID:
                act, _ = minimax_connect3(board, player2_ID, True, depth=depth)
                actions[player2] = act
                _, _, done, _ = game.step(actions)
            else:
                raise ValueError("Player index is not valid, should be 0 or 1")
            if logger:
                logger.info("Game number " + str(i) + "/" +
                            str(number_of_games))
                logger.info("Player " + str(actual_player + 1) + " actions: " +
                            str(act))
                logger.info("\n" + repr(board))
                logger.info(
                    board_print(board, Connect3Config.HEIGHT,
                                Connect3Config.WIDTH))

            if done["__all__"]:
                if logger:
                    logger.info("PLAYER " + str(game.winner + 1) + " WON...")
                    logger.info("CURRENT SCORE: " + str(game.score[player1]) +
                                " VS " + str(game.score[player2]))
                game_over = True

    # score = game.score[player1] / number_of_games + game.num_draws / (
    #     2 * number_of_games
    # )
    print("Evaluation Over")

    elo_diff = compute_elo_difference(game.score[player1], game.num_draws,
                                      number_of_games)

    # print("\nplayer 1 score: " + str(game.score[player1]))
    # print("player 2 score: " + str(game.score[player2]))
    # print("number of draw: " + str(game.num_draws))
    # print(
    #     "elo difference computed over "
    #     + str(number_of_games)
    #     + " between the 2 algortithms is "
    #     + str(elo_diff)
    # )

    return elo_diff, game.score[player1], game.score[player2], game.num_draws
Example #8
def model_vs_minimax_connect3(model,
                              depth,
                              number_of_games,
                              checkpoint=None,
                              logger=None,
                              randomize=True):
    """
        used to evaluate the model against the minimax algorithm.
        The action chosen by the model is picked following the distribution
        of the action probabilities. 
    
    """

    game = Connect4Env(
        None,
        width=Connect3Config.WIDTH,
        height=Connect3Config.HEIGHT,
        n_actions=Connect3Config.N_ACTIONS,
        connect=Connect3Config.CONNECT,
    )
    model_name = model.name
    if logger:
        logger.info("**********" + str(model_name) +
                    "_(X) VS (O)_MINIMAX_depth_" + str(depth) + "**********")
    print("Starting Evaluation")
    for i in range(number_of_games):
        game_over = False
        actions = {}
        if randomize:
            starting_player = random.choice([player1_ID, player2_ID])
        else:
            starting_player = player1_ID
        game.reset(starting_player=starting_player, randomize=False)
        while not game_over:
            actual_player = game.current_player
            board = game.board
            if actual_player == player1_ID:
                input_dict = {"obs": {}}
                action_mask = game.get_moves(True)
                input_dict["obs"]["state"] = board  #reshaped_board
                input_dict["obs"]["action_mask"] = action_mask
                action_logits, _ = model.forward(input_dict, None, None)
                act = np.argmax(action_logits[0])
                actions[player1] = act
                _, _, done, _ = game.step(actions)
            elif actual_player == player2_ID:
                act, _ = minimax_connect3(board, player2_ID, True, depth=depth)
                actions[player2] = act
                _, _, done, _ = game.step(actions)
            else:
                raise ValueError("Player index is not valid, should be 0 or 1")
            if logger:
                logger.info("Game number " + str(i) + "/" +
                            str(number_of_games))
                logger.info("Player " + str(actual_player + 1) + " actions: " +
                            str(act))
                logger.info("\n" + repr(board))
                logger.info(
                    board_print(board, Connect3Config.HEIGHT,
                                Connect3Config.WIDTH))

            if done["__all__"]:
                if logger:
                    logger.info("PLAYER " + str(game.winner + 1) + " WON...")
                    logger.info("CURRENT SCORE: " + str(game.score[player1]) +
                                " VS " + str(game.score[player2]))
                game_over = True

    print("Evaluation Over")

    elo_diff = compute_elo_difference(game.score[player1], game.num_draws,
                                      number_of_games)
    return elo_diff, game.score[player1], game.score[player2], game.num_draws
Example #9
def minimax_vs_random_elo(
    depth,
    number_of_games,
    logger,
):
    """
    Use the inverse of the elo formula to compute the outcome of a match,
    given that we already know the result of a match.
    It tests the relative elo between random algorithm and a minimax algorithm
    
    Elo formula:
        expected_score = 1/(1+10^((elo_diff)/400)) 
    Inverse formula: 
        elo_diff = -400*log(1/expected_score - 1)
    
    INPUT:
    depth: int
        number of game to play before updating the elo
    number_of_games: int
        number of game to play before updating the elo
    RETURN:
    elo_diff: float
        elo difference
    """
    game = Connect4Env(None)
    if logger:
        logger.info("**********MINIMAX_depth_" + str(depth) +
                    "_(X) VS (O)_RANDOM" + "**********")
    for i in tqdm(range(number_of_games)):
        game_over = False
        actions = {}
        starting_player = random.choice([player1_ID, player2_ID])
        game.reset(starting_player=starting_player)
        print("\nPlayer " + str(starting_player + 1) + " is starting")
        while not game_over:
            actual_player = game.current_player
            board = game.board
            if actual_player == player1_ID:
                act, action_values = minimax(board,
                                             player1_ID,
                                             True,
                                             depth=depth,
                                             return_distr=True)
                actions[player1] = act
                _, _, done, _ = game.step(actions)
            elif actual_player == player2_ID:
                act = random.choice(game.get_moves(False))
                actions[player2] = act
                _, _, done, _ = game.step(actions)
            else:
                raise ValueError("Player index is not valid, should be 0 or 1")
            if logger:
                logger.info("Game number " + str(i) + "/" +
                            str(number_of_games))
                if actual_player == player1_ID:
                    logger.info("action distribution: " + str(action_values))
                logger.info("Player " + str(actual_player + 1) + " actions: " +
                            str(act))
                logger.info("\n" + repr(board))
                logger.info(board_print(board))

            if done["__all__"]:
                logger.info("PLAYER " + str(game.winner + 1) + " WON...")
                logger.info("CURRENT SCORE: " + str(game.score[player1]) +
                            " VS " + str(game.score[player2]))
                game_over = True

    if game.score[player1] > game.score[player2]:
        score = game.score[player1] / number_of_games + game.num_draws / (
            2 * number_of_games)
    elif game.score[player1] < game.score[player2]:
        score = game.score[player2] / number_of_games + game.num_draws / (
            2 * number_of_games)
    elif game.score[player1] == game.score[player2]:
        return 0

    if score >= 10 / 11:
        elo_diff = 400
    else:
        elo_diff = -400 * math.log((1 / score - 1), 10)

    print("\nplayer 1 score: " + str(game.score[player1]))
    print("player 2 score: " + str(game.score[player2]))
    print("number of draw: " + str(game.num_draws))
    print("elo difference computed over " + str(number_of_games) +
          " between the 2 algortithms is " + str(elo_diff))

    return elo_diff
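
compute_elo_difference, called by several of the evaluation functions above, is not part of this listing. Judging from the inline computation in minimax_vs_random_elo, it presumably looks something like the following sketch:

import math

def compute_elo_difference_sketch(wins, draws, number_of_games):
    """Sketch of the compute_elo_difference helper (defined elsewhere).

    Mirrors the inline code in minimax_vs_random_elo: the score is
    wins/games + draws/(2*games), and the Elo gap is the inverse formula
    -400 * log10(1/score - 1), capped at 400 once score >= 10/11.
    """
    score = wins / number_of_games + draws / (2 * number_of_games)
    if score >= 10 / 11:
        return 400
    return -400 * math.log(1 / score - 1, 10)
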
Example #10
def model_vs_minimax(model,
                     depth,
                     number_of_games,
                     checkpoint=None,
                     logger=None,
                     randomize=True):
    """
    Use the inverse of the elo formula to compute the outcome of a match,
    given that we already know the result of a match.
    It tests the relative elo between a custom model and a minimax algorithm
    
    Elo formula:
        expected_score = 1/(1+10^((elo_diff)/400)) 
    Inverse formula: 
        elo_diff = -400*log(1/score - 1)
    INPUT:
    model: 
        tensorflow model
    checkpoint:
        path to chekpoint of the model to use
    """

    game = Connect4Env(None)
    model_name = model.name
    if logger:
        logger.info("**********" + str(model_name) +
                    "_(X) VS (O)_MINIMAX_depth_" + str(depth) + "**********")
    print("Starting Evaluation")
    for i in range(number_of_games):
        game_over = False
        actions = {}
        if randomize:
            starting_player = random.choice([player1_ID, player2_ID])
        else:
            starting_player = player1_ID
        game.reset(starting_player=starting_player)
        while not game_over:
            actual_player = game.current_player
            board = game.board
            if actual_player == player1_ID:
                input_dict = {"obs": {}}
                action_mask = game.get_moves(True)
                input_dict["obs"]["state"] = board  #reshaped_board
                input_dict["obs"]["action_mask"] = action_mask
                action_logits, _ = model.forward(input_dict, None, None)
                act = np.argmax(action_logits[0])
                actions[player1] = act
                _, _, done, _ = game.step(actions)
            elif actual_player == player2_ID:
                act, _ = minimax(board, player2_ID, True, depth=depth)
                actions[player2] = act
                _, _, done, _ = game.step(actions)
            else:
                raise ValueError("Player index is not valid, should be 0 or 1")
            if logger:
                logger.info("Game number " + str(i) + "/" +
                            str(number_of_games))
                logger.info("Player " + str(actual_player + 1) + " actions: " +
                            str(act))
                logger.info("\n" + repr(board))
                logger.info(board_print(board))

            if done["__all__"]:
                logger.info("PLAYER " + str(game.winner + 1) + " WON...")
                logger.info("CURRENT SCORE: " + str(game.score[player1]) +
                            " VS " + str(game.score[player2]))
                game_over = True
    print("Evaluation Over")

    elo_diff = compute_elo_difference(game.score[player1], game.num_draws,
                                      number_of_games)
    return elo_diff, game.score[player1], game.score[player2], game.num_draws
Example #11
def minimax_vs_minimax_connect3_elo(depth1,
                                    depth2,
                                    number_of_games,
                                    logger=None):
    """
    Use the inverse of the elo formula to compute the outcome of a match,
    given that we already know the result of a match.
    It tests the relative elo between minimax algorithm and a minimax algorithm
    
    Elo formula:
        expected_score = 1/(1+10^((elo_diff)/400)) 
    Inverse formula: 
        elo_diff = -400*log(1/expected_score - 1)
    """
    game = Connect4Env(
        None,
        width=Connect3Config.WIDTH,
        height=Connect3Config.HEIGHT,
        n_actions=Connect3Config.N_ACTIONS,
        connect=Connect3Config.CONNECT,
    )
    if logger:
        logger.info("**********MINIMAX_depth_" + str(depth1) +
                    "_(X) VS (O)_MINIMAX_depth_" + str(depth2) + "**********")
    for i in tqdm(range(number_of_games)):
        game_over = False
        actions = {}
        starting_player = random.choice([player1_ID, player2_ID])
        game.reset(starting_player=starting_player)
        print("\nPlayer " + str(starting_player + 1) + " is starting")
        while not game_over:
            actual_player = game.current_player
            board = game.board
            if actual_player == player1_ID:
                act, _ = minimax_connect3(board,
                                          player1_ID,
                                          True,
                                          depth=depth1)
                actions[player1] = act
                _, _, done, _ = game.step(actions)
            elif actual_player == player2_ID:
                act, _ = minimax_connect3(board,
                                          player2_ID,
                                          True,
                                          depth=depth2)
                actions[player2] = act
                _, _, done, _ = game.step(actions)
            else:
                raise ValueError("Player index is not valid, should be 0 or 1")
            if logger:
                logger.info("Game number " + str(i) + "/" +
                            str(number_of_games))
                logger.info("Player " + str(actual_player + 1) + " actions: " +
                            str(act))
                logger.info("\n" + repr(board))
                logger.info(
                    board_print(board, Connect3Config.HEIGHT,
                                Connect3Config.WIDTH))

            if done["__all__"]:
                if logger:
                    logger.info("PLAYER " + str(game.winner + 1) + " WON...")
                    logger.info("CURRENT SCORE: " + str(game.score[player1]) +
                                " VS " + str(game.score[player2]))
                game_over = True

    if game.score[player1] > game.score[player2]:
        score = game.score[player1] / number_of_games + game.num_draws / (
            2 * number_of_games)
    elif game.score[player1] < game.score[player2]:
        score = game.score[player2] / number_of_games + game.num_draws / (
            2 * number_of_games)
    elif game.score[player1] == game.score[player2]:
        return 0

    if score >= 10 / 11:
        elo_diff = 400
    else:
        elo_diff = -400 * math.log((1 / score - 1), 10)

    print("\nplayer 1 score: " + str(game.score[player1]))
    print("player 2 score: " + str(game.score[player2]))
    print("number of draw: " + str(game.num_draws))
    print("elo difference computed over " + str(number_of_games) +
          " games between the 2 minimax of depth " + str(depth1) + " " +
          str(depth2) + " is " + str(elo_diff))

    return elo_diff
        input_dict["obs"]["state"] = board  #reshaped_board
        input_dict["obs"]["action_mask"] = action_mask
        action_logits, _ = model.forward(input_dict, None, None)
        act = np.argmax(action_logits)

        if act in act_to_do:
            success += 1

    num_of_configs = len(BOARD_CONFIGS)
    success_rate = success / num_of_configs
    return success_rate, success, num_of_configs


if __name__ == "__main__":
    # check if the given board configuration and actions are valid:
    env = Connect4Env(None)
    for elem in BOARD_CONFIGS:
        if elem.startswith("win_in_2"):
            continue
        actions_to_take = BOARD_CONFIGS[elem][1]
        board_config = BOARD_CONFIGS[elem][0]
        for act in actions_to_take:
            env.reset()
            env.board = board_config
            env.current_player = 0
            action = {"player1": act}
            obs, reward, done, info = env.step(action)
            if done["__all__"] != True:
                print("Configuration Error: " + str(elem) + ":")
                print("The configuration " + str(env) +
                      " does not end in a winning move " + str(act))