Example #1
print("pytorch version: ", torch.__version__)


# load the network
path_list = os.listdir(network_dir)
path_list.sort(key=utils.natural_keys)


# load the test set with the solved positions
test_set = pd.read_csv(test_set_path, sep=",")


# test the best network to quickly get a result
net_path = network_dir + path_list[-1]
net = data_storage.load_net(net_path, evaluation.torch_device)
policy_error, value_error = evaluation.net_prediction_error(net, test_set)
logger.debug("prediction-error: {}, value-error: {}, network: {}".format(policy_error, value_error, net_path))


# calculate the prediction error of the networks
generation = []
net_prediction_error = []
net_value_error = []
mcts_prediction_error = []
path_list = os.listdir(network_dir)
path_list.sort(key=utils.natural_keys)


# empty board test
board = connect4.Connect4Board()
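
# A minimal sketch (not part of the original listing) of how the per-network loop
# might continue, assuming the same evaluation.net_prediction_error helper used above;
# the mcts_prediction_error list would be filled by a separate mcts evaluation.
for i, file_name in enumerate(path_list):
    net_path = network_dir + file_name
    net = data_storage.load_net(net_path, evaluation.torch_device)

    # prediction error of the raw network on the solved test positions
    policy_error, value_error = evaluation.net_prediction_error(net, test_set)
    generation.append(i)
    net_prediction_error.append(policy_error)
    net_value_error.append(value_error)
    logger.debug("policy-error: {}, value-error: {}, network: {}".format(
        policy_error, value_error, net_path))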
Example #2
def __self_play_worker__(network_path, game_count):
    """
    plays a number of self-play games
    :param network_path:        path of the network
    :param game_count:          the number of self-play games to play
    :return:                    a list of dictionaries with all training examples
    """
    # load the network
    net = data_storage.load_net(network_path, Config.evaluation_device)

    training_expl_list = []

    # initialize the mcts object for all games
    mcts_list = [MCTS() for _ in range(game_count)]

    # initialize the lists that keep track of the game
    player_list = [[] for _ in range(game_count)]
    state_list = [[] for _ in range(game_count)]
    state_id_list = [[] for _ in range(game_count)]
    policy_list = [[] for _ in range(game_count)]

    move_count = 0
    all_terminated = False
    while not all_terminated:
        # ===========================================  append the correct values to the lists for the training data
        for i_mcts_ctx, mcts_ctx in enumerate(mcts_list):
            # skip terminated games
            if mcts_ctx.board.terminal:
                continue

            # add regular board
            state, player = mcts_ctx.board.white_perspective()
            state_id = mcts_ctx.board.state_id()
            state_list[i_mcts_ctx].append(state)
            state_id_list[i_mcts_ctx].append(state_id)
            player_list[i_mcts_ctx].append(player)

            # add mirrored board
            board_mirrored = mcts_ctx.board.mirror()
            state_m, player_m = board_mirrored.white_perspective()
            state_id_m = board_mirrored.state_id()
            state_list[i_mcts_ctx].append(state_m)
            state_id_list[i_mcts_ctx].append(state_id_m)
            player_list[i_mcts_ctx].append(player_m)

        # =========================================== execute the mcts simulations for all boards
        mcts.run_simulations(mcts_list, Config.mcts_sim_count, net,
                             Config.alpha_dirich)

        # ===========================================  get the policy from the mcts
        temp = 0 if move_count >= Config.temp_threshold else Config.temp

        for i_mcts_ctx, mcts_ctx in enumerate(mcts_list):
            # skip terminated games
            if mcts_ctx.board.terminal:
                continue

            policy = mcts_ctx.policy_from_state(
                mcts_ctx.board.state_id(), temp)
            policy_list[i_mcts_ctx].append(policy)

            # add the mirrored policy as well
            policy_m = np.flip(policy)
            policy_list[i_mcts_ctx].append(policy_m)

            # sample from the policy to determine the move to play
            move = np.random.choice(len(policy), p=policy)
            mcts_ctx.board.play_move(move)

        move_count += 1

        # ===========================================  check if there are still boards with running games
        all_terminated = True
        for mcts_ctx in mcts_list:
            if not mcts_ctx.board.terminal:
                all_terminated = False
                break

    # =========================================== add the training example
    for i_mcts_ctx, mcts_ctx in enumerate(mcts_list):
        reward = mcts_ctx.board.training_reward()
        for i_player, player in enumerate(player_list[i_mcts_ctx]):
            value = reward if player == CONST.WHITE else -reward

            # save the training example
            training_expl_list.append({
                "state": state_list[i_mcts_ctx][i_player],
                "state_id": state_id_list[i_mcts_ctx][i_player],
                "player": player,
                "policy": policy_list[i_mcts_ctx][i_player],
                "value": value,
            })

    # free up some resources
    del net
    del mcts_list
    torch.cuda.empty_cache()

    return training_expl_list
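
# A hedged usage sketch (not from the original source): the worker is typically fanned
# out over several processes so the self-play games run in parallel; the helper name,
# the pool size and the way the games are split are illustrative assumptions.
import multiprocessing as mp

def generate_self_play_examples(network_path, total_games, n_workers=4):
    # split the games evenly over the workers and run them in parallel
    games_per_worker = total_games // n_workers
    with mp.Pool(processes=n_workers) as pool:
        results = pool.starmap(__self_play_worker__,
                               [(network_path, games_per_worker)] * n_workers)

    # flatten the per-worker lists into a single list of training examples
    return [example for worker_result in results for example in worker_result]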
Example #3
def main_evaluation(game_class, result_folder):
    # configuration values
    game_count = 200        # the number of test games to play
    mcts_sim_count = 200     # the number of mcts simulations to perform
    temp = 0.3              # the temperature used to get the policy for the move selection, gives some randomness


    # the logger
    utils.init_logger(logging.DEBUG, file_name="log/app.log")
    logger = logging.getLogger('evaluation')

    # set the random seed
    random.seed(a=None, version=2)
    np.random.seed(seed=None)


    # load the network
    network_dir = config.save_dir + "/networks/"
    path_list = os.listdir(network_dir)
    path_list.sort(key=utils.natural_keys)


    # let all networks play against the last generation without any mcts
    best_net_path = network_dir + path_list[-1]
    best_net = data_storage.load_net(best_net_path, torch_device)



    generation = []
    prediction_score = []
    for i in range(len(path_list)):
        generation.append(i)
        net_path = network_dir + path_list[i]
        net = data_storage.load_net(net_path, torch_device)
        score = net_vs_net_prediction(net, best_net, game_count, game_class)
        prediction_score.append(score)

        logger.debug("prediction score: {}, network: {}".format(score, net_path))



    # let all networks play against the last generation with mcts
    mcts_score = []
    path_list = []      # mcts evaluation disabled; replace [] with e.g. [path_list[0], path_list[-2]] to enable it
    for i in range(len(path_list)):
        net_path = network_dir + path_list[i]
        net = data_storage.load_net(net_path, torch_device)
        score = net_vs_net_mcts(net, best_net, mcts_sim_count, temp, game_count, game_class)
        mcts_score.append(score)

        logger.debug("mcts_score score: {}, network: {}".format(score, net_path))


    # save the results
    np.save(result_folder +"/net_vs_net_pred.npy", np.array(prediction_score))
    np.save(result_folder + "/net_vs_net_mcts.npy", np.array(mcts_score))
    np.save(result_folder + "/net_vs_net_gen.npy", np.array(generation))


    # set the style of the plot
    plt.style.use('seaborn-dark-palette')


    # plot the prediction score
    fig1 = plt.figure(1)
    plt.plot(generation, prediction_score)
    axes = plt.gca()
    axes.set_ylim([0, 0.55])
    axes.grid(True, color=(0.9, 0.9, 0.9))
    plt.title("Prediction Score vs Best Network")
    plt.xlabel("Generation")
    plt.ylabel("Prediction Score")
    fig1.show()


    # # plot the mcts score
    # fig2 = plt.figure(2)
    # plt.plot(generation, mcts_score)
    # axes = plt.gca()
    # axes.set_ylim([0, 0.55])
    # axes.grid(True, color=(0.9, 0.9, 0.9))
    # plt.title("MCTS Prediction Score vs Best Network")
    # plt.xlabel("Generation")
    # plt.ylabel("MCTS Score")
    # fig2.show()

    plt.show()
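
# A hedged invocation sketch (not part of the original listing); the connect4 game class
# and the results directory name are assumptions for illustration.
if __name__ == "__main__":
    main_evaluation(connect4.Connect4Board, "results/connect4")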
Example #4
# create the minimax state dict
minimax.create_state_dict()

# load the network
path_list = os.listdir(network_dir)
path_list.sort(key=utils.natural_keys)

# define the parameters for the evaluation
torch_device = torch.device('cpu')  # torch device that is used for evaluation
game_count = 300  # the number of games to play
mcts_sim_count = 20  # the number of mcts simulations

# test the best network to quickly get a result
net_path = network_dir + path_list[-1]
net = data_storage.load_net(net_path, torch_device)
white_score = minimax.play_minimax_games(net, game_count, mcts_sim_count,
                                         CONST.WHITE)
black_score = minimax.play_minimax_games(net, game_count, mcts_sim_count,
                                         CONST.BLACK)
logger.debug("white score: {}, black: {}, network: {}".format(
    white_score, black_score, net_path))

# let the different networks play against a minimax player
generation = []
white_scores = []
black_scores = []
path_list = os.listdir(network_dir)
path_list.sort(key=utils.natural_keys)
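
# A minimal sketch (not from the original listing) of how the per-generation minimax
# evaluation loop might look, reusing the helpers and lists defined above.
for i, file_name in enumerate(path_list):
    net_path = network_dir + file_name
    net = data_storage.load_net(net_path, torch_device)

    # let this generation play against the minimax player as both colors
    white_score = minimax.play_minimax_games(net, game_count, mcts_sim_count, CONST.WHITE)
    black_score = minimax.play_minimax_games(net, game_count, mcts_sim_count, CONST.BLACK)

    generation.append(i)
    white_scores.append(white_score)
    black_scores.append(black_score)
    logger.debug("white score: {}, black score: {}, network: {}".format(
        white_score, black_score, net_path))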

# get the prediction error of all networks
Example #5
def __self_play_worker__(game_class, network_path, game_count):
    """
    plays a number of self-play games
    :param game_class:          the class of the implemented games
    :param network_path:        path of the network
    :param game_count:          the number of self-play games to play
    :return:                    a list of dictionaries with all training examples
    """
    # load the network
    net = data_storage.load_net(network_path, config.evaluation_device)

    training_expl_list = []

    # initialize the mcts object for all games
    mcts_list = [MCTS(game_class()) for _ in range(game_count)]

    # initialize the lists that keep track of the games
    player_list = [[] for _ in range(game_count)]
    state_list = [[] for _ in range(game_count)]
    state_id_list = [[] for _ in range(game_count)]
    policy_list = [[] for _ in range(game_count)]

    move_count = 0
    all_terminated = False
    while not all_terminated:
        # =========================================== execute the mcts simulations for all boards
        mcts.run_simulations(mcts_list, config.mcts_sim_count, net,
                             config.alpha_dirich)

        # ===========================================  get the policy from the mcts
        temp = 0 if move_count >= config.temp_threshold else config.temp

        for i_mcts_ctx, mcts_ctx in enumerate(mcts_list):
            # skip terminated games
            if mcts_ctx.board.is_terminal():
                continue

            policy = mcts_ctx.policy_from_state(
                mcts_ctx.board.state_id(), temp)

            # add regular board
            state, player = mcts_ctx.board.white_perspective()
            state_id = mcts_ctx.board.state_id()
            state_list[i_mcts_ctx].append(state)
            state_id_list[i_mcts_ctx].append(state_id)
            player_list[i_mcts_ctx].append(player)
            policy_list[i_mcts_ctx].append(policy)

            # add symmetric boards
            board_symmetries, policy_symmetries = mcts_ctx.board.symmetries(
                policy)
            if board_symmetries is not None:
                for board_sym, policy_sym in zip(board_symmetries,
                                                 policy_symmetries):
                    state_s, player_s = board_sym.white_perspective()
                    state_id_s = board_sym.state_id()
                    state_list[i_mcts_ctx].append(state_s)
                    state_id_list[i_mcts_ctx].append(state_id_s)
                    player_list[i_mcts_ctx].append(player_s)

                    policy_list[i_mcts_ctx].append(policy_sym)

            # sample from the policy to determine the move to play
            action = np.random.choice(len(policy), p=policy)
            mcts_ctx.board.execute_action(action)

        move_count += 1

        # ===========================================  check if there are still boards with running games
        all_terminated = True
        for mcts_ctx in mcts_list:
            if not mcts_ctx.board.is_terminal():
                all_terminated = False
                break

    # =========================================== add the training example
    for i_mcts_ctx, mcts_ctx in enumerate(mcts_list):
        reward = mcts_ctx.board.training_reward()
        for i_player, player in enumerate(player_list[i_mcts_ctx]):
            value = reward if player == CONST.WHITE else -reward

            # save the training example
            training_expl_list.append({
                "state": state_list[i_mcts_ctx][i_player],
                "state_id": state_id_list[i_mcts_ctx][i_player],
                "player": player,
                "policy": policy_list[i_mcts_ctx][i_player],
                "value": value,
            })

    # free up some resources
    del net
    del mcts_list
    torch.cuda.empty_cache()

    return training_expl_list
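
# The generic worker above relies on the game board exposing a symmetries(policy) method.
# A minimal sketch (an assumption, not the original implementation) of what such a method
# could look like for a horizontally symmetric game such as Connect4, where mirror()
# returns the horizontally mirrored board:
def symmetries(self, policy):
    # return the mirrored board together with the mirrored policy,
    # or (None, None) if the game has no exploitable symmetry
    board_mirrored = self.mirror()
    policy_mirrored = np.flip(policy)
    return [board_mirrored], [policy_mirrored]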