def main(argv):
    seed = np.random.randint(low=0, high=1e5)

    root_path = './' + "real_world" + "_" + FLAGS.meta_method + '_regret/'

    if not os.path.exists(root_path):
        os.makedirs(root_path)

    real_world_meta_games = load_pkl(
        '../real_world_games/real_world_meta_games.pkl')

    # game_types = ['10,4-Blotto', 'AlphaStar', 'Kuhn-poker', 'Random game of skill', 'Transitive game',
    #               'connect_four', 'quoridor(board_size=4)', 'misere(game=tic_tac_toe())', 'hex(board_size=3)',
    #               'go(board_size=4,komi=6.5)']

    # for game_type in game_types:

    checkpoint_dir = os.path.join(
        os.getcwd(), root_path) + FLAGS.game_type + '_' + str(seed)
    if not os.path.exists(checkpoint_dir):
        os.makedirs(checkpoint_dir)
    sys.stdout = open(checkpoint_dir + '/stdout.txt', 'w+')

    print("================================================")
    print("======The current game is ", FLAGS.game_type, "=========")
    print("================================================")

    console(meta_games=real_world_meta_games[FLAGS.game_type],
            meta_method=FLAGS.meta_method,
            empirical_game_size=FLAGS.num_emp_strategies,
            num_samples=FLAGS.num_samples,
            checkpoint_dir=checkpoint_dir)
Example #2
def main(argv):
    if len(argv) > 1:
        raise app.UsageError("Too many command-line arguments.")

    seed = set_random_seed(FLAGS.seed)
    if not FLAGS.MRCP_deterministic:
        seed = None  # invalidate the seed so it does not get passed into psro_trainer

    # root_path = './' + "real_world" + "_supplement_" + FLAGS.closed_method + '/'
    root_path = './' + "real_world" + "_full_" + FLAGS.closed_method + '/'

    if not os.path.exists(root_path):
        os.makedirs(root_path)

    real_world_meta_games = load_pkl(
        './real_world_games/real_world_meta_games.pkl')

    # game_types = ['10,4-Blotto', 'AlphaStar', 'Kuhn-poker', 'Random game of skill', 'Transitive game',
    #               'connect_four', 'quoridor(board_size=4)', 'misere(game=tic_tac_toe())', 'hex(board_size=3)',
    #               'go(board_size=4,komi=6.5)']

    checkpoint_dir = FLAGS.game_type + "_" + str(seed)
    checkpoint_dir = os.path.join(os.getcwd(), root_path, checkpoint_dir) + '/'

    if not os.path.exists(checkpoint_dir):
        os.makedirs(checkpoint_dir)
    sys.stdout = open(checkpoint_dir + '/stdout.txt', 'w+')

    print("================================================")
    print("======The current game is ", FLAGS.game_type, "=========")
    print("================================================")

    if FLAGS.num_iterations > real_world_meta_games[
            FLAGS.game_type][0].shape[0]:
        num_iterations = real_world_meta_games[FLAGS.game_type][0].shape[0]
    else:
        num_iterations = FLAGS.num_iterations

    psro(meta_games=real_world_meta_games[FLAGS.game_type],
         game_type=FLAGS.game_type,
         num_rounds=10,
         seed=seed,
         checkpoint_dir=checkpoint_dir,
         num_iterations=num_iterations,
         closed_method=FLAGS.closed_method)
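A minimal launch sketch for this entry point: the app.UsageError check above implies abseil's app module, but the script filename and flag values in the comment below are assumptions, not taken from the source.

if __name__ == "__main__":
    from absl import app
    # Hypothetical invocation:
    #   python real_world_psro.py --game_type="AlphaStar" --closed_method="alter" \
    #       --num_iterations=100 --seed=0
    app.run(main)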
import matplotlib.pyplot as plt
import matplotlib as mpl
import math

from nash_solver.gambit_tools import load_pkl  # needed for the load_pkl calls below

idx = 3
round_idx = '10'
game_type = "szs"
MSS = "FP"
# root_path = './data/' + game_type + '_' + MSS + '/' + game_type + '_' + MSS + str(idx) + '/'

root_path = './data/10,4Blotto/'

regret_path = root_path + '10,4-Blotto_90335regret_of_samples_' + round_idx + '.pkl'
improvement_path = root_path + '10,4-Blotto_90335performance_improvement_' + round_idx + '.pkl'

regret = load_pkl(regret_path)
improvement = load_pkl(improvement_path)

NE_regret = 0.13
NE_improvement = 0.0048
MRCP_regret = 0.055
MRCP_improvement = 0.00013


plt.scatter(regret, improvement)
plt.plot(NE_regret, NE_improvement, '-ro')
# plt.plot(MRCP_regret, MRCP_improvement, '-go')

plt.xticks(size=17)
plt.yticks(size=17)
from nash_solver.gambit_tools import load_pkl
import numpy as np

root_path = './data/'
meta_games = load_pkl(root_path + "meta_games.pkl")


import os
from nash_solver.gambit_tools import load_pkl
import matplotlib
matplotlib.use("TkAgg")
import matplotlib.pyplot as plt
import matplotlib as mpl
import numpy as np
from scipy.signal import savgol_filter
import math

load_path = os.getcwd() + '/data/data1/'
zero_sum_DO = load_pkl(load_path + 'zero_sum_DO.pkl')
zero_sum_FP = load_pkl(load_path + 'zero_sum_FP.pkl')
zero_sum_DO_FP = load_pkl(load_path + 'zero_sum_DO_SP.pkl')

zero_sum_DO = np.mean(zero_sum_DO, axis=0)
zero_sum_FP = np.mean(zero_sum_FP, axis=0)
zero_sum_DO_FP = np.mean(zero_sum_DO_FP, axis=0)

# idx = 6
# zero_sum_DO = zero_sum_DO[idx]
# zero_sum_FP = zero_sum_FP[idx]
# zero_sum_DO_FP = zero_sum_DO_FP[idx]

window_size = 15
order = 2

# Focus on fictitious play
fic_zero_sum_DO_FP = []
for i in range(len(zero_sum_DO_FP)):
    if i % 2 == 0:
        # assumed completion: keep the even-index (fictitious-play) entries
        fic_zero_sum_DO_FP.append(zero_sum_DO_FP[i])
Example #6
def real_world_game(self, game_name):
    meta_games = load_pkl("./real_world_games/real_world_meta_games.pkl")
    return meta_games[game_name]
Example #7
def psro(generator,
         game_type,
         num_rounds,
         seed,
         checkpoint_dir,
         num_iterations=20,
         closed_method="alter"):
    if game_type == "zero_sum":
        meta_games = generator.zero_sum_game()
    elif game_type == "general_sum":
        meta_games = generator.general_sum_game()
    elif game_type == "symmetric_zero_sum":
        meta_games = generator.symmetric_zero_sum_game()
    elif game_type == "kuhn":
        kuhn_meta_games = load_pkl("./MRCP/kuhn_meta_game.pkl")
        meta_games = kuhn_meta_games[
            0]  # The first element of kuhn_meta_game.pkl is meta_games.
        generator.num_strategies = 64
    else:
        for pkl in os.listdir('efg_game'):
            print(pkl)
            if pkl.split('.pkl')[0] == game_type:
                with open('efg_game/' + pkl, 'rb') as f:
                    meta_games = pickle.load(f)
        if 'meta_games' not in locals():
            raise ValueError("No pickled game in ./efg_game matches game_type: " + game_type)

    # for example 1 in paper
    # meta_games = [np.array([[0,-0.1,-3],[0.1,0,2],[3,-2,0]]),np.array([[0,0.1,3],[-0.1,0,-2],[-3,2,0]])]
    # generator.num_strategies = 3
    # num_rounds = 1
    # num_iterations = 10
    init_strategies = np.random.randint(0, meta_games[0].shape[0], num_rounds)

    DO_trainer = PSRO_trainer(meta_games=meta_games,
                              num_strategies=generator.num_strategies,
                              num_rounds=num_rounds,
                              meta_method=double_oracle,
                              checkpoint_dir=checkpoint_dir,
                              num_iterations=num_iterations,
                              seed=seed,
                              init_strategies=init_strategies)

    FP_trainer = PSRO_trainer(meta_games=meta_games,
                              num_strategies=generator.num_strategies,
                              num_rounds=num_rounds,
                              meta_method=fictitious_play,
                              checkpoint_dir=checkpoint_dir,
                              num_iterations=num_iterations,
                              seed=seed,
                              init_strategies=init_strategies)

    PRD_trainer = PSRO_trainer(meta_games=meta_games,
                               num_strategies=generator.num_strategies,
                               num_rounds=num_rounds,
                               meta_method=prd_solver,
                               checkpoint_dir=checkpoint_dir,
                               num_iterations=num_iterations,
                               seed=seed,
                               init_strategies=init_strategies)

    CRD_trainer = PSRO_trainer(meta_games=meta_games,
                               num_strategies=generator.num_strategies,
                               num_rounds=num_rounds,
                               meta_method=regret_controled_RD,
                               checkpoint_dir=checkpoint_dir,
                               num_iterations=num_iterations,
                               seed=seed,
                               init_strategies=init_strategies)

    # IDO_trainer = PSRO_trainer(meta_games=meta_games,
    #                            num_strategies=generator.num_strategies,
    #                            num_rounds=num_rounds,
    #                            meta_method=iterative_double_oracle,
    #                            checkpoint_dir=checkpoint_dir,
    #                            num_iterations=num_iterations,
    #                            seed=seed,
    #                            init_strategies=init_strategies)
    #
    # IPRD_trainer = PSRO_trainer(meta_games=meta_games,
    #                             num_strategies=generator.num_strategies,
    #                             num_rounds=num_rounds,
    #                             meta_method=iterated_prd,
    #                             checkpoint_dir=checkpoint_dir,
    #                             num_iterations=num_iterations,
    #                             seed=seed,
    #                             init_strategies=init_strategies)

    # IDOS_trainer = PSRO_trainer(meta_games=meta_games,
    #                             num_strategies=generator.num_strategies,
    #                             num_rounds=num_rounds,
    #                             meta_method=iterative_double_oracle_player_selection,
    #                             checkpoint_dir=checkpoint_dir,
    #                             num_iterations=num_iterations,
    #                             seed=seed,
    #                             init_strategies=init_strategies)

    MRCP_trainer = PSRO_trainer(meta_games=meta_games,
                                num_strategies=generator.num_strategies,
                                num_rounds=num_rounds,
                                meta_method=mrcp_solver,
                                checkpoint_dir=checkpoint_dir,
                                num_iterations=num_iterations,
                                seed=seed,
                                init_strategies=init_strategies,
                                closed_method=closed_method)

    DO_trainer.loop()
    print("#####################################")
    print('DO looper finished looping')
    print("#####################################")
    if not os.path.exists(checkpoint_dir):
        os.makedirs(checkpoint_dir)
    with open(checkpoint_dir + game_type + '_meta_games.pkl', 'wb') as f:
        pickle.dump(meta_games, f)
    nashconv_names = [
        'nashconvs_' + str(t) for t in range(len(DO_trainer.neconvs))
    ]
    mrconv_names = [
        'mrcpcons_' + str(t) for t in range(len(DO_trainer.mrconvs))
    ]
    df = pd.DataFrame(np.transpose(DO_trainer.neconvs+DO_trainer.mrconvs),\
            columns=nashconv_names+mrconv_names)
    df.to_csv(checkpoint_dir + game_type + '_DO.csv', index=False)
    with open(checkpoint_dir + game_type + '_mrprofile_DO.pkl', 'wb') as f:
        pickle.dump(DO_trainer.mrprofiles, f)

    FP_trainer.loop()
    print("#####################################")
    print('FP looper finished looping')
    print("#####################################")
    df = pd.DataFrame(np.transpose(FP_trainer.neconvs+FP_trainer.mrconvs),\
            columns=nashconv_names+mrconv_names)
    df.to_csv(checkpoint_dir + game_type + '_FP.csv', index=False)
    with open(checkpoint_dir + game_type + '_mrprofile_FP.pkl', 'wb') as f:
        pickle.dump(FP_trainer.mrprofiles, f)

    # PRD_trainer.loop()
    # print("#####################################")
    # print('PRD looper finished looping')
    # print("#####################################")
    # df = pd.DataFrame(np.transpose(PRD_trainer.neconvs + PRD_trainer.mrconvs), \
    #                     columns=nashconv_names + mrconv_names)
    # df.to_csv(checkpoint_dir + game_type + '_PRD.csv', index=False)
    # with open(checkpoint_dir + game_type + '_mrprofile_PRD.pkl', 'wb') as f:
    #     pickle.dump(PRD_trainer.mrprofiles, f)

    CRD_trainer.loop()
    print("#####################################")
    print('CRD looper finished looping')
    print("#####################################")
    df = pd.DataFrame(np.transpose(CRD_trainer.neconvs + CRD_trainer.mrconvs), \
                      columns=nashconv_names + mrconv_names)
    df.to_csv(checkpoint_dir + game_type + '_CRD.csv', index=False)
    with open(checkpoint_dir + game_type + '_mrprofile_CRD.pkl', 'wb') as f:
        pickle.dump(CRD_trainer.mrprofiles, f)

    # IDO_trainer.loop()
    # print("#####################################")
    # print('IDO looper finished looping')
    # print("#####################################")
    # df = pd.DataFrame(np.transpose(IDO_trainer.neconvs + IDO_trainer.mrconvs), \
    #                     columns=nashconv_names + mrconv_names)
    # df.to_csv(checkpoint_dir + game_type + '_IDO.csv', index=False)
    # with open(checkpoint_dir + game_type + '_mrprofile_IDO.pkl', 'wb') as f:
    #     pickle.dump(IDO_trainer.mrprofiles, f)
    #
    # IPRD_trainer.loop()
    # print("#####################################")
    # print('IPRD looper finished looping')
    # print("#####################################")
    # df = pd.DataFrame(np.transpose(IPRD_trainer.neconvs + IPRD_trainer.mrconvs), \
    #                      columns=nashconv_names + mrconv_names)
    # df.to_csv(checkpoint_dir + game_type + '_IPRD.csv', index=False)
    # with open(checkpoint_dir + game_type + '_mrprofile_IPRD.pkl', 'wb') as f:
    #     pickle.dump(IPRD_trainer.mrprofiles, f)

    # IDOS_trainer.loop()
    # print("#####################################")
    # print('IDOS looper finished looping')
    # print("#####################################")
    # df = pd.DataFrame(np.transpose(IDOS_trainer.neconvs + IDOS_trainer.mrconvs), \
    #                     columns=nashconv_names + mrconv_names)
    # df.to_csv(checkpoint_dir + game_type + '_IDOS.csv', index=False)
    # with open(checkpoint_dir + game_type + '_mrprofile_IDOS.pkl', 'wb') as f:
    #     pickle.dump(IDOS_trainer.mrprofiles, f)

    # MRCP_trainer.loop()
    # print("#####################################")
    # print('MRCP looper finished looping')
    # print("#####################################")
    # df = pd.DataFrame(np.transpose(MRCP_trainer.neconvs+MRCP_trainer.mrconvs),\
    #         columns=nashconv_names+mrconv_names)
    # df.to_csv(checkpoint_dir+game_type+'_MRCP.csv',index=False)
    # with open(checkpoint_dir + game_type + '_mrprofile_MRCP.pkl','wb') as f:
    #     pickle.dump(DO_trainer.mrprofiles, f)

    print("The current game type is ", game_type)
    print("DO neco av:", np.mean(DO_trainer.neconvs, axis=0))
    print("DO mrcp av:", np.mean(DO_trainer.mrconvs, axis=0))
    print("FP fpco av:", np.mean(FP_trainer.nashconvs, axis=0))
    print("FP neco av:", np.mean(FP_trainer.neconvs, axis=0))
    print("FP mrcp av:", np.mean(FP_trainer.mrconvs, axis=0))
    # print("PRD prdco av:", np.mean(PRD_trainer.nashconvs, axis=0))
    # print("PRD neco av:", np.mean(PRD_trainer.neconvs, axis=0))
    # print("PRD mrcp av:", np.mean(PRD_trainer.mrconvs, axis=0))
    print("CRD CRDco av:", np.mean(CRD_trainer.nashconvs, axis=0))
    print("CRD neco av:", np.mean(CRD_trainer.neconvs, axis=0))
    print("CRD mrcp av:", np.mean(CRD_trainer.mrconvs, axis=0))
    # print("IDO IDOco av:", np.mean(IDO_trainer.nashconvs, axis=0))
    # print("IDO neco av:", np.mean(IDO_trainer.neconvs, axis=0))
    # print("IDO mrcp av:", np.mean(IDO_trainer.mrconvs, axis=0))
    # print("IPRD IDOco av:", np.mean(IDO_trainer.nashconvs, axis=0))
    # print("IDO neco av:", np.mean(IDO_trainer.neconvs, axis=0))
    # print("IDO mrcp av:", np.mean(IDO_trainer.mrconvs, axis=0))
    # print("IDOS IDOSco av:", np.mean(IDOS_trainer.nashconvs, axis=0))
    # print("IDOS neco av:", np.mean(IDOS_trainer.neconvs, axis=0))
    # print("IDOS mrcp av:", np.mean(IDOS_trainer.mrconvs, axis=0))
    # print("MR neco av:", np.mean(MRCP_trainer.neconvs, axis=0))
    # print("MR mrcp av:", np.mean(MRCP_trainer.mrconvs, axis=0))

    print("====================================================")
import numpy as np
from meta_strategies import prd_solver, iterated_quantal_response_solver
from nash_solver.gambit_tools import load_pkl
from nash_solver.replicator_dynamics_solver import replicator_dynamics

meta_games = load_pkl("./data/meta_game.pkl")
num_strs = np.shape(meta_games[0])[0]
print("Num of strs:", num_strs)

sub_games = []
i = 30
for player in range(3):
    sub_games.append(meta_games[player][:i, :i, :i])

dev_strs, nashconv = prd_solver(
    sub_games, [list(range(i)), list(range(i)),
                list(range(i))])
# dev_strs, nashconv = iterated_quantal_response_solver(meta_games, [list(range(num_strs)), list(range(num_strs))])

print(nashconv)
Example #9
def empirical_game_generator(generator,
                             game_type,
                             meta_method,
                             empirical_game_size,
                             seed=None,
                             checkpoint_dir=None):
    """
    Generate an empirical game which is a subgame of the full matrix game.
    :param generator:  a Game generator
    :param game_type: type of the game, options: "symmetric_zero_sum", "zero_sum", "general_sum"
    :param meta_method: Method for generating the empirical game.
    :param empirical_game_size:
    :param seed: random seed
    :param checkpoint_dir:
    :return:
    """
    # Generate the underlying true game.
    if game_type == "zero_sum":
        meta_games = generator.zero_sum_game()
    elif game_type == "general_sum":
        meta_games = generator.general_sum_game()
    elif game_type == "symmetric_zero_sum":
        meta_games = generator.symmetric_zero_sum_game()
    elif game_type == "kuhn":
        kuhn_meta_games = load_pkl("./kuhn_meta_game.pkl")
        meta_games = kuhn_meta_games[
            0]  # The first element of kuhn_meta_game.pkl is meta_games.
        empirical_game_size = 52
    else:
        raise ValueError("Undefined game type.")

    # Assume players have the same number of strategies.
    num_strategies = np.shape(meta_games[0])[0]

    # Iterations at which the empirical game is recorded.
    if empirical_game_size > num_strategies:
        raise ValueError("The size of EG is large than the full game.")

    empricial_game_record = list(range(10, 101, 10))

    if empirical_game_size < max(empricial_game_record):
        raise ValueError(
            "The largest recorded iteration exceeds the size of the generated empirical game.")

    # Create a meta-trainer.
    if meta_method == "DO":
        trainer = PSRO_trainer(meta_games=meta_games,
                               num_strategies=num_strategies,
                               num_rounds=1,
                               meta_method=double_oracle,
                               checkpoint_dir=checkpoint_dir,
                               num_iterations=empirical_game_size,
                               empricial_game_record=empricial_game_record,
                               seed=seed,
                               init_strategies=None,
                               calculate_neconv=False,
                               calculate_mrcpconv=False)
    elif meta_method == "FP":
        trainer = PSRO_trainer(meta_games=meta_games,
                               num_strategies=num_strategies,
                               num_rounds=1,
                               meta_method=fictitious_play,
                               checkpoint_dir=checkpoint_dir,
                               num_iterations=empirical_game_size,
                               empricial_game_record=empricial_game_record,
                               seed=seed,
                               init_strategies=None,
                               calculate_neconv=False,
                               calculate_mrcpconv=False)
    elif meta_method == "MRCP":
        trainer = PSRO_trainer(meta_games=meta_games,
                               num_strategies=num_strategies,
                               num_rounds=1,
                               meta_method=mrcp_solver,
                               checkpoint_dir=checkpoint_dir,
                               num_iterations=empirical_game_size,
                               empricial_game_record=empricial_game_record,
                               seed=seed,
                               init_strategies=None,
                               calculate_neconv=False,
                               calculate_mrcpconv=False)
    else:
        raise ValueError("Undefined meta-method.")

    # Don't use trainer.iteration() since the empirical game won't be initialized.
    trainer.loop()

    return meta_games, trainer.get_recorded_empirical_game()
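A hedged usage sketch for empirical_game_generator. The RandomZeroSumGenerator stub below is hypothetical; it only mimics the generator interface this function expects (a zero_sum_game() method returning [payoff, -payoff]), and the sizes and checkpoint path are illustrative.

import numpy as np


class RandomZeroSumGenerator:
    """Hypothetical stand-in for the project's game generator."""

    def __init__(self, num_strategies):
        self.num_strategies = num_strategies

    def zero_sum_game(self):
        # Random two-player zero-sum payoffs: [row payoff, column payoff].
        payoff = np.random.uniform(-1, 1, size=(self.num_strategies, self.num_strategies))
        return [payoff, -payoff]


full_game, recorded_games = empirical_game_generator(
    generator=RandomZeroSumGenerator(num_strategies=200),
    game_type="zero_sum",
    meta_method="DO",
    empirical_game_size=100,
    seed=0,
    checkpoint_dir="./checkpoints/")
# recorded_games holds the empirical games captured at iterations 10, 20, ..., 100.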
# Load payoffs
# with open("./spinning_top_payoffs.pkl", "rb") as fh:
#   payoffs = pickle.load(fh)

# real_world_meta_games = copy.copy(payoffs)
# # Iterate over games
# print("======================================================")
# for game_name in payoffs:
#     real_world_meta_games[game_name] = [payoffs[game_name], -payoffs[game_name]]
#
#   print(f"Game name: {game_name}")
#   print(f"Number of strategies: {payoffs[game_name].shape[0]}")
#   print(f"Shape of the payoff matrix: {payoffs[game_name].shape}")
#   print("======================================================")
#   print()

# # Sort strategies by mean winrate for nice presentation
# order = np.argsort(-payoffs[game_name].mean(1))
#
# # Plot the payoff
# plt.figure()
# plt.title(game_name)
# plt.imshow(payoffs[game_name][order, :][:, order])
# plt.axis('off')
# plt.show()
# plt.close()

# print([payoffs["RPS"], -payoffs["RPS"]])
# save_pkl(obj=real_world_meta_games, path="./real_world_meta_games.pkl")
meta_games = load_pkl("./real_world_meta_games.pkl")
print(list(meta_games.keys()))
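A small sanity-check sketch for the loaded dictionary; it assumes each value follows the [payoff, -payoff] structure built in the commented block above.

for name, games in meta_games.items():
    # games[0] is the row player's payoff matrix; games[1] should be its negation.
    print(name, games[0].shape)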
def MRCP_regret_comparison(generator,
                           game_type,
                           discount,
                           empirical_game_size=40,
                           checkpoint_dir=None):
    """
    Compare the MRCP regret given by Ameoba method to the regret given by upper-bounded approach.
    The regret of the NE is listed as a benchmark.
    :param generator: a Game generator
    :param game_type: type of the game, options: "symmetric_zero_sum", "zero_sum", "general_sum"
    :param empirical_game_size:
    :return:
    """
    if game_type == "zero_sum":
        meta_games = generator.zero_sum_game()
    elif game_type == "general_sum":
        meta_games = generator.general_sum_game()
    elif game_type == "symmetric_zero_sum":
        meta_games = generator.symmetric_zero_sum_game()
    elif game_type == "kuhn":
        kuhn_meta_games = load_pkl("./kuhn_meta_game.pkl")
        meta_games = kuhn_meta_games[0]
    else:
        raise ValueError("Undefined game type.")

    num_total_strategies = np.shape(meta_games[0])[0]
    num_player = len(meta_games)
    empirical_game = []
    # Generate a random empirical game within the true game.
    for player in range(num_player):
        empirical_game.append(
            sorted(
                list(
                    np.random.choice(range(0, num_total_strategies),
                                     empirical_game_size,
                                     replace=False))))

    # Fix same starting point
    sections = [len(ele) for ele in empirical_game]
    init_var = np.random.rand(sum(sections))

    # Create different MRCP calculator with/without upper-bounded approximation.
    exact_calculator = minimum_regret_profile_calculator(full_game=meta_games,
                                                         var=init_var.copy())
    appro_calculator = minimum_regret_profile_calculator(full_game=meta_games,
                                                         approximation=True,
                                                         var=init_var.copy(),
                                                         discount=discount)

    # Calculate the MRCP and the regret of MRCP with different methods.
    time0 = time.time()
    print("Begin calculating the exact MRCP.")
    mrcp_profile, mrcp_value = exact_calculator(empirical_game=empirical_game)
    print("Finish calculating the exact MRCP.")
    time1 = time.time()
    print("Begin calculating the approximate MRCP.")
    appro_mrcp_profile, appro_mrcp_value = appro_calculator(
        empirical_game=empirical_game)
    print("Finish calculating the approximate MRCP.")
    time2 = time.time()

    # Calculate the NE of the empirical game for comparison.
    _, nashconv = double_oracle(meta_games=meta_games,
                                empirical_games=empirical_game,
                                checkpoint_dir=checkpoint_dir)

    ########## Evaluation ###########
    l2_norm = 0
    for player in range(num_player):
        l2_norm += np.linalg.norm(mrcp_profile[player] -
                                  appro_mrcp_profile[player])

    print("The L2 distance is:", l2_norm)
    print("The regret of MRCP:", mrcp_value)
    print("The regret of approximate MRCP:", appro_mrcp_value)
    print("The regret of NE:", nashconv)
    print("Time without approxiamtion:", time1 - time0)
    print("Time with approximation:", time2 - time1)
    print("MRCP:", profile_filter(mrcp_profile))
    print("Approximate MRCP:", profile_filter(appro_mrcp_profile))

    return l2_norm, mrcp_value, appro_mrcp_value, nashconv
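A matching usage sketch for MRCP_regret_comparison, reusing the hypothetical RandomZeroSumGenerator stub sketched after empirical_game_generator above; the discount value here is purely illustrative.

l2_dist, exact_regret, appro_regret, ne_regret = MRCP_regret_comparison(
    generator=RandomZeroSumGenerator(num_strategies=100),
    game_type="zero_sum",
    discount=0.5,
    empirical_game_size=40,
    checkpoint_dir="./checkpoints/")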