def main(argv):
    seed = np.random.randint(low=0, high=int(1e5))
    root_path = './real_world_' + FLAGS.meta_method + '_regret/'
    if not os.path.exists(root_path):
        os.makedirs(root_path)
    real_world_meta_games = load_pkl('../real_world_games/real_world_meta_games.pkl')

    # game_types = ['10,4-Blotto', 'AlphaStar', 'Kuhn-poker', 'Random game of skill', 'Transitive game',
    #               'connect_four', 'quoridor(board_size=4)', 'misere(game=tic_tac_toe())', 'hex(board_size=3)',
    #               'go(board_size=4,komi=6.5)']
    # for game_type in game_types:

    checkpoint_dir = os.path.join(os.getcwd(), root_path) + FLAGS.game_type + '_' + str(seed)
    if not os.path.exists(checkpoint_dir):
        os.makedirs(checkpoint_dir)
    # Redirect stdout so that all progress messages are logged per run.
    sys.stdout = open(checkpoint_dir + '/stdout.txt', 'w+')

    print("================================================")
    print("======The current game is ", FLAGS.game_type, "=========")
    print("================================================")

    console(meta_games=real_world_meta_games[FLAGS.game_type],
            meta_method=FLAGS.meta_method,
            empirical_game_size=FLAGS.num_emp_strategies,
            num_samples=FLAGS.num_samples,
            checkpoint_dir=checkpoint_dir)
def main(argv):
    if len(argv) > 1:
        raise app.UsageError("Too many command-line arguments.")
    seed = set_random_seed(FLAGS.seed)
    if not FLAGS.MRCP_deterministic:
        # Invalidate the seed so it does not get passed into psro_trainer.
        seed = None

    # root_path = './real_world_supplement_' + FLAGS.closed_method + '/'
    root_path = './real_world_full_' + FLAGS.closed_method + '/'
    if not os.path.exists(root_path):
        os.makedirs(root_path)
    real_world_meta_games = load_pkl('./real_world_games/real_world_meta_games.pkl')

    # game_types = ['10,4-Blotto', 'AlphaStar', 'Kuhn-poker', 'Random game of skill', 'Transitive game',
    #               'connect_four', 'quoridor(board_size=4)', 'misere(game=tic_tac_toe())', 'hex(board_size=3)',
    #               'go(board_size=4,komi=6.5)']

    checkpoint_dir = FLAGS.game_type + "_" + str(seed)
    checkpoint_dir = os.path.join(os.getcwd(), root_path, checkpoint_dir) + '/'
    if not os.path.exists(checkpoint_dir):
        os.makedirs(checkpoint_dir)
    sys.stdout = open(checkpoint_dir + '/stdout.txt', 'w+')

    print("================================================")
    print("======The current game is ", FLAGS.game_type, "=========")
    print("================================================")

    # Cap the number of PSRO iterations at the number of strategies in the full game.
    num_iterations = min(FLAGS.num_iterations,
                         real_world_meta_games[FLAGS.game_type][0].shape[0])

    psro(meta_games=real_world_meta_games[FLAGS.game_type],
         game_type=FLAGS.game_type,
         num_rounds=10,
         seed=seed,
         checkpoint_dir=checkpoint_dir,
         num_iterations=num_iterations,
         closed_method=FLAGS.closed_method)
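# The two entry points above assume absl-style flags and app.run. A minimal
# sketch of plausible flag definitions follows; the flag names are taken from
# the code above, but the defaults and help strings are illustrative
# assumptions, not the repo's actual values.
from absl import app, flags

FLAGS = flags.FLAGS
flags.DEFINE_string("game_type", "Kuhn-poker", "Which meta-game to run on.")
flags.DEFINE_string("meta_method", "DO", "Meta-strategy solver (e.g., DO, FP, MRCP).")
flags.DEFINE_string("closed_method", "alter", "Variant used by the MRCP solver.")
flags.DEFINE_integer("seed", None, "Random seed; None draws a fresh one.")
flags.DEFINE_integer("num_iterations", 60, "Maximum number of PSRO iterations.")
flags.DEFINE_integer("num_emp_strategies", 40, "Size of the sampled empirical game.")
flags.DEFINE_integer("num_samples", 1000, "Number of sampled profiles per evaluation.")
flags.DEFINE_boolean("MRCP_deterministic", True,
                     "If False, the seed is not passed into psro_trainer.")

if __name__ == "__main__":
    app.run(main)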
import math

import matplotlib as mpl
import matplotlib.pyplot as plt

from nash_solver.gambit_tools import load_pkl  # missing in the original snippet

idx = 3
round_idx = '10'  # renamed from `round` to avoid shadowing the builtin
game_type = "szs"
MSS = "FP"

# root_path = './data/' + game_type + '_' + MSS + '/' + game_type + '_' + MSS + str(idx) + '/'
root_path = './data/10,4Blotto/'
regret_path = root_path + '10,4-Blotto_90335regret_of_samples_' + round_idx + '.pkl'
improvement_path = root_path + '10,4-Blotto_90335performance_improvement_' + round_idx + '.pkl'

regret = load_pkl(regret_path)
improvement = load_pkl(improvement_path)

NE_regret = 0.13
NE_improvement = 0.0048
MRCP_regret = 0.055
MRCP_improvement = 0.00013

plt.scatter(regret, improvement)
plt.plot(NE_regret, NE_improvement, '-ro')
# plt.plot(MRCP_regret, MRCP_improvement, '-go')
plt.xticks(size=17)
plt.yticks(size=17)
from nash_solver.gambit_tools import load_pkl
import numpy as np

root_path = './data/'
meta_games = load_pkl(root_path + "meta_games.pkl")
import math
import os

import matplotlib
matplotlib.use("TkAgg")
import matplotlib.pyplot as plt
import matplotlib as mpl
import numpy as np
from scipy.signal import savgol_filter

from nash_solver.gambit_tools import load_pkl

load_path = os.getcwd() + '/data/data1/'

zero_sum_DO = load_pkl(load_path + 'zero_sum_DO.pkl')
zero_sum_FP = load_pkl(load_path + 'zero_sum_FP.pkl')
zero_sum_DO_FP = load_pkl(load_path + 'zero_sum_DO_SP.pkl')

# Average the curves over runs.
zero_sum_DO = np.mean(zero_sum_DO, axis=0)
zero_sum_FP = np.mean(zero_sum_FP, axis=0)
zero_sum_DO_FP = np.mean(zero_sum_DO_FP, axis=0)

# idx = 6
# zero_sum_DO = zero_sum_DO[idx]
# zero_sum_FP = zero_sum_FP[idx]
# zero_sum_DO_FP = zero_sum_DO_FP[idx]

window_size = 15
order = 2

# Focus on fictitious play: keep the even-indexed entries of the alternating
# DO/FP curve. (The loop body was truncated in the source; the append below
# is the inferred completion.)
fic_zero_sum_DO_FP = []
for i in range(len(zero_sum_DO_FP)):
    if i % 2 == 0:
        fic_zero_sum_DO_FP.append(zero_sum_DO_FP[i])
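# The snippet above imports savgol_filter and sets window_size/order, but the
# smoothing step itself is not shown. A minimal sketch of the presumed next
# step, assuming the curves are 1-D arrays of per-iteration values:
smoothed_DO = savgol_filter(zero_sum_DO, window_size, order)
smoothed_FP = savgol_filter(zero_sum_FP, window_size, order)
smoothed_DO_FP = savgol_filter(np.array(fic_zero_sum_DO_FP), window_size, order)

plt.plot(smoothed_DO, label='DO')
plt.plot(smoothed_FP, label='FP')
plt.plot(smoothed_DO_FP, label='DO+FP (FP iterations)')
plt.legend()
plt.show()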
def real_world_game(self, game_name):
    meta_games = load_pkl("./real_world_games/real_world_meta_games.pkl")
    return meta_games[game_name]
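# A minimal usage sketch, assuming `generator` is an instance of the class
# defining real_world_game and the pickle maps game names to per-player
# payoff matrices (names here are illustrative):
meta_games = generator.real_world_game("AlphaStar")
print("Payoff matrix shape:", meta_games[0].shape)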
def psro(generator,
         game_type,
         num_rounds,
         seed,
         checkpoint_dir,
         num_iterations=20,
         closed_method="alter"):
    if game_type == "zero_sum":
        meta_games = generator.zero_sum_game()
    elif game_type == "general_sum":
        meta_games = generator.general_sum_game()
    elif game_type == "symmetric_zero_sum":
        meta_games = generator.symmetric_zero_sum_game()
    elif game_type == "kuhn":
        kuhn_meta_games = load_pkl("./MRCP/kuhn_meta_game.pkl")
        # The first element of kuhn_meta_game.pkl is meta_games.
        meta_games = kuhn_meta_games[0]
        generator.num_strategies = 64
    else:
        for pkl in os.listdir('efg_game'):
            print(pkl)
            if pkl.split('.pkl')[0] == game_type:
                with open('efg_game/' + pkl, 'rb') as f:
                    meta_games = pickle.load(f)
        if 'meta_games' not in locals():
            raise ValueError("No matching .pkl found in efg_game/ for game type: " + game_type)

    # For example 1 in the paper:
    # meta_games = [np.array([[0, -0.1, -3], [0.1, 0, 2], [3, -2, 0]]),
    #               np.array([[0, 0.1, 3], [-0.1, 0, -2], [-3, 2, 0]])]
    # generator.num_strategies = 3
    # num_rounds = 1
    # num_iterations = 10

    init_strategies = np.random.randint(0, meta_games[0].shape[0], num_rounds)

    DO_trainer = PSRO_trainer(meta_games=meta_games,
                              num_strategies=generator.num_strategies,
                              num_rounds=num_rounds,
                              meta_method=double_oracle,
                              checkpoint_dir=checkpoint_dir,
                              num_iterations=num_iterations,
                              seed=seed,
                              init_strategies=init_strategies)

    FP_trainer = PSRO_trainer(meta_games=meta_games,
                              num_strategies=generator.num_strategies,
                              num_rounds=num_rounds,
                              meta_method=fictitious_play,
                              checkpoint_dir=checkpoint_dir,
                              num_iterations=num_iterations,
                              seed=seed,
                              init_strategies=init_strategies)

    PRD_trainer = PSRO_trainer(meta_games=meta_games,
                               num_strategies=generator.num_strategies,
                               num_rounds=num_rounds,
                               meta_method=prd_solver,
                               checkpoint_dir=checkpoint_dir,
                               num_iterations=num_iterations,
                               seed=seed,
                               init_strategies=init_strategies)

    CRD_trainer = PSRO_trainer(meta_games=meta_games,
                               num_strategies=generator.num_strategies,
                               num_rounds=num_rounds,
                               meta_method=regret_controled_RD,
                               checkpoint_dir=checkpoint_dir,
                               num_iterations=num_iterations,
                               seed=seed,
                               init_strategies=init_strategies)

    # IDO_trainer = PSRO_trainer(meta_games=meta_games,
    #                            num_strategies=generator.num_strategies,
    #                            num_rounds=num_rounds,
    #                            meta_method=iterative_double_oracle,
    #                            checkpoint_dir=checkpoint_dir,
    #                            num_iterations=num_iterations,
    #                            seed=seed,
    #                            init_strategies=init_strategies)
    #
    # IPRD_trainer = PSRO_trainer(meta_games=meta_games,
    #                             num_strategies=generator.num_strategies,
    #                             num_rounds=num_rounds,
    #                             meta_method=iterated_prd,
    #                             checkpoint_dir=checkpoint_dir,
    #                             num_iterations=num_iterations,
    #                             seed=seed,
    #                             init_strategies=init_strategies)

    # IDOS_trainer = PSRO_trainer(meta_games=meta_games,
    #                             num_strategies=generator.num_strategies,
    #                             num_rounds=num_rounds,
    #                             meta_method=iterative_double_oracle_player_selection,
    #                             checkpoint_dir=checkpoint_dir,
    #                             num_iterations=num_iterations,
    #                             seed=seed,
    #                             init_strategies=init_strategies)

    MRCP_trainer = PSRO_trainer(meta_games=meta_games,
                                num_strategies=generator.num_strategies,
                                num_rounds=num_rounds,
                                meta_method=mrcp_solver,
                                checkpoint_dir=checkpoint_dir,
                                num_iterations=num_iterations,
                                seed=seed,
                                init_strategies=init_strategies,
                                closed_method=closed_method)

    DO_trainer.loop()
    print("#####################################")
    print('DO looper finished looping')
    print("#####################################")

    if not os.path.exists(checkpoint_dir):
        os.makedirs(checkpoint_dir)
    with open(checkpoint_dir + game_type + '_meta_games.pkl', 'wb') as f:
        pickle.dump(meta_games, f)

    nashconv_names = ['nashconvs_' + str(t) for t in range(len(DO_trainer.neconvs))]
    mrconv_names = ['mrcpcons_' + str(t) for t in range(len(DO_trainer.mrconvs))]
    df = pd.DataFrame(np.transpose(DO_trainer.neconvs + DO_trainer.mrconvs),
                      columns=nashconv_names + mrconv_names)
    df.to_csv(checkpoint_dir + game_type + '_DO.csv', index=False)
    with open(checkpoint_dir + game_type + '_mrprofile_DO.pkl', 'wb') as f:
        pickle.dump(DO_trainer.mrprofiles, f)

    FP_trainer.loop()
    print("#####################################")
    print('FP looper finished looping')
    print("#####################################")
    df = pd.DataFrame(np.transpose(FP_trainer.neconvs + FP_trainer.mrconvs),
                      columns=nashconv_names + mrconv_names)
    df.to_csv(checkpoint_dir + game_type + '_FP.csv', index=False)
    with open(checkpoint_dir + game_type + '_mrprofile_FP.pkl', 'wb') as f:
        pickle.dump(FP_trainer.mrprofiles, f)

    # PRD_trainer.loop()
    # print("#####################################")
    # print('PRD looper finished looping')
    # print("#####################################")
    # df = pd.DataFrame(np.transpose(PRD_trainer.neconvs + PRD_trainer.mrconvs),
    #                   columns=nashconv_names + mrconv_names)
    # df.to_csv(checkpoint_dir + game_type + '_PRD.csv', index=False)
    # with open(checkpoint_dir + game_type + '_mrprofile_PRD.pkl', 'wb') as f:
    #     pickle.dump(PRD_trainer.mrprofiles, f)

    CRD_trainer.loop()
    print("#####################################")
    print('CRD looper finished looping')
    print("#####################################")
    df = pd.DataFrame(np.transpose(CRD_trainer.neconvs + CRD_trainer.mrconvs),
                      columns=nashconv_names + mrconv_names)
    df.to_csv(checkpoint_dir + game_type + '_CRD.csv', index=False)
    with open(checkpoint_dir + game_type + '_mrprofile_CRD.pkl', 'wb') as f:
        pickle.dump(CRD_trainer.mrprofiles, f)

    # IDO_trainer.loop()
    # print("#####################################")
    # print('IDO looper finished looping')
    # print("#####################################")
    # df = pd.DataFrame(np.transpose(IDO_trainer.neconvs + IDO_trainer.mrconvs),
    #                   columns=nashconv_names + mrconv_names)
    # df.to_csv(checkpoint_dir + game_type + '_IDO.csv', index=False)
    # with open(checkpoint_dir + game_type + '_mrprofile_IDO.pkl', 'wb') as f:
    #     pickle.dump(IDO_trainer.mrprofiles, f)
    #
    # IPRD_trainer.loop()
    # print("#####################################")
    # print('IPRD looper finished looping')
    # print("#####################################")
    # df = pd.DataFrame(np.transpose(IPRD_trainer.neconvs + IPRD_trainer.mrconvs),
    #                   columns=nashconv_names + mrconv_names)
    # df.to_csv(checkpoint_dir + game_type + '_IPRD.csv', index=False)
    # with open(checkpoint_dir + game_type + '_mrprofile_IPRD.pkl', 'wb') as f:
    #     pickle.dump(IPRD_trainer.mrprofiles, f)

    # IDOS_trainer.loop()
    # print("#####################################")
    # print('IDOS looper finished looping')
    # print("#####################################")
    # df = pd.DataFrame(np.transpose(IDOS_trainer.neconvs + IDOS_trainer.mrconvs),
    #                   columns=nashconv_names + mrconv_names)
    # df.to_csv(checkpoint_dir + game_type + '_IDOS.csv', index=False)
    # with open(checkpoint_dir + game_type + '_mrprofile_IDOS.pkl', 'wb') as f:
    #     pickle.dump(IDOS_trainer.mrprofiles, f)

    # MRCP_trainer.loop()
    # print("#####################################")
    # print('MRCP looper finished looping')
    # print("#####################################")
    # df = pd.DataFrame(np.transpose(MRCP_trainer.neconvs + MRCP_trainer.mrconvs),
    #                   columns=nashconv_names + mrconv_names)
    # df.to_csv(checkpoint_dir + game_type + '_MRCP.csv', index=False)
    # with open(checkpoint_dir + game_type + '_mrprofile_MRCP.pkl', 'wb') as f:
    #     pickle.dump(MRCP_trainer.mrprofiles, f)

    print("The current game type is ", game_type)
print("DO neco av:", np.mean(DO_trainer.neconvs, axis=0)) print("DO mrcp av:", np.mean(DO_trainer.mrconvs, axis=0)) print("FP fpco av:", np.mean(FP_trainer.nashconvs, axis=0)) print("FP neco av:", np.mean(FP_trainer.neconvs, axis=0)) print("FP mrcp av:", np.mean(FP_trainer.mrconvs, axis=0)) # print("PRD prdco av:", np.mean(PRD_trainer.nashconvs, axis=0)) # print("PRD neco av:", np.mean(PRD_trainer.neconvs, axis=0)) # print("PRD mrcp av:", np.mean(PRD_trainer.mrconvs, axis=0)) print("CRD CRDco av:", np.mean(CRD_trainer.nashconvs, axis=0)) print("CRD neco av:", np.mean(CRD_trainer.neconvs, axis=0)) print("CRD mrcp av:", np.mean(CRD_trainer.mrconvs, axis=0)) # print("IDO IDOco av:", np.mean(IDO_trainer.nashconvs, axis=0)) # print("IDO neco av:", np.mean(IDO_trainer.neconvs, axis=0)) # print("IDO mrcp av:", np.mean(IDO_trainer.mrconvs, axis=0)) # print("IPRD IDOco av:", np.mean(IDO_trainer.nashconvs, axis=0)) # print("IDO neco av:", np.mean(IDO_trainer.neconvs, axis=0)) # print("IDO mrcp av:", np.mean(IDO_trainer.mrconvs, axis=0)) # print("IDOS IDOSco av:", np.mean(IDOS_trainer.nashconvs, axis=0)) # print("IDOS neco av:", np.mean(IDOS_trainer.neconvs, axis=0)) # print("IDOS mrcp av:", np.mean(IDOS_trainer.mrconvs, axis=0)) # print("MR neco av:", np.mean(MRCP_trainer.neconvs, axis=0)) # print("MR mrcp av:", np.mean(MRCP_trainer.mrconvs, axis=0)) print("====================================================")
import numpy as np

from meta_strategies import prd_solver, iterated_quantal_response_solver
from nash_solver.gambit_tools import load_pkl
from nash_solver.replicator_dynamics_solver import replicator_dynamics

meta_games = load_pkl("./data/meta_game.pkl")
num_strs = np.shape(meta_games[0])[0]
print("Num of strs:", num_strs)

# Restrict the 3-player meta-game to its first i strategies per player.
sub_games = []
i = 30
for player in range(3):
    sub_games.append(meta_games[player][:i, :i, :i])

dev_strs, nashconv = prd_solver(sub_games,
                                [list(range(i)), list(range(i)), list(range(i))])
# dev_strs, nashconv = iterated_quantal_response_solver(meta_games, [list(range(num_strs)), list(range(num_strs))])
print(nashconv)
def empirical_game_generator(generator,
                             game_type,
                             meta_method,
                             empirical_game_size,
                             seed=None,
                             checkpoint_dir=None):
    """
    Generate an empirical game which is a subgame of the full matrix game.
    :param generator: a Game generator
    :param game_type: type of the game, options: "symmetric_zero_sum", "zero_sum", "general_sum", "kuhn"
    :param meta_method: method for generating the empirical game, options: "DO", "FP", "MRCP"
    :param empirical_game_size: number of strategies in the generated empirical game
    :param seed: random seed
    :param checkpoint_dir: directory for checkpoints and logs
    :return: the full meta-games and the recorded empirical games
    """
    # Generate the underlying true game.
    if game_type == "zero_sum":
        meta_games = generator.zero_sum_game()
    elif game_type == "general_sum":
        meta_games = generator.general_sum_game()
    elif game_type == "symmetric_zero_sum":
        meta_games = generator.symmetric_zero_sum_game()
    elif game_type == "kuhn":
        kuhn_meta_games = load_pkl("./kuhn_meta_game.pkl")
        # The first element of kuhn_meta_game.pkl is meta_games.
        meta_games = kuhn_meta_games[0]
        empirical_game_size = 52
    else:
        raise ValueError("Undefined game type.")

    # Assume players have the same number of strategies.
    num_strategies = np.shape(meta_games[0])[0]
    if empirical_game_size > num_strategies:
        raise ValueError("The size of the empirical game is larger than the full game.")

    # A list recording at which iterations the empirical game is snapshotted.
    empricial_game_record = list(range(10, 101, 10))
    if empirical_game_size < max(empricial_game_record):
        raise ValueError("The largest recorded snapshot exceeds the generated empirical game size.")

    # Create a meta-trainer.
    if meta_method == "DO":
        trainer = PSRO_trainer(meta_games=meta_games,
                               num_strategies=num_strategies,
                               num_rounds=1,
                               meta_method=double_oracle,
                               checkpoint_dir=checkpoint_dir,
                               num_iterations=empirical_game_size,
                               empricial_game_record=empricial_game_record,
                               seed=seed,
                               init_strategies=None,
                               calculate_neconv=False,
                               calculate_mrcpconv=False)
    elif meta_method == "FP":
        trainer = PSRO_trainer(meta_games=meta_games,
                               num_strategies=num_strategies,
                               num_rounds=1,
                               meta_method=fictitious_play,
                               checkpoint_dir=checkpoint_dir,
                               num_iterations=empirical_game_size,
                               empricial_game_record=empricial_game_record,
                               seed=seed,
                               init_strategies=None,
                               calculate_neconv=False,
                               calculate_mrcpconv=False)
    elif meta_method == "MRCP":
        trainer = PSRO_trainer(meta_games=meta_games,
                               num_strategies=num_strategies,
                               num_rounds=1,
                               meta_method=mrcp_solver,
                               checkpoint_dir=checkpoint_dir,
                               num_iterations=empirical_game_size,
                               empricial_game_record=empricial_game_record,
                               seed=seed,
                               init_strategies=None,
                               calculate_neconv=False,
                               calculate_mrcpconv=False)
    else:
        raise ValueError("Undefined meta-method.")

    # Don't use trainer.iteration() since the empirical game won't be initialized.
    trainer.loop()

    return meta_games, trainer.get_recorded_empirical_game()
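# A minimal usage sketch, again with the hypothetical Game_generator name.
# empirical_game_size must cover the largest snapshot (100) and fit inside
# the full game:
generator = Game_generator(num_strategies=100)
full_game, recorded_empirical_games = empirical_game_generator(
    generator,
    game_type="zero_sum",
    meta_method="DO",
    empirical_game_size=100,
    seed=2021,
    checkpoint_dir="./checkpoints/eg_gen/")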
import numpy as np

from nash_solver.gambit_tools import load_pkl  # missing in the original snippet

# Load payoffs
# with open("./spinning_top_payoffs.pkl", "rb") as fh:
#     payoffs = pickle.load(fh)
# real_world_meta_games = copy.copy(payoffs)
#
# # Iterate over games
# print("======================================================")
# for game_name in payoffs:
#     # Each single-population payoff table becomes a two-player zero-sum game.
#     real_world_meta_games[game_name] = [payoffs[game_name], -payoffs[game_name]]
#
#     print(f"Game name: {game_name}")
#     print(f"Number of strategies: {payoffs[game_name].shape[0]}")
#     print(f"Shape of the payoff matrix: {payoffs[game_name].shape}")
#     print("======================================================")
#     print()
#
#     # Sort strategies by mean winrate for nice presentation
#     order = np.argsort(-payoffs[game_name].mean(1))
#
#     # Plot the payoff
#     plt.figure()
#     plt.title(game_name)
#     plt.imshow(payoffs[game_name][order, :][:, order])
#     plt.axis('off')
#     plt.show()
#     plt.close()

# print([payoffs["RPS"], -payoffs["RPS"]])
# save_pkl(obj=real_world_meta_games, path="./real_world_meta_games.pkl")

meta_games = load_pkl("./real_world_meta_games.pkl")
print(list(meta_games.keys()))
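# A small sanity-check sketch: the commented-out conversion above stores each
# game as a pair [P, -P], so every loaded entry should be exactly zero-sum.
for name, game in meta_games.items():
    assert np.allclose(game[0], -game[1]), name
print("All loaded games are zero-sum.")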
def MRCP_regret_comparison(generator,
                           game_type,
                           discount,
                           empirical_game_size=40,
                           checkpoint_dir=None):
    """
    Compare the MRCP regret given by the amoeba method to the regret given by
    the upper-bounded approach. The regret of the NE is listed as a benchmark.
    :param generator: a Game generator
    :param game_type: type of the game, options: "symmetric_zero_sum", "zero_sum", "general_sum", "kuhn"
    :param discount: discount factor for the upper-bounded approximation
    :param empirical_game_size: number of strategies sampled into the empirical game
    :return: the L2 distance between the two profiles and the three regret values
    """
    if game_type == "zero_sum":
        meta_games = generator.zero_sum_game()
    elif game_type == "general_sum":
        meta_games = generator.general_sum_game()
    elif game_type == "symmetric_zero_sum":
        meta_games = generator.symmetric_zero_sum_game()
    elif game_type == "kuhn":
        kuhn_meta_games = load_pkl("./kuhn_meta_game.pkl")
        meta_games = kuhn_meta_games[0]
    else:
        raise ValueError("Undefined game type.")

    num_total_strategies = np.shape(meta_games[0])[0]
    num_player = len(meta_games)

    # Generate a random empirical game within the true game.
    empirical_game = []
    for player in range(num_player):
        empirical_game.append(
            sorted(list(np.random.choice(range(0, num_total_strategies),
                                         empirical_game_size,
                                         replace=False))))

    # Fix the same starting point for both calculators.
    sections = [len(ele) for ele in empirical_game]
    init_var = np.random.rand(sum(sections))

    # Create MRCP calculators with/without the upper-bounded approximation.
    exact_calculator = minimum_regret_profile_calculator(full_game=meta_games,
                                                         var=init_var.copy())
    appro_calculator = minimum_regret_profile_calculator(full_game=meta_games,
                                                         approximation=True,
                                                         var=init_var.copy(),
                                                         discount=discount)

    # Calculate the MRCP and its regret with both methods.
    time0 = time.time()
    print("Begin calculating the exact MRCP.")
    mrcp_profile, mrcp_value = exact_calculator(empirical_game=empirical_game)
    print("Finish calculating the exact MRCP.")
    time1 = time.time()
    print("Begin calculating the approximate MRCP.")
    appro_mrcp_profile, appro_mrcp_value = appro_calculator(empirical_game=empirical_game)
    print("Finish calculating the approximate MRCP.")
    time2 = time.time()

    # Calculate the NE of the empirical game for comparison.
    _, nashconv = double_oracle(meta_games=meta_games,
                                empirical_games=empirical_game,
                                checkpoint_dir=checkpoint_dir)

    ########## Evaluation ##########
    l2_norm = 0
    for player in range(num_player):
        l2_norm += np.linalg.norm(mrcp_profile[player] - appro_mrcp_profile[player])

    print("The L2 distance is:", l2_norm)
    print("The regret of MRCP:", mrcp_value)
    print("The regret of approximate MRCP:", appro_mrcp_value)
    print("The regret of NE:", nashconv)
    print("Time without approximation:", time1 - time0)
    print("Time with approximation:", time2 - time1)
    print("MRCP:", profile_filter(mrcp_profile))
    print("Approximate MRCP:", profile_filter(appro_mrcp_profile))

    return l2_norm, mrcp_value, appro_mrcp_value, nashconv
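# A minimal driver sketch for the comparison above, averaging over a few
# random empirical games (hypothetical Game_generator name, as elsewhere):
if __name__ == "__main__":
    results = []
    for _ in range(5):
        generator = Game_generator(num_strategies=100)
        results.append(MRCP_regret_comparison(generator,
                                              game_type="zero_sum",
                                              discount=0.9,
                                              empirical_game_size=40))
    # Columns: L2 distance, exact MRCP regret, approximate MRCP regret, NE regret.
    print("Averages over trials:", np.mean(np.array(results), axis=0))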