def env_rand_gen_and_save(env_name, num_attr_N=11, num_attr_E=4, T=10,
                          graphid=1, numNodes=30, numEdges=100, numRoot=4,
                          numGoals=6, history=3):
    # Generate a random attack-graph environment (a DAG) and pickle it
    # under ./env_data/<env_name>.pkl.
    env = Environment(num_attr_N=num_attr_N, num_attr_E=num_attr_E, T=T,
                      graphid=graphid, numNodes=numNodes, numEdges=numEdges,
                      numRoot=numRoot, numGoals=numGoals, history=history)
    env.randomDAG()
    path = os.getcwd() + "/env_data/" + env_name + ".pkl"
    print("env path is ", path)
    # if fp.isExist(path):
    #     raise ValueError("Env with such name already exists.")
    fp.save_pkl(env, path)
    print(env_name + " has been saved.")
    return env
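
# fp.save_pkl / fp.load_pkl are used throughout this module; a minimal sketch
# of the assumed behavior (the real attackgraph.file_op helpers may use a
# different pickle protocol or add existence checks):
import pickle

def save_pkl(obj, path):
    # Serialize obj to path, overwriting any existing file.
    with open(path, 'wb') as f:
        pickle.dump(obj, f)

def load_pkl(path):
    # Deserialize and return the object stored at path.
    with open(path, 'rb') as f:
        return pickle.load(f)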
def sim_and_modifiy_Series():
    # TODO: make sure this is correct
    print('Begin simulation and modify payoff matrix.')
    path = os.getcwd() + '/data/game.pkl'
    game = fp.load_pkl(path)
    env = game.env
    num_episodes = game.num_episodes

    # TODO: add str first and then calculate payoff
    old_dim, old_dim1 = game.dim_payoff_def()
    new_dim, new_dim1 = game.num_str()
    if old_dim != old_dim1 or new_dim != new_dim1:
        raise ValueError("Payoff dimension does not match.")

    def_str_list = game.def_str
    att_str_list = game.att_str

    # Positions of the new column (all rows except the last, which is covered
    # by the new row) and the new row (all columns) bordering the old matrix.
    position_col_list = []
    position_row_list = []
    for i in range(new_dim - 1):
        position_col_list.append((i, new_dim - 1))
    for j in range(new_dim):
        position_row_list.append((new_dim - 1, j))

    att_col = []
    att_row = []
    def_col = []
    def_row = []

    # TODO: check the path is correct
    for pos in position_col_list:
        idx_def, idx_att = pos
        # if MPI_flag:
        #     aReward, dReward = do_MPI_sim(att_str_list[idx_att], def_str_list[idx_def])
        # else:
        aReward, dReward = series_sim(env, game, att_str_list[idx_att],
                                      def_str_list[idx_def], num_episodes)
        att_col.append(aReward)
        def_col.append(dReward)

    for pos in position_row_list:
        idx_def, idx_att = pos
        # if MPI_flag:
        #     aReward, dReward = do_MPI_sim(att_str_list[idx_att], def_str_list[idx_def])
        # else:
        aReward, dReward = series_sim(env, game, att_str_list[idx_att],
                                      def_str_list[idx_def], num_episodes)
        att_row.append(aReward)
        def_row.append(dReward)

    game.add_col_att(np.reshape(np.round(np.array(att_col), 2), newshape=(len(att_col), 1)))
    game.add_col_def(np.reshape(np.round(np.array(def_col), 2), newshape=(len(def_col), 1)))
    game.add_row_att(np.round(np.array(att_row), 2)[None])
    game.add_row_def(np.round(np.array(def_row), 2)[None])
    fp.save_pkl(game, path=path)

    print("Done simulation and modify payoff matrix.")
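
# What the add_col_*/add_row_* calls above accomplish, in isolation: with one
# new strategy per player, the (n-1) x (n-1) payoff matrix is bordered by a
# new column (old defender strategies vs. the new attacker strategy) and a
# new row (the new defender strategy vs. all n attacker strategies). A
# self-contained sketch of that bordering, assuming the game methods wrap
# plain concatenation:
import numpy as np

old = np.zeros((2, 2))                 # existing payoff matrix, n-1 = 2
new_col = np.array([[1.5], [2.5]])     # shape (n-1, 1), from position_col_list
new_row = np.array([[3.5, 4.5, 5.5]])  # shape (1, n), from position_row_list
extended = np.vstack([np.hstack([old, new_col]), new_row])
print(extended.shape)                  # (3, 3)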
def do_MPI_sim_retrain(nn_att, nn_def):
    # Save the strategy arguments, launch the MPI retraining simulator
    # num_mpirun times, and average the returned rewards across runs.
    path = os.getcwd()
    path_att = path + '/sim_arg/nn_att.pkl'
    path_def = path + '/sim_arg/nn_def.pkl'
    fp.save_pkl(nn_att, path_att)
    fp.save_pkl(nn_def, path_def)

    command_line = "mpirun python " + path + "/sim_MPI_retrain.py"

    aReward_list = []
    dReward_list = []
    num_mpirun = 5
    for i in range(num_mpirun):
        call_and_wait(command_line)
        aReward, dReward = fp.load_pkl(path + '/sim_arg/result.pkl')
        aReward_list.append(aReward)
        dReward_list.append(dReward)

    # return aReward, dReward
    return np.sum(aReward_list) / num_mpirun, np.sum(dReward_list) / num_mpirun
def sim_and_modifiy_MPI():
    # Worker entry point for sim_MPI.py: every rank simulates once, then
    # rank 0 gathers and averages the rewards into sim_arg/result.pkl.
    path = os.getcwd()
    game_path = os.getcwd() + '/game_data/game.pkl'
    game = fp.load_pkl(game_path)

    comm = MPI.COMM_WORLD
    rank = comm.rank
    size = comm.size

    nn_att = fp.load_pkl(path + '/sim_arg/nn_att.pkl')
    nn_def = fp.load_pkl(path + '/sim_arg/nn_def.pkl')

    # aReward, dReward = series_sim(game.env, game, nn_att, nn_def, size)
    aReward, dReward = series_sim_single(game.env, game, nn_att, nn_def)
    reward_tuple = (aReward, dReward)

    data = comm.gather(reward_tuple, root=0)

    if rank == 0:
        data = np.array(data)
        fp.save_pkl(np.round(np.sum(data, 0) / size, 1),
                    path=path + '/sim_arg/result.pkl')
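
# The gather-and-average pattern above, in isolation: every rank contributes
# one (attacker, defender) reward tuple and rank 0 averages them. A
# self-contained sketch (assumes mpi4py; run with e.g. `mpirun -n 4 python demo.py`):
from mpi4py import MPI
import numpy as np

comm = MPI.COMM_WORLD
reward_tuple = (float(comm.rank), float(-comm.rank))  # stand-in rewards
data = comm.gather(reward_tuple, root=0)
if comm.rank == 0:
    # Mean over ranks, rounded as in sim_and_modifiy_MPI.
    print(np.round(np.sum(np.array(data), 0) / comm.size, 1))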
def do_MPI_sim(nn_att, nn_def):
    path = os.getcwd()
    path_att = path + '/sim_arg/nn_att.pkl'
    path_def = path + '/sim_arg/nn_def.pkl'
    fp.save_pkl(nn_att, path_att)
    fp.save_pkl(nn_def, path_def)

    command_line = "mpirun python " + path + "/sim_MPI.py"

    # aReward_list = []
    # dReward_list = []
    # num_mpirun = 5
    # for i in range(num_mpirun):
    #     call_and_wait(command_line)
    #     aReward, dReward = fp.load_pkl(path + '/sim_arg/result.pkl')
    #     aReward_list.append(aReward)
    #     dReward_list.append(dReward)

    call_and_wait(command_line)
    # sim_and_modifiy_MPI()
    aReward, dReward = fp.load_pkl(path + '/sim_arg/result.pkl')

    return aReward, dReward
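
# call_and_wait is not defined in this file; a minimal sketch of what it is
# assumed to do (launch the command and block until it exits; the project's
# actual helper may add polling or delays):
import subprocess

def call_and_wait(command_line):
    # shell=True so the "mpirun python ..." string is parsed by the shell.
    p = subprocess.Popen(command_line, shell=True)
    p.wait()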
def EGTA_restart(restart_epoch, start_hado=2, retrain=False,
                 game_path=os.getcwd() + '/game_data/game.pkl'):
    if retrain:
        print("=======================================================")
        print("============Continue Running HADO-EGTA=================")
        print("=======================================================")
    else:
        print("=======================================================")
        print("=============Continue Running DO-EGTA==================")
        print("=======================================================")

    epoch = restart_epoch - 1
    sys.stdout.flush()

    arg_path = os.getcwd() + '/inner_egta_arg/'
    hado_arg = (start_hado, retrain)
    epoch_arg = epoch
    fp.save_pkl(hado_arg, path=arg_path + 'hado_arg.pkl')
    fp.save_pkl(epoch_arg, path=arg_path + 'epoch_arg.pkl')

    count = 8 - restart_epoch
    while count != 0:
    # while True:
        # Train best responses and extend the payoff matrix.
        do_train_and_sim()
        game = fp.load_pkl(game_path)
        epoch = fp.load_pkl(arg_path + 'epoch_arg.pkl')

        # Find a Nash equilibrium using Gambit analysis.
        payoffmatrix_def = game.payoffmatrix_def
        payoffmatrix_att = game.payoffmatrix_att
        print("Begin Gambit analysis.")
        nash_att, nash_def = ga.do_gambit_analysis(payoffmatrix_def, payoffmatrix_att)
        ga.add_new_NE(game, nash_att, nash_def, epoch)
        game.env.attacker.nn_att = None
        game.env.defender.nn_def = None
        fp.save_pkl(game, game_path)

        print("Round_" + str(epoch) + " is done and the game has been saved.")
        print("=======================================================")
        # break
        count -= 1
        sys.stdout.flush()

    # TODO: make sure this is correct.
    print("END EPOCH: " + str(epoch))
    print(datetime.datetime.now())
def EGTA(start_hado=2, retrain=False, epoch=1,
         game_path=os.getcwd() + '/game_data/game.pkl'):
    if retrain:
        print("=======================================================")
        print("==============Begin Running HADO-EGTA==================")
        print("=======================================================")
    else:
        print("=======================================================")
        print("===============Begin Running DO-EGTA===================")
        print("=======================================================")

    sys.stdout.flush()

    arg_path = os.getcwd() + '/inner_egta_arg/'
    hado_arg = (start_hado, retrain)
    epoch_arg = epoch
    fp.save_pkl(hado_arg, path=arg_path + 'hado_arg.pkl')
    fp.save_pkl(epoch_arg, path=arg_path + 'epoch_arg.pkl')

    count = 18
    while count != 0:
    # while True:
        # Train best responses and extend the payoff matrix.
        do_train_and_sim()
        game = fp.load_pkl(game_path)
        epoch = fp.load_pkl(arg_path + 'epoch_arg.pkl')

        # Find a Nash equilibrium using Gambit analysis.
        payoffmatrix_def = game.payoffmatrix_def
        payoffmatrix_att = game.payoffmatrix_att
        print("Begin Gambit analysis.")
        nash_att, nash_def = ga.do_gambit_analysis(payoffmatrix_def, payoffmatrix_att)
        ga.add_new_NE(game, nash_att, nash_def, epoch)
        fp.save_pkl(game, game_path)

        print("Round_" + str(epoch) + " is done and the game has been saved.")
        print("=======================================================")
        # break
        count -= 1
        sys.stdout.flush()

    # TODO: make sure this is correct.
    print("END: " + str(epoch))
    os._exit(os.EX_OK)
def whole_payoff_matrix(num_str, child_partition, env_name='run_env_B',
                        save_path=None, matrix_name=None):
    """
    Simulate the complete payoff matrix of the combined game.
    :param num_str: total number of strategies across all child games.
    :param child_partition: a dict recording the number of strategies in each
        child game, e.g., {"baselines": 40, "RS": 40}.
    :param env_name: name of the environment to load.
    :param save_path: directory where the payoff matrices are saved;
        defaults to ./combined_game/matrice/.
    :param matrix_name: optional suffix for the saved matrix file names.
    :return: the attacker and defender payoff matrices.
    """
    print('Begin simulating payoff matrix of combined game.')
    print("*********************************************")
    print("*********************************************")
    game = initialize(load_env=env_name, env_name=None)
    print("*********************************************")
    print("*********************************************")
    sys.stdout.flush()

    env = game.env
    num_episodes = game.num_episodes

    # Assume both players have the same total number of strategies.
    payoff_matrix_att = np.zeros((num_str, num_str))
    payoff_matrix_def = np.zeros((num_str, num_str))

    att_str_dict = load_policies(game, child_partition, identity=1)
    def_str_dict = load_policies(game, child_partition, identity=0)

    # method_pos_def records the starting index of each method's block of
    # rows in the combined matrix.
    method_pos_def = 0
    for key_def in child_partition:
        for i in np.arange(1, child_partition[key_def] + 1):
            def_str = key_def + '/defender_strategies/def_str_epoch' + str(i + 1) + '.pkl'
            entry_pos_def = method_pos_def + i
            method_pos_att = 0
            for key_att in child_partition:
                print('Current Method is ', (key_def, key_att),
                      "Defender's pos is ", i + 1,
                      '# attacker strategies is ', child_partition[key_att])
                sys.stdout.flush()
                for j in np.arange(1, child_partition[key_att] + 1):
                    att_str = key_att + '/attacker_strategies/att_str_epoch' + str(j + 1) + '.pkl'
                    entry_pos_att = method_pos_att + j

                    # print current simulation info.
                    # if j == child_partition[key_att]:
                    #     print("----------------------------------------------------")
                    #     print('Current position:', (i+1, j+1), 'Pos:', (entry_pos_def-1, entry_pos_att-1))
                    #     sys.stdout.flush()

                    att_nn = att_str_dict[att_str]
                    def_nn = def_str_dict[def_str]

                    aReward, dReward = series_sim_combined(env, att_nn, def_nn,
                                                           num_episodes=num_episodes)

                    payoff_matrix_att[entry_pos_def - 1, entry_pos_att - 1] = aReward
                    payoff_matrix_def[entry_pos_def - 1, entry_pos_att - 1] = dReward

                # Update the starting position of the next method's block.
                method_pos_att += child_partition[key_att]

            # Periodically save the payoff matrices.
            if save_path is None:
                save_path = os.getcwd() + '/combined_game/matrice/'
            if matrix_name is None:
                fp.save_pkl(payoff_matrix_att, save_path + 'payoff_matrix_att.pkl')
                fp.save_pkl(payoff_matrix_def, save_path + 'payoff_matrix_def.pkl')
            else:
                fp.save_pkl(payoff_matrix_att, save_path + 'payoff_matrix_att_' + matrix_name + '.pkl')
                fp.save_pkl(payoff_matrix_def, save_path + 'payoff_matrix_def_' + matrix_name + '.pkl')

        method_pos_def += child_partition[key_def]

    print('Done simulating payoff matrix of combined game.')
    return payoff_matrix_att, payoff_matrix_def
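
# The offset bookkeeping above, in isolation: method_pos_def/method_pos_att
# give each child game a contiguous block of rows/columns in the combined
# matrix. A self-contained check with a toy partition:
child_partition = {'baselines': 2, 'RS': 3}
entries = []
method_pos = 0
for key in child_partition:
    for i in range(1, child_partition[key] + 1):
        entries.append((key, method_pos + i))  # 1-based entry position
    method_pos += child_partition[key]
print(entries)
# [('baselines', 1), ('baselines', 2), ('RS', 3), ('RS', 4), ('RS', 5)]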
def initialize(load_env=None, env_name=None):
    print("=======================================================")
    print("=======Begin Initialization and first epoch============")
    print("=======================================================")

    # Create environment.
    if isinstance(load_env, str):
        path = os.getcwd() + '/env_data/' + load_env + '.pkl'
        if not fp.isExist(path):
            raise ValueError("The env being loaded does not exist.")
        env = fp.load_pkl(path)
    else:
        # A new env is created and saved.
        env = dag.env_rand_gen_and_save(env_name)

    # Save graph and mask copies.
    env.save_graph_copy()
    env.save_mask_copy()

    # Create players and point them to their env.
    env.create_players()
    env.create_action_space()

    # Load parameters.
    param_path = os.getcwd() + '/network_parameters/param.json'
    param = jp.load_json_data(param_path)

    # Initialize game data.
    game = game_data.Game_data(env, num_episodes=param['num_episodes'],
                               threshold=param['threshold'])
    game.set_hado_param(param=param['hado_param'])
    game.set_hado_time_step(param['retrain_timesteps'])
    game.env.defender.set_env_belong_to(game.env)
    game.env.attacker.set_env_belong_to(game.env)

    env.defender.set_env_belong_to(env)
    env.attacker.set_env_belong_to(env)

    # The uniform strategy has been produced ahead of time.
    print("epoch 1:", datetime.datetime.now())
    epoch = 1

    act_att = 'att_str_epoch1.pkl'
    act_def = 'def_str_epoch1.pkl'

    game.add_att_str(act_att)
    game.add_def_str(act_def)

    print('Begin simulation for uniform strategy.')
    sys.stdout.flush()

    # Simulate using the uniform (random) strategies and initialize the
    # payoff matrix.
    # if MPI_flag:
    #     aReward, dReward = do_MPI_sim(act_att, act_def)
    # else:
    aReward, dReward = series_sim(game.env, game, act_att, act_def, game.num_episodes)
    print('Done simulation for uniform strategy.')
    sys.stdout.flush()

    game.init_payoffmatrix(dReward, aReward)
    ne = {}
    ne[0] = np.array([1], dtype=np.float32)
    ne[1] = np.array([1], dtype=np.float32)
    game.add_nasheq(epoch, ne)

    # Save a copy of the game data.
    game_path = os.getcwd() + '/game_data/game.pkl'
    fp.save_pkl(game, game_path)

    sys.stdout.flush()
    return game
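
# jp.load_json_data is assumed to be a thin wrapper over the json module; a
# sketch, together with the keys that initialize() expects param.json to
# provide (the values shown are illustrative, not the project's settings):
import json

def load_json_data(path):
    with open(path) as f:
        return json.load(f)

# Expected shape of network_parameters/param.json (illustrative values only):
# {"num_episodes": 200, "threshold": 0.1,
#  "hado_param": ..., "retrain_timesteps": ...}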
def EGTA_restart(restart_epoch, start_hado=2, retrain=False, transfer=False,
                 game_path=os.getcwd() + '/game_data/game.pkl'):
    if retrain:
        print("=======================================================")
        print("============Continue Running HADO-EGTA=================")
        print("=======================================================")
    else:
        print("=======================================================")
        print("=============Continue Running DO-EGTA==================")
        print("=======================================================")

    epoch = restart_epoch - 1

    game = fp.load_pkl(game_path)
    env = game.env

    retrain_start = False

    count = 8 - restart_epoch
    while count != 0:
    # while True:
        # Fix the opponent's strategy to the current equilibrium mixture.
        mix_str_def = game.nasheq[epoch][0]
        mix_str_att = game.nasheq[epoch][1]
        aPayoff, dPayoff = util.payoff_mixed_NE(game, epoch)

        game.att_payoff.append(aPayoff)
        game.def_payoff.append(dPayoff)

        # Increase epoch.
        epoch += 1
        print("Current epoch is " + str(epoch))
        print("epoch " + str(epoch) + ':', datetime.datetime.now())

        # Train and save RL agents.
        if retrain and epoch > start_hado:
            retrain_start = True

        print("Begin training attacker......")
        a_BD = training.training_att(game, mix_str_def, epoch,
                                     retrain=retrain_start, transfer=transfer)
        print("Attacker training done......")

        print("Begin training defender......")
        d_BD = training.training_def(game, mix_str_att, epoch,
                                     retrain=retrain_start, transfer=transfer)
        print("Defender training done......")

        if retrain and epoch > start_hado:
            print("Begin retraining attacker......")
            training.training_hado_att(game)
            print("Attacker retraining done......")

            print("Begin retraining defender......")
            training.training_hado_def(game)
            print("Defender retraining done......")

            # Simulate the retrained strategies and choose the best one as
            # the player's strategy.
            print('Begin retrained sim......')
            a_BD, d_BD = sim_retrain(env, game, mix_str_att, mix_str_def, epoch)
            print('Done retrained sim......')

        game.att_BD_list.append(a_BD)
        game.def_BD_list.append(d_BD)

        # else:
        #
        #     # Judge beneficial deviation:
        #     # one player plays its new nn while the other plays the NE mixture.
        #     print("Simulating attacker payoff. New strategy vs. mixed opponent strategy.")
        #     nn_att = "att_str_epoch" + str(epoch) + ".pkl"
        #     nn_def = mix_str_def
        #     # if MPI_flag:
        #     #     a_BD, _ = do_MPI_sim(nn_att, nn_def)
        #     # else:
        #     a_BD, _ = series_sim(env, game, nn_att, nn_def, game.num_episodes)
        #     print("Simulation done for a_BD.")
        #
        #     print("Simulating defender's payoff. New strategy vs. mixed opponent strategy.")
        #     nn_att = mix_str_att
        #     nn_def = "def_str_epoch" + str(epoch) + ".pkl"
        #     # if MPI_flag:
        #     #     _, d_BD = do_MPI_sim(nn_att, nn_def)
        #     # else:
        #     _, d_BD = series_sim(env, game, nn_att, nn_def, game.num_episodes)
        #     print("Simulation done for d_BD.")

        # TODO: This may lead to early stop.
        # if a_BD - aPayoff < game.threshold and d_BD - dPayoff < game.threshold:
        #     print("*************************")
        #     print("aPayoff=", aPayoff, " ", "dPayoff=", dPayoff)
        #     print("a_BD=", a_BD, " ", "d_BD=", d_BD)
        #     print("*************************")
        #     break

        game.add_att_str("att_str_epoch" + str(epoch) + ".pkl")
        game.add_def_str("def_str_epoch" + str(epoch) + ".pkl")

        # Simulate and extend the payoff matrix.
        game = sim_Series.sim_and_modifiy_Series_with_game(game)

        # Find a Nash equilibrium using Gambit analysis.
        payoffmatrix_def = game.payoffmatrix_def
        payoffmatrix_att = game.payoffmatrix_att
        print("Begin Gambit analysis.")
        nash_att, nash_def = ga.do_gambit_analysis(payoffmatrix_def, payoffmatrix_att)
        ga.add_new_NE(game, nash_att, nash_def, epoch)
        game.env.attacker.nn_att = None
        game.env.defender.nn_def = None
        fp.save_pkl(game, game_path)

        print('a_BD_list', game.att_BD_list)
        print('aPayoff', game.att_payoff)
        print('d_BD_list', game.def_BD_list)
        print('dPayoff', game.def_payoff)

        print("Round_" + str(epoch) + " is done and the game has been saved.")
        print("=======================================================")
        # break
        count -= 1
        sys.stdout.flush()

    # TODO: make sure this is correct.
    print("END EPOCH: " + str(epoch))
    print(datetime.datetime.now())
def EGTA(env, game, start_hado=2, retrain=False, transfer=False, epoch=1,
         game_path=os.getcwd() + '/game_data/game.pkl'):
    if retrain:
        print("=======================================================")
        print("==============Begin Running HADO-EGTA==================")
        print("=======================================================")
    else:
        print("=======================================================")
        print("===============Begin Running DO-EGTA===================")
        print("=======================================================")

    retrain_start = False

    proc = psutil.Process(os.getpid())

    count = 18
    while count != 0:
    # while True:
        mem0 = proc.memory_info().rss

        # Fix the opponent's strategy to the current equilibrium mixture.
        mix_str_def = game.nasheq[epoch][0]
        mix_str_att = game.nasheq[epoch][1]

        # TODO: play against uniform
        # mix_str_def = np.zeros(len(game.nasheq[epoch][0]))
        # mix_str_def[0] = 1
        # mix_str_att = np.zeros(len(game.nasheq[epoch][1]))
        # mix_str_att[0] = 1

        aPayoff, dPayoff = util.payoff_mixed_NE(game, epoch)

        game.att_payoff.append(aPayoff)
        game.def_payoff.append(dPayoff)

        # Increase epoch.
        epoch += 1
        print("Current epoch is " + str(epoch))
        print("epoch " + str(epoch) + ':', datetime.datetime.now())

        # Train and save RL agents.
        if retrain and epoch > start_hado:
            retrain_start = True

        # No previous strategy exists at epoch 2, so transfer is skipped there.
        if epoch == 2 and transfer:
            transfer_flag = False
        elif transfer:
            transfer_flag = True
        else:
            transfer_flag = False

        print("Begin training attacker......")
        a_BD = training.training_att(game, mix_str_def, epoch,
                                     retrain=retrain_start, transfer=transfer_flag)
        print("Attacker training done......")

        print("Begin training defender......")
        d_BD = training.training_def(game, mix_str_att, epoch,
                                     retrain=retrain_start, transfer=transfer_flag)
        print("Defender training done......")

        mem1 = proc.memory_info().rss

        if retrain and epoch > start_hado:
            print("Begin retraining attacker......")
            training.training_hado_att(game, transfer=transfer_flag)
            print("Attacker retraining done......")

            print("Begin retraining defender......")
            training.training_hado_def(game, transfer=transfer_flag)
            print("Defender retraining done......")

            # Simulate the retrained strategies and choose the best one as
            # the player's strategy.
            print('Begin retrained sim......')
            a_BD, d_BD = sim_retrain(env, game, mix_str_att, mix_str_def, epoch)
            print('Done retrained sim......')

        game.att_BD_list.append(a_BD)
        game.def_BD_list.append(d_BD)

        # else:
        #
        #     # Judge beneficial deviation:
        #     # one player plays its new nn while the other plays the NE mixture.
        #     print("Simulating attacker payoff. New strategy vs. mixed opponent strategy.")
        #     nn_att = "att_str_epoch" + str(epoch) + ".pkl"
        #     nn_def = mix_str_def
        #     # if MPI_flag:
        #     #     a_BD, _ = do_MPI_sim(nn_att, nn_def)
        #     # else:
        #     a_BD, _ = series_sim(env, game, nn_att, nn_def, game.num_episodes)
        #     print("Simulation done for a_BD.")
        #
        #     print("Simulating defender's payoff. New strategy vs. mixed opponent strategy.")
        #     nn_att = mix_str_att
        #     nn_def = "def_str_epoch" + str(epoch) + ".pkl"
        #     # if MPI_flag:
        #     #     _, d_BD = do_MPI_sim(nn_att, nn_def)
        #     # else:
        #     _, d_BD = series_sim(env, game, nn_att, nn_def, game.num_episodes)
        #     print("Simulation done for d_BD.")

        mem2 = proc.memory_info().rss

        # TODO: This may lead to early stop.
        # if a_BD - aPayoff < game.threshold and d_BD - dPayoff < game.threshold:
        #     print("*************************")
        #     print("aPayoff=", aPayoff, " ", "dPayoff=", dPayoff)
        #     print("a_BD=", a_BD, " ", "d_BD=", d_BD)
        #     print("*************************")
        #     break

        game.add_att_str("att_str_epoch" + str(epoch) + ".pkl")
        game.add_def_str("def_str_epoch" + str(epoch) + ".pkl")

        # Simulate and extend the payoff matrix.
        # game = sim_Series.sim_and_modifiy_Series_with_game(game, MPI_flag=MPI_flag)
        game = sim_Series.sim_and_modifiy_Series_with_game(game)
        mem3 = proc.memory_info().rss

        # Find a Nash equilibrium using Gambit analysis.
        payoffmatrix_def = game.payoffmatrix_def
        payoffmatrix_att = game.payoffmatrix_att
        print("Begin Gambit analysis.")
        nash_att, nash_def = ga.do_gambit_analysis(payoffmatrix_def, payoffmatrix_att)
        ga.add_new_NE(game, nash_att, nash_def, epoch)
        game.env.attacker.nn_att = None
        game.env.defender.nn_def = None
        fp.save_pkl(game, game_path)

        print('a_BD_list', game.att_BD_list)
        print('aPayoff', game.att_payoff)
        print('d_BD_list', game.def_BD_list)
        print('dPayoff', game.def_payoff)

        print("Round_" + str(epoch) + " is done and the game has been saved.")
        print("=======================================================")
        # break

        # Memory growth relative to the start of this iteration.
        print("MEM:", (mem1 - mem0) / mem0, (mem2 - mem0) / mem0, (mem3 - mem0) / mem0)
        count -= 1
        sys.stdout.flush()

    # TODO: make sure this is correct.
    print("END: " + str(epoch))
    os._exit(os.EX_OK)
import os

import numpy as np

from attackgraph import file_op as fp

DIR_def = os.getcwd() + '/defender_strategies/'
DIR_att = os.getcwd() + '/attacker_strategies/'


def act_att(ob, mask, training_flag, stochastic=True, update_eps=-1):
    # Uniform attacker strategy: pick a legal action (mask value 0) at random.
    if training_flag != 1:
        raise ValueError("training flag for uniform att str is not 1")
    legal_action = np.where(mask[0] == 0)[0]
    return [np.random.choice(legal_action)]


def act_def(ob, mask, training_flag, stochastic=True, update_eps=-1):
    # Uniform defender strategy: pick a legal action (mask value 0) at random.
    if training_flag != 0:
        raise ValueError("training flag for uniform def str is not 0")
    legal_action = np.where(mask[0] == 0)[0]
    return [np.random.choice(legal_action)]


fp.save_pkl(act_att, DIR_att + "att_str_epoch" + str(1) + ".pkl")
fp.save_pkl(act_def, DIR_def + "def_str_epoch" + str(1) + ".pkl")
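
# The masked uniform choice used by act_att/act_def, in isolation: a mask
# value of 0 marks a legal action, so np.where(mask[0] == 0)[0] lists the
# legal indices to sample from.
import numpy as np

mask = np.array([[0, 1, 0, 0, 1]])     # actions 0, 2 and 3 are legal
legal_action = np.where(mask[0] == 0)[0]
print(np.random.choice(legal_action))  # prints one of 0, 2, 3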
def train_and_sim():
    arg_path = os.getcwd() + '/inner_egta_arg/'
    start_hado, retrain = fp.load_pkl(arg_path + 'hado_arg.pkl')
    epoch = fp.load_pkl(arg_path + 'epoch_arg.pkl')

    game_path = os.getcwd() + '/game_data/game.pkl'
    game = fp.load_pkl(game_path)
    env = game.env

    retrain_start = False

    mix_str_def = game.nasheq[epoch][0]
    mix_str_att = game.nasheq[epoch][1]
    aPayoff, dPayoff = util.payoff_mixed_NE(game, epoch)

    game.att_payoff.append(aPayoff)
    game.def_payoff.append(dPayoff)

    # increase epoch
    epoch += 1
    print("Current epoch is " + str(epoch))
    print("epoch " + str(epoch) + ':', datetime.datetime.now())

    # train and save RL agents
    if retrain and epoch > start_hado:
        retrain_start = True

    print("Begin training attacker......")
    a_BD = training.training_att(game, mix_str_def, epoch, retrain=retrain_start)
    print("Attacker training done......")

    print("Begin training defender......")
    d_BD = training.training_def(game, mix_str_att, epoch, retrain=retrain_start)
    print("Defender training done......")

    if retrain and epoch > start_hado:
        print("Begin retraining attacker......")
        training.training_hado_att(game)
        print("Attacker retraining done......")

        print("Begin retraining defender......")
        training.training_hado_def(game)
        print("Defender retraining done......")

        # Simulation for retrained strategies; choose the best one as the
        # player's strategy.
        print('Begin retrained sim......')
        a_BD, d_BD = sim_retrain(env, game, mix_str_att, mix_str_def, epoch)
        print('Done retrained sim......')

    game.att_BD_list.append(a_BD)
    game.def_BD_list.append(d_BD)

    # else:
    #
    #     # Judge beneficial deviation:
    #     # one plays nn and another plays ne strategy
    #     print("Simulating attacker payoff. New strategy vs. mixed opponent strategy.")
    #     nn_att = "att_str_epoch" + str(epoch) + ".pkl"
    #     nn_def = mix_str_def
    #     # if MPI_flag:
    #     #     a_BD, _ = do_MPI_sim(nn_att, nn_def)
    #     # else:
    #     a_BD, _ = series_sim(env, game, nn_att, nn_def, game.num_episodes)
    #     print("Simulation done for a_BD.")
    #
    #     print("Simulating defender's payoff. New strategy vs. mixed opponent strategy.")
    #     nn_att = mix_str_att
    #     nn_def = "def_str_epoch" + str(epoch) + ".pkl"
    #     # if MPI_flag:
    #     #     _, d_BD = do_MPI_sim(nn_att, nn_def)
    #     # else:
    #     _, d_BD = series_sim(env, game, nn_att, nn_def, game.num_episodes)
    #     print("Simulation done for d_BD.")

    # TODO: This may lead to early stop.
    # if a_BD - aPayoff < game.threshold and d_BD - dPayoff < game.threshold:
    #     print("*************************")
    #     print("aPayoff=", aPayoff, " ", "dPayoff=", dPayoff)
    #     print("a_BD=", a_BD, " ", "d_BD=", d_BD)
    #     print("*************************")
    #     break

    game.add_att_str("att_str_epoch" + str(epoch) + ".pkl")
    game.add_def_str("def_str_epoch" + str(epoch) + ".pkl")

    # Simulate and extend the payoff matrix.
    # game = sim_Series.sim_and_modifiy_Series_with_game(game, MPI_flag=MPI_flag)
    game = sim_Series.sim_and_modifiy_Series_with_game(game)
    game.env.attacker.nn_att = None
    game.env.defender.nn_def = None

    print('a_BD_list', game.att_BD_list)
    print('aPayoff', game.att_payoff)
    print('d_BD_list', game.def_BD_list)
    print('dPayoff', game.def_payoff)

    fp.save_pkl(game, game_path)
    fp.save_pkl(epoch, arg_path + 'epoch_arg.pkl')