def EGTA(env, game, start_hado=2, retrain=False, transfer=False, epoch=1,
         game_path=os.getcwd() + '/game_data/game.pkl', n_rounds=18):
    """Run the double-oracle EGTA loop (DO-EGTA, or HADO-EGTA when ``retrain``).

    Each round: fix both players' strategies to the current Nash equilibrium,
    train a best-response RL agent per player, (optionally) retrain under
    HADO once ``epoch > start_hado``, extend the empirical payoff matrices by
    simulation, recompute the equilibrium with Gambit, and checkpoint the
    game object to ``game_path``.

    Args:
        env: simulation environment (also reachable via ``game.env``).
        game: empirical game object holding strategies, payoff matrices and
            the ``nasheq`` history; mutated and pickled every round.
        start_hado: epoch after which HADO retraining kicks in.
        retrain: run HADO-EGTA instead of plain DO-EGTA.
        transfer: enable transfer learning for the trainers (skipped on the
            first training round, epoch 2, when no prior model exists).
        epoch: equilibrium index to resume from (1-based).
        game_path: pickle checkpoint path. NOTE: the default is evaluated at
            import time, so it captures the cwd of the importing process.
        n_rounds: number of EGTA rounds to run (previously hard-coded to 18).

    Side effects: trains and saves models, writes ``game_path``, prints
    progress, and terminates the whole process via ``os._exit`` at the end.
    """
    if retrain:
        print("=======================================================")
        print("==============Begin Running HADO-EGTA==================")
        print("=======================================================")
    else:
        print("=======================================================")
        print("===============Begin Running DO-EGTA===================")
        print("=======================================================")

    retrain_start = False

    # Track resident memory growth per round for the MEM report below.
    proc = psutil.Process(os.getpid())

    count = n_rounds
    while count > 0:
        mem0 = proc.memory_info().rss

        # Fix opponents to the mixed strategies of the current equilibrium.
        mix_str_def = game.nasheq[epoch][0]
        mix_str_att = game.nasheq[epoch][1]
        aPayoff, dPayoff = util.payoff_mixed_NE(game, epoch)

        game.att_payoff.append(aPayoff)
        game.def_payoff.append(dPayoff)

        # increase epoch
        epoch += 1
        print("Current epoch is " + str(epoch))
        print("epoch " + str(epoch) + ':', datetime.datetime.now())

        # train and save RL agents
        if retrain and epoch > start_hado:
            retrain_start = True

        # Transfer learning is skipped on the first training round (epoch 2):
        # there is no earlier model to transfer from.
        transfer_flag = transfer and epoch != 2

        print("Begin training attacker......")
        a_BD = training.training_att(game, mix_str_def, epoch,
                                     retrain=retrain_start, transfer=transfer_flag)
        print("Attacker training done......")

        print("Begin training defender......")
        d_BD = training.training_def(game, mix_str_att, epoch,
                                     retrain=retrain_start, transfer=transfer_flag)
        print("Defender training done......")

        mem1 = proc.memory_info().rss

        if retrain and epoch > start_hado:
            print("Begin retraining attacker......")
            training.training_hado_att(game, transfer=transfer_flag)
            print("Attacker retraining done......")
            print("Begin retraining defender......")
            training.training_hado_def(game, transfer=transfer_flag)
            print("Defender retraining done......")
            # Simulation for retrained strategies and choose the best one as
            # player's strategy; overrides the plain-training best deviations.
            print('Begin retrained sim......')
            a_BD, d_BD = sim_retrain(env, game, mix_str_att, mix_str_def, epoch)
            print('Done retrained sim......')

        # NOTE(review): appends assumed at loop level (record best deviation
        # every round, retrain or not) — original indentation was ambiguous.
        game.att_BD_list.append(a_BD)
        game.def_BD_list.append(d_BD)

        mem2 = proc.memory_info().rss

        # NOTE: a disabled alternative path computed a_BD/d_BD via series_sim
        # and stopped early when both beneficial deviations fell below
        # game.threshold; see version history if it needs reviving.

        game.add_att_str("att_str_epoch" + str(epoch) + ".pkl")
        game.add_def_str("def_str_epoch" + str(epoch) + ".pkl")

        # simulate and extend the payoff matrix.
        game = sim_Series.sim_and_modifiy_Series_with_game(game)
        mem3 = proc.memory_info().rss

        # find nash equilibrium using gambit analysis
        payoffmatrix_def = game.payoffmatrix_def
        payoffmatrix_att = game.payoffmatrix_att
        print("Begin Gambit analysis.")
        nash_att, nash_def = ga.do_gambit_analysis(payoffmatrix_def, payoffmatrix_att)
        ga.add_new_NE(game, nash_att, nash_def, epoch)

        # Drop live network handles so the game object pickles cleanly.
        game.env.attacker.nn_att = None
        game.env.defender.nn_def = None
        fp.save_pkl(game, game_path)

        print('a_BD_list', game.att_BD_list)
        print('aPayoff', game.att_payoff)
        print('d_BD_list', game.def_BD_list)
        print('dPayoff', game.def_payoff)

        print("Round_" + str(epoch) + " has done and game was saved.")
        print("=======================================================")
        print("MEM:", (mem1 - mem0) / mem0, (mem2 - mem0) / mem0, (mem3 - mem0) / mem0)
        count -= 1
        sys.stdout.flush()  # TODO: make sure this is correct.

    print("END: " + str(epoch))
    # Hard-exit to tear down any lingering worker/TF state without cleanup.
    os._exit(os.EX_OK)
def EGTA_restart(restart_epoch, start_hado=2, retrain=False, transfer=False,
                 game_path=os.getcwd() + '/game_data/game.pkl', total_epochs=8):
    """Resume a previously checkpointed (HADO-)DO-EGTA run.

    Reloads the pickled game from ``game_path`` and continues the EGTA loop
    from ``restart_epoch`` until ``total_epochs`` rounds (overall) have run.

    Args:
        restart_epoch: epoch to resume at; the loop re-reads the equilibrium
            of ``restart_epoch - 1`` first.
        start_hado: epoch after which HADO retraining kicks in.
        retrain: run HADO-EGTA instead of plain DO-EGTA.
        transfer: forwarded to the per-epoch trainers (unlike EGTA, no
            epoch-2 special case here, and HADO retraining ignores it —
            preserved from the original behavior).
        game_path: pickle checkpoint path. NOTE: the default is evaluated at
            import time, so it captures the cwd of the importing process.
        total_epochs: overall round budget (previously hard-coded to 8);
            the loop runs ``total_epochs - restart_epoch`` more rounds.

    Bug fix: the loop now guards with ``count > 0`` instead of ``count != 0``
    — the old test looped forever when ``restart_epoch > total_epochs`` made
    the remaining count negative.
    """
    if retrain:
        print("=======================================================")
        print("============Continue Running HADO-EGTA=================")
        print("=======================================================")
    else:
        print("=======================================================")
        print("=============Continue Running DO-EGTA==================")
        print("=======================================================")

    epoch = restart_epoch - 1

    game = fp.load_pkl(game_path)
    env = game.env

    retrain_start = False

    count = total_epochs - restart_epoch
    while count > 0:
        # fix opponent strategy
        mix_str_def = game.nasheq[epoch][0]
        mix_str_att = game.nasheq[epoch][1]
        aPayoff, dPayoff = util.payoff_mixed_NE(game, epoch)

        game.att_payoff.append(aPayoff)
        game.def_payoff.append(dPayoff)

        # increase epoch
        epoch += 1
        print("Current epoch is " + str(epoch))
        print("epoch " + str(epoch) + ':', datetime.datetime.now())

        # train and save RL agents
        if retrain and epoch > start_hado:
            retrain_start = True

        print("Begin training attacker......")
        a_BD = training.training_att(game, mix_str_def, epoch,
                                     retrain=retrain_start, transfer=transfer)
        print("Attacker training done......")

        print("Begin training defender......")
        d_BD = training.training_def(game, mix_str_att, epoch,
                                     retrain=retrain_start, transfer=transfer)
        print("Defender training done......")

        if retrain and epoch > start_hado:
            print("Begin retraining attacker......")
            training.training_hado_att(game)
            print("Attacker retraining done......")
            print("Begin retraining defender......")
            training.training_hado_def(game)
            print("Defender retraining done......")
            # Simulation for retrained strategies and choose the best one as
            # player's strategy.
            print('Begin retrained sim......')
            a_BD, d_BD = sim_retrain(env, game, mix_str_att, mix_str_def, epoch)
            print('Done retrained sim......')

        # NOTE(review): appends assumed at loop level — original indentation
        # was ambiguous.
        game.att_BD_list.append(a_BD)
        game.def_BD_list.append(d_BD)

        # NOTE: a disabled alternative path computed a_BD/d_BD via series_sim
        # and stopped early below game.threshold; see version history.

        game.add_att_str("att_str_epoch" + str(epoch) + ".pkl")
        game.add_def_str("def_str_epoch" + str(epoch) + ".pkl")

        # simulate and extend the payoff matrix.
        game = sim_Series.sim_and_modifiy_Series_with_game(game)

        # find nash equilibrium using gambit analysis
        payoffmatrix_def = game.payoffmatrix_def
        payoffmatrix_att = game.payoffmatrix_att
        print("Begin Gambit analysis.")
        nash_att, nash_def = ga.do_gambit_analysis(payoffmatrix_def, payoffmatrix_att)
        ga.add_new_NE(game, nash_att, nash_def, epoch)

        # Drop live network handles so the game object pickles cleanly.
        game.env.attacker.nn_att = None
        game.env.defender.nn_def = None
        fp.save_pkl(game, game_path)

        print('a_BD_list', game.att_BD_list)
        print('aPayoff', game.att_payoff)
        print('d_BD_list', game.def_BD_list)
        print('dPayoff', game.def_payoff)

        print("Round_" + str(epoch) + " has done and game was saved.")
        print("=======================================================")
        count -= 1
        sys.stdout.flush()  # TODO: make sure this is correct.

    print("END EPOCH: " + str(epoch))
    print(datetime.datetime.now())
def train_and_sim():
    """Execute one EGTA round as a standalone step, driven by pickled args.

    Reads ``hado_arg.pkl`` (start_hado, retrain flag) and ``epoch_arg.pkl``
    from ``./inner_egta_arg/``, loads the checkpointed game, trains both
    players' best responses against the current equilibrium, optionally
    applies HADO retraining, extends the payoff matrices by simulation, and
    writes the updated game and incremented epoch back to disk.
    """
    args_dir = os.getcwd() + '/inner_egta_arg/'

    start_hado, retrain = fp.load_pkl(args_dir + 'hado_arg.pkl')
    epoch = fp.load_pkl(args_dir + 'epoch_arg.pkl')

    game_path = os.getcwd() + '/game_data/game.pkl'
    game = fp.load_pkl(game_path)
    env = game.env

    # Opponents are fixed to the equilibrium mixture of the current epoch.
    def_mixture = game.nasheq[epoch][0]
    att_mixture = game.nasheq[epoch][1]
    att_ne_payoff, def_ne_payoff = util.payoff_mixed_NE(game, epoch)
    game.att_payoff.append(att_ne_payoff)
    game.def_payoff.append(def_ne_payoff)

    # increase epoch
    epoch += 1
    print("Current epoch is " + str(epoch))
    print("epoch " + str(epoch) + ':', datetime.datetime.now())

    # HADO retraining only starts once the epoch passes start_hado.
    hado_active = bool(retrain and epoch > start_hado)

    # train and save RL agents
    print("Begin training attacker......")
    att_best_dev = training.training_att(game, def_mixture, epoch, retrain=hado_active)
    print("Attacker training done......")

    print("Begin training defender......")
    def_best_dev = training.training_def(game, att_mixture, epoch, retrain=hado_active)
    print("Defender training done......")

    if hado_active:
        print("Begin retraining attacker......")
        training.training_hado_att(game)
        print("Attacker retraining done......")
        print("Begin retraining defender......")
        training.training_hado_def(game)
        print("Defender retraining done......")
        # Pick the best retrained strategy per player via simulation.
        print('Begin retrained sim......')
        att_best_dev, def_best_dev = sim_retrain(env, game, att_mixture, def_mixture, epoch)
        print('Done retrained sim......')

    game.att_BD_list.append(att_best_dev)
    game.def_BD_list.append(def_best_dev)

    game.add_att_str("att_str_epoch" + str(epoch) + ".pkl")
    game.add_def_str("def_str_epoch" + str(epoch) + ".pkl")

    # simulate and extend the payoff matrix.
    game = sim_Series.sim_and_modifiy_Series_with_game(game)

    # Clear live network handles so the game object pickles cleanly.
    game.env.attacker.nn_att = None
    game.env.defender.nn_def = None

    print('a_BD_list', game.att_BD_list)
    print('aPayoff', game.att_payoff)
    print('d_BD_list', game.def_BD_list)
    print('dPayoff', game.def_payoff)

    # Persist the extended game first, then the bumped epoch counter.
    fp.save_pkl(game, game_path)
    fp.save_pkl(epoch, args_dir + 'epoch_arg.pkl')