def partial_matrix_ne_search(payoff_matrix_att, payoff_matrix_def, child_partition):
    ne_dict = {}
    heuristic_pos = find_heuristic_position(child_partition)
    for method in child_partition:
        ne_dict[method] = {}
        # Find the position of the heuristic.
        h_pos = heuristic_pos[method]
        # Find the NE of the partial matrix.
        nash_att, _ = ga.do_gambit_analysis(payoff_matrix_def,
                                            payoff_matrix_att[h_pos[0], h_pos[1]],
                                            maxent=False,
                                            minent=False)
        _, nash_def = ga.do_gambit_analysis(payoff_matrix_def[h_pos[0], h_pos[1]],
                                            payoff_matrix_att,
                                            maxent=False,
                                            minent=False)
        # Add a zero for the uniform strategy.
        ne_dict[method][0] = np.insert(nash_def, 0, 0)
        ne_dict[method][1] = np.insert(nash_att, 0, 0)
    return ne_dict
def find_all_NE(payoffmatrix_def, payoffmatrix_att):
    # Paired NE.
    # nash_att_list = [np.array([0.5, 0.2, 0.3]), ...]
    # nash_def_list = [np.array([0.5, 0.2, 0.3]), ...]
    nash_att_list, nash_def_list = do_gambit_analysis(payoffmatrix_def,
                                                      payoffmatrix_att,
                                                      return_list=True)
    return nash_att_list, nash_def_list
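
# Illustrative usage sketch (not part of the original pipeline): enumerate the
# equilibria of a tiny random bimatrix game with find_all_NE. It only assumes
# numpy and the do_gambit_analysis(..., return_list=True) helper used above.
def _example_find_all_NE():
    import numpy as np
    np.random.seed(0)
    toy_def = np.round(np.random.normal(size=(3, 3)), 2)  # defender payoffs
    toy_att = np.round(np.random.normal(size=(3, 3)), 2)  # attacker payoffs
    nash_att_list, nash_def_list = find_all_NE(toy_def, toy_att)
    for nash_att, nash_def in zip(nash_att_list, nash_def_list):
        print("attacker mix:", nash_att, "defender mix:", nash_def)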
def regret_curves(payoffmatrix_def, payoffmatrix_att, child_partition):
    """
    Calculate the regret (epsilon) curve of each heuristic's subgame as the subgame grows
    one strategy at a time.
    :param payoffmatrix_def: defender payoff matrix of the combined game.
    :param payoffmatrix_att: attacker payoff matrix of the combined game.
    :param child_partition: {"baseline": num_strategies, ...}
    :return: regret curves for the attacker and the defender, keyed by method.
    """
    curves_att = {}
    curves_def = {}
    num_str, _ = np.shape(payoffmatrix_att)
    positions = find_heuristic_position(child_partition)
    for method in child_partition:
        curves_att[method] = []
        curves_def[method] = []
        start, end = positions[method]
        # Deviation payoffs are measured over the deviator's full strategy space.
        submatrix_def = payoffmatrix_def[:, start:end]   # all defender rows vs. this method's attacker columns
        submatrix_att = payoffmatrix_att[start:end, :]   # this method's defender rows vs. all attacker columns
        subgame_def = payoffmatrix_def[start:end, start:end]
        subgame_att = payoffmatrix_att[start:end, start:end]
        # Start from a 1x1 subgame; an empty game makes Gambit fail with
        # "line 4:2: Expecting outcome or payoff".
        for epoch in np.arange(1, end - start + 1):
            subsubgame_def = subgame_def[:epoch, :epoch]
            subsubgame_att = subgame_att[:epoch, :epoch]
            nash_att, nash_def = do_gambit_analysis(subsubgame_def, subsubgame_att,
                                                    maxent=False, minent=True)
            # Pad the subgame NE with zeros up to the size of this method's block.
            padded_def = np.zeros(end - start)
            padded_att = np.zeros(end - start)
            padded_def[:len(nash_def)] = nash_def
            padded_att[:len(nash_att)] = nash_att
            nash_def = np.reshape(padded_def, newshape=(end - start, 1))
            nash_att = padded_att

            payoff_vect_def = np.sum(submatrix_def * nash_att, axis=1)   # defender deviation payoffs
            payoff_vect_att = np.sum(nash_def * submatrix_att, axis=0)   # attacker deviation payoffs

            nash_payoff_att = np.round(np.sum(nash_def * subgame_att * nash_att), decimals=2)
            nash_payoff_def = np.round(np.sum(nash_def * subgame_def * nash_att), decimals=2)

            deviation_att = np.max(payoff_vect_att)
            deviation_def = np.max(payoff_vect_def)

            regret_att = np.maximum(deviation_att - nash_payoff_att, 0)
            regret_def = np.maximum(deviation_def - nash_payoff_def, 0)

            curves_att[method].append(regret_att)
            curves_def[method].append(regret_def)

    return curves_att, curves_def
def EGTA_restart(restart_epoch,
                 start_hado=2,
                 retrain=False,
                 game_path=os.getcwd() + '/game_data/game.pkl'):

    if retrain:
        print("=======================================================")
        print("============Continue Running HADO-EGTA=================")
        print("=======================================================")
    else:
        print("=======================================================")
        print("=============Continue Running DO-EGTA==================")
        print("=======================================================")

    epoch = restart_epoch - 1
    sys.stdout.flush()

    arg_path = os.getcwd() + '/inner_egta_arg/'
    hado_arg = (start_hado, retrain)
    epoch_arg = epoch

    fp.save_pkl(hado_arg, path=arg_path + 'hado_arg.pkl')
    fp.save_pkl(epoch_arg, path=arg_path + 'epoch_arg.pkl')

    count = 8 - restart_epoch
    while count != 0:
        # while True:
        do_train_and_sim()

        game = fp.load_pkl(game_path)
        epoch = fp.load_pkl(arg_path + 'epoch_arg.pkl')

        # Find a Nash equilibrium using Gambit analysis.
        payoffmatrix_def = game.payoffmatrix_def
        payoffmatrix_att = game.payoffmatrix_att
        print("Begin Gambit analysis.")
        nash_att, nash_def = ga.do_gambit_analysis(payoffmatrix_def, payoffmatrix_att)
        ga.add_new_NE(game, nash_att, nash_def, epoch)
        game.env.attacker.nn_att = None
        game.env.defender.nn_def = None
        fp.save_pkl(game, game_path)

        print("Round_" + str(epoch) + " is done and the game was saved.")
        print("=======================================================")
        # break
        count -= 1
        sys.stdout.flush()

    # TODO: make sure this is correct.
    print("END EPOCH: " + str(epoch))
    print(datetime.datetime.now())
def EGTA(start_hado=2,
         retrain=False,
         epoch=1,
         game_path=os.getcwd() + '/game_data/game.pkl'):

    if retrain:
        print("=======================================================")
        print("==============Begin Running HADO-EGTA==================")
        print("=======================================================")
    else:
        print("=======================================================")
        print("===============Begin Running DO-EGTA===================")
        print("=======================================================")

    sys.stdout.flush()

    arg_path = os.getcwd() + '/inner_egta_arg/'
    hado_arg = (start_hado, retrain)
    epoch_arg = epoch

    fp.save_pkl(hado_arg, path=arg_path + 'hado_arg.pkl')
    fp.save_pkl(epoch_arg, path=arg_path + 'epoch_arg.pkl')

    count = 18
    while count != 0:
        # while True:
        do_train_and_sim()

        game = fp.load_pkl(game_path)
        epoch = fp.load_pkl(arg_path + 'epoch_arg.pkl')

        # Find a Nash equilibrium using Gambit analysis.
        payoffmatrix_def = game.payoffmatrix_def
        payoffmatrix_att = game.payoffmatrix_att
        print("Begin Gambit analysis.")
        nash_att, nash_def = ga.do_gambit_analysis(payoffmatrix_def, payoffmatrix_att)
        ga.add_new_NE(game, nash_att, nash_def, epoch)
        fp.save_pkl(game, game_path)

        print("Round_" + str(epoch) + " is done and the game was saved.")
        print("=======================================================")
        # break
        count -= 1
        sys.stdout.flush()

    # TODO: make sure this is correct.
    print("END: " + str(epoch))

    os._exit(os.EX_OK)
def formal_regret_curves(payoffmatrix_def, payoffmatrix_att, child_partition):
    positions = find_heuristic_position(child_partition)
    curves_dict_def = {}
    curves_dict_att = {}
    for method in child_partition:
        curves_dict_def[method] = []
        curves_dict_att[method] = []

    for epoch in np.arange(40):
        for method in child_partition:
            if method == 'RM':
                continue
            start, end = positions[method]
            print(start, end)
            submatrix_att = payoffmatrix_att[start:start + epoch + 1, start:start + epoch + 1]
            submatrix_def = payoffmatrix_def[start:start + epoch + 1, start:start + epoch + 1]
            # print('X:', start, start + epoch + 1)
            nash_att, nash_def = do_gambit_analysis(submatrix_def, submatrix_att, maxent=True)
            nash_def = np.reshape(nash_def, newshape=(len(nash_def), 1))

            # Equilibrium payoffs of the growing subgame.
            ne_payoff_def = np.sum(nash_def * submatrix_def * nash_att)
            ne_payoff_att = np.sum(nash_def * submatrix_att * nash_att)

            # Best deviation payoffs over the full combined game.
            dev_def = np.max(np.sum(payoffmatrix_def[:, start:start + epoch + 1] * nash_att, axis=1))
            dev_att = np.max(np.sum(nash_def * payoffmatrix_att[start:start + epoch + 1, :], axis=0))

            curves_dict_def[method].append(np.maximum(dev_def - ne_payoff_def, 0))
            curves_dict_att[method].append(np.maximum(dev_att - ne_payoff_att, 0))

    return curves_dict_def, curves_dict_att
def NE_regret(regret_vect_att, regret_vect_def, payoffmatrix_att, payoffmatrix_def, child_partition):
    """
    Calculate the regret of each heuristic with respect to the combined game. Only the strategies
    that appear in the NE of each heuristic's subgame are counted.
    :param regret_vect_att: attacker regret vector calculated from the combined game.
    :param regret_vect_def: defender regret vector calculated from the combined game.
    :param child_partition: {"baseline": num_strategies, "RS": num_strategies, ...}
    :return: {"baseline": {0: mean defender regret over the NE support,
                           1: mean attacker regret over the NE support}, ...}
    """
    regret_dict = {}
    positions = find_heuristic_position(child_partition)
    for method in child_partition:
        start, end = positions[method]
        print(start, end)
        submatrix_att = payoffmatrix_att[start:end, start:end]
        submatrix_def = payoffmatrix_def[start:end, start:end]
        # submatrix_att = payoffmatrix_att[start:start + 32, start:start + 32]
        # submatrix_def = payoffmatrix_def[start:start + 32, start:start + 32]
        nash_att, nash_def = do_gambit_analysis(submatrix_def, submatrix_att, maxent=True)

        # Turn the mixed strategies into support indicators (1 if a strategy is in the NE support).
        nash_att[nash_att > 0] = 1
        nash_def[nash_def > 0] = 1

        regret_dict[method] = {
            0: np.sum(regret_vect_def[start:end] * nash_def) / np.sum(nash_def),
            1: np.sum(regret_vect_att[start:end] * nash_att) / np.sum(nash_att)
        }
        # regret_dict[method] = {0: np.sum(regret_vect_def[start:start + 30] * nash_def) / np.sum(nash_def),
        #                        1: np.sum(regret_vect_att[start:start + 30] * nash_att) / np.sum(nash_att)}

    return regret_dict
def regret_fixed_matrix(payoffmatrix_def, payoffmatrix_att, child_partition):
    positions = find_heuristic_position(child_partition)
    for method in child_partition:
        start, end = positions[method]
        print(start, end)
        # submatrix_att = payoffmatrix_att[start:end, start:end]
        # submatrix_def = payoffmatrix_def[start:end, start:end]
        submatrix_att = payoffmatrix_att[start:start + 32, start:start + 32]
        submatrix_def = payoffmatrix_def[start:start + 32, start:start + 32]
        nash_att, nash_def = do_gambit_analysis(submatrix_def, submatrix_att, maxent=True)
        nash_def = np.reshape(nash_def, newshape=(len(nash_def), 1))

        ne_payoff_def = np.sum(nash_def * submatrix_def * nash_att)
        ne_payoff_att = np.sum(nash_def * submatrix_att * nash_att)

        # dev_def = np.max(np.sum(payoffmatrix_def[:, start:end] * nash_att, axis=1))
        # dev_att = np.max(np.sum(nash_def * payoffmatrix_att[start:end, :], axis=0))
        dev_def = np.max(np.sum(payoffmatrix_def[:, start:start + 32] * nash_att, axis=1))
        # print(np.argmax(np.sum(payoffmatrix_def[:, start:end] * nash_att, axis=1)))
        dev_att = np.max(np.sum(nash_def * payoffmatrix_att[start:start + 32, :], axis=0))
        # print(np.argmax(np.sum(nash_def * payoffmatrix_att[start:end, :], axis=0)))

        print('------------------------------------------')
        print("The current method is ", method)
        print("The defender's regret is", np.maximum(dev_def - ne_payoff_def, 0))
        print("The attacker's regret is", np.maximum(dev_att - ne_payoff_att, 0))
        print("==================================================")
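
# Worked numeric sketch of the regret pattern used by regret_fixed_matrix and
# formal_regret_curves above (illustrative, numpy only): regret is the best
# deviation payoff in the full game minus the restricted-game equilibrium payoff,
# floored at zero. The mixed strategies below are hand-picked for illustration,
# not solved by Gambit.
def _example_regret_computation():
    import numpy as np
    full_def = np.array([[3., 0., 1.],
                         [1., 2., 0.],
                         [0., 1., 2.]])      # defender payoffs of the full game
    full_att = -full_def                     # attacker payoffs (zero-sum toy game)
    start, end = 0, 2                        # restricted block: strategies 0 and 1
    sub_def = full_def[start:end, start:end]
    sub_att = full_att[start:end, start:end]
    nash_def = np.array([[0.5], [0.5]])      # defender mix as a column vector, as above
    nash_att = np.array([0.5, 0.5])          # attacker mix
    ne_payoff_def = np.sum(nash_def * sub_def * nash_att)
    ne_payoff_att = np.sum(nash_def * sub_att * nash_att)
    dev_def = np.max(np.sum(full_def[:, start:end] * nash_att, axis=1))
    dev_att = np.max(np.sum(nash_def * full_att[start:end, :], axis=0))
    print("defender regret:", np.maximum(dev_def - ne_payoff_def, 0))
    print("attacker regret:", np.maximum(dev_att - ne_payoff_att, 0))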
def ne_search_wo_etrace(payoff_matrix_def, payoff_matrix_att, child_partition):
    position = find_heuristic_position(child_partition)
    total_num_str = 0
    init_flag = False

    # Assume 2 methods. Find a candidate NE in the first subgame.
    for method in child_partition:
        if not init_flag:
            nash_att, nash_def = do_gambit_analysis(
                payoff_matrix_def[:child_partition[method], :child_partition[method]],
                payoff_matrix_att[:child_partition[method], :child_partition[method]],
                maxent=False,
                minent=False)
            # Strategies of the current game.
            strategy_set_def = list(range(child_partition[method]))
            strategy_set_att = list(range(child_partition[method]))
            init_flag = True
        total_num_str += child_partition[method]

    # Extend the NE to the length of the combined game.
    zeros_def = np.zeros(total_num_str)
    zeros_att = np.zeros(total_num_str)
    zeros_def[:len(nash_def)] = nash_def
    zeros_att[:len(nash_att)] = nash_att
    nash_def = zeros_def
    nash_att = zeros_att

    # indicator_matrix records which cells have been simulated in the payoff matrix.
    indicator_matrix = np.zeros((total_num_str, total_num_str))
    for method in position:
        start, end = position[method]
        indicator_matrix[start:end, start:end] = 1

    nash_def_T = np.reshape(nash_def, newshape=(len(nash_def), 1))
    payoff_def = np.sum(nash_def_T * payoff_matrix_def * nash_att)
    payoff_att = np.sum(nash_def_T * payoff_matrix_att * nash_att)

    support_idx_def = np.where(nash_def > 0)[0]
    support_idx_att = np.where(nash_att > 0)[0]

    # Change to simulation mode when simulation is needed.
    while True:
        for x in support_idx_def:
            indicator_matrix[x, :] = 1
        for y in support_idx_att:
            indicator_matrix[:, y] = 1

        dev_payoff_def = np.max(np.sum(payoff_matrix_def * nash_att, axis=1))
        dev_payoff_att = np.max(np.sum(nash_def_T * payoff_matrix_att, axis=0))
        dev_def = np.argmax(np.sum(payoff_matrix_def * nash_att, axis=1))
        dev_att = np.argmax(np.sum(nash_def_T * payoff_matrix_att, axis=0))

        if dev_payoff_def <= payoff_def and dev_payoff_att <= payoff_att:
            break

        # Add the deviating strategies to the strategy sets and mark their rows/columns as simulated.
        strategy_set_def.append(dev_def)
        strategy_set_def.sort()
        indicator_matrix[dev_def, :] = 1

        strategy_set_att.append(dev_att)
        strategy_set_att.sort()
        indicator_matrix[:, dev_att] = 1

        subgame_def = es(strategy_set_def, strategy_set_att, payoff_matrix_def)
        subgame_att = es(strategy_set_def, strategy_set_att, payoff_matrix_att)

        # print(strategy_set_def, strategy_set_att)
        # print(np.shape(subgame_def), np.shape(subgame_att))

        nash_att, nash_def = do_gambit_analysis(subgame_def, subgame_att, maxent=False, minent=False)

        nash_def_T = np.reshape(nash_def, newshape=(len(nash_def), 1))
        payoff_def = np.sum(nash_def_T * subgame_def * nash_att)
        payoff_att = np.sum(nash_def_T * subgame_att * nash_att)

        zeros_def = np.zeros(total_num_str)
        zeros_att = np.zeros(total_num_str)
        for pos, value in zip(strategy_set_att, nash_att):
            zeros_att[pos] = value
        for pos, value in zip(strategy_set_def, nash_def):
            zeros_def[pos] = value
        nash_def = zeros_def
        nash_att = zeros_att

        support_idx_def = np.where(nash_def > 0)[0]
        support_idx_att = np.where(nash_att > 0)[0]

    # Cells belonging to the pre-simulated heuristic subgames are marked with 5.
    for method in position:
        start, end = position[method]
        indicator_matrix[start:end, start:end] = 5

    return nash_def, nash_att, indicator_matrix
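
# Illustrative call sketch for ne_search_wo_etrace: child_partition is assumed to
# map each heuristic name to its number of strategies in the combined game (the
# format implied by find_heuristic_position above). The random 10x10 game below
# is only for demonstrating shapes and the returned indicator matrix.
def _example_ne_search_wo_etrace():
    import numpy as np
    np.random.seed(2)
    child_partition = {'baseline': 5, 'RS': 5}      # hypothetical partition
    n = sum(child_partition.values())
    combined_def = np.round(np.random.normal(size=(n, n)), 2)
    combined_att = np.round(np.random.normal(size=(n, n)), 2)
    nash_def, nash_att, indicator = ne_search_wo_etrace(combined_def, combined_att, child_partition)
    print("defender NE:", nash_def)
    print("attacker NE:", nash_att)
    print("indicator matrix (1 = simulated cell, 5 = pre-simulated subgame):")
    print(indicator)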
def run(p1_payoff, p2_payoff):
    np.random.seed(0)
    regret_list = []
    str_p1 = []
    str_p2 = []
    epoch = 0

    x1, x2 = 0, 0
    str_p1.append(x1)
    str_p2.append(x2)

    subgame_u1 = extract_submatrix(np.array(str_p1) * 2, np.array(str_p2) * 2, p1_payoff)
    subgame_u2 = extract_submatrix(np.array(str_p1) * 2, np.array(str_p2) * 2, p2_payoff)

    is_terminal = True
    switch = False
    while is_terminal:
        epoch += 1
        nelist = do_gambit_analysis(subgame_u1, subgame_u2, return_list=True)
        # nash_2, nash_1 = do_gambit_analysis(subgame_u1, subgame_u2, maxent=False, minent=True)
        nash_2, nash_1 = do_gambit_analysis(subgame_u1, subgame_u2, maxent=True, minent=False)

        regret_list.append(regret(nash_1, nash_2,
                                  np.array(str_p1), np.array(str_p2),
                                  subgame_u1, subgame_u2,
                                  p1_payoff, p2_payoff))

        # DO solver: each player best-responds with its own payoff matrix.
        if switch:
            x1 = BR(np.array(str_p2) * 2, nash_2, p1_payoff)
            x2 = BR(np.array(str_p1) * 2, nash_1, p2_payoff)

        # Beneficial deviation
        if not switch:
            x1 = beneficial_dev(np.array(str_p2) * 2, nash_2, p1_payoff)
            x2 = beneficial_dev(np.array(str_p1) * 2, nash_1, p2_payoff)

        # random
        # x1 = rand(np.array(str_p1))
        # x2 = rand(np.array(str_p2))

        if epoch == 10:
            switch = True

        str_p1.append(x1)
        str_p2.append(x2)

        print("--------------------------------")
        print("Current Epoch is ", epoch)
        print("ne_list:", nelist)
        print("Current NE is ", nash_1, nash_2)
        print("x1:", str_p1)
        print("x2:", str_p2)

        # if x1 not in str_p1:
        #     str_p1.append(x1)
        # if x2 not in str_p2:
        #     str_p2.append(x2)

        subgame_u1 = extract_submatrix(np.array(str_p1) * 2, np.array(str_p2) * 2, p1_payoff)
        subgame_u2 = extract_submatrix(np.array(str_p1) * 2, np.array(str_p2) * 2, p2_payoff)

        if epoch == 20:
            is_terminal = False

    print(regret_list)
    print("x1:", str_p1)
    print("x2:", str_p2)
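
# Illustrative driver sketch for run() above: two random payoff matrices of an
# arbitrary size, which must be compatible with the index-doubling convention
# used by extract_submatrix/BR/beneficial_dev inside run(). Those helpers are
# assumed to be in scope, as in the function above.
def _example_run():
    import numpy as np
    np.random.seed(1)
    u1 = np.round(np.random.normal(size=(60, 60)), 2)  # player 1 payoffs
    u2 = np.round(np.random.normal(size=(60, 60)), 2)  # player 2 payoffs
    run(u1, u2)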
def EGTA_restart(restart_epoch, start_hado=2, retrain=False, transfer=False,
                 game_path=os.getcwd() + '/game_data/game.pkl'):

    if retrain:
        print("=======================================================")
        print("============Continue Running HADO-EGTA=================")
        print("=======================================================")
    else:
        print("=======================================================")
        print("=============Continue Running DO-EGTA==================")
        print("=======================================================")

    epoch = restart_epoch - 1

    game = fp.load_pkl(game_path)
    env = game.env

    retrain_start = False

    count = 8 - restart_epoch
    while count != 0:
        # while True:
        # Fix opponent strategy.
        mix_str_def = game.nasheq[epoch][0]
        mix_str_att = game.nasheq[epoch][1]
        aPayoff, dPayoff = util.payoff_mixed_NE(game, epoch)

        game.att_payoff.append(aPayoff)
        game.def_payoff.append(dPayoff)

        # Increase epoch.
        epoch += 1
        print("Current epoch is " + str(epoch))
        print("epoch " + str(epoch) + ':', datetime.datetime.now())

        # Train and save RL agents.
        if retrain and epoch > start_hado:
            retrain_start = True

        print("Begin training attacker......")
        a_BD = training.training_att(game, mix_str_def, epoch, retrain=retrain_start, transfer=transfer)
        print("Attacker training done......")

        print("Begin training defender......")
        d_BD = training.training_def(game, mix_str_att, epoch, retrain=retrain_start, transfer=transfer)
        print("Defender training done......")

        if retrain and epoch > start_hado:
            print("Begin retraining attacker......")
            training.training_hado_att(game)
            print("Attacker retraining done......")

            print("Begin retraining defender......")
            training.training_hado_def(game)
            print("Defender retraining done......")

            # Simulate the retrained strategies and choose the best one as the player's strategy.
            print('Begin retrained sim......')
            a_BD, d_BD = sim_retrain(env, game, mix_str_att, mix_str_def, epoch)
            print('Done retrained sim......')

        game.att_BD_list.append(a_BD)
        game.def_BD_list.append(d_BD)

        # else:
        #
        #     # Judge beneficial deviation:
        #     # one player plays the new nn and the other plays the NE strategy.
        #     print("Simulating attacker payoff. New strategy vs. mixed opponent strategy.")
        #     nn_att = "att_str_epoch" + str(epoch) + ".pkl"
        #     nn_def = mix_str_def
        #     # if MPI_flag:
        #     #     a_BD, _ = do_MPI_sim(nn_att, nn_def)
        #     # else:
        #     a_BD, _ = series_sim(env, game, nn_att, nn_def, game.num_episodes)
        #     print("Simulation done for a_BD.")
        #
        #     print("Simulating defender's payoff. New strategy vs. mixed opponent strategy.")
        #     nn_att = mix_str_att
        #     nn_def = "def_str_epoch" + str(epoch) + ".pkl"
        #     # if MPI_flag:
        #     #     _, d_BD = do_MPI_sim(nn_att, nn_def)
        #     # else:
        #     _, d_BD = series_sim(env, game, nn_att, nn_def, game.num_episodes)
        #     print("Simulation done for d_BD.")

        # TODO: This may lead to early stop.
        # if a_BD - aPayoff < game.threshold and d_BD - dPayoff < game.threshold:
        #     print("*************************")
        #     print("aPayoff=", aPayoff, " ", "dPayoff=", dPayoff)
        #     print("a_BD=", a_BD, " ", "d_BD=", d_BD)
        #     print("*************************")
        #     break

        game.add_att_str("att_str_epoch" + str(epoch) + ".pkl")
        game.add_def_str("def_str_epoch" + str(epoch) + ".pkl")

        # Simulate and extend the payoff matrix.
        game = sim_Series.sim_and_modifiy_Series_with_game(game)

        # Find a Nash equilibrium using Gambit analysis.
        payoffmatrix_def = game.payoffmatrix_def
        payoffmatrix_att = game.payoffmatrix_att
        print("Begin Gambit analysis.")
        nash_att, nash_def = ga.do_gambit_analysis(payoffmatrix_def, payoffmatrix_att)
        ga.add_new_NE(game, nash_att, nash_def, epoch)
        game.env.attacker.nn_att = None
        game.env.defender.nn_def = None
        fp.save_pkl(game, game_path)

        print('a_BD_list', game.att_BD_list)
        print('aPayoff', game.att_payoff)
        print('d_BD_list', game.def_BD_list)
        print('dPayoff', game.def_payoff)

        print("Round_" + str(epoch) + " is done and the game was saved.")
        print("=======================================================")
        # break
        count -= 1
        sys.stdout.flush()

    # TODO: make sure this is correct.
    print("END EPOCH: " + str(epoch))
    print(datetime.datetime.now())
def EGTA(env, game, start_hado=2, retrain=False, transfer=False, epoch=1,
         game_path=os.getcwd() + '/game_data/game.pkl'):

    if retrain:
        print("=======================================================")
        print("==============Begin Running HADO-EGTA==================")
        print("=======================================================")
    else:
        print("=======================================================")
        print("===============Begin Running DO-EGTA===================")
        print("=======================================================")

    retrain_start = False

    proc = psutil.Process(os.getpid())

    count = 18
    while count != 0:
        # while True:
        mem0 = proc.memory_info().rss

        # Fix opponent strategy.
        mix_str_def = game.nasheq[epoch][0]
        mix_str_att = game.nasheq[epoch][1]

        # TODO: play against uniform
        # mix_str_def = np.zeros(len(game.nasheq[epoch][0]))
        # mix_str_def[0] = 1
        # mix_str_att = np.zeros(len(game.nasheq[epoch][1]))
        # mix_str_att[0] = 1

        aPayoff, dPayoff = util.payoff_mixed_NE(game, epoch)
        game.att_payoff.append(aPayoff)
        game.def_payoff.append(dPayoff)

        # Increase epoch.
        epoch += 1
        print("Current epoch is " + str(epoch))
        print("epoch " + str(epoch) + ':', datetime.datetime.now())

        # Train and save RL agents.
        if retrain and epoch > start_hado:
            retrain_start = True

        if epoch == 2 and transfer:
            transfer_flag = False
        elif transfer:
            transfer_flag = True
        else:
            transfer_flag = False

        print("Begin training attacker......")
        a_BD = training.training_att(game, mix_str_def, epoch, retrain=retrain_start, transfer=transfer_flag)
        print("Attacker training done......")

        print("Begin training defender......")
        d_BD = training.training_def(game, mix_str_att, epoch, retrain=retrain_start, transfer=transfer_flag)
        print("Defender training done......")

        mem1 = proc.memory_info().rss

        if retrain and epoch > start_hado:
            print("Begin retraining attacker......")
            training.training_hado_att(game, transfer=transfer_flag)
            print("Attacker retraining done......")

            print("Begin retraining defender......")
            training.training_hado_def(game, transfer=transfer_flag)
            print("Defender retraining done......")

            # Simulate the retrained strategies and choose the best one as the player's strategy.
            print('Begin retrained sim......')
            a_BD, d_BD = sim_retrain(env, game, mix_str_att, mix_str_def, epoch)
            print('Done retrained sim......')

        game.att_BD_list.append(a_BD)
        game.def_BD_list.append(d_BD)

        # else:
        #
        #     # Judge beneficial deviation:
        #     # one player plays the new nn and the other plays the NE strategy.
        #     print("Simulating attacker payoff. New strategy vs. mixed opponent strategy.")
        #     nn_att = "att_str_epoch" + str(epoch) + ".pkl"
        #     nn_def = mix_str_def
        #     # if MPI_flag:
        #     #     a_BD, _ = do_MPI_sim(nn_att, nn_def)
        #     # else:
        #     a_BD, _ = series_sim(env, game, nn_att, nn_def, game.num_episodes)
        #     print("Simulation done for a_BD.")
        #
        #     print("Simulating defender's payoff. New strategy vs. mixed opponent strategy.")
        #     nn_att = mix_str_att
        #     nn_def = "def_str_epoch" + str(epoch) + ".pkl"
        #     # if MPI_flag:
        #     #     _, d_BD = do_MPI_sim(nn_att, nn_def)
        #     # else:
        #     _, d_BD = series_sim(env, game, nn_att, nn_def, game.num_episodes)
        #     print("Simulation done for d_BD.")

        mem2 = proc.memory_info().rss

        # TODO: This may lead to early stop.
        # if a_BD - aPayoff < game.threshold and d_BD - dPayoff < game.threshold:
        #     print("*************************")
        #     print("aPayoff=", aPayoff, " ", "dPayoff=", dPayoff)
        #     print("a_BD=", a_BD, " ", "d_BD=", d_BD)
        #     print("*************************")
        #     break

        game.add_att_str("att_str_epoch" + str(epoch) + ".pkl")
        game.add_def_str("def_str_epoch" + str(epoch) + ".pkl")

        # Simulate and extend the payoff matrix.
        # game = sim_Series.sim_and_modifiy_Series_with_game(game, MPI_flag=MPI_flag)
        game = sim_Series.sim_and_modifiy_Series_with_game(game)

        mem3 = proc.memory_info().rss

        # Find a Nash equilibrium using Gambit analysis.
        payoffmatrix_def = game.payoffmatrix_def
        payoffmatrix_att = game.payoffmatrix_att
        print("Begin Gambit analysis.")
        nash_att, nash_def = ga.do_gambit_analysis(payoffmatrix_def, payoffmatrix_att)
        ga.add_new_NE(game, nash_att, nash_def, epoch)
        game.env.attacker.nn_att = None
        game.env.defender.nn_def = None
        fp.save_pkl(game, game_path)

        print('a_BD_list', game.att_BD_list)
        print('aPayoff', game.att_payoff)
        print('d_BD_list', game.def_BD_list)
        print('dPayoff', game.def_payoff)

        print("Round_" + str(epoch) + " is done and the game was saved.")
        print("=======================================================")
        # break
        print("MEM:", (mem1 - mem0) / mem0, (mem2 - mem0) / mem0, (mem3 - mem0) / mem0)
        count -= 1
        sys.stdout.flush()

    # TODO: make sure this is correct.
    print("END: " + str(epoch))

    os._exit(os.EX_OK)
def _run(env, game, meta_method_name, epoch: int = 1, game_path: str = None, n_processes: int = 1):
    assert n_processes > 0, "Invalid number of processors."

    if game_path is None:
        game_path = osp.join(settings.get_run_dir(), "game.pkl")

    logger.info("=======================================================")
    logger.info("===============Begin Running DO-EGTA===================")
    logger.info("=======================================================")

    proc = psutil.Process(os.getpid())
    result_dir = settings.get_run_dir()

    selector = meta_method_selector(meta_method_name)

    count = 80
    while count != 0:
        mem0 = proc.memory_info().rss

        # Fix opponent strategy.
        mix_str_def, mix_str_att = selector.sample(game, epoch)

        # Save mixed strategies.
        # with open(osp.join(result_dir, f"mix_defender.{epoch}.pkl"), "wb") as outfile:
        #     pickle.dump(mix_str_def, outfile)
        # with open(osp.join(result_dir, f"mix_attacker.{epoch}.pkl"), "wb") as outfile:
        #     pickle.dump(mix_str_att, outfile)
        # with open(osp.join(result_dir, f"payoff_defender.{epoch}.pkl"), "wb") as outfile:
        #     pickle.dump(game.payoffmatrix_def, outfile)
        # with open(osp.join(result_dir, f"payoff_attacker.{epoch}.pkl"), "wb") as outfile:
        #     pickle.dump(game.payoffmatrix_att, outfile)

        # Equilibrium pay-off.
        aPayoff, dPayoff = util.payoff_mixed_NE(game, epoch)
        game.att_payoff.append(aPayoff)
        game.def_payoff.append(dPayoff)

        # Increase epoch.
        epoch += 1
        logger.info("Epoch " + str(epoch))
        epoch_dir = osp.join(result_dir, f"epoch_{epoch}")

        # Summary writer for each epoch.
        writer = SummaryWriter(logdir=epoch_dir)

        # Train and save new best-response policies.
        if n_processes > 1:
            logger.info("Beginning training attacker and defender in parallel.")
            time_training = time.time()
            job_queue = multiprocessing.SimpleQueue()
            result_queue = multiprocessing.SimpleQueue()

            attacker_trainer = LearnerWorker(job_queue, result_queue, 1, mix_str_def, epoch)
            defender_trainer = LearnerWorker(job_queue, result_queue, 0, mix_str_att, epoch)

            attacker_trainer.start()
            defender_trainer.start()

            # Submit training jobs on our game.
            for _ in range(2):
                job_queue.put(CloudpickleWrapper(game))

            # Send sentinel values to tell processes to cleanly shutdown (1 per worker).
            for _ in range(2):
                job_queue.put(None)

            attacker_trainer.join()
            defender_trainer.join()

            # Collect and report results. We need to sort the results because they may appear in any order.
            results = []
            for _ in range(2):
                results += [result_queue.get()]
            results = results if not results[0][0] else results[::-1]  # Put defender first, then attacker.

            # Process results into expected variables for non-distributed.
            a_BD = results[1][1]
            d_BD = results[0][1]

            logger.info("Done training attacker and defender.")
            logger.info(f"Defender training report: \n{results[0][2]}")
            logger.info(f"Attacker training report: \n{results[1][2]}")
            time_training = time.time() - time_training

        else:
            logger.info("Begin training attacker......")
            time_train_attacker = time.time()
            a_BD, report = training.train(game, 1, mix_str_def, epoch, writer)
            time_train_attacker = time.time() - time_train_attacker
            logger.info(f"\n{report}")
            logger.info("Attacker training done......")

            logger.info("Begin training defender......")
            time_train_defender = time.time()
            d_BD, report = training.train(game, 0, mix_str_att, epoch, writer)
            time_train_defender = time.time() - time_train_defender
            logger.info(f"\n{report}")
            logger.info("Defender training done......")

        mem1 = proc.memory_info().rss

        game.att_BD_list.append(a_BD)
        game.def_BD_list.append(d_BD)

        mem2 = proc.memory_info().rss

        game.add_att_str("att_str_epoch" + str(epoch) + ".pkl")
        game.add_def_str("def_str_epoch" + str(epoch) + ".pkl")

        # Simulate and extend the payoff matrix.
        time_extend_game = time.time()
        game = simulation.simulate_expanded_game(game=game,
                                                 n_processes=n_processes,
                                                 save_dir=epoch_dir,
                                                 summary_writer=writer)
        time_extend_game = time.time() - time_extend_game

        mem3 = proc.memory_info().rss

        # Find a Nash equilibrium using Gambit analysis.
        time_gambit = time.time()
        payoffmatrix_def = game.payoffmatrix_def
        payoffmatrix_att = game.payoffmatrix_att
        logger.info("Begin Gambit analysis.")
        nash_att, nash_def = ga.do_gambit_analysis(payoffmatrix_def, payoffmatrix_att)
        ga.add_new_NE(game, nash_att, nash_def, epoch)
        game.env.attacker.nn_att = None
        game.env.defender.nn_def = None
        fp.save_pkl(game, game_path)
        time_gambit = time.time() - time_gambit

        logger.info("RESULTS:")
        logger.info(' - a_BD_list: {}'.format(game.att_BD_list))
        logger.info(' - aPayoff: {}'.format(game.att_payoff))
        logger.info(' - d_BD_list: {}'.format(game.def_BD_list))
        logger.info(' - dPayoff: {}'.format(game.def_payoff))
        logger.info("MEM: {}, {}, {}.".format(
            (mem1 - mem0) / mem0,
            (mem2 - mem0) / mem0,
            (mem3 - mem0) / mem0))
        logger.info("TIME: ")
        if n_processes == 1:
            logger.info(f" - Training attacker: {time_train_attacker}")
            logger.info(f" - Training defender: {time_train_defender}")
        else:
            logger.info(f" - Training: {time_training}")
        logger.info(f" - Extend game: {time_extend_game}")
        logger.info(f" - Gambit: {time_gambit}")
        logger.info("Round_" + str(epoch) + " is done and the game was saved.")
        logger.info("=======================================================")

        count -= 1
        sys.stdout.flush()

    # TODO: make sure this is correct.
    logger.info("END: " + str(epoch))
    os._exit(os.EX_OK)
# ga.encode_gambit_file(poDef, poAtt)
# # ga.gambit_analysis()
# ga.decode_gambit_file()

n = 10
poDef = np.random.normal(size=(n, n))
poAtt = np.random.normal(size=(n, n))
poDef = np.round(poDef, 2)
poAtt = np.round(poAtt, 2)
# print(poAtt)

t1 = time.time()
nash_att, nash_def = ga.do_gambit_analysis(poDef, poAtt)
t2 = time.time()
print("time:", t2 - t1)
print(nash_att, nash_def)

# nash_att, nash_def = ga.decode_gambit_file()
# print(nash_att, nash_def)

# a = '19/30,0,11/30,0,0,0,0,0,0,0,34/101,0,0,0,67/101,0,0,0,0,0'
# b = a.split(',')
# b = float(b)
# c = np.array(b, dtype=np.float)
# print(c)
# print(c[0])