def sim_and_modifiy_Series():
    """Simulate the payoffs for the newest strategy pair and extend the payoff matrices.

    The game object is loaded from ``<cwd>/data/game.pkl``. The cells that
    involve the last strategy index (``new_dim - 1``) are simulated with
    ``series_sim``: first the new column ``(i, new_dim - 1)`` for every older
    defender index ``i``, then the complete new row ``(new_dim - 1, j)`` for
    every attacker index ``j``. The rewards are rounded to two decimals and
    handed to ``game.add_col_att`` / ``game.add_col_def`` /
    ``game.add_row_att`` / ``game.add_row_def``, after which the game object
    is written back to the same pickle file.

    :raises ValueError: if the two values returned by
        ``game.dim_payoff_def()`` differ, or the two values returned by
        ``game.num_str()`` differ.
    """
    # TODO: make sure this is correct
    print('Begin simulation and modify payoff matrix.')
    path = os.getcwd() + '/data/game.pkl'
    game = fp.load_pkl(path)

    env = game.env
    num_episodes = game.num_episodes

    # TODO: add str first and then calculate payoff
    old_dim, old_dim1 = game.dim_payoff_def()
    new_dim, new_dim1 = game.num_str()
    if old_dim != old_dim1 or new_dim != new_dim1:
        raise ValueError("Payoff dimension does not match.")

    def_str_list = game.def_str
    att_str_list = game.att_str

    # Positions are (defender index, attacker index) pairs.
    last = new_dim - 1
    position_col_list = [(i, last) for i in range(last)]
    position_row_list = [(last, j) for j in range(new_dim)]

    def simulate(positions):
        """Run series_sim for each position; return (attacker, defender) reward lists."""
        att_rewards = []
        def_rewards = []
        for idx_def, idx_att in positions:
            # NOTE: an MPI-based alternative (do_MPI_sim, guarded by MPI_flag)
            # was sketched here in the past but is disabled.
            aReward, dReward = series_sim(env, game, att_str_list[idx_att],
                                          def_str_list[idx_def], num_episodes)
            att_rewards.append(aReward)
            def_rewards.append(dReward)
        return att_rewards, def_rewards

    # TODO: check the path is correct
    # The column is simulated before the row, matching the original call order.
    att_col, def_col = simulate(position_col_list)
    att_row, def_row = simulate(position_row_list)

    # The shape is passed positionally: the ``newshape`` keyword of np.reshape
    # is deprecated in recent NumPy releases. Each column is sized from its
    # own list.
    game.add_col_att(
        np.reshape(np.round(np.array(att_col), 2), (len(att_col), 1)))
    game.add_col_def(
        np.reshape(np.round(np.array(def_col), 2), (len(def_col), 1)))
    # [None] turns the 1-D reward vector into a single-row 2-D array.
    game.add_row_att(np.round(np.array(att_row), 2)[None])
    game.add_row_def(np.round(np.array(def_row), 2)[None])

    fp.save_pkl(game, path=path)
    print("Done simulation and modify payoff matrix.")
def initialize(load_env=None, env_name=None):
    """Build the environment and game data, run the first epoch, and return the game.

    :param load_env: if a string, the environment is loaded from
        ``<cwd>/env_data/<load_env>.pkl``; otherwise a new environment is
        produced by ``dag.env_rand_gen_and_save(env_name)``.
    :param env_name: name passed to ``dag.env_rand_gen_and_save`` when no
        environment is loaded.
    :return: the initialised game object, which has also been saved to
        ``<cwd>/game_data/game.pkl``.
    :raises ValueError: if ``load_env`` is a string but the corresponding
        pickle file does not exist.
    """
    for banner_line in (
        "=======================================================",
        "=======Begin Initialization and first epoch============",
        "=======================================================",
    ):
        print(banner_line)

    # Create Environment
    if not isinstance(load_env, str):
        # env is created and saved.
        env = dag.env_rand_gen_and_save(env_name)
    else:
        env_path = os.getcwd() + '/env_data/' + load_env + '.pkl'
        if not fp.isExist(env_path):
            raise ValueError("The env being loaded does not exist.")
        env = fp.load_pkl(env_path)

    # save graph copy
    env.save_graph_copy()
    env.save_mask_copy()

    # create players and point to their env
    env.create_players()
    env.create_action_space()

    # load param
    settings = jp.load_json_data(os.getcwd() + '/network_parameters/param.json')

    # initialize game data
    game = game_data.Game_data(
        env,
        num_episodes=settings['num_episodes'],
        threshold=settings['threshold'],
    )
    game.set_hado_param(param=settings['hado_param'])
    game.set_hado_time_step(settings['retrain_timesteps'])

    # Both the env held by the game and the local env get their players
    # pointed back at them (the two may or may not be the same object).
    game.env.defender.set_env_belong_to(game.env)
    game.env.attacker.set_env_belong_to(game.env)

    env.defender.set_env_belong_to(env)
    env.attacker.set_env_belong_to(env)

    # uniform strategy has been produced ahead of time
    print("epoch 1:", datetime.datetime.now())
    first_epoch = 1

    att_strategy = 'att_str_epoch1.pkl'
    def_strategy = 'def_str_epoch1.pkl'

    game.add_att_str(att_strategy)
    game.add_def_str(def_strategy)

    print('Begin simulation for uniform strategy.')
    sys.stdout.flush()
    # simulate using random strategies and initialize payoff matrix
    # NOTE: an MPI-based alternative (do_MPI_sim, guarded by MPI_flag) is disabled.
    att_reward, def_reward = series_sim(
        game.env, game, att_strategy, def_strategy, game.num_episodes)
    print('Done simulation for uniform strategy.')
    sys.stdout.flush()

    # Note the argument order: defender reward first, then attacker reward.
    game.init_payoffmatrix(def_reward, att_reward)

    # With a single strategy per player, each equilibrium mixture is [1].
    nash_eq = {
        0: np.array([1], dtype=np.float32),
        1: np.array([1], dtype=np.float32),
    }
    game.add_nasheq(first_epoch, nash_eq)

    # save a copy of game data
    # NOTE(review): other functions in this file load '/data/game.pkl' --
    # confirm that '/game_data/game.pkl' is the intended location.
    fp.save_pkl(game, os.getcwd() + '/game_data/game.pkl')

    sys.stdout.flush()
    return game
def _simulate_positions(env, game, str_book_def, str_book_att, positions, num_episodes):
    """Run series_sim for each (defender idx, attacker idx) pair; return (attacker, defender) reward lists."""
    att_rewards = []
    def_rewards = []
    for idx_def, idx_att in positions:
        # TODO: reset the load path
        aReward, dReward = series_sim(env, game, str_book_att[idx_att],
                                      str_book_def[idx_def], num_episodes)
        att_rewards.append(aReward)
        def_rewards.append(dReward)
    return att_rewards, def_rewards


def simulate_payoff(co_payoff_matrix_def, co_payoff_matrix_att, str_book_def, str_book_att, new_def, new_att, save_path='./payoff_data/'):
    """
    This function simulates the partial payoff matrix of the combined game
    given newly produced strategies.

    Rows of the payoff matrices are indexed by defender strategies and columns
    by attacker strategies (the shape of ``co_payoff_matrix_def`` is read as
    ``(num_str_def, num_str_att)``). For every new attacker strategy a column
    over the existing defender strategies is simulated; then for every new
    defender strategy a row over all attacker strategies (old and new) is
    simulated. Rewards are rounded to two decimals and attached with
    ``add_col`` / ``add_row``.

    :param co_payoff_matrix_def: The defender's partial payoff matrix of the combined game.
    :param co_payoff_matrix_att: The attacker's partial payoff matrix of the combined game
        (assumed to have the same shape as the defender's -- TODO confirm).
    :param str_book_def: a book recording the defender's strategy name at each
        position, e.g. {position: str_name}. Updated in place with the new strategies.
    :param str_book_att: the same book for the attacker. Updated in place.
    :param new_def: newly produced defender's strategies.
    :param new_att: newly produced attacker's strategies.
    :param save_path: currently unused; kept for interface compatibility.
    :return: (co_payoff_matrix_def, co_payoff_matrix_att, str_book_def, str_book_att)
    :raises ValueError: if a position for a new strategy is already present in
        the corresponding strategy book.
    """
    path = os.getcwd() + '/data/game.pkl'
    game = fp.load_pkl(path)

    env = game.env
    num_episodes = game.num_episodes

    # set positions.
    new_att = list(new_att)
    new_def = list(new_def)
    num_str_def, num_str_att = np.shape(co_payoff_matrix_def)
    num_new_str_def = len(new_def)
    num_new_str_att = len(new_att)
    new_dim_att = num_str_att + num_new_str_att

    # Register the new strategies, each in its OWN book. (The defender's
    # entries used to be written into the attacker's book by mistake.)
    for offset, str_name in enumerate(new_def):
        idx = num_str_def + offset
        if idx in str_book_def:
            raise ValueError("idx already exists.")
        str_book_def[idx] = str_name

    for offset, str_name in enumerate(new_att):
        idx = num_str_att + offset
        if idx in str_book_att:
            raise ValueError("idx already exists.")
        str_book_att[idx] = str_name

    # add column first: one column per new attacker strategy, covering the
    # existing defender strategies only.
    position_col = [
        [(k, num_str_att + i) for k in range(num_str_def)]
        for i in range(num_new_str_att)
    ]
    # Then add row: one row per new defender strategy, covering every attacker
    # strategy including the new ones. New rows start after the existing
    # num_str_def rows.
    position_row = [
        [(num_str_def + i, k) for k in range(new_dim_att)]
        for i in range(num_new_str_def)
    ]

    att_col = []
    att_row = []
    def_col = []
    def_row = []

    # All columns are simulated before any row.
    for positions in position_col:
        subcol_att, subcol_def = _simulate_positions(
            env, game, str_book_def, str_book_att, positions, num_episodes)
        att_col.append(subcol_att)
        def_col.append(subcol_def)

    for positions in position_row:
        subrow_att, subrow_def = _simulate_positions(
            env, game, str_book_def, str_book_att, positions, num_episodes)
        att_row.append(subrow_att)
        def_row.append(subrow_def)

    # Columns are attached before rows so that each new row (length
    # new_dim_att) matches the already widened matrix. The reshape target is
    # passed positionally because the ``newshape`` keyword of np.reshape is
    # deprecated in recent NumPy releases.
    for col in att_col:
        col = np.reshape(np.round(np.array(col), 2), (num_str_def, 1))
        co_payoff_matrix_att = add_col(co_payoff_matrix_att, col)

    for row in att_row:
        row = np.round(np.array(row), 2)[None]
        co_payoff_matrix_att = add_row(co_payoff_matrix_att, row)

    for col in def_col:
        col = np.reshape(np.round(np.array(col), 2), (num_str_def, 1))
        co_payoff_matrix_def = add_col(co_payoff_matrix_def, col)

    for row in def_row:
        row = np.round(np.array(row), 2)[None]
        co_payoff_matrix_def = add_row(co_payoff_matrix_def, row)

    return co_payoff_matrix_def, co_payoff_matrix_att, str_book_def, str_book_att