def load_all_policies(env, str_set, opp_identity: int):
    """ Load all of the strategies for an agent.

    :param env: Game environment.
    :param str_set: Set of strategy names.
    :param opp_identity: ID of the opponent (0 defender, 1 attacker).
    :return: Dictionary from strategy names to `ActWrapper` policies.
    :rtype: dict
    """
    if opp_identity == 0:  # Pick a defender's strategy.
        path = settings.get_defender_strategy_dir()
    elif opp_identity == 1:
        path = settings.get_attacker_strategy_dir()
    else:
        raise ValueError("identity is neither 0 nor 1!")

    str_dict = {}
    count = 1
    for picked_str in str_set:
        # The initial policy is a function, so we do not need to load any parameters.
        if count == 1 and "epoch1" in picked_str:
            str_dict[picked_str] = fp.load_pkl(osp.join(path, picked_str))
            count += 1
            continue

        # Load the policy's parameters for epoch > 1.
        str_dict[picked_str] = torch.load(osp.join(path, picked_str))

    return str_dict
def sample_strategy_from_mixed(env, str_set, mix_str, identity, str_dict=None):
    """ Sample a pure strategy from a mixed strategy.

    Note: strategy names in `str_set` should include the ".pkl" extension.

    :param env: Game environment.
    :param str_set: Set of strategy names.
    :param mix_str: Mixed strategy (probability weights over `str_set`).
    :param identity: ID of the agent being sampled (0 defender, 1 attacker).
    :param str_dict: Optional cache of pre-loaded policies, keyed by strategy name.
    """
    assert env.training_flag != identity

    if not len(str_set) == len(mix_str):
        raise ValueError("Length of mixed strategies does not match number of strategies.")

    mix_str = np.array(mix_str)
    if np.sum(mix_str) != 1.0:
        mix_str = mix_str / np.sum(mix_str)

    picked_str = np.random.choice(str_set, p=mix_str)

    # TODO: modification for fast sampling.
    if str_dict is not None:
        return str_dict[picked_str], picked_str

    if not fp.isInName('.pkl', name=picked_str):
        raise ValueError('The strategy picked is not a pickle file.')

    if identity == 0:  # Pick a defender's strategy.
        path = settings.get_defender_strategy_dir()
    elif identity == 1:
        path = settings.get_attacker_strategy_dir()
    else:
        raise ValueError("identity is neither 0 nor 1!")

    if not fp.isExist(osp.join(path, picked_str)):
        raise ValueError('The strategy picked does not exist!')

    if "epoch1.pkl" in picked_str:
        act = fp.load_pkl(osp.join(path, picked_str))
        return act, picked_str

    act = torch.load(osp.join(path, picked_str))
    return act, picked_str
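# A minimal usage sketch (not from the original source; the strategy names and the
# uniform mixture below are hypothetical placeholders). Pre-loading the opponent's
# strategies with `load_all_policies` and passing the cache as `str_dict` avoids
# re-reading a policy file from disk on every call to `sample_strategy_from_mixed`.
# Assumes the defender is being trained, i.e. `env.training_flag == 0`.
#
#     str_set = ['att_str_epoch1.pkl', 'att_str_epoch2.pkl']
#     str_dict = load_all_policies(env, str_set, opp_identity=1)
#     act, picked_str = sample_strategy_from_mixed(
#         env, str_set, mix_str=[0.5, 0.5], identity=1, str_dict=str_dict)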
def _train_classifier(classifier, buffer_paths, mixture, env, test_split: float, training_attacker: bool):
    """ Train an opponent classifier. """
    # Load all the replay buffers and merge/split them.
    logger.info("Loading replay buffers from:")
    labels = []
    replay_buffers = []
    for buffer_i, path in enumerate(buffer_paths):
        logger.info(f" - {path}")
        replay_buffers += [fp.load_pkl(path)]
        # Label every transition in this buffer with the index of the buffer that
        # generated it; the classifier predicts this index from the state.
        labels += [np.ones([len(replay_buffers[-1])]) * buffer_i]
    replay_buffer = merge_replay_buffers(replay_buffers)

    # We only want the state.
    replay_buffer = [x[0] for x in replay_buffer._storage]
    replay_buffer = np.array(replay_buffer)

    labels = np.ravel(labels)
    assert replay_buffer.shape[0] == labels.shape[0]

    # Shuffle the data.
    new_indices = np.random.permutation(len(labels))
    replay_buffer = replay_buffer[new_indices]
    labels = labels[new_indices]

    # Train/test split.
    n_test_data = int(len(labels) * test_split)

    # Train the opponent classifier.
    classifier = supervised_learning(
        net=classifier,
        train_X=replay_buffer[:-n_test_data],
        train_Y=labels[:-n_test_data],
        test_X=replay_buffer[-n_test_data:],
        test_Y=labels[-n_test_data:],
        criterion=gin.REQUIRED,
        n_epochs=gin.REQUIRED,
        eval_freq=gin.REQUIRED,
        batch_size=gin.REQUIRED,
        log_dir=settings.get_run_dir())

    return classifier
def init_game(saved_env_name: str = None, env_name: str = None):
    """ First attempts to load a saved environment; otherwise builds a new environment.

    :param saved_env_name: Name of the saved environment to load.
    :param env_name: Name of the environment to create.
    """
    assert (saved_env_name is not None) or (env_name is not None)

    if saved_env_name is not None:
        logger.info(f"Loading environment: {saved_env_name}")
        path = osp.join(settings.get_env_data_dir(), f"{saved_env_name}.pkl")
        if not osp.exists(path):
            raise ValueError("The env being loaded does not exist.")
        env = fp.load_pkl(path)
    else:
        env = DagGenerator.get_env_data_dir(env_name)

    # Save graph copy.
    env.save_graph_copy()
    env.save_mask_copy()  # TODO: change transfer

    # Create players and point to their env.
    env.create_players()
    env.create_action_space()

    # Initialize game data.
    game = EmpiricalGame(env)
    game.env.defender.set_env_belong_to(game.env)
    game.env.attacker.set_env_belong_to(game.env)

    # NOTE: these calls appear redundant.
    env.defender.set_env_belong_to(env)
    env.attacker.set_env_belong_to(env)

    return game
def initialize(load_env=None, env_name=None, n_processes: int = 1):
    logger.info("=======================================================")
    logger.info("=======Begin Initialization and first epoch============")
    logger.info("=======================================================")

    # Create environment.
    if isinstance(load_env, str):
        path = osp.join(settings.get_env_data_dir(), "{}.pkl".format(load_env))
        if not fp.isExist(path):
            raise ValueError("The env being loaded does not exist.")
        env = fp.load_pkl(path)
    else:
        # The env is created and saved.
        env = dag.env_rand_gen_and_save(env_name)

    # Save graph copy.
    env.save_graph_copy()
    env.save_mask_copy()  # TODO: change transfer

    # Create players and point to their env.
    env.create_players()
    env.create_action_space()

    # Print root nodes.
    roots = env.get_Roots()
    logger.info(f"Root Nodes: {roots}")
    ed = env.get_ORedges()
    logger.info(f"Or edges: {ed}")

    # Initialize game data.
    game = empirical_game.EmpiricalGame(env)
    game.env.defender.set_env_belong_to(game.env)
    game.env.attacker.set_env_belong_to(game.env)

    # NOTE: these calls appear redundant.
    env.defender.set_env_belong_to(env)
    env.attacker.set_env_belong_to(env)

    # The uniform strategy has been produced ahead of time.
    logger.info("Epoch 1")
    epoch = 1
    epoch_dir = osp.join(settings.get_results_dir(), f"epoch_{epoch}")
    writer = SummaryWriter(logdir=epoch_dir)

    act_att = 'att_str_epoch1.pkl'
    act_def = 'def_str_epoch1.pkl'
    game.add_att_str(act_att)
    game.add_def_str(act_def)

    logger.info('Begin simulation for uniform strategy.')
    aReward, dReward = simulation.simulate_profile(
        env=game.env,
        game=game,
        nn_att=act_att,
        nn_def=act_def,
        n_episodes=game.num_episodes,
        n_processes=n_processes,
        save_dir=epoch_dir,
        summary_writer=writer)
    logger.info('Done simulation for uniform strategy.')

    game.init_payoffmatrix(dReward, aReward)
    ne = {}
    ne[0] = np.array([1], dtype=np.float32)
    ne[1] = np.array([1], dtype=np.float32)
    game.add_nasheq(epoch, ne)

    # Save a copy of game data.
    game_path = osp.join(settings.get_run_dir(), "game.pkl")
    fp.save_pkl(game, game_path)

    sys.stdout.flush()
    return game
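# A minimal usage sketch (hypothetical environment name, not from the original
# source). `initialize` simulates the uniform-strategy profile for epoch 1 and
# writes the game object to <run_dir>/game.pkl, which the inspection script at the
# bottom of this section reads back.
#
#     # Load a previously saved environment pickle:
#     game = initialize(load_env="run_env_B", n_processes=2)
#     # Or generate and save a fresh random DAG environment:
#     game = initialize(env_name="run_env_B")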
def _run_simulation(game, nn_att_saved, nn_def_saved, attacker_dir, defender_dir,
                    collect_trajectories: bool = False, pos=None):
    """ Simulate a single episode. """
    env = game.env
    env.reset_everything()

    T = env.T
    G = env.G
    _, targetset = env.get_Targets()
    attacker = env.attacker
    defender = env.defender
    aReward = 0
    dReward = 0

    nn_att = copy.copy(nn_att_saved)
    nn_def = copy.copy(nn_def_saved)

    if pos is not None:
        idx_def, idx_att = pos

        # Load attacker.
        if isinstance(nn_att, np.ndarray):
            str_set = game.total_strategies[1]
            nn_att_act = np.random.choice(str_set, p=nn_att)
        else:
            nn_att_act = game.total_strategies[1][idx_att]

        # Load defender.
        if isinstance(nn_def, np.ndarray):
            str_set = game.total_strategies[0]
            nn_def_act = np.random.choice(str_set, p=nn_def)
        else:
            nn_def_act = game.total_strategies[0][idx_def]

    else:
        # Load attacker.
        if isinstance(nn_att, np.ndarray):
            str_set = game.att_str
            nn_att = np.random.choice(str_set, p=nn_att)
        path = osp.join(attacker_dir, nn_att)
        try:
            nn_att_act = torch.load(path)
        except Exception:
            nn_att_act = fp.load_pkl(path)

        # Load defender.
        if isinstance(nn_def, np.ndarray):
            str_set = game.def_str
            nn_def = np.random.choice(str_set, p=nn_def)
        path = osp.join(defender_dir, nn_def)
        try:
            nn_def_act = torch.load(path)
        except Exception:
            nn_def_act = fp.load_pkl(path)

    if collect_trajectories:
        traj = []
        exp = {}

    for t in range(T):
        if collect_trajectories:
            exp["observations"] = {}
            exp["observations"]["attacker"] = attacker.att_obs_constructor()
            exp["observations"]["defender"] = defender.def_obs_constructor(G)

        attacker.att_greedy_action_builder_single(G, nn_att_act)
        defender.def_greedy_action_builder_single(G, nn_def_act)
        att_action_set = attacker.attact
        def_action_set = defender.defact

        if collect_trajectories:
            exp["actions"] = {}
            exp["actions"]["attacker"] = att_action_set
            exp["actions"]["defender"] = def_action_set

        # Attacker's actions.
        for attack in att_action_set:
            if isinstance(attack, tuple):
                # Check OR node.
                aReward += G.edges[attack]['cost']
                if random.uniform(0, 1) <= G.edges[attack]['actProb']:
                    G.nodes[attack[-1]]['state'] = 1
            else:
                # Check AND node.
                aReward += G.nodes[attack]['aCost']
                if random.uniform(0, 1) <= G.nodes[attack]['actProb']:
                    G.nodes[attack]['state'] = 1

        # Defender's actions.
        for node in def_action_set:
            G.nodes[node]['state'] = 0
            dReward += G.nodes[node]['dCost']

        # Score compromised target nodes.
        for node in targetset:
            if G.nodes[node]['state'] == 1:
                aReward += G.nodes[node]['aReward']
                dReward += G.nodes[node]['dPenalty']
        # logger.info('aRew:', aReward, 'dRew:', dReward)

        # Update players' observations.
        # Update defender's observation.
        defender.update_obs(defender.get_def_hadAlert(G))
        defender.save_defact2prev()
        defender.defact.clear()

        # Update attacker's observation.
        attacker.update_obs(attacker.get_att_isActive(G))
        attacker.attact.clear()

        if collect_trajectories:
            traj += [exp]
            exp = {}

    if collect_trajectories:
        return dReward, aReward, traj
    return dReward, aReward
import os

import attackgraph.common.file_ops as fp

game_path = os.getcwd() + '/empirical_game/game.pkl'
game = fp.load_pkl(game_path)

print(game.att_str)
print(game.def_str)
print(game.nasheq)
print(game.payoffmatrix_def)
print(game.payoffmatrix_att)