Code example #1
def load_all_policies(env, str_set, opp_identity: int):
    """ Load all of the strategies for an agent.

    :param env: Environment the policies act in.
    :param str_set: Iterable of strategy names (pickle filenames).
    :param opp_identity: ID of the opponent (0: defender, 1: attacker).
    :return: Dictionary from strategy names to `ActWrapper` policies.
    :rtype: dict
    """
    if opp_identity == 0:  # Pick a defender's strategy.
        path = settings.get_defender_strategy_dir()
    elif opp_identity == 1:
        path = settings.get_attacker_strategy_dir()
    else:
        raise ValueError("identity is neither 0 nor 1!")

    str_dict = {}
    count = 1

    for picked_str in str_set:

        # The initial policy is a function, so we do not need to load any parameters.
        if count == 1 and "epoch1" in picked_str:
            str_dict[picked_str] = fp.load_pkl(osp.join(path, picked_str))
            count += 1
            continue

        # Load the policy's parameters for epoch > 1.
        str_dict[picked_str] = torch.load(osp.join(path, picked_str))

    return str_dict
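
A minimal usage sketch (hypothetical: `env` and the strategy filenames below are placeholders, not the project's actual artifacts), showing how the returned dictionary can later feed the fast-sampling path of `sample_strategy_from_mixed` in the next example:

# Hypothetical usage sketch; `env` and the filenames are placeholders.
str_set = ['att_str_epoch1.pkl', 'att_str_epoch2.pkl', 'att_str_epoch3.pkl']
# opp_identity=1 loads the attacker's strategies from the attacker directory.
att_policies = load_all_policies(env, str_set, opp_identity=1)
print(att_policies.keys())  # Maps strategy filename -> loaded policy.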
Code example #2
def sample_strategy_from_mixed(env, str_set, mix_str, identity, str_dict=None):
    """ Sample a pure strategy from a mixed strategy.

    Note: str in str_set should include .pkl.

    :param env:
    :param str_set:
    :param mix_str:
    :param identity:
    :param str_dict:
    """
    assert env.training_flag != identity
    if len(str_set) != len(mix_str):
        raise ValueError(
            "Length of mixed strategies does not match number of strategies.")

    mix_str = np.array(mix_str)

    if np.sum(mix_str) != 1.0:
        mix_str = mix_str / np.sum(mix_str)

    picked_str = np.random.choice(str_set, p=mix_str)
    # TODO: modification for fast sampling.
    if str_dict is not None:
        return str_dict[picked_str], picked_str

    if not fp.isInName('.pkl', name=picked_str):
        raise ValueError('The strategy picked is not a pickle file.')

    if identity == 0:  # pick a defender's strategy
        path = settings.get_defender_strategy_dir()
    elif identity == 1:
        path = settings.get_attacker_strategy_dir()
    else:
        raise ValueError("identity is neither 0 or 1!")

    if not fp.isExist(osp.join(path, picked_str)):
        raise ValueError('The strategy picked does not exist!')

    if "epoch1.pkl" in picked_str:
        act = fp.load_pkl(osp.join(path, picked_str))
        return act, picked_str

    act = torch.load(osp.join(path, picked_str))
    return act, picked_str
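
A hedged usage sketch continuing the example above (`env`, the mixture values, and `att_policies` are placeholders, not the project's actual data):

# Hypothetical usage sketch; the mixture values are illustrative.
mix_str = [0.5, 0.3, 0.2]
# Assumes the defender (identity 0) is currently training, per the assert above.
# Slow path: the picked policy is loaded from disk on every call.
act, picked = sample_strategy_from_mixed(env, str_set, mix_str, identity=1)
# Fast path: reuse the policies preloaded by load_all_policies().
act, picked = sample_strategy_from_mixed(env, str_set, mix_str, identity=1,
                                         str_dict=att_policies)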
Code example #3
def _train_classifier(classifier, buffer_paths, mixture, env,
                      test_split: float, training_attacker: bool):
    """ Train an opponent classifier. """
    # Load all the replay buffers and merge/split them.
    logger.info(f"Loading replay buffers from: ")
    labels = []
    replay_buffers = []
    for buffer_i, path in enumerate(buffer_paths):
        logger.info(f"  - {path}")
        replay_buffers += [fp.load_pkl(path)]
        labels += [np.ones([len(replay_buffers[-1])]) * buffer_i]
    replay_buffer = merge_replay_buffers(replay_buffers)
    # We only want the state.
    replay_buffer = [x[0] for x in replay_buffer._storage]
    replay_buffer = np.array(replay_buffer)
    labels = np.ravel(labels)

    assert replay_buffer.shape[0] == labels.shape[0]

    # Shuffle the data.
    new_indices = np.random.permutation(len(labels))
    replay_buffer = replay_buffer[new_indices]
    labels = labels[new_indices]

    # Train/test split.
    n_test_data = int(len(labels) * test_split)

    # Train the opponent classifier.
    classifier = supervised_learning(net=classifier,
                                     train_X=replay_buffer[:-n_test_data],
                                     train_Y=labels[:-n_test_data],
                                     test_X=replay_buffer[-n_test_data:],
                                     test_Y=labels[-n_test_data:],
                                     criterion=gin.REQUIRED,
                                     n_epochs=gin.REQUIRED,
                                     eval_freq=gin.REQUIRED,
                                     batch_size=gin.REQUIRED,
                                     log_dir=settings.get_run_dir())
    return classifier
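
The `gin.REQUIRED` arguments are resolved from the run's gin configuration. A hypothetical set of bindings (names and values are illustrative, assuming `supervised_learning` is registered as gin-configurable; the project's actual config may differ) might look like:

import gin
import torch

# Hypothetical bindings; the real values live in the run's .gin config file.
gin.bind_parameter('supervised_learning.criterion', torch.nn.CrossEntropyLoss())
gin.bind_parameter('supervised_learning.n_epochs', 50)
gin.bind_parameter('supervised_learning.eval_freq', 5)
gin.bind_parameter('supervised_learning.batch_size', 64)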
Code example #4
def init_game(saved_env_name: str = None, env_name: str = None):
    """ First attempts to load a saved environment, if not builds new enviornment.

    :param saved_env_name: Name of saved environment name to load.
    :param env_name: Name of environment to create.
    """
    assert (saved_env_name is not None) or (env_name is not None)

    if saved_env_name is not None:
        logger.info(f"Loading environment: {saved_env_name}")
        path = osp.join(settings.get_env_data_dir(), f"{saved_env_name}.pkl")
        if not osp.exists(path):
            raise ValueError("The env being loaded does not exist.")
        env = fp.load_pkl(path)

    else:
        env = DagGenerator.get_env_data_dir(env_name)

    # save graph copy
    env.save_graph_copy()
    env.save_mask_copy()  # TODO: change transfer

    # create players and point to their env
    env.create_players()
    env.create_action_space()

    # initialize game data
    game = EmpiricalGame(env)
    game.env.defender.set_env_belong_to(game.env)
    game.env.attacker.set_env_belong_to(game.env)

    # TODO: These re-bindings look redundant with the calls on game.env above.
    env.defender.set_env_belong_to(env)
    env.attacker.set_env_belong_to(env)

    return game
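
A minimal usage sketch (the environment name "run_env" is a placeholder):

# Hypothetical usage sketch.
game = init_game(saved_env_name='run_env')  # Loads <env_data_dir>/run_env.pkl.
# or, to build a fresh environment instead:
game = init_game(env_name='run_env')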
Code example #5
def initialize(load_env=None, env_name=None, n_processes: int = 1):
    """ Initialize the empirical game and simulate the uniform-strategy profile for epoch 1. """
    logger.info("=======================================================")
    logger.info("=======Begin Initialization and first epoch============")
    logger.info("=======================================================")

    # Create Environment
    if isinstance(load_env, str):
        path = osp.join(settings.get_env_data_dir(), "{}.pkl".format(load_env))
        if not fp.isExist(path):
            raise ValueError("The env being loaded does not exist.")
        env = fp.load_pkl(path)
    else:
        # env is created and saved.
        env = dag.env_rand_gen_and_save(env_name)

    # save graph copy
    env.save_graph_copy()
    env.save_mask_copy()  # TODO: change transfer

    # create players and point to their env
    env.create_players()
    env.create_action_space()

    # print root node
    roots = env.get_Roots()
    logger.info(f"Root Nodes: {roots}")
    ed = env.get_ORedges()
    logger.info(f"Or edges: {ed}")

    # initialize game data
    game = empirical_game.EmpiricalGame(env)
    game.env.defender.set_env_belong_to(game.env)
    game.env.attacker.set_env_belong_to(game.env)

    # TODO: These re-bindings look redundant with the calls on game.env above.
    env.defender.set_env_belong_to(env)
    env.attacker.set_env_belong_to(env)

    # uniform strategy has been produced ahead of time
    logger.info("Epoch 1")
    epoch = 1
    epoch_dir = osp.join(settings.get_results_dir(), f"epoch_{epoch}")
    writer = SummaryWriter(logdir=epoch_dir)

    act_att = 'att_str_epoch1.pkl'
    act_def = 'def_str_epoch1.pkl'

    game.add_att_str(act_att)
    game.add_def_str(act_def)

    logger.info('Begin simulation for uniform strategy.')
    aReward, dReward = simulation.simulate_profile(
        env=game.env,
        game=game,
        nn_att=act_att,
        nn_def=act_def,
        n_episodes=game.num_episodes,
        n_processes=n_processes,
        save_dir=epoch_dir,
        summary_writer=writer)
    logger.info('Done simulation for uniform strategy.')

    game.init_payoffmatrix(dReward, aReward)
    ne = {}
    ne[0] = np.array([1], dtype=np.float32)
    ne[1] = np.array([1], dtype=np.float32)
    game.add_nasheq(epoch, ne)

    # save a copy of game data
    game_path = osp.join(settings.get_run_dir(), "game.pkl")
    fp.save_pkl(game, game_path)

    sys.stdout.flush()
    return game
Code example #6
def _run_simulation(game, nn_att_saved, nn_def_saved, attacker_dir, defender_dir, collect_trajectories: bool=False, pos=None):
    """ Simulate a single episode. """
    env = game.env
    env.reset_everything()
    T = env.T
    G = env.G
    _, targetset = env.get_Targets()
    attacker = env.attacker
    defender = env.defender

    aReward = 0
    dReward = 0

    nn_att = copy.copy(nn_att_saved)
    nn_def = copy.copy(nn_def_saved)
    if pos is not None:
        idx_def, idx_att = pos
        # Load attacker.
        if isinstance(nn_att, np.ndarray):
            str_set = game.total_strategies[1]
            nn_att_act = np.random.choice(str_set, p=nn_att)
        else:
            nn_att_act = game.total_strategies[1][idx_att]
        # Load defender.
        if isinstance(nn_def, np.ndarray):
            str_set = game.total_strategies[0]
            nn_def_act = np.random.choice(str_set, p=nn_def)
        else:
            nn_def_act = game.total_strategies[0][idx_def]
    else:
        # Load attacker.
        if isinstance(nn_att, np.ndarray):
            str_set = game.att_str
            nn_att = np.random.choice(str_set, p=nn_att)

        path = osp.join(attacker_dir, nn_att)
        try:
            nn_att_act = torch.load(path)
        except Exception:  # Not a torch checkpoint; fall back to pickle.
            nn_att_act = fp.load_pkl(path)

        # Load defender.
        if isinstance(nn_def, np.ndarray):
            str_set = game.def_str
            nn_def = np.random.choice(str_set, p=nn_def)

        path = osp.join(defender_dir, nn_def)
        try:
            nn_def_act = torch.load(path)
        except Exception:  # Not a torch checkpoint; fall back to pickle.
            nn_def_act = fp.load_pkl(path)

    if collect_trajectories:
        traj = []
        exp = {}

    for t in range(T):

        if collect_trajectories:
            exp["observations"] = {}
            exp["observations"]["attacker"] = attacker.att_obs_constructor()
            exp["observations"]["defender"] = defender.def_obs_constructor(G)

        attacker.att_greedy_action_builder_single(G, nn_att_act)
        defender.def_greedy_action_builder_single(G, nn_def_act)

        att_action_set = attacker.attact
        def_action_set = defender.defact

        if collect_trajectories:
            exp["actions"] = {}
            exp["actions"]["attacker"] = att_action_set
            exp["actions"]["attacker"] = att_action_set

        for attack in att_action_set:
            if isinstance(attack, tuple):
                # check OR node
                aReward += G.edges[attack]['cost']
                if random.uniform(0, 1) <= G.edges[attack]['actProb']:
                    G.nodes[attack[-1]]['state'] = 1
            else:
                # check AND node
                aReward += G.nodes[attack]['aCost']
                if random.uniform(0, 1) <= G.nodes[attack]['actProb']:
                    G.nodes[attack]['state'] = 1
        # defender's action
        for node in def_action_set:
            G.nodes[node]['state'] = 0
            dReward += G.nodes[node]['dCost']

        for node in targetset:
            if G.nodes[node]['state'] == 1:
                aReward += G.nodes[node]['aReward']
                dReward += G.nodes[node]['dPenalty']
        # logger.info('aRew:', aReward, 'dRew:', dReward)

        # update players' observations
        # update defender's observation
        defender.update_obs(defender.get_def_hadAlert(G))
        defender.save_defact2prev()
        defender.defact.clear()
        # update attacker's observation
        attacker.update_obs(attacker.get_att_isActive(G))
        attacker.attact.clear()

        if collect_trajectories:
            traj += [exp]
            exp = {}

    if collect_trajectories:
        return dReward, aReward, traj
    else:
        return dReward, aReward
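
A hedged usage sketch for simulating one pure-strategy profile (assumes `game` comes from `init_game`/`initialize` above and that the epoch-1 strategy files exist in the strategy directories):

# Hypothetical usage sketch.
d_rew, a_rew = _run_simulation(game,
                               nn_att_saved='att_str_epoch1.pkl',
                               nn_def_saved='def_str_epoch1.pkl',
                               attacker_dir=settings.get_attacker_strategy_dir(),
                               defender_dir=settings.get_defender_strategy_dir())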
Code example #7
import os

import attackgraph.common.file_ops as fp

game_path = os.path.join(os.getcwd(), 'empirical_game', 'game.pkl')

game = fp.load_pkl(game_path)

print(game.att_str)
print(game.def_str)
print(game.nasheq)
print(game.payoffmatrix_def)
print(game.payoffmatrix_att)