Example 1
def _simulate_in_series(env, game, nn_att, nn_def, pos, n_episodes: int, save_dir: str = None, summary_writer=None, collect_trajectories: bool=False):
    """ Run simulations in series on a single processor. """
    attacker_rewards = np.zeros([n_episodes])
    defender_rewards = np.zeros([n_episodes])

    trajectories = []

    for episode_i in range(n_episodes):
        if collect_trajectories:
            defender_reward, attacker_reward, traj = _run_simulation(
                game,
                nn_att,
                nn_def,
                settings.get_attacker_strategy_dir(),
                settings.get_defender_strategy_dir(),
                collect_trajectories,
                pos)
            trajectories.append(traj)
        else:
            defender_reward, attacker_reward = _run_simulation(
                game,
                nn_att,
                nn_def,
                settings.get_attacker_strategy_dir(),
                settings.get_defender_strategy_dir(),
                collect_trajectories,
                pos)
        attacker_rewards[episode_i] = attacker_reward
        defender_rewards[episode_i] = defender_reward

    if collect_trajectories:
        return attacker_rewards, defender_rewards, trajectories
    else:
        return attacker_rewards, defender_rewards
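
A minimal usage sketch (assuming env, game, nn_att, nn_def, and pos have been constructed elsewhere; the episode count is illustrative):

import numpy as np

# Hypothetical call: average the per-episode payoffs from 100 serial simulations.
att_rews, def_rews = _simulate_in_series(env, game, nn_att, nn_def, pos, n_episodes=100)
print("Mean attacker payoff:", np.mean(att_rews))
print("Mean defender payoff:", np.mean(def_rews))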
Example 2
    def load_all_policies(env, str_set, opp_identity: int):
        """ Load all of the strategies for an agent.

        :param env:
        :param str_set:
        :param opp_identity: ID of the opponent (0/1 defender/attacker).
        :return: Dictionary from strings to `ActWrapper` policies.
        :rtype: dict
        """
        if opp_identity == 0:  # Pick a defender's strategy.
            path = settings.get_defender_strategy_dir()
        elif opp_identity == 1:
            path = settings.get_attacker_strategy_dir()
        else:
            raise ValueError("identity is neither 0 or 1!")

        str_dict = {}
        count = 1

        for picked_str in str_set:

            # The initial policy is a function, so we do not need to load any parameters.
            if count == 1 and "epoch1" in picked_str:
                str_dict[picked_str] = fp.load_pkl(osp.join(path, picked_str))
                count += 1
                continue

            # Load the policy's parameters for epochs > 1.
            str_dict[picked_str] = torch.load(osp.join(path, picked_str))

        return str_dict
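
A sketch of how the returned dictionary might be used; the file names are illustrative, and the helper is shown called directly although it appears to be defined as a method:

# Hypothetical attacker strategy files (opp_identity=1 loads from the attacker directory).
str_set = ["att_str_epoch1.pkl", "att_str_epoch2.pkl"]
str_dict = load_all_policies(env, str_set, opp_identity=1)
# str_dict maps each file name to its loaded policy, so repeated sampling
# (e.g. via sample_strategy_from_mixed below) does not touch the disk again.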
Example 3
def _simulate_in_parallel(env, game, nn_att, nn_def, n_processes: int, n_episodes: int, save_dir: str = None, summary_writer=None):
    """ Run simulations in parallel processes. """
    worker_processes = []
    simulation_request_queue = multiprocessing.SimpleQueue()
    attacker_reward_queue = multiprocessing.SimpleQueue()
    defender_reward_queue = multiprocessing.SimpleQueue()
    # Set up all the worker processes.
    for _ in range(n_processes):
        worker_processes += [SimulationWorker(
            simulation_request_queue,
            defender_reward_queue,
            attacker_reward_queue,
            nn_att,
            nn_def,
            settings.get_attacker_strategy_dir(),
            settings.get_defender_strategy_dir())]
        worker_processes[-1].start()
    # Request all simulations.
    for _ in range(n_episodes):
        simulation_request_queue.put(CloudpickleWrapper(game))
    # Send sentinel values (one per worker) to tell the processes to shut down cleanly.
    for _ in range(n_processes):
        simulation_request_queue.put(None)
    for process in worker_processes:
        process.join()

    # Aggregate results.
    attacker_rewards = np.zeros([n_episodes])
    defender_rewards = np.zeros([n_episodes])
    for episode_i in range(n_episodes):
        attacker_rewards[episode_i] = attacker_reward_queue.get()
        defender_rewards[episode_i] = defender_reward_queue.get()

    return attacker_rewards, defender_rewards
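
Usage mirrors the serial helper; a sketch with illustrative worker and episode counts:

import numpy as np

# Hypothetical call: 4 workers each consume requests from the queue until the None sentinel.
att_rews, def_rews = _simulate_in_parallel(env, game, nn_att, nn_def, n_processes=4, n_episodes=100)
print("Mean defender payoff:", np.mean(def_rews))
print("Mean attacker payoff:", np.mean(att_rews))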
Example 4
def train(game,
          identity,
          opponent_mix_str,
          epoch,
          writer,
          save_path: str = None,
          scope: str = None):
    """ Train a best response policy.

    :param game:
    :param identity:
    :param opponent_mix_str:
    :param epoch:
    """
    env = game.env
    env.reset_everything()
    env.set_training_flag(identity)

    if identity:  # Training the attacker.
        if len(opponent_mix_str) != len(game.def_str):
            raise ValueError("The length must match while training.")
        env.defender.set_mix_strategy(opponent_mix_str)
        env.defender.set_str_set(game.def_str)
        if save_path is None:
            save_path = osp.join(settings.get_attacker_strategy_dir(),
                                 "att_str_epoch" + str(epoch) + ".pkl")

    else:  # Training the defender.
        if len(opponent_mix_str) != len(game.att_str):
            raise ValueError("The length must match while training.")
        env.attacker.set_mix_strategy(opponent_mix_str)
        env.attacker.set_str_set(game.att_str)
        if save_path is None:
            save_path = osp.join(settings.get_defender_strategy_dir(),
                                 "def_str_epoch" + str(epoch) + ".pkl")

    name = "attacker" if identity else "defender"
    scope = name if scope is None else scope
    with gin.config_scope(scope):
        learner = learner_factory()
        policy, best_deviation, _, report = learner.learn_multi_nets(
            env, epoch=epoch, writer=writer, game=game)

    # Register the policy in memory so strategies do not have to be reloaded from disk every time.
    game.total_strategies[identity].append(policy)

    torch.save(policy, save_path, pickle_module=dill)
    # fp.save_pkl(replay_buffer, save_path[:-4]+".replay_buffer.pkl")
    return best_deviation, report
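
A sketch of a best-response training call; the uniform opponent mixture and epoch number are illustrative, and writer is assumed to be an already-constructed SummaryWriter:

import numpy as np

# Hypothetical call: train an attacker best response (identity=1) against a uniform
# mixture over the defender's current strategy set.
mix = np.ones(len(game.def_str)) / len(game.def_str)
best_deviation, report = train(game, identity=1, opponent_mix_str=mix, epoch=2, writer=writer)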
Example 5
def run(load_env, env_name, n_processes):
    """ Run the double-oracle algorithm. """
    # Create initial policies.
    fp.save_pkl(
        uniform_str_init.act_att,
        osp.join(settings.get_attacker_strategy_dir(), "att_str_epoch1.pkl"))
    fp.save_pkl(
        uniform_str_init.act_def,
        osp.join(settings.get_defender_strategy_dir(), "def_str_epoch1.pkl"))

    game = initialize(load_env=load_env,
                      env_name=env_name,
                      n_processes=n_processes)
    _run(game.env,
         game,
         meta_method_name=FLAGS.meta_method,
         n_processes=n_processes)
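
A hypothetical entry point; the absl wiring and flag definitions are assumed to live elsewhere in the repository (FLAGS.env and FLAGS.meta_method are already referenced in this code):

from absl import app, flags

FLAGS = flags.FLAGS

def main(_):
    # Illustrative values; FLAGS.meta_method is still read inside run().
    run(load_env=FLAGS.env, env_name=None, n_processes=2)

if __name__ == "__main__":
    app.run(main)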
Example 6
    def run(self):
        # Because we are "spawning" the process instead of "forking" the process, we need to
        # reimport the run's configurations.
        # Reparse the flags for this process.
        FLAGS = flags.FLAGS
        FLAGS(sys.argv)
        # Reload gin configurations for this process.
        gin_files = [
            osp.join(settings.SRC_DIR, "configs", f"{x}.gin")
            for x in FLAGS.config_files
        ]
        gin.parse_config_files_and_bindings(config_files=gin_files,
                                            bindings=FLAGS.config_overrides,
                                            skip_unknown=False)

        policy_name = "attacker" if self.train_attacker else "defender"

        for job in iter(self.job_queue.get, None):
            # The game we're given has no policies and has not been initialized.
            game, opponent = job
            game = game()  # Unpickle game.

            # Register the opponent we will be playing as the opponent's only policy.
            if self.train_attacker:
                game.add_def_str(opponent)
            else:
                game.add_att_str(opponent)

            # The opponent sampling is done from the result directory, so we need
            # to copy any model we use into the policy set.
            if self.train_attacker:
                opponent_dir = settings.get_defender_strategy_dir()
            else:
                opponent_dir = settings.get_attacker_strategy_dir()
            new_filepath = osp.join(opponent_dir, osp.basename(opponent))
            shutil.copyfile(src=opponent, dst=new_filepath)

            save_path = osp.join(settings.get_run_dir(),
                                 osp.basename(opponent))
            save_path = save_path[:-4]  # Remove ".pkl".
            training.train(game=game,
                           identity=int(self.train_attacker),
                           opponent_mix_str=np.array([1.0]),
                           epoch=osp.basename(opponent),
                           writer=SummaryWriter(logdir=save_path),
                           save_path=osp.join(save_path, f"{policy_name}.pkl"))
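
A sketch of how the parent process might feed this worker; the class name, constructor, and job list are assumptions, but the (game, opponent) job format and the None sentinel match the loop above, and CloudpickleWrapper matches Example 3:

import multiprocessing

# Hypothetical parent-side setup: one job per opponent checkpoint, then a sentinel.
job_queue = multiprocessing.SimpleQueue()
worker = BestResponseWorker(job_queue, train_attacker=True)  # Hypothetical constructor.
worker.start()
for opponent_path in opponent_checkpoints:  # Assumed list of opponent .pkl paths.
    job_queue.put((CloudpickleWrapper(game), opponent_path))
job_queue.put(None)  # Tells the iter(self.job_queue.get, None) loop to exit.
worker.join()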
Example 7
def sample_strategy_from_mixed(env, str_set, mix_str, identity, str_dict=None):
    """ Sample a pure strategy from a mixed strategy.

    Note: str in str_set should include .pkl.

    :param env:
    :param str_set:
    :param mix_str:
    :param identity:
    :param str_dict:
    """
    assert env.training_flag != identity
    if not len(str_set) == len(mix_str):
        raise ValueError(
            "Length of mixed strategies does not match number of strategies.")

    mix_str = np.array(mix_str)

    if np.sum(mix_str) != 1.0:
        mix_str = mix_str / np.sum(mix_str)

    picked_str = np.random.choice(str_set, p=mix_str)
    # Fast sampling: if a preloaded policy dictionary was given, use it directly.
    if str_dict is not None:
        return str_dict[picked_str], picked_str

    if not fp.isInName('.pkl', name=picked_str):
        raise ValueError('The strategy picked is not a pickle file.')

    if identity == 0:  # pick a defender's strategy
        path = settings.get_defender_strategy_dir()
    elif identity == 1:
        path = settings.get_attacker_strategy_dir()
    else:
        raise ValueError("identity is neither 0 or 1!")

    if not fp.isExist(osp.join(path, picked_str)):
        raise ValueError('The strategy picked does not exist!')

    if "epoch1.pkl" in picked_str:
        act = fp.load_pkl(osp.join(path, picked_str))
        return act, picked_str

    act = torch.load(osp.join(path, picked_str))
    return act, picked_str
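
A sketch combining the two helpers above for fast sampling; the strategy names and weights are illustrative, and env.training_flag is assumed to be set to the attacker (1) while a defender strategy is sampled:

import numpy as np

str_set = ["def_str_epoch1.pkl", "def_str_epoch2.pkl"]  # Hypothetical defender strategies.
mix_str = np.array([0.3, 0.7])                          # Weights over str_set.
str_dict = load_all_policies(env, str_set, opp_identity=0)
act, picked = sample_strategy_from_mixed(env, str_set, mix_str, identity=0, str_dict=str_dict)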
Example 8
def _maybe_load_last_epochs_model(is_attacker: bool):
    """ Load the last parameters of the last trained best responder, if any.

    :param is_attacker: Load previous attacker's parameters.
    :type is_attacker: bool
    :return: DQNEncDec parameters, or None if no previous parameters.
    """
    # Get the directory where all of this run's models are saved.
    save_dir = None
    filename = None
    if is_attacker:
        save_dir = settings.get_attacker_strategy_dir()
        filename = "att_str_epoch{}.pkl"
    else:
        save_dir = settings.get_defender_strategy_dir()
        filename = "def_str_epoch{}.pkl"
    filepath_template = osp.join(save_dir, filename)

    # The first epoch that could have a checkpoint to load is epoch 2,
    # because the 1st epoch's model is a random, non-parameterized function.
    # Therefore, if we cannot find at least the 2nd epoch's checkpoint,
    # there is nothing to load.
    filepath = filepath_template.format(2)
    if not osp.exists(filepath):
        return None

    # Find the most recent save of a model.
    epoch = 2
    prev_epochs_model = None
    while True:
        filepath = filepath_template.format(epoch)

        # If we cannot find a checkpoint for this epoch, return last checkpoint.
        if not osp.exists(filepath):
            assert prev_epochs_model is not None, "Should've been able to load at least 2nd epoch's model."
            return prev_epochs_model

        # Otherwise, update latest checkpoint.
        prev_epochs_model = torch.load(filepath)
        epoch += 1
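
A short usage sketch; whether and how the returned parameters warm-start a new best responder depends on what the checkpoints store, so only the call itself is shown:

# Hypothetical check before training a new attacker best response.
prev_params = _maybe_load_last_epochs_model(is_attacker=True)
start_from_scratch = prev_params is None  # True only before the 2nd epoch's checkpoint exists.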
Example 9
    def __init__(self, env, num_episodes, threshold):
        # TODO: check whether `env` should be the initial env; this env should use G_reserved.
        logger.info("Reminder: the env in the game should match the initial one, since G should be G_reserved.")
        self.env = copy.deepcopy(env)
        self.att_str = []
        self.def_str = []
        self.nasheq = {}
        self.payoffmatrix_def = np.zeros((1, 1), dtype=np.float32)
        self.payoffmatrix_att = np.zeros((1, 1), dtype=np.float32)
        self.dir_def = settings.get_defender_strategy_dir()
        self.dir_att = settings.get_attacker_strategy_dir()

        # Payoffs and beneficial deviations.
        self.att_BD_list = []
        self.def_BD_list = []
        self.att_payoff = []
        self.def_payoff = []

        self.num_episodes = num_episodes
        self.threshold = threshold
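
A hypothetical instantiation; the class name EmpiricalGame, the episode count, and the threshold are all illustrative, since this fragment shows only the constructor:

# Hypothetical: wrap the initial environment in the empirical-game bookkeeping object.
game = EmpiricalGame(env, num_episodes=250, threshold=0.11)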