Example #1
def random_search_max_confidence_recipe(sess, model, x, y, eps,
                                        clip_min, clip_max,
                                        report_path, batch_size=BATCH_SIZE,
                                        num_noise_points=10000):
  """Max confidence using random search.

  References:
  https://openreview.net/forum?id=H1g0piA9tQ
    Describes the max_confidence procedure used for the bundling in this recipe
  https://arxiv.org/abs/1802.00420
    Describes using random search with 1e5 or more random points to avoid
    gradient masking.

  :param sess: tf.Session
  :param model: cleverhans.model.Model
  :param x: numpy array containing clean example inputs to attack
  :param y: numpy array containing true labels
  :param eps: float, maximum size of perturbation (measured by max norm)
  :param clip_min: float, minimum value of an example after perturbation
  :param clip_max: float, maximum value of an example after perturbation
  :param report_path: str, the path that the report will be saved to.
  :param batch_size: int, the total number of examples to run simultaneously
  :param num_noise_points: int, number of random noise samples tried per example
  """
  noise_attack = Noise(model, sess)
  threat_params = {"eps": eps, "clip_min": clip_min, "clip_max": clip_max}
  noise_attack_config = AttackConfig(noise_attack, threat_params)
  attack_configs = [noise_attack_config]
  assert batch_size % num_devices == 0
  new_work_goal = {noise_attack_config: num_noise_points}
  goals = [MaxConfidence(t=1., new_work_goal=new_work_goal)]
  bundle_attacks(sess, model, x, y, attack_configs, goals, report_path)
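
The core of this recipe can be sketched without the bundling machinery: draw many uniform noise points inside the eps-ball and keep, per clean example, the misclassified point the model is most confident on. A minimal NumPy sketch, assuming a toy linear model (the model, data, and every name below are illustrative, not part of the recipe):

import numpy as np

rng = np.random.RandomState(0)
W = rng.randn(2, 3)                          # toy 2-feature, 3-class model

def predict(x):
    """Softmax confidences of the toy model."""
    z = x @ W
    e = np.exp(z - z.max(axis=1, keepdims=True))
    return e / e.sum(axis=1, keepdims=True)

x = np.array([[0.2, 0.4]])
y = np.array([0])
eps, clip_min, clip_max = 0.3, 0., 1.
best_conf, best_adv = 0., x.copy()
for _ in range(10000):                       # cf. num_noise_points
    adv = np.clip(x + rng.uniform(-eps, eps, size=x.shape),
                  clip_min, clip_max)
    probs = predict(adv)
    cls = int(probs[0].argmax())
    if cls != y[0] and probs[0, cls] > best_conf:
        best_conf, best_adv = probs[0, cls], adv
print(best_conf)
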
Example #2
def recipe(
    sess,
    model,
    x,
    y,
    nb_classes,
    eps,
    clip_min,
    clip_max,
    eps_iter,
    nb_iter,
    report_path,
    eps_iter_small,
    batch_size,
):
    """
    Mock recipe that just runs the Noise attack so the test runs fast
    """
    attack_configs = [AttackConfig(Noise(model, sess), {"eps": eps})]
    new_work_goal = {config: 1 for config in attack_configs}
    goals = [Misclassify(new_work_goal=new_work_goal)]
    bundle_attacks(
        sess,
        model,
        x,
        y,
        attack_configs,
        goals,
        report_path,
        attack_batch_size=batch_size,
        eval_batch_size=batch_size,
    )
Example #3
def recipe(sess, model, x, y, nb_classes, eps, clip_min,
           clip_max, eps_iter, nb_iter,
           report_path, eps_iter_small, batch_size):
  """Mock recipe that runs only the Noise attack so the test runs fast."""
  attack_configs = [AttackConfig(Noise(model, sess), {'eps': eps})]
  new_work_goal = {config: 1 for config in attack_configs}
  goals = [Misclassify(new_work_goal=new_work_goal)]
  bundle_attacks(sess, model, x, y, attack_configs, goals, report_path,
                 attack_batch_size=batch_size, eval_batch_size=batch_size)
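
Both variants rely on the Misclassify goal: an example counts as solved as soon as any attack config flips its label, and new_work_goal = {config: 1} gives each config exactly one run per example. A toy NumPy sketch of that bookkeeping (the stand-in classifier and attack are assumptions for illustration):

import numpy as np

def predict(x):
    return (x.sum(axis=1) > 0).astype(int)   # toy binary classifier

def run_attack(x):                           # stand-in for one AttackConfig
    return x + np.random.uniform(-0.3, 0.3, size=x.shape)

x = np.random.randn(8, 2)
y = predict(x)                               # labels the model gets right
solved = np.zeros(len(x), dtype=bool)
for _ in range(1):                           # cf. new_work_goal = {config: 1}
    solved |= predict(run_attack(x)) != y    # Misclassify: any flip counts
print(solved)
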
Example #4
def single_run_max_confidence_recipe(sess, model, x, y, nb_classes, eps,
                                     clip_min, clip_max, eps_iter, nb_iter,
                                     report_path,
                                     batch_size=BATCH_SIZE):
  """A reasonable attack bundling recipe for a max norm threat model and
  a defender that uses confidence thresholding. This recipe uses both
  uniform noise and randomly-initialized PGD targeted attacks.

  References:
  https://openreview.net/forum?id=H1g0piA9tQ

  This version runs each attack (noise, targeted PGD for each class with
  nb_iter iterations, targeted PGD for each class with 25X more iterations)
  just once and then stops. See `basic_max_confidence_recipe` for a version
  that runs indefinitely.

  :param sess: tf.Session
  :param model: cleverhans.model.Model
  :param x: numpy array containing clean example inputs to attack
  :param y: numpy array containing true labels
  :param nb_classes: int, number of classes
  :param eps: float, maximum size of perturbation (measured by max norm)
  :param eps_iter: float, step size for one version of PGD attacks
    (will also run another version with 25X smaller step size)
  :param nb_iter: int, number of iterations for the cheaper PGD attacks
    (will also run another version with 25X more iterations)
  :param report_path: str, the path that the report will be saved to.
  :param batch_size: int, the total number of examples to run simultaneously
  """
  noise_attack = Noise(model, sess)
  pgd_attack = ProjectedGradientDescent(model, sess)
  threat_params = {"eps": eps, "clip_min": clip_min, "clip_max": clip_max}
  noise_attack_config = AttackConfig(noise_attack, threat_params, "noise")
  attack_configs = [noise_attack_config]
  pgd_attack_configs = []
  pgd_params = copy.copy(threat_params)
  pgd_params["eps_iter"] = eps_iter
  pgd_params["nb_iter"] = nb_iter
  assert batch_size % num_devices == 0
  dev_batch_size = batch_size // num_devices
  ones = tf.ones(dev_batch_size, tf.int32)
  expensive_pgd = []
  for cls in range(nb_classes):
    cls_params = copy.copy(pgd_params)
    cls_params['y_target'] = tf.to_float(tf.one_hot(ones * cls, nb_classes))
    cls_attack_config = AttackConfig(pgd_attack, cls_params, "pgd_" + str(cls))
    pgd_attack_configs.append(cls_attack_config)
    expensive_params = copy.copy(cls_params)
    expensive_params["eps_iter"] /= 25.
    expensive_params["nb_iter"] *= 25.
    expensive_config = AttackConfig(pgd_attack, expensive_params,
                                    "expensive_pgd_" + str(cls))
    expensive_pgd.append(expensive_config)
  attack_configs = [noise_attack_config] + pgd_attack_configs + expensive_pgd
  new_work_goal = {config: 1 for config in attack_configs}
  goals = [MaxConfidence(t=1., new_work_goal=new_work_goal)]
  bundle_attacks(sess, model, x, y, attack_configs, goals, report_path)
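
Note how each expensive config preserves the total step budget of its cheap counterpart: eps_iter shrinks by 25X while nb_iter grows by 25X, so the product eps_iter * nb_iter is unchanged and the attack simply takes finer steps. A quick check with assumed values:

eps_iter, nb_iter = 0.01, 40                 # cheap PGD settings (assumed)
exp_eps_iter, exp_nb_iter = eps_iter / 25., nb_iter * 25
print(eps_iter * nb_iter, exp_eps_iter * exp_nb_iter)   # both approx. 0.4
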
Example #5
def prepare_attack(sess, args, model, adv_input, target_embeddings):
    if args.attack_type == 'FGSM':
        # Define FGSM for the model
        steps = 1
        alpha = args.eps / steps
        fgsm = FastGradientMethod(model)
        fgsm_params = {'eps': alpha, 'clip_min': 0., 'clip_max': 1.}
        adv_x = fgsm.generate(model.face_input, **fgsm_params)
    elif args.attack_type == 'CW':
        model.face_input.set_shape(np.shape(adv_input))
        # Instantiate a CW attack object
        cw = CarliniWagnerL2(model, sess)
        cw_params = {
            'binary_search_steps': 1,  # 1
            'max_iterations': 100,  # 100
            'learning_rate': .2,  # .2
            'batch_size': args.lfw_batch_size,
            'initial_const': args.init_c,  # 10
            'confidence': 10
        }
        feed_dict = {
            model.face_input: adv_input,
            model.victim_embedding_input: target_embeddings
        }
        # adv_x = cw.generate(model.face_input, feed_dict, **cw_params)
        adv_x = cw.generate(model.face_input, **cw_params)
        # adv_x = cw.generate_np(adv_input, **cw_params)
    elif args.attack_type == 'random':
        random_attack = Noise(model, sess)
        noise_params = {
            'eps': args.eps,
            'ord': np.inf,
            'clip_min': 0,
            'clip_max': 1
        }
        adv_x = random_attack.generate(model.face_input, **noise_params)
    else:
        raise ValueError('Unknown attack type: %s' % args.attack_type)

    return adv_x
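
A hedged usage sketch: prepare_attack expects an argparse-style namespace carrying the fields read above. The values here, and the sess/model/input names in the commented call, are assumptions for illustration:

from types import SimpleNamespace

args = SimpleNamespace(attack_type='random', eps=0.1,
                       lfw_batch_size=10, init_c=10)
# adv_x = prepare_attack(sess, args, model, adv_input, target_embeddings)
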
Example #6
def train_child(t, p, m, load_dict=False):
    # model = nn.DataParallel(TestCNN().cuda(1), device_ids=[1, 2, 3])
    raw_model = TestCNN().cuda(0)
    model = TestCNN().cuda(0)
    tf_model = convert_pytorch_model_to_tf(model)
    cleverhans_model = CallableModelWrapper(tf_model, output_layer='logits')
    session = tf.Session()
    x_op = tf.placeholder(tf.float32, shape=(None, 3, 32, 32))
    fgsm = FastGradientMethod(cleverhans_model, sess=session)
    # stm = SpatialTransformationMethod(cleverhans_model, sess=session)
    # cw2 = CarliniWagnerL2(cleverhans_model, sess=session)
    pgd = ProjectedGradientDescent(cleverhans_model, sess=session)
    noise = Noise(cleverhans_model, sess=session)
    mim = MomentumIterativeMethod(cleverhans_model, sess=session)
    df = DeepFool(cleverhans_model, sess=session)
    tf_raw_model = convert_pytorch_model_to_tf(raw_model)
    cleverhans_raw_model = CallableModelWrapper(tf_raw_model,
                                                output_layer='logits')
    # pgd_raw = ProjectedGradientDescent(cleverhans_raw_model, sess=session)
    noise_raw = Noise(cleverhans_raw_model, sess=session)

    def fgsm_op(x, eps):
        att = fgsm.generate(x_op, eps=eps)
        return session.run(att, feed_dict={x_op: x})

    # def stm_op(x, eps):
    #     att = stm.generate(x_op, batch_size=len(x), dx_min=-0.1*eps, dx_max=0.1*eps, dy_min=-0.1*eps, dy_max=0.1*eps, angle_min=-30*eps, angle_max=30*eps)
    #     return session.run(att, feed_dict={x_op: x})
    # def cw2_op(x, eps):
    #     att = cw2.generate(x_op, max_iterations=3)
    def pgd_op(x, eps):
        att = pgd.generate(x_op, eps=eps, eps_iter=eps * 0.2, nb_iter=3)
        return session.run(att, feed_dict={x_op: x})

    # def pgd_raw_op(x, eps):
    #     att = pgd_raw.generate(x_op, eps=eps, eps_iter=eps * 0.2, nb_iter=3)
    #     return session.run(att, feed_dict={x_op: x})
    def noise_op(x, eps):
        att = noise.generate(x_op, eps=eps)
        return session.run(att, feed_dict={x_op: x})

    def noise_raw_op(x, eps):
        att = noise_raw.generate(x_op, eps=eps)
        return session.run(att, feed_dict={x_op: x})

    def df_op(x):
        att = df.generate(x_op, nb_candidate=10, max_iter=3)
        return session.run(att, feed_dict={x_op: x})

    def mim_op(x, eps):
        att = mim.generate(x_op, eps=eps, eps_iter=eps * 0.2)
        return session.run(att, feed_dict={x_op: x})

    def attack_train(x):
        attacks = [fgsm_op, pgd_op, mim_op]
        attacks_name = ['FGSM', 'PGD', 'MIM']
        eps = [[0.03, 0.3], [0.03, 0.3], [0.03, 0.3]]
        train_x_adv = x.copy()
        adv_type = np.random.randint(SUBPOLICY_COUNT, size=len(train_x_adv))
        for i, (ti, pi, mi) in enumerate(
                tqdm(zip(t, p, m),
                     total=len(t),
                     desc='Subpolicy: ',
                     leave=False)):
            adv_i = train_x_adv[adv_type == i]
            for j, (tj, pj, mj) in enumerate(
                    tqdm(zip(ti, pi, mi),
                         total=len(ti),
                         desc='Operation: ',
                         leave=False)):
                tj, pj, mj = (*tj, *pj, *mj)
                # uniform draw: apply this operation with probability pj
                mask = np.random.rand(len(adv_i)) < pj
                adv_j = adv_i[mask]
                # batch index k; i already names the subpolicy index above
                for k in tqdm(range(0, len(adv_j), BATCH_SIZE),
                              desc=attacks_name[tj] + ': ',
                              leave=False):
                    adv_j[k:][:BATCH_SIZE] = attacks[tj](
                        adv_j[k:][:BATCH_SIZE], (mj + 1) / MAGN_COUNT *
                        (eps[tj][1] - eps[tj][0]) + eps[tj][0])
                # fancy indexing returns copies, so write the results back
                adv_i[mask] = adv_j
            train_x_adv[adv_type == i] = adv_i
        return train_x_adv

    optimizer = optim.SGD(model.parameters(), lr=1e-3)
    raw_optimizer = optim.SGD(raw_model.parameters(), lr=1e-3)
    train_x_adv = attack_train(train_x)
    adv_trainset = torch.utils.data.TensorDataset(
        torch.tensor(train_x_adv, dtype=torch.float),
        torch.tensor(train_y, dtype=torch.long))
    adv_trainloader = torch.utils.data.DataLoader(adv_trainset,
                                                  batch_size=BATCH_SIZE,
                                                  shuffle=True,
                                                  num_workers=4)
    if load_dict:
        model.load_state_dict(torch.load('black_eval_runs/model.pt'))
        optimizer.load_state_dict(torch.load('black_eval_runs/optimizer.pt'))
        raw_model.load_state_dict(torch.load('black_eval_runs/raw_model.pt'))
        raw_optimizer.load_state_dict(
            torch.load('black_eval_runs/raw_optimizer.pt'))
    model.train()
    batch_tqdm = tqdm(adv_trainloader, leave=False)
    for x, y in batch_tqdm:
        optimizer.zero_grad()
        output = model(x.cuda(0))
        loss = criterion(output, y.cuda(0))
        loss.backward()
        optimizer.step()
        acc = torch.sum(output.cpu().argmax(axis=1) == y) / y.size(0)
        batch_tqdm.set_description(f'adv {loss:.3f} {acc:.3f}')
    batch_tqdm = tqdm(trainloader, leave=False)
    raw_model.train()
    for x, y in batch_tqdm:
        raw_optimizer.zero_grad()
        output = raw_model(x.cuda(0))
        loss = criterion(output, y.cuda(0))
        loss.backward()
        raw_optimizer.step()
        acc = torch.sum(output.cpu().argmax(axis=1) == y) / y.size(0)
        batch_tqdm.set_description(f'raw {loss:.3f} {acc:.3f}')
    with torch.no_grad():
        model.eval()
        batch_tqdm = tqdm(valloader, leave=False)
        tot_acc = 0
        for x, y in batch_tqdm:
            output = model(x.cuda(0))
            acc = float(torch.sum(output.cpu().argmax(axis=1) == y))
            tot_acc += acc
        adv_raw_acc = tot_acc / len(val_x)
        val_x_adv = np.zeros_like(val_x)
        for i in tqdm(range(0, len(val_x_adv), BATCH_SIZE),
                      desc='Noise: ',
                      leave=False):
            val_x_adv[i:][:BATCH_SIZE] = noise_op(val_x[i:][:BATCH_SIZE], 0.3)
        adv_valset = torch.utils.data.TensorDataset(
            torch.tensor(val_x_adv, dtype=torch.float),
            torch.tensor(val_y, dtype=torch.long))
        adv_valloader = torch.utils.data.DataLoader(adv_valset,
                                                    batch_size=BATCH_SIZE,
                                                    shuffle=False,
                                                    num_workers=4)
        batch_tqdm = tqdm(adv_valloader, leave=False)
        tot_acc = 0
        for x, y in batch_tqdm:
            output = model(x.cuda(0))
            acc = float(torch.sum(output.cpu().argmax(axis=1) == y))
            tot_acc += acc
        adv_adv_acc = tot_acc / len(val_x)
        raw_model.eval()
        batch_tqdm = tqdm(valloader, leave=False)
        tot_acc = 0
        for x, y in batch_tqdm:
            output = raw_model(x.cuda(0))
            acc = float(torch.sum(output.cpu().argmax(axis=1) == y))
            tot_acc += acc
        raw_raw_acc = tot_acc / len(val_x)
        val_x_adv = np.zeros_like(val_x)
        for i in tqdm(range(0, len(val_x_adv), BATCH_SIZE),
                      desc='Noise: ',
                      leave=False):
            val_x_adv[i:][:BATCH_SIZE] = noise_raw_op(val_x[i:][:BATCH_SIZE],
                                                      0.3)
        adv_valset = torch.utils.data.TensorDataset(
            torch.tensor(val_x_adv, dtype=torch.float),
            torch.tensor(val_y, dtype=torch.long))
        adv_valloader = torch.utils.data.DataLoader(adv_valset,
                                                    batch_size=BATCH_SIZE,
                                                    shuffle=False,
                                                    num_workers=4)
        batch_tqdm = tqdm(adv_valloader, leave=False)
        tot_acc = 0
        for x, y in batch_tqdm:
            output = raw_model(x.cuda(0))
            acc = float(torch.sum(output.cpu().argmax(axis=1) == y))
            tot_acc += acc
        raw_adv_acc = tot_acc / len(val_x)
    with open('black_eval_runs/acc.csv', 'a') as f:
        f.write(f'{adv_raw_acc},{adv_adv_acc},{raw_raw_acc},{raw_adv_acc}\n')
    print(
        f'adv {adv_raw_acc:.3f} -> {adv_adv_acc:.3f} | raw {raw_raw_acc:.3f} -> {raw_adv_acc:.3f}'
    )
    torch.save(model.state_dict(), 'black_eval_runs/model.pt')
    torch.save(optimizer.state_dict(), 'black_eval_runs/optimizer.pt')
    torch.save(raw_model.state_dict(), 'black_eval_runs/raw_model.pt')
    torch.save(raw_optimizer.state_dict(), 'black_eval_runs/raw_optimizer.pt')
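
train_child reads several module-level globals: criterion, train_x, train_y, trainset, trainloader, val_x, val_y, valloader, BATCH_SIZE, SUBPOLICY_COUNT and MAGN_COUNT. A minimal setup sketch so the function can be read in context; the shapes and constants are assumptions, and TestCNN is defined elsewhere in the project:

import numpy as np
import torch
import torch.nn as nn

BATCH_SIZE, SUBPOLICY_COUNT, MAGN_COUNT = 128, 5, 10
criterion = nn.CrossEntropyLoss()
train_x = np.random.rand(512, 3, 32, 32).astype(np.float32)
train_y = np.random.randint(10, size=512)
val_x = np.random.rand(128, 3, 32, 32).astype(np.float32)
val_y = np.random.randint(10, size=128)
trainset = torch.utils.data.TensorDataset(
    torch.tensor(train_x), torch.tensor(train_y, dtype=torch.long))
trainloader = torch.utils.data.DataLoader(
    trainset, batch_size=BATCH_SIZE, shuffle=True, num_workers=4)
valset = torch.utils.data.TensorDataset(
    torch.tensor(val_x), torch.tensor(val_y, dtype=torch.long))
valloader = torch.utils.data.DataLoader(
    valset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)
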
Example #7
def fixed_max_confidence_recipe(sess,
                                model,
                                x,
                                y,
                                nb_classes,
                                eps,
                                clip_min,
                                clip_max,
                                eps_iter,
                                nb_iter,
                                report_path,
                                batch_size=BATCH_SIZE):
    """A reasonable attack bundling recipe for a max norm threat model and
  a defender that uses confidence thresholding.

  References:
  https://openreview.net/forum?id=H1g0piA9tQ

  This version runs each attack a fixed number of times.
  It is more exhaustive than `single_run_max_confidence_recipe` but because
  it uses a fixed budget rather than running indefinitely it is more
  appropriate for making fair comparisons between two models.

  :param sess: tf.Session
  :param model: cleverhans.model.Model
  :param x: numpy array containing clean example inputs to attack
  :param y: numpy array containing true labels
  :param nb_classes: int, number of classes
  :param eps: float, maximum size of perturbation (measured by max norm)
  :param eps_iter: float, step size for one version of PGD attacks
    (will also run another version with 25X smaller step size)
  :param nb_iter: int, number of iterations for one version of PGD attacks
    (will also run another version with 25X more iterations)
  :param report_path: str, the path that the report will be saved to.
  :param batch_size: int, the total number of examples to run simultaneously
  """
    noise_attack = Noise(model, sess)
    pgd_attack = ProjectedGradientDescent(model, sess)
    threat_params = {"eps": eps, "clip_min": clip_min, "clip_max": clip_max}
    noise_attack_config = AttackConfig(noise_attack, threat_params)
    attack_configs = [noise_attack_config]
    pgd_attack_configs = []
    pgd_params = copy.copy(threat_params)
    pgd_params["eps_iter"] = eps_iter
    pgd_params["nb_iter"] = nb_iter
    assert batch_size % num_devices == 0
    dev_batch_size = batch_size // num_devices
    ones = tf.ones(dev_batch_size, tf.int32)
    expensive_pgd = []
    for cls in range(nb_classes):
        cls_params = copy.copy(pgd_params)
        cls_params['y_target'] = tf.to_float(tf.one_hot(
            ones * cls, nb_classes))
        cls_attack_config = AttackConfig(pgd_attack, cls_params,
                                         "pgd_" + str(cls))
        pgd_attack_configs.append(cls_attack_config)
        expensive_params = copy.copy(cls_params)
        expensive_params["eps_iter"] /= 25.
        expensive_params["nb_iter"] *= 25.
        expensive_config = AttackConfig(pgd_attack, expensive_params,
                                        "expensive_pgd_" + str(cls))
        expensive_pgd.append(expensive_config)
    attack_configs = [noise_attack_config] + pgd_attack_configs + expensive_pgd
    new_work_goal = {config: 5 for config in attack_configs}
    pgd_work_goal = {config: 5 for config in pgd_attack_configs}
    # TODO: lower priority: make sure bundler won't waste time running targeted
    # attacks on examples where the target class is the true class
    goals = [
        Misclassify(new_work_goal={noise_attack_config: 50}),
        Misclassify(new_work_goal=pgd_work_goal),
        MaxConfidence(t=0.5, new_work_goal=new_work_goal),
        MaxConfidence(t=0.75, new_work_goal=new_work_goal),
        MaxConfidence(t=0.875, new_work_goal=new_work_goal),
        MaxConfidence(t=0.9375, new_work_goal=new_work_goal),
        MaxConfidence(t=0.96875, new_work_goal=new_work_goal),
        MaxConfidence(t=0.984375, new_work_goal=new_work_goal),
        MaxConfidence(t=1., new_work_goal=new_work_goal)
    ]
    bundle_attacks(sess, model, x, y, attack_configs, goals, report_path)
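
The MaxConfidence thresholds above follow a halving schedule, t = 1 - 2**-k for k = 1..6 and then t = 1, so the bundler works through progressively stricter confidence targets. A quick check:

ts = [1 - 2 ** -k for k in range(1, 7)] + [1.]
print(ts)   # [0.5, 0.75, 0.875, 0.9375, 0.96875, 0.984375, 1.0]
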
Example #8
def basic_max_confidence_recipe(sess,
                                model,
                                x,
                                y,
                                nb_classes,
                                eps,
                                clip_min,
                                clip_max,
                                eps_iter,
                                nb_iter,
                                report_path,
                                batch_size=BATCH_SIZE,
                                eps_iter_small=None):
    """A reasonable attack bundling recipe for a max norm threat model and
  a defender that uses confidence thresholding.

  References:
  https://openreview.net/forum?id=H1g0piA9tQ

  This version runs indefinitely, updating the report on disk continuously.

  :param sess: tf.Session
  :param model: cleverhans.model.Model
  :param x: numpy array containing clean example inputs to attack
  :param y: numpy array containing true labels
  :param nb_classes: int, number of classes
  :param eps: float, maximum size of perturbation (measured by max norm)
  :param eps_iter: float, step size for one version of PGD attacks
    (will also run another version with eps_iter_small)
  :param nb_iter: int, number of iterations for one version of PGD attacks
    (will also run another version with 25X more iterations)
  :param report_path: str, the path that the report will be saved to.
  :param batch_size: int, the total number of examples to run simultaneously
  :param eps_iter_small: optional, float.
    The second version of the PGD attack is run with 25 * nb_iter iterations
    and eps_iter_small step size. If eps_iter_small is not specified it is
    set to eps_iter / 25.
  """
    noise_attack = Noise(model, sess)
    pgd_attack = ProjectedGradientDescent(model, sess)
    threat_params = {"eps": eps, "clip_min": clip_min, "clip_max": clip_max}
    noise_attack_config = AttackConfig(noise_attack, threat_params)
    attack_configs = [noise_attack_config]
    pgd_attack_configs = []
    pgd_params = copy.copy(threat_params)
    pgd_params["eps_iter"] = eps_iter
    pgd_params["nb_iter"] = nb_iter
    assert batch_size % num_devices == 0
    dev_batch_size = batch_size // num_devices
    ones = tf.ones(dev_batch_size, tf.int32)
    expensive_pgd = []
    if eps_iter_small is None:
        eps_iter_small = eps_iter / 25.
    for cls in range(nb_classes):
        cls_params = copy.copy(pgd_params)
        cls_params['y_target'] = tf.to_float(tf.one_hot(
            ones * cls, nb_classes))
        cls_attack_config = AttackConfig(pgd_attack, cls_params,
                                         "pgd_" + str(cls))
        pgd_attack_configs.append(cls_attack_config)
        expensive_params = copy.copy(cls_params)
        expensive_params["eps_iter"] = eps_iter_small
        expensive_params["nb_iter"] *= 25.
        expensive_config = AttackConfig(pgd_attack, expensive_params,
                                        "expensive_pgd_" + str(cls))
        expensive_pgd.append(expensive_config)
    attack_configs = [noise_attack_config] + pgd_attack_configs + expensive_pgd
    new_work_goal = {config: 5 for config in attack_configs}
    pgd_work_goal = {config: 5 for config in pgd_attack_configs}
    goals = [
        Misclassify(new_work_goal={noise_attack_config: 50}),
        Misclassify(new_work_goal=pgd_work_goal),
        MaxConfidence(t=0.5, new_work_goal=new_work_goal),
        MaxConfidence(t=0.75, new_work_goal=new_work_goal),
        MaxConfidence(t=0.875, new_work_goal=new_work_goal),
        MaxConfidence(t=0.9375, new_work_goal=new_work_goal),
        MaxConfidence(t=0.96875, new_work_goal=new_work_goal),
        MaxConfidence(t=0.984375, new_work_goal=new_work_goal),
        MaxConfidence(t=1.)
    ]
    bundle_attacks(sess, model, x, y, attack_configs, goals, report_path)
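
The one structural difference from fixed_max_confidence_recipe is the final goal: MaxConfidence(t=1.) carries no new_work_goal, so its budget never runs out, which is what makes this recipe run indefinitely. The eps_iter_small default follows the same 25X rule as the other recipes; a quick check with an assumed eps_iter:

eps_iter, eps_iter_small = 0.01, None
if eps_iter_small is None:
    eps_iter_small = eps_iter / 25.
print(eps_iter_small)   # 0.0004
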