コード例 #1
0
def train_sub(sess, x, y, bbox_preds, X_sub, Y_sub, nb_classes,
              nb_epochs_s, batch_size, learning_rate, data_aug, lmbda, model_type):
    """
    This function creates the substitute by alternatively
    augmenting the training data and training the substitute.
    :param sess: TF session
    :param x: input TF placeholder
    :param y: output TF placeholder
    :param bbox_preds: output of black-box model predictions
    :param X_sub: initial substitute training data
    :param Y_sub: initial substitute training labels
    :param nb_classes: number of output classes
    :param nb_epochs_s: number of epochs to train substitute model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :param data_aug: number of times substitute training data is augmented
    :param lmbda: lambda from arxiv.org/abs/1602.02697
    :return:
    """
    # Define TF model graph (for the black-box model)
    # model_sub = model_mnist(type=model_type)
    model_sub = substitute_model()
    preds_sub = model_sub(x)
    print("Defined TensorFlow model graph for the substitute.")

    # Define the Jacobian symbolically using TensorFlow
    grads = jacobian_graph(preds_sub, x, nb_classes)

    # Train the substitute and augment dataset alternatively
    for rho in xrange(data_aug):
        print("Substitute training epoch #" + str(rho))
        train_params = {
            'nb_epochs': nb_epochs_s,
            'batch_size': batch_size,
            'learning_rate': learning_rate
        }
        model_train(sess, x, y, preds_sub, X_sub, to_categorical(Y_sub),
                    init_all=False, verbose=False, args=train_params)

        # If we are not at last substitute training iteration, augment dataset
        if rho < data_aug - 1:
            print("Augmenting substitute training data.")
            # Perform the Jacobian augmentation
            X_sub = jacobian_augmentation(sess, x, X_sub, Y_sub, grads, lmbda)

            print("Labeling substitute training data.")
            # Label the newly generated synthetic points using the black-box
            Y_sub = np.hstack([Y_sub, Y_sub])
            X_sub_prev = X_sub[int(len(X_sub)/2):]
            bbox_val = batch_eval([x], [bbox_preds], [X_sub_prev])[0]
            # Note here that we take the argmax because the adversary
            # only has access to the label (not the probabilities) output
            # by the black-box model
            Y_sub[int(len(X_sub)/2):] = np.argmax(bbox_val, axis=1)

    return model_sub, preds_sub
コード例 #2
0
def main(attack, src_model_name, target_model_name):
    np.random.seed(0)
    tf.set_random_seed(0)

    dim = 28 * 28 * 1

    x = K.placeholder((None,
                       28,
                       28,
                       1))

    y = K.placeholder((None, 10))

    _, _, X_test, Y_test = data_mnist()
    Y_test_uncat = np.argmax(Y_test, axis=1)

    # source model for crafting adversarial examples
    src_model = load_model(src_model_name)

    # model(s) to target
    target_model = load_model(target_model_name)

    # simply compute test error
    if attack == "test":
        _, _, err = tf_test_error_rate(src_model, x, X_test, Y_test)
        print('{}: {:.1f}'.format(basename(src_model_name), err))
        _, _, err = tf_test_error_rate(target_model, x, X_test, Y_test)
        print('{}: {:.1f}'.format(basename(target_model_name), err))

        return

    if args.targeted_flag == 1:
        targets = []
        allowed_targets = list(range(10))
        for i in range(len(Y_test)):
            allowed_targets.remove(Y_test_uncat[i])
            targets.append(np.random.choice(allowed_targets))
            allowed_targets = list(range(10))
        targets = np.array(targets)
        print(targets)
        targets_cat = np_utils.to_categorical(targets, 10).astype(np.float32)
        Y_test = targets_cat

    logits = src_model(x)
    print('logits', logits)

    if args.loss_type == 'xent':
        loss, grad = gen_grad_ens(x, logits, y)
        assert grad is not None
    elif args.loss_type == 'cw':
        grad = gen_grad_cw(x, logits, y)
    if args.targeted_flag == 1:
        grad = -1.0 * grad

    for eps in eps_list:
        # FGSM and RAND+FGSM one-shot attack
        if attack in ["fgs", "rand_fgs"] and args.norm == 'linf':
            assert grad is not None
            adv_x = symbolic_fgs(x, grad, eps=eps)
        elif attack in ["fgs", "rand_fgs"] and args.norm == 'l2':
            adv_x = symbolic_fg(x, grad, eps=eps)

        # iterative FGSM
        if attack == "ifgs":
            l = 1000
            X_test = X_test[0:l]
            Y_test = Y_test[0:l]

            adv_x = x
            # iteratively apply the FGSM with small step size
            for i in range(args.num_iter):
                adv_logits = src_model(adv_x)

                if args.loss_type == 'xent':
                    loss, grad = gen_grad_ens(adv_x, adv_logits, y)
                elif args.loss_type == 'cw':
                    grad = gen_grad_cw(adv_x, adv_logits, y)
                if args.targeted_flag == 1:
                    grad = -1.0 * grad

                adv_x = symbolic_fgs(adv_x, grad, args.delta, True)
                r = adv_x - x
                r = K.clip(r, -eps, eps)
                adv_x = x + r

            adv_x = K.clip(adv_x, 0, 1)

        print('Generating adversarial samples')
        X_adv = batch_eval([x, y], [adv_x], [X_test, Y_test])[0]

        avg_l2_perturb = np.mean(np.linalg.norm((X_adv - X_test).reshape(len(X_test), dim), axis=1))

        # white-box attack
        l = len(X_adv)
        print('Carrying out white-box attack')
        preds_adv, orig, err = tf_test_error_rate(src_model, x, X_adv, Y_test[0:l])
        if args.targeted_flag == 1:
            err = 100.0 - err
        print('{}->{}: {:.1f}'.format(src_model_name, src_model_name, err))

        # black-box attack
        if target_model_name is not None:
            print('Carrying out black-box attack')
            preds, _, err = tf_test_error_rate(target_model, x, X_adv, Y_test)
            if args.targeted_flag == 1:
                err = 100.0 - err
            print('{}->{}: {:.1f}, {}, {} {}'.format(src_model_name,
                                                     basename(target_model_name), err,
                                                     avg_l2_perturb, eps, attack))
コード例 #3
0
def main(attack, src_model_name, target_model_names, data_train_dir,
         data_test_dir):
    np.random.seed(0)
    tf.set_random_seed(0)

    flags.DEFINE_integer('BATCH_SIZE', 32, 'Size of batches')
    set_gtsrb_flags()

    # Get MNIST test data
    _, _, X_test, Y_test = load_data(data_train_dir, data_test_dir)

    # One-hot encode image labels
    label_binarizer = LabelBinarizer()
    Y_test = label_binarizer.fit_transform(Y_test)

    x = tf.placeholder(tf.float32, (None, 32, 32, 1))

    y = tf.placeholder(tf.int32, (None))

    one_hot_y = tf.one_hot(y, 43)

    # source model for crafting adversarial examples
    src_model = load_model(src_model_name)

    # model(s) to target
    target_models = [None] * len(target_model_names)
    for i in range(len(target_model_names)):
        target_models[i] = load_model(target_model_names[i])

    # simply compute test error
    if attack == "test":
        err = tf_test_error_rate(src_model, x, X_test, Y_test)
        print '{}: {:.3f}'.format(basename(src_model_name), err)

        for (name, target_model) in zip(target_model_names, target_models):
            err = tf_test_error_rate(target_model, x, X_test, Y_test)
            print '{}: {:.3f}'.format(basename(name), err)
        return

    eps = args.eps

    # take the random step in the RAND+FGSM
    if attack == "rand_fgs":
        X_test = np.clip(
            X_test + args.alpha * np.sign(np.random.randn(*X_test.shape)), 0.0,
            1.0)
        eps -= args.alpha

    logits = src_model(x)
    grad = gen_grad(x, logits, y)

    # FGSM and RAND+FGSM one-shot attack
    if attack in ["fgs", "rand_fgs"]:
        adv_x = symbolic_fgs(x, grad, eps=eps)

    # iterative FGSM
    if attack == "ifgs":
        adv_x = iter_fgs(src_model,
                         x,
                         y,
                         steps=args.steps,
                         eps=args.eps / args.steps)

    # Carlini & Wagner attack
    if attack == "CW":
        X_test = X_test[0:1000]
        Y_test = Y_test[0:1000]

        cli = CarliniLi(K.get_session(),
                        src_model,
                        targeted=False,
                        confidence=args.kappa,
                        eps=args.eps)

        X_adv = cli.attack(X_test, Y_test)

        r = np.clip(X_adv - X_test, -args.eps, args.eps)
        X_adv = X_test + r

        err = tf_test_error_rate(src_model, x, X_adv, Y_test)
        print '{}->{}: {:.3f}'.format(basename(src_model_name),
                                      basename(src_model_name), err)

        for (name, target_model) in zip(target_model_names, target_models):
            err = tf_test_error_rate(target_model, x, X_adv, Y_test)
            print '{}->{}: {:.3f}'.format(basename(src_model_name),
                                          basename(name), err)

        return

    if attack == "grad_ens":
        X_test = np.clip(
            X_test + args.alpha * np.sign(np.random.randn(*X_test.shape)), 0.0,
            1.0)
        eps -= args.alpha

        sub_model_ens = (sub_model_1, sub_model_2)
        sub_models = [None] * len(sub_model_ens)
        for i in range(len(sub_model_ens)):
            sub_models[i] = load_model(sub_model_ens[i])

        adv_x = x
        for j in range(args.steps):
            for i, m in enumerate(sub_models + [src_model]):
                logits = m(adv_x)
                gradient = gen_grad(adv_x, logits, y)
                adv_x = symbolic_fgs(adv_x,
                                     gradient,
                                     eps=args.eps / args.steps,
                                     clipping=True)

    # compute the adversarial examples and evaluate
    X_adv = batch_eval([x, y], [adv_x], [X_test, Y_test])[0]

    # white-box attack
    err = tf_test_error_rate(src_model, x, X_adv, Y_test)
    print '{}->{}: {:.3f}'.format(basename(src_model_name),
                                  basename(src_model_name), err)

    # black-box attack
    for (name, target_model) in zip(target_model_names, target_models):
        err = tf_test_error_rate(target_model, x, X_adv, Y_test)
        print '{}->{}: {:.3f}'.format(basename(src_model_name), basename(name),
                                      err)
コード例 #4
0
def main(attack, src_model_name, target_model_names):
    np.random.seed(0)
    tf.set_random_seed(0)

    flags.DEFINE_integer('BATCH_SIZE', 10, 'Size of batches')
    set_mnist_flags()

    x = K.placeholder(
        (None, FLAGS.IMAGE_ROWS, FLAGS.IMAGE_COLS, FLAGS.NUM_CHANNELS))

    y = K.placeholder((None, FLAGS.NUM_CLASSES))

    _, _, X_test, Y_test = data_mnist()

    # source model for crafting adversarial examples
    src_model = load_model(src_model_name)

    # model(s) to target
    target_models = [None] * len(target_model_names)
    for i in range(len(target_model_names)):
        target_models[i] = load_model(target_model_names[i])

    # simply compute test error
    if attack == "test":
        err = tf_test_error_rate(src_model, x, X_test, Y_test)
        print '{}: {:.1f}'.format(basename(src_model_name), err)

        for (name, target_model) in zip(target_model_names, target_models):
            err = tf_test_error_rate(target_model, x, X_test, Y_test)
            print '{}: {:.1f}'.format(basename(name), err)
        return

    eps = args.eps

    # take the random step in the RAND+FGSM
    if attack == "rand_fgs":
        X_test = np.clip(
            X_test + args.alpha * np.sign(np.random.randn(*X_test.shape)), 0.0,
            1.0)
        eps -= args.alpha

    logits = src_model(x)
    grad = gen_grad(x, logits, y)

    # FGSM and RAND+FGSM one-shot attack
    if attack in ["fgs", "rand_fgs"]:
        adv_x = symbolic_fgs(x, grad, eps=eps)

    # iterative FGSM
    if attack == "ifgs":
        adv_x = iter_fgs(src_model,
                         x,
                         y,
                         steps=args.steps,
                         eps=args.eps / args.steps)

    # Carlini & Wagner attack
    if attack == "CW":
        X_test = X_test[0:1000]
        Y_test = Y_test[0:1000]

        cli = CarliniLi(K.get_session(),
                        src_model,
                        targeted=False,
                        confidence=args.kappa,
                        eps=args.eps)

        X_adv = cli.attack(X_test, Y_test)

        r = np.clip(X_adv - X_test, -args.eps, args.eps)
        X_adv = X_test + r

        err = tf_test_error_rate(src_model, x, X_adv, Y_test)
        print '{}->{}: {:.1f}'.format(basename(src_model_name),
                                      basename(src_model_name), err)

        for (name, target_model) in zip(target_model_names, target_models):
            err = tf_test_error_rate(target_model, x, X_adv, Y_test)
            print '{}->{}: {:.1f}'.format(basename(src_model_name),
                                          basename(name), err)

        return

    # compute the adversarial examples and evaluate
    X_adv = batch_eval([x, y], [adv_x], [X_test, Y_test])[0]

    # white-box attack
    err = tf_test_error_rate(src_model, x, X_adv, Y_test)
    print '{}->{}: {:.1f}'.format(basename(src_model_name),
                                  basename(src_model_name), err)

    # black-box attack
    for (name, target_model) in zip(target_model_names, target_models):
        err = tf_test_error_rate(target_model, x, X_adv, Y_test)
        print '{}->{}: {:.1f}'.format(basename(src_model_name), basename(name),
                                      err)
コード例 #5
0
ファイル: simple_eval.py プロジェクト: cxmscb/ATDA_scb
def main(attack, src_model_name, target_model_names):
    np.random.seed(0)
    tf.set_random_seed(0)

    set_flags(20)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    K.set_session(tf.Session(config=config))

    x = K.placeholder(
        (None, FLAGS.NUM_CHANNELS, FLAGS.IMAGE_ROWS, FLAGS.IMAGE_COLS))

    y = K.placeholder((None, FLAGS.NUM_CLASSES))

    _, _, X_test, Y_test = load_data()

    # source model for crafting adversarial examples
    src_model = load_model(src_model_name)

    # model(s) to target
    target_models = [None] * len(target_model_names)
    for i in range(len(target_model_names)):
        target_models[i] = load_model(target_model_names[i])

    # simply compute test error
    if attack == "test":
        err = tf_test_error_rate(src_model, x, X_test, Y_test)
        print('{}: {:.1f}'.format(basename(src_model_name), 100 - err))

        for (name, target_model) in zip(target_model_names, target_models):
            err = tf_test_error_rate(target_model, x, X_test, Y_test)
            print('{}: {:.1f}'.format(basename(name), 100 - err))
        return

    eps = args.eps

    # take the random step in the RAND+FGSM
    if attack == "rfgs":
        X_test = np.clip(
            X_test + args.alpha * np.sign(np.random.randn(*X_test.shape)), 0.0,
            1.0)
        eps -= args.alpha

    logits = src_model(x)
    grad = gen_grad(x, logits, y)

    # FGSM and RAND+FGSM one-shot attack
    if attack in ["fgs", "rfgs"]:
        adv_x = symbolic_fgs(x, grad, eps=eps)

    # iterative FGSM
    if attack == "pgd":
        adv_x = iter_fgs(src_model,
                         x,
                         y,
                         steps=args.steps,
                         eps=args.eps,
                         alpha=args.eps / 10.0)

    if attack == 'mim':
        adv_x = momentum_fgs(src_model, x, y, eps=args.eps)

    print('start')
    # compute the adversarial examples and evaluate
    X_adv = batch_eval([x, y], [adv_x], [X_test, Y_test])[0]
    print('-----done----')
    # white-box attack
    err = tf_test_error_rate(src_model, x, X_adv, Y_test)
    print('{}->{}: {:.1f}'.format(basename(src_model_name),
                                  basename(src_model_name), 100 - err))

    # black-box attack
    for (name, target_model) in zip(target_model_names, target_models):
        err = tf_test_error_rate(target_model, x, X_adv, Y_test)
        print('{}->{}: {:.1f}'.format(basename(src_model_name), basename(name),
                                      100 - err))
コード例 #6
0
def main(attack, src_model_names, target_model_name):
    np.random.seed(0)
    tf.set_random_seed(0)

    flags.DEFINE_integer('BATCH_SIZE', 1, 'Size of batches')
    set_mnist_flags()

    dim = FLAGS.IMAGE_ROWS * FLAGS.IMAGE_COLS * FLAGS.NUM_CHANNELS

    x = K.placeholder(
        (None, FLAGS.IMAGE_ROWS, FLAGS.IMAGE_COLS, FLAGS.NUM_CHANNELS))

    y = K.placeholder((None, FLAGS.NUM_CLASSES))

    _, _, X_test, Y_test = data_mnist()
    Y_test_uncat = np.argmax(Y_test, axis=1)

    # source model for crafting adversarial examples
    src_models = [None] * len(src_model_names)
    for i in range(len(src_model_names)):
        src_models[i] = load_model(src_model_names[i])

    src_model_name_joint = ''
    for i in range(len(src_models)):
        src_model_name_joint += basename(src_model_names[i])

    # model(s) to target
    if target_model_name is not None:
        target_model = load_model(target_model_name)

    # simply compute test error
    if attack == "test":
        for (name, src_model) in zip(src_model_names, src_models):
            _, _, err = tf_test_error_rate(src_model, x, X_test, Y_test)
            print '{}: {:.1f}'.format(basename(name), err)
        if target_model_name is not None:
            _, _, err = tf_test_error_rate(target_model, x, X_test, Y_test)
        print '{}: {:.1f}'.format(basename(target_model_name), err)

        return

    if args.targeted_flag == 1:
        pickle_name = attack + '_' + src_model_name_joint + '_' + '_' + args.loss_type + '_targets.p'
        if os.path.exists(pickle_name):
            targets = pickle.load(open(pickle_name, 'rb'))
        else:
            targets = []
            allowed_targets = list(range(FLAGS.NUM_CLASSES))
            for i in range(len(Y_test)):
                allowed_targets.remove(Y_test_uncat[i])
                targets.append(np.random.choice(allowed_targets))
                allowed_targets = list(range(FLAGS.NUM_CLASSES))
            # targets = np.random.randint(10, size = BATCH_SIZE*BATCH_EVAL_NUM)
            targets = np.array(targets)
            print targets
            targets_cat = np_utils.to_categorical(
                targets, FLAGS.NUM_CLASSES).astype(np.float32)
            Y_test = targets_cat
            if SAVE_FLAG == True:
                pickle.dump(Y_test, open(pickle_name, 'wb'))

    # take the random step in the RAND+FGSM
    if attack == "rand_fgs":
        X_test = np.clip(
            X_test + args.alpha * np.sign(np.random.randn(*X_test.shape)), 0.0,
            1.0)
        eps -= args.alpha

    logits = [None] * len(src_model_names)
    for i in range(len(src_model_names)):
        curr_model = src_models[i]
        logits[i] = curr_model(x)

    if args.loss_type == 'xent':
        loss, grad = gen_grad_ens(x, logits, y)
    elif args.loss_type == 'cw':
        grad = gen_grad_cw(x, logits, y)
    if args.targeted_flag == 1:
        grad = -1.0 * grad

    for eps in eps_list:
        # FGSM and RAND+FGSM one-shot attack
        if attack in ["fgs", "rand_fgs"] and args.norm == 'linf':
            adv_x = symbolic_fgs(x, grad, eps=eps)
        elif attack in ["fgs", "rand_fgs"] and args.norm == 'l2':
            adv_x = symbolic_fg(x, grad, eps=eps)

        # iterative FGSM
        if attack == "ifgs":
            l = 1000
            X_test = X_test[0:l]
            Y_test = Y_test[0:l]

            adv_x = x
            # iteratively apply the FGSM with small step size
            for i in range(args.num_iter):
                adv_logits = [None] * len(src_model_names)
                for i in range(len(src_model_names)):
                    curr_model = src_models[i]
                    adv_logits[i] = curr_model(adv_x)

                if args.loss_type == 'xent':
                    loss, grad = gen_grad_ens(adv_x, adv_logits, y)
                elif args.loss_type == 'cw':
                    grad = gen_grad_cw(adv_x, adv_logits, y)
                if args.targeted_flag == 1:
                    grad = -1.0 * grad

                adv_x = symbolic_fgs(adv_x, grad, args.delta, True)
                r = adv_x - x
                r = K.clip(r, -eps, eps)
                adv_x = x + r

            adv_x = K.clip(adv_x, 0, 1)

        if attack == "CW_ens":
            l = 1000
            pickle_name = attack + '_' + src_model_name_joint + '_' + str(
                args.eps) + '_adv.p'
            print(pickle_name)
            Y_test = Y_test[0:l]
            if os.path.exists(pickle_name) and attack == "CW_ens":
                print 'Loading adversarial samples'
                X_adv = pickle.load(open(pickle_name, 'rb'))

                for (name, src_model) in zip(src_model_names, src_models):
                    preds_adv, _, err = tf_test_error_rate(
                        src_model, x, X_adv, Y_test)
                    print '{}->{}: {:.1f}'.format(src_model_name_joint,
                                                  basename(name), err)

                preds_adv, _, err = tf_test_error_rate(target_model, x, X_adv,
                                                       Y_test)
                print '{}->{}: {:.1f}'.format(src_model_name_joint,
                                              basename(target_model_name), err)

                return

            X_test = X_test[0:l]
            time1 = time()
            cli = CarliniLiEns(K.get_session(),
                               src_models,
                               targeted=False,
                               confidence=args.kappa,
                               eps=eps)

            X_adv = cli.attack(X_test, Y_test)

            r = np.clip(X_adv - X_test, -eps, eps)
            X_adv = X_test + r
            time2 = time()
            print("Run with Adam took {}s".format(time2 - time1))

            if SAVE_FLAG == True:
                pickle.dump(X_adv, open(pickle_name, 'wb'))

            for (name, src_model) in zip(src_model_names, src_models):
                print('Carrying out white-box attack')
                pres, _, err = tf_test_error_rate(src_model, x, X_adv, Y_test)
                print '{}->{}: {:.1f}'.format(src_model_name_joint,
                                              basename(name), err)
            if target_model_name is not None:
                print('Carrying out black-box attack')
                preds, orig, err = tf_test_error_rate(target_model, x, X_adv,
                                                      Y_test)
                print '{}->{}: {:.1f}'.format(src_model_name_joint,
                                              basename(target_model_name), err)

            return

        pickle_name = attack + '_' + src_model_name_joint + '_' + args.loss_type + '_' + str(
            eps) + '_adv.p'
        if args.targeted_flag == 1:
            pickle_name = attack + '_' + src_model_name_joint + '_' + args.loss_type + '_' + str(
                eps) + '_adv_t.p'

        if os.path.exists(pickle_name):
            print 'Loading adversarial samples'
            X_adv = pickle.load(open(pickle_name, 'rb'))
        else:
            print 'Generating adversarial samples'
            X_adv = batch_eval([x, y], [adv_x], [X_test, Y_test])[0]
            if SAVE_FLAG == True:
                pickle.dump(X_adv, open(pickle_name, 'wb'))

        avg_l2_perturb = np.mean(
            np.linalg.norm((X_adv - X_test).reshape(len(X_test), dim), axis=1))

        # white-box attack
        l = len(X_adv)
        print('Carrying out white-box attack')
        for (name, src_model) in zip(src_model_names, src_models):
            preds_adv, orig, err = tf_test_error_rate(src_model, x, X_adv,
                                                      Y_test[0:l])
            if args.targeted_flag == 1:
                err = 100.0 - err
            print '{}->{}: {:.1f}'.format(basename(name), basename(name), err)

        # black-box attack
        if target_model_name is not None:
            print('Carrying out black-box attack')
            preds, _, err = tf_test_error_rate(target_model, x, X_adv, Y_test)
            if args.targeted_flag == 1:
                err = 100.0 - err
            print '{}->{}: {:.1f}, {}, {} {}'.format(
                src_model_name_joint, basename(target_model_name), err,
                avg_l2_perturb, eps, attack)
コード例 #7
0
def main(attack, src_model_name, target_model_names, data_train_dir,
         data_test_dir):
    np.random.seed(0)
    tf.set_random_seed(0)
    set_gtsrb_flags()

    # Get GTSRB test data
    _, _, _, _, X_test, Y_test = load_data(data_train_dir, data_test_dir)

    # display_leg_sample(X_test)

    # One-hot encode image labels
    label_binarizer = LabelBinarizer()
    Y_test = label_binarizer.fit_transform(Y_test)

    x = K.placeholder(
        (None, FLAGS.IMAGE_ROWS, FLAGS.IMAGE_COLS, FLAGS.NUM_CHANNELS))

    y = K.placeholder((None, FLAGS.NUM_CLASSES))

    # one_hot_y = tf.one_hot(y, 43)

    # source model for crafting adversarial examples
    src_model = load_model(src_model_name)

    # model(s) to target
    target_models = [None] * len(target_model_names)
    for i in range(len(target_model_names)):
        target_models[i] = load_model(target_model_names[i])

    # simply compute test error
    if attack == "test":
        err = tf_test_error_rate(src_model, x, X_test, Y_test)
        print '{}: {:.3f}'.format(basename(src_model_name), err)

        for (name, target_model) in zip(target_model_names, target_models):
            err = tf_test_error_rate(target_model, x, X_test, Y_test)
            print '{}: {:.3f}'.format(basename(name), err)
        return

    eps = args.eps

    # take the random step in the RAND+FGSM
    if attack == "rand_fgs":
        X_test = np.clip(
            X_test + args.alpha * np.sign(np.random.randn(*X_test.shape)), 0.0,
            1.0)
        eps -= args.alpha

    logits = src_model(x)
    grad = gen_grad(x, logits, y)

    # FGSM and RAND+FGSM one-shot attack
    if attack in ["fgs", "rand_fgs"]:
        adv_x = symbolic_fgs(x, grad, eps=eps)

    # iterative FGSM
    if attack == "ifgs":
        adv_x = iter_fgs(src_model,
                         x,
                         y,
                         steps=args.steps,
                         eps=args.eps / args.steps)

    # Carlini & Wagner attack
    if attack == "CW":
        X_test = X_test[0:200]
        Y_test = Y_test[0:200]

        cli = CarliniLi(K.get_session(),
                        src_model,
                        targeted=False,
                        confidence=args.kappa,
                        eps=args.eps)

        X_adv = cli.attack(X_test, Y_test)

        r = np.clip(X_adv - X_test, -args.eps, args.eps)
        X_adv = X_test + r
        np.save('Train_Carlini_200.npy', X_adv)
        np.save('Label_Carlini_200.npy', Y_test)

        err = tf_test_error_rate(src_model, x, X_adv, Y_test)
        print '{}->{}: {:.3f}'.format(basename(src_model_name),
                                      basename(src_model_name), err)

        for (name, target_model) in zip(target_model_names, target_models):
            err = tf_test_error_rate(target_model, x, X_adv, Y_test)
            print '{}->{}: {:.3f}'.format(basename(src_model_name),
                                          basename(name), err)
        display_leg_adv_sample(X_test, X_adv)
        return

    if attack == "cascade_ensemble":
        # X_test = np.clip(
        #     X_test + args.alpha * np.sign(np.random.randn(*X_test.shape)),
        #     0.0, 1.0)
        # eps -= args.alpha

        sub_model_ens = (sub_model_2, sub_model_3)
        sub_models = [None] * len(sub_model_ens)
        for i in range(len(sub_model_ens)):
            sub_models[i] = load_model(sub_model_ens[i])

        adv_x = x
        for j in range(args.steps):
            for i, m in enumerate(sub_models + [src_model]):
                logits = m(adv_x)
                gradient = gen_grad(adv_x, logits, y)
                adv_x = symbolic_fgs(adv_x,
                                     gradient,
                                     eps=args.eps / args.steps,
                                     clipping=True)

    if attack == "Iter_Casc":
        # X_test = np.clip(
        #     X_test + args.alpha * np.sign(np.random.randn(*X_test.shape)),
        #     0.0, 1.0)
        # args.eps = args.eps - args.alpha

        sub_model_ens = (sub_model_1, sub_model_2, sub_model_3)
        sub_models = [None] * len(sub_model_ens)
        for i in range(len(sub_model_ens)):
            sub_models[i] = load_model(sub_model_ens[i])

        x_advs = [None] * len(sub_models)
        errs = [None] * len(sub_models)
        adv_x = x
        eps_all = []

        for i in range(args.steps):
            if i == 0:
                eps_all[0] = (1.0 / len(sub_models)) * args.eps
            else:
                for j in range(i):
                    pre_sum = 0.0
                    pre_sum += eps_all[j]
                    eps_all[i] = (args.eps - pre_sum) * (1.0 / len(sub_models))

        # for i in range(args.steps):
        #     if i == 0:
        #         eps_0 = (1.0 / len(sub_models)) * args.eps
        #         eps_all.append(eps_0)
        #     elif i == 1:
        #         eps_1 = (1 - 1.0 / len(sub_models)) * (1.0 / len(sub_models)) * args.eps
        #         eps_all.append(eps_1)
        #     elif i == 2:
        #         eps_2 = (1 - (1 - 1.0 / len(sub_models)) * (1.0 / len(sub_models))) * (1.0 / len(sub_models)) * args.eps
        #         eps_all.append(eps_2)
        #     elif i == 3:
        #         eps_3 = (1 - (1 - (1 - 1.0 / len(sub_models)) * (1.0 / len(sub_models))) * (1.0 / len(sub_models))) * (
        #                 1.0 / len(sub_models)) * args.eps
        #         eps_all.append(eps_3)
        #     elif i == 4:
        #         eps_4 = (1 - (
        #                 1 - (1 - (1 - 1.0 / len(sub_models)) * (1.0 / len(sub_models))) * (1.0 / len(sub_models))) * (
        #                          1.0 / len(sub_models))) * (1.0 / len(sub_models)) * args.eps
        #         eps_all.append(eps_4)
        #     elif i == 5:
        #         eps_5 = (1 - (1 - (
        #                 1 - (1 - (1 - 1.0 / len(sub_models)) * (1.0 / len(sub_models))) * (1.0 / len(sub_models))) * (
        #                               1.0 / len(sub_models)))) * (1.0 / len(sub_models)) * args.eps
        #         eps_all.append(eps_5)
        #     elif i == 6:
        #         eps_6 = (1 - (1 - (1 - (
        #                 1 - (1 - (1 - 1.0 / len(sub_models)) * (1.0 / len(sub_models))) * (1.0 / len(sub_models))) * (
        #                                    1.0 / len(sub_models))))) * (1.0 / len(sub_models)) * args.eps
        #         eps_all.append(eps_6)
        #
        #     elif i == 7:
        #         eps_7 = (1 - (1 - (1 - (1 - (
        #                 1 - (1 - (1 - 1.0 / len(sub_models)) * (1.0 / len(sub_models))) * (1.0 / len(sub_models))) * (
        #                                         1.0 / len(sub_models)))))) * (1.0 / len(sub_models)) * args.eps
        #         eps_all.append(eps_7)
        #     elif i == 8:
        #         eps_8 = (1 - (1 - (1 - (1 - (1 - (
        #                 1 - (1 - (1 - 1.0 / len(sub_models)) * (1.0 / len(sub_models))) * (1.0 / len(sub_models))) * (
        #                                              1.0 / len(sub_models))))))) * (1.0 / len(sub_models)) * args.eps
        #         eps_all.append(eps_8)
        #     elif i == 9:
        #         eps_9 = (1 - (1 - (1 - (1 - (1 - (1 - (
        #                 1 - (1 - (1 - 1.0 / len(sub_models)) * (1.0 / len(sub_models))) * (1.0 / len(sub_models))) * (
        #                                                   1.0 / len(sub_models)))))))) * (
        #                         1.0 / len(sub_models)) * args.eps
        #         eps_all.append(eps_9)
        #     elif i == 10:
        #         eps_10 = (1 - (1 - (1 - (1 - (1 - (1 - (1 - (
        #                 1 - (1 - (1 - 1.0 / len(sub_models)) * (1.0 / len(sub_models))) * (1.0 / len(sub_models))) * (
        #                                                         1.0 / len(sub_models))))))))) * (
        #                          1.0 / len(sub_models)) * args.eps
        #         eps_all.append(eps_10)
        #     elif i == 11:
        #         eps_11 = (1 - (1 - (1 - (1 - (1 - (1 - (1 - (1 - (
        #                 1 - (1 - (1 - 1.0 / len(sub_models)) * (1.0 / len(sub_models))) * (1.0 / len(sub_models))) * (
        #                                                              1.0 / len(sub_models)))))))))) * (
        #                          1.0 / len(sub_models)) * args.eps
        #         eps_all.append(eps_11)

        for j in range(args.steps):
            print('iterative step is :', j)
            if j == 0:
                for i, m in enumerate(sub_models):
                    logits = m(adv_x)
                    gradient = gen_grad(adv_x, logits, y)
                    adv_x_ = symbolic_fgs(adv_x,
                                          gradient,
                                          eps=eps_all[j],
                                          clipping=True)
                    x_advs[i] = adv_x_

                    X_adv = batch_eval([x, y], [adv_x_], [X_test, Y_test])[0]

                    err = tf_test_error_rate(m, x, X_adv, Y_test)
                    errs[i] = err
                adv_x = x_advs[errs.index(min(errs))]
            else:
                t = errs.index(min(errs))
                print('index of min value of errs:', t)
                logits = sub_models[t](adv_x)
                gradient = gen_grad(adv_x, logits, y)
                adv_x = symbolic_fgs(adv_x,
                                     gradient,
                                     eps=eps_all[j],
                                     clipping=True)

                for i, m in enumerate(sub_models):
                    X_adv = batch_eval([x, y], [adv_x], [X_test, Y_test])[0]
                    err = tf_test_error_rate(m, x, X_adv, Y_test)
                    errs[i] = err
            print('error rate of each substitute models_oldest: ', errs)
            print('\t')
            if min(errs) >= 99:
                success_rate = sum(errs) / len(sub_models)
                print('success rate is: {:.3f}'.format(success_rate))
                break

        success_rate = sum(errs) / len(sub_models)
        print('success rate is: {:.3f}'.format(success_rate))

        X_adv = batch_eval([x, y], [adv_x], [X_test, Y_test])[0]
        np.save('results/iter_casc_0.2_leg_adv/X_adv_Iter_Casc_0.2.npy', X_adv)

        for (name, target_model) in zip(target_model_names, target_models):
            err = tf_test_error_rate(target_model, x, X_adv, Y_test)
            print '{}->{}: {:.3f}'.format(basename(src_model_name),
                                          basename(name), err)

        save_leg_adv_sample('results/iter_casc_0.2_leg_adv/', X_test, X_adv)

        # save adversarial example specified by user
        save_leg_adv_specified_by_user(
            'results/iter_casc_0.2_leg_adv_label_4/', X_test, X_adv, Y_test)
        return

    if attack == "stack_paral":
        # X_test = np.clip(
        #     X_test + args.alpha * np.sign(np.random.randn(*X_test.shape)),
        #     0.0, 1.0)
        # eps -= args.alpha

        sub_model_ens = (sub_model_1, sub_model_2, sub_model_3)
        sub_models = [None] * len(sub_model_ens)

        for i in range(len(sub_model_ens)):
            sub_models[i] = load_model(sub_model_ens[i])

        errs = [None] * (len(sub_models) + 1)
        x_advs = [None] * len(sub_models)
        # print x_advs

        for i, m in enumerate(sub_models):
            # x = x + args.alpha * np.sign(np.random.randn(*x[0].shape))
            logits = m(x)
            gradient = gen_grad(x, logits, y)
            adv_x = symbolic_fgs(x, gradient, eps=args.eps / 2, clipping=True)
            x_advs[i] = adv_x

        # print x_advs
        adv_x_sum = x_advs[0]
        for i in range(len(sub_models)):
            if i == 0: continue
            adv_x_sum = adv_x_sum + x_advs[i]
        adv_x_mean = adv_x_sum / (len(sub_models))
        preds = src_model(adv_x_mean)
        grads = gen_grad(adv_x_mean, preds, y)
        adv_x = symbolic_fgs(adv_x_mean, grads, eps=args.eps, clipping=True)

        # compute the adversarial examples and evaluate
        for i, m in enumerate(sub_models + [src_model]):
            X_adv = batch_eval([x, y], [adv_x], [X_test, Y_test])[0]
            err = tf_test_error_rate(m, x, X_adv, Y_test)
            errs[i] = err

        # compute success rate
        success_rate = sum(errs) / (len(sub_models) + 1)
        print('success rate is: {:.3f}'.format(success_rate))

        # compute transfer rate
        for (name, target_model) in zip(target_model_names, target_models):
            err = tf_test_error_rate(target_model, x, X_adv, Y_test)
            print '{}->{}: {:.3f}'.format(basename(src_model_name),
                                          basename(name), err)

        # save adversarial examples
        np.save('results/stack_paral_0.2_leg_adv/X_adv_stack_paral_0.2.npy',
                X_adv)
        # save_leg_adv_sample(X_test, X_adv)
        save_leg_adv_sample('results/stack_paral_0.2_leg_adv/', X_test, X_adv)

        # save adversarial example specified by user
        save_leg_adv_specified_by_user(
            'results/stack_paral_0.2_leg_adv_label_4/', X_test, X_adv, Y_test)

        return

    if attack == "cascade_ensemble_2":
        X_test = np.clip(
            X_test + args.alpha * np.sign(np.random.randn(*X_test.shape)), 0.0,
            1.0)
        eps -= args.alpha

        sub_model_ens = (sub_model_1, sub_model_2)
        sub_models = [None] * len(sub_model_ens)

        for i in range(len(sub_model_ens)):
            sub_models[i] = load_model(sub_model_ens[i])

        x_advs = [([None] * len(sub_models)) for i in range(args.steps)]
        # print x_advs

        x_adv = x
        for j in range(args.steps):
            for i, m in enumerate(sub_models):
                logits = m(x_adv)
                gradient = gen_grad(x_adv, logits, y)
                x_adv = symbolic_fgs(x_adv,
                                     gradient,
                                     eps=args.eps / args.steps,
                                     clipping=True)
                x_advs[j][i] = x_adv

        # print x_advs
        adv_x_sum = x_advs[0][0]
        for j in range(args.steps):
            for i in range(len(sub_models)):
                if j == 0 and i == 0: continue
                adv_x_sum = adv_x_sum + x_advs[j][i]
        adv_x_mean = adv_x_sum / (args.steps * len(sub_models))
        preds = src_model(adv_x_mean)
        grads = gen_grad(adv_x_mean, preds, y)
        adv_x = symbolic_fgs(adv_x_mean,
                             grads,
                             eps=args.eps / args.steps,
                             clipping=True)

    # compute the adversarial examples and evaluate
    X_adv = batch_eval([x, y], [adv_x], [X_test, Y_test])[0]

    # white-box attack
    err = tf_test_error_rate(src_model, x, X_adv, Y_test)
    print '{}->{}: {:.3f}'.format(basename(src_model_name),
                                  basename(src_model_name), err)

    # black-box attack
    for (name, target_model) in zip(target_model_names, target_models):
        err = tf_test_error_rate(target_model, x, X_adv, Y_test)
        print '{}->{}: {:.3f}'.format(basename(src_model_name), basename(name),
                                      err)
コード例 #8
0
def main(attack, src_model_name, target_model_names):
    np.random.seed(0)
    tf.set_random_seed(0)

    flags.DEFINE_integer('BATCH_SIZE', 32, 'Size of batches')
    set_mnist_flags()

    x = K.placeholder(
        (None, FLAGS.IMAGE_ROWS, FLAGS.IMAGE_COLS, FLAGS.NUM_CHANNELS))

    y = K.placeholder((None, FLAGS.NUM_CLASSES))

    _, _, X_test, Y_test = data_mnist()

    # source model for crafting adversarial examples
    src_model = load_model(src_model_name)

    # model(s) to target
    target_models = [None] * len(target_model_names)
    for i in range(len(target_model_names)):
        target_models[i] = load_model(target_model_names[i])

    # simply compute test error
    if attack == "test":
        err = tf_test_error_rate(src_model, x, X_test, Y_test)
        print '{}: {:.3f}'.format(basename(src_model_name), err)

        for (name, target_model) in zip(target_model_names, target_models):
            err = tf_test_error_rate(target_model, x, X_test, Y_test)
            print '{}: {:.3f}'.format(basename(name), err)
        return

    eps = args.eps

    # take the random step in the RAND+FGSM
    if attack == "rand_fgs":
        X_test = np.clip(
            X_test + args.alpha * np.sign(np.random.randn(*X_test.shape)), 0.0,
            1.0)
        eps -= args.alpha

    logits = src_model(x)
    grad = gen_grad(x, logits, y)

    # FGSM and RAND+FGSM one-shot attack
    if attack in ["fgs", "rand_fgs"]:
        adv_x = symbolic_fgs(x, grad, eps=eps)

    # iterative FGSM
    if attack == "ifgs":
        adv_x = iter_fgs(src_model,
                         x,
                         y,
                         steps=args.steps,
                         eps=args.eps / args.steps)

    # Carlini & Wagner attack
    if attack == "CW":
        X_test = X_test[0:1000]
        Y_test = Y_test[0:1000]

        cli = CarliniLi(K.get_session(),
                        src_model,
                        targeted=False,
                        confidence=args.kappa,
                        eps=args.eps)

        X_adv = cli.attack(X_test, Y_test)

        r = np.clip(X_adv - X_test, -args.eps, args.eps)
        X_adv = X_test + r

        err = tf_test_error_rate(src_model, x, X_adv, Y_test)
        print '{}->{}: {:.3f}'.format(basename(src_model_name),
                                      basename(src_model_name), err)

        for (name, target_model) in zip(target_model_names, target_models):
            err = tf_test_error_rate(target_model, x, X_adv, Y_test)
            print '{}->{}: {:.3f}'.format(basename(src_model_name),
                                          basename(name), err)

        return

    if attack == "cascade_ensemble":
        X_test = np.clip(
            X_test + args.alpha * np.sign(np.random.randn(*X_test.shape)), 0.0,
            1.0)
        eps -= args.alpha

        sub_model_ens = (sub_model_1, sub_model_2, sub_model_3, sub_model_4,
                         sub_model_5, sub_model_6, sub_model_7)
        sub_models = [None] * len(sub_model_ens)
        for i in range(len(sub_model_ens)):
            sub_models[i] = load_model(sub_model_ens[i])

        adv_x = x
        for j in range(args.steps):
            for i, m in enumerate(sub_models + [src_model]):
                logits = m(adv_x)
                gradient = gen_grad(adv_x, logits, y)
                adv_x = symbolic_fgs(adv_x,
                                     gradient,
                                     eps=args.eps / args.steps,
                                     clipping=True)

    if attack == "parallel_ensemble":
        X_test = np.clip(
            X_test + args.alpha * np.sign(np.random.randn(*X_test.shape)), 0.0,
            1.0)
        eps -= args.alpha

        sub_model_ens = (sub_model_1, sub_model_2, sub_model_3)
        sub_models = [None] * len(sub_model_ens)

        for i in range(len(sub_model_ens)):
            sub_models[i] = load_model(sub_model_ens[i])

        x_advs = [([None] * len(sub_models)) for i in range(args.steps)]
        print x_advs

        x_adv = x
        for j in range(args.steps):
            for i, m in enumerate(sub_models):
                logits = m(x_adv)
                gradient = gen_grad(x_adv, logits, y)
                x_adv = symbolic_fgs(x_adv,
                                     gradient,
                                     eps=args.eps / args.steps,
                                     clipping=True)
                x_advs[j][i] = x_adv

        print x_advs
        adv_x_mean = x_advs[0][0]
        for j in range(args.steps):
            for i in range(len(sub_models)):
                if j == 0 and i == 0: continue
                adv_x_mean = adv_x_mean + x_advs[j][i]
        xadv = adv_x_mean / (args.steps * len(sub_models))
        preds = src_model(xadv)
        grads = gen_grad(xadv, preds, y)
        adv_x = symbolic_fgs(xadv, grads, eps=args.eps, clipping=True)

    # compute the adversarial examples and evaluate
    X_adv = batch_eval([x, y], [adv_x], [X_test, Y_test])[0]

    # white-box attack
    err = tf_test_error_rate(src_model, x, X_adv, Y_test)
    print '{}->{}: {:.3f}'.format(basename(src_model_name),
                                  basename(src_model_name), err)

    # black-box attack
    for (name, target_model) in zip(target_model_names, target_models):
        err = tf_test_error_rate(target_model, x, X_adv, Y_test)
        print '{}->{}: {:.3f}'.format(basename(src_model_name), basename(name),
                                      err)