Example #1
def main(model_name, adv_model_names, model_type):
    np.random.seed(0)
    assert keras.backend.backend() == "tensorflow"

    # Get MNIST test data
    X_train, Y_train, X_test, Y_test = data_mnist()

    data_gen = data_gen_mnist(X_train)

    x = K.placeholder(shape=(None, 28, 28, 1))

    y = K.placeholder(shape=(BATCH_SIZE, 10))

    eps = args.eps
    norm = args.norm

    # if src_models is not None, we train on adversarial examples that come
    # from multiple models
    adv_models = [None] * len(adv_model_names)
    ens_str = ''
    for i in range(len(adv_model_names)):
        adv_models[i] = load_model(adv_model_names[i])
        name = basename(adv_model_names[i])
        model_index = name.replace('model', '')
        ens_str += model_index
    model = model_mnist(type=model_type)

    x_advs = [None] * (len(adv_models) + 1)
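    # one symbolic batch of adversarial examples per loaded source model, plus
    # one crafted against the model being trained (ensemble adversarial training)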

    for i, m in enumerate(adv_models + [model]):
        if args.iter == 0:
            logits = m(x)
            grad = gen_grad(x, logits, y, loss='training')
            x_advs[i] = symbolic_fgs(x, grad, eps=eps)
        elif args.iter == 1:
            x_advs[i] = iter_fgs(m, x, y, steps=40, alpha=0.01, eps=args.eps)

    # Train an MNIST model
    tf_train(x,
             y,
             model,
             X_train,
             Y_train,
             data_gen,
             x_advs=x_advs,
             benign=args.ben)

    # Finally print the result!
    test_error = tf_test_error_rate(model, x, X_test, Y_test)
    print('Test error: %.1f%%' % test_error)
    model_name += '_' + str(eps) + '_' + str(norm) + '_' + ens_str
    if args.iter == 1:
        model_name += 'iter'
    if args.ben == 0:
        model_name += '_nob'
    save_model(model, model_name)
    json_string = model.to_json()
    with open(model_name + '.json', 'w') as f:
        f.write(json_string)
Example #2
def main(model_name, adv_model_names, model_type):
    np.random.seed(0)
    assert keras.backend.backend() == "tensorflow"
    set_flags(24)

    config = tf.ConfigProto()
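    # let TensorFlow grab GPU memory on demand instead of reserving it all up front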
    config.gpu_options.allow_growth = True
    K.set_session(tf.Session(config=config))

    flags.DEFINE_integer('NUM_EPOCHS', args.epochs, 'Number of epochs')
    flags.DEFINE_integer('type', args.type, 'model type')

    # Get MNIST test data
    X_train, Y_train, X_test, Y_test = load_data()

    data_gen = data_flow(X_train)

    x = K.placeholder(shape=(None,
                             FLAGS.NUM_CHANNELS,
                             FLAGS.IMAGE_ROWS,
                             FLAGS.IMAGE_COLS))

    y = K.placeholder(shape=(FLAGS.BATCH_SIZE, FLAGS.NUM_CLASSES))

    eps = args.eps

    # if src_models is not None, we train on adversarial examples that come
    # from multiple models
    adv_models = [None] * len(adv_model_names)
    for i in range(len(adv_model_names)):
        adv_models[i] = load_model(adv_model_names[i])

    model = model_select(type=model_type)

    x_advs = [None] * (len(adv_models) + 1)
    for i, m in enumerate(adv_models + [model]):
        logits = m(x)
        grad = gen_grad(x, logits, y, loss='training')
        x_advs[i] = symbolic_fgs(x, grad, eps=eps)

    # Train
    tf_train(x, y, model, X_train, Y_train, data_gen, model_name,
             x_advs=x_advs, epochs=args.epochs)

    # Finally print the result!
    test_error = tf_test_error_rate(model, x, X_test, Y_test)
    with open(model_name + '_log.txt', 'a') as log:
        log.write('Test error: %.1f%%\n' % test_error)
    print('Test error: %.1f%%' % test_error)
    save_model(model, model_name)
    json_string = model.to_json()
    with open(model_name+'.json', 'w') as f:
        f.write(json_string)
Example #3
def main(model_name, adv_model_names, model_type):
    np.random.seed(0)
    assert keras.backend.backend() == "tensorflow"
    set_mnist_flags()

    flags.DEFINE_integer('NUM_EPOCHS', args.epochs, 'Number of epochs')

    # Get MNIST test data
    X_train, Y_train, X_test, Y_test = data_mnist()

    data_gen = data_gen_mnist(X_train)

    x = K.placeholder(shape=(None, FLAGS.IMAGE_ROWS, FLAGS.IMAGE_COLS,
                             FLAGS.NUM_CHANNELS))

    y = K.placeholder(shape=(FLAGS.BATCH_SIZE, FLAGS.NUM_CLASSES))

    eps = args.eps

    # if src_models is not None, we train on adversarial examples that come
    # from multiple models
    adv_models = [None] * len(adv_model_names)
    for i in range(len(adv_model_names)):
        adv_models[i] = load_model(adv_model_names[i])

    model = model_mnist(type=model_type)

    x_advs = [None] * (len(adv_models) + 1)

    for i, m in enumerate(adv_models + [model]):
        logits = m(x)
        grad = gen_grad(x, logits, y, loss='training')
        x_advs[i] = symbolic_fgs(x, grad, eps=eps)

    # Train an MNIST model
    tf_train(x, y, model, X_train, Y_train, data_gen, x_advs=x_advs)

    # Finally print the result!
    test_error = tf_test_error_rate(model, x, X_test, Y_test)
    print('Test error: %.1f%%' % test_error)
    save_model(model, model_name)
    json_string = model.to_json()
    with open(model_name + '.json', 'w') as f:
        f.write(json_string)
Example #4
def main(adv_model_names):
    np.random.seed(0)
    assert keras.backend.backend() == "tensorflow"
    set_model_flags()

    tf.reset_default_graph()
    g = tf.get_default_graph()

    x = tf.placeholder(
        tf.float32,
        shape=[None, FLAGS.IMAGE_ROWS, FLAGS.IMAGE_COLS, FLAGS.NUM_CHANNELS])
    y = tf.placeholder(tf.float32, shape=[None, FLAGS.NUM_CLASSES])

    train_mode = tf.placeholder(tf.bool)
    eps = FLAGS.EPS

    # if src_models is not None, we train on adversarial examples that come
    # from multiple models
    # load the source models from an outside checkpoint directory
    adv_models = [None] * len(adv_model_names)
    for i, name in enumerate(adv_model_names):
        adv_models[i] = load_model(name, path="./models/" + name + "-save.npy")
    x_advs = [None] * (len(adv_models))
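    # here the adversarial examples come only from the externally loaded models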

    for i, m in enumerate(adv_models):
        logits, _ = m(x)
        grad = gen_grad(x, logits, y, loss='training')
        x_advs[i] = symbolic_fgs(x, grad, eps=eps)

    data = dataset(FLAGS.DIR, normalize=False)
    sess, graph_dict = tf_train(g,
                                x,
                                y,
                                data,
                                defense_model,
                                train_mode,
                                x_advs=x_advs)

    # Finally print the result!
    test_error = tf_test_error_rate(sess, graph_dict, data, x_advs)
    print('Test error: %.1f%%' % test_error)
Example #5
def main(attack, src_model_name, target_model_name):
    np.random.seed(0)
    tf.set_random_seed(0)

    dim = 28 * 28 * 1

    x = K.placeholder((None,
                       28,
                       28,
                       1))

    y = K.placeholder((None, 10))

    _, _, X_test, Y_test = data_mnist()
    Y_test_uncat = np.argmax(Y_test, axis=1)

    # source model for crafting adversarial examples
    src_model = load_model(src_model_name)

    # model(s) to target
    target_model = load_model(target_model_name)

    # simply compute test error
    if attack == "test":
        _, _, err = tf_test_error_rate(src_model, x, X_test, Y_test)
        print('{}: {:.1f}'.format(basename(src_model_name), err))
        _, _, err = tf_test_error_rate(target_model, x, X_test, Y_test)
        print('{}: {:.1f}'.format(basename(target_model_name), err))

        return

    if args.targeted_flag == 1:
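        # draw a random target class for each test point, excluding its true label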
        targets = []
        allowed_targets = list(range(10))
        for i in range(len(Y_test)):
            allowed_targets.remove(Y_test_uncat[i])
            targets.append(np.random.choice(allowed_targets))
            allowed_targets = list(range(10))
        targets = np.array(targets)
        print(targets)
        targets_cat = np_utils.to_categorical(targets, 10).astype(np.float32)
        Y_test = targets_cat

    logits = src_model(x)
    print('logits', logits)

    if args.loss_type == 'xent':
        loss, grad = gen_grad_ens(x, logits, y)
        assert grad is not None
    elif args.loss_type == 'cw':
        grad = gen_grad_cw(x, logits, y)
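    # a targeted attack moves toward the target class, so flip the gradient sign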
    if args.targeted_flag == 1:
        grad = -1.0 * grad

    for eps in eps_list:
        # FGSM and RAND+FGSM one-shot attack
        if attack in ["fgs", "rand_fgs"] and args.norm == 'linf':
            assert grad is not None
            adv_x = symbolic_fgs(x, grad, eps=eps)
        elif attack in ["fgs", "rand_fgs"] and args.norm == 'l2':
            adv_x = symbolic_fg(x, grad, eps=eps)

        # iterative FGSM
        if attack == "ifgs":
            l = 1000
            X_test = X_test[0:l]
            Y_test = Y_test[0:l]

            adv_x = x
            # iteratively apply the FGSM with small step size
            for i in range(args.num_iter):
                adv_logits = src_model(adv_x)

                if args.loss_type == 'xent':
                    loss, grad = gen_grad_ens(adv_x, adv_logits, y)
                elif args.loss_type == 'cw':
                    grad = gen_grad_cw(adv_x, adv_logits, y)
                if args.targeted_flag == 1:
                    grad = -1.0 * grad

                adv_x = symbolic_fgs(adv_x, grad, args.delta, True)
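                # project the accumulated perturbation back onto the eps-ball around x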
                r = adv_x - x
                r = K.clip(r, -eps, eps)
                adv_x = x + r

            adv_x = K.clip(adv_x, 0, 1)

        print('Generating adversarial samples')
        X_adv = batch_eval([x, y], [adv_x], [X_test, Y_test])[0]

        avg_l2_perturb = np.mean(np.linalg.norm((X_adv - X_test).reshape(len(X_test), dim), axis=1))

        # white-box attack
        l = len(X_adv)
        print('Carrying out white-box attack')
        preds_adv, orig, err = tf_test_error_rate(src_model, x, X_adv, Y_test[0:l])
        if args.targeted_flag == 1:
            err = 100.0 - err
        print('{}->{}: {:.1f}'.format(src_model_name, src_model_name, err))

        # black-box attack
        if target_model_name is not None:
            print('Carrying out black-box attack')
            preds, _, err = tf_test_error_rate(target_model, x, X_adv, Y_test)
            if args.targeted_flag == 1:
                err = 100.0 - err
            print('{}->{}: {:.1f}, {}, {} {}'.format(src_model_name,
                                                     basename(target_model_name), err,
                                                     avg_l2_perturb, eps, attack))
Example #6
def main(attack, src_model_name, target_model_names, data_train_dir,
         data_test_dir):
    np.random.seed(0)
    tf.set_random_seed(0)

    flags.DEFINE_integer('BATCH_SIZE', 32, 'Size of batches')
    set_gtsrb_flags()

    # Get GTSRB test data
    _, _, X_test, Y_test = load_data(data_train_dir, data_test_dir)

    # One-hot encode image labels
    label_binarizer = LabelBinarizer()
    Y_test = label_binarizer.fit_transform(Y_test)

    x = tf.placeholder(tf.float32, (None, 32, 32, 1))

    y = tf.placeholder(tf.int32, (None,))

    one_hot_y = tf.one_hot(y, 43)

    # source model for crafting adversarial examples
    src_model = load_model(src_model_name)

    # model(s) to target
    target_models = [None] * len(target_model_names)
    for i in range(len(target_model_names)):
        target_models[i] = load_model(target_model_names[i])

    # simply compute test error
    if attack == "test":
        err = tf_test_error_rate(src_model, x, X_test, Y_test)
        print '{}: {:.3f}'.format(basename(src_model_name), err)

        for (name, target_model) in zip(target_model_names, target_models):
            err = tf_test_error_rate(target_model, x, X_test, Y_test)
            print '{}: {:.3f}'.format(basename(name), err)
        return

    eps = args.eps

    # take the random step in the RAND+FGSM
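    # (alpha of the budget goes to the random step, the FGSM step gets eps - alpha)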
    if attack == "rand_fgs":
        X_test = np.clip(
            X_test + args.alpha * np.sign(np.random.randn(*X_test.shape)), 0.0,
            1.0)
        eps -= args.alpha

    logits = src_model(x)
    grad = gen_grad(x, logits, y)

    # FGSM and RAND+FGSM one-shot attack
    if attack in ["fgs", "rand_fgs"]:
        adv_x = symbolic_fgs(x, grad, eps=eps)

    # iterative FGSM
    if attack == "ifgs":
        adv_x = iter_fgs(src_model,
                         x,
                         y,
                         steps=args.steps,
                         eps=args.eps / args.steps)

    # Carlini & Wagner attack
    if attack == "CW":
        X_test = X_test[0:1000]
        Y_test = Y_test[0:1000]

        cli = CarliniLi(K.get_session(),
                        src_model,
                        targeted=False,
                        confidence=args.kappa,
                        eps=args.eps)

        X_adv = cli.attack(X_test, Y_test)

        r = np.clip(X_adv - X_test, -args.eps, args.eps)
        X_adv = X_test + r

        err = tf_test_error_rate(src_model, x, X_adv, Y_test)
        print('{}->{}: {:.3f}'.format(basename(src_model_name),
                                      basename(src_model_name), err))

        for (name, target_model) in zip(target_model_names, target_models):
            err = tf_test_error_rate(target_model, x, X_adv, Y_test)
            print('{}->{}: {:.3f}'.format(basename(src_model_name),
                                          basename(name), err))

        return

    if attack == "grad_ens":
        X_test = np.clip(
            X_test + args.alpha * np.sign(np.random.randn(*X_test.shape)), 0.0,
            1.0)
        eps -= args.alpha

        sub_model_ens = (sub_model_1, sub_model_2)
        sub_models = [None] * len(sub_model_ens)
        for i in range(len(sub_model_ens)):
            sub_models[i] = load_model(sub_model_ens[i])

        adv_x = x
        for j in range(args.steps):
            for i, m in enumerate(sub_models + [src_model]):
                logits = m(adv_x)
                gradient = gen_grad(adv_x, logits, y)
                adv_x = symbolic_fgs(adv_x,
                                     gradient,
                                     eps=args.eps / args.steps,
                                     clipping=True)

    # compute the adversarial examples and evaluate
    X_adv = batch_eval([x, y], [adv_x], [X_test, Y_test])[0]

    # white-box attack
    err = tf_test_error_rate(src_model, x, X_adv, Y_test)
    print('{}->{}: {:.3f}'.format(basename(src_model_name),
                                  basename(src_model_name), err))

    # black-box attack
    for (name, target_model) in zip(target_model_names, target_models):
        err = tf_test_error_rate(target_model, x, X_adv, Y_test)
        print('{}->{}: {:.3f}'.format(basename(src_model_name), basename(name),
                                      err))
Example #7
def main(attack, src_model_name, target_model_names):
    np.random.seed(0)
    tf.set_random_seed(0)

    flags.DEFINE_integer('BATCH_SIZE', 10, 'Size of batches')
    set_mnist_flags()

    x = K.placeholder(
        (None, FLAGS.IMAGE_ROWS, FLAGS.IMAGE_COLS, FLAGS.NUM_CHANNELS))

    y = K.placeholder((None, FLAGS.NUM_CLASSES))

    _, _, X_test, Y_test = data_mnist()

    # source model for crafting adversarial examples
    src_model = load_model(src_model_name)

    # model(s) to target
    target_models = [None] * len(target_model_names)
    for i in range(len(target_model_names)):
        target_models[i] = load_model(target_model_names[i])

    # simply compute test error
    if attack == "test":
        err = tf_test_error_rate(src_model, x, X_test, Y_test)
        print '{}: {:.1f}'.format(basename(src_model_name), err)

        for (name, target_model) in zip(target_model_names, target_models):
            err = tf_test_error_rate(target_model, x, X_test, Y_test)
            print '{}: {:.1f}'.format(basename(name), err)
        return

    eps = args.eps

    # take the random step in the RAND+FGSM
    if attack == "rand_fgs":
        X_test = np.clip(
            X_test + args.alpha * np.sign(np.random.randn(*X_test.shape)), 0.0,
            1.0)
        eps -= args.alpha

    logits = src_model(x)
    grad = gen_grad(x, logits, y)

    # FGSM and RAND+FGSM one-shot attack
    if attack in ["fgs", "rand_fgs"]:
        adv_x = symbolic_fgs(x, grad, eps=eps)

    # iterative FGSM
    if attack == "ifgs":
        adv_x = iter_fgs(src_model,
                         x,
                         y,
                         steps=args.steps,
                         eps=args.eps / args.steps)

    # Carlini & Wagner attack
    if attack == "CW":
        X_test = X_test[0:1000]
        Y_test = Y_test[0:1000]

        cli = CarliniLi(K.get_session(),
                        src_model,
                        targeted=False,
                        confidence=args.kappa,
                        eps=args.eps)

        X_adv = cli.attack(X_test, Y_test)

        r = np.clip(X_adv - X_test, -args.eps, args.eps)
        X_adv = X_test + r

        err = tf_test_error_rate(src_model, x, X_adv, Y_test)
        print('{}->{}: {:.1f}'.format(basename(src_model_name),
                                      basename(src_model_name), err))

        for (name, target_model) in zip(target_model_names, target_models):
            err = tf_test_error_rate(target_model, x, X_adv, Y_test)
            print('{}->{}: {:.1f}'.format(basename(src_model_name),
                                          basename(name), err))

        return

    # compute the adversarial examples and evaluate
    X_adv = batch_eval([x, y], [adv_x], [X_test, Y_test])[0]

    # white-box attack
    err = tf_test_error_rate(src_model, x, X_adv, Y_test)
    print('{}->{}: {:.1f}'.format(basename(src_model_name),
                                  basename(src_model_name), err))

    # black-box attack
    for (name, target_model) in zip(target_model_names, target_models):
        err = tf_test_error_rate(target_model, x, X_adv, Y_test)
        print('{}->{}: {:.1f}'.format(basename(src_model_name), basename(name),
                                      err))
Example #8
def main(args):
    def get_model_type(model_name):
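        # map a model checkpoint path to the architecture id expected by load_model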
        model_type = {
            'models/modelA': 0,
            'models/modelA_adv': 0,
            'models/modelA_ens': 0,
            'models/modelB': 1,
            'models/modelB_adv': 1,
            'models/modelB_ens': 1,
            'models/modelC': 2,
            'models/modelC_adv': 2,
            'models/modelC_ens': 2,
            'models/modelD': 3,
            'models/modelD_adv': 3,
            'models/modelD_ens': 3,
        }
        if model_name not in model_type:
            raise ValueError('Unknown model: {}'.format(model_name))
        return model_type[model_name]

    torch.manual_seed(args.seed)
    device = torch.device('cuda' if args.cuda else 'cpu')
    '''
    Preprocess MNIST dataset
    '''
    kwargs = {'num_workers': 20, 'pin_memory': True} if args.cuda else {}
    test_loader = torch.utils.data.DataLoader(datasets.MNIST(
        '../attack_mnist', train=False, transform=transforms.ToTensor()),
                                              batch_size=args.batch_size,
                                              shuffle=True,
                                              **kwargs)

    # source model for crafting adversarial examples
    src_model_name = args.src_model
    model_type = get_model_type(src_model_name)
    src_model = load_model(src_model_name, model_type).to(device)

    # model(s) to target
    target_model_names = args.target_models
    target_models = [None] * len(target_model_names)
    for i in range(len(target_model_names)):
        model_type = get_model_type(target_model_names[i])
        target_models[i] = load_model(target_model_names[i],
                                      type=model_type).to(device)

    attack = args.attack

    # simply compute test error
    if attack == 'test':
        correct_s = 0
        with torch.no_grad():
            for (data, labels) in test_loader:
                data, labels = data.to(device), labels.to(device)
                correct_s += test(src_model, data, labels)
        err = 100. - 100. * correct_s / len(test_loader.dataset)
        print('Test error of {}: {:.2f}'.format(basename(src_model_name), err))

        for (name, target_model) in zip(target_model_names, target_models):
            correct_t = 0
            with torch.no_grad():
                for (data, labels) in test_loader:
                    data, labels = data.to(device), labels.to(device)
                    correct_t += test(target_model, data, labels)
            err = 100. - 100. * correct_t / len(test_loader.dataset)
            print('Test error of {}: {:.2f}'.format(basename(name), err))
        return

    eps = args.eps
    # RAND+FGSM: shrink the FGSM budget by alpha once, not once per batch
    if attack == 'rand_fgs':
        eps -= args.alpha

    correct = 0
    for (data, labels) in test_loader:
        # take the random step in the RAND+FGSM
        if attack == 'rand_fgs':
            data = torch.clamp(
                data +
                torch.zeros_like(data).uniform_(-args.alpha, args.alpha), 0.0,
                1.0)
        data, labels = data.to(device), labels.to(device)
        grad = gen_grad(data, src_model, labels)

        # FGSM and RAND+FGSM one-shot attack
        if attack in ['fgs', 'rand_fgs']:
            adv_x = symbolic_fgs(data, grad, eps=eps)

        # iterative FGSM
        if attack == 'ifgs':
            adv_x = iter_fgs(src_model,
                             data,
                             labels,
                             steps=args.steps,
                             eps=args.eps / args.steps)

        correct += test(src_model, adv_x, labels)
    test_error = 100. - 100. * correct / len(test_loader.dataset)
    print('Test Set Error Rate: {:.2f}%'.format(test_error))
Example #9
def main(attack, src_model_name, target_model_names):
    np.random.seed(0)
    tf.set_random_seed(0)
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    K.set_session(tf.Session(config=config))

    if args.dataset == "mnist":
        K.set_image_data_format('channels_last')
        set_mnist_flags()
        x = K.placeholder(
            (None, FLAGS.IMAGE_ROWS, FLAGS.IMAGE_COLS, FLAGS.NUM_CHANNELS))
        y = K.placeholder((None, FLAGS.NUM_CLASSES))
        _, _, X_test, Y_test = data_mnist()
        # source model for crafting adversarial examples
        src_model = load_model_mnist(src_model_name)
        sd = 0.7

    elif args.dataset == "cifar10":
        set_flags(20)
        K.set_image_data_format('channels_first')
        x = K.placeholder(
            (None, FLAGS.NUM_CHANNELS, FLAGS.IMAGE_ROWS, FLAGS.IMAGE_COLS))
        y = K.placeholder((None, FLAGS.NUM_CLASSES))
        _, _, X_test, Y_test = load_data()
        # source model for crafting adversarial examples
        src_model = load_model(src_model_name)
        sd = 100. / 255.

    # model(s) to target
    target_models = [None] * len(target_model_names)
    for i in range(len(target_model_names)):
        target_models[i] = load_model(target_model_names[i])

    # simply compute test error
    if attack == "test":
        acc = tf_test_acc(src_model, x, X_test, Y_test)
        print('{}: {:.1f}'.format(basename(src_model_name), acc))

        for (name, target_model) in zip(target_model_names, target_models):
            acc = tf_test_acc(target_model, x, X_test, Y_test)
            print('{}: {:.1f}'.format(basename(name), acc))
        return

    eps = args.eps

    # take the random step in the RAND+FGSM
    if attack == "rfgs":
        X_test = np.clip(
            X_test + args.alpha * np.sign(np.random.randn(*X_test.shape)), 0.0,
            1.0)
        eps -= args.alpha

    logits = src_model(x)
    grad = gen_grad(x, logits, y)

    # FGSM and RAND+FGSM one-shot attack
    if attack in ["fgs", "rfgs"]:
        adv_x = symbolic_fgs(x, grad, eps=eps)

    # iterative FGSM
    if attack == "pgd":
        adv_x = iter_fgs(src_model,
                         x,
                         y,
                         steps=args.steps,
                         eps=eps,
                         alpha=eps / 10.0)

    if attack == 'so':
        adv_x = so(src_model,
                   x,
                   y,
                   steps=args.steps,
                   eps=eps,
                   alpha=eps / 10.0,
                   norm=args.norm,
                   sd=sd)

    print('Generating adversarial samples')
    # compute the adversarial examples and evaluate
    X_adv = batch_eval([x, y], [adv_x], [X_test, Y_test])[0]
    print('done')
    # white-box attack
    acc = tf_test_acc(src_model, x, X_adv, Y_test, sd=sd, num_iter=10)
    with open('attacks.txt', 'a') as log:
        log.write('{}->{}: {:.1f}, size = {:.4f}\n'.format(
            basename(src_model_name), basename(src_model_name), acc, eps))

    # black-box attack
    for (name, target_model) in zip(target_model_names, target_models):
        acc = tf_test_acc(target_model, x, X_adv, Y_test, sd=sd, num_iter=10)
        with open('attacks.txt', 'a') as log:
            log.write('{}->{}: {:.1f}, size = {:.4f}\n'.format(
                basename(src_model_name), basename(name), acc, eps))
Example #10
def main(attack, src_model_name, target_model_names):
    np.random.seed(0)
    tf.set_random_seed(0)

    set_flags(20)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    K.set_session(tf.Session(config=config))

    x = K.placeholder(
        (None, FLAGS.NUM_CHANNELS, FLAGS.IMAGE_ROWS, FLAGS.IMAGE_COLS))

    y = K.placeholder((None, FLAGS.NUM_CLASSES))

    _, _, X_test, Y_test = load_data()

    # source model for crafting adversarial examples
    src_model = load_model(src_model_name)

    # model(s) to target
    target_models = [None] * len(target_model_names)
    for i in range(len(target_model_names)):
        target_models[i] = load_model(target_model_names[i])

    # simply compute test error
    if attack == "test":
        err = tf_test_error_rate(src_model, x, X_test, Y_test)
        print('{}: {:.1f}'.format(basename(src_model_name), 100 - err))

        for (name, target_model) in zip(target_model_names, target_models):
            err = tf_test_error_rate(target_model, x, X_test, Y_test)
            print('{}: {:.1f}'.format(basename(name), 100 - err))
        return

    eps = args.eps

    # take the random step in the RAND+FGSM
    if attack == "rfgs":
        X_test = np.clip(
            X_test + args.alpha * np.sign(np.random.randn(*X_test.shape)), 0.0,
            1.0)
        eps -= args.alpha

    logits = src_model(x)
    grad = gen_grad(x, logits, y)

    # FGSM and RAND+FGSM one-shot attack
    if attack in ["fgs", "rfgs"]:
        adv_x = symbolic_fgs(x, grad, eps=eps)

    # iterative FGSM
    if attack == "pgd":
        adv_x = iter_fgs(src_model,
                         x,
                         y,
                         steps=args.steps,
                         eps=args.eps,
                         alpha=args.eps / 10.0)

    if attack == 'mim':
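        # momentum iterative FGSM (MI-FGSM)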
        adv_x = momentum_fgs(src_model, x, y, eps=args.eps)

    print('Generating adversarial samples')
    # compute the adversarial examples and evaluate
    X_adv = batch_eval([x, y], [adv_x], [X_test, Y_test])[0]
    print('done')
    # white-box attack
    err = tf_test_error_rate(src_model, x, X_adv, Y_test)
    print('{}->{}: {:.1f}'.format(basename(src_model_name),
                                  basename(src_model_name), 100 - err))

    # black-box attack
    for (name, target_model) in zip(target_model_names, target_models):
        err = tf_test_error_rate(target_model, x, X_adv, Y_test)
        print('{}->{}: {:.1f}'.format(basename(src_model_name), basename(name),
                                      100 - err))
Example #11
def main(attack, src_model_names, target_model_name):
    np.random.seed(0)
    tf.set_random_seed(0)

    flags.DEFINE_integer('BATCH_SIZE', 1, 'Size of batches')
    set_mnist_flags()

    dim = FLAGS.IMAGE_ROWS * FLAGS.IMAGE_COLS * FLAGS.NUM_CHANNELS

    x = K.placeholder(
        (None, FLAGS.IMAGE_ROWS, FLAGS.IMAGE_COLS, FLAGS.NUM_CHANNELS))

    y = K.placeholder((None, FLAGS.NUM_CLASSES))

    _, _, X_test, Y_test = data_mnist()
    Y_test_uncat = np.argmax(Y_test, axis=1)

    # source model for crafting adversarial examples
    src_models = [None] * len(src_model_names)
    for i in range(len(src_model_names)):
        src_models[i] = load_model(src_model_names[i])

    src_model_name_joint = ''
    for i in range(len(src_models)):
        src_model_name_joint += basename(src_model_names[i])

    # model(s) to target
    if target_model_name is not None:
        target_model = load_model(target_model_name)

    # simply compute test error
    if attack == "test":
        for (name, src_model) in zip(src_model_names, src_models):
            _, _, err = tf_test_error_rate(src_model, x, X_test, Y_test)
            print '{}: {:.1f}'.format(basename(name), err)
        if target_model_name is not None:
            _, _, err = tf_test_error_rate(target_model, x, X_test, Y_test)
        print '{}: {:.1f}'.format(basename(target_model_name), err)

        return

    if args.targeted_flag == 1:
        pickle_name = attack + '_' + src_model_name_joint + '_' + args.loss_type + '_targets.p'
        if os.path.exists(pickle_name):
            targets = pickle.load(open(pickle_name, 'rb'))
        else:
            targets = []
            allowed_targets = list(range(FLAGS.NUM_CLASSES))
            for i in range(len(Y_test)):
                allowed_targets.remove(Y_test_uncat[i])
                targets.append(np.random.choice(allowed_targets))
                allowed_targets = list(range(FLAGS.NUM_CLASSES))
            targets = np.array(targets)
            print(targets)
            targets_cat = np_utils.to_categorical(
                targets, FLAGS.NUM_CLASSES).astype(np.float32)
            Y_test = targets_cat
            if SAVE_FLAG:
                pickle.dump(Y_test, open(pickle_name, 'wb'))

    # take the random step in the RAND+FGSM; the per-eps FGSM budget is reduced
    # by alpha inside the loop below
    if attack == "rand_fgs":
        X_test = np.clip(
            X_test + args.alpha * np.sign(np.random.randn(*X_test.shape)), 0.0,
            1.0)

    logits = [None] * len(src_model_names)
    for i in range(len(src_model_names)):
        curr_model = src_models[i]
        logits[i] = curr_model(x)

    if args.loss_type == 'xent':
        loss, grad = gen_grad_ens(x, logits, y)
    elif args.loss_type == 'cw':
        grad = gen_grad_cw(x, logits, y)
    if args.targeted_flag == 1:
        grad = -1.0 * grad

    for eps in eps_list:
        # RAND+FGSM already spent alpha of the budget on the random step
        if attack == "rand_fgs":
            eps -= args.alpha
        # FGSM and RAND+FGSM one-shot attack
        if attack in ["fgs", "rand_fgs"] and args.norm == 'linf':
            adv_x = symbolic_fgs(x, grad, eps=eps)
        elif attack in ["fgs", "rand_fgs"] and args.norm == 'l2':
            adv_x = symbolic_fg(x, grad, eps=eps)

        # iterative FGSM
        if attack == "ifgs":
            l = 1000
            X_test = X_test[0:l]
            Y_test = Y_test[0:l]

            adv_x = x
            # iteratively apply the FGSM with small step size
            for i in range(args.num_iter):
                adv_logits = [None] * len(src_model_names)
                for i in range(len(src_model_names)):
                    curr_model = src_models[i]
                    adv_logits[i] = curr_model(adv_x)

                if args.loss_type == 'xent':
                    loss, grad = gen_grad_ens(adv_x, adv_logits, y)
                elif args.loss_type == 'cw':
                    grad = gen_grad_cw(adv_x, adv_logits, y)
                if args.targeted_flag == 1:
                    grad = -1.0 * grad

                adv_x = symbolic_fgs(adv_x, grad, args.delta, True)
                r = adv_x - x
                r = K.clip(r, -eps, eps)
                adv_x = x + r

            adv_x = K.clip(adv_x, 0, 1)

        if attack == "CW_ens":
            l = 1000
            pickle_name = attack + '_' + src_model_name_joint + '_' + str(
                args.eps) + '_adv.p'
            print(pickle_name)
            Y_test = Y_test[0:l]
            if os.path.exists(pickle_name) and attack == "CW_ens":
                print 'Loading adversarial samples'
                X_adv = pickle.load(open(pickle_name, 'rb'))

                for (name, src_model) in zip(src_model_names, src_models):
                    preds_adv, _, err = tf_test_error_rate(
                        src_model, x, X_adv, Y_test)
                    print '{}->{}: {:.1f}'.format(src_model_name_joint,
                                                  basename(name), err)

                preds_adv, _, err = tf_test_error_rate(target_model, x, X_adv,
                                                       Y_test)
                print '{}->{}: {:.1f}'.format(src_model_name_joint,
                                              basename(target_model_name), err)

                return

            X_test = X_test[0:l]
            time1 = time()
            cli = CarliniLiEns(K.get_session(),
                               src_models,
                               targeted=False,
                               confidence=args.kappa,
                               eps=eps)

            X_adv = cli.attack(X_test, Y_test)

            r = np.clip(X_adv - X_test, -eps, eps)
            X_adv = X_test + r
            time2 = time()
            print("Run with Adam took {}s".format(time2 - time1))

            if SAVE_FLAG:
                pickle.dump(X_adv, open(pickle_name, 'wb'))

            print('Carrying out white-box attack')
            for (name, src_model) in zip(src_model_names, src_models):
                preds_adv, _, err = tf_test_error_rate(src_model, x, X_adv,
                                                       Y_test)
                print('{}->{}: {:.1f}'.format(src_model_name_joint,
                                              basename(name), err))
            if target_model_name is not None:
                print('Carrying out black-box attack')
                preds, orig, err = tf_test_error_rate(target_model, x, X_adv,
                                                      Y_test)
                print('{}->{}: {:.1f}'.format(src_model_name_joint,
                                              basename(target_model_name), err))

            return

        pickle_name = attack + '_' + src_model_name_joint + '_' + args.loss_type + '_' + str(
            eps) + '_adv.p'
        if args.targeted_flag == 1:
            pickle_name = attack + '_' + src_model_name_joint + '_' + args.loss_type + '_' + str(
                eps) + '_adv_t.p'

        if os.path.exists(pickle_name):
            print('Loading adversarial samples')
            X_adv = pickle.load(open(pickle_name, 'rb'))
        else:
            print('Generating adversarial samples')
            X_adv = batch_eval([x, y], [adv_x], [X_test, Y_test])[0]
            if SAVE_FLAG:
                pickle.dump(X_adv, open(pickle_name, 'wb'))

        avg_l2_perturb = np.mean(
            np.linalg.norm((X_adv - X_test).reshape(len(X_test), dim), axis=1))

        # white-box attack
        l = len(X_adv)
        print('Carrying out white-box attack')
        for (name, src_model) in zip(src_model_names, src_models):
            preds_adv, orig, err = tf_test_error_rate(src_model, x, X_adv,
                                                      Y_test[0:l])
            if args.targeted_flag == 1:
                err = 100.0 - err
            print('{}->{}: {:.1f}'.format(basename(name), basename(name), err))

        # black-box attack
        if target_model_name is not None:
            print('Carrying out black-box attack')
            preds, _, err = tf_test_error_rate(target_model, x, X_adv, Y_test)
            if args.targeted_flag == 1:
                err = 100.0 - err
            print('{}->{}: {:.1f}, {}, {} {}'.format(
                src_model_name_joint, basename(target_model_name), err,
                avg_l2_perturb, eps, attack))
Example #12
def main(attack, src_model_name, target_model_names, data_train_dir,
         data_test_dir):
    np.random.seed(0)
    tf.set_random_seed(0)
    set_gtsrb_flags()

    # Get GTSRB test data
    _, _, _, _, X_test, Y_test = load_data(data_train_dir, data_test_dir)

    # display_leg_sample(X_test)

    # One-hot encode image labels
    label_binarizer = LabelBinarizer()
    Y_test = label_binarizer.fit_transform(Y_test)

    x = K.placeholder(
        (None, FLAGS.IMAGE_ROWS, FLAGS.IMAGE_COLS, FLAGS.NUM_CHANNELS))

    y = K.placeholder((None, FLAGS.NUM_CLASSES))

    # one_hot_y = tf.one_hot(y, 43)

    # source model for crafting adversarial examples
    src_model = load_model(src_model_name)

    # model(s) to target
    target_models = [None] * len(target_model_names)
    for i in range(len(target_model_names)):
        target_models[i] = load_model(target_model_names[i])

    # simply compute test error
    if attack == "test":
        err = tf_test_error_rate(src_model, x, X_test, Y_test)
        print '{}: {:.3f}'.format(basename(src_model_name), err)

        for (name, target_model) in zip(target_model_names, target_models):
            err = tf_test_error_rate(target_model, x, X_test, Y_test)
            print '{}: {:.3f}'.format(basename(name), err)
        return

    eps = args.eps

    # take the random step in the RAND+FGSM
    if attack == "rand_fgs":
        X_test = np.clip(
            X_test + args.alpha * np.sign(np.random.randn(*X_test.shape)), 0.0,
            1.0)
        eps -= args.alpha

    logits = src_model(x)
    grad = gen_grad(x, logits, y)

    # FGSM and RAND+FGSM one-shot attack
    if attack in ["fgs", "rand_fgs"]:
        adv_x = symbolic_fgs(x, grad, eps=eps)

    # iterative FGSM
    if attack == "ifgs":
        adv_x = iter_fgs(src_model,
                         x,
                         y,
                         steps=args.steps,
                         eps=args.eps / args.steps)

    # Carlini & Wagner attack
    if attack == "CW":
        X_test = X_test[0:200]
        Y_test = Y_test[0:200]

        cli = CarliniLi(K.get_session(),
                        src_model,
                        targeted=False,
                        confidence=args.kappa,
                        eps=args.eps)

        X_adv = cli.attack(X_test, Y_test)

        r = np.clip(X_adv - X_test, -args.eps, args.eps)
        X_adv = X_test + r
        np.save('Train_Carlini_200.npy', X_adv)
        np.save('Label_Carlini_200.npy', Y_test)

        err = tf_test_error_rate(src_model, x, X_adv, Y_test)
        print('{}->{}: {:.3f}'.format(basename(src_model_name),
                                      basename(src_model_name), err))

        for (name, target_model) in zip(target_model_names, target_models):
            err = tf_test_error_rate(target_model, x, X_adv, Y_test)
            print('{}->{}: {:.3f}'.format(basename(src_model_name),
                                          basename(name), err))
        display_leg_adv_sample(X_test, X_adv)
        return

    if attack == "cascade_ensemble":
        # X_test = np.clip(
        #     X_test + args.alpha * np.sign(np.random.randn(*X_test.shape)),
        #     0.0, 1.0)
        # eps -= args.alpha

        sub_model_ens = (sub_model_2, sub_model_3)
        sub_models = [None] * len(sub_model_ens)
        for i in range(len(sub_model_ens)):
            sub_models[i] = load_model(sub_model_ens[i])

        adv_x = x
        for j in range(args.steps):
            for i, m in enumerate(sub_models + [src_model]):
                logits = m(adv_x)
                gradient = gen_grad(adv_x, logits, y)
                adv_x = symbolic_fgs(adv_x,
                                     gradient,
                                     eps=args.eps / args.steps,
                                     clipping=True)

    if attack == "Iter_Casc":
        # X_test = np.clip(
        #     X_test + args.alpha * np.sign(np.random.randn(*X_test.shape)),
        #     0.0, 1.0)
        # args.eps = args.eps - args.alpha

        sub_model_ens = (sub_model_1, sub_model_2, sub_model_3)
        sub_models = [None] * len(sub_model_ens)
        for i in range(len(sub_model_ens)):
            sub_models[i] = load_model(sub_model_ens[i])

        x_advs = [None] * len(sub_models)
        errs = [None] * len(sub_models)
        adv_x = x
        eps_all = []
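        # per-step budgets follow the recurrence eps_i = (eps - eps_{i-1}) / n
        # with n = len(sub_models) and eps_0 = eps / n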

        for i in range(args.steps):
            if i == 0:
                eps_all.append((1.0 / len(sub_models)) * args.eps)
            else:
                eps_all.append((args.eps - eps_all[i - 1]) *
                               (1.0 / len(sub_models)))

        for j in range(args.steps):
            print('iterative step is :', j)
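            # keep the perturbation from whichever substitute is currently hardest
            # to fool (lowest error rate) and push further in that direction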
            if j == 0:
                for i, m in enumerate(sub_models):
                    logits = m(adv_x)
                    gradient = gen_grad(adv_x, logits, y)
                    adv_x_ = symbolic_fgs(adv_x,
                                          gradient,
                                          eps=eps_all[j],
                                          clipping=True)
                    x_advs[i] = adv_x_

                    X_adv = batch_eval([x, y], [adv_x_], [X_test, Y_test])[0]

                    err = tf_test_error_rate(m, x, X_adv, Y_test)
                    errs[i] = err
                adv_x = x_advs[errs.index(min(errs))]
            else:
                t = errs.index(min(errs))
                print('index of min value of errs:', t)
                logits = sub_models[t](adv_x)
                gradient = gen_grad(adv_x, logits, y)
                adv_x = symbolic_fgs(adv_x,
                                     gradient,
                                     eps=eps_all[j],
                                     clipping=True)

                for i, m in enumerate(sub_models):
                    X_adv = batch_eval([x, y], [adv_x], [X_test, Y_test])[0]
                    err = tf_test_error_rate(m, x, X_adv, Y_test)
                    errs[i] = err
            print('error rate of each substitute model: ', errs)
            print('\t')
            if min(errs) >= 99:
                success_rate = sum(errs) / len(sub_models)
                print('success rate is: {:.3f}'.format(success_rate))
                break

        success_rate = sum(errs) / len(sub_models)
        print('success rate is: {:.3f}'.format(success_rate))

        X_adv = batch_eval([x, y], [adv_x], [X_test, Y_test])[0]
        np.save('results/iter_casc_0.2_leg_adv/X_adv_Iter_Casc_0.2.npy', X_adv)

        for (name, target_model) in zip(target_model_names, target_models):
            err = tf_test_error_rate(target_model, x, X_adv, Y_test)
            print('{}->{}: {:.3f}'.format(basename(src_model_name),
                                          basename(name), err))

        save_leg_adv_sample('results/iter_casc_0.2_leg_adv/', X_test, X_adv)

        # save adversarial example specified by user
        save_leg_adv_specified_by_user(
            'results/iter_casc_0.2_leg_adv_label_4/', X_test, X_adv, Y_test)
        return

    if attack == "stack_paral":
        # X_test = np.clip(
        #     X_test + args.alpha * np.sign(np.random.randn(*X_test.shape)),
        #     0.0, 1.0)
        # eps -= args.alpha

        sub_model_ens = (sub_model_1, sub_model_2, sub_model_3)
        sub_models = [None] * len(sub_model_ens)

        for i in range(len(sub_model_ens)):
            sub_models[i] = load_model(sub_model_ens[i])

        errs = [None] * (len(sub_models) + 1)
        x_advs = [None] * len(sub_models)
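        # craft one half-budget FGSM example per substitute, average them, then
        # take a full-budget FGSM step on the source model from that mean point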
        # print x_advs

        for i, m in enumerate(sub_models):
            # x = x + args.alpha * np.sign(np.random.randn(*x[0].shape))
            logits = m(x)
            gradient = gen_grad(x, logits, y)
            adv_x = symbolic_fgs(x, gradient, eps=args.eps / 2, clipping=True)
            x_advs[i] = adv_x

        # print x_advs
        adv_x_sum = x_advs[0]
        for i in range(len(sub_models)):
            if i == 0: continue
            adv_x_sum = adv_x_sum + x_advs[i]
        adv_x_mean = adv_x_sum / (len(sub_models))
        preds = src_model(adv_x_mean)
        grads = gen_grad(adv_x_mean, preds, y)
        adv_x = symbolic_fgs(adv_x_mean, grads, eps=args.eps, clipping=True)

        # compute the adversarial examples and evaluate
        for i, m in enumerate(sub_models + [src_model]):
            X_adv = batch_eval([x, y], [adv_x], [X_test, Y_test])[0]
            err = tf_test_error_rate(m, x, X_adv, Y_test)
            errs[i] = err

        # compute success rate
        success_rate = sum(errs) / (len(sub_models) + 1)
        print('success rate is: {:.3f}'.format(success_rate))

        # compute transfer rate
        for (name, target_model) in zip(target_model_names, target_models):
            err = tf_test_error_rate(target_model, x, X_adv, Y_test)
            print('{}->{}: {:.3f}'.format(basename(src_model_name),
                                          basename(name), err))

        # save adversarial examples
        np.save('results/stack_paral_0.2_leg_adv/X_adv_stack_paral_0.2.npy',
                X_adv)
        # save_leg_adv_sample(X_test, X_adv)
        save_leg_adv_sample('results/stack_paral_0.2_leg_adv/', X_test, X_adv)

        # save adversarial example specified by user
        save_leg_adv_specified_by_user(
            'results/stack_paral_0.2_leg_adv_label_4/', X_test, X_adv, Y_test)

        return

    if attack == "cascade_ensemble_2":
        X_test = np.clip(
            X_test + args.alpha * np.sign(np.random.randn(*X_test.shape)), 0.0,
            1.0)
        eps -= args.alpha

        sub_model_ens = (sub_model_1, sub_model_2)
        sub_models = [None] * len(sub_model_ens)

        for i in range(len(sub_model_ens)):
            sub_models[i] = load_model(sub_model_ens[i])

        x_advs = [([None] * len(sub_models)) for i in range(args.steps)]
        # print x_advs

        x_adv = x
        for j in range(args.steps):
            for i, m in enumerate(sub_models):
                logits = m(x_adv)
                gradient = gen_grad(x_adv, logits, y)
                x_adv = symbolic_fgs(x_adv,
                                     gradient,
                                     eps=args.eps / args.steps,
                                     clipping=True)
                x_advs[j][i] = x_adv

        # print x_advs
        adv_x_sum = x_advs[0][0]
        for j in range(args.steps):
            for i in range(len(sub_models)):
                if j == 0 and i == 0: continue
                adv_x_sum = adv_x_sum + x_advs[j][i]
        adv_x_mean = adv_x_sum / (args.steps * len(sub_models))
        preds = src_model(adv_x_mean)
        grads = gen_grad(adv_x_mean, preds, y)
        adv_x = symbolic_fgs(adv_x_mean,
                             grads,
                             eps=args.eps / args.steps,
                             clipping=True)

    # compute the adversarial examples and evaluate
    X_adv = batch_eval([x, y], [adv_x], [X_test, Y_test])[0]

    # white-box attack
    err = tf_test_error_rate(src_model, x, X_adv, Y_test)
    print('{}->{}: {:.3f}'.format(basename(src_model_name),
                                  basename(src_model_name), err))

    # black-box attack
    for (name, target_model) in zip(target_model_names, target_models):
        err = tf_test_error_rate(target_model, x, X_adv, Y_test)
        print('{}->{}: {:.3f}'.format(basename(src_model_name), basename(name),
                                      err))
Example #13
def main(args):
    def get_model_type(model_name):
        model_type = {
            'models/modelA': 0,
            'models/modelA_adv': 0,
            'models/modelA_ens': 0,
            'models/modelB': 1,
            'models/modelB_adv': 1,
            'models/modelB_ens': 1,
            'models/modelC': 2,
            'models/modelC_adv': 2,
            'models/modelC_ens': 2,
            'models/modelD': 3,
            'models/modelD_adv': 3,
            'models/modelD_ens': 3,
        }
        if model_name not in model_type:
            raise ValueError('Unknown model: {}'.format(model_name))
        return model_type[model_name]

    torch.manual_seed(args.seed)
    device = torch.device('cuda' if args.cuda else 'cpu')
    '''
    Preprocess MNIST dataset
    '''
    kwargs = {'num_workers': 20, 'pin_memory': True} if args.cuda else {}
    train_loader = torch.utils.data.DataLoader(datasets.MNIST(
        '../attack_mnist',
        train=True,
        download=True,
        transform=transforms.ToTensor()),
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               **kwargs)
    test_loader = torch.utils.data.DataLoader(datasets.MNIST(
        '../attack_mnist', train=False, transform=transforms.ToTensor()),
                                              batch_size=args.batch_size,
                                              shuffle=True,
                                              **kwargs)

    eps = args.eps

    # if src_models is not None, we train on adversarial examples that come
    # from multiple models
    adv_model_names = args.adv_models
    adv_models = [None] * len(adv_model_names)
    for i in range(len(adv_model_names)):
        model_type = get_model_type(adv_model_names[i])
        adv_models[i] = load_model(adv_model_names[i],
                                   type=model_type).to(device)

    model = model_mnist(type=args.type).to(device)
    optimizer = optim.Adam(model.parameters())

    # Train on MNIST model
    x_advs = [None] * (len(adv_models) + 1)
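    # each batch: craft FGSM examples against every source model and the model
    # being trained, then pass them to train() alongside the clean batch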
    for epoch in range(args.epochs):
        for batch_idx, (data, labels) in enumerate(train_loader):
            data, labels = data.to(device), labels.to(device)
            for i, m in enumerate(adv_models + [model]):
                grad = gen_grad(data, m, labels, loss='training')
                x_advs[i] = symbolic_fgs(data, grad, eps=eps)
            train(epoch,
                  batch_idx,
                  model,
                  data,
                  labels,
                  optimizer,
                  x_advs=x_advs)

    # Finally print the result
    correct = 0
    with torch.no_grad():
        for (data, labels) in test_loader:
            data, labels = data.to(device), labels.to(device)
            correct += test(model, data, labels)
    test_error = 100. - 100. * correct / len(test_loader.dataset)
    print('Test Set Error Rate: {:.2f}%'.format(test_error))

    torch.save(model.state_dict(), args.model + '.pkl')
Example #14
def main(attack, src_model_name, target_model_names):
    np.random.seed(0)
    tf.set_random_seed(0)

    flags.DEFINE_integer('BATCH_SIZE', 32, 'Size of batches')
    set_mnist_flags()

    x = K.placeholder(
        (None, FLAGS.IMAGE_ROWS, FLAGS.IMAGE_COLS, FLAGS.NUM_CHANNELS))

    y = K.placeholder((None, FLAGS.NUM_CLASSES))

    _, _, X_test, Y_test = data_mnist()

    # source model for crafting adversarial examples
    src_model = load_model(src_model_name)

    # model(s) to target
    target_models = [None] * len(target_model_names)
    for i in range(len(target_model_names)):
        target_models[i] = load_model(target_model_names[i])

    # simply compute test error
    if attack == "test":
        err = tf_test_error_rate(src_model, x, X_test, Y_test)
        print '{}: {:.3f}'.format(basename(src_model_name), err)

        for (name, target_model) in zip(target_model_names, target_models):
            err = tf_test_error_rate(target_model, x, X_test, Y_test)
            print '{}: {:.3f}'.format(basename(name), err)
        return

    eps = args.eps

    # take the random step in the RAND+FGSM
    if attack == "rand_fgs":
        X_test = np.clip(
            X_test + args.alpha * np.sign(np.random.randn(*X_test.shape)), 0.0,
            1.0)
        eps -= args.alpha

    logits = src_model(x)
    grad = gen_grad(x, logits, y)

    # FGSM and RAND+FGSM one-shot attack
    if attack in ["fgs", "rand_fgs"]:
        adv_x = symbolic_fgs(x, grad, eps=eps)

    # iterative FGSM
    if attack == "ifgs":
        adv_x = iter_fgs(src_model,
                         x,
                         y,
                         steps=args.steps,
                         eps=args.eps / args.steps)

    # Carlini & Wagner attack
    if attack == "CW":
        X_test = X_test[0:1000]
        Y_test = Y_test[0:1000]

        cli = CarliniLi(K.get_session(),
                        src_model,
                        targeted=False,
                        confidence=args.kappa,
                        eps=args.eps)

        X_adv = cli.attack(X_test, Y_test)

        r = np.clip(X_adv - X_test, -args.eps, args.eps)
        X_adv = X_test + r

        err = tf_test_error_rate(src_model, x, X_adv, Y_test)
        print('{}->{}: {:.3f}'.format(basename(src_model_name),
                                      basename(src_model_name), err))

        for (name, target_model) in zip(target_model_names, target_models):
            err = tf_test_error_rate(target_model, x, X_adv, Y_test)
            print('{}->{}: {:.3f}'.format(basename(src_model_name),
                                          basename(name), err))

        return

    if attack == "cascade_ensemble":
        X_test = np.clip(
            X_test + args.alpha * np.sign(np.random.randn(*X_test.shape)), 0.0,
            1.0)
        eps -= args.alpha

        sub_model_ens = (sub_model_1, sub_model_2, sub_model_3, sub_model_4,
                         sub_model_5, sub_model_6, sub_model_7)
        sub_models = [None] * len(sub_model_ens)
        for i in range(len(sub_model_ens)):
            sub_models[i] = load_model(sub_model_ens[i])

        adv_x = x
        for j in range(args.steps):
            for i, m in enumerate(sub_models + [src_model]):
                logits = m(adv_x)
                gradient = gen_grad(adv_x, logits, y)
                adv_x = symbolic_fgs(adv_x,
                                     gradient,
                                     eps=args.eps / args.steps,
                                     clipping=True)

    if attack == "parallel_ensemble":
        X_test = np.clip(
            X_test + args.alpha * np.sign(np.random.randn(*X_test.shape)), 0.0,
            1.0)
        eps -= args.alpha

        sub_model_ens = (sub_model_1, sub_model_2, sub_model_3)
        sub_models = [None] * len(sub_model_ens)

        for i in range(len(sub_model_ens)):
            sub_models[i] = load_model(sub_model_ens[i])

        x_advs = [([None] * len(sub_models)) for i in range(args.steps)]
        print(x_advs)

        x_adv = x
        for j in range(args.steps):
            for i, m in enumerate(sub_models):
                logits = m(x_adv)
                gradient = gen_grad(x_adv, logits, y)
                x_adv = symbolic_fgs(x_adv,
                                     gradient,
                                     eps=args.eps / args.steps,
                                     clipping=True)
                x_advs[j][i] = x_adv

        print(x_advs)
        adv_x_mean = x_advs[0][0]
        for j in range(args.steps):
            for i in range(len(sub_models)):
                if j == 0 and i == 0: continue
                adv_x_mean = adv_x_mean + x_advs[j][i]
        xadv = adv_x_mean / (args.steps * len(sub_models))
        preds = src_model(xadv)
        grads = gen_grad(xadv, preds, y)
        adv_x = symbolic_fgs(xadv, grads, eps=args.eps, clipping=True)

    # compute the adversarial examples and evaluate
    X_adv = batch_eval([x, y], [adv_x], [X_test, Y_test])[0]

    # white-box attack
    err = tf_test_error_rate(src_model, x, X_adv, Y_test)
    print('{}->{}: {:.3f}'.format(basename(src_model_name),
                                  basename(src_model_name), err))

    # black-box attack
    for (name, target_model) in zip(target_model_names, target_models):
        err = tf_test_error_rate(target_model, x, X_adv, Y_test)
        print('{}->{}: {:.3f}'.format(basename(src_model_name), basename(name),
                                      err))