Ejemplo n.º 1
0
def so(model, x, y, steps, eps, alpha=0, norm="l2", sd=0.0):
    adv_x = x
    for i in range(steps):
        total_grad = 0
        for j in range(10):
            temp_adv_x = adv_x + tf.random_normal(
                stddev=sd, shape=tf.shape(x), seed=42)
            logits = model(temp_adv_x)
            if norm == "linf":
                grad = gen_grad(temp_adv_x, logits, y, loss='logloss')
            elif norm == "l2":
                grad = gen_grad(temp_adv_x, logits, y, loss='cw')
            total_grad += grad

        if norm == "linf":
            normed_grad = K.sign(total_grad)
            adv_x += alpha * normed_grad
            adv_x = tf.clip_by_value(adv_x, x - eps, x + eps)
        if norm == "l2":
            grad_norm = tf.clip_by_value(l2_norm(total_grad), 1e-8, np.inf)
            adv_x += 2.5 * eps / steps * total_grad / grad_norm
            dx = adv_x - x
            dx_norm = tf.clip_by_value(l2_norm(dx), 1e-8, np.inf)
            dx_final_norm = tf.clip_by_value(dx_norm, 0, eps)
            adv_x = x + dx_final_norm * dx / dx_norm
    adv_x = tf.clip_by_value(adv_x, 0, 1)
    return adv_x
Ejemplo n.º 2
0
def momentum_fgs(model, x, y, eps):

    # parameters
    nb_iter = 10
    decay_factor = 1.0
    eps_iter = eps / 5.0
	
    # Initialize loop variables
    momentum = tf.zeros_like(x)
    adv_x = x


    for i in range(nb_iter):
        logits = model(adv_x)
        grad = gen_grad(adv_x, logits, y)
        
        # Normalize current gradient and add it to the accumulated gradient
        red_ind = list(range(1, len(grad.get_shape())))
        avoid_zero_div = tf.cast(1e-12, grad.dtype)
        grad = grad / tf.maximum(avoid_zero_div, tf.reduce_mean(tf.abs(grad), red_ind, keepdims=True))
        momentum = decay_factor * momentum + grad

        normalized_grad = tf.sign(momentum)
        # Update and clip adversarial example in current iteration
        scaled_grad = eps_iter * normalized_grad
        adv_x = adv_x + scaled_grad
        adv_x = x + tf.clip_by_value(adv_x-x, -eps, eps)

        adv_x = tf.clip_by_value(adv_x, 0., 1.0)

        adv_x = K.stop_gradient(adv_x)

    return adv_x
Ejemplo n.º 3
0
def main(model_name, adv_model_names, model_type):
    np.random.seed(0)
    assert keras.backend.backend() == "tensorflow"

    # Get MNIST test data
    X_train, Y_train, X_test, Y_test = data_mnist()

    data_gen = data_gen_mnist(X_train)

    x = K.placeholder(shape=(None, 28, 28, 1))

    y = K.placeholder(shape=(BATCH_SIZE, 10))

    eps = args.eps
    norm = args.norm

    # if src_models is not None, we train on adversarial examples that come
    # from multiple models
    adv_models = [None] * len(adv_model_names)
    ens_str = ''
    for i in range(len(adv_model_names)):
        adv_models[i] = load_model(adv_model_names[i])
        if len(adv_models) > 0:
            name = basename(adv_model_names[i])
            model_index = name.replace('model', '')
            ens_str += model_index
    model = model_mnist(type=model_type)

    x_advs = [None] * (len(adv_models) + 1)

    for i, m in enumerate(adv_models + [model]):
        if args.iter == 0:
            logits = m(x)
            grad = gen_grad(x, logits, y, loss='training')
            x_advs[i] = symbolic_fgs(x, grad, eps=eps)
        elif args.iter == 1:
            x_advs[i] = iter_fgs(m, x, y, steps=40, alpha=0.01, eps=args.eps)

    # Train an MNIST model
    tf_train(x,
             y,
             model,
             X_train,
             Y_train,
             data_gen,
             x_advs=x_advs,
             benign=args.ben)

    # Finally print the result!
    test_error = tf_test_error_rate(model, x, X_test, Y_test)
    print('Test error: %.1f%%' % test_error)
    model_name += '_' + str(eps) + '_' + str(norm) + '_' + ens_str
    if args.iter == 1:
        model_name += 'iter'
    if args.ben == 0:
        model_name += '_nob'
    save_model(model, model_name)
    json_string = model.to_json()
    with open(model_name + '.json', 'wr') as f:
        f.write(json_string)
Ejemplo n.º 4
0
def main(model_name, adv_model_names, model_type):
    np.random.seed(0)
    assert keras.backend.backend() == "tensorflow"
    set_flags(32)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    K.set_session(tf.Session(config=config))

    flags.DEFINE_integer('NUM_EPOCHS', args.epochs, 'Number of epochs')
    flags.DEFINE_integer('type', args.type, 'model type')

    # Get MNIST test data
    X_train, Y_train, X_test, Y_test = load_data()

    data_gen = data_flow(X_train)

    x = K.placeholder(shape=(None,
                             FLAGS.NUM_CHANNELS,
                             FLAGS.IMAGE_ROWS,
                             FLAGS.IMAGE_COLS))

    y = K.placeholder(shape=(FLAGS.BATCH_SIZE, FLAGS.NUM_CLASSES))

    eps = args.eps

    # if src_models is not None, we train on adversarial examples that come
    # from multiple models
    adv_models = [None] * len(adv_model_names)
    for i in range(len(adv_model_names)):
        adv_models[i] = load_model(adv_model_names[i])

    model = model_select(type=model_type)

    x_advs = [None] * (len(adv_models) + 1)
    

    for i, m in enumerate(adv_models + [model]):
        x_noise = x + tf.random_uniform(shape=[FLAGS.BATCH_SIZE, FLAGS.NUM_CHANNELS, FLAGS.IMAGE_ROWS, FLAGS.IMAGE_COLS], minval= -args.eps, maxval=args.eps)
        x_noise = tf.clip_by_value(x_noise, 0., 1.)
        for _ in range(args.k):
            logits = m(x_noise)
            grad = gen_grad(x_noise, logits, y, loss='logloss')
            x_noise =  K.stop_gradient(x_noise + args.eps / 4.0 * K.sign(grad))
            x_noise = tf.clip_by_value(x_noise, x - args.eps, x + args.eps)
            x_noise = tf.clip_by_value(x_noise, 0., 1.)
        x_advs[i] = x_noise

    # Train an MNIST model
    tf_train(x, y, model, X_train, Y_train, data_gen, model_name, x_advs=x_advs)

    # Finally print the result!
    test_error = tf_test_error_rate(model, x, X_test, Y_test)
    with open(model_name + '_log.txt', 'a') as log:
        log.write('Test error: %.1f%%' % test_error)
    print('Test error: %.1f%%' % test_error)
    save_model(model, model_name)
    json_string = model.to_json()
    with open(model_name+'.json', 'w') as f:
        f.write(json_string)
def iter_fgs(model, data, labels, steps, eps):
    '''
    I-FGSM attack.
    '''
    adv_x = data

    # iteratively apply the FGSM with small step size
    for i in range(steps):
        grad = gen_grad(adv_x, model, labels)
        adv_x = symbolic_fgs(adv_x, grad, eps)
    return adv_x
Ejemplo n.º 6
0
def iter_fgs(model, x, y, steps, eps):
    """
    I-FGSM attack.
    """

    adv_x = x

    # iteratively apply the FGSM with small step size
    for i in range(steps):
        logits = model(adv_x)
        grad = gen_grad(adv_x, logits, y)

        adv_x = symbolic_fgs(adv_x, grad, eps, True)
    return adv_x
Ejemplo n.º 7
0
def iter_fgs(model, x, y, steps, eps, alpha):
    """
    PGD / I-FGSM attack.
    """

    adv_x = x

    # iteratively apply the FGSM with small step size
    for i in range(steps):
        logits = model(adv_x)
        grad = gen_grad(adv_x, logits, y)

        adv_x = symbolic_fgs(adv_x, grad, alpha, True)
        adv_x = tf.clip_by_value(adv_x, x - eps, x + eps)
    return adv_x
Ejemplo n.º 8
0
def main(model_name, adv_model_names, model_type):
    np.random.seed(0)
    assert keras.backend.backend() == "tensorflow"
    set_mnist_flags()

    flags.DEFINE_bool('NUM_EPOCHS', args.epochs, 'Number of epochs')

    # Get MNIST test data
    X_train, Y_train, X_test, Y_test = data_mnist()

    data_gen = data_gen_mnist(X_train)

    x = K.placeholder(shape=(None, FLAGS.IMAGE_ROWS, FLAGS.IMAGE_COLS,
                             FLAGS.NUM_CHANNELS))

    y = K.placeholder(shape=(FLAGS.BATCH_SIZE, FLAGS.NUM_CLASSES))

    eps = args.eps

    # if src_models is not None, we train on adversarial examples that come
    # from multiple models
    adv_models = [None] * len(adv_model_names)
    for i in range(len(adv_model_names)):
        adv_models[i] = load_model(adv_model_names[i])

    model = model_mnist(type=model_type)

    x_advs = [None] * (len(adv_models) + 1)

    for i, m in enumerate(adv_models + [model]):
        logits = m(x)
        grad = gen_grad(x, logits, y, loss='training')
        x_advs[i] = symbolic_fgs(x, grad, eps=eps)

    # Train an MNIST model
    tf_train(x, y, model, X_train, Y_train, data_gen, x_advs=x_advs)

    # Finally print the result!
    test_error = tf_test_error_rate(model, x, X_test, Y_test)
    print('Test error: %.1f%%' % test_error)
    save_model(model, model_name)
    json_string = model.to_json()
    with open(model_name + '.json', 'wr') as f:
        f.write(json_string)
Ejemplo n.º 9
0
def iter_fgs(model, x, y, steps, alpha, eps, clipping=True):
    """
    I-FGSM attack.
    """

    adv_x = x
    # iteratively apply the FGSM with small step size
    for i in range(steps):
        logits = model(adv_x)
        grad = gen_grad(adv_x, logits, y)

        adv_x = symbolic_fgs(adv_x, grad, alpha, True)
        r = adv_x - x
        r = K.clip(r, -eps, eps)
        adv_x = x + r

    if clipping:
        adv_x = K.clip(adv_x, 0, 1)

    return adv_x
def main(adv_model_names):
    np.random.seed(0)
    assert keras.backend.backend() == "tensorflow"
    set_model_flags()

    tf.reset_default_graph()
    g = tf.get_default_graph()

    x = tf.placeholder(
        tf.float32,
        shape=[None, FLAGS.IMAGE_ROWS, FLAGS.IMAGE_COLS, FLAGS.NUM_CHANNELS])
    y = tf.placeholder(tf.float32, shape=[None, FLAGS.NUM_CLASSES])

    train_mode = tf.placeholder(tf.bool)
    eps = FLAGS.EPS

    # if src_models is not None, we train on adversarial examples that come
    # from multiple models
    # load from out source
    adv_models = [None] * len(adv_model_names)
    for i, name in enumerate(adv_model_names):
        adv_models[i] = load_model(name, path="./models/" + name + "-save.npy")
    x_advs = [None] * (len(adv_models))

    for i, m in enumerate(adv_models):
        logits, _ = m(x)
        grad = gen_grad(x, logits, y, loss='training')
        x_advs[i] = symbolic_fgs(x, grad, eps=eps)

    data = dataset(FLAGS.DIR, normalize=False)
    sess, graph_dict = tf_train(g,
                                x,
                                y,
                                data,
                                defense_model,
                                train_mode,
                                x_advs=x_advs)

    # Finally print the result!
    test_error = tf_test_error_rate(sess, graph_dict, data, x_advs)
    print('Test error: %.1f%%' % test_error)
def white_box_fgsm_iter(prediction, target_model, x, logits, y, X_test, X_test_ini, targets, targets_cat, eps, dim, beta):
    #Get gradient from model
    if args.loss_type == 'xent':
        grad = gen_grad(x, logits, y)
    elif args.loss_type == 'cw':
        real = tf.reduce_sum(y*logits, 1)
        other = tf.reduce_max((1-y)*logits - (y*10000), 1)
        if '_un' in args.method:
            loss = tf.maximum(0.0,real-other+args.conf)
        else:
            loss = tf.maximum(0.0,other-real+args.conf)
        grad, = tf.gradients(loss, x)

    # normalized gradient
    if args.norm == 'linf':
        normed_grad = tf.sign(grad)
    elif args.norm == 'l2':
        normed_grad = K.l2_normalize(grad, axis = (1,2,3))

    # Multiply by constant epsilon
    scaled_grad = beta * normed_grad

    # Add perturbation to original example to obtain adversarial example
    if args.loss_type == 'xent':
        if '_un' in args.method:
            adv_x_t = tf.stop_gradient(x + scaled_grad)
        else:
            adv_x_t = tf.stop_gradient(x - scaled_grad)
    elif args.loss_type == 'cw':
        adv_x_t = tf.stop_gradient(x - scaled_grad)

    adv_x_t = tf.clip_by_value(adv_x_t, CLIP_MIN, CLIP_MAX)

    X_test_mod = X_test[random_indices]
    X_test_ini_mod = X_test_ini[random_indices]

    X_adv_t = np.zeros_like(X_test_ini_mod)
    adv_pred_np = np.zeros((len(X_test_ini_mod), NUM_CLASSES))
    pred_np = np.zeros((len(X_test_ini_mod), NUM_CLASSES))

    for i in range(BATCH_EVAL_NUM):
        X_test_slice = X_test_mod[i*(BATCH_SIZE):(i+1)*(BATCH_SIZE)]
        X_test_ini_slice = X_test_ini_mod[i*(BATCH_SIZE):(i+1)*(BATCH_SIZE)]
        targets_cat_slice = targets_cat[i*(BATCH_SIZE):(i+1)*(BATCH_SIZE)]
        X_adv_curr = X_test_slice
        for k in range(args.num_iter):
            X_adv_curr = sess.run(adv_x_t, feed_dict={x: X_adv_curr, y: targets_cat_slice})
            r = X_adv_curr - X_test_ini_slice
            r = np.clip(r, -eps, eps)
            X_adv_curr = X_test_ini_slice + r
        X_adv_curr = np.clip(X_adv_curr, CLIP_MIN, CLIP_MAX)
        X_adv_t[i*(BATCH_SIZE):(i+1)*(BATCH_SIZE)] = X_adv_curr
        adv_pred_np_i = sess.run(prediction, feed_dict={x: X_adv_curr})
        adv_pred_np[i*BATCH_SIZE:(i+1)*BATCH_SIZE,:] = adv_pred_np_i
        pred_np_i, logits_np_i = sess.run([prediction, logits], feed_dict={x: X_test_slice})
        pred_np[i*BATCH_SIZE:(i+1)*BATCH_SIZE,:] = pred_np_i

    white_box_success = 100.0 * np.sum(np.argmax(adv_pred_np, 1) == targets)/(BATCH_SIZE*BATCH_EVAL_NUM)
    if '_un' in args.method:
        white_box_success = 100.0 - white_box_success
    benign_success = 100.0 * np.sum(np.argmax(pred_np, 1) == targets)/(BATCH_SIZE*BATCH_EVAL_NUM)
    print('Benign success: {}'.format(benign_success))

    wb_norm = np.mean(np.linalg.norm((X_adv_t-X_test_ini_mod).reshape(BATCH_SIZE*BATCH_EVAL_NUM, dim), axis=1))
    print('Average white-box l2 perturbation: {}'.format(wb_norm))

    wb_write_out(eps, white_box_success, wb_norm)

    return
Ejemplo n.º 12
0
def main(attack, src_model_name, target_model_names, data_train_dir,
         data_test_dir):
    np.random.seed(0)
    tf.set_random_seed(0)

    flags.DEFINE_integer('BATCH_SIZE', 32, 'Size of batches')
    set_gtsrb_flags()

    # Get MNIST test data
    _, _, X_test, Y_test = load_data(data_train_dir, data_test_dir)

    # One-hot encode image labels
    label_binarizer = LabelBinarizer()
    Y_test = label_binarizer.fit_transform(Y_test)

    x = tf.placeholder(tf.float32, (None, 32, 32, 1))

    y = tf.placeholder(tf.int32, (None))

    one_hot_y = tf.one_hot(y, 43)

    # source model for crafting adversarial examples
    src_model = load_model(src_model_name)

    # model(s) to target
    target_models = [None] * len(target_model_names)
    for i in range(len(target_model_names)):
        target_models[i] = load_model(target_model_names[i])

    # simply compute test error
    if attack == "test":
        err = tf_test_error_rate(src_model, x, X_test, Y_test)
        print '{}: {:.3f}'.format(basename(src_model_name), err)

        for (name, target_model) in zip(target_model_names, target_models):
            err = tf_test_error_rate(target_model, x, X_test, Y_test)
            print '{}: {:.3f}'.format(basename(name), err)
        return

    eps = args.eps

    # take the random step in the RAND+FGSM
    if attack == "rand_fgs":
        X_test = np.clip(
            X_test + args.alpha * np.sign(np.random.randn(*X_test.shape)), 0.0,
            1.0)
        eps -= args.alpha

    logits = src_model(x)
    grad = gen_grad(x, logits, y)

    # FGSM and RAND+FGSM one-shot attack
    if attack in ["fgs", "rand_fgs"]:
        adv_x = symbolic_fgs(x, grad, eps=eps)

    # iterative FGSM
    if attack == "ifgs":
        adv_x = iter_fgs(src_model,
                         x,
                         y,
                         steps=args.steps,
                         eps=args.eps / args.steps)

    # Carlini & Wagner attack
    if attack == "CW":
        X_test = X_test[0:1000]
        Y_test = Y_test[0:1000]

        cli = CarliniLi(K.get_session(),
                        src_model,
                        targeted=False,
                        confidence=args.kappa,
                        eps=args.eps)

        X_adv = cli.attack(X_test, Y_test)

        r = np.clip(X_adv - X_test, -args.eps, args.eps)
        X_adv = X_test + r

        err = tf_test_error_rate(src_model, x, X_adv, Y_test)
        print '{}->{}: {:.3f}'.format(basename(src_model_name),
                                      basename(src_model_name), err)

        for (name, target_model) in zip(target_model_names, target_models):
            err = tf_test_error_rate(target_model, x, X_adv, Y_test)
            print '{}->{}: {:.3f}'.format(basename(src_model_name),
                                          basename(name), err)

        return

    if attack == "grad_ens":
        X_test = np.clip(
            X_test + args.alpha * np.sign(np.random.randn(*X_test.shape)), 0.0,
            1.0)
        eps -= args.alpha

        sub_model_ens = (sub_model_1, sub_model_2)
        sub_models = [None] * len(sub_model_ens)
        for i in range(len(sub_model_ens)):
            sub_models[i] = load_model(sub_model_ens[i])

        adv_x = x
        for j in range(args.steps):
            for i, m in enumerate(sub_models + [src_model]):
                logits = m(adv_x)
                gradient = gen_grad(adv_x, logits, y)
                adv_x = symbolic_fgs(adv_x,
                                     gradient,
                                     eps=args.eps / args.steps,
                                     clipping=True)

    # compute the adversarial examples and evaluate
    X_adv = batch_eval([x, y], [adv_x], [X_test, Y_test])[0]

    # white-box attack
    err = tf_test_error_rate(src_model, x, X_adv, Y_test)
    print '{}->{}: {:.3f}'.format(basename(src_model_name),
                                  basename(src_model_name), err)

    # black-box attack
    for (name, target_model) in zip(target_model_names, target_models):
        err = tf_test_error_rate(target_model, x, X_adv, Y_test)
        print '{}->{}: {:.3f}'.format(basename(src_model_name), basename(name),
                                      err)
Ejemplo n.º 13
0
def main(args):
    def get_model_type(model_name):
        model_type = {
            'models/modelA': 0,
            'models/modelA_adv': 0,
            'models/modelA_ens': 0,
            'models/modelB': 1,
            'models/modelB_adv': 1,
            'models/modelB_ens': 1,
            'models/modelC': 2,
            'models/modelC_adv': 2,
            'models/modelC_ens': 2,
            'models/modelD': 3,
            'models/modelD_adv': 3,
            'models/modelD_ens': 3,
        }
        if model_name not in model_type.keys():
            raise ValueError('Unknown model: {}'.format(model_name))
        return model_type[model_name]

    torch.manual_seed(args.seed)
    device = torch.device('cuda' if args.cuda else 'cpu')
    '''
    Preprocess MNIST dataset
    '''
    kwargs = {'num_workers': 20, 'pin_memory': True} if args.cuda else {}
    train_loader = torch.utils.data.DataLoader(datasets.MNIST(
        '../attack_mnist',
        train=True,
        download=True,
        transform=transforms.ToTensor()),
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               **kwargs)
    test_loader = torch.utils.data.DataLoader(datasets.MNIST(
        '../attack_mnist', train=False, transform=transforms.ToTensor()),
                                              batch_size=args.batch_size,
                                              shuffle=True,
                                              **kwargs)

    eps = args.eps

    # if src_models is not None, we train on adversarial examples that come
    # from multiple models
    adv_model_names = args.adv_models
    adv_models = [None] * len(adv_model_names)
    for i in range(len(adv_model_names)):
        type = get_model_type(adv_model_names[i])
        adv_models[i] = load_model(adv_model_names[i], type=type).to(device)

    model = model_mnist(type=args.type).to(device)
    optimizer = optim.Adam(model.parameters())

    # Train on MNIST model
    x_advs = [None] * (len(adv_models) + 1)
    for epoch in range(args.epochs):
        for batch_idx, (data, labels) in enumerate(train_loader):
            data, labels = data.to(device), labels.to(device)
            for i, m in enumerate(adv_models + [model]):
                grad = gen_grad(data, m, labels, loss='training')
                x_advs[i] = symbolic_fgs(data, grad, eps=eps)
            train(epoch,
                  batch_idx,
                  model,
                  data,
                  labels,
                  optimizer,
                  x_advs=x_advs)

    # Finally print the result
    correct = 0
    with torch.no_grad():
        for (data, labels) in test_loader:
            data, labels = data.to(device), labels.to(device)
            correct += test(model, data, labels)
    test_error = 100. - 100. * correct / len(test_loader.dataset)
    print('Test Set Error Rate: {:.2f}%'.format(test_error))

    torch.save(model.state_dict(), args.model + '.pkl')
Ejemplo n.º 14
0
def main(attack, src_model_name, target_model_names):
    np.random.seed(0)
    tf.set_random_seed(0)

    flags.DEFINE_integer('BATCH_SIZE', 10, 'Size of batches')
    set_mnist_flags()

    x = K.placeholder(
        (None, FLAGS.IMAGE_ROWS, FLAGS.IMAGE_COLS, FLAGS.NUM_CHANNELS))

    y = K.placeholder((None, FLAGS.NUM_CLASSES))

    _, _, X_test, Y_test = data_mnist()

    # source model for crafting adversarial examples
    src_model = load_model(src_model_name)

    # model(s) to target
    target_models = [None] * len(target_model_names)
    for i in range(len(target_model_names)):
        target_models[i] = load_model(target_model_names[i])

    # simply compute test error
    if attack == "test":
        err = tf_test_error_rate(src_model, x, X_test, Y_test)
        print '{}: {:.1f}'.format(basename(src_model_name), err)

        for (name, target_model) in zip(target_model_names, target_models):
            err = tf_test_error_rate(target_model, x, X_test, Y_test)
            print '{}: {:.1f}'.format(basename(name), err)
        return

    eps = args.eps

    # take the random step in the RAND+FGSM
    if attack == "rand_fgs":
        X_test = np.clip(
            X_test + args.alpha * np.sign(np.random.randn(*X_test.shape)), 0.0,
            1.0)
        eps -= args.alpha

    logits = src_model(x)
    grad = gen_grad(x, logits, y)

    # FGSM and RAND+FGSM one-shot attack
    if attack in ["fgs", "rand_fgs"]:
        adv_x = symbolic_fgs(x, grad, eps=eps)

    # iterative FGSM
    if attack == "ifgs":
        adv_x = iter_fgs(src_model,
                         x,
                         y,
                         steps=args.steps,
                         eps=args.eps / args.steps)

    # Carlini & Wagner attack
    if attack == "CW":
        X_test = X_test[0:1000]
        Y_test = Y_test[0:1000]

        cli = CarliniLi(K.get_session(),
                        src_model,
                        targeted=False,
                        confidence=args.kappa,
                        eps=args.eps)

        X_adv = cli.attack(X_test, Y_test)

        r = np.clip(X_adv - X_test, -args.eps, args.eps)
        X_adv = X_test + r

        err = tf_test_error_rate(src_model, x, X_adv, Y_test)
        print '{}->{}: {:.1f}'.format(basename(src_model_name),
                                      basename(src_model_name), err)

        for (name, target_model) in zip(target_model_names, target_models):
            err = tf_test_error_rate(target_model, x, X_adv, Y_test)
            print '{}->{}: {:.1f}'.format(basename(src_model_name),
                                          basename(name), err)

        return

    # compute the adversarial examples and evaluate
    X_adv = batch_eval([x, y], [adv_x], [X_test, Y_test])[0]

    # white-box attack
    err = tf_test_error_rate(src_model, x, X_adv, Y_test)
    print '{}->{}: {:.1f}'.format(basename(src_model_name),
                                  basename(src_model_name), err)

    # black-box attack
    for (name, target_model) in zip(target_model_names, target_models):
        err = tf_test_error_rate(target_model, x, X_adv, Y_test)
        print '{}->{}: {:.1f}'.format(basename(src_model_name), basename(name),
                                      err)
def main(args):
    def get_model_type(model_name):
        model_type = {
            'models/modelA': 0,
            'models/modelA_adv': 0,
            'models/modelA_ens': 0,
            'models/modelB': 1,
            'models/modelB_adv': 1,
            'models/modelB_ens': 1,
            'models/modelC': 2,
            'models/modelC_adv': 2,
            'models/modelC_ens': 2,
            'models/modelD': 3,
            'models/modelD_adv': 3,
            'models/modelD_ens': 3,
        }
        if model_name not in model_type.keys():
            raise ValueError('Unknown model: {}'.format(model_name))
        return model_type[model_name]

    torch.manual_seed(args.seed)
    device = torch.device('cuda' if args.cuda else 'cpu')
    '''
    Preprocess MNIST dataset
    '''
    kwargs = {'num_workers': 20, 'pin_memory': True} if args.cuda else {}
    test_loader = torch.utils.data.DataLoader(datasets.MNIST(
        '../attack_mnist', train=False, transform=transforms.ToTensor()),
                                              batch_size=args.batch_size,
                                              shuffle=True,
                                              **kwargs)

    # source model for crafting adversarial examples
    src_model_name = args.src_model
    type = get_model_type(src_model_name)
    src_model = load_model(src_model_name, type).to(device)

    # model(s) to target
    target_model_names = args.target_models
    target_models = [None] * len(target_model_names)
    for i in range(len(target_model_names)):
        type = get_model_type(target_model_names[i])
        target_models[i] = load_model(target_model_names[i],
                                      type=type).to(device)

    attack = args.attack

    # simply compute test error
    if attack == 'test':
        correct_s = 0
        with torch.no_grad():
            for (data, labels) in test_loader:
                data, labels = data.to(device), labels.to(device)
                correct_s += test(src_model, data, labels)
        err = 100. - 100. * correct_s / len(test_loader.dataset)
        print('Test error of {}: {:.2f}'.format(basename(src_model_name), err))

        for (name, target_model) in zip(target_model_names, target_models):
            correct_t = 0
            with torch.no_grad():
                for (data, labels) in test_loader:
                    data, labels = data.to(device), labels.to(device)
                    correct_t += test(target_model, data, labels)
            err = 100. - 100. * correct_t / len(test_loader.dataset)
            print('Test error of {}: {:.2f}'.format(
                basename(target_model_names), err))
        return

    eps = args.eps

    correct = 0
    for (data, labels) in test_loader:
        # take the random step in the RAND+FGSM
        if attack == 'rand_fgs':
            data = torch.clamp(
                data +
                torch.zeros_like(data).uniform_(-args.alpha, args.alpha), 0.0,
                1.0)
            eps -= args.alpha
        data, labels = data.to(device), labels.to(device)
        grad = gen_grad(data, src_model, labels)

        # FGSM and RAND+FGSM one-shot attack
        if attack in ['fgs', 'rand_fgs']:
            adv_x = symbolic_fgs(data, grad, eps=eps)

        # iterative FGSM
        if attack == 'ifgs':
            adv_x = iter_fgs(src_model,
                             data,
                             labels,
                             steps=args.steps,
                             eps=args.eps / args.steps)

        correct += test(src_model, adv_x, labels)
    test_error = 100. - 100. * correct / len(test_loader.dataset)
    print('Test Set Error Rate: {:.2f}%'.format(test_error))
Ejemplo n.º 16
0
def main(attack, src_model_name, target_model_names, data_train_dir,
         data_test_dir):
    np.random.seed(0)
    tf.set_random_seed(0)
    set_gtsrb_flags()

    # Get GTSRB test data
    _, _, _, _, X_test, Y_test = load_data(data_train_dir, data_test_dir)

    # display_leg_sample(X_test)

    # One-hot encode image labels
    label_binarizer = LabelBinarizer()
    Y_test = label_binarizer.fit_transform(Y_test)

    x = K.placeholder(
        (None, FLAGS.IMAGE_ROWS, FLAGS.IMAGE_COLS, FLAGS.NUM_CHANNELS))

    y = K.placeholder((None, FLAGS.NUM_CLASSES))

    # one_hot_y = tf.one_hot(y, 43)

    # source model for crafting adversarial examples
    src_model = load_model(src_model_name)

    # model(s) to target
    target_models = [None] * len(target_model_names)
    for i in range(len(target_model_names)):
        target_models[i] = load_model(target_model_names[i])

    # simply compute test error
    if attack == "test":
        err = tf_test_error_rate(src_model, x, X_test, Y_test)
        print '{}: {:.3f}'.format(basename(src_model_name), err)

        for (name, target_model) in zip(target_model_names, target_models):
            err = tf_test_error_rate(target_model, x, X_test, Y_test)
            print '{}: {:.3f}'.format(basename(name), err)
        return

    eps = args.eps

    # take the random step in the RAND+FGSM
    if attack == "rand_fgs":
        X_test = np.clip(
            X_test + args.alpha * np.sign(np.random.randn(*X_test.shape)), 0.0,
            1.0)
        eps -= args.alpha

    logits = src_model(x)
    grad = gen_grad(x, logits, y)

    # FGSM and RAND+FGSM one-shot attack
    if attack in ["fgs", "rand_fgs"]:
        adv_x = symbolic_fgs(x, grad, eps=eps)

    # iterative FGSM
    if attack == "ifgs":
        adv_x = iter_fgs(src_model,
                         x,
                         y,
                         steps=args.steps,
                         eps=args.eps / args.steps)

    # Carlini & Wagner attack
    if attack == "CW":
        X_test = X_test[0:200]
        Y_test = Y_test[0:200]

        cli = CarliniLi(K.get_session(),
                        src_model,
                        targeted=False,
                        confidence=args.kappa,
                        eps=args.eps)

        X_adv = cli.attack(X_test, Y_test)

        r = np.clip(X_adv - X_test, -args.eps, args.eps)
        X_adv = X_test + r
        np.save('Train_Carlini_200.npy', X_adv)
        np.save('Label_Carlini_200.npy', Y_test)

        err = tf_test_error_rate(src_model, x, X_adv, Y_test)
        print '{}->{}: {:.3f}'.format(basename(src_model_name),
                                      basename(src_model_name), err)

        for (name, target_model) in zip(target_model_names, target_models):
            err = tf_test_error_rate(target_model, x, X_adv, Y_test)
            print '{}->{}: {:.3f}'.format(basename(src_model_name),
                                          basename(name), err)
        display_leg_adv_sample(X_test, X_adv)
        return

    if attack == "cascade_ensemble":
        # X_test = np.clip(
        #     X_test + args.alpha * np.sign(np.random.randn(*X_test.shape)),
        #     0.0, 1.0)
        # eps -= args.alpha

        sub_model_ens = (sub_model_2, sub_model_3)
        sub_models = [None] * len(sub_model_ens)
        for i in range(len(sub_model_ens)):
            sub_models[i] = load_model(sub_model_ens[i])

        adv_x = x
        for j in range(args.steps):
            for i, m in enumerate(sub_models + [src_model]):
                logits = m(adv_x)
                gradient = gen_grad(adv_x, logits, y)
                adv_x = symbolic_fgs(adv_x,
                                     gradient,
                                     eps=args.eps / args.steps,
                                     clipping=True)

    if attack == "Iter_Casc":
        # X_test = np.clip(
        #     X_test + args.alpha * np.sign(np.random.randn(*X_test.shape)),
        #     0.0, 1.0)
        # args.eps = args.eps - args.alpha

        sub_model_ens = (sub_model_1, sub_model_2, sub_model_3)
        sub_models = [None] * len(sub_model_ens)
        for i in range(len(sub_model_ens)):
            sub_models[i] = load_model(sub_model_ens[i])

        x_advs = [None] * len(sub_models)
        errs = [None] * len(sub_models)
        adv_x = x
        eps_all = []

        for i in range(args.steps):
            if i == 0:
                eps_all[0] = (1.0 / len(sub_models)) * args.eps
            else:
                for j in range(i):
                    pre_sum = 0.0
                    pre_sum += eps_all[j]
                    eps_all[i] = (args.eps - pre_sum) * (1.0 / len(sub_models))

        # for i in range(args.steps):
        #     if i == 0:
        #         eps_0 = (1.0 / len(sub_models)) * args.eps
        #         eps_all.append(eps_0)
        #     elif i == 1:
        #         eps_1 = (1 - 1.0 / len(sub_models)) * (1.0 / len(sub_models)) * args.eps
        #         eps_all.append(eps_1)
        #     elif i == 2:
        #         eps_2 = (1 - (1 - 1.0 / len(sub_models)) * (1.0 / len(sub_models))) * (1.0 / len(sub_models)) * args.eps
        #         eps_all.append(eps_2)
        #     elif i == 3:
        #         eps_3 = (1 - (1 - (1 - 1.0 / len(sub_models)) * (1.0 / len(sub_models))) * (1.0 / len(sub_models))) * (
        #                 1.0 / len(sub_models)) * args.eps
        #         eps_all.append(eps_3)
        #     elif i == 4:
        #         eps_4 = (1 - (
        #                 1 - (1 - (1 - 1.0 / len(sub_models)) * (1.0 / len(sub_models))) * (1.0 / len(sub_models))) * (
        #                          1.0 / len(sub_models))) * (1.0 / len(sub_models)) * args.eps
        #         eps_all.append(eps_4)
        #     elif i == 5:
        #         eps_5 = (1 - (1 - (
        #                 1 - (1 - (1 - 1.0 / len(sub_models)) * (1.0 / len(sub_models))) * (1.0 / len(sub_models))) * (
        #                               1.0 / len(sub_models)))) * (1.0 / len(sub_models)) * args.eps
        #         eps_all.append(eps_5)
        #     elif i == 6:
        #         eps_6 = (1 - (1 - (1 - (
        #                 1 - (1 - (1 - 1.0 / len(sub_models)) * (1.0 / len(sub_models))) * (1.0 / len(sub_models))) * (
        #                                    1.0 / len(sub_models))))) * (1.0 / len(sub_models)) * args.eps
        #         eps_all.append(eps_6)
        #
        #     elif i == 7:
        #         eps_7 = (1 - (1 - (1 - (1 - (
        #                 1 - (1 - (1 - 1.0 / len(sub_models)) * (1.0 / len(sub_models))) * (1.0 / len(sub_models))) * (
        #                                         1.0 / len(sub_models)))))) * (1.0 / len(sub_models)) * args.eps
        #         eps_all.append(eps_7)
        #     elif i == 8:
        #         eps_8 = (1 - (1 - (1 - (1 - (1 - (
        #                 1 - (1 - (1 - 1.0 / len(sub_models)) * (1.0 / len(sub_models))) * (1.0 / len(sub_models))) * (
        #                                              1.0 / len(sub_models))))))) * (1.0 / len(sub_models)) * args.eps
        #         eps_all.append(eps_8)
        #     elif i == 9:
        #         eps_9 = (1 - (1 - (1 - (1 - (1 - (1 - (
        #                 1 - (1 - (1 - 1.0 / len(sub_models)) * (1.0 / len(sub_models))) * (1.0 / len(sub_models))) * (
        #                                                   1.0 / len(sub_models)))))))) * (
        #                         1.0 / len(sub_models)) * args.eps
        #         eps_all.append(eps_9)
        #     elif i == 10:
        #         eps_10 = (1 - (1 - (1 - (1 - (1 - (1 - (1 - (
        #                 1 - (1 - (1 - 1.0 / len(sub_models)) * (1.0 / len(sub_models))) * (1.0 / len(sub_models))) * (
        #                                                         1.0 / len(sub_models))))))))) * (
        #                          1.0 / len(sub_models)) * args.eps
        #         eps_all.append(eps_10)
        #     elif i == 11:
        #         eps_11 = (1 - (1 - (1 - (1 - (1 - (1 - (1 - (1 - (
        #                 1 - (1 - (1 - 1.0 / len(sub_models)) * (1.0 / len(sub_models))) * (1.0 / len(sub_models))) * (
        #                                                              1.0 / len(sub_models)))))))))) * (
        #                          1.0 / len(sub_models)) * args.eps
        #         eps_all.append(eps_11)

        for j in range(args.steps):
            print('iterative step is :', j)
            if j == 0:
                for i, m in enumerate(sub_models):
                    logits = m(adv_x)
                    gradient = gen_grad(adv_x, logits, y)
                    adv_x_ = symbolic_fgs(adv_x,
                                          gradient,
                                          eps=eps_all[j],
                                          clipping=True)
                    x_advs[i] = adv_x_

                    X_adv = batch_eval([x, y], [adv_x_], [X_test, Y_test])[0]

                    err = tf_test_error_rate(m, x, X_adv, Y_test)
                    errs[i] = err
                adv_x = x_advs[errs.index(min(errs))]
            else:
                t = errs.index(min(errs))
                print('index of min value of errs:', t)
                logits = sub_models[t](adv_x)
                gradient = gen_grad(adv_x, logits, y)
                adv_x = symbolic_fgs(adv_x,
                                     gradient,
                                     eps=eps_all[j],
                                     clipping=True)

                for i, m in enumerate(sub_models):
                    X_adv = batch_eval([x, y], [adv_x], [X_test, Y_test])[0]
                    err = tf_test_error_rate(m, x, X_adv, Y_test)
                    errs[i] = err
            print('error rate of each substitute models_oldest: ', errs)
            print('\t')
            if min(errs) >= 99:
                success_rate = sum(errs) / len(sub_models)
                print('success rate is: {:.3f}'.format(success_rate))
                break

        success_rate = sum(errs) / len(sub_models)
        print('success rate is: {:.3f}'.format(success_rate))

        X_adv = batch_eval([x, y], [adv_x], [X_test, Y_test])[0]
        np.save('results/iter_casc_0.2_leg_adv/X_adv_Iter_Casc_0.2.npy', X_adv)

        for (name, target_model) in zip(target_model_names, target_models):
            err = tf_test_error_rate(target_model, x, X_adv, Y_test)
            print '{}->{}: {:.3f}'.format(basename(src_model_name),
                                          basename(name), err)

        save_leg_adv_sample('results/iter_casc_0.2_leg_adv/', X_test, X_adv)

        # save adversarial example specified by user
        save_leg_adv_specified_by_user(
            'results/iter_casc_0.2_leg_adv_label_4/', X_test, X_adv, Y_test)
        return

    if attack == "stack_paral":
        # X_test = np.clip(
        #     X_test + args.alpha * np.sign(np.random.randn(*X_test.shape)),
        #     0.0, 1.0)
        # eps -= args.alpha

        sub_model_ens = (sub_model_1, sub_model_2, sub_model_3)
        sub_models = [None] * len(sub_model_ens)

        for i in range(len(sub_model_ens)):
            sub_models[i] = load_model(sub_model_ens[i])

        errs = [None] * (len(sub_models) + 1)
        x_advs = [None] * len(sub_models)
        # print x_advs

        for i, m in enumerate(sub_models):
            # x = x + args.alpha * np.sign(np.random.randn(*x[0].shape))
            logits = m(x)
            gradient = gen_grad(x, logits, y)
            adv_x = symbolic_fgs(x, gradient, eps=args.eps / 2, clipping=True)
            x_advs[i] = adv_x

        # print x_advs
        adv_x_sum = x_advs[0]
        for i in range(len(sub_models)):
            if i == 0: continue
            adv_x_sum = adv_x_sum + x_advs[i]
        adv_x_mean = adv_x_sum / (len(sub_models))
        preds = src_model(adv_x_mean)
        grads = gen_grad(adv_x_mean, preds, y)
        adv_x = symbolic_fgs(adv_x_mean, grads, eps=args.eps, clipping=True)

        # compute the adversarial examples and evaluate
        for i, m in enumerate(sub_models + [src_model]):
            X_adv = batch_eval([x, y], [adv_x], [X_test, Y_test])[0]
            err = tf_test_error_rate(m, x, X_adv, Y_test)
            errs[i] = err

        # compute success rate
        success_rate = sum(errs) / (len(sub_models) + 1)
        print('success rate is: {:.3f}'.format(success_rate))

        # compute transfer rate
        for (name, target_model) in zip(target_model_names, target_models):
            err = tf_test_error_rate(target_model, x, X_adv, Y_test)
            print '{}->{}: {:.3f}'.format(basename(src_model_name),
                                          basename(name), err)

        # save adversarial examples
        np.save('results/stack_paral_0.2_leg_adv/X_adv_stack_paral_0.2.npy',
                X_adv)
        # save_leg_adv_sample(X_test, X_adv)
        save_leg_adv_sample('results/stack_paral_0.2_leg_adv/', X_test, X_adv)

        # save adversarial example specified by user
        save_leg_adv_specified_by_user(
            'results/stack_paral_0.2_leg_adv_label_4/', X_test, X_adv, Y_test)

        return

    if attack == "cascade_ensemble_2":
        X_test = np.clip(
            X_test + args.alpha * np.sign(np.random.randn(*X_test.shape)), 0.0,
            1.0)
        eps -= args.alpha

        sub_model_ens = (sub_model_1, sub_model_2)
        sub_models = [None] * len(sub_model_ens)

        for i in range(len(sub_model_ens)):
            sub_models[i] = load_model(sub_model_ens[i])

        x_advs = [([None] * len(sub_models)) for i in range(args.steps)]
        # print x_advs

        x_adv = x
        for j in range(args.steps):
            for i, m in enumerate(sub_models):
                logits = m(x_adv)
                gradient = gen_grad(x_adv, logits, y)
                x_adv = symbolic_fgs(x_adv,
                                     gradient,
                                     eps=args.eps / args.steps,
                                     clipping=True)
                x_advs[j][i] = x_adv

        # print x_advs
        adv_x_sum = x_advs[0][0]
        for j in range(args.steps):
            for i in range(len(sub_models)):
                if j == 0 and i == 0: continue
                adv_x_sum = adv_x_sum + x_advs[j][i]
        adv_x_mean = adv_x_sum / (args.steps * len(sub_models))
        preds = src_model(adv_x_mean)
        grads = gen_grad(adv_x_mean, preds, y)
        adv_x = symbolic_fgs(adv_x_mean,
                             grads,
                             eps=args.eps / args.steps,
                             clipping=True)

    # compute the adversarial examples and evaluate
    X_adv = batch_eval([x, y], [adv_x], [X_test, Y_test])[0]

    # white-box attack
    err = tf_test_error_rate(src_model, x, X_adv, Y_test)
    print '{}->{}: {:.3f}'.format(basename(src_model_name),
                                  basename(src_model_name), err)

    # black-box attack
    for (name, target_model) in zip(target_model_names, target_models):
        err = tf_test_error_rate(target_model, x, X_adv, Y_test)
        print '{}->{}: {:.3f}'.format(basename(src_model_name), basename(name),
                                      err)
Ejemplo n.º 17
0
def main(attack, src_model_name, target_model_names):
    np.random.seed(0)
    tf.set_random_seed(0)

    set_flags(20)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    K.set_session(tf.Session(config=config))

    x = K.placeholder(
        (None, FLAGS.NUM_CHANNELS, FLAGS.IMAGE_ROWS, FLAGS.IMAGE_COLS))

    y = K.placeholder((None, FLAGS.NUM_CLASSES))

    _, _, X_test, Y_test = load_data()

    # source model for crafting adversarial examples
    src_model = load_model(src_model_name)

    # model(s) to target
    target_models = [None] * len(target_model_names)
    for i in range(len(target_model_names)):
        target_models[i] = load_model(target_model_names[i])

    # simply compute test error
    if attack == "test":
        err = tf_test_error_rate(src_model, x, X_test, Y_test)
        print('{}: {:.1f}'.format(basename(src_model_name), 100 - err))

        for (name, target_model) in zip(target_model_names, target_models):
            err = tf_test_error_rate(target_model, x, X_test, Y_test)
            print('{}: {:.1f}'.format(basename(name), 100 - err))
        return

    eps = args.eps

    # take the random step in the RAND+FGSM
    if attack == "rfgs":
        X_test = np.clip(
            X_test + args.alpha * np.sign(np.random.randn(*X_test.shape)), 0.0,
            1.0)
        eps -= args.alpha

    logits = src_model(x)
    grad = gen_grad(x, logits, y)

    # FGSM and RAND+FGSM one-shot attack
    if attack in ["fgs", "rfgs"]:
        adv_x = symbolic_fgs(x, grad, eps=eps)

    # iterative FGSM
    if attack == "pgd":
        adv_x = iter_fgs(src_model,
                         x,
                         y,
                         steps=args.steps,
                         eps=args.eps,
                         alpha=args.eps / 10.0)

    if attack == 'mim':
        adv_x = momentum_fgs(src_model, x, y, eps=args.eps)

    print('start')
    # compute the adversarial examples and evaluate
    X_adv = batch_eval([x, y], [adv_x], [X_test, Y_test])[0]
    print('-----done----')
    # white-box attack
    err = tf_test_error_rate(src_model, x, X_adv, Y_test)
    print('{}->{}: {:.1f}'.format(basename(src_model_name),
                                  basename(src_model_name), 100 - err))

    # black-box attack
    for (name, target_model) in zip(target_model_names, target_models):
        err = tf_test_error_rate(target_model, x, X_adv, Y_test)
        print('{}->{}: {:.1f}'.format(basename(src_model_name), basename(name),
                                      100 - err))
Ejemplo n.º 18
0
def main(attack, src_model_name, target_model_names):
    np.random.seed(0)
    tf.set_random_seed(0)
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    K.set_session(tf.Session(config=config))

    if args.dataset == "mnist":
        K.set_image_data_format('channels_last')
        set_mnist_flags()
        x = K.placeholder(
            (None, FLAGS.IMAGE_ROWS, FLAGS.IMAGE_COLS, FLAGS.NUM_CHANNELS))
        y = K.placeholder((None, FLAGS.NUM_CLASSES))
        _, _, X_test, Y_test = data_mnist()
        # source model for crafting adversarial examples
        src_model = load_model_mnist(src_model_name)
        sd = 0.7

    elif args.dataset == "cifar10":
        set_flags(20)
        K.set_image_data_format('channels_first')
        x = K.placeholder(
            (None, FLAGS.NUM_CHANNELS, FLAGS.IMAGE_ROWS, FLAGS.IMAGE_COLS))
        y = K.placeholder((None, FLAGS.NUM_CLASSES))
        _, _, X_test, Y_test = load_data()
        # source model for crafting adversarial examples
        src_model = load_model(src_model_name)
        sd = 100. / 255.

    # model(s) to target
    target_models = [None] * len(target_model_names)
    for i in range(len(target_model_names)):
        target_models[i] = load_model(target_model_names[i])

    # simply compute test error
    if attack == "test":
        acc = tf_test_acc(src_model, x, X_test, Y_test)
        print('{}: {:.1f}'.format(basename(src_model_name), acc))

        for (name, target_model) in zip(target_model_names, target_models):
            acc = tf_test_acc(target_model, x, X_test, Y_test)
            print('{}: {:.1f}'.format(basename(name), acc))
        return

    eps = args.eps

    # take the random step in the RAND+FGSM
    if attack == "rfgs":
        X_test = np.clip(
            X_test + args.alpha * np.sign(np.random.randn(*X_test.shape)), 0.0,
            1.0)
        eps -= args.alpha

    logits = src_model(x)
    grad = gen_grad(x, logits, y)

    # FGSM and RAND+FGSM one-shot attack
    if attack in ["fgs", "rfgs"]:
        adv_x = symbolic_fgs(x, grad, eps=eps)

    # iterative FGSM
    if attack == "pgd":
        adv_x = iter_fgs(src_model,
                         x,
                         y,
                         steps=args.steps,
                         eps=eps,
                         alpha=eps / 10.0)

    if attack == 'so':
        adv_x = so(src_model,
                   x,
                   y,
                   steps=args.steps,
                   eps=eps,
                   alpha=eps / 10.0,
                   norm=args.norm,
                   sd=sd)

    print('start')
    # compute the adversarial examples and evaluate
    X_adv = batch_eval([x, y], [adv_x], [X_test, Y_test])[0]
    # pdb.set_trace()
    print('-----done----')
    # white-box attack
    acc = tf_test_acc(src_model, x, X_adv, Y_test, sd=sd, num_iter=10)
    with open('attacks.txt', 'a') as log:
        log.write('{}->{}: {:.1f}, size = {:.4f}\n'.format(
            basename(src_model_name), basename(src_model_name), acc, eps))

    # black-box attack
    for (name, target_model) in zip(target_model_names, target_models):
        acc = tf_test_acc(target_model, x, X_adv, Y_test, sd=sd, num_iter=10)
        with open('attacks.txt', 'a') as log:
            log.write('{}->{}: {:.1f}, size = {:.4f}\n'.format(
                basename(src_model_name), basename(name), acc, eps))
def white_box_fgsm_iter(prediction, target_model, x, logits, y, X_test,
                        X_test_ini, targets, targets_cat, eps, dim, beta):
    #Get gradient from model
    if args.loss_type == 'xent':
        grad = gen_grad(x, logits, y)
    elif args.loss_type == 'cw':
        real = tf.reduce_sum(y * logits, 1)
        other = tf.reduce_max((1 - y) * logits - (y * 10000), 1)
        if '_un' in args.method:
            loss = tf.maximum(0.0, real - other + args.conf)
        else:
            loss = tf.maximum(0.0, other - real + args.conf)
        grad = K.gradients(loss, [x])[0]

    # normalized gradient
    if args.norm == 'linf':
        normed_grad = K.sign(grad)
    elif args.norm == 'l2':
        normed_grad = K.l2_normalize(grad, axis=(1, 2, 3))

    # Multiply by constant epsilon
    scaled_grad = beta * normed_grad

    # Add perturbation to original example to obtain adversarial example
    if args.loss_type == 'xent':
        if '_un' in args.method:
            adv_x_t = K.stop_gradient(x + scaled_grad)
        else:
            adv_x_t = K.stop_gradient(x - scaled_grad)
    elif args.loss_type == 'cw':
        adv_x_t = K.stop_gradient(x - scaled_grad)

    adv_x_t = K.clip(adv_x_t, CLIP_MIN, CLIP_MAX)

    X_test_ini_mod = X_test_ini[:BATCH_SIZE * BATCH_EVAL_NUM]
    targets_cat_mod = targets_cat[:BATCH_SIZE * BATCH_EVAL_NUM]
    targets_mod = targets[:BATCH_SIZE * BATCH_EVAL_NUM]

    X_adv_t = np.zeros_like(X_test_ini_mod)

    for i in range(BATCH_EVAL_NUM):
        X_test_slice = X_test[i * (BATCH_SIZE):(i + 1) * (BATCH_SIZE)]
        X_test_ini_slice = X_test_ini[i * (BATCH_SIZE):(i + 1) * (BATCH_SIZE)]
        targets_cat_slice = targets_cat[i * (BATCH_SIZE):(i + 1) *
                                        (BATCH_SIZE)]
        X_adv_curr = X_test_slice
        for k in range(args.num_iter):
            X_adv_curr = K.get_session().run([adv_x_t],
                                             feed_dict={
                                                 x: X_adv_curr,
                                                 y: targets_cat_slice
                                             })[0]
            r = X_adv_curr - X_test_ini_slice
            r = np.clip(r, -eps, eps)
            X_adv_curr = X_test_ini_slice + r
        X_adv_t[i * (BATCH_SIZE):(i + 1) * (BATCH_SIZE)] = np.clip(
            X_adv_curr, CLIP_MIN, CLIP_MAX)

    adv_pred_np = K.get_session().run([prediction], feed_dict={x: X_adv_t})[0]

    # _, _, white_box_error = tf_test_error_rate(target_model, x, X_adv_t, targets_cat_mod)
    white_box_error = 100.0 * np.sum(
        np.argmax(adv_pred_np, 1) != targets_mod) / adv_pred_np.shape[0]
    if '_un' not in args.method:
        white_box_error = 100.0 - white_box_error

    wb_norm = np.mean(
        np.linalg.norm(
            (X_adv_t - X_test_ini_mod).reshape(BATCH_SIZE * BATCH_EVAL_NUM,
                                               dim),
            axis=1))
    print('Average white-box l2 perturbation: {}'.format(wb_norm))

    wb_write_out(eps, white_box_error, wb_norm)

    return
def main(attack, src_model_name, target_model_names):
    np.random.seed(0)
    tf.set_random_seed(0)

    flags.DEFINE_integer('BATCH_SIZE', 32, 'Size of batches')
    set_mnist_flags()

    x = K.placeholder(
        (None, FLAGS.IMAGE_ROWS, FLAGS.IMAGE_COLS, FLAGS.NUM_CHANNELS))

    y = K.placeholder((None, FLAGS.NUM_CLASSES))

    _, _, X_test, Y_test = data_mnist()

    # source model for crafting adversarial examples
    src_model = load_model(src_model_name)

    # model(s) to target
    target_models = [None] * len(target_model_names)
    for i in range(len(target_model_names)):
        target_models[i] = load_model(target_model_names[i])

    # simply compute test error
    if attack == "test":
        err = tf_test_error_rate(src_model, x, X_test, Y_test)
        print '{}: {:.3f}'.format(basename(src_model_name), err)

        for (name, target_model) in zip(target_model_names, target_models):
            err = tf_test_error_rate(target_model, x, X_test, Y_test)
            print '{}: {:.3f}'.format(basename(name), err)
        return

    eps = args.eps

    # take the random step in the RAND+FGSM
    if attack == "rand_fgs":
        X_test = np.clip(
            X_test + args.alpha * np.sign(np.random.randn(*X_test.shape)), 0.0,
            1.0)
        eps -= args.alpha

    logits = src_model(x)
    grad = gen_grad(x, logits, y)

    # FGSM and RAND+FGSM one-shot attack
    if attack in ["fgs", "rand_fgs"]:
        adv_x = symbolic_fgs(x, grad, eps=eps)

    # iterative FGSM
    if attack == "ifgs":
        adv_x = iter_fgs(src_model,
                         x,
                         y,
                         steps=args.steps,
                         eps=args.eps / args.steps)

    # Carlini & Wagner attack
    if attack == "CW":
        X_test = X_test[0:1000]
        Y_test = Y_test[0:1000]

        cli = CarliniLi(K.get_session(),
                        src_model,
                        targeted=False,
                        confidence=args.kappa,
                        eps=args.eps)

        X_adv = cli.attack(X_test, Y_test)

        r = np.clip(X_adv - X_test, -args.eps, args.eps)
        X_adv = X_test + r

        err = tf_test_error_rate(src_model, x, X_adv, Y_test)
        print '{}->{}: {:.3f}'.format(basename(src_model_name),
                                      basename(src_model_name), err)

        for (name, target_model) in zip(target_model_names, target_models):
            err = tf_test_error_rate(target_model, x, X_adv, Y_test)
            print '{}->{}: {:.3f}'.format(basename(src_model_name),
                                          basename(name), err)

        return

    if attack == "cascade_ensemble":
        X_test = np.clip(
            X_test + args.alpha * np.sign(np.random.randn(*X_test.shape)), 0.0,
            1.0)
        eps -= args.alpha

        sub_model_ens = (sub_model_1, sub_model_2, sub_model_3, sub_model_4,
                         sub_model_5, sub_model_6, sub_model_7)
        sub_models = [None] * len(sub_model_ens)
        for i in range(len(sub_model_ens)):
            sub_models[i] = load_model(sub_model_ens[i])

        adv_x = x
        for j in range(args.steps):
            for i, m in enumerate(sub_models + [src_model]):
                logits = m(adv_x)
                gradient = gen_grad(adv_x, logits, y)
                adv_x = symbolic_fgs(adv_x,
                                     gradient,
                                     eps=args.eps / args.steps,
                                     clipping=True)

    if attack == "parallel_ensemble":
        X_test = np.clip(
            X_test + args.alpha * np.sign(np.random.randn(*X_test.shape)), 0.0,
            1.0)
        eps -= args.alpha

        sub_model_ens = (sub_model_1, sub_model_2, sub_model_3)
        sub_models = [None] * len(sub_model_ens)

        for i in range(len(sub_model_ens)):
            sub_models[i] = load_model(sub_model_ens[i])

        x_advs = [([None] * len(sub_models)) for i in range(args.steps)]
        print x_advs

        x_adv = x
        for j in range(args.steps):
            for i, m in enumerate(sub_models):
                logits = m(x_adv)
                gradient = gen_grad(x_adv, logits, y)
                x_adv = symbolic_fgs(x_adv,
                                     gradient,
                                     eps=args.eps / args.steps,
                                     clipping=True)
                x_advs[j][i] = x_adv

        print x_advs
        adv_x_mean = x_advs[0][0]
        for j in range(args.steps):
            for i in range(len(sub_models)):
                if j == 0 and i == 0: continue
                adv_x_mean = adv_x_mean + x_advs[j][i]
        xadv = adv_x_mean / (args.steps * len(sub_models))
        preds = src_model(xadv)
        grads = gen_grad(xadv, preds, y)
        adv_x = symbolic_fgs(xadv, grads, eps=args.eps, clipping=True)

    # compute the adversarial examples and evaluate
    X_adv = batch_eval([x, y], [adv_x], [X_test, Y_test])[0]

    # white-box attack
    err = tf_test_error_rate(src_model, x, X_adv, Y_test)
    print '{}->{}: {:.3f}'.format(basename(src_model_name),
                                  basename(src_model_name), err)

    # black-box attack
    for (name, target_model) in zip(target_model_names, target_models):
        err = tf_test_error_rate(target_model, x, X_adv, Y_test)
        print '{}->{}: {:.3f}'.format(basename(src_model_name), basename(name),
                                      err)