Example #1
0
def main(args):
    """Run a zeroth-order (ZO) black-box adversarial attack over a set of
    test images and save per-image attack statistics to .npz files.

    Supported ZO optimizers (args['mode']): ZOSGD, ZOsignSGD, ZOSCD,
    ZOAdaMM, ZOSMD, ZOPSGD, ZONES.

    Args:
        args: dict of experiment settings; keys read here include 'maxiter',
            'init_const', 'kappa', 'q', 'mode', 'save_iteration', 'dataset',
            'targeted_attack', 'mini_batch_sz', 'lr_idx', 'mu', 'lr',
            'decay_lr', 'constraint', 'image_number', 'print_iteration',
            'exp_code'.

    Raises:
        ValueError: if args['dataset'] is not 'mnist', 'cifar10' or
            'imagenet'.
    """
    with tf.Session() as sess:

        # Fix all seeds for reproducibility.
        random.seed(SEED)
        np.random.seed(SEED)
        tf.set_random_seed(SEED)

        # Draw 3x the requested number of candidate ids (without replacement)
        # so that images whose prediction disagrees with the true label can
        # be skipped and replaced below.
        image_id_set = np.random.choice(range(1000),
                                        args["image_number"] * 3,
                                        replace=False)
        #image_id_set = np.random.randint(1, 1000, args["image_number"] )
        arg_max_iter = args['maxiter']  ### max number of iterations
        arg_init_const = args[
            'init_const']  ### regularization prior to attack loss
        arg_kappa = args['kappa']  ### attack confidence level
        arg_q = args['q']  ### number of random direction vectors
        arg_mode = args['mode']  ### algorithm name
        arg_save_iteration = args['save_iteration']
        arg_Dataset = args["dataset"]
        arg_targeted_attack = args["targeted_attack"]
        arg_bsz = args["mini_batch_sz"]
        idx_lr = args["lr_idx"]

        ## load classifier; for MNIST and CIFAR pixel value range is [-0.5,0.5]
        if (arg_Dataset == 'mnist'):
            data, model = MNIST(), MNISTModel("models/mnist", sess, True)
        elif (arg_Dataset == 'cifar10'):
            data, model = CIFAR(), CIFARModel("models/cifar", sess, True)
        elif (arg_Dataset == 'imagenet'):
            data, model = ImageNet(SEED), InceptionModel(sess, True)
        else:
            # Fail fast: the original code only printed here, which led to a
            # confusing NameError on `data`/`model` further down.
            raise ValueError('Please specify a valid dataset')

        # ii: index into the candidate pool; iii: number of images attacked.
        succ_count, ii, iii = 0, 0, 0
        final_distortion_count, first_iteration_count, first_distortion_count = [], [], []
        while iii < args["image_number"]:
            # Stop cleanly if too many candidates were skipped and the
            # (3x oversampled) id pool is exhausted.
            if ii >= len(image_id_set):
                print("Candidate image pool exhausted, stopping early")
                break
            # Bug fix: read the current candidate BEFORE advancing the index.
            # The original incremented first, which skipped image_id_set[0]
            # and could index one past the end of the pool.
            image_id = image_id_set[ii]
            ii = ii + 1

            # if image_id!= 836: continue # for test only

            orig_prob, orig_class, orig_prob_str = util.model_prediction(
                model, np.expand_dims(data.test_data[image_id],
                                      axis=0))  ## orig_class: predicted label;

            if arg_targeted_attack:  ### target attack
                # Target the "next" class (mod 10) relative to the prediction.
                target_label = np.remainder(orig_class + 1, 10)
            else:
                target_label = orig_class

            orig_img, target = util.generate_data(data, image_id, target_label)
            # shape of orig_img is (1,28,28,1) in [-0.5, 0.5]

            true_label_list = np.argmax(data.test_labels, axis=1)
            true_label = true_label_list[image_id]

            print("Image ID:{}, infer label:{}, true label:{}".format(
                image_id, orig_class, true_label))
            if true_label != orig_class:
                # Only attack images the model already classifies correctly.
                print(
                    "True Label is different from the original prediction, pass!"
                )
                continue
            else:
                iii = iii + 1

            print('\n', iii, '/', args["image_number"])

            ##  parameter
            d = orig_img.size  # feature dim
            print("dimension = ", d)

            # mu=1/d**2  # smoothing parameter
            q = arg_q + 0
            I = arg_max_iter + 0
            kappa = arg_kappa + 0
            const = arg_init_const + 0

            ## flatten image to vec
            orig_img_vec = np.resize(orig_img, (1, d))
            delta_adv = np.zeros((1, d))  ### initialized adv. perturbation
            #delta_adv = np.random.uniform(-16/255,16/255,(1,d))

            ## w adv image initialization
            if args["constraint"] == 'uncons':
                # * 0.999999 to avoid +-0.5 return +-infinity
                w_ori_img_vec = np.arctanh(
                    2 * (orig_img_vec) * 0.999999
                )  # in real value, note that orig_img_vec in [-0.5, 0.5]
                w_img_vec = np.arctanh(
                    2 * (np.clip(orig_img_vec + delta_adv, -0.5, 0.5)) *
                    0.999999)
            else:
                w_ori_img_vec = orig_img_vec.copy()
                w_img_vec = np.clip(w_ori_img_vec + delta_adv, -0.5, 0.5)

            # ## test ##
            # for test_value in w_ori_img_vec[0, :]:
            #     if np.isnan(test_value) or np.isinf(test_value):
            #         print(test_value)

            # initialize the best solution & best loss
            best_adv_img = []  # successful adv image in [-0.5, 0.5]
            best_delta = []  # best perturbation
            best_distortion = (0.5 * d)**2  # threshold for best perturbation
            total_loss = np.zeros(I)  ## I: max iters
            l2s_loss_all = np.zeros(I)
            attack_flag = False
            first_flag = True  ## record first successful attack

            # parameter setting for ZO gradient estimation
            mu = args["mu"]  ### smoothing parameter

            ## learning rate
            base_lr = args["lr"]

            if arg_mode == "ZOAdaMM":
                ## parameter initialization for AdaMM
                v_init = 1e-7  #0.00001
                v_hat = v_init * np.ones((1, d))
                v = v_init * np.ones((1, d))

                m = np.zeros((1, d))
                # momentum parameter for first and second order moment
                beta_1 = 0.9
                beta_2 = 0.9  # only used by AMSGrad
                print(beta_1, beta_2)

            #for i in tqdm(range(I)):
            for i in range(I):

                if args["decay_lr"]:
                    # 1/sqrt(t) learning-rate decay.
                    base_lr = args["lr"] / np.sqrt(i + 1)

                ## Total loss evaluation
                if args["constraint"] == 'uncons':
                    total_loss[i], l2s_loss_all[
                        i] = function_evaluation_uncons(
                            w_img_vec, kappa, target_label, const, model,
                            orig_img, arg_targeted_attack)

                else:
                    total_loss[i], l2s_loss_all[i] = function_evaluation_cons(
                        w_img_vec, kappa, target_label, const, model, orig_img,
                        arg_targeted_attack)

                ## gradient estimation w.r.t. w_img_vec
                if arg_mode == "ZOSCD":
                    grad_est = grad_coord_estimation(mu, q, w_img_vec, d,
                                                     kappa, target_label,
                                                     const, model, orig_img,
                                                     arg_targeted_attack,
                                                     args["constraint"])
                elif arg_mode == "ZONES":
                    grad_est = gradient_estimation_NES(mu, q, w_img_vec, d,
                                                       kappa, target_label,
                                                       const, model, orig_img,
                                                       arg_targeted_attack,
                                                       args["constraint"])
                else:
                    grad_est = gradient_estimation_v2(mu, q, w_img_vec, d,
                                                      kappa, target_label,
                                                      const, model, orig_img,
                                                      arg_targeted_attack,
                                                      args["constraint"])

                # if np.remainder(i,50)==0:
                # print("total loss:",total_loss[i])
                # print(np.linalg.norm(grad_est, np.inf))

                ## ZO-Attack, unconstrained optimization formulation
                if arg_mode == "ZOSGD":
                    delta_adv = delta_adv - base_lr * grad_est
                if arg_mode == "ZOsignSGD":
                    delta_adv = delta_adv - base_lr * np.sign(grad_est)
                if arg_mode == "ZOSCD":
                    delta_adv = delta_adv - base_lr * grad_est
                if arg_mode == "ZOAdaMM":
                    # AMSGrad-style moment updates with a ZO gradient estimate.
                    m = beta_1 * m + (1 - beta_1) * grad_est
                    v = beta_2 * v + (1 - beta_2) * np.square(grad_est)  ### vt
                    v_hat = np.maximum(v_hat, v)
                    #print(np.mean(v_hat))
                    delta_adv = delta_adv - base_lr * m / np.sqrt(v_hat)
                    if args["constraint"] == 'cons':
                        tmp = delta_adv.copy()
                        #X_temp = orig_img_vec.reshape((-1,1))
                        #V_temp2 = np.diag(np.sqrt(v_hat.reshape(-1)+1e-10))
                        V_temp = np.sqrt(v_hat.reshape(1, -1))
                        delta_adv = projection_box(tmp, orig_img_vec, V_temp,
                                                   -0.5, 0.5)
                        #delta_adv2 = projection_box_2(tmp, X_temp, V_temp2, -0.5, 0.5)
                    # v_init = 1e-2 #0.00001
                    # v = v_init * np.ones((1, d))
                    # m = np.zeros((1, d))
                    # # momentum parameter for first and second order moment
                    # beta_1 = 0.9
                    # beta_2 = 0.99  # only used by AMSGrad
                    # m = beta_1 * m + (1-beta_1) * grad_est
                    # v = np.maximum(beta_2 * v + (1-beta_2) * np.square(grad_est),v)
                    # delta_adv = delta_adv - base_lr * m /np.sqrt(v+1e-10)
                    # if args["constraint"] == 'cons':
                    #     V_temp = np.diag(np.sqrt(v.reshape(-1)+1e-10))
                    #     X_temp = orig_img_vec.reshape((-1,1))
                    #     delta_adv = projection_box(delta_adv, X_temp, V_temp, -0.5, 0.5)
                if arg_mode == "ZOSMD":
                    delta_adv = delta_adv - 0.5 * base_lr * grad_est
                    # delta_adv = delta_adv - base_lr* grad_est
                    if args["constraint"] == 'cons':
                        #V_temp = np.eye(orig_img_vec.size)
                        V_temp = np.ones_like(orig_img_vec)
                        #X_temp = orig_img_vec.reshape((-1,1))
                        delta_adv = projection_box(delta_adv, orig_img_vec,
                                                   V_temp, -0.5, 0.5)
                if arg_mode == "ZOPSGD":
                    delta_adv = delta_adv - base_lr * grad_est
                    if args["constraint"] == 'cons':
                        #V_temp = np.eye(orig_img_vec.size)
                        V_temp = np.ones_like(orig_img_vec)
                        #X_temp = orig_img_vec.reshape((-1,1))
                        delta_adv = projection_box(delta_adv, orig_img_vec,
                                                   V_temp, -0.5, 0.5)
                if arg_mode == "ZONES":
                    delta_adv = delta_adv - base_lr * np.sign(grad_est)
                    if args["constraint"] == 'cons':
                        #V_temp = np.eye(orig_img_vec.size)
                        V_temp = np.ones_like(orig_img_vec)
                        #X = orig_img_vec.reshape((-1,1))
                        delta_adv = projection_box(delta_adv, orig_img_vec,
                                                   V_temp, -0.5, 0.5)

                # if arg_mode == "ZO-AdaFom":
                #     m = beta_1 * m + (1-beta_1) * grad_est
                #     v = v* (float(i)/(i+1)) + np.square(grad_est)/(i+1)
                #     w_img_vec = w_img_vec - base_lr * m/np.sqrt(v)
                ##

                ### adv. example update
                w_img_vec = w_ori_img_vec + delta_adv

                ## convert back to adv_img in [-0.5 , 0.5]
                if args["constraint"] == 'uncons':
                    adv_img_vec = 0.5 * np.tanh((w_img_vec)) / 0.999999  #
                else:
                    adv_img_vec = w_img_vec.copy()

                adv_img = np.resize(adv_img_vec, orig_img.shape)

                ## update the best solution in the iterations
                attack_prob, _, _ = util.model_prediction(model, adv_img)
                target_prob = attack_prob[0, target_label]
                # Highest probability among all NON-target classes.
                attack_prob_tmp = attack_prob.copy()
                attack_prob_tmp[0, target_label] = 0
                other_prob = np.amax(attack_prob_tmp)

                if args["print_iteration"]:
                    if np.remainder(i + 1, 1) == 0:
                        if true_label != np.argmax(attack_prob):
                            print(
                                "Iter %d (Succ): ID = %d, lr = %3.5f, decay = %d, ZO = %s %s, loss = %3.5f, l2sdist = %3.5f, TL = %d, PL = %d"
                                % (i + 1, image_id, args["lr"],
                                   int(args["decay_lr"]), arg_mode,
                                   args["constraint"], total_loss[i],
                                   l2s_loss_all[i], true_label,
                                   np.argmax(attack_prob)))
                        else:
                            print(
                                "Iter %d (Fail): ID = %d, lr = %3.6f, decay = %d, ZO = %s %s, loss = %3.5f, l2sdist = %3.5f, TL = %d, PL = %d"
                                % (i + 1, image_id, args["lr"],
                                   int(args["decay_lr"]), arg_mode,
                                   args["constraint"], total_loss[i],
                                   l2s_loss_all[i], true_label,
                                   np.argmax(attack_prob)))

                if arg_save_iteration:
                    os.system("mkdir Examples")
                    if (np.logical_or(
                            true_label != np.argmax(attack_prob),
                            np.remainder(i + 1,
                                         10) == 0)):  ## every 10 iterations
                        suffix = "id_{}_Mode_{}_True_{}_Pred_{}_Ite_{}".format(
                            image_id, arg_mode, true_label,
                            np.argmax(attack_prob), i + 1)
                        # util.save_img(adv_img, "Examples/{}.png".format(suffix))

                if arg_targeted_attack:
                    # Targeted success: log-prob margin of the target class
                    # over the runner-up must reach kappa.
                    if (np.log(target_prob + 1e-10) -
                            np.log(other_prob + 1e-10) >=
                            kappa):  # check attack confidence
                        if (distortion(adv_img, orig_img) <
                                best_distortion):  # check distortion
                            # print('best distortion obtained at',i,'-th iteration')
                            best_adv_img = adv_img
                            best_distortion = distortion(adv_img, orig_img)
                            best_delta = adv_img - orig_img
                            best_iteration = i + 1
                            adv_class = np.argmax(attack_prob)
                            attack_flag = True
                            ## Record first attack
                            if (first_flag):
                                first_flag = False  ### once gets into this, it will no longer record the next sucessful attack
                                first_adv_img = adv_img
                                first_distortion = distortion(
                                    adv_img, orig_img)
                                first_delta = adv_img - orig_img
                                first_class = adv_class
                                first_iteration = i + 1
                else:
                    # Untargeted success: some other class beats the original
                    # label by the kappa log-prob margin.
                    if (np.log(other_prob + 1e-10) -
                            np.log(target_prob + 1e-10) >=
                            kappa):  # check attack confidence
                        if (distortion(adv_img, orig_img) <
                                best_distortion):  # check distortion
                            # print('best distortion obtained at',i,'-th iteration')
                            best_adv_img = adv_img
                            best_distortion = distortion(adv_img, orig_img)
                            best_delta = adv_img - orig_img
                            best_iteration = i + 1
                            adv_class = np.argmax(attack_prob)
                            attack_flag = True
                            ## Record first attack
                            if (first_flag):
                                first_flag = False
                                first_adv_img = adv_img
                                first_distortion = distortion(
                                    adv_img, orig_img)
                                first_delta = adv_img - orig_img
                                first_class = adv_class
                                first_iteration = i + 1

            if (attack_flag):
                # os.system("mkdir Results_SL")
                # ## best attack (final attack)
                # suffix = "id_{}_Mode_{}_True_{}_Pred_{}".format(image_id, arg_mode, true_label, orig_class) ## orig_class, predicted label
                # suffix2 = "id_{}_Mode_{}_True_{}_Pred_{}".format(image_id, arg_mode, true_label, adv_class)
                # suffix3 = "id_{}_Mode_{}".format(image_id, arg_mode)
                # ### save original image
                # util.save_img(orig_img, "Results_SL/id_{}.png".format(image_id))
                # util.save_img(orig_img, "Results_SL/{}_Orig.png".format(suffix))
                # ### adv. image
                # util.save_img(best_adv_img, "Results_SL/{}_Adv_best.png".format(suffix2))
                # ### adv. perturbation
                # util.save_img(best_delta, "Results_SL/{}_Delta_best.png".format(suffix3))
                #
                #
                # ## first attack
                # suffix4 = "id_{}_Mode_{}_True_{}_Pred_{}".format(image_id, arg_mode, true_label, first_class)
                # ## first adv. imag
                # util.save_img(first_adv_img, "Results_SL/{}_Adv_first.png".format(suffix4))
                # ### first adv. perturbation
                # util.save_img(first_delta, "Results_SL/{}_Delta_first.png".format(suffix3))

                ## save data
                succ_count = succ_count + 1
                final_distortion_count.append(l2s_loss_all[-1])
                first_distortion_count.append(first_distortion)
                first_iteration_count.append(first_iteration)
                suffix0 = "retperimage2/id_{}_Mode_{}_{}_lr_{}_decay_{}_case{}_per".format(
                    image_id, arg_mode, args["constraint"], args["lr"],
                    int(args["decay_lr"]), args["exp_code"])
                np.savez("{}".format(suffix0),
                         id=image_id,
                         mode=arg_mode,
                         loss=total_loss,
                         perturbation=l2s_loss_all,
                         best_distortion=best_distortion,
                         first_distortion=first_distortion,
                         first_iteration=first_iteration,
                         # Misspelled key kept so existing readers of old
                         # .npz files keep working; correct key added below.
                         best_iteation=best_iteration,
                         best_iteration=best_iteration,
                         learn_rate=args["lr"],
                         decay_lr=args["decay_lr"],
                         attack_flag=attack_flag)
                ## print
                print("It takes {} iterations to find the first attack".format(
                    first_iteration))
                # print(total_loss)
            else:
                ## save data
                suffix0 = "retperimage2/id_{}_Mode_{}_{}_lr_{}_decay_{}_case{}_per".format(
                    image_id, arg_mode, args["constraint"], args["lr"],
                    int(args["decay_lr"]), args["exp_code"])
                np.savez("{}".format(suffix0),
                         id=image_id,
                         mode=arg_mode,
                         loss=total_loss,
                         perturbation=l2s_loss_all,
                         best_distortion=best_distortion,
                         learn_rate=args["lr"],
                         decay_lr=args["decay_lr"],
                         attack_flag=attack_flag)
                print("Attack Fails")

            sys.stdout.flush()
    # Aggregate statistics over all attacked images.
    print('succ rate:', succ_count / args["image_number"])
    print('average first success l2', np.mean(first_distortion_count))
    print('average first itrs', np.mean(first_iteration_count))
    print('average l2:', np.mean(final_distortion_count), ' best l2:',
          np.min(final_distortion_count), ' worst l2:',
          np.max(final_distortion_count))
Example #2
0
## This program is licenced under the BSD 2-Clause licence,
## contained in the LICENCE file in this directory.

from setup_cifar import CIFAR, CIFARModel
from setup_mnist import MNIST, MNISTModel
from setup_inception import ImageNet, InceptionModel

import tensorflow as tf
import numpy as np

BATCH_SIZE = 1  # number of test images fed per sess.run call

# Sanity-check script: reports the running test-set accuracy of a pretrained
# classifier.
# NOTE(review): the three data/model assignments below overwrite one another,
# so only the ImageNet pair is actually evaluated — the first two lines look
# like alternatives meant to be commented in/out; confirm before relying on
# the printed numbers for MNIST/CIFAR.
with tf.Session() as sess:
    data, model = MNIST(), MNISTModel("models/mnist", sess)
    data, model = CIFAR(), CIFARModel("models/cifar", sess)
    data, model = ImageNet(), InceptionModel(sess)

    # Input placeholder; spatial size and channel count come from the model.
    x = tf.placeholder(
        tf.float32,
        (None, model.image_size, model.image_size, model.num_channels))
    y = model.predict(x)

    r = []  # per-batch boolean arrays: predicted class == true class
    for i in range(0, len(data.test_data), BATCH_SIZE):
        pred = sess.run(y, {x: data.test_data[i:i + BATCH_SIZE]})
        #print(pred)
        #print('real',data.test_labels[i],'pred',np.argmax(pred))
        r.append(
            np.argmax(pred, 1) == np.argmax(data.test_labels[i:i +
                                                             BATCH_SIZE], 1))
        # Running accuracy over everything evaluated so far.
        print(np.mean(r))
def main(args):
    with tf.Session() as sess:
        if (args['dataset'] == 'mnist'):
            data = MNIST()
            inception = False
            if (args['adversarial'] != "none"):
                model = MNISTModel(
                    "models/mnist_cw" + str(args['adversarial']), sess)
            elif (args['temp']):
                model = MNISTModel(
                    "models/mnist-distilled-" + str(args['temp']), sess)
            else:
                model = MNISTModel("models/mnist", sess)
        if (args['dataset'] == "cifar"):
            data = CIFAR()
            inception = False
            if (args['adversarial'] != "none"):
                model = CIFARModel(
                    "models/cifar_cw" + str(args['adversarial']), sess)
            elif (args['temp']):
                model = CIFARModel(
                    "models/cifar-distilled-" + str(args['temp']), sess)
            else:
                model = CIFARModel("models/cifar", sess)
        if (args['dataset'] == "imagenet"):
            data, model = ImageNet(args['seed_imagenet'],
                                   2 * args['numimg']), InceptionModel(sess)
            inception = True

        inputs, targets, labels, true_ids = generate_data(
            data,
            model,
            samples=args['numimg'],
            targeted=not args['untargeted'],
            target_num=args['targetnum'],
            inception=inception,
            train=args['train'],
            seed=args['seed'])
        timestart = time.time()
        if (args['restore_np']):
            if (args['train']):
                adv = np.load(
                    str(args['dataset']) + '_' + str(args['attack']) +
                    '_train.npy')
            else:
                adv = np.load(
                    str(args['dataset']) + '_' + str(args['attack']) + '.npy')
        else:
            if (args['attack'] == 'L2'):
                attack = CarliniL2(sess,
                                   model,
                                   batch_size=args['batch_size'],
                                   max_iterations=args['maxiter'],
                                   confidence=args['conf'],
                                   initial_const=args['init_const'],
                                   binary_search_steps=args['binary_steps'],
                                   targeted=not args['untargeted'],
                                   beta=args['beta'],
                                   abort_early=args['abort_early'])
                adv = attack.attack(inputs, targets)
            if (args['attack'] == 'L1'):
                attack = EADL1(sess,
                               model,
                               batch_size=args['batch_size'],
                               max_iterations=args['maxiter'],
                               confidence=args['conf'],
                               initial_const=args['init_const'],
                               binary_search_steps=args['binary_steps'],
                               targeted=not args['untargeted'],
                               beta=args['beta'],
                               abort_early=args['abort_early'])
                adv = attack.attack(inputs, targets)
            if (args['attack'] == 'EN'):
                attack = EADEN(sess,
                               model,
                               batch_size=args['batch_size'],
                               max_iterations=args['maxiter'],
                               confidence=args['conf'],
                               initial_const=args['init_const'],
                               binary_search_steps=args['binary_steps'],
                               targeted=not args['untargeted'],
                               beta=args['beta'],
                               abort_early=args['abort_early'])
                adv = attack.attack(inputs, targets)
            """If untargeted, pass labels instead of targets"""
            if (args['attack'] == 'FGSM'):
                attack = FGM(sess,
                             model,
                             batch_size=args['batch_size'],
                             ord=np.inf,
                             eps=args['eps'],
                             inception=inception)
                adv = attack.attack(inputs, targets)
            if (args['attack'] == 'FGML1'):
                attack = FGM(sess,
                             model,
                             batch_size=args['batch_size'],
                             ord=1,
                             eps=args['eps'],
                             inception=inception)
                adv = attack.attack(inputs, targets)
            if (args['attack'] == 'FGML2'):
                attack = FGM(sess,
                             model,
                             batch_size=args['batch_size'],
                             ord=2,
                             eps=args['eps'],
                             inception=inception)
                adv = attack.attack(inputs, targets)

            if (args['attack'] == 'IFGSM'):
                attack = IFGM(sess,
                              model,
                              batch_size=args['batch_size'],
                              ord=np.inf,
                              eps=args['eps'],
                              inception=inception)
                adv = attack.attack(inputs, targets)
            if (args['attack'] == 'IFGML1'):
                attack = IFGM(sess,
                              model,
                              batch_size=args['batch_size'],
                              ord=1,
                              eps=args['eps'],
                              inception=inception)
                adv = attack.attack(inputs, targets)
            if (args['attack'] == 'IFGML2'):
                attack = IFGM(sess,
                              model,
                              batch_size=args['batch_size'],
                              ord=2,
                              eps=args['eps'],
                              inception=inception)
                adv = attack.attack(inputs, targets)

        timeend = time.time()

        if args['untargeted']:
            num_targets = 1
        else:
            num_targets = args['targetnum']
        print("Took", timeend - timestart, "seconds to run",
              len(inputs) / num_targets, "random instances.")

        if (args['save_np']):
            if (args['train']):
                np.save(str(args['dataset']) + '_labels_train.npy', labels)
                np.save(
                    str(args['dataset']) + '_' + str(args['attack']) +
                    '_train.npy', adv)
            else:
                np.save(
                    str(args['dataset']) + '_' + str(args['attack'] + '.npy'),
                    adv)

        r_best_ = []
        d_best_l1_ = []
        d_best_l2_ = []
        d_best_linf_ = []
        r_average_ = []
        d_average_l1_ = []
        d_average_l2_ = []
        d_average_linf_ = []
        r_worst_ = []
        d_worst_l1_ = []
        d_worst_l2_ = []
        d_worst_linf_ = []

        #Transferability Tests
        model_ = []
        model_.append(model)
        if (args['targetmodel'] != "same"):
            if (args['targetmodel'] == "dd_100"):
                model_.append(MNISTModel("models/mnist-distilled-100", sess))
        num_models = len(model_)

        if (args['show']):
            if not os.path.exists(
                    str(args['save']) + "/" + str(args['dataset']) + "/" +
                    str(args['attack'])):
                os.makedirs(
                    str(args['save']) + "/" + str(args['dataset']) + "/" +
                    str(args['attack']))
        for m, model in enumerate(model_):
            r_best = []
            d_best_l1 = []
            d_best_l2 = []
            d_best_linf = []
            r_average = []
            d_average_l1 = []
            d_average_l2 = []
            d_average_linf = []
            r_worst = []
            d_worst_l1 = []
            d_worst_l2 = []
            d_worst_linf = []
            for i in range(0, len(inputs), num_targets):
                pred = []
                for j in range(i, i + num_targets):
                    if inception:
                        pred.append(
                            np.reshape(model.model.predict(adv[j:j + 1]),
                                       (data.test_labels[0:1].shape)))
                    else:
                        pred.append(model.model.predict(adv[j:j + 1]))

                dist_l1 = 1e10
                dist_l1_index = 1e10
                dist_linf = 1e10
                dist_linf_index = 1e10
                dist_l2 = 1e10
                dist_l2_index = 1e10
                for k, j in enumerate(range(i, i + num_targets)):
                    success = False
                    if (args['untargeted']):
                        if (np.argmax(pred[k], 1) != np.argmax(
                                targets[j:j + 1], 1)):
                            success = True
                    else:
                        if (np.argmax(pred[k],
                                      1) == np.argmax(targets[j:j + 1], 1)):
                            success = True
                    if (success):
                        if (np.sum(np.abs(adv[j] - inputs[j])) < dist_l1):
                            dist_l1 = np.sum(np.abs(adv[j] - inputs[j]))
                            dist_l1_index = j
                        if (np.amax(np.abs(adv[j] - inputs[j])) < dist_linf):
                            dist_linf = np.amax(np.abs(adv[j] - inputs[j]))
                            dist_linf_index = j
                        if ((np.sum((adv[j] - inputs[j])**2)**.5) < dist_l2):
                            dist_l2 = (np.sum((adv[j] - inputs[j])**2)**.5)
                            dist_l2_index = j
                if (dist_l1_index != 1e10):
                    d_best_l2.append((np.sum(
                        (adv[dist_l2_index] - inputs[dist_l2_index])**2)**.5))
                    d_best_l1.append(
                        np.sum(
                            np.abs(adv[dist_l1_index] -
                                   inputs[dist_l1_index])))
                    d_best_linf.append(
                        np.amax(
                            np.abs(adv[dist_linf_index] -
                                   inputs[dist_linf_index])))
                    r_best.append(1)
                else:
                    r_best.append(0)

                rand_int = np.random.randint(i, i + num_targets)
                if inception:
                    pred_r = np.reshape(
                        model.model.predict(adv[rand_int:rand_int + 1]),
                        (data.test_labels[0:1].shape))
                else:
                    pred_r = model.model.predict(adv[rand_int:rand_int + 1])
                success_average = False
                if (args['untargeted']):
                    if (np.argmax(pred_r, 1) != np.argmax(
                            targets[rand_int:rand_int + 1], 1)):
                        success_average = True
                else:
                    if (np.argmax(pred_r, 1) == np.argmax(
                            targets[rand_int:rand_int + 1], 1)):
                        success_average = True
                if success_average:
                    r_average.append(1)
                    d_average_l2.append(
                        np.sum((adv[rand_int] - inputs[rand_int])**2)**.5)
                    d_average_l1.append(
                        np.sum(np.abs(adv[rand_int] - inputs[rand_int])))
                    d_average_linf.append(
                        np.amax(np.abs(adv[rand_int] - inputs[rand_int])))

                else:
                    r_average.append(0)

                dist_l1 = 0
                dist_l1_index = 1e10
                dist_linf = 0
                dist_linf_index = 1e10
                dist_l2 = 0
                dist_l2_index = 1e10
                for k, j in enumerate(range(i, i + num_targets)):
                    failure = True
                    if (args['untargeted']):
                        if (np.argmax(pred[k], 1) != np.argmax(
                                targets[j:j + 1], 1)):
                            failure = False
                    else:
                        if (np.argmax(pred[k],
                                      1) == np.argmax(targets[j:j + 1], 1)):
                            failure = False
                    if failure:
                        r_worst.append(0)
                        dist_l1_index = 1e10
                        dist_l2_index = 1e10
                        dist_linf_index = 1e10
                        break
                    else:
                        if (np.sum(np.abs(adv[j] - inputs[j])) > dist_l1):
                            dist_l1 = np.sum(np.abs(adv[j] - inputs[j]))
                            dist_l1_index = j
                        if (np.amax(np.abs(adv[j] - inputs[j])) > dist_linf):
                            dist_linf = np.amax(np.abs(adv[j] - inputs[j]))
                            dist_linf_index = j
                        if ((np.sum((adv[j] - inputs[j])**2)**.5) > dist_l2):
                            dist_l2 = (np.sum((adv[j] - inputs[j])**2)**.5)
                            dist_l2_index = j
                if (dist_l1_index != 1e10):
                    d_worst_l2.append((np.sum(
                        (adv[dist_l2_index] - inputs[dist_l2_index])**2)**.5))
                    d_worst_l1.append(
                        np.sum(
                            np.abs(adv[dist_l1_index] -
                                   inputs[dist_l1_index])))
                    d_worst_linf.append(
                        np.amax(
                            np.abs(adv[dist_linf_index] -
                                   inputs[dist_linf_index])))
                    r_worst.append(1)

                if (args['show'] and m == (num_models - 1)):
                    for j in range(i, i + num_targets):
                        target_id = np.argmax(targets[j:j + 1], 1)
                        label_id = np.argmax(labels[j:j + 1], 1)
                        prev_id = np.argmax(
                            np.reshape(model.model.predict(inputs[j:j + 1]),
                                       (data.test_labels[0:1].shape)), 1)
                        adv_id = np.argmax(
                            np.reshape(model.model.predict(adv[j:j + 1]),
                                       (data.test_labels[0:1].shape)), 1)
                        suffix = "id{}_seq{}_lbl{}_prev{}_adv{}_{}_l1_{:.3f}_l2_{:.3f}_linf_{:.3f}".format(
                            true_ids[i], target_id, label_id, prev_id, adv_id,
                            adv_id == target_id,
                            np.sum(np.abs(adv[j] - inputs[j])),
                            np.sum((adv[j] - inputs[j])**2)**.5,
                            np.amax(np.abs(adv[j] - inputs[j])))

                        show(
                            inputs[j:j + 1],
                            str(args['save']) + "/" + str(args['dataset']) +
                            "/" + str(args['attack']) +
                            "/original_{}.png".format(suffix))
                        show(
                            adv[j:j + 1],
                            str(args['save']) + "/" + str(args['dataset']) +
                            "/" + str(args['attack']) +
                            "/adversarial_{}.png".format(suffix))
            if (m != (num_models - 1)):
                lbl = "Src_"
                if (num_models > 2):
                    lbl += str(m) + "_"
            else:
                lbl = "Tgt_"
            if (num_targets > 1):
                print(lbl + 'best_case_L1_mean', np.mean(d_best_l1))
                print(lbl + 'best_case_L2_mean', np.mean(d_best_l2))
                print(lbl + 'best_case_Linf_mean', np.mean(d_best_linf))
                print(lbl + 'best_case_prob', np.mean(r_best))
                print(lbl + 'average_case_L1_mean', np.mean(d_average_l1))
                print(lbl + 'average_case_L2_mean', np.mean(d_average_l2))
                print(lbl + 'average_case_Linf_mean', np.mean(d_average_linf))
                print(lbl + 'average_case_prob', np.mean(r_average))
                print(lbl + 'worst_case_L1_mean', np.mean(d_worst_l1))
                print(lbl + 'worst_case_L2_mean', np.mean(d_worst_l2))
                print(lbl + 'worst_case_Linf_mean', np.mean(d_worst_linf))
                print(lbl + 'worst_case_prob', np.mean(r_worst))
            else:
                print(lbl + 'L1_mean', np.mean(d_average_l1))
                print(lbl + 'L2_mean', np.mean(d_average_l2))
                print(lbl + 'Linf_mean', np.mean(d_average_linf))
                print(lbl + 'success_prob', np.mean(r_average))
Exemple #4
0
def main(args):
    """Attack a temperature/thermometer-encoded classifier and report stats.

    Runs either the white-box C&W L2 attack (``args['attack'] == 'white'``)
    or the ZOO-style black-box attack on images the model classifies
    correctly, re-encodes each adversarial image channel-by-channel, and
    prints per-image [STATS] lines plus clean/adversarial accuracy.

    Args:
        args: dict of command-line options (dataset, attack, budgets, ...).

    Side effects: writes original/adversarial/diff PNGs under
    ``{save}/{dataset}/`` and prints progress to stdout.
    """
    # Per-channel encoder applied before the model sees an image.
    # NOTE(review): assumes encoder(level=...) matches the encoding the
    # model was trained with -- confirm against the training pipeline.
    temp_encoder = encoder(level=args['level'])
    with tf.Session() as sess:
        use_log = not args['use_zvalue']  # loss on log-probs vs. raw z-values
        is_inception = args['dataset'] == "imagenet"
        # load network
        print('Loading model', args['dataset'])
        if args['dataset'] == "mnist":
            data, model = MNIST(), MNISTModel("models/mnist", sess, use_log)
            # data, model =  MNIST(), MNISTModel("models/mnist-distilled-100", sess, use_log)
        elif args['dataset'] == "cifar10":
            #data, model = CIFAR(), CIFARModel("models/cifar", sess, use_log)
            # data, model = CIFAR(), CIFARModel("models/cifar-distilled-100", sess, use_log)
            data, model = CIFAR(), CIFAR_WIDE("models/wide_resnet", sess,
                                              use_log)
        elif args['dataset'] == "imagenet":
            data, model = ImageNet(), InceptionModel(sess, use_log)
        print('Done...')
        # numimg == 0 means "use every remaining test image after firstimg".
        if args['numimg'] == 0:
            args['numimg'] = len(data.test_labels) - args['firstimg']
        print('Using', args['numimg'], 'test images')
        # load attack module
        if args['attack'] == "white":
            # batch size 1, optimize on 1 image at a time, rather than optimizing images jointly
            attack = CarliniL2(sess,
                               model,
                               batch_size=1,
                               max_iterations=args['maxiter'],
                               print_every=args['print_every'],
                               early_stop_iters=args['early_stop_iters'],
                               confidence=0,
                               learning_rate=args['lr'],
                               initial_const=args['init_const'],
                               binary_search_steps=args['binary_steps'],
                               targeted=not args['untargeted'],
                               use_log=use_log,
                               adam_beta1=args['adam_beta1'],
                               adam_beta2=args['adam_beta2'])
        else:
            # batch size 128, optimize on 128 coordinates of a single image
            attack = BlackBoxL2(sess,
                                model,
                                batch_size=128,
                                max_iterations=args['maxiter'],
                                print_every=args['print_every'],
                                early_stop_iters=args['early_stop_iters'],
                                confidence=0,
                                learning_rate=args['lr'],
                                initial_const=args['init_const'],
                                binary_search_steps=args['binary_steps'],
                                targeted=not args['untargeted'],
                                use_log=use_log,
                                use_tanh=args['use_tanh'],
                                use_resize=args['use_resize'],
                                adam_beta1=args['adam_beta1'],
                                adam_beta2=args['adam_beta2'],
                                reset_adam_after_found=args['reset_adam'],
                                solver=args['solver'],
                                save_ckpts=args['save_ckpts'],
                                load_checkpoint=args['load_ckpt'],
                                start_iter=args['start_iter'],
                                init_size=args['init_size'],
                                use_importance=not args['uniform'])

        # Seed both RNGs so image/target selection is reproducible.
        random.seed(args['seed'])
        np.random.seed(args['seed'])
        print('Generate data')
        # encoding_all holds the pre-encoded versions of the inputs, used
        # for the clean-classification check below.
        all_inputs, all_targets, all_labels, all_true_ids, encoding_all = generate_data(
            data,
            samples=args['numimg'],
            targeted=not args['untargeted'],
            start=args['firstimg'],
            inception=is_inception)
        print('Done...')
        #print('all_inputs : ', all_inputs.shape)
        #print('encoding_all : ',encoding_all.shape)
        os.system("mkdir -p {}/{}".format(args['save'], args['dataset']))
        img_no = 0           # images actually attacked (correctly classified)
        total_success = 0    # successful attacks within the distortion cap
        l2_total = 0.0       # cumulative L2 distortion over successes
        origin_correct = 0   # clean-model correct predictions
        adv_correct = 0      # model still correct on the adversarial image
        for i in range(all_true_ids.size):
            print(' adversarial_image_no: ', i)
            inputs = all_inputs[i:i + 1]
            encoding_inputs = encoding_all[i:i + 1]
            #print('encoding_inputs shape: ', encoding_inputs)
            targets = all_targets[i:i + 1]
            labels = all_labels[i:i + 1]
            print("true labels:", np.argmax(labels), labels)
            print("target:", np.argmax(targets), targets)
            # test if the image is correctly classified
            original_predict = model.model.predict(encoding_inputs)
            original_predict = np.squeeze(original_predict)
            original_prob = np.sort(original_predict)
            original_class = np.argsort(original_predict)
            print("original probabilities:", original_prob[-1:-6:-1])
            print("original classification:", original_class[-1:-6:-1])
            print("original probabilities (most unlikely):", original_prob[:6])
            print("original classification (most unlikely):",
                  original_class[:6])
            if original_class[-1] != np.argmax(labels):
                print(
                    "skip wrongly classified image no. {}, original class {}, classified as {}"
                    .format(i, np.argmax(labels), original_class[-1]))
                continue
            # labels is 1 x num_classes, so this adds a length-1 boolean array.
            origin_correct += np.argmax(labels, 1) == original_class[-1]

            img_no += 1
            timestart = time.time()
            # Attack operates on the raw (unencoded) image.
            adv, const = attack.attack_batch(inputs, targets)
            if type(const) is list:
                const = const[0]
            if len(adv.shape) == 3:
                adv = adv.reshape((1, ) + adv.shape)
            timeend = time.time()
            l2_distortion = np.sum((adv - inputs)**2)**.5

            # Re-encode the adversarial image the same way the clean input
            # was encoded: NHWC -> NCHW, encode each channel, back to NHWC.
            encode_adv = np.transpose(adv, axes=(0, 3, 1, 2))
            channel0, channel1, channel2 = encode_adv[:,
                                                      0, :, :], encode_adv[:,
                                                                           1, :, :], encode_adv[:,
                                                                                                2, :, :]
            channel0, channel1, channel2 = temp_encoder.tempencoding(
                channel0), temp_encoder.tempencoding(
                    channel1), temp_encoder.tempencoding(channel2)
            encode_adv = np.concatenate([channel0, channel1, channel2], axis=1)
            encode_adv = np.transpose(encode_adv, axes=(0, 2, 3, 1))

            # Classify the re-encoded adversarial image.
            adversarial_predict = model.model.predict(encode_adv)
            adversarial_predict = np.squeeze(adversarial_predict)
            adversarial_prob = np.sort(adversarial_predict)
            adversarial_class = np.argsort(adversarial_predict)
            print("adversarial probabilities:", adversarial_prob[-1:-6:-1])
            print("adversarial classification:", adversarial_class[-1:-6:-1])

            adv_correct += np.argmax(labels, 1) == adversarial_class[-1]

            success = False
            if args['untargeted']:
                if adversarial_class[-1] != original_class[-1]:
                    success = True
            else:
                if adversarial_class[-1] == np.argmax(targets):
                    success = True
            # Attacks with huge distortion are not counted as successes.
            if l2_distortion > 20.0:
                success = False
            if success:
                total_success += 1
                l2_total += l2_distortion
            suffix = "id{}_seq{}_prev{}_adv{}_{}_dist{}".format(
                all_true_ids[i], i, original_class[-1], adversarial_class[-1],
                success, l2_distortion)
            print("Saving to", suffix)
            show(
                inputs,
                "{}/{}/{}_original_{}.png".format(args['save'],
                                                  args['dataset'], img_no,
                                                  suffix))
            show(
                adv,
                "{}/{}/{}_adversarial_{}.png".format(args['save'],
                                                     args['dataset'], img_no,
                                                     suffix))
            show(
                adv - inputs,
                "{}/{}/{}_diff_{}.png".format(args['save'], args['dataset'],
                                              img_no, suffix))
            print(
                "[STATS][L1] total = {}, seq = {}, id = {}, time = {:.3f}, success = {}, const = {:.6f}, prev_class = {}, new_class = {}, distortion = {:.5f}, success_rate = {:.3f}, l2_avg = {:.5f}"
                .format(img_no, i, all_true_ids[i], timeend - timestart,
                        success, const, original_class[-1],
                        adversarial_class[-1], l2_distortion,
                        total_success / float(img_no),
                        0 if total_success == 0 else l2_total / total_success))
            sys.stdout.flush()

        # NOTE(review): accuracies divide by the full candidate count, not
        # by the number of images actually attacked -- confirm intended.
        print(' origin accuracy : ',
              100.0 * origin_correct / all_true_ids.size)
        print(' adv accuracy : ', 100.0 * adv_correct / all_true_ids.size)
    else:  # untargeted attack, target_label is true label
        Loss1 = const * np.max(
            [np.log(orig_prob[:, target_label] + 1e-10) - np.log(np.amax(tmp, 1) + 1e-10), [-kappa] * n], 0)

    Loss1 = np.mean(Loss1)
    # Loss2 = np.linalg.norm(img[0] - orig_img[0]) ** 2  ### squared norm # check img[0] - orig_img[0],
    return Loss1  # , Loss2


# data, model = MNIST(), MNISTModel("models/mnist", sess, False)
# data, model = CIFAR(), CIFARModel("models/cifar", sess, True)
# Universal-perturbation setup: one dataset and two target models, each
# loaded into its own TF graph + session so their variables don't collide.
data = ImageNet_Universal(SEED)
g1 = tf.Graph()
with g1.as_default():
    sess = tf.Session(graph=g1)
    model1 = InceptionModel(sess, True)

g3 = tf.Graph()
with g3.as_default():
    sess = tf.Session(graph=g3)
    model3 = ResnetModel50(sess, True)

models = [model1, model3]  # ensemble the perturbation must fool

kappa = 0  # attack confidence margin

const = 100  # weight on the attack loss relative to the distortion term
arg_targeted_attack = False  # untargeted: push away from the true label
true_classes = [18, 162]  # ImageNet class ids to draw source images from
class_n = 20  # images per class -- NOTE(review): confirm against later use
orig_img, true_label, target_label = [], [], []
        else:
            inputs.append(data.test_data[start + i])
            targets.append(data.test_labels[start + i])

    inputs = np.array(inputs)
    targets = np.array(targets)

    return inputs, targets


if __name__ == "__main__":
    # Demo driver: run the ZOO black-box attack on a single ImageNet image.
    # (Truncated in this chunk; the attack call follows timestart.)
    with tf.Session() as sess:
        use_log = True  # attack loss operates on log-probabilities
        # data, model =  MNIST(), MNISTModel("models/mnist", sess, use_log)
        # data, model = CIFAR(), CIFARModel("models/cifar", sess, use_log)
        data, model = ImageNet(), InceptionModel(sess, use_log)
        # Black-box attack estimating gradients 128 coordinates at a time.
        attack = BlackBoxL2(sess,
                            model,
                            batch_size=128,
                            max_iterations=15000,
                            confidence=0,
                            use_log=use_log)

        # Targeted data generation; inception=False here even though the
        # model is Inception -- NOTE(review): confirm this is intentional.
        inputs, targets = generate_data(data,
                                        samples=1,
                                        targeted=True,
                                        start=6,
                                        inception=False)
        # Keep only the second (input, target) pair.
        inputs = inputs[1:2]
        targets = targets[1:2]
        timestart = time.time()
Exemple #7
0
def main(args):
    """Run the C&W L2 ('L2C') or score-based ZO-ADMM ('L2BB') attack.

    Loads the requested dataset/model pair, generates attack data, runs
    the attack, optionally saves training arrays, and reports distortion
    statistics (targeted vs. untargeted reporting paths).

    Args:
        args: dict of command-line options; key names and accepted values
            are unchanged from the original interface.

    Raises:
        ValueError: if ``args['dataset']`` is not one of the known names.
    """
    #   gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.7)
    with tf.Session() as sess:
        # Dataset/model selection.  Previously a chain of independent
        # `if`s with no fallback: an unknown dataset left `data`, `model`,
        # `handpick` and `inception` unbound, surfacing as a confusing
        # NameError much later.  Fail fast with a clear error instead.
        if args['dataset'] == 'mnist':
            data, model = MNIST(), MNISTModel("models/mnist", sess)
            handpick = False
            inception = False
        elif args['dataset'] == "cifar":
            data, model = CIFAR(), CIFARModel("models/cifar", sess)
            handpick = True
            inception = False
        elif args['dataset'] == "imagenet":
            data, model = ImageNet(args['seed_imagenet']), InceptionModel(sess)
            handpick = True
            inception = True
        else:
            raise ValueError("unknown dataset: {}".format(args['dataset']))

        # Optional model overrides: adversarially-trained or distilled nets.
        if args['adversarial'] != "none":
            model = MNISTModel(
                "models/mnist_cwl2_admm" + str(args['adversarial']), sess)

        if args['temp'] and args['dataset'] == 'mnist':
            model = MNISTModel("models/mnist-distilled-" + str(args['temp']),
                               sess)
        if args['temp'] and args['dataset'] == 'cifar':
            model = CIFARModel("models/cifar-distilled-" + str(args['temp']),
                               sess)

        inputs, targets, labels, true_ids = generate_data(
            data,
            model,
            samples=args['numimg'],
            targeted=args['targeted'],
            start=0,
            inception=inception,
            handpick=handpick,
            seed=args['seed'])

        #print(true_ids)
        # NOTE(review): attack values other than 'L2C'/'L2BB' leave
        # `attack` unbound and fail at attack.attack() below.
        if args['attack'] == 'L2C':
            attack = CarliniL2(sess,
                               model,
                               batch_size=args['batch_size'],
                               max_iterations=args['maxiter'],
                               confidence=args['conf'],
                               targeted=args['targeted'],
                               binary_search_steps=args['binary_steps'],
                               abort_early=args['abort_early'])

        if args['attack'] == 'L2BB':
            # score-based ZO-ADMM attack
            attack = LADMMBB(sess,
                             model,
                             batch_size=args['batch_size'],
                             max_iterations=args['maxiter'],
                             targeted=args['targeted'],
                             confidence=args['conf'],
                             binary_search_steps=args['iteration_steps'],
                             ro=args['ro'],
                             abort_early=args['abort_early'],
                             gama=args['gama'],
                             epi=args['epi'],
                             alpha=args['alpha'])

        timestart = time.time()
        #    adv = attack.attack(inputs, targets)
        adv, querycount, queryl2 = attack.attack(inputs, targets)
        timeend = time.time()
        print("Took", timeend - timestart, "seconds to run", len(inputs),
              "samples.\n")

        if args['train']:
            np.save('labels_train.npy', labels)
            np.save(str(args['attack']) + '_train.npy', adv)

        # Evaluate against the distilled model when confidence > 0.
        if (args['conf'] != 0):
            model = MNISTModel("models/mnist-distilled-100", sess)

        if args['targeted']:
            l1_l2_li_computation(args, data, model, adv, inception, inputs,
                                 targets, labels, true_ids, querycount,
                                 queryl2)
        else:
            l2_computation(args, data, model, adv, inception, inputs, targets,
                           labels, true_ids, querycount, queryl2)
Exemple #8
0
def main(args):
    """Attack a classifier with the C&W white-box or ZOO black-box attack.

    Iterates over generated test images, skips those the model already
    misclassifies, attacks the rest, and logs per-image [STATS] lines to
    stdout and to ``{save}/report.txt``.

    Args:
        args: dict of command-line options (dataset, attack, budgets, ...).

    Side effects: saves original/adversarial/diff PNGs under
    ``{save}/{dataset}/`` and appends to ``{save}/report.txt``.
    """

    # Grow GPU memory on demand instead of reserving it all up front.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    with tf.Session(config=config) as sess:
        use_log = not args['use_zvalue']  # loss on log-probs vs. raw z-values
        is_inception = args['dataset'] == "imagenet"
        # load network
        print('Loading model', args['dataset'])
        if args['dataset'] == "mnist":
            data, model = MNIST(), MNISTModel("models/mnist", sess, use_log)
            # data, model =  MNIST(), MNISTModel("models/mnist-distilled-100", sess, use_log)
        elif args['dataset'] == "cifar10":
            data, model = CIFAR(), CIFARModel("models/cifar", sess, use_log)
            # data, model = CIFAR(), CIFARModel("models/cifar-distilled-100", sess, use_log)
        elif args['dataset'] == "imagenet":
            data, model = ImageNet(), InceptionModel(sess, use_log)
        print('Done...')
        # numimg == 0 means "use every remaining test image after firstimg".
        if args['numimg'] == 0:
            args['numimg'] = len(data.test_labels) - args['firstimg']
        print('Using', args['numimg'], 'test images')
        # load attack module
        if args['attack'] == "white":
            # batch size 1, optimize on 1 image at a time, rather than optimizing images jointly
            attack = CarliniL2(sess,
                               model,
                               batch_size=1,
                               max_iterations=args['maxiter'],
                               print_every=args['print_every'],
                               early_stop_iters=args['early_stop_iters'],
                               confidence=0,
                               learning_rate=args['lr'],
                               initial_const=args['init_const'],
                               binary_search_steps=args['binary_steps'],
                               targeted=not args['untargeted'],
                               use_log=use_log,
                               adam_beta1=args['adam_beta1'],
                               adam_beta2=args['adam_beta2'])
        else:
            # batch size 128, optimize on 128 coordinates of a single image
            attack = BlackBoxL2(sess,
                                model,
                                batch_size=128,
                                max_iterations=args['maxiter'],
                                print_every=args['print_every'],
                                early_stop_iters=args['early_stop_iters'],
                                confidence=0,
                                learning_rate=args['lr'],
                                initial_const=args['init_const'],
                                binary_search_steps=args['binary_steps'],
                                targeted=not args['untargeted'],
                                use_log=use_log,
                                use_tanh=args['use_tanh'],
                                use_resize=args['use_resize'],
                                adam_beta1=args['adam_beta1'],
                                adam_beta2=args['adam_beta2'],
                                reset_adam_after_found=args['reset_adam'],
                                solver=args['solver'],
                                save_ckpts=args['save_ckpts'],
                                load_checkpoint=args['load_ckpt'],
                                start_iter=args['start_iter'],
                                init_size=args['init_size'],
                                use_importance=not args['uniform'])

        # Seed both RNGs so image/target selection is reproducible.
        random.seed(args['seed'])
        np.random.seed(args['seed'])
        print('Generate data')
        all_inputs, all_targets, all_labels, all_true_ids = generate_data(
            data,
            samples=args['numimg'],
            targeted=not args['untargeted'],
            start=args['firstimg'],
            inception=is_inception)
        print('Done...')
        os.system("mkdir -p {}/{}".format(args['save'], args['dataset']))
        img_no = 0           # images actually attacked (correctly classified)
        total_success = 0    # successes within the distortion cap
        l2_total = 0.0       # cumulative L2 distortion over successes
        for i in range(all_true_ids.size):
            inputs = all_inputs[i:i + 1]
            targets = all_targets[i:i + 1]
            labels = all_labels[i:i + 1]
            print("true labels:", np.argmax(labels), labels)
            print("target:", np.argmax(targets), targets)
            # test if the image is correctly classified
            original_predict = model.model.predict(inputs)
            original_predict = np.squeeze(original_predict)
            original_prob = np.sort(original_predict)
            original_class = np.argsort(original_predict)
            print("original probabilities:", original_prob[-1:-6:-1])
            print("original classification:", original_class[-1:-6:-1])
            print("original probabilities (most unlikely):", original_prob[:6])
            print("original classification (most unlikely):",
                  original_class[:6])
            if original_class[-1] != np.argmax(labels):
                print(
                    "skip wrongly classified image no. {}, original class {}, classified as {}"
                    .format(i, np.argmax(labels), original_class[-1]))
                continue

            img_no += 1
            timestart = time.time()
            adv, const = attack.attack_batch(inputs, targets)
            if type(const) is list:
                const = const[0]
            if len(adv.shape) == 3:
                adv = adv.reshape((1, ) + adv.shape)
            timeend = time.time()
            l2_distortion = np.sum((adv - inputs)**2)**.5
            adversarial_predict = model.model.predict(adv)
            adversarial_predict = np.squeeze(adversarial_predict)
            adversarial_prob = np.sort(adversarial_predict)
            adversarial_class = np.argsort(adversarial_predict)
            print("adversarial probabilities:", adversarial_prob[-1:-6:-1])
            print("adversarial classification:", adversarial_class[-1:-6:-1])
            success = False
            if args['untargeted']:
                if adversarial_class[-1] != original_class[-1]:
                    success = True
            else:
                if adversarial_class[-1] == np.argmax(targets):
                    success = True
            # Attacks with huge distortion are not counted as successes.
            if l2_distortion > 20.0:
                success = False
            if success:
                total_success += 1
                l2_total += l2_distortion
            suffix = "id{}_seq{}_prev{}_adv{}_{}_dist{}".format(
                all_true_ids[i], i, original_class[-1], adversarial_class[-1],
                success, l2_distortion)
            print("Saving to", suffix)
            show(
                inputs,
                "{}/{}/{}_original_{}.png".format(args['save'],
                                                  args['dataset'], img_no,
                                                  suffix))
            show(
                adv,
                "{}/{}/{}_adversarial_{}.png".format(args['save'],
                                                     args['dataset'], img_no,
                                                     suffix))
            show(
                adv - inputs,
                "{}/{}/{}_diff_{}.png".format(args['save'], args['dataset'],
                                              img_no, suffix))
            print(
                "[STATS][L1] total = {}, seq = {}, id = {}, time = {:.3f}, success = {}, const = {:.6f}, prev_class = {}, new_class = {}, distortion = {:.5f}, success_rate = {:.3f}, l2_avg = {:.5f}"
                .format(img_no, i, all_true_ids[i], timeend - timestart,
                        success, const, original_class[-1],
                        adversarial_class[-1], l2_distortion,
                        total_success / float(img_no),
                        0 if total_success == 0 else l2_total / total_success))

            # Append the same stats line to a persistent report file.
            with open(args['save'] + "/report.txt", 'a') as f:
                f.write("*" * 20)
                to_write = "[STATS][L1] total = {}, seq = {}, id = {}, time = {:.3f}, success = {}, const = {:.6f}, prev_class = {}, new_class = {}, distortion = {:.5f}, success_rate = {:.3f}, l2_avg = {:.5f}".format(
                    img_no, i, all_true_ids[i], timeend - timestart, success,
                    const, original_class[-1], adversarial_class[-1],
                    l2_distortion, total_success / float(img_no),
                    0 if total_success == 0 else l2_total / total_success)
                f.write(to_write)
                f.write("*" * 20)
                f.write("\n\n")

            sys.stdout.flush()
Exemple #9
0
def main(args):
    """Run the C&W L2 ('L2C') or ADMM L0 ('L0A') attack and report stats.

    Loads the requested dataset/model pair, generates targeted attack
    data, runs the attack, optionally saves training arrays, and reports
    either L1/L2/Linf or L0 distortion statistics.

    Args:
        args: dict of command-line options; key names and accepted values
            are unchanged from the original interface.

    Raises:
        ValueError: if ``args['dataset']`` is not one of the known names.
    """
    with tf.Session() as sess:
        # Dataset/model selection.  Previously a chain of independent
        # `if`s with no fallback: an unknown dataset left `data`, `model`,
        # `handpick` and `inception` unbound, surfacing as a confusing
        # NameError much later.  Fail fast with a clear error instead.
        if args['dataset'] == 'mnist':
            data, model = MNIST(), MNISTModel("models/mnist", sess)
            handpick = False
            inception = False
        elif args['dataset'] == "cifar":
            data, model = CIFAR(), CIFARModel("models/cifar", sess)
            handpick = True
            inception = False
        elif args['dataset'] == "imagenet":
            data, model = ImageNet(args['seed_imagenet']), InceptionModel(sess)
            handpick = True
            inception = True
        else:
            raise ValueError("unknown dataset: {}".format(args['dataset']))

        # Optional model overrides: adversarially-trained or distilled nets.
        if args['adversarial'] != "none":
            model = MNISTModel(
                "models/mnist_cwl2_admm" + str(args['adversarial']), sess)

        if args['temp'] and args['dataset'] == 'mnist':
            model = MNISTModel("models/mnist-distilled-" + str(args['temp']),
                               sess)
        if args['temp'] and args['dataset'] == 'cifar':
            model = CIFARModel("models/cifar-distilled-" + str(args['temp']),
                               sess)

        inputs, targets, labels, true_ids = generate_data(
            data,
            model,
            samples=args['numimg'],
            targeted=True,
            start=0,
            inception=inception,
            handpick=handpick,
            seed=args['seed'])

        #print(true_ids)
        # NOTE(review): attack values other than 'L2C'/'L0A' leave
        # `attack` unbound and fail at attack.attack() below.
        if args['attack'] == 'L2C':
            attack = CarliniL2(sess,
                               model,
                               batch_size=args['batch_size'],
                               max_iterations=args['maxiter'],
                               confidence=args['conf'],
                               binary_search_steps=args['binary_steps'],
                               abort_early=args['abort_early'])

        if args['attack'] == 'L0A':
            attack = ADMML0(sess,
                            model,
                            batch_size=args['batch_size'],
                            max_iterations=args['maxiter'],
                            confidence=args['conf'],
                            binary_search_steps=args['iteration_steps'],
                            ro=args['ro'],
                            abort_early=args['abort_early'])

        timestart = time.time()
        adv = attack.attack(inputs, targets)
        timeend = time.time()

        print("Took", timeend - timestart, "seconds to run", len(inputs),
              "samples.\n")

        if args['train']:
            np.save('labels_train.npy', labels)
            np.save(str(args['attack']) + '_train.npy', adv)

        # Evaluate against the distilled model when confidence > 0.
        if (args['conf'] != 0):
            model = MNISTModel("models/mnist-distilled-100", sess)

        # L0-family attacks get L0 stats; everything else gets L1/L2/Linf.
        if args['attack'] != 'L0A' and args['attack'] != 'L0AE' and args[
                'attack'] != 'L0C' and args['attack'] != 'L0AE2':
            l1_l2_li_computation(args, data, model, adv, inception, inputs,
                                 targets, labels, true_ids)
        else:
            l0_computation(args, data, model, adv, inception, inputs, targets,
                           labels, true_ids)
Exemple #10
0
def main(args):
    """Run a targeted attack over a combined source/T image set and report
    success rates under the attacked (modified) and restored models.

    Flow: select dataset+model from ``args['dataset']``, optionally swap in
    an adversarially trained or distilled model, build the attack set with
    ``generate_data_ST``, run the selected attack, then score the result
    with the ``evaluate_perturbation*`` helpers and print success-rate and
    distance statistics.

    Args:
        args: dict of parsed command-line options (keys used here include
            'dataset', 'adversarial', 'temp', 'numimg', 'numimgT', 'seed',
            'attack', 'batch_size', 'maxiter', 'conf', 'kernel_bias', ...).
    """

    with tf.Session() as sess:
        # --- dataset / classifier selection ------------------------------
        # `handpick` and `inception` steer generate_data_ST below.
        if args['dataset'] == 'mnist':
            data, model = MNIST(), MNISTModel("models/mnist", sess)
            handpick = False
            inception = False
        if args['dataset'] == "cifar":
            data, model = CIFAR(), CIFARModel("models/cifar", sess)
            handpick = True
            inception = False
        if args['dataset'] == "imagenet":
            data, model = ImageNet(args['seed_imagenet']), InceptionModel(sess)
            handpick = True
            inception = True

        # Optional model substitutions (adversarially trained / distilled).
        # NOTE(review): these assume the MNIST/CIFAR model classes — they
        # would silently replace an imagenet model too; confirm intended.
        if args['adversarial'] != "none":
            model = MNISTModel("models/mnist_cwl2_admm" + str(args['adversarial']), sess)

        if args['temp'] and args['dataset'] == 'mnist':
            model = MNISTModel("models/mnist-distilled-" + str(args['temp']), sess)
        if args['temp'] and args['dataset'] == 'cifar':
            model = CIFARModel("models/cifar-distilled-" + str(args['temp']), sess)

        # Attack set: `numimg` source (S) images plus `numimgT` extra (T)
        # images, always targeted.
        inputs, targets, labels, true_ids = generate_data_ST(data, model, samples=args['numimg'],
                                                             samplesT=args['numimgT'], targeted=True,
                                        start=0, inception=inception, handpick=handpick, seed=args['seed'])
        #print(true_ids)
        # --- attack selection --------------------------------------------
        # NOTE(review): if args['attack'] matches neither branch, `attack`
        # is unbound and attack.attack(...) below raises NameError.
        if args['attack'] == 'L2C':
            attack = CarliniL2(sess, model, batch_size=args['batch_size'], max_iterations=args['maxiter'],
                               confidence=args['conf'],
                               binary_search_steps=args['binary_steps'],
                               abort_early=args['abort_early'])

        if args['attack'] == 'L2LA2':
            attack = LADMML2re(sess, model, batch_size=args['batch_size'], max_iterations=args['maxiter'],
                               layernum=args['layer_number'], use_kernel=args['use_kernel'],
                               confidence=args['conf'], binary_search_steps=args['iteration_steps'], ro=args['ro'],
                               abort_early=args['abort_early'])


        timestart = time.time()
        adv = attack.attack(inputs, targets)
        timeend = time.time()

        print("Took", timeend - timestart, "seconds to run", len(inputs), "samples.\n")

        # When a confidence margin is used, score against the distilled model.
        if args['conf'] != 0:
            model = MNISTModel("models/mnist-distilled-100", sess)

        if args['kernel_bias']:
            # Evaluate with kernel/bias modification applied, on the attack
            # set and full test set, then again after restoring the model.
            EP = evaluate_perturbation_kb(args, sess, model, inputs)
            scores, l2 = EP(inputs, targets, adv)
            EPT = evaluate_perturbation_testset(args, sess, model, data.test_data)
            test_scores = EPT(data.test_data, data.test_labels)
            EP2 = evaluate_perturbation_kb_restore(args, sess, model, inputs)
            scores2 = EP2(inputs, targets, adv)
            EPT2 = evaluate_perturbation_testset(args, sess, model, data.test_data)
            test_scores2 = EPT2(data.test_data, data.test_labels)
        else:
            # NOTE(review): this branch never assigns `scores`, `scores2`,
            # `test_scores`, `test_scores2` or `l2` (the calls are commented
            # out below), so everything from the first loop onward raises
            # NameError when kernel_bias is falsy — confirm the intended
            # non-kernel_bias evaluation path.
            EP = evaluate_perturbation(args, sess, model, inputs)
#        scores = EP(inputs, targets, adv)
#        scores2 = EP2(inputs, targets, adv)

        # Per-image 0/1 tallies; score_count4 restricts to the first
        # `numimg` (source) images.
        score_count = []
        score_count2 = []
        score_count3 = []

        score_count4 = []
        # Modified model: does the adversarial prediction hit the target?
        for e, (sc) in enumerate(scores):

            if np.argmax(sc) == np.argmax(targets[e]):
                score_count.append(1)
                if e < args['numimg']:
                    score_count4.append(1)
            else:
                score_count.append(0)
                if e < args['numimg']:
                    score_count4.append(0)

        # Modified model: does the prediction still match the true label?
        for e, (sc) in enumerate(scores):
            if np.argmax(sc) == np.argmax(labels[e]):
                score_count3.append(1)
            else:
                score_count3.append(0)

        # Restored (original) model: prediction vs. true label.
        for e, (sc2) in enumerate(scores2):
            if np.argmax(sc2) == np.argmax(labels[e]):
                score_count2.append(1)
            else:
                score_count2.append(0)

        # Test-set accuracy under the modified vs. restored model.
        test_score_count = []
        test_score_count2 = []

        for e, (tsc) in enumerate(test_scores):

            if np.argmax(tsc) == np.argmax(data.test_labels[e]):
                test_score_count.append(1)
            else:
                test_score_count.append(0)

        for e, (tsc2) in enumerate(test_scores2):

            if np.argmax(tsc2) == np.argmax(data.test_labels[e]):
                test_score_count2.append(1)
            else:
                test_score_count2.append(0)

        # NOTE(review): l0 counts nonzeros of `adv` itself, not of
        # (adv - inputs) — confirm `adv` holds the perturbation rather
        # than the full adversarial image.
        l0s = np.count_nonzero(adv)
        successrate = np.mean(score_count)
        successrate2 = np.mean(score_count2)
        successrate3 = np.mean(score_count3)
        test_successrate = np.mean(test_score_count)
        test_successrate2 = np.mean(test_score_count2)

        print('original model, success rate of T images for the original labels:', successrate2)
        print('modified model, success rate of T images for the original labels:', successrate3)
        print('modified model, success rate of T images for the target labels:', successrate)
        print('modified model, success rate of S imges for the target labels:', np.mean(score_count4))

        print('modified model, success rate of test set for the original labels:', test_successrate)
        print('original model, success rate of test set for the original labels:', test_successrate2)
        print('l0 distance:', l0s)
        print('l2 distance:', l2)
Exemple #11
0
# Command-line flags for the attack script.
# Fix: help text for 'adaptive' had a typo ("prameters" -> "parameters").
flags.DEFINE_bool('adaptive', True, 'Turns on the dynamic scaling of mutation parameters')
flags.DEFINE_string('model', 'inception', 'model name')
flags.DEFINE_integer('target', None, 'target class. if not provided will be random')
FLAGS = flags.FLAGS

if __name__ == '__main__':

    # random.seed(FLAGS.seed)
    # tf.set_random_seed(FLAGS.seed)
    # np.random.seed(FLAGS.seed)

    dataset = ImageNet(FLAGS.input_dir)
    inputs, targets, reals, paths = utils.generate_data(dataset, FLAGS.test_size)
    
    with tf.Session() as sess:
        model = InceptionModel(sess, use_log=True)
        test_in = tf.placeholder(tf.float32, (1,299,299,3), 'x')
        test_pred = tf.argmax(model.predict(test_in), axis=1)
        
         
        attack = GenAttack2(model=model,
                pop_size=FLAGS.pop_size,
                mutation_rate = FLAGS.mutation_rate,
                eps=FLAGS.eps,
                max_steps=FLAGS.max_steps,
                alpha=FLAGS.alpha,
                resize_dim=FLAGS.resize_dim,
                adaptive=FLAGS.adaptive)
        num_valid_images = len(inputs)
        total_count = 0 # Total number of images attempted
        success_count = 0
Exemple #12
0
def main(args):
    """Generate targeted adversarial examples with one of several attacks
    and report L1/L2/Linf statistics.

    Flow: select dataset+model from ``args['dataset']``, optionally swap in
    an adversarially trained / distilled model, build a targeted attack set
    with ``generate_data``, construct the attack object named by
    ``args['attack']``, run it, optionally save the results for training,
    and hand off to ``l1_l2_li_computation`` for evaluation.

    Args:
        args: dict of parsed command-line options (keys used here include
            'dataset', 'adversarial', 'temp', 'numimg', 'target_number',
            'seed', 'attack', 'batch_size', 'maxiter', 'conf', 'train', ...).
    """
    #   gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.7)
    with tf.Session() as sess:
        # --- dataset / classifier selection ------------------------------
        if args['dataset'] == 'mnist':
            data, model = MNIST(), MadryMNISTModel("models/secret/", sess)
            handpick = False
            inception = False
        if args['dataset'] == "cifar":
            data, model = CIFAR(), CIFARModel("models/cifar", sess)
            #data, model = CIFAR(), MadryCIFARModel("models/model_0/", sess)
            handpick = True
            inception = False
        if args['dataset'] == "imagenet":
            data, model = ImageNet(args['seed_imagenet']), InceptionModel(
                sess, False)
            handpick = True
            inception = True

        # Optional model substitutions (adversarially trained / distilled).
        if args['adversarial'] != "none":
            model = MNISTModel(
                "models/mnist_cwl2_admm" + str(args['adversarial']), sess)

        if args['temp'] and args['dataset'] == 'mnist':
            model = MNISTModel("models/mnist-distilled-" + str(args['temp']),
                               sess)
        if args['temp'] and args['dataset'] == 'cifar':
            model = MadryCIFARModel(
                "models/cifar-distilled-" + str(args['temp']), sess)

        # Targeted attack set with `target_number` targets per image.
        inputs, targets, labels, true_ids = generate_data(
            data,
            model,
            samples=args['numimg'],
            targeted=True,
            target_num=args['target_number'],
            start=0,
            inception=inception,
            handpick=handpick,
            seed=args['seed'])

        #print(true_ids)
        # --- attack selection --------------------------------------------
        # One branch per supported attack name. NOTE(review): if no branch
        # matches args['attack'], `attack` is unbound and attack.attack(...)
        # below raises NameError.
        if args['attack'] == 'L2C':
            attack = CarliniL2(sess,
                               model,
                               batch_size=args['batch_size'],
                               max_iterations=args['maxiter'],
                               confidence=args['conf'],
                               binary_search_steps=args['binary_steps'],
                               abort_early=args['abort_early'])
        if args['attack'] == 'LiCW':
            attack = CarliniLi(sess,
                               model,
                               max_iterations=args['maxiter'],
                               abort_early=args['abort_early'])

        if args['attack'] == 'L2A':
            attack = ADMML2(sess,
                            model,
                            batch_size=args['batch_size'],
                            max_iterations=args['maxiter'],
                            confidence=args['conf'],
                            binary_search_steps=args['iteration_steps'],
                            ro=args['ro'],
                            abort_early=args['abort_early'])

        if args['attack'] == 'L2AE':
            attack = ADMML2en(sess,
                              model,
                              batch_size=args['batch_size'],
                              max_iterations=args['maxiter'],
                              confidence=args['conf'],
                              binary_search_steps=args['binary_steps'],
                              ro=args['ro'],
                              iteration_steps=args['iteration_steps'],
                              abort_early=args['abort_early'])

        if args['attack'] == 'L2LA':
            attack = LADMML2(sess,
                             model,
                             batch_size=args['batch_size'],
                             max_iterations=args['maxiter'],
                             confidence=args['conf'],
                             binary_search_steps=args['iteration_steps'],
                             ro=args['ro'],
                             abort_early=args['abort_early'])
        if args['attack'] == 'L2LAST':
            attack = LADMMSTL2(sess,
                               model,
                               batch_size=args['batch_size'],
                               max_iterations=args['maxiter'],
                               confidence=args['conf'],
                               binary_search_steps=args['iteration_steps'],
                               ro=args['ro'],
                               abort_early=args['abort_early'],
                               retrain=args['retrain'])

        # (Iterative) fast gradient methods under Linf / L1 / L2 norms.
        if args['attack'] == 'LiIF':
            attack = IFGM(sess,
                          model,
                          batch_size=args['batch_size'],
                          ord=np.inf,
                          inception=inception)
        if args['attack'] == 'LiF':
            attack = FGM(sess,
                         model,
                         batch_size=args['batch_size'],
                         ord=np.inf,
                         inception=inception)

        if args['attack'] == 'L1':
            attack = EADL1(sess,
                           model,
                           batch_size=args['batch_size'],
                           max_iterations=args['maxiter'],
                           confidence=args['conf'],
                           binary_search_steps=args['binary_steps'],
                           beta=args['beta'],
                           abort_early=args['abort_early'])

        if args['attack'] == 'L1EN':
            attack = EADEN(sess,
                           model,
                           batch_size=args['batch_size'],
                           max_iterations=args['maxiter'],
                           confidence=args['conf'],
                           binary_search_steps=args['binary_steps'],
                           beta=args['beta'],
                           abort_early=args['abort_early'])

        if args['attack'] == 'L1IFGM':
            attack = IFGM(sess,
                          model,
                          batch_size=args['batch_size'],
                          ord=1,
                          inception=inception)
        if args['attack'] == 'L2IFGM':
            attack = IFGM(sess,
                          model,
                          batch_size=args['batch_size'],
                          ord=2,
                          inception=inception)

        if args['attack'] == 'L1FGM':
            attack = FGM(sess,
                         model,
                         batch_size=args['batch_size'],
                         ord=1,
                         inception=inception)
        if args['attack'] == 'L2FGM':
            attack = FGM(sess,
                         model,
                         batch_size=args['batch_size'],
                         ord=2,
                         inception=inception)

        timestart = time.time()
        adv = attack.attack(inputs, targets)
        timeend = time.time()

        print("Took", timeend - timestart, "seconds to run", len(inputs),
              "samples.\n")

        # Persist labels and adversarial examples for adversarial training.
        if args['train']:
            np.save('labels_train.npy', labels)
            np.save(str(args['attack']) + '_train.npy', adv)

        #if (args['conf'] != 0):
        #    model = MNISTModel("models/mnist-distilled-100", sess)

        # Evaluate L1/L2/Linf distortions and success rates.
        l1_l2_li_computation(args, data, model, adv, inception, inputs,
                             targets, labels, true_ids)
            targets.append(data.test_labels[start + i])

    inputs = np.array(inputs)
    targets = np.array(targets)
    new_inputs = np.array(new_inputs)
    new_targets = np.array(new_targets)

    return inputs, targets, new_inputs, new_targets


if __name__ == "__main__":
    with tf.Session() as sess:
        #data, model =  MNIST(), MNISTModel("models/mnist", sess)
        #data, model =  CIFAR(), CIFARModel("models/cifar", sess)
        data = ImageNet()
        model = InceptionModel(sess)
        attack = CarliniL2(sess,
                           model,
                           batch_size=1,
                           max_iterations=1000,
                           confidence=0)
        #attack = CarliniL0(sess, model, max_iterations=1000, initial_const=10,
        #                   largest_const=15)
        inputs, targets, new_inputs, new_targets = generate_data(
            data, samples=10, targeted=True, start=0, inception=True)
        """
        #total_attack = []        
        #target_all = np.zeros( (10,1008) )
        #D2 = new_inputs[9]
        #D2 = D2 + .5
        #D2 = D2 * 255
Exemple #14
0
def main(args):
    """Run a (possibly targeted) attack and print best/average/worst-case
    L1/L2/Linf distortions per batch of candidate targets.

    Each batch of ``args['batch_size']`` entries corresponds to one original
    image attacked toward several targets; within each batch this computes
    the smallest (best-case), a random (average-case), and the largest
    (worst-case) successful distortion, then averages across batches.

    Args:
        args: dict of parsed command-line options (keys used here include
            'dataset', 'adversarial', 'temp', 'numimg', 'train', 'seed',
            'attack', 'batch_size', 'maxiter', 'conf', 'show', 'save', ...).
    """
    with tf.Session() as sess:
        # --- dataset / classifier selection ------------------------------
        if (args['dataset'] == 'mnist'):
            data, model = MNIST(), MNISTModel("models/mnist", sess)
            handpick = False
            inception = False
        if (args['dataset'] == "cifar"):
            data, model = CIFAR(), CIFARModel("models/cifar", sess)
            handpick = True
            inception = False
        if (args['dataset'] == "imagenet"):
            data, model = ImageNet(args['seed_imagenet']), InceptionModel(sess)
            handpick = True
            inception = True

        # Optional model substitutions (adversarially trained / distilled).
        if (args['adversarial'] != "none"):
            model = MNISTModel("models/mnist_cw" + str(args['adversarial']),
                               sess)

        if (args['temp'] and args['dataset'] == 'mnist'):
            model = MNISTModel("models/mnist-distilled-" + str(args['temp']),
                               sess)
        if (args['temp'] and args['dataset'] == 'cifar'):
            model = CIFARModel("models/cifar-distilled-" + str(args['temp']),
                               sess)

        inputs, targets, labels, true_ids = generate_data(
            data,
            model,
            samples=args['numimg'],
            inception=inception,
            handpick=handpick,
            train=args['train'],
            seed=args['seed'])
        timestart = time.time()
        # --- attack selection; each branch also runs the attack ----------
        # NOTE(review): if no branch matches args['attack'], `adv` is
        # unbound and the evaluation below raises NameError.
        if (args['attack'] == 'L2'):
            attack = CarliniL2(sess,
                               model,
                               batch_size=args['batch_size'],
                               max_iterations=args['maxiter'],
                               confidence=args['conf'],
                               binary_search_steps=args['binary_steps'],
                               beta=args['beta'],
                               abort_early=args['abort_early'])
            adv = attack.attack(inputs, targets)
        if (args['attack'] == 'L1'):
            attack = EADL1(sess,
                           model,
                           batch_size=args['batch_size'],
                           max_iterations=args['maxiter'],
                           confidence=args['conf'],
                           binary_search_steps=args['binary_steps'],
                           beta=args['beta'],
                           abort_early=args['abort_early'])
            adv = attack.attack(inputs, targets)
        if (args['attack'] == 'EN'):
            attack = EADEN(sess,
                           model,
                           batch_size=args['batch_size'],
                           max_iterations=args['maxiter'],
                           confidence=args['conf'],
                           binary_search_steps=args['binary_steps'],
                           beta=args['beta'],
                           abort_early=args['abort_early'])
            adv = attack.attack(inputs, targets)
        """If untargeted, pass labels instead of targets"""
        if (args['attack'] == 'FGSM'):
            attack = FGM(sess,
                         model,
                         batch_size=args['batch_size'],
                         ord=np.inf,
                         inception=inception)
            adv = attack.attack(inputs, targets)
        if (args['attack'] == 'FGML1'):
            attack = FGM(sess,
                         model,
                         batch_size=args['batch_size'],
                         ord=1,
                         inception=inception)
            adv = attack.attack(inputs, targets)
        if (args['attack'] == 'FGML2'):
            attack = FGM(sess,
                         model,
                         batch_size=args['batch_size'],
                         ord=2,
                         inception=inception)
            adv = attack.attack(inputs, targets)

        if (args['attack'] == 'IFGSM'):
            attack = IGM(sess,
                         model,
                         batch_size=args['batch_size'],
                         ord=np.inf,
                         inception=inception)
            adv = attack.attack(inputs, targets)
        if (args['attack'] == 'IFGML1'):
            attack = IGM(sess,
                         model,
                         batch_size=args['batch_size'],
                         ord=1,
                         inception=inception)
            adv = attack.attack(inputs, targets)
        if (args['attack'] == 'IFGML2'):
            attack = IGM(sess,
                         model,
                         batch_size=args['batch_size'],
                         ord=2,
                         inception=inception)
            adv = attack.attack(inputs, targets)

        timeend = time.time()
        print("Took", timeend - timestart, "seconds to run",
              len(inputs) / args['batch_size'], "random instances.")

        # Training mode: save examples and skip evaluation entirely.
        if (args['train']):
            np.save('labels_train.npy', labels)
            np.save(str(args['attack']) + '_train.npy', adv)
            return

        # Per-batch statistics: r_* = 0/1 success flags, d_*_l{1,2,inf} =
        # distortions of the best / random (average) / worst successful case.
        r_best = []
        d_best_l1 = []
        d_best_l2 = []
        d_best_linf = []
        r_average = []
        d_average_l1 = []
        d_average_l2 = []
        d_average_linf = []
        r_worst = []
        d_worst_l1 = []
        d_worst_l2 = []
        d_worst_linf = []

        # When a confidence margin is used, score against the distilled model.
        if (args['conf'] != 0):
            model = MNISTModel("models/mnist-distilled-100", sess)

        # Prepare the output directory for saved images.
        if (args['show']):
            if not os.path.exists(
                    str(args['save']) + "/" + str(args['dataset']) + "/" +
                    str(args['attack'])):
                os.makedirs(
                    str(args['save']) + "/" + str(args['dataset']) + "/" +
                    str(args['attack']))

        # One iteration per batch (one original image, batch_size targets).
        # NOTE(review): assumes len(inputs) is a multiple of batch_size;
        # otherwise the inner ranges index past the end.
        for i in range(0, len(inputs), args['batch_size']):

            # Model predictions for every adversarial example in the batch.
            pred = []
            for j in range(i, i + args['batch_size']):
                if inception:
                    pred.append(
                        np.reshape(model.model.predict(adv[j:j + 1]),
                                   (data.test_labels[0:1].shape)))
                else:
                    pred.append(model.model.predict(adv[j:j + 1]))

            # Best case: smallest distortion among successful targets.
            # 1e10 acts as "not found" sentinel for both distances and
            # indices.
            dist_l1 = 1e10
            dist_l2 = 1e10
            dist_linf = 1e10
            dist_l1_index = 1e10
            dist_l2_index = 1e10
            dist_linf_index = 1e10
            for k, j in enumerate(range(i, i + args['batch_size'])):
                if (np.argmax(pred[k], 1) == np.argmax(targets[j:j + 1], 1)):
                    if (np.sum(np.abs(adv[j] - inputs[j])) < dist_l1):
                        dist_l1 = np.sum(np.abs(adv[j] - inputs[j]))
                        dist_l1_index = j
                    if (np.amax(np.abs(adv[j] - inputs[j])) < dist_linf):
                        dist_linf = np.amax(np.abs(adv[j] - inputs[j]))
                        dist_linf_index = j
                    if ((np.sum((adv[j] - inputs[j])**2)**.5) < dist_l2):
                        dist_l2 = (np.sum((adv[j] - inputs[j])**2)**.5)
                        dist_l2_index = j
            if (dist_l1_index != 1e10):
                d_best_l2.append((np.sum(
                    (adv[dist_l2_index] - inputs[dist_l2_index])**2)**.5))
                d_best_l1.append(
                    np.sum(np.abs(adv[dist_l1_index] - inputs[dist_l1_index])))
                d_best_linf.append(
                    np.amax(
                        np.abs(adv[dist_linf_index] -
                               inputs[dist_linf_index])))
                r_best.append(1)
            else:
                r_best.append(0)

            # Average case: one randomly picked target from the batch.
            rand_int = np.random.randint(i, i + args['batch_size'])
            if inception:
                pred_r = np.reshape(
                    model.model.predict(adv[rand_int:rand_int + 1]),
                    (data.test_labels[0:1].shape))
            else:
                pred_r = model.model.predict(adv[rand_int:rand_int + 1])
            if (np.argmax(pred_r,
                          1) == np.argmax(targets[rand_int:rand_int + 1], 1)):
                r_average.append(1)
                d_average_l2.append(
                    np.sum((adv[rand_int] - inputs[rand_int])**2)**.5)
                d_average_l1.append(
                    np.sum(np.abs(adv[rand_int] - inputs[rand_int])))
                d_average_linf.append(
                    np.amax(np.abs(adv[rand_int] - inputs[rand_int])))

            else:
                r_average.append(0)

            # Worst case: largest distortion, counted only if EVERY target
            # in the batch succeeded (any failure breaks out with 0).
            dist_l1 = 0
            dist_l1_index = 1e10
            dist_linf = 0
            dist_linf_index = 1e10
            dist_l2 = 0
            dist_l2_index = 1e10
            for k, j in enumerate(range(i, i + args['batch_size'])):
                if (np.argmax(pred[k], 1) != np.argmax(targets[j:j + 1], 1)):
                    r_worst.append(0)
                    dist_l1_index = 1e10
                    dist_l2_index = 1e10
                    dist_linf_index = 1e10
                    break
                else:
                    if (np.sum(np.abs(adv[j] - inputs[j])) > dist_l1):
                        dist_l1 = np.sum(np.abs(adv[j] - inputs[j]))
                        dist_l1_index = j
                    if (np.amax(np.abs(adv[j] - inputs[j])) > dist_linf):
                        dist_linf = np.amax(np.abs(adv[j] - inputs[j]))
                        dist_linf_index = j
                    if ((np.sum((adv[j] - inputs[j])**2)**.5) > dist_l2):
                        dist_l2 = (np.sum((adv[j] - inputs[j])**2)**.5)
                        dist_l2_index = j
            if (dist_l1_index != 1e10):
                d_worst_l2.append((np.sum(
                    (adv[dist_l2_index] - inputs[dist_l2_index])**2)**.5))
                d_worst_l1.append(
                    np.sum(np.abs(adv[dist_l1_index] - inputs[dist_l1_index])))
                d_worst_linf.append(
                    np.amax(
                        np.abs(adv[dist_linf_index] -
                               inputs[dist_linf_index])))
                r_worst.append(1)

            # Optionally dump original/adversarial image pairs with an
            # informative filename suffix.
            if (args['show']):
                for j in range(i, i + args['batch_size']):
                    target_id = np.argmax(targets[j:j + 1], 1)
                    label_id = np.argmax(labels[j:j + 1], 1)
                    prev_id = np.argmax(
                        np.reshape(model.model.predict(inputs[j:j + 1]),
                                   (data.test_labels[0:1].shape)), 1)
                    adv_id = np.argmax(
                        np.reshape(model.model.predict(adv[j:j + 1]),
                                   (data.test_labels[0:1].shape)), 1)
                    suffix = "id{}_seq{}_lbl{}_prev{}_adv{}_{}_l1_{:.3f}_l2_{:.3f}_linf_{:.3f}".format(
                        true_ids[i], target_id, label_id, prev_id,
                        adv_id, adv_id == target_id,
                        np.sum(np.abs(adv[j] - inputs[j])),
                        np.sum((adv[j] - inputs[j])**2)**.5,
                        np.amax(np.abs(adv[j] - inputs[j])))

                    show(
                        inputs[j:j + 1],
                        str(args['save']) + "/" + str(args['dataset']) + "/" +
                        str(args['attack']) +
                        "/original_{}.png".format(suffix))
                    show(
                        adv[j:j + 1],
                        str(args['save']) + "/" + str(args['dataset']) + "/" +
                        str(args['attack']) +
                        "/adversarial_{}.png".format(suffix))

        # Mean distortions / success probabilities across batches.
        print('best_case_L1_mean', np.mean(d_best_l1))
        print('best_case_L2_mean', np.mean(d_best_l2))
        print('best_case_Linf_mean', np.mean(d_best_linf))
        print('best_case_prob', np.mean(r_best))
        print('average_case_L1_mean', np.mean(d_average_l1))
        print('average_case_L2_mean', np.mean(d_average_l2))
        print('average_case_Linf_mean', np.mean(d_average_linf))
        print('average_case_prob', np.mean(r_average))
        print('worst_case_L1_mean', np.mean(d_worst_l1))
        print('worst_case_L2_mean', np.mean(d_worst_l2))
        print('worst_case_Linf_mean', np.mean(d_worst_linf))
        print('worst_case_prob', np.mean(r_worst))
Exemple #15
0
def main(args):
    """Run a black-box adversarial attack (ZOO, ZOO-AE, AutoZOOM-BiLIN or
    AutoZOOM-AE) over a set of test images and save results to disk.

    For every attacked image the original, adversarial and difference
    images are written (both .png and .npy) under
    ``args["save_path"]/<dataset>/<attack_method>/<attack_type>``.

    Args:
        args: dict of run options; the keys read here include
            ``dataset``, ``num_img``, ``img_offset``, ``attack_single_img``,
            ``single_img_target_label``, ``attack_type``, ``random_target``,
            ``attack_method``, ``img_resize``, ``batch_size``,
            ``compress_mode``, ``codec_prefix`` and ``save_path``.
    """
    with tf.Session() as sess:
        print("Loading data and classification model: {}".format(
            args["dataset"]))
        # NOTE(review): for the MNIST/CIFAR models pixel values are
        # presumably in [-0.5, 0.5] (see the +0.5/*255 rescaling below) —
        # confirm against the model wrappers.
        if args['dataset'] == "mnist":
            data, model = MNIST(), MNISTModel("models/mnist",
                                              sess,
                                              use_softmax=True)
        elif args['dataset'] == "cifar10":
            data, model = CIFAR(), CIFARModel("models/cifar",
                                              sess,
                                              use_softmax=True)
        elif args['dataset'] == "imagenet":
            # data, model = ImageNet(data_path=args["imagenet_dir"], targetFile=args["attack_single_img"]), InceptionModel(sess, use_softmax=True)
            data, model = ImageNetDataNP(), InceptionModel(sess,
                                                           use_softmax=True)
        # elif args['dataset'] == "imagenet_np":
        else:
            # BUG FIX: the original fell through silently here, leaving
            # `data`/`model` undefined and crashing later with a NameError.
            raise ValueError("Unknown dataset: {}".format(args["dataset"]))

        if len(data.test_labels) < args["num_img"]:
            raise Exception("No enough data, only have {} but need {}".format(
                len(data.test_labels), args["num_img"]))

        if args["attack_single_img"]:
            # manually setup attack set
            # attacking only one image with random attack]
            orig_img = data.test_data
            orig_labels = data.test_labels
            orig_img_id = np.array([1])

            if args["attack_type"] == "targeted":
                target_labels = [
                    np.eye(model.num_labels)[args["single_img_target_label"]]
                ]
            else:
                target_labels = orig_labels

            # BUG FIX: the attack loop below iterates the all_* arrays,
            # which the original never defined on this branch (NameError
            # in single-image mode).
            all_orig_img = orig_img
            all_orig_labels = orig_labels
            all_orig_img_id = orig_img_id
            all_target_labels = target_labels
        else:
            # generate attack set
            # ImageNet labels are shifted by one (background class).
            if args["dataset"] == "imagenet" or args[
                    "dataset"] == "imagenet_np":
                shift_index = True
            else:
                shift_index = False

            # BUG FIX: this selection originally ran even in the
            # single-image case, where `shift_index` was undefined; it is
            # now only executed when an attack set must be generated.
            if args["random_target"] and (args["dataset"] == "imagenet"
                                          or args["dataset"] == "imagenet_np"):
                # find all possible class
                all_class = np.unique(np.argmax(data.test_labels, 1))
                all_orig_img, all_target_labels, all_orig_labels, all_orig_img_id = util.generate_attack_data_set(
                    data,
                    args["num_img"],
                    args["img_offset"],
                    model,
                    attack_type=args["attack_type"],
                    random_target_class=all_class,
                    shift_index=shift_index)
            elif args["random_target"]:
                # random target on all possible classes
                class_num = data.test_labels.shape[1]
                all_orig_img, all_target_labels, all_orig_labels, all_orig_img_id = util.generate_attack_data_set(
                    data,
                    args["num_img"],
                    args["img_offset"],
                    model,
                    attack_type=args["attack_type"],
                    random_target_class=list(range(class_num)),
                    shift_index=shift_index)
            else:
                all_orig_img, all_target_labels, all_orig_labels, all_orig_img_id = util.generate_attack_data_set(
                    data,
                    args["num_img"],
                    args["img_offset"],
                    model,
                    attack_type=args["attack_type"],
                    shift_index=shift_index)

            # check attack data
        # for i in range(len(orig_img_id)):
        #     tar_lab = np.argmax(target_labels[i])
        #     orig_lab = np.argmax(orig_labels[i])
        #     print("{}:, target label:{}, orig_label:{}, orig_img_id:{}".format(i, tar_lab, orig_lab, orig_img_id[i]))

        # attack related settings
        # ZOO and AutoZOOM-BiLIN attack in the original image space unless
        # img_resize is given explicitly.
        if args["attack_method"] == "zoo" or args[
                "attack_method"] == "autozoom_bilin":
            if args["img_resize"] is None:
                args["img_resize"] = model.image_size
                print(
                    "Argument img_resize is not set and not using autoencoder, set to image original size:{}"
                    .format(args["img_resize"]))

        if args["attack_method"] == "zoo" or args["attack_method"] == "zoo_ae":
            if args["batch_size"] is None:
                args["batch_size"] = 128
                print(
                    "Using zoo or zoo_ae attack, and batch_size is not set.\nSet batch_size to {}."
                    .format(args["batch_size"]))

        else:
            # AutoZOOM variants evaluate one perturbation at a time.
            if args["batch_size"] is not None:
                print("Argument batch_size is not used")
                args["batch_size"] = 1  # force to be 1

        # The AE attacks optimize in the decoder's latent space, so the
        # attack image size is dictated by the decoder's input shape.
        if args["attack_method"] == "zoo_ae" or args[
                "attack_method"] == "autozoom_ae":
            #_, decoder = util.load_codec(args["codec_prefix"])
            if args["dataset"] == "mnist" or args["dataset"] == "cifar10":
                codec = CODEC(model.image_size,
                              model.num_channels,
                              args["compress_mode"],
                              use_tanh=False)
            else:
                codec = CODEC(128, model.num_channels, args["compress_mode"])
            print(args["codec_prefix"])
            codec.load_codec(args["codec_prefix"])
            decoder = codec.decoder
            print(decoder.input_shape)
            args["img_resize"] = decoder.input_shape[1]
            print("Using autoencoder, set the attack image size to:{}".format(
                args["img_resize"]))

        # setup attack
        if args["attack_method"] == "zoo":
            blackbox_attack = ZOO(sess, model, args)
        elif args["attack_method"] == "zoo_ae":
            blackbox_attack = ZOO_AE(sess, model, args, decoder)
        elif args["attack_method"] == "autozoom_bilin":
            blackbox_attack = AutoZOOM_BiLIN(sess, model, args)
        elif args["attack_method"] == "autozoom_ae":
            blackbox_attack = AutoZOOM_AE(sess, model, args, decoder, codec)

        save_prefix = os.path.join(args["save_path"], args["dataset"],
                                   args["attack_method"], args["attack_type"])

        os.system("mkdir -p {}".format(save_prefix))

        total_success = 0
        l2_total = 0

        # Attack each selected image in turn and report running statistics.
        for i in range(all_orig_img_id.size):
            orig_img = all_orig_img[i:i + 1]
            target = all_target_labels[i:i + 1]
            label = all_orig_labels[i:i + 1]

            target_class = np.argmax(target)
            true_class = np.argmax(label)
            test_index = all_orig_img_id[i]

            # print information
            print(
                "[Info][Start]{}: test_index:{}, true label:{}, target label:{}"
                .format(i, test_index, true_class, target_class))
            if args["attack_method"] == "zoo_ae" or args[
                    "attack_method"] == "autozoom_ae":
                #print ae info
                if args["dataset"] == "mnist" or args["dataset"] == "cifar10":
                    temp_img = all_orig_img[i:i + 1]
                else:
                    # ImageNet images are resized to the codec's 128x128
                    # input; rescale [-0.5,0.5] -> [0,255] and back.
                    # NOTE(review): scipy.misc.imresize was removed in
                    # SciPy 1.3 — this requires an old SciPy/Pillow.
                    temp_img = all_orig_img[i]
                    temp_img = (temp_img + 0.5) * 255
                    temp_img = scipy.misc.imresize(temp_img, (128, 128))
                    temp_img = temp_img / 255 - 0.5
                    temp_img = np.expand_dims(temp_img, axis=0)
                # Report the autoencoder's reconstruction error for this image.
                encode_img = codec.encoder.predict(temp_img)
                decode_img = codec.decoder.predict(encode_img)
                diff_img = (decode_img - temp_img)
                diff_mse = np.mean(diff_img.reshape(-1)**2)
                print("[Info][AE] MSE:{:.4f}".format(diff_mse))

            timestart = time.time()
            adv_img = blackbox_attack.attack(orig_img, target)
            timeend = time.time()

            if len(adv_img.shape) == 3:
                adv_img = np.expand_dims(adv_img, axis=0)

            # L2 distortion between adversarial and original image.
            l2_dist = np.sum((adv_img - orig_img)**2)**.5
            adv_class = np.argmax(model.model.predict(adv_img))

            # Targeted attacks succeed when the adversarial class equals
            # the target; untargeted when it differs from the true class.
            success = False
            if args["attack_type"] == "targeted":
                if adv_class == target_class:
                    success = True
            else:
                if adv_class != true_class:
                    success = True

            if success:
                total_success += 1
                l2_total += l2_dist

            print(
                "[Info][End]{}: test_index:{}, true label:{}, adv label:{}, success:{}, distortion:{:.5f}, success_rate:{:.4f}, l2_avg:{:.4f}"
                .format(i, test_index, true_class, adv_class, success, l2_dist,
                        total_success / (i + 1),
                        0 if total_success == 0 else l2_total / total_success))

            # save images
            suffix = "id{}_testIndex{}_true{}_adv{}".format(
                i, test_index, true_class, adv_class)
            # original image
            save_name = os.path.join(save_prefix, "Orig_{}.png".format(suffix))
            util.save_img(orig_img, save_name)
            save_name = os.path.join(save_prefix, "Orig_{}.npy".format(suffix))
            np.save(save_name, orig_img)

            # adv image
            save_name = os.path.join(save_prefix, "Adv_{}.png".format(suffix))
            util.save_img(adv_img, save_name)
            save_name = os.path.join(save_prefix, "Adv_{}.npy".format(suffix))
            np.save(save_name, adv_img)

            # diff image (halved so the perturbation stays in display range)
            save_name = os.path.join(save_prefix, "Diff_{}.png".format(suffix))
            util.save_img((adv_img - orig_img) / 2, save_name)
            save_name = os.path.join(save_prefix, "Diff_{}.npy".format(suffix))
            np.save(save_name, adv_img - orig_img)
Exemple #16
0
def main(args):
    """Run a zeroth-order (ZO) universal adversarial attack on a batch of
    images from one class, using the ZO optimizer selected by args['mode']
    (ZOSGD, ZOsignSGD, ZOSCD, ZOAdaMM, ZOSMD, ZOPSGD or ZONES).

    A single perturbation ``delta_adv`` is optimized for ``mini_batch_sz``
    correctly-classified images of class ``class_id``; iteration traces and
    the best/first successful attacks are saved via ``np.savez``.

    Args:
        args: dict of run options; the keys read here include ``class_id``,
            ``target_id``, ``maxiter``, ``init_const``, ``kappa``, ``q``,
            ``mode``, ``save_iteration``, ``dataset``, ``targeted_attack``,
            ``mini_batch_sz``, ``lr_idx``, ``constraint``, ``mu``, ``lr``,
            ``decay_lr``, ``print_iteration``, ``exp_code``.
    """
    with tf.Session() as sess:

        # Fix all RNGs for reproducibility.
        random.seed(SEED)
        np.random.seed(SEED)
        tf.set_random_seed(SEED)

        class_id = args['class_id']  ### input image (natural example)
        target_id = args[
            'target_id']  ### target images id (adv example) if target attack
        arg_max_iter = args['maxiter']  ### max number of iterations
        arg_init_const = args[
            'init_const']  ### regularization prior to attack loss
        arg_kappa = args['kappa']  ### attack confidence level
        arg_q = args['q']  ### number of random direction vectors
        arg_mode = args['mode']  ### algorithm name
        arg_save_iteration = args['save_iteration']
        arg_Dataset = args["dataset"]
        arg_targeted_attack = args["targeted_attack"]
        arg_bsz = args["mini_batch_sz"]
        idx_lr = args["lr_idx"]

        ## load classifier; for MNIST and CIFAR the pixel value range is [-0.5,0.5]
        if (arg_Dataset == 'mnist'):
            data, model = MNIST(), MNISTModel("models/mnist", sess, True)
        elif (arg_Dataset == 'cifar10'):
            data, model = CIFAR(), CIFARModel("models/cifar", sess, True)
        elif (arg_Dataset == 'imagenet'):
            data, model = ImageNet_Universal(SEED), InceptionModel(sess, True)
            #model = InceptionModel(sess, True)
        else:
            print('Please specify a valid dataset')

        # Collect all test images whose true label is class_id.
        #orig_img = np.load('ori_img_backup.npy')
        orig_img = data.test_data[np.where(
            np.argmax(data.test_labels, 1) == class_id)]
        #np.save('ori_img_backup',orig_img)

        #true_label = data.test_labels[np.where(np.argmax(data.test_labels,1) == class_id)]
        _, orig_class = util.model_prediction_u(
            model, orig_img[:30]
        )  # take 30 or less images to make sure arg_bsz number of them are valid

        # filter out the images which misclassified already
        orig_img = orig_img[np.where(orig_class == class_id)]
        if orig_img.shape[0] < arg_bsz:
            # BUG FIX: the original used `assert 'no enough valid inputs'`,
            # which asserts a truthy non-empty string and can never fail;
            # raise explicitly so the guard actually fires.
            raise ValueError("not enough valid inputs: {} < {}".format(
                orig_img.shape[0], arg_bsz))

        orig_img = orig_img[:arg_bsz]

        np.save('original_imgsID' + str(class_id), orig_img)
        # Labels are kept as integer class ids, not one-hot vectors.
        #true_label = np.zeros((arg_bsz, 1001))
        #true_label[np.arange(arg_bsz), class_id] = 1
        true_label = class_id

        if arg_targeted_attack:  ### target attack
            #target_label = np.zeros((arg_bsz, 1001))
            #target_label[np.arange(arg_bsz), target_id] = 1
            target_label = target_id
        else:
            target_label = true_label

        #orig_img, target = util.generate_data(data, class_id, target_label)
        # shape of orig_img is (1,28,28,1) in [-0.5, 0.5]

        ##  parameter
        # d is the per-image feature dimension (H*W*C).
        if orig_img.ndim == 3 or orig_img.shape[0] == 1:
            d = orig_img.size  # feature dim
        else:
            d = orig_img[0].size
        print("dimension = ", d)

        # mu=1/d**2  # smoothing parameter
        q = arg_q + 0
        I = arg_max_iter + 0
        kappa = arg_kappa + 0
        const = arg_init_const + 0

        ## flatten image to vec
        orig_img_vec = np.resize(orig_img, (arg_bsz, d))

        ## w adv image initialization
        if args["constraint"] == 'uncons':
            # optimize in arctanh space so tanh maps back into [-0.5, 0.5];
            # * 0.999999 to avoid +-0.5 return +-infinity
            w_ori_img_vec = np.arctanh(
                2 * (orig_img_vec) * 0.999999
            )  # in real value, note that orig_img_vec in [-0.5, 0.5]
            w_img_vec = w_ori_img_vec.copy()
        else:
            w_ori_img_vec = orig_img_vec.copy()
            w_img_vec = w_ori_img_vec.copy()

        # ## test ##
        # for test_value in w_ori_img_vec[0, :]:
        #     if np.isnan(test_value) or np.isinf(test_value):
        #         print(test_value)

        # A single universal perturbation shared by all images in the batch.
        delta_adv = np.zeros((1, d))  ### initialized adv. perturbation

        # initialize the best solution & best loss
        best_adv_img = []  # successful adv image in [-0.5, 0.5]
        best_delta = []  # best perturbation
        best_distortion = (0.5 * d)**2  # threshold for best perturbation
        total_loss = np.zeros(I)  ## I: max iters
        l2s_loss_all = np.zeros(I)
        attack_flag = False
        first_flag = True  ## record first successful attack

        # parameter setting for ZO gradient estimation
        mu = args["mu"]  ### smoothing parameter

        ## learning rate
        base_lr = args["lr"]

        if arg_mode == "ZOAdaMM":
            ## parameter initialization for AdaMM
            v_init = 1e-7  #0.00001
            v_hat = v_init * np.ones((1, d))
            v = v_init * np.ones((1, d))

            m = np.zeros((1, d))
            # momentum parameter for first and second order moment
            beta_1 = 0.9
            beta_2 = 0.3  # only used by AMSGrad
            print(beta_1, beta_2)

        #for i in tqdm(range(I)):
        for i in range(I):

            # Optional 1/sqrt(t) learning-rate decay.
            if args["decay_lr"]:
                base_lr = args["lr"] / np.sqrt(i + 1)

            ## Total loss evaluation
            if args["constraint"] == 'uncons':
                total_loss[i], l2s_loss_all[i] = function_evaluation_uncons(
                    w_img_vec, kappa, target_label, const, model, orig_img,
                    arg_targeted_attack)

            else:
                total_loss[i], l2s_loss_all[i] = function_evaluation_cons(
                    w_img_vec, kappa, target_label, const, model, orig_img,
                    arg_targeted_attack)

            ## gradient estimation w.r.t. w_img_vec
            if arg_mode == "ZOSCD":
                grad_est = grad_coord_estimation(mu, q, w_img_vec, d, kappa,
                                                 target_label, const, model,
                                                 orig_img, arg_targeted_attack,
                                                 args["constraint"])
            elif arg_mode == "ZONES":
                grad_est = gradient_estimation_NES(mu, q, w_img_vec, d, kappa,
                                                   target_label, const, model,
                                                   orig_img,
                                                   arg_targeted_attack,
                                                   args["constraint"])
            else:
                grad_est = gradient_estimation_v2(mu, q, w_img_vec, d, kappa,
                                                  target_label, const, model,
                                                  orig_img,
                                                  arg_targeted_attack,
                                                  args["constraint"])

            # if np.remainder(i,50)==0:
            # print("total loss:",total_loss[i])
            # print(np.linalg.norm(grad_est, np.inf))

            ## ZO-Attack, unconstrained optimization formulation
            if arg_mode == "ZOSGD":
                delta_adv = delta_adv - base_lr * grad_est
            if arg_mode == "ZOsignSGD":
                delta_adv = delta_adv - base_lr * np.sign(grad_est)
            if arg_mode == "ZOSCD":
                delta_adv = delta_adv - base_lr * grad_est
            if arg_mode == "ZOAdaMM":
                # AdaMM/AMSGrad moment updates.
                m = beta_1 * m + (1 - beta_1) * grad_est
                v = beta_2 * v + (1 - beta_2) * np.square(grad_est)  ### vt
                #print(np.mean(np.abs(m)),np.mean(np.sqrt(v)))
                v_hat = np.maximum(v_hat, v)
                # NOTE(review): AMSGrad normally divides by sqrt(v_hat);
                # this divides by sqrt(v) while projecting with v_hat —
                # confirm this is intended.
                delta_adv = delta_adv - base_lr * m / np.sqrt(v)
                if args["constraint"] == 'cons':
                    tmp = delta_adv.copy()
                    #X_temp = orig_img_vec.reshape((-1,1))
                    #V_temp2 = np.diag(np.sqrt(v_hat.reshape(-1)+1e-10))
                    V_temp = np.sqrt(v_hat.reshape(1, -1))
                    delta_adv = projection_box(tmp, orig_img_vec, V_temp, -0.5,
                                               0.5)
                    #delta_adv2 = projection_box_2(tmp, X_temp, V_temp2, -0.5, 0.5)
                # v_init = 1e-2 #0.00001
                # v = v_init * np.ones((1, d))
                # m = np.zeros((1, d))
                # # momentum parameter for first and second order moment
                # beta_1 = 0.9
                # beta_2 = 0.99  # only used by AMSGrad
                # m = beta_1 * m + (1-beta_1) * grad_est
                # v = np.maximum(beta_2 * v + (1-beta_2) * np.square(grad_est),v)
                # delta_adv = delta_adv - base_lr * m /np.sqrt(v+1e-10)
                # if args["constraint"] == 'cons':
                #     V_temp = np.diag(np.sqrt(v.reshape(-1)+1e-10))
                #     X_temp = orig_img_vec.reshape((-1,1))
                #     delta_adv = projection_box(delta_adv, X_temp, V_temp, -0.5, 0.5)
            if arg_mode == "ZOSMD":
                delta_adv = delta_adv - 0.5 * base_lr * grad_est
                # delta_adv = delta_adv - base_lr* grad_est
                if args["constraint"] == 'cons':
                    #V_temp = np.eye(orig_img_vec.size)
                    V_temp = np.ones((1, d))
                    #X_temp = orig_img_vec.reshape((-1,1))
                    delta_adv = projection_box(delta_adv, orig_img_vec, V_temp,
                                               -0.5, 0.5)
            if arg_mode == "ZOPSGD":
                delta_adv = delta_adv - base_lr * grad_est
                if args["constraint"] == 'cons':
                    #V_temp = np.eye(orig_img_vec.size)
                    V_temp = np.ones((1, d))
                    #X_temp = orig_img_vec.reshape((-1,1))
                    delta_adv = projection_box(delta_adv, orig_img_vec, V_temp,
                                               -0.5, 0.5)
            if arg_mode == "ZONES":
                delta_adv = delta_adv - base_lr * np.sign(grad_est)
                if args["constraint"] == 'cons':
                    #V_temp = np.eye(orig_img_vec.size)
                    V_temp = np.ones((1, d))
                    #X = orig_img_vec.reshape((-1,1))
                    delta_adv = projection_box(delta_adv, orig_img_vec, V_temp,
                                               -0.5, 0.5)

            # if arg_mode == "ZO-AdaFom":
            #     m = beta_1 * m + (1-beta_1) * grad_est
            #     v = v* (float(i)/(i+1)) + np.square(grad_est)/(i+1)
            #     w_img_vec = w_img_vec - base_lr * m/np.sqrt(v)
            ##

            ### adv. example update
            w_img_vec = w_ori_img_vec + delta_adv

            ## covert back to adv_img in [-0.5 , 0.5]
            if args["constraint"] == 'uncons':
                adv_img_vec = 0.5 * np.tanh((w_img_vec)) / 0.999999  #
            else:
                adv_img_vec = w_img_vec.copy()

            adv_img = np.resize(adv_img_vec, orig_img.shape)

            ## update the best solution in the iterations
            attack_prob, _, _ = util.model_prediction(model, adv_img)
            target_prob = attack_prob[:, target_label]
            # Highest probability among all classes other than the target.
            attack_prob_tmp = attack_prob.copy()
            attack_prob_tmp[:, target_label] = 0
            other_prob = np.amax(attack_prob_tmp, 1)

            # Periodically checkpoint the current perturbation.
            if i % 1000 == 0 and i != 0:
                if arg_mode == "ZOAdaMM": print(beta_1, beta_2)
                print("save delta_adv")
                np.save(
                    'retimgs/' + str(i) + 'itrs' +
                    str(np.argmax(attack_prob, 1)) + arg_mode +
                    str(args["lr"]), delta_adv)

            if args["print_iteration"]:
                if np.remainder(i + 1, 20) == 0:
                    # "Succ" only when every image in the batch is fooled.
                    if (true_label != np.argmax(attack_prob, 1)).all():
                        print(
                            "Iter %d (Succ): ID = %d, lr = %3.7f, decay = %d, ZO = %s %s, loss = %3.5f, l2sdist = %3.5f, TL = %d, PL = %s"
                            % (i + 1, class_id, args["lr"],
                               int(args["decay_lr"]), arg_mode,
                               args["constraint"], total_loss[i],
                               l2s_loss_all[i], true_label,
                               np.argmax(attack_prob, 1)))
                    else:
                        sr = np.sum(
                            true_label != np.argmax(attack_prob, 1)) / arg_bsz
                        print(
                            "Iter %d (Fail): ID = %d, lr = %3.7f, decay = %d, ZO = %s %s, loss = %3.5f, l2sdist = %3.5f, TL = %d, PL = %s, succ rate = %.2f"
                            % (i + 1, class_id, args["lr"],
                               int(args["decay_lr"]), arg_mode,
                               args["constraint"], total_loss[i],
                               l2s_loss_all[i], true_label,
                               np.argmax(attack_prob, 1), sr))

            if arg_save_iteration:
                os.system("mkdir Examples")
                if (np.logical_or(
                        true_label != np.argmax(attack_prob, 1),
                        np.remainder(i + 1, 10) == 0)):  ## every 10 iterations
                    suffix = "id_{}_Mode_{}_True_{}_Pred_{}_Ite_{}".format(
                        class_id, arg_mode, true_label,
                        np.argmax(attack_prob, 1), i + 1)
                    # util.save_img(adv_img, "Examples/{}.png".format(suffix))

            # Record the best (lowest-distortion) and first successful
            # attacks; success requires a kappa-confidence log-prob margin
            # on every image in the batch.
            if arg_targeted_attack:
                if ((np.log(target_prob + 1e-10) - np.log(other_prob + 1e-10))
                        >= kappa).all():  # check attack confidence
                    if (distortion(adv_img, orig_img) <
                            best_distortion):  # check distortion
                        # print('best distortion obtained at',i,'-th iteration')
                        best_adv_img = adv_img
                        best_distortion = distortion(adv_img, orig_img)
                        #best_delta = adv_img - orig_img
                        best_iteration = i + 1
                        adv_class = np.argmax(attack_prob, 1)
                        attack_flag = True
                        ## Record first attack
                        if (first_flag):
                            first_flag = False  ### once gets into this, it will no longer record the next sucessful attack
                            first_adv_img = adv_img
                            first_distortion = distortion(adv_img, orig_img)
                            #first_delta = adv_img - orig_img
                            first_class = adv_class
                            first_iteration = i + 1
            else:
                if ((np.log(other_prob + 1e-10) - np.log(target_prob + 1e-10))
                        >= kappa).all():  # check attack confidence
                    if (distortion(adv_img, orig_img) <
                            best_distortion):  # check distortion
                        # print('best distortion obtained at',i,'-th iteration')
                        best_adv_img = adv_img
                        best_distortion = distortion(adv_img, orig_img)
                        #best_delta = adv_img - orig_img
                        best_iteration = i + 1
                        adv_class = np.argmax(attack_prob, 1)
                        attack_flag = True
                        ## Record first attack
                        if (first_flag):
                            first_flag = False
                            first_adv_img = adv_img
                            first_distortion = distortion(adv_img, orig_img)
                            #first_delta = adv_img - orig_img
                            first_class = adv_class
                            first_iteration = i + 1

        if (attack_flag):
            # os.system("mkdir Results_SL")
            # ## best attack (final attack)
            # suffix = "id_{}_Mode_{}_True_{}_Pred_{}".format(class_id, arg_mode, true_label, orig_class) ## orig_class, predicted label
            # suffix2 = "id_{}_Mode_{}_True_{}_Pred_{}".format(class_id, arg_mode, true_label, adv_class)
            # suffix3 = "id_{}_Mode_{}".format(class_id, arg_mode)
            # ### save original image
            # util.save_img(orig_img, "Results_SL/id_{}.png".format(class_id))
            # util.save_img(orig_img, "Results_SL/{}_Orig.png".format(suffix))
            # ### adv. image
            # util.save_img(best_adv_img, "Results_SL/{}_Adv_best.png".format(suffix2))
            # ### adv. perturbation
            # util.save_img(best_delta, "Results_SL/{}_Delta_best.png".format(suffix3))
            #
            #
            # ## first attack
            # suffix4 = "id_{}_Mode_{}_True_{}_Pred_{}".format(class_id, arg_mode, true_label, first_class)
            # ## first adv. imag
            # util.save_img(first_adv_img, "Results_SL/{}_Adv_first.png".format(suffix4))
            # ### first adv. perturbation
            # util.save_img(first_delta, "Results_SL/{}_Delta_first.png".format(suffix3))

            ## save data
            suffix0 = "id_{}_Mode_{}_{}_lr_{}_decay_{}_case{}_ini_{}".format(
                class_id, arg_mode, args["constraint"], str(args["lr"]),
                int(args["decay_lr"]), args["exp_code"], args["init_const"])
            np.savez("{}".format(suffix0),
                     id=class_id,
                     mode=arg_mode,
                     loss=total_loss,
                     perturbation=l2s_loss_all,
                     best_distortion=best_distortion,
                     first_distortion=first_distortion,
                     first_iteration=first_iteration,
                     best_iteation=best_iteration,
                     learn_rate=args["lr"],
                     decay_lr=args["decay_lr"],
                     attack_flag=attack_flag)
            ## print
            print("It takes {} iteations to find the first attack".format(
                first_iteration))
            # print(total_loss)
        else:
            ## save data
            suffix0 = "id_{}_Mode_{}_{}_lr_{}_decay_{}_case{}_ini_{}".format(
                class_id, arg_mode, args["constraint"], str(args["lr"]),
                int(args["decay_lr"]), args["exp_code"], args["init_const"])
            np.savez("{}".format(suffix0),
                     id=class_id,
                     mode=arg_mode,
                     loss=total_loss,
                     perturbation=l2s_loss_all,
                     best_distortion=best_distortion,
                     learn_rate=args["lr"],
                     decay_lr=args["decay_lr"],
                     attack_flag=attack_flag)
            print("Attack Fails")

        sys.stdout.flush()
Exemple #17
0
def main(args: dict) -> None:
    """Run a zeroth-order (ZO) black-box adversarial attack over an ensemble of
    image classifiers.

    Depending on the module-level flag ``MAX_W`` this runs either the
    ZO-minmax formulation (inner minimization over the perturbation, outer
    maximization over per-model/per-class weights) or the plain finite-sum
    version. Supported ZO optimizers are selected via ``args['mode']``:
    ZOSGD, ZOsignSGD, ZOSCD, ZOAdaMM, ZOSMD, ZOPSGD, ZONES, ZOPGD.

    Parameters
    ----------
    args : dict
        Experiment configuration; keys read here include 'class_id',
        'target_id', 'maxiter', 'init_const', 'kappa', 'q', 'mode',
        'save_iteration', 'dataset', 'targeted_attack', 'mini_batch_sz',
        'lr_idx', 'constraint', 'lr', 'decay_lr', 'mu', 'lmd', 'beta',
        'exp_code', 'print_iteration'.

    Side effects
    ------------
    Saves the original images, per-iteration loss traces and intermediate
    perturbations to .npy/.npz files, and prints progress to stdout.

    Relies on module-level names defined elsewhere in the file: SEED, MAX_W,
    GPUs, util, the dataset/model classes, the gradient-estimation helpers
    and the projection helpers.
    """

    # Fix all RNGs so runs are reproducible for a given SEED.
    random.seed(SEED)
    np.random.seed(SEED)
    tf.set_random_seed(SEED)

    print('ZO-minmax case') if MAX_W else print('ZO-Finite-Sum case')
    args["minmax"] = MAX_W
    class_id = args['class_id']  ### input image (natural example)
    target_id = args[
        'target_id']  ### target images id (adv example) if target attack
    arg_max_iter = args['maxiter']  ### max number of iterations
    arg_init_const = args[
        'init_const']  ### regularization prior to attack loss
    arg_kappa = args['kappa']  ### attack confidence level
    arg_q = args['q']  ### number of random direction vectors
    arg_mode = args['mode']  ### algorithm name
    arg_save_iteration = args['save_iteration']
    arg_Dataset = args["dataset"]
    arg_targeted_attack = args["targeted_attack"]
    #arg_models = args["models_number"]
    arg_bsz = args["mini_batch_sz"]
    idx_lr = args["lr_idx"]
    class_number = len(class_id)  # number of attacked (source) classes

    ## load classifier. For MNIST and CIFAR pixel value range is [-0.5,0.5]
    # NOTE(review): `sess` is not defined anywhere in this function, so the
    # mnist/cifar branches would raise NameError — presumably only the
    # 'imagenet' path is exercised; confirm before reusing those branches.
    if (arg_Dataset == 'mnist'):
        data, model = MNIST(), MNISTModel("models/mnist", sess, True)
    elif (arg_Dataset == 'cifar10'):
        data, model = CIFAR(), CIFARModel("models/cifar", sess, True)
    elif (arg_Dataset == 'imagenet'):
        data = ImageNet_Universal(SEED)

        # Each model gets its own graph + session so they can be pinned to
        # different GPUs (or share CPU when GPUs is falsy).
        g1 = tf.Graph()
        with g1.as_default():
            if GPUs:
                config1 = tf.ConfigProto(device_count={'GPU': 0})
                sess1 = tf.Session(graph=g1, config=config1)
            else:
                sess1 = tf.Session(graph=g1)
            model1 = InceptionModel(sess1, True)

#         g2=tf.Graph()
#         with g2.as_default():
#             if GPUs:
#                 config2 = tf.ConfigProto(device_count = {'GPU': 1})
#                 sess2=tf.Session(graph=g2,config=config2)
#             else:
#                 sess2=tf.Session(graph=g2)
#             model2 = ResnetModel152(sess2, True)
#
        g3 = tf.Graph()
        with g3.as_default():
            if GPUs:
                config3 = tf.ConfigProto(device_count={'GPU': 1})
                sess3 = tf.Session(graph=g3, config=config3)
            else:
                sess3 = tf.Session(graph=g3)
            model3 = ResnetModel50(sess3, True)

        models = [model1, model3]
    else:
        # NOTE(review): execution falls through after this print; the code
        # below would then fail on the undefined `data`/`models`.
        print('Please specify a valid dataset')

    # preprocess data for multiple classes
    orig_img, true_label, target_label = [], [], []

    # For each source class: collect test images of that class, keep only the
    # ones that BOTH models classify correctly, and take the first arg_bsz.
    for i in range(len(class_id)):

        #orig_img = np.load('ori_img_backup.npy')
        orig_img_ = data.test_data[np.where(
            np.argmax(data.test_labels, 1) == class_id[i])]
        #np.save('ori_img_backup',orig_img)

        #true_label = data.test_labels[np.where(np.argmax(data.test_labels,1) == class_id)]
        _, orig_class1 = util.model_prediction_u(
            models[0], orig_img_
        )  # take 50 or less images to make sure arg_bsz number of them are valid
        _, orig_class2 = util.model_prediction_u(
            models[1], orig_img_
        )  # take 50 or less images to make sure arg_bsz number of them are valid
        #_, orig_class3  = util.model_prediction_u(models[2],orig_img_) # take 50 or less images to make sure arg_bsz number of them are valid
        # filter out the images which misclassified already
        orig_img_ = orig_img_[np.where((orig_class1 == class_id[i])
                                       & (orig_class2 == class_id[i]))]
        assert orig_img_.shape[0] >= arg_bsz, 'no enough valid inputs'

        orig_img.append(orig_img_[:arg_bsz])

        #np.save('original_imgsID'+str(class_id), orig_img)
        #true_label = np.zeros((arg_bsz, 1001))
        #true_label[np.arange(arg_bsz), class_id] = 1
        true_label.append(class_id[i])  # [class_id[i]]*arg_bsz

        if arg_targeted_attack:  ### target attack
            #target_label = np.zeros((arg_bsz, 1001))
            #target_label[np.arange(arg_bsz), target_id] = 1
            target_label.append(target_id[i])
        else:
            # untargeted: "target" is the true class (loss pushes away from it)
            target_label.append(class_id[i])

    #orig_img, target = util.generate_data(data, class_id, target_label)
    orig_img = np.array(orig_img)
    np.save('original_imgs_ID' + str(class_id), orig_img)
    print('input images shape', orig_img.shape)
    print('true label', true_label)
    print('target label', target_label)

    # d = number of pixels per image (flattened dimension of one input)
    d = orig_img[0, 0].size
    print("dimension = ", d)

    # mu=1/d**2  # smoothing parameter
    q = arg_q + 0
    I = arg_max_iter + 0
    kappa = arg_kappa + 0
    const = arg_init_const + 0

    ## flatten image to vec: (class_number, batch, d)
    orig_img_vec = np.resize(orig_img, (class_number, arg_bsz, d))

    ## w adv image initialization
    if args["constraint"] == 'uncons':
        # * 0.999999 to avoid +-0.5 return +-infinity
        w_ori_img_vec = np.arctanh(
            2 * (orig_img_vec) *
            0.999999)  # in real value, note that orig_img_vec in [-0.5, 0.5]
        w_img_vec = w_ori_img_vec.copy()
    else:
        w_ori_img_vec = orig_img_vec.copy()
        w_img_vec = w_ori_img_vec.copy()

    # ## test ##
    # for test_value in w_ori_img_vec[0, :]:
    #     if np.isnan(test_value) or np.isinf(test_value):
    #         print(test_value)

    # A single universal perturbation of size d, shared across all images,
    # classes and models (broadcast when added to w_ori_img_vec).
    delta_adv = np.zeros((1, d))  ### initialized adv. perturbation

    # initialize the best solution & best loss
    # NOTE(review): best_adv_img / best_delta / first_flag are initialized but
    # never updated below — dead state kept from an earlier variant.
    best_adv_img = []  # successful adv image in [-0.5, 0.5]
    best_delta = []  # best perturbation
    best_distortion = (0.5 * d)**2  # threshold for best perturbation
    total_loss = np.zeros((I, len(models), class_number))  ## I: max iters
    l2s_loss_all = np.zeros((I, len(models), class_number))
    stationary = np.zeros(I)  # stationary-gap trace (minmax case only)
    attack_flag = False
    first_flag = True  ## record first successful attack
    # weights over (model, class) pairs; start uniform, summing to 1
    weights = np.ones((len(models), class_number),
                      dtype=np.float32) * 1.0 / (len(models) * class_number)
    weights_record = np.zeros((I, len(models), class_number))
    sr = []  # per-print success rates (fraction of batch misclassified)
    # parameter setting for ZO gradient estimation
    mu = args["mu"]  ### smoothing parameter

    ## learning rate
    base_lr = args["lr"]

    if arg_mode == "ZOAdaMM":
        ## parameter initialization for AdaMM
        v_init = 1e-7  #0.00001
        v_hat = v_init * np.ones((1, d))
        v = v_init * np.ones((1, d))

        m = np.zeros((1, d))
        # momentum parameter for first and second order moment
        beta_1 = 0.9
        beta_2 = 0.3  # only used by AMSGrad
        print(beta_1, beta_2)

    #for i in tqdm(range(I)):
    for i in range(I):

        # 1/sqrt(t) learning-rate decay, if requested
        if args["decay_lr"]:
            base_lr = args["lr"] / np.sqrt(i + 1)

        ## gradient estimation w.r.t. w_img_vec
        # NOTE(review): the ZOSCD/ZONES/default branches pass `model` (singular),
        # which is undefined on the imagenet path — only ZOPGD, which uses
        # `models`, appears safe there; confirm intended modes.
        if arg_mode == "ZOSCD":
            grad_est = grad_coord_estimation(mu, q, w_img_vec, d, kappa,
                                             target_label, const, model,
                                             orig_img, arg_targeted_attack,
                                             args["constraint"])
        elif arg_mode == "ZONES":
            grad_est = gradient_estimation_NES(mu, q, w_img_vec, d, kappa,
                                               target_label, const, model,
                                               orig_img, arg_targeted_attack,
                                               args["constraint"])
        elif args["mode"] == "ZOPGD":  # we use weights w instead const here
            grad_est = gradient_estimation_v3(mu, q, w_img_vec, d, kappa,
                                              target_label, weights, models,
                                              orig_img, arg_targeted_attack,
                                              args["constraint"], class_number)
        else:
            grad_est = gradient_estimation_v2(mu, q, w_img_vec, d, kappa,
                                              target_label, const, model,
                                              orig_img, arg_targeted_attack,
                                              args["constraint"])

        if args["mode"] == "ZOPGD":
            d_tmp = delta_adv.copy()  # pre-update delta, used by stationary gap
            delta_adv = delta_adv - base_lr * grad_est
            if args["constraint"] == 'cons':
                #V_temp = np.eye(orig_img_vec.size)
                V_temp = np.ones((1, d))
                #X_temp = orig_img_vec.reshape((-1,1))
                # project back into the valid pixel box with an L_inf budget
                # of 16/256 around the original images
                delta_adv = projection_box_models(delta_adv, orig_img_vec,
                                                  V_temp, -0.5, 0.5, 16 / 256)

        # if np.remainder(i,50)==0:
        # print("total loss:",total_loss[i])
        # print(np.linalg.norm(grad_est, np.inf))

        ## ZO-Attack, unconstrained optimization formulation
        if arg_mode == "ZOSGD":
            delta_adv = delta_adv - base_lr * grad_est
        if arg_mode == "ZOsignSGD":
            delta_adv = delta_adv - base_lr * np.sign(grad_est)
        if arg_mode == "ZOSCD":
            delta_adv = delta_adv - base_lr * grad_est
        if arg_mode == "ZOAdaMM":
            # Adam-style moment updates with the AMSGrad max-trick on v_hat.
            # NOTE(review): `m` is clobbered by the `for m in range(len(models))`
            # loss loop below, so this momentum buffer is reset to a loop index
            # each iteration when constraint=='cons' — likely a latent bug.
            m = beta_1 * m + (1 - beta_1) * grad_est
            v = beta_2 * v + (1 - beta_2) * np.square(grad_est)  ### vt
            #print(np.mean(np.abs(m)),np.mean(np.sqrt(v)))
            v_hat = np.maximum(v_hat, v)
            delta_adv = delta_adv - base_lr * m / np.sqrt(v)
            if args["constraint"] == 'cons':
                tmp = delta_adv.copy()
                #X_temp = orig_img_vec.reshape((-1,1))
                #V_temp2 = np.diag(np.sqrt(v_hat.reshape(-1)+1e-10))
                V_temp = np.sqrt(v_hat.reshape(1, -1))
                delta_adv = projection_box(tmp, orig_img_vec, V_temp, -0.5,
                                           0.5)
                #delta_adv2 = projection_box_2(tmp, X_temp, V_temp2, -0.5, 0.5)
            # v_init = 1e-2 #0.00001
            # v = v_init * np.ones((1, d))
            # m = np.zeros((1, d))
            # # momentum parameter for first and second order moment
            # beta_1 = 0.9
            # beta_2 = 0.99  # only used by AMSGrad
            # m = beta_1 * m + (1-beta_1) * grad_est
            # v = np.maximum(beta_2 * v + (1-beta_2) * np.square(grad_est),v)
            # delta_adv = delta_adv - base_lr * m /np.sqrt(v+1e-10)
            # if args["constraint"] == 'cons':
            #     V_temp = np.diag(np.sqrt(v.reshape(-1)+1e-10))
            #     X_temp = orig_img_vec.reshape((-1,1))
            #     delta_adv = projection_box(delta_adv, X_temp, V_temp, -0.5, 0.5)
        if arg_mode == "ZOSMD":
            delta_adv = delta_adv - 0.5 * base_lr * grad_est
            # delta_adv = delta_adv - base_lr* grad_est
            if args["constraint"] == 'cons':
                #V_temp = np.eye(orig_img_vec.size)
                V_temp = np.ones((1, d))
                #X_temp = orig_img_vec.reshape((-1,1))
                delta_adv = projection_box(delta_adv, orig_img_vec, V_temp,
                                           -0.5, 0.5)
        if arg_mode == "ZOPSGD":
            delta_adv = delta_adv - base_lr * grad_est
            if args["constraint"] == 'cons':
                #V_temp = np.eye(orig_img_vec.size)
                V_temp = np.ones((1, d))
                #X_temp = orig_img_vec.reshape((-1,1))
                delta_adv = projection_box(delta_adv, orig_img_vec, V_temp,
                                           -0.5, 0.5)
        if arg_mode == "ZONES":
            delta_adv = delta_adv - base_lr * np.sign(grad_est)
            if args["constraint"] == 'cons':
                #V_temp = np.eye(orig_img_vec.size)
                V_temp = np.ones((1, d))
                #X = orig_img_vec.reshape((-1,1))
                delta_adv = projection_box(delta_adv, orig_img_vec, V_temp,
                                           -0.5, 0.5)

        # if arg_mode == "ZO-AdaFom":
        #     m = beta_1 * m + (1-beta_1) * grad_est
        #     v = v* (float(i)/(i+1)) + np.square(grad_est)/(i+1)
        #     w_img_vec = w_img_vec - base_lr * m/np.sqrt(v)
        ##

        ### adv. example update (delta broadcast over classes and batch)
        w_img_vec = w_ori_img_vec + delta_adv

        ## Total loss evaluation
        if args["constraint"] == 'uncons':
            total_loss[i], l2s_loss_all[i] = function_evaluation_uncons(
                w_img_vec, kappa, target_label, const, model, orig_img,
                arg_targeted_attack)
        else:  # we are here
            # evaluate attack loss per (model m, class n) pair
            for m in range(len(models)):
                for n in range(class_number):
                    total_loss[i, m, n] = function_evaluation_cons_models(
                        w_img_vec[n], kappa, target_label[n], const, models[m],
                        orig_img[n], arg_targeted_attack)

        # solve max of w here
        if args["mode"] == "ZOPGD":
            if MAX_W:
                # projected-gradient ASCENT on the weights, with an L2
                # regularizer pulling toward the uniform distribution; the
                # bisection projects back onto the simplex.
                w_tmp = weights.copy()
                w_grad = total_loss[i] - 2 * args["lmd"] * (
                    weights - 1 / (len(models) * class_number))
                w_proj = weights + args["beta"] * w_grad
                weights = util.bisection(w_proj, 1, 1e-5, ub=1e5)
            weights_record[i] = weights

        if MAX_W:
            # NOTE(review): d_tmp/w_tmp are assigned only in the ZOPGD branch,
            # so MAX_W with any other mode raises NameError — confirm that
            # MAX_W implies mode=='ZOPGD'.
            stationary[i] = util.stationary_gap(d_tmp, delta_adv, base_lr,
                                                w_tmp, weights, args["beta"])
        #print(stationary[i])
        ## covert back to adv_img in [-0.5 , 0.5]
        if args["constraint"] == 'uncons':
            adv_img_vec = 0.5 * np.tanh((w_img_vec)) / 0.999999  #
        else:
            adv_img_vec = w_img_vec.copy()

        adv_img = np.resize(adv_img_vec, orig_img.shape)

        ## print_iteration
        ## update the best solution in the iterations
        #print(weights)
        if args["print_iteration"]:
            if np.remainder(i + 1, 20) == 0:  # report every 20 iterations
                for m in range(len(models)):
                    for c in range(class_number):
                        #print('model',m,' class id',class_id[c])
                        attack_prob, _, _ = util.model_prediction(
                            models[m], adv_img[c])
                        target_prob = attack_prob[:, target_label[c]]
                        attack_prob_tmp = attack_prob.copy()
                        attack_prob_tmp[:, target_label[c]] = 0
                        other_prob = np.amax(attack_prob_tmp, 1)
                        # success rate = fraction of the batch no longer
                        # predicted as the true class
                        sr.append(
                            np.sum(true_label[c] != np.argmax(attack_prob, 1))
                            / arg_bsz)
                        if (true_label[c] != np.argmax(attack_prob, 1)).all():
                            print(
                                "model %d class_id %d Iter %d (Succ): ID = %d, lr = %3.7f, decay = %d, ZO = %s %s, loss = %3.5f, TL = %d, PL = %s"
                                % (m, class_id[c], i + 1, class_id[c],
                                   args["lr"], int(args["decay_lr"]), arg_mode,
                                   args["constraint"], total_loss[i, m, c],
                                   true_label[c], np.argmax(attack_prob, 1)))
                        else:

                            print(
                                "model %d class_id %d Iter %d (Fail): ID = %d, lr = %3.7f, decay = %d, ZO = %s %s, loss = %3.5f, succ rate = %.2f"
                                % (m, class_id[c], i + 1, class_id[c],
                                   args["lr"], int(args["decay_lr"]), arg_mode,
                                   args["constraint"], total_loss[i, m,
                                                                  c], sr[-1]))
                print(weights)
                #print(np.max(np.abs(delta_adv)),np.min(w_img_vec),np.max(w_img_vec),np.sum(total_loss[i]),)
        print('sum of losses: ', np.sum(total_loss[i]), 'weighted loss',
              np.sum(total_loss[i] * weights))

        # checkpoint the universal perturbation every 1000 iterations
        # NOTE(review): `attack_prob` is only assigned inside the
        # print_iteration branch; this save would NameError otherwise.
        if i % 1000 == 0 and i != 0:
            if arg_mode == "ZOAdaMM": print(beta_1, beta_2)
            print("save delta_adv")
            np.save(
                'retimgs_nips/' + str(i) + 'itrs' +
                str(np.argmax(attack_prob, 1)) + arg_mode + str(args["lr"]) +
                str(args["lmd"]), delta_adv)
            #np.save('retimgs/'+str(i)+'itrs'+str(np.argmax(attack_prob,1))+arg_mode+str(args["lr"])+'_weights',weights_record)

        if arg_save_iteration:
            os.system("mkdir Examples")
            if (np.logical_or(true_label != np.argmax(attack_prob, 1),
                              np.remainder(i + 1,
                                           10) == 0)):  ## every 10 iterations
                suffix = "id_{}_Mode_{}_True_{}_Pred_{}_Ite_{}".format(
                    class_id, arg_mode, true_label, np.argmax(attack_prob, 1),
                    i + 1)
                # util.save_img(adv_img, "Examples/{}.png".format(suffix))

    # NOTE(review): attack_flag is never set True anywhere above, so this
    # branch is dead (and it reads `first_iteration`, which is never
    # assigned in this function); execution always reaches the else below.
    if (attack_flag):

        ## save data
        suffix0 = "id_{}_Mode_{}_{}_lr_{}_decay_{}_case{}_ini_{}".format(
            class_id[0], arg_mode, args["constraint"], str(args["lr"]),
            int(args["decay_lr"]), args["exp_code"], args["init_const"])
        np.savez(
            "{}".format(suffix0),
            id=class_id,
            mode=arg_mode,
            loss=total_loss,
            weights=weights_record,
            sr=np.array(sr),
            stationary=stationary
            #best_distortion=best_distortion, first_distortion=first_distortion,
            #first_iteration=first_iteration, best_iteation=best_iteration,
            #learn_rate=args["lr"], decay_lr = args["decay_lr"], attack_flag = attack_flag
        )
        ## print
        print("It takes {} iteations to find the first attack".format(
            first_iteration))
        # print(total_loss)
    else:
        ## save data
        suffix0 = "id_{}_Mode_{}_{}_lr_{}_decay_{}_case{}_ini_{}".format(
            class_id[0], arg_mode, args["constraint"], str(args["lr"]),
            int(args["decay_lr"]), args["exp_code"], args["init_const"])
        np.savez(
            "{}".format(suffix0),
            id=class_id,
            mode=arg_mode,
            loss=total_loss,
            weights=weights_record,
            sr=np.array(sr),
            stationary=stationary
            #best_distortion=best_distortion,  learn_rate=args["lr"], decay_lr = args["decay_lr"], attack_flag = attack_flag
        )
        print("Attack Fails")

    sys.stdout.flush()