Example #1
0
def main():
    """Train the CIFAR ResNet with on-the-fly augmentation and save its weights.

    The model is compiled with the module-level loss ``fn`` and trained for
    300 epochs on augmented batches; the test split is used as validation
    data. Weights are written to ``Models/<tag>``.
    """
    tag = "GBP_0"
    batch_size = 128

    network = CIFARModel().model  # pure resnet
    dataset = CIFAR(tag)

    # The optimizer starts at lr=0; the LearningRateScheduler callback built
    # from get_lr is what drives the learning rate during training.
    optimizer = SGD(lr=0.00, momentum=0.9, nesterov=False)
    lr_callback = LearningRateScheduler(get_lr)

    network.compile(loss=fn, optimizer=optimizer, metrics=['accuracy'])

    # Random rotations, shifts and horizontal flips applied per batch.
    augmenter = ImageDataGenerator(
        rotation_range=10,
        width_shift_range=0.1,
        height_shift_range=0.1,
        horizontal_flip=True,
    )
    augmenter.fit(dataset.train_data)

    train_batches = augmenter.flow(dataset.train_data,
                                   dataset.train_labels,
                                   batch_size=batch_size)
    steps = dataset.train_data.shape[0] // batch_size
    held_out = (dataset.test_data, dataset.test_labels)

    network.fit_generator(train_batches,
                          steps_per_epoch=steps,
                          epochs=300,
                          verbose=1,
                          validation_data=held_out,
                          callbacks=[lr_callback])

    network.save_weights('Models/{}'.format(tag))
def main(_):
    """Craft FGSM adversarial examples for the selected dataset and evaluate them.

    Reads ``FLAGS.dataset`` (``'MNIST'`` or ``'Cifar'``), loads the matching
    data and pretrained model, builds the FGSM graph, attacks both the train
    and test splits, saves the adversarial arrays under
    ``adversarial_outputs/`` and evaluates the model on legitimate and
    adversarial inputs.

    :param _: unused positional argument (ignored).
    :raises ValueError: if ``FLAGS.dataset`` is not a supported dataset name.
    """
    with tf.Session() as sess:
        K.set_session(sess)
        # The original compared against the misspelled flag ``FLAGS.datset``,
        # so the Cifar branch could never be selected.
        if FLAGS.dataset == 'MNIST':
            data, model = MNIST(), MNISTModel("models/mnist", sess)
            fgsm_params = {'epochs': 9, 'eps': 0.02}
        elif FLAGS.dataset == 'Cifar':
            data, model = CIFAR(), CIFARModel("models/cifar", sess)
            fgsm_params = {'epochs': 4, 'eps': 0.01}
        else:
            raise ValueError(
                "Unsupported dataset {!r}; expected 'MNIST' or 'Cifar'".format(
                    FLAGS.dataset))

        def _model_fn(x, logits=False):
            # model.predict returns (probabilities, logits); hand back the
            # logits only when the caller asks for them.
            ybar, logits_ = model.predict(x)
            if logits:
                return ybar, logits_
            return ybar

        # NOTE(review): `x` is not defined inside this function; presumably a
        # module-level input placeholder -- confirm against the full file.
        x_adv = fgsm(_model_fn, x, **fgsm_params)

        X_adv_test = attack(x_adv, data.test_data, data.test_labels, sess)
        X_adv_train = attack(x_adv, data.train_data, data.train_labels, sess)

        np.save('adversarial_outputs/fgsm_train_' + FLAGS.dataset.lower() + '.npy', X_adv_train)
        np.save('adversarial_outputs/fgsm_test_' + FLAGS.dataset.lower() + '.npy', X_adv_test)

        print("Legit/Adversarial training set")
        model.evaluate(data.train_data, data.train_labels)
        model.evaluate(X_adv_train, data.train_labels)

        print("Legit/Adversarial test set")
        model.evaluate(data.test_data, data.test_labels)
        model.evaluate(X_adv_test, data.test_labels)
Example #3
0
def DCN(trainpath,testpath,modelpath,dstl = False, target = True):
    """Run the detector + corrector pipeline and print its summary metrics.

    A binary model is fitted on the train/test data, ``detect`` is run on the
    test data, and a ``Corrector`` built around a ``CIFARModel`` loaded from
    ``modelpath`` is applied to the detector output. The function prints
    ``accuracy_good``, ``attack_success`` and the corrector wall time; it
    returns nothing.

    :param trainpath: path passed to ``load_data`` for the training set
    :param testpath: path passed to ``load_data`` for the test set (also
        handed to the ``Corrector``)
    :param modelpath: path passed to ``CIFARModel``
    :param dstl: when true, ``test.dstl()`` is called before detection
    :param target: forwarded to the ``Corrector``; also selects the divisor
        (9 when true, 1 otherwise) used in ``attack_success``
    """
    train = load_data(trainpath)
    test = load_data(testpath)

    if dstl:
        # Only the test split is processed here; the train-side call was
        # already disabled in the original code.
        test.dstl()

    para = 9 if target else 1

    model, adv_accu = binary_model(train, test)
    false = detect(model, test)

    print('**********corrector************')
    region_model = CIFARModel(modelpath)
    started = time.time()
    corrector = Corrector(region_model, testpath, false, target = target, r=0.02, n = 50)
    error = corrector.correct()
    finished = time.time()

    accuracy_good = (test.num - error[0]) / test.num
    attack_success = (1 - adv_accu) + error[1] / test.num / para
    print('accuracy_good:', accuracy_good)
    print('attack_success:', attack_success)
    print('time:', finished - started)
def prep_bbox(sess, x, y, X_train, Y_train, X_test, Y_test, nb_epochs,
              batch_size, learning_rate):
    """
    Build the black-box oracle model, train (or load) it, and report its
    accuracy on the legitimate test set.

    The architecture is picked from the module-level ``DATASET`` value
    (``"mnist"`` selects ``MNISTModel``, anything else ``CIFARModel``), and
    ``FLAGS.load_pretrain`` decides between loading saved weights and
    training from the supplied data.
    :param sess: the TF session
    :param x: the input placeholder
    :param y: the output placeholder
    :param X_train: the training data for the oracle
    :param Y_train: the training labels for the oracle
    :param X_test: the testing data for the oracle
    :param Y_test: the testing labels for the oracle
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :return: tuple ``(model, predictions, accuracy)``
    """

    # Pick the oracle architecture and wire it to the input placeholder
    model_cls = MNISTModel if DATASET == "mnist" else CIFARModel
    model = model_cls(use_log=True).model
    predictions = model(x)
    print("Defined TensorFlow model graph.")

    # Obtain oracle weights: train from scratch unless a pretrained
    # checkpoint was requested
    if not FLAGS.load_pretrain:
        model_train(sess,
                    x,
                    y,
                    predictions,
                    X_train,
                    Y_train,
                    verbose=True,
                    save=True,
                    args={
                        'nb_epochs': nb_epochs,
                        'batch_size': batch_size,
                        'learning_rate': learning_rate
                    })
    else:
        tf_model_load(sess)

    # Measure accuracy on unperturbed test examples
    accuracy = model_eval(sess,
                          x,
                          y,
                          predictions,
                          X_test,
                          Y_test,
                          args={'batch_size': batch_size})
    print('Test accuracy of black-box on legitimate test '
          'examples: ' + str(accuracy))

    return model, predictions, accuracy
Example #5
0
def train_sub(sess, x, y, bbox_preds, X_sub, Y_sub, nb_classes,
              nb_epochs_s, batch_size, learning_rate, data_aug, lmbda):
    """
    Train the substitute model, alternating between fitting it on the
    current substitute dataset and growing that dataset with Jacobian-based
    augmentation labeled by the black-box.
    :param sess: TF session
    :param x: input TF placeholder
    :param y: output TF placeholder
    :param bbox_preds: output of black-box model predictions
    :param X_sub: initial substitute training data
    :param Y_sub: initial substitute training labels
    :param nb_classes: number of output classes
    :param nb_epochs_s: number of epochs to train substitute model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :param data_aug: number of times substitute training data is augmented
    :param lmbda: lambda from arxiv.org/abs/1602.02697
    :return: tuple ``(model_sub, preds_sub)`` -- the substitute model and
        its symbolic predictions on ``x``
    """
    # Build the substitute network and its symbolic output
    substitute = CIFARModel(use_log = True).model
    substitute_preds = substitute(x)
    print("Defined TensorFlow model graph for the substitute.")

    # Symbolic Jacobian of the substitute output w.r.t. the input
    jacobian = jacobian_graph(substitute_preds, x, nb_classes)

    final_round = data_aug - 1
    for rho in xrange(data_aug):
        print("Substitute training epoch #" + str(rho))
        model_train(sess, x, y, substitute_preds, X_sub,
                    to_categorical(Y_sub),
                    init_all=False, verbose=False,
                    args={
                        'nb_epochs': nb_epochs_s,
                        'batch_size': batch_size,
                        'learning_rate': learning_rate
                    })

        # Nothing left to augment after the last training round
        if rho >= final_round:
            continue

        print("Augmenting substitute training data.")
        X_sub = jacobian_augmentation(sess, x, X_sub, Y_sub, jacobian, lmbda)

        print("Labeling substitute training data.")
        # Duplicate the labels as placeholders, then overwrite the second
        # half with black-box labels for the second half of the augmented
        # data (treated here as the newly generated points).
        Y_sub = np.hstack([Y_sub, Y_sub])
        midpoint = int(len(X_sub)/2)
        synthetic = X_sub[midpoint:]
        oracle_out = batch_eval(sess, [x], [bbox_preds], [synthetic],
                                args={'batch_size': batch_size})[0]
        # The adversary only observes the predicted label, not the
        # probabilities, hence the argmax.
        Y_sub[midpoint:] = np.argmax(oracle_out, axis=1)

    return substitute, substitute_preds
Example #6
0
def main(args):
    """Run a C&W L2 or ADMM L0 attack on the selected dataset and score it.

    Keys read from ``args``: ``dataset`` (``'mnist'``, ``'cifar'`` or
    ``'imagenet'``), ``seed_imagenet``, ``adversarial``, ``temp``,
    ``numimg``, ``seed``, ``attack`` (``'L2C'`` or ``'L0A'``),
    ``batch_size``, ``maxiter``, ``conf``, ``binary_steps``,
    ``iteration_steps``, ``ro``, ``abort_early`` and ``train``.

    :param args: dict of experiment options (see keys above).
    :raises ValueError: if ``args['dataset']`` or ``args['attack']`` is not
        one of the supported values. Previously these cases surfaced later
        as a ``NameError`` on an unbound ``data`` / ``attack`` variable (for
        the attack, only after the sample generation had already run).
    """
    supported_attacks = ('L2C', 'L0A')
    if args['attack'] not in supported_attacks:
        raise ValueError("Unsupported attack {!r}; expected one of {}".format(
            args['attack'], supported_attacks))

    with tf.Session() as sess:
        if args['dataset'] == 'mnist':
            data, model = MNIST(), MNISTModel("models/mnist", sess)
            handpick = False
            inception = False
        elif args['dataset'] == "cifar":
            data, model = CIFAR(), CIFARModel("models/cifar", sess)
            handpick = True
            inception = False
        elif args['dataset'] == "imagenet":
            data, model = ImageNet(args['seed_imagenet']), InceptionModel(sess)
            handpick = True
            inception = True
        else:
            raise ValueError("Unsupported dataset {!r}".format(
                args['dataset']))

        # Optional model overrides. NOTE(review): the adversarially trained
        # override always loads an MNIST model regardless of the dataset --
        # confirm this is intended.
        if args['adversarial'] != "none":
            model = MNISTModel(
                "models/mnist_cwl2_admm" + str(args['adversarial']), sess)

        if args['temp'] and args['dataset'] == 'mnist':
            model = MNISTModel("models/mnist-distilled-" + str(args['temp']),
                               sess)
        if args['temp'] and args['dataset'] == 'cifar':
            model = CIFARModel("models/cifar-distilled-" + str(args['temp']),
                               sess)

        inputs, targets, labels, true_ids = generate_data(
            data,
            model,
            samples=args['numimg'],
            targeted=True,
            start=0,
            inception=inception,
            handpick=handpick,
            seed=args['seed'])

        if args['attack'] == 'L2C':
            attack = CarliniL2(sess,
                               model,
                               batch_size=args['batch_size'],
                               max_iterations=args['maxiter'],
                               confidence=args['conf'],
                               binary_search_steps=args['binary_steps'],
                               abort_early=args['abort_early'])
        else:  # 'L0A' (validated above)
            attack = ADMML0(sess,
                            model,
                            batch_size=args['batch_size'],
                            max_iterations=args['maxiter'],
                            confidence=args['conf'],
                            binary_search_steps=args['iteration_steps'],
                            ro=args['ro'],
                            abort_early=args['abort_early'])

        timestart = time.time()
        adv = attack.attack(inputs, targets)
        timeend = time.time()

        print("Took", timeend - timestart, "seconds to run", len(inputs),
              "samples.\n")

        if args['train']:
            np.save('labels_train.npy', labels)
            np.save(str(args['attack']) + '_train.npy', adv)

        # NOTE(review): a non-zero confidence swaps in the distilled MNIST
        # model for scoring, whatever the dataset -- confirm this is intended.
        if (args['conf'] != 0):
            model = MNISTModel("models/mnist-distilled-100", sess)

        # L0-family attacks are scored with the L0 routine; everything else
        # with the L1/L2/Linf routine.
        if args['attack'] not in ('L0A', 'L0AE', 'L0C', 'L0AE2'):
            l1_l2_li_computation(args, data, model, adv, inception, inputs,
                                 targets, labels, true_ids)
        else:
            l0_computation(args, data, model, adv, inception, inputs, targets,
                           labels, true_ids)
Example #7
0
    mask_min = mask.min()
    mask_max = mask.max()
    mask_vis = (mask - mask_min) / (mask_max - mask_min)
    ax[1].imshow(mask_vis, cmap='jet', alpha=0.6)
    ax[1].axis('off')
    return fig


# Script section: explain one CIFAR test image with NeuroMask.
FLAGS = flags.FLAGS
cifar_data = CIFAR()
test_idx = FLAGS.test_idx
# Shift the selected test image by +0.5.
# NOTE(review): presumably the data is stored in [-0.5, 0.5] and this maps it
# to [0, 1] -- confirm against the CIFAR loader.
test_img = cifar_data.test_data[test_idx] + 0.5
tf.reset_default_graph()

with tf.Session() as sess:
    model = CIFARModel('cifar10_model', sess, False)
    # Single-image placeholder (batch of 1, 32x32 RGB).
    input_holder = tf.placeholder(tf.float32, [1, 32, 32, 3], name='x')
    model_out = model(input_holder)

    mask_net = NeuroMask(model,
                         coeffs=(0.4, 0.35, FLAGS.smooth_lambda),
                         temp=1,
                         is_cifar=True)
    mask_net.init_model(sess)
    # Model output for the test image; `pred_` is not used again in the
    # visible part of the script.
    pred_ = sess.run(model_out, feed_dict={input_holder: [test_img]})
    # Ground-truth class index taken as the argmax of the stored label vector.
    print('correct label = ',
          np.argmax(cifar_data.test_labels[test_idx], axis=0))
    # No explicit target label is passed; the optimisation runs for
    # FLAGS.num_iters iterations.
    mask_result = mask_net.explain(sess,
                                   test_img,
                                   target_label=None,
                                   iters=FLAGS.num_iters)
Example #8
0
def main(args):
    """Run a zeroth-order (ZO) attack with one shared perturbation and save traces.

    A single perturbation ``delta_adv`` of shape ``(1, d)`` is optimised with
    the ZO method named by ``args['mode']`` and added to every selected
    image. Per-iteration losses, the model/class weights, success rates and
    the stationarity measure are written to an ``.npz`` archive at the end.

    Keys read from ``args``: ``class_id``, ``target_id``, ``maxiter``,
    ``init_const``, ``kappa``, ``q``, ``mode``, ``save_iteration``,
    ``dataset``, ``targeted_attack``, ``mini_batch_sz``, ``lr_idx``,
    ``constraint``, ``mu``, ``lr``, ``decay_lr``, ``lmd``, ``beta``,
    ``print_iteration`` and ``exp_code``. ``args['minmax']`` is overwritten
    with the module-level ``MAX_W``.

    :param args: dict of experiment options (see keys above).
    :raises ValueError: if ``args['dataset']`` is not ``'mnist'``,
        ``'cifar10'`` or ``'imagenet'`` (previously a message was printed and
        the function crashed later on the undefined ``data``).

    NOTE(review): several names depend on configuration not visible here:
    ``sess`` is not defined in this function (presumably module-level);
    ``model`` is only bound for mnist/cifar10 and ``models`` only for
    imagenet, yet the image filtering and loss evaluation always use
    ``models``; ``d_tmp``/``w_tmp`` exist only in ZOPGD mode but are read
    whenever ``MAX_W`` is true; ``attack_prob`` exists only after a
    ``print_iteration`` report. Confirm the supported combinations.
    """

    random.seed(SEED)
    np.random.seed(SEED)
    tf.set_random_seed(SEED)

    print('ZO-minmax case' if MAX_W else 'ZO-Finite-Sum case')
    args["minmax"] = MAX_W
    class_id = args['class_id']  ### input image (natural example)
    target_id = args[
        'target_id']  ### target images id (adv example) if target attack
    arg_max_iter = args['maxiter']  ### max number of iterations
    arg_init_const = args[
        'init_const']  ### regularization prior to attack loss
    arg_kappa = args['kappa']  ### attack confidence level
    arg_q = args['q']  ### number of random direction vectors
    arg_mode = args['mode']  ### algorithm name
    arg_save_iteration = args['save_iteration']
    arg_Dataset = args["dataset"]
    arg_targeted_attack = args["targeted_attack"]
    arg_bsz = args["mini_batch_sz"]
    idx_lr = args["lr_idx"]
    class_number = len(class_id)

    ## load classifier. For MNIST and CIFAR pixel value range is [-0.5,0.5]
    if (arg_Dataset == 'mnist'):
        data, model = MNIST(), MNISTModel("models/mnist", sess, True)
    elif (arg_Dataset == 'cifar10'):
        data, model = CIFAR(), CIFARModel("models/cifar", sess, True)
    elif (arg_Dataset == 'imagenet'):
        data = ImageNet_Universal(SEED)

        # Each ImageNet model lives in its own graph and session.
        g1 = tf.Graph()
        with g1.as_default():
            if GPUs:
                config1 = tf.ConfigProto(device_count={'GPU': 0})
                sess1 = tf.Session(graph=g1, config=config1)
            else:
                sess1 = tf.Session(graph=g1)
            model1 = InceptionModel(sess1, True)

        g3 = tf.Graph()
        with g3.as_default():
            if GPUs:
                config3 = tf.ConfigProto(device_count={'GPU': 1})
                sess3 = tf.Session(graph=g3, config=config3)
            else:
                sess3 = tf.Session(graph=g3)
            model3 = ResnetModel50(sess3, True)

        models = [model1, model3]
    else:
        raise ValueError('Please specify a valid dataset')

    # preprocess data for multiple classes
    orig_img, true_label, target_label = [], [], []

    for cls_pos, cls in enumerate(class_id):
        # all test images whose ground-truth class is `cls`
        orig_img_ = data.test_data[np.where(
            np.argmax(data.test_labels, 1) == cls)]

        _, orig_class1 = util.model_prediction_u(models[0], orig_img_)
        _, orig_class2 = util.model_prediction_u(models[1], orig_img_)
        # keep only the images that both models already classify correctly
        orig_img_ = orig_img_[np.where((orig_class1 == cls)
                                       & (orig_class2 == cls))]
        assert orig_img_.shape[0] >= arg_bsz, 'no enough valid inputs'

        orig_img.append(orig_img_[:arg_bsz])
        true_label.append(cls)

        if arg_targeted_attack:  ### target attack
            target_label.append(target_id[cls_pos])
        else:
            target_label.append(cls)

    orig_img = np.array(orig_img)
    np.save('original_imgs_ID' + str(class_id), orig_img)
    print('input images shape', orig_img.shape)
    print('true label', true_label)
    print('target label', target_label)

    d = orig_img[0, 0].size
    print("dimension = ", d)

    q = arg_q + 0
    I = arg_max_iter + 0
    kappa = arg_kappa + 0
    const = arg_init_const + 0

    ## flatten image to vec
    orig_img_vec = np.resize(orig_img, (class_number, arg_bsz, d))

    ## w adv image initialization
    if args["constraint"] == 'uncons':
        # * 0.999999 to avoid +-0.5 return +-infinity
        w_ori_img_vec = np.arctanh(
            2 * (orig_img_vec) *
            0.999999)  # in real value, note that orig_img_vec in [-0.5, 0.5]
        w_img_vec = w_ori_img_vec.copy()
    else:
        w_ori_img_vec = orig_img_vec.copy()
        w_img_vec = w_ori_img_vec.copy()

    delta_adv = np.zeros((1, d))  ### initialized adv. perturbation

    # initialize the best solution & best loss
    best_adv_img = []  # successful adv image in [-0.5, 0.5]
    best_delta = []  # best perturbation
    best_distortion = (0.5 * d)**2  # threshold for best perturbation
    total_loss = np.zeros((I, len(models), class_number))  ## I: max iters
    l2s_loss_all = np.zeros((I, len(models), class_number))
    stationary = np.zeros(I)
    # Neither flag is updated anywhere below, so the "Attack Fails" report is
    # always the one printed at the end.
    attack_flag = False
    first_flag = True  ## record first successful attack
    first_iteration = None  # never assigned by the loop; kept so the report cannot NameError
    weights = np.ones((len(models), class_number),
                      dtype=np.float32) * 1.0 / (len(models) * class_number)
    weights_record = np.zeros((I, len(models), class_number))
    sr = []
    # parameter setting for ZO gradient estimation
    mu = args["mu"]  ### smoothing parameter

    ## learning rate
    base_lr = args["lr"]

    if arg_mode == "ZOAdaMM":
        ## parameter initialization for AdaMM
        v_init = 1e-7
        v_hat = v_init * np.ones((1, d))
        v = v_init * np.ones((1, d))

        m = np.zeros((1, d))
        # momentum parameter for first and second order moment
        beta_1 = 0.9
        beta_2 = 0.3  # only used by AMSGrad
        print(beta_1, beta_2)

    if arg_save_iteration:
        # Created once up front instead of shelling out to `mkdir` on every
        # iteration.
        os.makedirs("Examples", exist_ok=True)

    for i in range(I):

        if args["decay_lr"]:
            base_lr = args["lr"] / np.sqrt(i + 1)

        ## gradient estimation w.r.t. w_img_vec
        if arg_mode == "ZOSCD":
            grad_est = grad_coord_estimation(mu, q, w_img_vec, d, kappa,
                                             target_label, const, model,
                                             orig_img, arg_targeted_attack,
                                             args["constraint"])
        elif arg_mode == "ZONES":
            grad_est = gradient_estimation_NES(mu, q, w_img_vec, d, kappa,
                                               target_label, const, model,
                                               orig_img, arg_targeted_attack,
                                               args["constraint"])
        elif args["mode"] == "ZOPGD":  # we use weights w instead const here
            grad_est = gradient_estimation_v3(mu, q, w_img_vec, d, kappa,
                                              target_label, weights, models,
                                              orig_img, arg_targeted_attack,
                                              args["constraint"], class_number)
        else:
            grad_est = gradient_estimation_v2(mu, q, w_img_vec, d, kappa,
                                              target_label, const, model,
                                              orig_img, arg_targeted_attack,
                                              args["constraint"])

        if args["mode"] == "ZOPGD":
            d_tmp = delta_adv.copy()  # previous iterate, for the stationary gap
            delta_adv = delta_adv - base_lr * grad_est
            if args["constraint"] == 'cons':
                V_temp = np.ones((1, d))
                delta_adv = projection_box_models(delta_adv, orig_img_vec,
                                                  V_temp, -0.5, 0.5, 16 / 256)

        ## ZO-Attack, unconstrained optimization formulation
        if arg_mode == "ZOSGD":
            delta_adv = delta_adv - base_lr * grad_est
        if arg_mode == "ZOsignSGD":
            delta_adv = delta_adv - base_lr * np.sign(grad_est)
        if arg_mode == "ZOSCD":
            delta_adv = delta_adv - base_lr * grad_est
        if arg_mode == "ZOAdaMM":
            m = beta_1 * m + (1 - beta_1) * grad_est
            v = beta_2 * v + (1 - beta_2) * np.square(grad_est)  ### vt
            v_hat = np.maximum(v_hat, v)
            # NOTE(review): the step divides by sqrt(v) while the projection
            # uses v_hat -- confirm this asymmetry is intended.
            delta_adv = delta_adv - base_lr * m / np.sqrt(v)
            if args["constraint"] == 'cons':
                tmp = delta_adv.copy()
                V_temp = np.sqrt(v_hat.reshape(1, -1))
                delta_adv = projection_box(tmp, orig_img_vec, V_temp, -0.5,
                                           0.5)
        if arg_mode == "ZOSMD":
            delta_adv = delta_adv - 0.5 * base_lr * grad_est
            if args["constraint"] == 'cons':
                V_temp = np.ones((1, d))
                delta_adv = projection_box(delta_adv, orig_img_vec, V_temp,
                                           -0.5, 0.5)
        if arg_mode == "ZOPSGD":
            delta_adv = delta_adv - base_lr * grad_est
            if args["constraint"] == 'cons':
                V_temp = np.ones((1, d))
                delta_adv = projection_box(delta_adv, orig_img_vec, V_temp,
                                           -0.5, 0.5)
        if arg_mode == "ZONES":
            delta_adv = delta_adv - base_lr * np.sign(grad_est)
            if args["constraint"] == 'cons':
                V_temp = np.ones((1, d))
                delta_adv = projection_box(delta_adv, orig_img_vec, V_temp,
                                           -0.5, 0.5)

        ### adv. example update (the single perturbation broadcasts over all images)
        w_img_vec = w_ori_img_vec + delta_adv

        ## Total loss evaluation
        if args["constraint"] == 'uncons':
            total_loss[i], l2s_loss_all[i] = function_evaluation_uncons(
                w_img_vec, kappa, target_label, const, model, orig_img,
                arg_targeted_attack)
        else:
            # Loop indices are named model_idx / n so they do not overwrite
            # the AdaMM first-moment vector `m`.
            for model_idx in range(len(models)):
                for n in range(class_number):
                    total_loss[i, model_idx,
                               n] = function_evaluation_cons_models(
                                   w_img_vec[n], kappa, target_label[n],
                                   const, models[model_idx], orig_img[n],
                                   arg_targeted_attack)

        # solve max of w here
        if args["mode"] == "ZOPGD":
            if MAX_W:
                w_tmp = weights.copy()
                # ascent step on the weights with a quadratic pull towards
                # the uniform distribution
                w_grad = total_loss[i] - 2 * args["lmd"] * (
                    weights - 1 / (len(models) * class_number))
                w_proj = weights + args["beta"] * w_grad
                weights = util.bisection(w_proj, 1, 1e-5, ub=1e5)
            weights_record[i] = weights

        if MAX_W:
            stationary[i] = util.stationary_gap(d_tmp, delta_adv, base_lr,
                                                w_tmp, weights, args["beta"])

        ## convert back to adv_img in [-0.5 , 0.5]
        if args["constraint"] == 'uncons':
            adv_img_vec = 0.5 * np.tanh((w_img_vec)) / 0.999999
        else:
            adv_img_vec = w_img_vec.copy()

        adv_img = np.resize(adv_img_vec, orig_img.shape)

        ## print_iteration: report every 20 iterations
        if args["print_iteration"]:
            if np.remainder(i + 1, 20) == 0:
                for model_idx in range(len(models)):
                    for c in range(class_number):
                        attack_prob, _, _ = util.model_prediction(
                            models[model_idx], adv_img[c])
                        target_prob = attack_prob[:, target_label[c]]
                        attack_prob_tmp = attack_prob.copy()
                        attack_prob_tmp[:, target_label[c]] = 0
                        other_prob = np.amax(attack_prob_tmp, 1)
                        # fraction of the batch no longer predicted as the
                        # true class
                        sr.append(
                            np.sum(true_label[c] != np.argmax(attack_prob, 1))
                            / arg_bsz)
                        if (true_label[c] != np.argmax(attack_prob, 1)).all():
                            print(
                                "model %d class_id %d Iter %d (Succ): ID = %d, lr = %3.7f, decay = %d, ZO = %s %s, loss = %3.5f, TL = %d, PL = %s"
                                % (model_idx, class_id[c], i + 1, class_id[c],
                                   args["lr"], int(args["decay_lr"]), arg_mode,
                                   args["constraint"],
                                   total_loss[i, model_idx, c], true_label[c],
                                   np.argmax(attack_prob, 1)))
                        else:
                            print(
                                "model %d class_id %d Iter %d (Fail): ID = %d, lr = %3.7f, decay = %d, ZO = %s %s, loss = %3.5f, succ rate = %.2f"
                                % (model_idx, class_id[c], i + 1, class_id[c],
                                   args["lr"], int(args["decay_lr"]), arg_mode,
                                   args["constraint"],
                                   total_loss[i, model_idx, c], sr[-1]))
                print(weights)
        print('sum of losses: ', np.sum(total_loss[i]), 'weighted loss',
              np.sum(total_loss[i] * weights))

        if i % 1000 == 0 and i != 0:
            if arg_mode == "ZOAdaMM": print(beta_1, beta_2)
            print("save delta_adv")
            # NOTE(review): `attack_prob` here is whatever the most recent
            # print_iteration report left behind (last model/class pair).
            np.save(
                'retimgs_nips/' + str(i) + 'itrs' +
                str(np.argmax(attack_prob, 1)) + arg_mode + str(args["lr"]) +
                str(args["lmd"]), delta_adv)

        if arg_save_iteration:
            if (np.logical_or(true_label != np.argmax(attack_prob, 1),
                              np.remainder(i + 1,
                                           10) == 0)):  ## every 10 iterations
                suffix = "id_{}_Mode_{}_True_{}_Pred_{}_Ite_{}".format(
                    class_id, arg_mode, true_label, np.argmax(attack_prob, 1),
                    i + 1)
                # util.save_img(adv_img, "Examples/{}.png".format(suffix))

    ## save data -- the archive is identical whether or not an attack was
    ## flagged; only the final message differs
    suffix0 = "id_{}_Mode_{}_{}_lr_{}_decay_{}_case{}_ini_{}".format(
        class_id[0], arg_mode, args["constraint"], str(args["lr"]),
        int(args["decay_lr"]), args["exp_code"], args["init_const"])
    np.savez(
        "{}".format(suffix0),
        id=class_id,
        mode=arg_mode,
        loss=total_loss,
        weights=weights_record,
        sr=np.array(sr),
        stationary=stationary)

    if (attack_flag):
        print("It takes {} iteations to find the first attack".format(
            first_iteration))
    else:
        print("Attack Fails")

    sys.stdout.flush()
Example #9
0
def main(args):
    """Run one adversarial attack and print best/average/worst-case statistics.

    ``args`` is a dict of run options. Keys read here: 'dataset', 'attack',
    'adversarial', 'temp', 'numimg', 'train', 'seed', 'batch_size', 'conf',
    'show', 'save', plus 'seed_imagenet' (imagenet only) and 'maxiter',
    'binary_steps', 'beta', 'abort_early' (L2 / L1 / EN attacks only).

    The adversarial examples are evaluated in consecutive groups of
    ``args['batch_size']`` examples. For every group three cases are recorded:

    * best case    - at least one example hits its target; the smallest
                     L1 / L2 / Linf distortions among the hits are kept.
    * average case - one example drawn at random from the group.
    * worst case   - every example hits its target; the largest distortions
                     are kept.

    When ``args['train']`` is truthy the labels and adversarial examples are
    written to ``labels_train.npy`` / ``<attack>_train.npy`` and the function
    returns before any evaluation.

    Differences from the previous implementation:

    * An unrecognised dataset or attack name raises ``ValueError`` before any
      model is loaded (it used to surface later as a ``NameError``).
    * A group in which every example succeeds with zero distortion is now
      recorded as a worst-case success; it used to be dropped from
      ``r_worst`` entirely, unlike the best case which did count it.

    Raises:
        ValueError: if ``args['dataset']`` or ``args['attack']`` is not one of
            the names handled below.
    """
    optimisation_attacks = ('L2', 'L1', 'EN')
    single_step_orders = {'FGSM': np.inf, 'FGML1': 1, 'FGML2': 2}
    iterative_orders = {'IFGSM': np.inf, 'IFGML1': 1, 'IFGML2': 2}

    dataset = args['dataset']
    attack_name = args['attack']
    if dataset not in ('mnist', 'cifar', 'imagenet'):
        raise ValueError("unknown dataset: {}".format(dataset))
    if (attack_name not in optimisation_attacks
            and attack_name not in single_step_orders
            and attack_name not in iterative_orders):
        raise ValueError("unknown attack: {}".format(attack_name))

    with tf.Session() as sess:
        if dataset == 'mnist':
            data, model = MNIST(), MNISTModel("models/mnist", sess)
            handpick = False
            inception = False
        elif dataset == "cifar":
            data, model = CIFAR(), CIFARModel("models/cifar", sess)
            handpick = True
            inception = False
        else:  # "imagenet" (validated above)
            data, model = ImageNet(args['seed_imagenet']), InceptionModel(sess)
            handpick = True
            inception = True

        # NOTE(review): the adversarially trained checkpoint is always loaded
        # as an MNIST model, whatever the dataset - confirm this is intended.
        if args['adversarial'] != "none":
            model = MNISTModel("models/mnist_cw" + str(args['adversarial']),
                               sess)

        # A non-empty temperature switches to the distilled checkpoint.
        if args['temp'] and dataset == 'mnist':
            model = MNISTModel("models/mnist-distilled-" + str(args['temp']),
                               sess)
        if args['temp'] and dataset == 'cifar':
            model = CIFARModel("models/cifar-distilled-" + str(args['temp']),
                               sess)

        inputs, targets, labels, true_ids = generate_data(
            data,
            model,
            samples=args['numimg'],
            inception=inception,
            handpick=handpick,
            train=args['train'],
            seed=args['seed'])

        timestart = time.time()
        if attack_name in optimisation_attacks:
            if attack_name == 'L2':
                attack_cls = CarliniL2
            elif attack_name == 'L1':
                attack_cls = EADL1
            else:
                attack_cls = EADEN
            attack = attack_cls(sess,
                                model,
                                batch_size=args['batch_size'],
                                max_iterations=args['maxiter'],
                                confidence=args['conf'],
                                binary_search_steps=args['binary_steps'],
                                beta=args['beta'],
                                abort_early=args['abort_early'])
        elif attack_name in single_step_orders:
            attack = FGM(sess,
                         model,
                         batch_size=args['batch_size'],
                         ord=single_step_orders[attack_name],
                         inception=inception)
        else:
            attack = IGM(sess,
                         model,
                         batch_size=args['batch_size'],
                         ord=iterative_orders[attack_name],
                         inception=inception)
        # If untargeted, pass labels instead of targets.
        adv = attack.attack(inputs, targets)
        timeend = time.time()

        print("Took", timeend - timestart, "seconds to run",
              len(inputs) / args['batch_size'], "random instances.")

        if args['train']:
            np.save('labels_train.npy', labels)
            np.save(str(args['attack']) + '_train.npy', adv)
            return

        r_best, d_best_l1, d_best_l2, d_best_linf = [], [], [], []
        r_average, d_average_l1, d_average_l2, d_average_linf = [], [], [], []
        r_worst, d_worst_l1, d_worst_l2, d_worst_linf = [], [], [], []

        # NOTE(review): with a non-zero confidence the examples are scored
        # against the MNIST distilled model rather than the attacked model,
        # for every dataset - confirm this is intended.
        if args['conf'] != 0:
            model = MNISTModel("models/mnist-distilled-100", sess)

        out_dir = None
        if args['show']:
            out_dir = (str(args['save']) + "/" + str(args['dataset']) + "/" +
                       str(args['attack']))
            if not os.path.exists(out_dir):
                os.makedirs(out_dir)

        def predict(images):
            """Return the evaluation model's scores for ``images``."""
            scores = model.model.predict(images)
            if inception:
                scores = np.reshape(scores, (data.test_labels[0:1].shape))
            return scores

        def hits_target(scores, j):
            """True when the arg-max of ``scores`` equals example j's target."""
            return bool(
                np.argmax(scores, 1) == np.argmax(targets[j:j + 1], 1))

        def distortion(j):
            """Return the (L1, L2, Linf) distance between adv[j] and inputs[j]."""
            delta = adv[j] - inputs[j]
            return (np.sum(np.abs(delta)), np.sum(delta**2)**.5,
                    np.amax(np.abs(delta)))

        batch_size = args['batch_size']
        for i in range(0, len(inputs), batch_size):
            batch = range(i, i + batch_size)
            pred = [predict(adv[j:j + 1]) for j in batch]

            # Distortions of the examples in this group that hit their target.
            hit_dists = [
                distortion(j) for scores, j in zip(pred, batch)
                if hits_target(scores, j)
            ]

            # Best case: any hit counts; keep the smallest distortion per norm.
            if hit_dists:
                d_best_l1.append(min(d[0] for d in hit_dists))
                d_best_l2.append(min(d[1] for d in hit_dists))
                d_best_linf.append(min(d[2] for d in hit_dists))
                r_best.append(1)
            else:
                r_best.append(0)

            # Average case: one randomly chosen example of the group.
            rand_int = np.random.randint(i, i + batch_size)
            pred_r = predict(adv[rand_int:rand_int + 1])
            if hits_target(pred_r, rand_int):
                l1, l2, linf = distortion(rand_int)
                r_average.append(1)
                d_average_l1.append(l1)
                d_average_l2.append(l2)
                d_average_linf.append(linf)
            else:
                r_average.append(0)

            # Worst case: every example must hit; keep the largest distortion.
            # Success is decided by the hit count, not by a sentinel index, so
            # zero-distortion hits are still recorded.
            if len(hit_dists) == len(batch):
                d_worst_l1.append(max(d[0] for d in hit_dists))
                d_worst_l2.append(max(d[1] for d in hit_dists))
                d_worst_linf.append(max(d[2] for d in hit_dists))
                r_worst.append(1)
            else:
                r_worst.append(0)

            if args['show']:
                label_shape = (data.test_labels[0:1].shape)
                for j in batch:
                    target_id = np.argmax(targets[j:j + 1], 1)
                    label_id = np.argmax(labels[j:j + 1], 1)
                    prev_id = np.argmax(
                        np.reshape(model.model.predict(inputs[j:j + 1]),
                                   label_shape), 1)
                    adv_id = np.argmax(
                        np.reshape(model.model.predict(adv[j:j + 1]),
                                   label_shape), 1)
                    l1, l2, linf = distortion(j)
                    # NOTE(review): the id comes from the first example of the
                    # group (true_ids[i]), not from example j - confirm.
                    suffix = "id{}_seq{}_lbl{}_prev{}_adv{}_{}_l1_{:.3f}_l2_{:.3f}_linf_{:.3f}".format(
                        true_ids[i], target_id, label_id, prev_id,
                        adv_id, adv_id == target_id, l1, l2, linf)

                    show(inputs[j:j + 1],
                         out_dir + "/original_{}.png".format(suffix))
                    show(adv[j:j + 1],
                         out_dir + "/adversarial_{}.png".format(suffix))

        print('best_case_L1_mean', np.mean(d_best_l1))
        print('best_case_L2_mean', np.mean(d_best_l2))
        print('best_case_Linf_mean', np.mean(d_best_linf))
        print('best_case_prob', np.mean(r_best))
        print('average_case_L1_mean', np.mean(d_average_l1))
        print('average_case_L2_mean', np.mean(d_average_l2))
        print('average_case_Linf_mean', np.mean(d_average_linf))
        print('average_case_prob', np.mean(r_average))
        print('worst_case_L1_mean', np.mean(d_worst_l1))
        print('worst_case_L2_mean', np.mean(d_worst_l2))
        print('worst_case_Linf_mean', np.mean(d_worst_linf))
        print('worst_case_prob', np.mean(r_worst))
    def load_model(self,
                   dataset="mnist",
                   model_name="2-layer",
                   activation="relu",
                   model=None,
                   batch_size=0,
                   compute_slope=False,
                   order=1):
        """
        Load (or adopt) a model and build the graph ops for gradient and
        Hessian-vector-product evaluation on it.

        dataset: "mnist", "cifar" or "imagenet". Only consulted for loading
            when `model` is None; any other value then raises RuntimeError.
            Recommended starting points are mnist and cifar.
        model_name: for mnist/cifar one of "2-layer", "normal", "brelu",
            "distilled", or a comma-separated "N,M,opts" string (number of
            layers, hidden units per layer, checkpoint-name suffix) that
            selects an NLayerModel. For imagenet it is forwarded to
            ImageNetModel as `model_name`.
        activation: only consulted when model_name == "normal"; "relu" loads
            the default checkpoint, any other value loads
            "models/<dataset>_cnn_7layer_<activation>".
        model: if None, a model is loaded according to `dataset` and
            `model_name`. Otherwise this object is used directly.
        batch_size: when non-zero, overrides self.batch_size. Loading a model
            here sets self.batch_size to 1024 (mnist, cifar) or 32 (imagenet);
            when `model` is supplied and batch_size == 0 this method does not
            assign self.batch_size at all.
        compute_slope: stored as self.compute_slope; not used further here.
        order: when 2, the Hessian-vector-product and eigenvalue-estimation
            ops are built in addition to the gradient ops.

        Returns the tuple (self.img, self.output): the image placeholder and
        the model output tensor computed from it.

        Raises RuntimeError for an unknown dataset, or for a custom model_name
        with fewer than three comma-separated fields.
        """
        # Project-local imports are done lazily, inside the method.
        from setup_cifar import CIFAR, CIFARModel, TwoLayerCIFARModel
        from setup_mnist import MNIST, MNISTModel, TwoLayerMNISTModel
        from nlayer_model import NLayerModel
        from setup_imagenet import ImageNet, ImageNetModel

        # if set this to true, we will use the logit layer output instead of probability
        # the logit layer's gradients are usually larger and more stable
        # `not output_logits` is what gets passed to the model constructors
        # below (as `use_softmax` for ImageNetModel; presumably the same flag
        # for the positional argument of the other models - TODO confirm).
        output_logits = True
        self.dataset = dataset
        self.model_name = model_name

        if model is None:
            print('Loading model...')
            if dataset == "mnist":
                self.batch_size = 1024
                if model_name == "2-layer":
                    model = TwoLayerMNISTModel("models/mnist_2layer",
                                               self.sess, not output_logits)
                elif model_name == "normal":
                    if activation == "relu":
                        model = MNISTModel("models/mnist", self.sess,
                                           not output_logits)
                    else:
                        print("actviation = {}".format(activation))
                        model = MNISTModel("models/mnist_cnn_7layer_" +
                                           activation,
                                           self.sess,
                                           not output_logits,
                                           activation=activation)
                        # NOTE(review): 5 second pause with no purpose visible
                        # in this method (the cifar branch has none) - confirm
                        # whether it is still needed.
                        time.sleep(5)

                elif model_name == "brelu":
                    model = MNISTModel("models/mnist_brelu",
                                       self.sess,
                                       not output_logits,
                                       use_brelu=True)
                elif model_name == "distilled":
                    model = MNISTModel("models/mnist-distilled-100", self.sess,
                                       not output_logits)
                else:
                    # specify model parameters as N,M,opts
                    # N = number of layers, M = hidden units, opts = suffix of
                    # the checkpoint file name
                    model_params = model_name.split(",")
                    if len(model_params) < 3:
                        raise (RuntimeError("incorrect model option" +
                                            model_name))
                    numlayer = int(model_params[0])
                    nhidden = int(model_params[1])
                    modelfile = "models/mnist_{}layer_relu_{}_{}".format(
                        numlayer, nhidden, model_params[2])
                    print("loading", modelfile)
                    # numlayer - 1 hidden layers of nhidden units each
                    model = NLayerModel([nhidden] * (numlayer - 1), modelfile)
            elif dataset == "cifar":
                self.batch_size = 1024
                if model_name == "2-layer":
                    model = TwoLayerCIFARModel("models/cifar_2layer",
                                               self.sess, not output_logits)
                elif model_name == "normal":
                    if activation == "relu":
                        model = CIFARModel("models/cifar", self.sess,
                                           not output_logits)
                    else:
                        model = CIFARModel("models/cifar_cnn_7layer_" +
                                           activation,
                                           self.sess,
                                           not output_logits,
                                           activation=activation)
                elif model_name == "brelu":
                    model = CIFARModel("models/cifar_brelu",
                                       self.sess,
                                       not output_logits,
                                       use_brelu=True)
                elif model_name == "distilled":
                    model = CIFARModel("models/cifar-distilled-100", self.sess,
                                       not output_logits)
                else:
                    # specify model parameters as N,M,opts
                    # same convention as the mnist branch, with the image
                    # geometry passed explicitly
                    model_params = model_name.split(",")
                    if len(model_params) < 3:
                        raise (RuntimeError("incorrect model option" +
                                            model_name))
                    numlayer = int(model_params[0])
                    nhidden = int(model_params[1])
                    modelfile = "models/cifar_{}layer_relu_{}_{}".format(
                        numlayer, nhidden, model_params[2])
                    print("loading", modelfile)
                    model = NLayerModel([nhidden] * (numlayer - 1),
                                        modelfile,
                                        image_size=32,
                                        image_channel=3)
            elif dataset == "imagenet":
                self.batch_size = 32
                model = ImageNetModel(self.sess,
                                      use_softmax=not output_logits,
                                      model_name=model_name,
                                      create_prediction=False)
            else:
                raise (RuntimeError("dataset unknown"))

        #print("*** Loaded model successfully")

        self.model = model
        self.compute_slope = compute_slope
        # an explicit batch_size argument wins over the per-dataset default
        if batch_size != 0:
            self.batch_size = batch_size

        ## placeholders: self.img, self.true_label, self.target_label
        # img is the placeholder for image input
        # (batch dimension left open; spatial size and channels come from the model)
        self.img = tf.placeholder(shape=[
            None, model.image_size, model.image_size, model.num_channels
        ],
                                  dtype=tf.float32)
        # output is the output tensor of the entire network
        self.output = model.predict(self.img)
        # create the graph to compute gradient
        # get the desired true label and target label
        # (both are scalar int32 placeholders, i.e. one label pair per run)
        self.true_label = tf.placeholder(dtype=tf.int32, shape=[])
        self.target_label = tf.placeholder(dtype=tf.int32, shape=[])
        true_output = self.output[:, self.true_label]
        target_output = self.output[:, self.target_label]
        # get the difference
        # objective = output[true_label] - output[target_label], per image
        self.objective = true_output - target_output
        # get the gradient(deprecated arguments)
        # gradient of the objective with respect to the input image
        self.grad_op = tf.gradients(self.objective, self.img)[0]
        # compute gradient norm: (in computation graph, so is faster)
        # flatten each image's gradient to one row, then take per-row norms
        grad_op_rs = tf.reshape(self.grad_op, (tf.shape(self.grad_op)[0], -1))
        self.grad_2_norm_op = tf.norm(grad_op_rs, axis=1)
        self.grad_1_norm_op = tf.norm(grad_op_rs, ord=1, axis=1)
        self.grad_inf_norm_op = tf.norm(grad_op_rs, ord=np.inf, axis=1)

        ### Lily: added Hessian-vector product calculation here for 2nd order bound:
        if order == 2:
            ## _hessian_vector_product(ys, xs, v): return a list of tensors containing the product between the Hessian and v
            ## ys: a scalar value or a tensor or a list of tensors to be summed to yield a scalar
            ## xs: a list of tensors that we should construct the Hessian over
            ## v: a list of tensors with the same shape as xs that we want to multiply by the Hessian
            # self.randv: shape = (Nimg,28,28,1) (the v in _hessian_vector_product)
            self.randv = tf.placeholder(shape=[
                None, model.image_size, model.image_size, model.num_channels
            ],
                                        dtype=tf.float32)
            # hv_op_tmp: shape = (Nimg,28,28,1) for mnist, same as self.img (the xs in _hessian_vector_product)
            hv_op_tmp = gradients_impl._hessian_vector_product(
                self.objective, [self.img], [self.randv])[0]
            # hv_op_rs: reshape hv_op_tmp to hv_op_rs whose shape = (Nimg, 784) for mnist
            hv_op_rs = tf.reshape(hv_op_tmp, (tf.shape(hv_op_tmp)[0], -1))
            # self.hv_norm_op: norm of hessian vector product, keep shape = (Nimg,1) using keepdims
            self.hv_norm_op = tf.norm(hv_op_rs, axis=1, keepdims=True)
            # hv_op_rs_normalize: normalize Hv to Hv/||Hv||, shape = (Nimg, 784)
            hv_op_rs_normalize = hv_op_rs / self.hv_norm_op
            # self.hv_op: reshape hv_op_rs_normalize to shape = (Nimg,28,28,1)
            self.hv_op = tf.reshape(hv_op_rs_normalize, tf.shape(hv_op_tmp))

            ## reshape randv and compute its norm
            # shape: (Nimg, 784)
            randv_rs = tf.reshape(self.randv, (tf.shape(self.randv)[0], -1))
            # shape: (Nimg,)
            self.randv_norm_op = tf.norm(randv_rs, axis=1)
            ## compute v'Hv: use un-normalized Hv (hv_op_tmp, hv_op_rs)
            # element-wise multiplication and then sum over axis = 1 (now shape: (Nimg,))
            self.vhv_op = tf.reduce_sum(tf.multiply(randv_rs, hv_op_rs),
                                        axis=1)
            ## compute Rayleigh quotient: v'Hv/v'v (estimated largest eigenvalue), shape: (Nimg,)
            # note: self.vhv_op and self.randv_norm_op have to be in the same dimension (either (Nimg,) or (Nimg,1))
            self.eig_est = self.vhv_op / tf.square(self.randv_norm_op)

            ## Lily added the tf.while to compute the eigenvalue in computational graph later
            # cond for computing largest abs/neg eigen-value
            # keep iterating while fewer than 500 steps have run and the
            # eigenvalue estimates still changed by more than 0.001 (norm of
            # the difference, taken along axis 0)
            def cond(it, randv, eig_est, eig_est_prev, tfconst):
                norm_diff = tf.norm(eig_est - eig_est_prev, axis=0)
                return tf.logical_and(it < 500, norm_diff > 0.001)

            # compute largest abs eigenvalue: tfconst = 0
            # compute largest neg eigenvalue: tfconst = 10
            # one iteration step: multiply randv by the Hessian, subtract
            # tfconst * randv, hand the normalised product on as the next
            # randv, and update the Rayleigh-quotient estimate from the
            # un-normalised product
            def body(it, randv, eig_est, eig_est_prev, tfconst):
                #hv_op_tmp = gradients_impl._hessian_vector_product(self.objective, [self.img], [randv])[0]-10*randv
                hv_op_tmp = gradients_impl._hessian_vector_product(
                    self.objective, [self.img], [randv])[0] - tf.multiply(
                        tfconst, randv)
                hv_op_rs = tf.reshape(hv_op_tmp, (tf.shape(hv_op_tmp)[0], -1))
                hv_norm_op = tf.norm(hv_op_rs, axis=1, keepdims=True)
                hv_op_rs_normalize = hv_op_rs / hv_norm_op
                hv_op = tf.reshape(hv_op_rs_normalize, tf.shape(hv_op_tmp))

                randv_rs = tf.reshape(randv, (tf.shape(randv)[0], -1))
                randv_norm_op = tf.norm(randv_rs, axis=1)
                vhv_op = tf.reduce_sum(tf.multiply(randv_rs, hv_op_rs), axis=1)
                # remember the previous estimate so cond can measure the change
                eig_est_prev = eig_est
                eig_est = vhv_op / tf.square(randv_norm_op)

                return (it + 1, hv_op, eig_est, eig_est_prev, tfconst)

            it = tf.constant(0)
            # compute largest abs eigenvalue
            # loop variables start as: randv = self.randv,
            # eig_est = self.vhv_op, eig_est_prev = self.eig_est, tfconst = 0
            result = tf.while_loop(
                cond, body,
                [it, self.randv, self.vhv_op, self.eig_est,
                 tf.constant(0.0)])
            # compute largest neg eigenvalue
            # same loop, but the shift is fed at run time through
            # self.shiftconst instead of being fixed in the graph
            self.shiftconst = tf.placeholder(shape=(), dtype=tf.float32)
            result_1 = tf.while_loop(
                cond, body,
                [it, self.randv, self.vhv_op, self.eig_est, self.shiftconst])

            # computing largest abs eig value and save result
            # (iteration count, final normalised vector, final estimate)
            self.it = result[0]
            self.while_hv_op = result[1]
            self.while_eig = result[2]

            # computing largest neg eig value and save result
            self.it_1 = result_1[0]
            #self.while_eig_1 = tf.add(result_1[2], tfconst)
            # add the shift back onto the estimate obtained with the shift
            # subtracted
            self.while_eig_1 = tf.add(result_1[2], result_1[4])

            # debugging aid: flip to True to print the tensors defined above
            show_tensor_op = False
            if show_tensor_op:
                print("====================")
                print("Define hessian_vector_product operator: ")
                print("hv_op_tmp = {}".format(hv_op_tmp))
                print("hv_op_rs = {}".format(hv_op_rs))
                print("self.hv_norm_op = {}".format(self.hv_norm_op))
                print("hv_op_rs_normalize = {}".format(hv_op_rs_normalize))
                print("self.hv_op = {}".format(self.hv_op))
                print("self.grad_op = {}".format(self.grad_op))
                print("randv_rs = {}".format(randv_rs))
                print("self.randv_norm_op = {}".format(self.randv_norm_op))
                print("self.vhv_op = {}".format(self.vhv_op))
                print("self.eig_est = {}".format(self.eig_est))
                print("====================")

        return self.img, self.output
def main(args):
    with tf.Session() as sess:
        if (args['dataset'] == 'mnist'):
            data = MNIST()
            inception = False
            if (args['adversarial'] != "none"):
                model = MNISTModel(
                    "models/mnist_cw" + str(args['adversarial']), sess)
            elif (args['temp']):
                model = MNISTModel(
                    "models/mnist-distilled-" + str(args['temp']), sess)
            else:
                model = MNISTModel("models/mnist", sess)
        if (args['dataset'] == "cifar"):
            data = CIFAR()
            inception = False
            if (args['adversarial'] != "none"):
                model = CIFARModel(
                    "models/cifar_cw" + str(args['adversarial']), sess)
            elif (args['temp']):
                model = CIFARModel(
                    "models/cifar-distilled-" + str(args['temp']), sess)
            else:
                model = CIFARModel("models/cifar", sess)
        if (args['dataset'] == "imagenet"):
            data, model = ImageNet(args['seed_imagenet'],
                                   2 * args['numimg']), InceptionModel(sess)
            inception = True

        inputs, targets, labels, true_ids = generate_data(
            data,
            model,
            samples=args['numimg'],
            targeted=not args['untargeted'],
            target_num=args['targetnum'],
            inception=inception,
            train=args['train'],
            seed=args['seed'])
        timestart = time.time()
        if (args['restore_np']):
            if (args['train']):
                adv = np.load(
                    str(args['dataset']) + '_' + str(args['attack']) +
                    '_train.npy')
            else:
                adv = np.load(
                    str(args['dataset']) + '_' + str(args['attack']) + '.npy')
        else:
            if (args['attack'] == 'L2'):
                attack = CarliniL2(sess,
                                   model,
                                   batch_size=args['batch_size'],
                                   max_iterations=args['maxiter'],
                                   confidence=args['conf'],
                                   initial_const=args['init_const'],
                                   binary_search_steps=args['binary_steps'],
                                   targeted=not args['untargeted'],
                                   beta=args['beta'],
                                   abort_early=args['abort_early'])
                adv = attack.attack(inputs, targets)
            if (args['attack'] == 'L1'):
                attack = EADL1(sess,
                               model,
                               batch_size=args['batch_size'],
                               max_iterations=args['maxiter'],
                               confidence=args['conf'],
                               initial_const=args['init_const'],
                               binary_search_steps=args['binary_steps'],
                               targeted=not args['untargeted'],
                               beta=args['beta'],
                               abort_early=args['abort_early'])
                adv = attack.attack(inputs, targets)
            if (args['attack'] == 'EN'):
                attack = EADEN(sess,
                               model,
                               batch_size=args['batch_size'],
                               max_iterations=args['maxiter'],
                               confidence=args['conf'],
                               initial_const=args['init_const'],
                               binary_search_steps=args['binary_steps'],
                               targeted=not args['untargeted'],
                               beta=args['beta'],
                               abort_early=args['abort_early'])
                adv = attack.attack(inputs, targets)
            """If untargeted, pass labels instead of targets"""
            if (args['attack'] == 'FGSM'):
                attack = FGM(sess,
                             model,
                             batch_size=args['batch_size'],
                             ord=np.inf,
                             eps=args['eps'],
                             inception=inception)
                adv = attack.attack(inputs, targets)
            if (args['attack'] == 'FGML1'):
                attack = FGM(sess,
                             model,
                             batch_size=args['batch_size'],
                             ord=1,
                             eps=args['eps'],
                             inception=inception)
                adv = attack.attack(inputs, targets)
            if (args['attack'] == 'FGML2'):
                attack = FGM(sess,
                             model,
                             batch_size=args['batch_size'],
                             ord=2,
                             eps=args['eps'],
                             inception=inception)
                adv = attack.attack(inputs, targets)

            if (args['attack'] == 'IFGSM'):
                attack = IFGM(sess,
                              model,
                              batch_size=args['batch_size'],
                              ord=np.inf,
                              eps=args['eps'],
                              inception=inception)
                adv = attack.attack(inputs, targets)
            if (args['attack'] == 'IFGML1'):
                attack = IFGM(sess,
                              model,
                              batch_size=args['batch_size'],
                              ord=1,
                              eps=args['eps'],
                              inception=inception)
                adv = attack.attack(inputs, targets)
            if (args['attack'] == 'IFGML2'):
                attack = IFGM(sess,
                              model,
                              batch_size=args['batch_size'],
                              ord=2,
                              eps=args['eps'],
                              inception=inception)
                adv = attack.attack(inputs, targets)

        timeend = time.time()

        if args['untargeted']:
            num_targets = 1
        else:
            num_targets = args['targetnum']
        print("Took", timeend - timestart, "seconds to run",
              len(inputs) / num_targets, "random instances.")

        if (args['save_np']):
            if (args['train']):
                np.save(str(args['dataset']) + '_labels_train.npy', labels)
                np.save(
                    str(args['dataset']) + '_' + str(args['attack']) +
                    '_train.npy', adv)
            else:
                np.save(
                    str(args['dataset']) + '_' + str(args['attack'] + '.npy'),
                    adv)

        r_best_ = []
        d_best_l1_ = []
        d_best_l2_ = []
        d_best_linf_ = []
        r_average_ = []
        d_average_l1_ = []
        d_average_l2_ = []
        d_average_linf_ = []
        r_worst_ = []
        d_worst_l1_ = []
        d_worst_l2_ = []
        d_worst_linf_ = []

        #Transferability Tests
        model_ = []
        model_.append(model)
        if (args['targetmodel'] != "same"):
            if (args['targetmodel'] == "dd_100"):
                model_.append(MNISTModel("models/mnist-distilled-100", sess))
        num_models = len(model_)

        if (args['show']):
            if not os.path.exists(
                    str(args['save']) + "/" + str(args['dataset']) + "/" +
                    str(args['attack'])):
                os.makedirs(
                    str(args['save']) + "/" + str(args['dataset']) + "/" +
                    str(args['attack']))
        for m, model in enumerate(model_):
            r_best = []
            d_best_l1 = []
            d_best_l2 = []
            d_best_linf = []
            r_average = []
            d_average_l1 = []
            d_average_l2 = []
            d_average_linf = []
            r_worst = []
            d_worst_l1 = []
            d_worst_l2 = []
            d_worst_linf = []
            for i in range(0, len(inputs), num_targets):
                pred = []
                for j in range(i, i + num_targets):
                    if inception:
                        pred.append(
                            np.reshape(model.model.predict(adv[j:j + 1]),
                                       (data.test_labels[0:1].shape)))
                    else:
                        pred.append(model.model.predict(adv[j:j + 1]))

                dist_l1 = 1e10
                dist_l1_index = 1e10
                dist_linf = 1e10
                dist_linf_index = 1e10
                dist_l2 = 1e10
                dist_l2_index = 1e10
                for k, j in enumerate(range(i, i + num_targets)):
                    success = False
                    if (args['untargeted']):
                        if (np.argmax(pred[k], 1) != np.argmax(
                                targets[j:j + 1], 1)):
                            success = True
                    else:
                        if (np.argmax(pred[k],
                                      1) == np.argmax(targets[j:j + 1], 1)):
                            success = True
                    if (success):
                        if (np.sum(np.abs(adv[j] - inputs[j])) < dist_l1):
                            dist_l1 = np.sum(np.abs(adv[j] - inputs[j]))
                            dist_l1_index = j
                        if (np.amax(np.abs(adv[j] - inputs[j])) < dist_linf):
                            dist_linf = np.amax(np.abs(adv[j] - inputs[j]))
                            dist_linf_index = j
                        if ((np.sum((adv[j] - inputs[j])**2)**.5) < dist_l2):
                            dist_l2 = (np.sum((adv[j] - inputs[j])**2)**.5)
                            dist_l2_index = j
                if (dist_l1_index != 1e10):
                    d_best_l2.append((np.sum(
                        (adv[dist_l2_index] - inputs[dist_l2_index])**2)**.5))
                    d_best_l1.append(
                        np.sum(
                            np.abs(adv[dist_l1_index] -
                                   inputs[dist_l1_index])))
                    d_best_linf.append(
                        np.amax(
                            np.abs(adv[dist_linf_index] -
                                   inputs[dist_linf_index])))
                    r_best.append(1)
                else:
                    r_best.append(0)

                rand_int = np.random.randint(i, i + num_targets)
                if inception:
                    pred_r = np.reshape(
                        model.model.predict(adv[rand_int:rand_int + 1]),
                        (data.test_labels[0:1].shape))
                else:
                    pred_r = model.model.predict(adv[rand_int:rand_int + 1])
                success_average = False
                if (args['untargeted']):
                    if (np.argmax(pred_r, 1) != np.argmax(
                            targets[rand_int:rand_int + 1], 1)):
                        success_average = True
                else:
                    if (np.argmax(pred_r, 1) == np.argmax(
                            targets[rand_int:rand_int + 1], 1)):
                        success_average = True
                if success_average:
                    r_average.append(1)
                    d_average_l2.append(
                        np.sum((adv[rand_int] - inputs[rand_int])**2)**.5)
                    d_average_l1.append(
                        np.sum(np.abs(adv[rand_int] - inputs[rand_int])))
                    d_average_linf.append(
                        np.amax(np.abs(adv[rand_int] - inputs[rand_int])))

                else:
                    r_average.append(0)

                dist_l1 = 0
                dist_l1_index = 1e10
                dist_linf = 0
                dist_linf_index = 1e10
                dist_l2 = 0
                dist_l2_index = 1e10
                for k, j in enumerate(range(i, i + num_targets)):
                    failure = True
                    if (args['untargeted']):
                        if (np.argmax(pred[k], 1) != np.argmax(
                                targets[j:j + 1], 1)):
                            failure = False
                    else:
                        if (np.argmax(pred[k],
                                      1) == np.argmax(targets[j:j + 1], 1)):
                            failure = False
                    if failure:
                        r_worst.append(0)
                        dist_l1_index = 1e10
                        dist_l2_index = 1e10
                        dist_linf_index = 1e10
                        break
                    else:
                        if (np.sum(np.abs(adv[j] - inputs[j])) > dist_l1):
                            dist_l1 = np.sum(np.abs(adv[j] - inputs[j]))
                            dist_l1_index = j
                        if (np.amax(np.abs(adv[j] - inputs[j])) > dist_linf):
                            dist_linf = np.amax(np.abs(adv[j] - inputs[j]))
                            dist_linf_index = j
                        if ((np.sum((adv[j] - inputs[j])**2)**.5) > dist_l2):
                            dist_l2 = (np.sum((adv[j] - inputs[j])**2)**.5)
                            dist_l2_index = j
                if (dist_l1_index != 1e10):
                    d_worst_l2.append((np.sum(
                        (adv[dist_l2_index] - inputs[dist_l2_index])**2)**.5))
                    d_worst_l1.append(
                        np.sum(
                            np.abs(adv[dist_l1_index] -
                                   inputs[dist_l1_index])))
                    d_worst_linf.append(
                        np.amax(
                            np.abs(adv[dist_linf_index] -
                                   inputs[dist_linf_index])))
                    r_worst.append(1)

                if (args['show'] and m == (num_models - 1)):
                    for j in range(i, i + num_targets):
                        target_id = np.argmax(targets[j:j + 1], 1)
                        label_id = np.argmax(labels[j:j + 1], 1)
                        prev_id = np.argmax(
                            np.reshape(model.model.predict(inputs[j:j + 1]),
                                       (data.test_labels[0:1].shape)), 1)
                        adv_id = np.argmax(
                            np.reshape(model.model.predict(adv[j:j + 1]),
                                       (data.test_labels[0:1].shape)), 1)
                        suffix = "id{}_seq{}_lbl{}_prev{}_adv{}_{}_l1_{:.3f}_l2_{:.3f}_linf_{:.3f}".format(
                            true_ids[i], target_id, label_id, prev_id, adv_id,
                            adv_id == target_id,
                            np.sum(np.abs(adv[j] - inputs[j])),
                            np.sum((adv[j] - inputs[j])**2)**.5,
                            np.amax(np.abs(adv[j] - inputs[j])))

                        show(
                            inputs[j:j + 1],
                            str(args['save']) + "/" + str(args['dataset']) +
                            "/" + str(args['attack']) +
                            "/original_{}.png".format(suffix))
                        show(
                            adv[j:j + 1],
                            str(args['save']) + "/" + str(args['dataset']) +
                            "/" + str(args['attack']) +
                            "/adversarial_{}.png".format(suffix))
            if (m != (num_models - 1)):
                lbl = "Src_"
                if (num_models > 2):
                    lbl += str(m) + "_"
            else:
                lbl = "Tgt_"
            if (num_targets > 1):
                print(lbl + 'best_case_L1_mean', np.mean(d_best_l1))
                print(lbl + 'best_case_L2_mean', np.mean(d_best_l2))
                print(lbl + 'best_case_Linf_mean', np.mean(d_best_linf))
                print(lbl + 'best_case_prob', np.mean(r_best))
                print(lbl + 'average_case_L1_mean', np.mean(d_average_l1))
                print(lbl + 'average_case_L2_mean', np.mean(d_average_l2))
                print(lbl + 'average_case_Linf_mean', np.mean(d_average_linf))
                print(lbl + 'average_case_prob', np.mean(r_average))
                print(lbl + 'worst_case_L1_mean', np.mean(d_worst_l1))
                print(lbl + 'worst_case_L2_mean', np.mean(d_worst_l2))
                print(lbl + 'worst_case_Linf_mean', np.mean(d_worst_linf))
                print(lbl + 'worst_case_prob', np.mean(r_worst))
            else:
                print(lbl + 'L1_mean', np.mean(d_average_l1))
                print(lbl + 'L2_mean', np.mean(d_average_l2))
                print(lbl + 'Linf_mean', np.mean(d_average_linf))
                print(lbl + 'success_prob', np.mean(r_average))
def run(args, restrict=True):
    """Attack the defended model with targeted SPSA and pickle the outcome.

    For each of the first ``N`` (1000) test images, a targeted SPSA attack is
    run towards every class in ``range(10)`` except the undefended model's own
    prediction. The results dictionary is written to ``fname`` every 10 images
    and once more at the end, after which ``analysis(fname)`` is called.

    Args:
        args: Indexable of five values ``(dataset, epsilon, mode, K, bias)``.
            ``dataset`` must be ``"MNIST"`` or ``"CIFAR"``; ``epsilon`` and
            ``bias`` are converted with ``float`` and ``K`` with ``int``;
            ``mode`` selects which representation files are loaded.
        restrict: If true, set ``CUDA_VISIBLE_DEVICES`` from the number that
            follows the first ``-`` in the current multiprocessing process
            name (``n`` becomes device ``n - 1``).

    Raises:
        ValueError: If ``dataset`` is neither ``"MNIST"`` nor ``"CIFAR"``.
    """
    if restrict:
        # Restrict the visible GPUs to the one for this subprocess.
        # Builtin int is used because the np.int alias was removed from NumPy.
        worker_index = int(
            multiprocessing.current_process().name.split("-")[1])
        os.environ["CUDA_VISIBLE_DEVICES"] = str(worker_index - 1)

    # Load Parameters
    dataset = args[0]
    epsilon = float(args[1])
    mode = args[2]
    K = int(args[3])
    bias = float(args[4])

    # Fail early with a clear message instead of a NameError further down.
    if dataset not in ("MNIST", "CIFAR"):
        raise ValueError("Unsupported dataset: {!r}".format(dataset))

    fname = dataset + "/" + str(epsilon) + "_" + mode + "_" + str(
        K) + "_" + str(bias)

    # Configure Keras/Tensorflow
    Keras.clear_session()

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    set_session(tf.Session(config=config))

    sess = Keras.get_session()
    Keras.set_learning_phase(False)

    # Fix Random Seeds
    np.random.seed(1)
    # Having this before Keras.clear_session() causes it to hang for some reason
    tf.set_random_seed(1)

    # Load Model/Data and setup SPSA placeholders
    N = 1000
    if dataset == "MNIST":
        # Base Model
        base_model = MNISTModel("../1-Models/MNIST")
        data = MNIST()
        x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
        # SPSA attacks one image at a time
        shape_spsa = (1, 28, 28, 1)
    else:
        # Base Model
        base_model = CIFARModel("../1-Models/CIFAR")
        data = CIFAR()
        x = tf.placeholder(tf.float32, shape=(None, 32, 32, 3))
        # SPSA attacks one image at a time
        shape_spsa = (1, 32, 32, 3)
    x_spsa = tf.placeholder(tf.float32, shape=shape_spsa)
    y_spsa = tf.placeholder(tf.int32)

    # Load the hidden representations of the real and adversarial examples from the training set
    x_train_real = np.squeeze(
        np.load("../3-Representation/" + dataset + "/train_" + mode + ".npy"))
    x_train_adv = np.squeeze(
        np.load("../3-Representation/" + dataset + "/train_adv_" + mode +
                ".npy"))

    n_train = x_train_real.shape[0]
    n_train_adv = x_train_adv.shape[0]
    x_train = np.float32(np.vstack((x_train_real, x_train_adv)))
    # Real representations are labelled -1, adversarial ones +1
    y_train = np.float32(
        np.hstack((-1.0 * np.ones(n_train), np.ones(n_train_adv))))

    # Create the defended model
    model_defended = DefendedModel(base_model, x_train, y_train, K, bias=bias)
    defended_logits = model_defended.get_logits(x)

    # Get the predictions on the original images
    labels = np.argmax(data.test_labels[:N], axis=1)
    logits_real = sess.run(defended_logits, {x: data.test_data[:N]})
    # False positives of the defense: clean images assigned to index 10
    fp = (np.argmax(logits_real, axis=1) == 10)
    # Original model prediction: argmax with the last logit column removed
    pred_undefended = np.argmax(np.delete(logits_real, -1, axis=1), axis=1)

    # Configure the attack
    attack = SPSA(model_defended, back="tf", sess=sess)
    with tf.name_scope("Attack"):
        gen = attack.generate(x_spsa,
                              y_target=y_spsa,
                              epsilon=epsilon,
                              is_targeted=True,
                              num_steps=100,
                              batch_size=2048,
                              early_stop_loss_threshold=-5.0)

    # One row per image, one column per target class; -1 marks "not attacked"
    pred_adv = -1.0 * np.ones((N, 10))

    def save_results():
        # Overwrite the results file with the current state; the context
        # manager closes the handle even if pickling fails.
        out = {
            "FP": fp,
            "Labels": labels,
            "UndefendedPrediction": pred_undefended,
            "AdversarialPredictions": pred_adv,
        }
        with open(fname, "wb") as file:
            pickle.dump(out, file)

    # Run the attack
    for i in range(N):
        if i % 10 == 0:
            # Periodic checkpoint so partial results survive an interruption
            print(fname, " ", i)
            save_results()

        x_real = data.test_data[i].reshape(shape_spsa)

        # Try a targeted attack for each of the 10 classes other than the
        # original network prediction
        for y in range(10):
            if y != pred_undefended[i]:
                x_adv = sess.run(gen, {x_spsa: x_real, y_spsa: y})
                pred_adv[i,
                         y] = np.argmax(sess.run(defended_logits, {x: x_adv}))

    save_results()

    analysis(fname)
Example #13
0
            inputs.append(data.test_data[start + i])
            targets.append(data.test_labels[start + i])

    inputs = np.array(inputs)
    targets = np.array(targets)

    return inputs, targets


if __name__ == "__main__":

    with tf.Session() as sess:

        # Dataset variant and the end-to-end trainable CIFAR-10 checkpoint
        data = CIFAR("ORI")
        Model = CIFARModel(end2end=True,
                           restore="Models/CIFAR10_End2End_Trainable")

        # Carlini-Wagner L2 attack, run in batches of 9
        attack = CarliniL2(sess, Model,
                           confidence=0,
                           max_iterations=1000,
                           batch_size=9)

        # A single targeted sample taken from the start of the test set
        inputs, targets = generate_data(data,
                                        inception=False,
                                        start=0,
                                        targeted=True,
                                        samples=1)

        # Start the clock right before the attack runs
        timestart = time.time()
        adv = attack.attack(inputs, targets)
Example #14
0
def run(args, restrict=True):
    """Measure how often samples flagged by the defense are recovered.

    Draws ``N`` (500) random test images, evaluates the defense's votes on
    them and, for every image with ``votes[i, 0] > 0``, runs the recovery
    check below. Three summary lines are printed at the end.

    Args:
        args: Indexable of four values ``(dataset, epsilon, mode, K)``.
            ``dataset`` must be ``"MNIST"`` or ``"CIFAR"``; ``epsilon`` is
            converted with ``float`` and ``K`` with ``int``; ``mode`` selects
            which representation files are loaded.
        restrict: If true, set ``CUDA_VISIBLE_DEVICES`` from the number that
            follows the first ``-`` in the current multiprocessing process
            name (``n`` becomes device ``n - 1``).

    Raises:
        ValueError: If ``dataset`` is neither ``"MNIST"`` nor ``"CIFAR"``.
    """
    if restrict:
        # Restrict the visible GPUs to the one for this subprocess.
        # Builtin int is used because the np.int alias was removed from NumPy.
        worker_index = int(
            multiprocessing.current_process().name.split("-")[1])
        os.environ["CUDA_VISIBLE_DEVICES"] = str(worker_index - 1)

    # Load Parameters
    dataset = args[0]
    epsilon = float(args[1])
    mode = args[2]
    K = int(args[3])

    # Fail early with a clear message instead of a NameError further down.
    if dataset not in ("MNIST", "CIFAR"):
        raise ValueError("Unsupported dataset: {!r}".format(dataset))

    # NOTE(review): fname is not used anywhere below, and epsilon only feeds
    # fname (the attack uses a hard-coded epsilon=0.01) - confirm intent.
    fname = dataset + "/" + str(epsilon) + "_" + mode + "_" + str(K)

    # Configure Keras/Tensorflow
    Keras.clear_session()

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    set_session(tf.Session(config=config))

    sess = Keras.get_session()
    Keras.set_learning_phase(False)

    # Fix Random Seeds
    np.random.seed(1)
    # Having this before Keras.clear_session() causes it to hang for some reason
    tf.set_random_seed(1)

    # Load Model/Data and setup SPSA placeholders
    N = 500
    if dataset == "MNIST":
        # Base Model
        base_model = MNISTModel("../1-Models/MNIST")
        data = MNIST()
        x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
        # SPSA works on one image at a time
        shape_spsa = (1, 28, 28, 1)
    else:
        # Base Model
        base_model = CIFARModel("../1-Models/CIFAR")
        data = CIFAR()
        x = tf.placeholder(tf.float32, shape=(None, 32, 32, 3))
        # SPSA works on one image at a time
        shape_spsa = (1, 32, 32, 3)
    x_spsa = tf.placeholder(tf.float32, shape=shape_spsa)
    y_spsa = tf.placeholder(tf.int32)

    # Load the hidden representations of the real and adversarial examples from the training set
    x_train_real = np.squeeze(
        np.load("../3-Representation/" + dataset + "/train_" + mode + ".npy"))
    x_train_adv = np.squeeze(
        np.load("../3-Representation/" + dataset + "/train_adv_" + mode +
                ".npy"))

    n_train = x_train_real.shape[0]
    n_train_adv = x_train_adv.shape[0]
    x_train = np.float32(np.vstack((x_train_real, x_train_adv)))
    # Real representations are labelled -1, adversarial ones +1
    y_train = np.float32(
        np.hstack((-1.0 * np.ones(n_train), np.ones(n_train_adv))))

    # Create the defended model
    defense = DefendedModel(base_model, x_train, y_train, K)
    get_votes = defense.get_votes(
        x)  # Should this be get_votes, introducing separate method
    get_logits = defense.get_logits(x)

    # Configure the attack.
    # `gen` is not evaluated by the loop below (its use is commented out),
    # but it is still built so the graph matches the previous behaviour.
    attack = SPSA(defense, back="tf", sess=sess)
    with tf.name_scope("Attack"):
        gen = attack.generate(x_spsa,
                              y=y_spsa,
                              epsilon=0.01,
                              is_targeted=False,
                              num_steps=100,
                              batch_size=2048,
                              early_stop_loss_threshold=-0.05)

    # Run the test
    sample = np.random.choice(data.test_data.shape[0], N, replace=False)
    x_sample = data.test_data[sample]
    y_sample = np.argmax(data.test_labels[sample], axis=1)

    votes = sess.run(get_votes, {x: x_sample})

    count = 0
    bound = 0
    correct = 0
    for i in range(N):
        if votes[i, 0] > 0:
            count += 1
            # Project via an adversarial attack on the votes
            #x_real = x_sample[i].reshape(shape_spsa)
            #x_adv = sess.run(gen, {x_spsa: x_real, y_spsa: 0}) #TODO: not adv, is projected
            # NOTE(review): x_sample[i] has no batch axis while `x` is
            # declared with one, and the logits in `x_proj` are fed back
            # into `x` a few lines down. This looks like unfinished code
            # that was meant to use the commented-out SPSA projection -
            # confirm the intended computation before relying on the output.
            x_proj = sess.run(get_logits, {x: x_sample[i]})
            projection_labels = np.argmax(x_proj, axis=1)
            # Keep labels that are neither 0 nor 10
            successful_projections = projection_labels[np.nonzero(
                projection_labels * (projection_labels != 10))]

            # Check if the projection was a success
            if successful_projections.shape[0] != 0:
                bound += 1

            # Check if the projection is predicted correctly
            if y_sample[i] == np.argmax(sess.run(get_logits, {x: x_proj}),
                                        axis=1)[0]:
                correct += 1

    # With no flagged samples the ratios are undefined; report NaN instead of
    # raising ZeroDivisionError.
    bound_rate = bound / count if count else float("nan")
    correct_rate = correct / count if count else float("nan")

    print("FP Count: ", count)
    print("FP Recovery in Bounds: ", bound_rate)
    print("FP Recovery Accuracy: ", correct_rate)
Example #15
0
            image_dim = 299
            image_channels = 3
            num_labels = 1001
            model = InceptionModel(sess, use_log=True)
        elif FLAGS.model == 'mnist':
            dataset = MNIST()
            model = MNISTModel('models/mnist', sess, use_log=True)
            image_dim = 28
            image_channels = 1
            num_labels = 10
            inputs, targets, reals = utils.generate_data(
                dataset, FLAGS.test_size)
            assert FLAGS.resize_dim is None, 'Dimensionality reduction of noise is used only for ImageNet models'
        elif FLAGS.model == 'cifar':
            dataset = CIFAR()
            model = CIFARModel('models/cifar', sess, use_log=True)
            image_dim = 32
            image_channels = 3
            num_labels = 10
            inputs, targets, reals = utils.generate_data(
                dataset, FLAGS.test_size)
            assert FLAGS.resize_dim is None, 'Dimensionality reduction of noise is used only for ImageNet models'
        else:
            raise ValueError('Incorrect model name provided ({})'.format(
                FLAGS.model))
        test_in = tf.placeholder(tf.float32,
                                 (1, image_dim, image_dim, image_channels),
                                 'x')
        test_pred = tf.argmax(model.predict(test_in), axis=1)

        attack = GenAttack2(model=model,
    targets = np.array(targets)

    return inputs, targets


if __name__ == "__main__":
    with tf.Session() as sess:
        #data, model =  MNIST(), Classifier(sess)
        data = CIFAR10()

        # The first command-line argument selects the target model. A missing
        # argument is handled like an unknown one instead of raising
        # IndexError.
        model_choice = sys.argv[1] if len(sys.argv) > 1 else None

        # target model
        if model_choice == 'our':
            model = Classifier(input_shape=data.IMG_SHAPE, session=sess)
            model.restore('../Clf/models/cifar_classifier')
        elif model_choice == 'orgONLY':
            model = CIFARModel('models/cifar', sess)
        elif model_choice == 'orgDIS':
            model = CIFARModel('models/cifar-distilled-100', sess)
        else:
            print('Wrong Parameters')
            # Non-zero status so shells and job runners see the failure
            sys.exit(1)

        # init attack (untargeted; boxmin/boxmax give the pixel range 0..1)
        attack = CarliniL2(sess,
                           model,
                           targeted=False,
                           max_iterations=1000,
                           confidence=10,
                           boxmin=0,
                           boxmax=1)

        #inputs, targets = generate_data(data, samples=128, targeted=False, start=0, inception=False)
        # Attack the first 128 test images
        inputs = data.X_test[:128]
        targets = data.y_test[:128]

        timestart = time.time()
        adv = attack.attack(inputs, targets)
Example #17
0
def test_cw():
    """Scratch driver that runs the Carlini L2/L0/Li attacks and saves image grids.

    The body reads like a pasted interactive session: several assignments are
    immediately overwritten and many expressions are evaluated with their
    results discarded. Nothing is returned and nothing is asserted.
    """
    sess = tf.Session()
    # sess.run(tf.global_variables_initializer())

    # keras maintains a tf session. It must be set by either
    # keras.backend.set_session(sess), or use inside a context manager
    # sess.as_default()
    # NOTE(review): the MNIST data/model loaded here are overwritten by the
    # CIFAR pair in the next block, so only the CIFAR pair is used below.
    with sess.as_default():
        data, model = MNIST(), MNISTModel("models/mnist", sess)
    with sess.as_default():
        data, model = CIFAR(), CIFARModel("models/cifar", sess)

    # testing the model
    # (the prediction result is discarded; only the true labels are printed)
    np.argmax(model.model.predict(data.test_data[:10]), axis=1)
    print(np.argmax(data.test_labels[:10], axis=1))

    #data, model =  CIFAR(), CIFARModel("models/cifar", sess)
    attack_l2 = CarliniL2(sess,
                          model,
                          batch_size=10,
                          max_iterations=1000,
                          confidence=0)
    attack_l0 = CarliniL0(sess,
                          model,
                          max_iterations=1000,
                          initial_const=10,
                          largest_const=15)
    attack_li = CarliniLi(sess, model)

    # NOTE(review): this result is replaced by generate_data_2 just below.
    inputs, targets = generate_data(data,
                                    samples=1,
                                    targeted=True,
                                    start=0,
                                    inception=False)
    # TODO find the first digits of each kind, try map it to the next digit
    inputs, targets = generate_data_2(data)

    adv_l2 = attack_l2.attack(inputs, targets)
    adv_l0 = attack_l0.attack(inputs, targets)
    adv_li = attack_li.attack(inputs, targets)

    # tight_layout is called twice; the second call passes explicit padding.
    plt.tight_layout()
    plt.tight_layout(pad=1, w_pad=1, h_pad=1)

    # NOTE(review): the first file name says "mnist" although `data` is the
    # CIFAR dataset at this point, and the Li grid uses (9, 2) where the
    # others use (10, 1) - confirm both are intended.
    grid_show_image(inputs, 10, 1, 'images/orig-mnist.png')
    grid_show_image(adv_l2, 10, 1, 'images/l2.png')
    grid_show_image(adv_l0, 10, 1, 'images/l0.png')
    grid_show_image(adv_li, 9, 2, 'images/li.png')

    # The bare name below is an expression statement with no effect.
    from contextlib import redirect_stdout
    redirect_stdout

    # From here on every expression is evaluated and its value discarded,
    # except for the `softmax_pred` assignment.
    np.sum((adv_l2[0] - inputs[0])**2)

    # np.argmax(targets, axis=1)
    # import keras
    # keras.backend.set_session(sess)
    np.argmax(model.model.predict(inputs), axis=1)
    np.argmax(targets, axis=1)
    # # (((adv_l2 + 0.5)*255).round())

    np.argmax(model.model.predict(adv_l2), axis=1)
    np.argmax(model.model.predict(adv_l0), axis=1)
    np.argmax(model.model.predict(adv_li), axis=1)

    np.sum(model.model.predict(adv_l2), axis=1)

    np.sum(sess.run(tf.nn.softmax(model.model.predict(adv_l2))), axis=1)

    softmax_pred = sess.run(tf.nn.softmax(model.model.predict(adv_l2)))
    softmax_pred[0]
    np.argmax(softmax_pred, axis=1)

    # NOTE(review): this passes the model object itself to softmax rather
    # than its predictions - presumably unintended; confirm.
    keras.activations.softmax(model.model)

    model.model.predict(((adv_l2 + 0.5) * 255).round())
Example #18
0
def _build_attack(args, sess, model, inception):
    """Construct the attack object selected by ``args['attack']``.

    Only the ``args`` keys needed by the selected attack are read, and only
    the selected attack class is referenced.

    Raises:
        ValueError: If ``args['attack']`` is not one of the supported names.
    """
    name = args['attack']

    if name == 'L2C':
        return CarliniL2(sess,
                         model,
                         batch_size=args['batch_size'],
                         max_iterations=args['maxiter'],
                         confidence=args['conf'],
                         binary_search_steps=args['binary_steps'],
                         abort_early=args['abort_early'])
    if name == 'LiCW':
        return CarliniLi(sess,
                         model,
                         max_iterations=args['maxiter'],
                         abort_early=args['abort_early'])
    if name == 'L2A':
        # Note: binary_search_steps is fed from 'iteration_steps' here
        return ADMML2(sess,
                      model,
                      batch_size=args['batch_size'],
                      max_iterations=args['maxiter'],
                      confidence=args['conf'],
                      binary_search_steps=args['iteration_steps'],
                      ro=args['ro'],
                      abort_early=args['abort_early'])
    if name == 'L2AE':
        return ADMML2en(sess,
                        model,
                        batch_size=args['batch_size'],
                        max_iterations=args['maxiter'],
                        confidence=args['conf'],
                        binary_search_steps=args['binary_steps'],
                        ro=args['ro'],
                        iteration_steps=args['iteration_steps'],
                        abort_early=args['abort_early'])
    if name == 'L2LA':
        # Note: binary_search_steps is fed from 'iteration_steps' here
        return LADMML2(sess,
                       model,
                       batch_size=args['batch_size'],
                       max_iterations=args['maxiter'],
                       confidence=args['conf'],
                       binary_search_steps=args['iteration_steps'],
                       ro=args['ro'],
                       abort_early=args['abort_early'])
    if name == 'L2LAST':
        # Note: binary_search_steps is fed from 'iteration_steps' here
        return LADMMSTL2(sess,
                         model,
                         batch_size=args['batch_size'],
                         max_iterations=args['maxiter'],
                         confidence=args['conf'],
                         binary_search_steps=args['iteration_steps'],
                         ro=args['ro'],
                         abort_early=args['abort_early'],
                         retrain=args['retrain'])
    if name == 'LiIF':
        return IFGM(sess,
                    model,
                    batch_size=args['batch_size'],
                    ord=np.inf,
                    inception=inception)
    if name == 'LiF':
        return FGM(sess,
                   model,
                   batch_size=args['batch_size'],
                   ord=np.inf,
                   inception=inception)
    if name == 'L1':
        return EADL1(sess,
                     model,
                     batch_size=args['batch_size'],
                     max_iterations=args['maxiter'],
                     confidence=args['conf'],
                     binary_search_steps=args['binary_steps'],
                     beta=args['beta'],
                     abort_early=args['abort_early'])
    if name == 'L1EN':
        return EADEN(sess,
                     model,
                     batch_size=args['batch_size'],
                     max_iterations=args['maxiter'],
                     confidence=args['conf'],
                     binary_search_steps=args['binary_steps'],
                     beta=args['beta'],
                     abort_early=args['abort_early'])
    if name == 'L1IFGM':
        return IFGM(sess,
                    model,
                    batch_size=args['batch_size'],
                    ord=1,
                    inception=inception)
    if name == 'L2IFGM':
        return IFGM(sess,
                    model,
                    batch_size=args['batch_size'],
                    ord=2,
                    inception=inception)
    if name == 'L1FGM':
        return FGM(sess,
                   model,
                   batch_size=args['batch_size'],
                   ord=1,
                   inception=inception)
    if name == 'L2FGM':
        return FGM(sess,
                   model,
                   batch_size=args['batch_size'],
                   ord=2,
                   inception=inception)

    raise ValueError("Unknown attack: {!r}".format(name))


def main(args):
    """Generate adversarial examples with the selected attack and score them.

    Loads the dataset and model named by ``args['dataset']``, optionally
    swaps in an adversarially trained or distilled model, generates targeted
    attack inputs, runs the attack chosen by ``args['attack']``, prints the
    elapsed time, optionally saves the results as ``.npy`` files, and finally
    calls ``l1_l2_li_computation`` on the adversarial examples.

    Args:
        args: Mapping of run options. Keys read here: ``dataset``,
            ``seed_imagenet``, ``adversarial``, ``temp``, ``numimg``,
            ``target_number``, ``seed``, ``attack``, ``train``, plus the
            attack-specific keys read by ``_build_attack``.

    Raises:
        ValueError: If ``args['dataset']`` or ``args['attack']`` is not a
            supported value.
    """
    #   gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.7)
    with tf.Session() as sess:
        dataset = args['dataset']
        if dataset == 'mnist':
            data, model = MNIST(), MadryMNISTModel("models/secret/", sess)
            handpick = False
            inception = False
        elif dataset == "cifar":
            data, model = CIFAR(), CIFARModel("models/cifar", sess)
            #data, model = CIFAR(), MadryCIFARModel("models/model_0/", sess)
            handpick = True
            inception = False
        elif dataset == "imagenet":
            data, model = ImageNet(args['seed_imagenet']), InceptionModel(
                sess, False)
            handpick = True
            inception = True
        else:
            # Previously this surfaced later as an unbound-variable error
            raise ValueError("Unknown dataset: {!r}".format(dataset))

        # NOTE(review): this replaces the model with an MNIST checkpoint
        # regardless of the selected dataset - confirm that is intended.
        if args['adversarial'] != "none":
            model = MNISTModel(
                "models/mnist_cwl2_admm" + str(args['adversarial']), sess)

        # A truthy 'temp' switches to the distilled model for that dataset
        if args['temp'] and dataset == 'mnist':
            model = MNISTModel("models/mnist-distilled-" + str(args['temp']),
                               sess)
        elif args['temp'] and dataset == 'cifar':
            model = MadryCIFARModel(
                "models/cifar-distilled-" + str(args['temp']), sess)

        inputs, targets, labels, true_ids = generate_data(
            data,
            model,
            samples=args['numimg'],
            targeted=True,
            target_num=args['target_number'],
            start=0,
            inception=inception,
            handpick=handpick,
            seed=args['seed'])

        attack = _build_attack(args, sess, model, inception)

        # Time only the attack itself
        timestart = time.time()
        adv = attack.attack(inputs, targets)
        timeend = time.time()

        print("Took", timeend - timestart, "seconds to run", len(inputs),
              "samples.\n")

        if args['train']:
            np.save('labels_train.npy', labels)
            np.save(str(args['attack']) + '_train.npy', adv)

        #if (args['conf'] != 0):
        #    model = MNISTModel("models/mnist-distilled-100", sess)

        l1_l2_li_computation(args, data, model, adv, inception, inputs,
                             targets, labels, true_ids)
Example #19
0
def main(args):
    """Attack test images one at a time with an L2 attack and log the results.

    Loads the dataset/classifier named by ``args['dataset']``, builds the
    white-box ``CarliniL2`` attack when ``args['attack'] == "white"`` and the
    ``BlackBoxL2`` attack otherwise, then attacks every generated sample
    individually. For each sample the original, adversarial and difference
    images are written below ``<save>/<dataset>/`` and a ``[STATS]`` line with
    the running success rate and average L2 distortion is printed.

    Args:
        args: dict of run options. Keys read here: ``use_zvalue``,
            ``dataset``, ``numimg``, ``firstimg``, ``attack``, ``maxiter``,
            ``print_every``, ``early_stop_iters``, ``lr``, ``init_const``,
            ``binary_steps``, ``untargeted``, ``adam_beta1``, ``adam_beta2``,
            ``seed``, ``save`` and, for the black-box attack only,
            ``use_tanh``, ``use_resize``, ``reset_adam``, ``solver``,
            ``save_ckpts``, ``load_ckpt``, ``start_iter``, ``init_size`` and
            ``uniform``. ``args['numimg']`` is overwritten in place when it
            is 0.

    Raises:
        ValueError: if ``args['dataset']`` is not a supported dataset name.
    """
    with tf.Session() as sess:
        use_log = not args['use_zvalue']
        is_inception = args['dataset'] == "imagenet"
        # load network
        print('Loading model', args['dataset'])
        if args['dataset'] == "mnist":
            data, model = MNIST(), MNISTModel("models/mnist", sess, use_log)
        elif args['dataset'] == "cifar10":
            data, model = CIFAR(), CIFARModel("models/cifar", sess, use_log)
        elif args['dataset'] == "imagenet":
            data, model = ImageNet(), ClarifaiModel(sess, use_log)
        else:
            # Fail here instead of with a NameError on `data` further down.
            raise ValueError("unknown dataset: {!r}".format(args['dataset']))
        print('Done...')
        if args['numimg'] == 0:
            # 0 means "every test image from firstimg onwards".
            args['numimg'] = len(data.test_labels) - args['firstimg']
        print('Using', args['numimg'], 'test images')
        # load attack module
        targeted = not args['untargeted']
        if args['attack'] == "white":
            # batch size 1: optimize one image at a time rather than jointly
            attack = CarliniL2(sess,
                               model,
                               batch_size=1,
                               max_iterations=args['maxiter'],
                               print_every=args['print_every'],
                               early_stop_iters=args['early_stop_iters'],
                               confidence=0,
                               learning_rate=args['lr'],
                               initial_const=args['init_const'],
                               binary_search_steps=args['binary_steps'],
                               targeted=targeted,
                               use_log=use_log,
                               adam_beta1=args['adam_beta1'],
                               adam_beta2=args['adam_beta2'])
        else:
            # Any other value selects the black-box attack with a fixed
            # batch size of 45.
            attack = BlackBoxL2(sess,
                                model,
                                batch_size=45,
                                max_iterations=args['maxiter'],
                                print_every=args['print_every'],
                                early_stop_iters=args['early_stop_iters'],
                                confidence=0,
                                learning_rate=args['lr'],
                                initial_const=args['init_const'],
                                binary_search_steps=args['binary_steps'],
                                targeted=targeted,
                                use_log=use_log,
                                use_tanh=args['use_tanh'],
                                use_resize=args['use_resize'],
                                adam_beta1=args['adam_beta1'],
                                adam_beta2=args['adam_beta2'],
                                reset_adam_after_found=args['reset_adam'],
                                solver=args['solver'],
                                save_ckpts=args['save_ckpts'],
                                load_checkpoint=args['load_ckpt'],
                                start_iter=args['start_iter'],
                                init_size=args['init_size'],
                                use_importance=not args['uniform'])
        # Seed after the attack is built so sample generation is repeatable.
        random.seed(args['seed'])
        np.random.seed(args['seed'])

        print('Generate data')
        all_inputs, all_targets, all_labels, all_true_ids = generate_data(
            data,
            samples=args['numimg'],
            targeted=targeted,
            start=args['firstimg'],
            inception=is_inception)

        print('Done...')
        # Create the output directory without going through a shell: this is
        # portable and immune to spaces/metacharacters in the path.
        out_dir = "{}/{}".format(args['save'], args['dataset'])
        try:
            os.makedirs(out_dir)
        except OSError:
            if not os.path.isdir(out_dir):
                raise
        img_no = 0
        total_success = 0
        l2_total = 0.0
        for i in range(all_true_ids.size):
            inputs = all_inputs[i:i + 1]
            targets = all_targets[i:i + 1]
            labels = all_labels[i:i + 1]
            # Report the classifier's view of the clean image.
            original_predict = model.model.predict(inputs)
            print(original_predict)
            original_predict = np.squeeze(original_predict)
            original_prob = np.sort(original_predict)
            original_class = np.argsort(original_predict)
            print("original probabilities:", original_prob[-1:-6:-1])
            print("original classification:", original_class[-1:-6:-1])
            print("original probabilities (most unlikely):", original_prob[:6])
            print("original classification (most unlikely):",
                  original_class[:6])
            # NOTE: images the model already misclassifies are NOT skipped;
            # `labels` is therefore unused below and every sample is attacked.

            img_no += 1
            timestart = time.time()
            print(inputs.shape)
            print("shape target", targets.shape)
            print(targets)
            adv, const = attack.attack_batch(inputs, targets, img_no)
            if isinstance(const, list):
                const = const[0]
            if len(adv.shape) == 3:
                # Restore the leading batch axis dropped by the attack.
                adv = adv.reshape((1, ) + adv.shape)
            timeend = time.time()
            l2_distortion = np.sum((adv - inputs)**2)**.5
            adversarial_predict = model.model.predict(adv)
            adversarial_predict = np.squeeze(adversarial_predict)
            adversarial_prob = np.sort(adversarial_predict)
            adversarial_class = np.argsort(adversarial_predict)
            print("adversarial probabilities:", adversarial_prob[-1:-6:-1])
            print("adversarial classification:", adversarial_class[-1:-6:-1])
            # Untargeted: top-1 must change. Targeted: top-1 must hit target.
            if args['untargeted']:
                success = adversarial_class[-1] != original_class[-1]
            else:
                success = adversarial_class[-1] == np.argmax(targets)
            if l2_distortion > 20.0:
                # Perturbations this large are counted as failures.
                success = False
            # Keep a plain bool so the file names/log lines read True/False.
            success = bool(success)
            if success:
                total_success += 1
                l2_total += l2_distortion
            suffix = "id{}_seq{}_prev{}_adv{}_{}_dist{}".format(
                all_true_ids[i], i, original_class[-1], adversarial_class[-1],
                success, l2_distortion)
            print("Saving to", suffix)
            show(
                inputs,
                "{}/{}/{}_original_{}.png".format(args['save'],
                                                  args['dataset'], img_no,
                                                  suffix))
            show(
                adv,
                "{}/{}/{}_adversarial_{}.png".format(args['save'],
                                                     args['dataset'], img_no,
                                                     suffix))
            show(
                adv - inputs,
                "{}/{}/{}_diff_{}.png".format(args['save'], args['dataset'],
                                              img_no, suffix))
            print(
                "[STATS][L1] total = {}, seq = {}, id = {}, time = {:.3f}, success = {}, const = {:.6f}, prev_class = {}, new_class = {}, distortion = {:.5f}, success_rate = {:.3f}, l2_avg = {:.5f}"
                .format(img_no, i, all_true_ids[i], timeend - timestart,
                        success, const, original_class[-1],
                        adversarial_class[-1], l2_distortion,
                        total_success / float(img_no),
                        0 if total_success == 0 else l2_total / total_success))
            sys.stdout.flush()
Example #20
0
def main(args):
    """Run a targeted L2 attack and report how the evaluated models respond.

    Loads the dataset/model named by ``args['dataset']`` (optionally replaced
    by an adversarially trained or distilled model), generates source/target
    samples with ``generate_data_ST``, runs the selected attack, and prints
    success rates computed from the ``evaluate_perturbation_*`` helpers.

    Args:
        args: dict of run options. Keys read here: ``dataset``,
            ``seed_imagenet``, ``adversarial``, ``temp``, ``numimg``,
            ``numimgT``, ``seed``, ``attack``, ``batch_size``, ``maxiter``,
            ``conf``, ``binary_steps``, ``abort_early``, ``kernel_bias`` and,
            for the ``L2LA2`` attack, ``layer_number``, ``use_kernel``,
            ``iteration_steps`` and ``ro``.

    Raises:
        ValueError: if ``args['dataset']`` or ``args['attack']`` is not one of
            the values handled below.
        NotImplementedError: if ``args['kernel_bias']`` is falsy; that path
            never produces the scores the summary needs.
    """
    with tf.Session() as sess:
        if args['dataset'] == 'mnist':
            data, model = MNIST(), MNISTModel("models/mnist", sess)
            handpick = False
            inception = False
        elif args['dataset'] == "cifar":
            data, model = CIFAR(), CIFARModel("models/cifar", sess)
            handpick = True
            inception = False
        elif args['dataset'] == "imagenet":
            data, model = ImageNet(args['seed_imagenet']), InceptionModel(sess)
            handpick = True
            inception = True
        else:
            # Previously this fell through to a NameError on `data`.
            raise ValueError("unknown dataset: {!r}".format(args['dataset']))

        # NOTE(review): this override always loads an MNIST model, whatever
        # dataset was selected above - confirm that is intended.
        if args['adversarial'] != "none":
            model = MNISTModel("models/mnist_cwl2_admm" + str(args['adversarial']), sess)

        if args['temp'] and args['dataset'] == 'mnist':
            model = MNISTModel("models/mnist-distilled-" + str(args['temp']), sess)
        if args['temp'] and args['dataset'] == 'cifar':
            model = CIFARModel("models/cifar-distilled-" + str(args['temp']), sess)

        inputs, targets, labels, true_ids = generate_data_ST(
            data, model, samples=args['numimg'], samplesT=args['numimgT'],
            targeted=True, start=0, inception=inception, handpick=handpick,
            seed=args['seed'])

        if args['attack'] == 'L2C':
            attack = CarliniL2(sess, model, batch_size=args['batch_size'],
                               max_iterations=args['maxiter'],
                               confidence=args['conf'],
                               binary_search_steps=args['binary_steps'],
                               abort_early=args['abort_early'])
        elif args['attack'] == 'L2LA2':
            attack = LADMML2re(sess, model, batch_size=args['batch_size'],
                               max_iterations=args['maxiter'],
                               layernum=args['layer_number'],
                               use_kernel=args['use_kernel'],
                               confidence=args['conf'],
                               binary_search_steps=args['iteration_steps'],
                               ro=args['ro'],
                               abort_early=args['abort_early'])
        else:
            # Previously this fell through to a NameError on `attack`.
            raise ValueError("unknown attack: {!r}".format(args['attack']))

        timestart = time.time()
        adv = attack.attack(inputs, targets)
        timeend = time.time()

        print("Took", timeend - timestart, "seconds to run", len(inputs), "samples.\n")

        # NOTE(review): like the 'adversarial' override, this always swaps in
        # the distilled MNIST model regardless of dataset - confirm.
        if args['conf'] != 0:
            model = MNISTModel("models/mnist-distilled-100", sess)

        if args['kernel_bias']:
            EP = evaluate_perturbation_kb(args, sess, model, inputs)
            scores, l2 = EP(inputs, targets, adv)
            EPT = evaluate_perturbation_testset(args, sess, model, data.test_data)
            test_scores = EPT(data.test_data, data.test_labels)
            EP2 = evaluate_perturbation_kb_restore(args, sess, model, inputs)
            scores2 = EP2(inputs, targets, adv)
            EPT2 = evaluate_perturbation_testset(args, sess, model, data.test_data)
            test_scores2 = EPT2(data.test_data, data.test_labels)
        else:
            # The evaluator is still constructed as before, but this path never
            # computed scores/scores2/test_scores/test_scores2/l2, so the
            # summary below used to die with a NameError. Say so explicitly.
            EP = evaluate_perturbation(args, sess, model, inputs)
            raise NotImplementedError(
                "evaluation without args['kernel_bias'] is not wired up: "
                "no scores are produced for the summary")

        # 1 where the prediction on the adversarial image equals the target.
        score_count = [int(np.argmax(sc) == np.argmax(targets[e]))
                       for e, sc in enumerate(scores)]
        # Same hits, restricted to the first `numimg` (source) samples.
        score_count4 = [hit for e, hit in enumerate(score_count)
                        if e < args['numimg']]
        # 1 where the prediction still equals the original label.
        score_count3 = [int(np.argmax(sc) == np.argmax(labels[e]))
                        for e, sc in enumerate(scores)]
        score_count2 = [int(np.argmax(sc2) == np.argmax(labels[e]))
                        for e, sc2 in enumerate(scores2)]

        # Plain test-set accuracy for the two evaluated model variants.
        test_score_count = [int(np.argmax(tsc) == np.argmax(data.test_labels[e]))
                            for e, tsc in enumerate(test_scores)]
        test_score_count2 = [int(np.argmax(tsc2) == np.argmax(data.test_labels[e]))
                             for e, tsc2 in enumerate(test_scores2)]

        l0s = np.count_nonzero(adv)
        successrate = np.mean(score_count)
        successrate2 = np.mean(score_count2)
        successrate3 = np.mean(score_count3)
        test_successrate = np.mean(test_score_count)
        test_successrate2 = np.mean(test_score_count2)

        print('original model, success rate of T images for the original labels:', successrate2)
        print('modified model, success rate of T images for the original labels:', successrate3)
        print('modified model, success rate of T images for the target labels:', successrate)
        print('modified model, success rate of S imges for the target labels:', np.mean(score_count4))

        print('modified model, success rate of test set for the original labels:', test_successrate)
        print('original model, success rate of test set for the original labels:', test_successrate2)
        print('l0 distance:', l0s)
        print('l2 distance:', l2)
Example #21
0
def main(args):
    """Run a ZOO / AutoZOOM black-box attack image by image and save results.

    Loads the dataset and classifier named by ``args['dataset']``, builds the
    attack set (either the manually configured single image or a generated
    set), configures the attack selected by ``args['attack_method']``, then
    attacks each image in turn. For every image the original, adversarial and
    difference images are saved as ``.png`` and ``.npy`` under
    ``<save_path>/<dataset>/<attack_method>/<attack_type>/`` and running
    success-rate / mean-L2 statistics are printed.

    Args:
        args: dict of run options. Keys read here: ``dataset``, ``num_img``,
            ``attack_single_img``, ``attack_type``,
            ``single_img_target_label``, ``random_target``, ``img_offset``,
            ``attack_method``, ``img_resize``, ``batch_size``,
            ``compress_mode``, ``codec_prefix`` and ``save_path``.
            ``img_resize`` and ``batch_size`` may be overwritten in place.

    Raises:
        ValueError: if ``args['dataset']`` or ``args['attack_method']`` is
            not one of the values handled below.
        Exception: if the dataset holds fewer test labels than
            ``args['num_img']``.
    """
    with tf.Session() as sess:
        print("Loading data and classification model: {}".format(
            args["dataset"]))
        if args['dataset'] == "mnist":
            data, model = MNIST(), MNISTModel("models/mnist",
                                              sess,
                                              use_softmax=True)
        elif args['dataset'] == "cifar10":
            data, model = CIFAR(), CIFARModel("models/cifar",
                                              sess,
                                              use_softmax=True)
        elif args['dataset'] == "imagenet":
            data, model = ImageNetDataNP(), InceptionModel(sess,
                                                           use_softmax=True)
        else:
            # Includes "imagenet_np", which later checks mention but which
            # has no loader here; previously this surfaced as a NameError.
            raise ValueError("unknown dataset: {!r}".format(args["dataset"]))

        if len(data.test_labels) < args["num_img"]:
            raise Exception("No enough data, only have {} but need {}".format(
                len(data.test_labels), args["num_img"]))

        targeted = args["attack_type"] == "targeted"
        uses_autoencoder = args["attack_method"] in ("zoo_ae", "autozoom_ae")

        if args["attack_single_img"]:
            # Manually configured attack set. The values are bound to the
            # `all_*` names the attack loop reads; before, they were stored
            # under unused names and this mode crashed on an undefined
            # `shift_index`.
            # NOTE(review): the id array has one entry, so only the first
            # element of data.test_data is attacked - presumably the loader
            # holds just the chosen image in this mode; confirm.
            all_orig_img = data.test_data
            all_orig_labels = data.test_labels
            all_orig_img_id = np.array([1])

            if targeted:
                all_target_labels = [
                    np.eye(model.num_labels)[args["single_img_target_label"]]
                ]
            else:
                all_target_labels = all_orig_labels
        else:
            # Generated attack set.
            is_imagenet = args["dataset"] in ("imagenet", "imagenet_np")
            gen_kwargs = {
                "attack_type": args["attack_type"],
                "shift_index": is_imagenet,
            }
            if args["random_target"]:
                if is_imagenet:
                    # Only the classes that actually occur in the test set.
                    gen_kwargs["random_target_class"] = np.unique(
                        np.argmax(data.test_labels, 1))
                else:
                    # Random target over every class index.
                    class_num = data.test_labels.shape[1]
                    gen_kwargs["random_target_class"] = list(range(class_num))
            (all_orig_img, all_target_labels, all_orig_labels,
             all_orig_img_id) = util.generate_attack_data_set(
                 data, args["num_img"], args["img_offset"], model,
                 **gen_kwargs)

        # attack related settings
        if args["attack_method"] in ("zoo", "autozoom_bilin"):
            if args["img_resize"] is None:
                args["img_resize"] = model.image_size
                print(
                    "Argument img_resize is not set and not using autoencoder, set to image original size:{}"
                    .format(args["img_resize"]))

        if args["attack_method"] in ("zoo", "zoo_ae"):
            if args["batch_size"] is None:
                args["batch_size"] = 128
                print(
                    "Using zoo or zoo_ae attack, and batch_size is not set.\nSet batch_size to {}."
                    .format(args["batch_size"]))
        else:
            # Other methods ignore a user batch size; an explicit value is
            # replaced by 1, an unset one stays None.
            if args["batch_size"] is not None:
                print("Argument batch_size is not used")
                args["batch_size"] = 1  # force to be 1

        if uses_autoencoder:
            if args["dataset"] == "mnist" or args["dataset"] == "cifar10":
                codec = CODEC(model.image_size,
                              model.num_channels,
                              args["compress_mode"],
                              use_tanh=False)
            else:
                codec = CODEC(128, model.num_channels, args["compress_mode"])
            print(args["codec_prefix"])
            codec.load_codec(args["codec_prefix"])
            decoder = codec.decoder
            print(decoder.input_shape)
            # The attack works at the decoder's input resolution.
            args["img_resize"] = decoder.input_shape[1]
            print("Using autoencoder, set the attack image size to:{}".format(
                args["img_resize"]))

        # setup attack
        if args["attack_method"] == "zoo":
            blackbox_attack = ZOO(sess, model, args)
        elif args["attack_method"] == "zoo_ae":
            blackbox_attack = ZOO_AE(sess, model, args, decoder)
        elif args["attack_method"] == "autozoom_bilin":
            blackbox_attack = AutoZOOM_BiLIN(sess, model, args)
        elif args["attack_method"] == "autozoom_ae":
            blackbox_attack = AutoZOOM_AE(sess, model, args, decoder, codec)
        else:
            # Previously surfaced as a NameError inside the attack loop.
            raise ValueError("unknown attack_method: {!r}".format(
                args["attack_method"]))

        save_prefix = os.path.join(args["save_path"], args["dataset"],
                                   args["attack_method"], args["attack_type"])

        # Create the output directory without a shell: portable and safe for
        # paths containing spaces or shell metacharacters.
        try:
            os.makedirs(save_prefix)
        except OSError:
            if not os.path.isdir(save_prefix):
                raise

        total_success = 0
        l2_total = 0

        for i in range(all_orig_img_id.size):
            orig_img = all_orig_img[i:i + 1]
            target = all_target_labels[i:i + 1]
            label = all_orig_labels[i:i + 1]

            target_class = np.argmax(target)
            true_class = np.argmax(label)
            test_index = all_orig_img_id[i]

            # print information
            print(
                "[Info][Start]{}: test_index:{}, true label:{}, target label:{}"
                .format(i, test_index, true_class, target_class))
            if uses_autoencoder:
                # Report how well the autoencoder reconstructs this image.
                if args["dataset"] == "mnist" or args["dataset"] == "cifar10":
                    temp_img = all_orig_img[i:i + 1]
                else:
                    # Rescale to 0..255, resize to the 128x128 codec input,
                    # then map back to the [-0.5, 0.5] range.
                    # NOTE(review): scipy.misc.imresize was removed in
                    # SciPy 1.3; this branch needs an older SciPy.
                    temp_img = all_orig_img[i]
                    temp_img = (temp_img + 0.5) * 255
                    temp_img = scipy.misc.imresize(temp_img, (128, 128))
                    temp_img = temp_img / 255 - 0.5
                    temp_img = np.expand_dims(temp_img, axis=0)
                encode_img = codec.encoder.predict(temp_img)
                decode_img = codec.decoder.predict(encode_img)
                diff_img = (decode_img - temp_img)
                diff_mse = np.mean(diff_img.reshape(-1)**2)
                print("[Info][AE] MSE:{:.4f}".format(diff_mse))

            timestart = time.time()
            adv_img = blackbox_attack.attack(orig_img, target)
            timeend = time.time()

            if len(adv_img.shape) == 3:
                # Restore the leading batch axis.
                adv_img = np.expand_dims(adv_img, axis=0)

            l2_dist = np.sum((adv_img - orig_img)**2)**.5
            adv_class = np.argmax(model.model.predict(adv_img))

            # Targeted: must hit the target. Untargeted: must leave the
            # true class.
            if targeted:
                success = bool(adv_class == target_class)
            else:
                success = bool(adv_class != true_class)

            if success:
                total_success += 1
                l2_total += l2_dist

            print(
                "[Info][End]{}: test_index:{}, true label:{}, adv label:{}, success:{}, distortion:{:.5f}, success_rate:{:.4f}, l2_avg:{:.4f}"
                .format(i, test_index, true_class, adv_class, success, l2_dist,
                        total_success / (i + 1),
                        0 if total_success == 0 else l2_total / total_success))

            # save images
            suffix = "id{}_testIndex{}_true{}_adv{}".format(
                i, test_index, true_class, adv_class)
            # original image
            save_name = os.path.join(save_prefix, "Orig_{}.png".format(suffix))
            util.save_img(orig_img, save_name)
            save_name = os.path.join(save_prefix, "Orig_{}.npy".format(suffix))
            np.save(save_name, orig_img)

            # adv image
            save_name = os.path.join(save_prefix, "Adv_{}.png".format(suffix))
            util.save_img(adv_img, save_name)
            save_name = os.path.join(save_prefix, "Adv_{}.npy".format(suffix))
            np.save(save_name, adv_img)

            # diff image (halved for the PNG, raw for the .npy)
            save_name = os.path.join(save_prefix, "Diff_{}.png".format(suffix))
            util.save_img((adv_img - orig_img) / 2, save_name)
            save_name = os.path.join(save_prefix, "Diff_{}.npy".format(suffix))
            np.save(save_name, adv_img - orig_img)
Example #22
0

def inverse_func(x):
    """Shift *x* up by 0.5 and scale by 255 (maps -0.5..0.5 onto 0..255)."""
    shifted = x + 0.5
    return shifted * 255


# Script entry point: loads a distilled CIFAR classifier, samples 1000 random
# test images and draws random labels for them.
# NOTE(review): this snippet is truncated - the final `while` has no body here.
if __name__ == '__main__':
    # Remember the starting directory, then move three levels up; the model
    # path below is built from the remembered directory.
    path = os.getcwd()
    os.chdir('../../../')

    # avd_filename = data_folder + 'adversarial_labels_' + str(images_per_label) + '.pickle'
    # with open(avd_filename, 'rb') as handle:
    #     n = to_categorical(pickle.load(handle), 1000)

    with K.tf.Session() as sess:
        # `model` is the `.model` attribute of the CIFARModel wrapper, not
        # the wrapper itself.
        dataset, model =  CIFAR(), CIFARModel(path + '/models/cifar-distilled-80', sess, True).model
        model.trainable = False
        # NOTE(review): `optimizer` is not defined in this snippet -
        # presumably a module-level name; confirm.
        model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['acc'])
        len_test = len(dataset.test_data)

        # Pick 1000 distinct random test indices.
        p = np.arange(len_test, dtype=int)
        np.random.shuffle(p)
        p = p[:1000]
        data = dataset.test_data[p]
        label = dataset.test_labels[p]
        # Class index of each selected sample (argmax over axis 1 of label).
        y_label = np.argmax(label, axis=1)

        # Prediction on the first two samples; `r` is not used in the
        # visible part of the snippet.
        r = model.predict(data[:2])

        # Random integers in [0, 10), one per sample; the loop condition is
        # true while any of them equals that sample's class index.
        n = np.random.randint(10, size=(1000,))
        while np.any(n == y_label):
Example #23
0
def run(args, restrict=True):
    """Attack a defended model with SPSA and write the accuracies to a file.

    Builds a ``DefendedModel`` on top of the base MNIST or CIFAR classifier
    using stored representations of real and adversarial training examples,
    runs an untargeted SPSA attack on 50 random test images, and writes the
    accuracy on natural and on adversarial images to
    ``<dataset>/<epsilon>_<mode>_<K>.txt``.

    Args:
        args: sequence ``(dataset, epsilon, mode, K)``. ``dataset`` is
            ``"MNIST"`` or ``"CIFAR"``; ``epsilon`` and ``K`` are converted
            with ``float`` and ``int``; ``mode`` selects which
            representation files are loaded.
        restrict: when true, pin this process to a single GPU derived from
            the multiprocessing worker name.

    Raises:
        ValueError: if ``dataset`` is neither ``"MNIST"`` nor ``"CIFAR"``.
    """
    if restrict:
        # Restrict the visible GPUs to the one for this subprocess.
        # Assumes a worker name of the form "<prefix>-<n>"; worker n gets
        # GPU n-1. The builtin int replaces np.int, removed in NumPy 1.24.
        worker_id = int(multiprocessing.current_process().name.split("-")[1])
        os.environ["CUDA_VISIBLE_DEVICES"] = str(worker_id - 1)

    # Load Parameters
    dataset = args[0]
    epsilon = float(args[1])
    mode = args[2]
    K = int(args[3])

    fname = dataset + "/" + str(epsilon) + "_" + mode + "_" + str(K)

    # Configure Keras/Tensorflow
    Keras.clear_session()

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    set_session(tf.Session(config=config))

    sess = Keras.get_session()
    Keras.set_learning_phase(False)

    # Fix Random Seeds. The TF seed is set after Keras.clear_session()
    # because setting it earlier was observed to hang.
    np.random.seed(1)
    tf.set_random_seed(1)

    # Load Model/Data and setup SPSA placeholders
    N = 50  # number of test images attacked
    if dataset == "MNIST":
        # Base Model
        base_model = MNISTModel("../1-Models/MNIST")
        data = MNIST()
        x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
        # SPSA
        shape_spsa = (1, 28, 28, 1)
        x_spsa = tf.placeholder(tf.float32, shape=shape_spsa)
    elif dataset == "CIFAR":
        # Base Model
        base_model = CIFARModel("../1-Models/CIFAR")
        data = CIFAR()
        x = tf.placeholder(tf.float32, shape=(None, 32, 32, 3))
        # SPSA
        shape_spsa = (1, 32, 32, 3)
        x_spsa = tf.placeholder(tf.float32, shape=shape_spsa)
    else:
        # Fail here instead of with a NameError further down.
        raise ValueError("unknown dataset: {!r}".format(dataset))
    y_spsa = tf.placeholder(tf.int32)

    # Load the hidden representations of the real and adversarial examples
    # from the training set
    x_train_real = np.squeeze(
        np.load("../3-Representation/" + dataset + "/train_" + mode + ".npy"))
    x_train_adv = np.squeeze(
        np.load("../3-Representation/" + dataset + "/train_adv_" + mode +
                ".npy"))

    n_train = x_train_real.shape[0]
    n_train_adv = x_train_adv.shape[0]
    x_train = np.float32(np.vstack((x_train_real, x_train_adv)))
    # Real examples are labelled -1, adversarial ones +1.
    y_train = np.float32(
        np.hstack((-1.0 * np.ones(n_train), np.ones(n_train_adv))))

    # Create the defended model
    model_defended = DefendedModel(base_model, x_train, y_train, K)
    defended_logits = model_defended.get_logits(x)

    # Configure the attack
    attack = SPSA(model_defended, back="tf", sess=sess)
    with tf.name_scope("Attack") as scope:
        gen = attack.generate(x_spsa,
                              y=y_spsa,
                              epsilon=epsilon,
                              is_targeted=False,
                              num_steps=100,
                              batch_size=2048,
                              early_stop_loss_threshold=-5.0)

    # Run the attack. The context manager closes the results file even if
    # the attack raises part-way through.
    with open(fname + ".txt", "w") as f:
        sample = np.random.choice(data.test_data.shape[0], N, replace=False)
        x_sample = data.test_data[sample]
        y_sample = np.argmax(data.test_labels[sample], axis=1)

        logits_nat = sess.run(defended_logits, {x: x_sample})
        f.write("Accuracy on Natural Images: " +
                str(np.mean(np.argmax(logits_nat, axis=1) == y_sample)) + "\n")

        # -1 marks "not predicted yet"; every entry is overwritten below.
        pred_adv = -1.0 * np.ones((N))
        for i in range(N):
            x_real = x_sample[i].reshape(shape_spsa)
            x_adv = sess.run(gen, {x_spsa: x_real, y_spsa: y_sample[i]})
            pred_adv[i] = np.argmax(sess.run(defended_logits, {x: x_adv}))

        f.write("Accuracy on Adversarial Images: " +
                str(np.mean(pred_adv == y_sample)))
Example #24
0
    def load_model(self,
                   dataset="mnist",
                   model_name="2-layer",
                   model=None,
                   batch_size=0,
                   compute_slope=False):
        """
        Attach a classifier to this object and build its gradient graph.

        dataset: "mnist", "cifar" or "imagenet"; only consulted for loading
            when `model` is None.
        model_name: for mnist/cifar one of "2-layer", "normal", "brelu" or
            "distilled"; for imagenet it is forwarded to ImageNetModel.
        model: a ready-made model to use as is; when None, one is loaded
            according to `dataset` and `model_name`.
        batch_size: overrides the dataset default batch size when non-zero.
        compute_slope: stored on the instance unchanged.

        Returns the input placeholder and the network output tensor.
        Raises RuntimeError for an unknown dataset or model option.
        """
        import tensorflow as tf
        from setup_cifar import CIFAR, CIFARModel, TwoLayerCIFARModel
        from setup_mnist import MNIST, MNISTModel, TwoLayerMNISTModel
        from setup_imagenet import ImageNet, ImageNetModel

        # When True the network exposes its logit layer rather than
        # probabilities; logit gradients are usually larger and more stable.
        output_logits = True
        softmax_flag = not output_logits
        self.dataset = dataset
        self.model_name = model_name

        def _build_first_match(options):
            # Build the first option whose name equals model_name.
            for option_name, factory in options:
                if model_name == option_name:
                    return factory()
            raise (RuntimeError("incorrect model option"))

        if model is None:
            print('Loading model...')
            if dataset == "mnist":
                self.batch_size = 1024
                model = _build_first_match((
                    ("2-layer",
                     lambda: TwoLayerMNISTModel("models/mnist_2layer",
                                                self.sess, softmax_flag)),
                    ("normal",
                     lambda: MNISTModel("models/mnist", self.sess,
                                        softmax_flag)),
                    ("brelu",
                     lambda: MNISTModel("models/mnist_brelu",
                                        self.sess,
                                        softmax_flag,
                                        use_brelu=True)),
                    ("distilled",
                     lambda: MNISTModel("models/mnist-distilled-100",
                                        self.sess, softmax_flag)),
                ))
            elif dataset == "cifar":
                self.batch_size = 1024
                model = _build_first_match((
                    ("2-layer",
                     lambda: TwoLayerCIFARModel("models/cifar_2layer",
                                                self.sess, softmax_flag)),
                    ("normal",
                     lambda: CIFARModel("models/cifar", self.sess,
                                        softmax_flag)),
                    ("brelu",
                     lambda: CIFARModel("models/cifar_brelu",
                                        self.sess,
                                        softmax_flag,
                                        use_brelu=True)),
                    ("distilled",
                     lambda: CIFARModel("models/cifar-distilled-100",
                                        self.sess, softmax_flag)),
                ))
            elif dataset == "imagenet":
                self.batch_size = 32
                model = ImageNetModel(self.sess,
                                      use_softmax=softmax_flag,
                                      model_name=model_name,
                                      create_prediction=False)
            else:
                raise (RuntimeError("dataset unknown"))

        self.model = model
        self.compute_slope = compute_slope
        if batch_size != 0:
            # An explicit batch size beats the per-dataset default.
            self.batch_size = batch_size

        # Placeholder for a batch of input images.
        input_shape = [
            None, model.image_size, model.image_size, model.num_channels
        ]
        self.img = tf.placeholder(shape=input_shape, dtype=tf.float32)
        # Output tensor of the whole network.
        self.output = model.predict(self.img)
        # Scalar placeholders selecting the two output columns to compare.
        self.true_label = tf.placeholder(dtype=tf.int32, shape=[])
        self.target_label = tf.placeholder(dtype=tf.int32, shape=[])
        true_column = self.output[:, self.true_label]
        target_column = self.output[:, self.target_label]
        # Objective: true-class output minus target-class output.
        self.objective = true_column - target_column
        # Gradient of the objective with respect to the input image.
        self.grad_op = tf.gradients(self.objective, self.img)[0]
        # Flatten per sample, then take the 2-, 1- and inf-norms.
        flat_grad = tf.reshape(self.grad_op, (tf.shape(self.grad_op)[0], -1))
        self.grad_2_norm_op = tf.norm(flat_grad, axis=1)
        self.grad_1_norm_op = tf.norm(flat_grad, ord=1, axis=1)
        self.grad_inf_norm_op = tf.norm(flat_grad, ord=np.inf, axis=1)

        return self.img, self.output
Example #25
0
def main(args):
    """Attack correctly classified test images with a zeroth-order optimizer.

    For every selected image a perturbation ``delta_adv`` is optimized for
    ``args["maxiter"]`` iterations.  The update direction comes from one of
    the gradient estimators ``grad_coord_estimation``,
    ``gradient_estimation_NES`` or ``gradient_estimation_v2``; the update
    rule is selected by ``args["mode"]`` (``ZOSGD``, ``ZOsignSGD``,
    ``ZOSCD``, ``ZOAdaMM``, ``ZOSMD``, ``ZOPSGD`` or ``ZONES``; any other
    value leaves the perturbation unchanged).  Per-image results are written
    as ``.npz`` files under ``retperimage2/`` and summary statistics are
    printed once all images have been processed.

    Args:
        args: dict-like configuration.  Keys read here: ``image_number``,
            ``maxiter``, ``init_const``, ``kappa``, ``q``, ``mode``,
            ``save_iteration``, ``dataset``, ``targeted_attack``,
            ``constraint``, ``mu``, ``lr``, ``decay_lr``,
            ``print_iteration`` and ``exp_code``.

    Raises:
        ValueError: if ``args["dataset"]`` is not ``'mnist'``, ``'cifar10'``
            or ``'imagenet'``.
    """
    with tf.Session() as sess:

        random.seed(SEED)
        np.random.seed(SEED)
        tf.set_random_seed(SEED)

        num_images = args["image_number"]
        # Draw three times as many candidates as requested so that images the
        # classifier already gets wrong can be skipped.
        image_id_set = np.random.choice(range(1000),
                                        num_images * 3,
                                        replace=False)

        max_iter = args['maxiter']  # max number of iterations per image
        const = args['init_const']  # regularization prior to attack loss
        kappa = args['kappa']  # attack confidence level
        q = args['q']  # number of random direction vectors
        arg_mode = args['mode']  # algorithm name
        arg_save_iteration = args['save_iteration']
        arg_Dataset = args["dataset"]
        arg_targeted_attack = args["targeted_attack"]
        constraint = args["constraint"]
        mu = args["mu"]  # smoothing parameter of the ZO gradient estimate

        unconstrained = constraint == 'uncons'
        project = constraint == 'cons'

        # Load the classifier.  For MNIST and CIFAR the pixel value range is
        # [-0.5, 0.5].
        if arg_Dataset == 'mnist':
            data, model = MNIST(), MNISTModel("models/mnist", sess, True)
        elif arg_Dataset == 'cifar10':
            data, model = CIFAR(), CIFARModel("models/cifar", sess, True)
        elif arg_Dataset == 'imagenet':
            data, model = ImageNet(SEED), InceptionModel(sess, True)
        else:
            # Fail here instead of crashing later on an unbound data/model.
            raise ValueError(
                "Please specify a valid dataset (got {!r})".format(
                    arg_Dataset))

        # The loss / gradient-estimation routines depend only on the
        # configuration, so pick them once.
        if unconstrained:
            evaluate_loss = function_evaluation_uncons
        else:
            evaluate_loss = function_evaluation_cons
        if arg_mode == "ZOSCD":
            estimate_gradient = grad_coord_estimation
        elif arg_mode == "ZONES":
            estimate_gradient = gradient_estimation_NES
        else:
            estimate_gradient = gradient_estimation_v2

        if arg_save_iteration:
            # Created once, without spawning a shell on every iteration.
            # NOTE: writing the intermediate images themselves is currently
            # disabled, so the directory stays empty.
            os.makedirs("Examples", exist_ok=True)
        # np.savez does not create missing parent directories.
        os.makedirs("retperimage2", exist_ok=True)

        true_label_list = np.argmax(data.test_labels, axis=1)

        succ_count, attacked = 0, 0
        final_distortion_count = []
        first_iteration_count = []
        first_distortion_count = []

        # NOTE(review): the first drawn id is skipped, exactly as in the
        # previous implementation, so the attacked image set stays the same.
        # Iterating the candidates directly also ends the run cleanly once
        # they are exhausted instead of raising IndexError.
        for image_id in image_id_set[1:]:
            if attacked >= num_images:
                break

            # orig_class is the label predicted for the clean image.
            _, orig_class, _ = util.model_prediction(
                model, np.expand_dims(data.test_data[image_id], axis=0))

            if arg_targeted_attack:
                # NOTE(review): assumes 10 classes; confirm for imagenet.
                target_label = np.remainder(orig_class + 1, 10)
            else:
                target_label = orig_class

            # shape of orig_img is (1,28,28,1) in [-0.5, 0.5] (MNIST case)
            orig_img, _ = util.generate_data(data, image_id, target_label)

            true_label = true_label_list[image_id]

            print("Image ID:{}, infer label:{}, true label:{}".format(
                image_id, orig_class, true_label))
            if true_label != orig_class:
                print(
                    "True Label is different from the original prediction, pass!"
                )
                continue
            attacked = attacked + 1

            print('\n', attacked, '/', num_images)

            d = orig_img.size  # feature dimension
            print("dimension = ", d)

            # Flatten the image to a row vector.
            orig_img_vec = np.resize(orig_img, (1, d))
            delta_adv = np.zeros((1, d))  # initial adversarial perturbation

            # Optimization variable: tanh-space for the unconstrained
            # formulation, pixel space otherwise.
            if unconstrained:
                # * 0.999999 keeps +-0.5 from mapping to +-infinity
                w_ori_img_vec = np.arctanh(2 * (orig_img_vec) * 0.999999)
                w_img_vec = np.arctanh(
                    2 * (np.clip(orig_img_vec + delta_adv, -0.5, 0.5)) *
                    0.999999)
            else:
                w_ori_img_vec = orig_img_vec.copy()
                w_img_vec = np.clip(w_ori_img_vec + delta_adv, -0.5, 0.5)

            # Best / first successful attack bookkeeping.  Only distortions
            # and iteration numbers are persisted, not the images.
            best_distortion = (0.5 * d)**2  # threshold for best perturbation
            total_loss = np.zeros(max_iter)
            l2s_loss_all = np.zeros(max_iter)
            attack_flag = False
            first_flag = True  # True until the first success is recorded

            base_lr = args["lr"]

            if arg_mode == "ZOAdaMM":
                # parameter initialization for AdaMM
                v_init = 1e-7
                v_hat = v_init * np.ones((1, d))
                v = v_init * np.ones((1, d))
                m = np.zeros((1, d))
                # momentum parameters for first and second order moment
                beta_1 = 0.9
                beta_2 = 0.9  # only used by AMSGrad
                print(beta_1, beta_2)

            for i in range(max_iter):

                if args["decay_lr"]:
                    base_lr = args["lr"] / np.sqrt(i + 1)

                # Loss at the current iterate (before this step's update).
                total_loss[i], l2s_loss_all[i] = evaluate_loss(
                    w_img_vec, kappa, target_label, const, model, orig_img,
                    arg_targeted_attack)

                # Gradient estimate w.r.t. w_img_vec.
                grad_est = estimate_gradient(mu, q, w_img_vec, d, kappa,
                                             target_label, const, model,
                                             orig_img, arg_targeted_attack,
                                             constraint)

                # Perturbation update for the selected algorithm.
                if arg_mode in ("ZOSGD", "ZOSCD"):
                    delta_adv = delta_adv - base_lr * grad_est
                elif arg_mode == "ZOsignSGD":
                    delta_adv = delta_adv - base_lr * np.sign(grad_est)
                elif arg_mode == "ZOAdaMM":
                    m = beta_1 * m + (1 - beta_1) * grad_est
                    v = beta_2 * v + (1 - beta_2) * np.square(grad_est)
                    v_hat = np.maximum(v_hat, v)
                    delta_adv = delta_adv - base_lr * m / np.sqrt(v_hat)
                    if project:
                        V_temp = np.sqrt(v_hat.reshape(1, -1))
                        delta_adv = projection_box(delta_adv.copy(),
                                                   orig_img_vec, V_temp,
                                                   -0.5, 0.5)
                elif arg_mode in ("ZOSMD", "ZOPSGD", "ZONES"):
                    if arg_mode == "ZOSMD":
                        step = 0.5 * base_lr * grad_est
                    elif arg_mode == "ZOPSGD":
                        step = base_lr * grad_est
                    else:
                        step = base_lr * np.sign(grad_est)
                    delta_adv = delta_adv - step
                    if project:
                        V_temp = np.ones_like(orig_img_vec)
                        delta_adv = projection_box(delta_adv, orig_img_vec,
                                                   V_temp, -0.5, 0.5)

                # adversarial example update
                w_img_vec = w_ori_img_vec + delta_adv

                # convert back to an image in [-0.5, 0.5]
                if unconstrained:
                    adv_img_vec = 0.5 * np.tanh((w_img_vec)) / 0.999999
                else:
                    adv_img_vec = w_img_vec.copy()

                adv_img = np.resize(adv_img_vec, orig_img.shape)

                # Probability of the target label vs. the best other label.
                attack_prob, _, _ = util.model_prediction(model, adv_img)
                target_prob = attack_prob[0, target_label]
                attack_prob_tmp = attack_prob.copy()
                attack_prob_tmp[0, target_label] = 0
                other_prob = np.amax(attack_prob_tmp)

                if args["print_iteration"]:
                    pred_label = np.argmax(attack_prob)
                    if true_label != pred_label:
                        line = "Iter %d (Succ): ID = %d, lr = %3.5f, decay = %d, ZO = %s %s, loss = %3.5f, l2sdist = %3.5f, TL = %d, PL = %d"
                    else:
                        line = "Iter %d (Fail): ID = %d, lr = %3.6f, decay = %d, ZO = %s %s, loss = %3.5f, l2sdist = %3.5f, TL = %d, PL = %d"
                    print(line % (i + 1, image_id, args["lr"],
                                  int(args["decay_lr"]), arg_mode,
                                  constraint, total_loss[i],
                                  l2s_loss_all[i], true_label, pred_label))

                # Attack confidence: the log-probability margin must reach
                # kappa, towards the target label for a targeted attack and
                # away from it otherwise.
                log_target = np.log(target_prob + 1e-10)
                log_other = np.log(other_prob + 1e-10)
                if arg_targeted_attack:
                    margin = log_target - log_other
                else:
                    margin = log_other - log_target

                if margin >= kappa:
                    current_distortion = distortion(adv_img, orig_img)
                    if current_distortion < best_distortion:
                        best_distortion = current_distortion
                        best_iteration = i + 1
                        attack_flag = True
                        if first_flag:
                            # Record the first successful attack only once.
                            first_flag = False
                            first_distortion = current_distortion
                            first_iteration = i + 1

            # Per-image record; the field order matches the previous output.
            result_path = "retperimage2/id_{}_Mode_{}_{}_lr_{}_decay_{}_case{}_per".format(
                image_id, arg_mode, constraint, args["lr"],
                int(args["decay_lr"]), args["exp_code"])
            record = dict(id=image_id,
                          mode=arg_mode,
                          loss=total_loss,
                          perturbation=l2s_loss_all,
                          best_distortion=best_distortion)
            if attack_flag:
                succ_count = succ_count + 1
                final_distortion_count.append(l2s_loss_all[-1])
                first_distortion_count.append(first_distortion)
                first_iteration_count.append(first_iteration)
                # NOTE: 'best_iteation' is misspelled on purpose; existing
                # result files and their readers use this key.
                record.update(first_distortion=first_distortion,
                              first_iteration=first_iteration,
                              best_iteation=best_iteration)
            record.update(learn_rate=args["lr"],
                          decay_lr=args["decay_lr"],
                          attack_flag=attack_flag)
            np.savez(result_path, **record)

            if attack_flag:
                print("It takes {} iteations to find the first attack".format(
                    first_iteration))
            else:
                print("Attack Fails")

            sys.stdout.flush()

    if attacked == 0:
        # Nothing was attacked, so there is no rate to report.
        print('succ rate: n/a (no image was attacked)')
        return
    # Normally attacked == args["image_number"]; it is smaller only when the
    # candidate pool ran out first.
    print('succ rate:', succ_count / attacked)
    if succ_count == 0:
        # np.min / np.max raise on empty input; there is nothing to average.
        return
    print('average first success l2', np.mean(first_distortion_count))
    print('average first itrs', np.mean(first_iteration_count))
    print('average l2:', np.mean(final_distortion_count), ' best l2:',
          np.min(final_distortion_count), ' worst l2:',
          np.max(final_distortion_count))
Example #26
0
def main(args):
    """Generate adversarial examples with the selected attack and score them.

    Loads the dataset and classifier named by ``args['dataset']``, builds
    attack inputs with ``generate_data``, runs either ``CarliniL2``
    (``args['attack'] == 'L2C'``) or ``LADMMBB`` (``'L2BB'``), optionally
    saves the adversarial examples, and hands the results to
    ``l1_l2_li_computation`` (targeted) or ``l2_computation`` (untargeted).

    Args:
        args: dict-like configuration.  Keys read here: ``dataset``,
            ``seed_imagenet``, ``adversarial``, ``temp``, ``numimg``,
            ``targeted``, ``seed``, ``attack``, ``batch_size``, ``maxiter``,
            ``conf``, ``binary_steps``, ``abort_early``, ``iteration_steps``,
            ``ro``, ``gama``, ``epi``, ``alpha`` and ``train``.

    Raises:
        ValueError: if ``args['dataset']`` or ``args['attack']`` is not one
            of the supported values.
    """
    with tf.Session() as sess:
        dataset = args['dataset']
        if dataset == 'mnist':
            data, model = MNIST(), MNISTModel("models/mnist", sess)
            handpick = False
            inception = False
        elif dataset == "cifar":
            data, model = CIFAR(), CIFARModel("models/cifar", sess)
            handpick = True
            inception = False
        elif dataset == "imagenet":
            data, model = ImageNet(args['seed_imagenet']), InceptionModel(sess)
            handpick = True
            inception = True
        else:
            # Previously fell through and crashed later with a NameError.
            raise ValueError("unknown dataset: {!r}".format(dataset))

        # NOTE(review): the overrides below always load MNIST weights for
        # 'adversarial', whatever dataset was selected above -- confirm.
        if args['adversarial'] != "none":
            model = MNISTModel(
                "models/mnist_cwl2_admm" + str(args['adversarial']), sess)

        # Distilled models replace the base classifier when a temperature
        # is given.
        if args['temp'] and dataset == 'mnist':
            model = MNISTModel("models/mnist-distilled-" + str(args['temp']),
                               sess)
        if args['temp'] and dataset == 'cifar':
            model = CIFARModel("models/cifar-distilled-" + str(args['temp']),
                               sess)

        inputs, targets, labels, true_ids = generate_data(
            data,
            model,
            samples=args['numimg'],
            targeted=args['targeted'],
            start=0,
            inception=inception,
            handpick=handpick,
            seed=args['seed'])

        attack_name = args['attack']
        if attack_name == 'L2C':
            attack = CarliniL2(sess,
                               model,
                               batch_size=args['batch_size'],
                               max_iterations=args['maxiter'],
                               confidence=args['conf'],
                               targeted=args['targeted'],
                               binary_search_steps=args['binary_steps'],
                               abort_early=args['abort_early'])
        elif attack_name == 'L2BB':
            # score-based ZO-ADMM attack
            attack = LADMMBB(sess,
                             model,
                             batch_size=args['batch_size'],
                             max_iterations=args['maxiter'],
                             targeted=args['targeted'],
                             confidence=args['conf'],
                             binary_search_steps=args['iteration_steps'],
                             ro=args['ro'],
                             abort_early=args['abort_early'],
                             gama=args['gama'],
                             epi=args['epi'],
                             alpha=args['alpha'])
        else:
            # Previously `attack` stayed unbound and the call below raised
            # a NameError.
            raise ValueError("unknown attack: {!r}".format(attack_name))

        timestart = time.time()
        adv, querycount, queryl2 = attack.attack(inputs, targets)
        timeend = time.time()
        print("Took", timeend - timestart, "seconds to run", len(inputs),
              "samples.\n")

        if args['train']:
            np.save('labels_train.npy', labels)
            np.save(str(attack_name) + '_train.npy', adv)

        # NOTE(review): with a non-zero confidence the examples are scored
        # on the MNIST distilled model regardless of dataset -- confirm.
        if args['conf'] != 0:
            model = MNISTModel("models/mnist-distilled-100", sess)

        if args['targeted']:
            l1_l2_li_computation(args, data, model, adv, inception, inputs,
                                 targets, labels, true_ids, querycount,
                                 queryl2)
        else:
            l2_computation(args, data, model, adv, inception, inputs, targets,
                           labels, true_ids, querycount, queryl2)
Example #27
0
##
## This program is licenced under the BSD 2-Clause licence,
## contained in the LICENCE file in this directory.

from setup_cifar import CIFAR, CIFARModel
from setup_mnist import MNIST, MNISTModel
from setup_inception import ImageNet, InceptionModel

import tensorflow as tf
import numpy as np

# Number of test images pushed through the network per session run.
BATCH_SIZE = 1

# Measure the classifier's accuracy on the test split.
with tf.Session() as sess:
    # Only one (data, model) pair is evaluated.  The MNIST and CIFAR pairs
    # used to be loaded first and were immediately overwritten by this one,
    # so those loads were pure overhead.  To check another model, swap this
    # line for e.g. MNIST() / MNISTModel("models/mnist", sess) or
    # CIFAR() / CIFARModel("models/cifar", sess).
    data, model = ImageNet(), InceptionModel(sess)

    x = tf.placeholder(
        tf.float32,
        (None, model.image_size, model.image_size, model.num_channels))
    y = model.predict(x)

    # r collects one boolean array per batch: predicted label == true label.
    r = []
    for i in range(0, len(data.test_data), BATCH_SIZE):
        pred = sess.run(y, {x: data.test_data[i:i + BATCH_SIZE]})
        r.append(
            np.argmax(pred, 1) == np.argmax(data.test_labels[i:i +
                                                             BATCH_SIZE], 1))

    # Report the result; concatenating first keeps the mean correct when the
    # last batch is shorter than BATCH_SIZE.
    if r:
        print("accuracy:", np.mean(np.concatenate(r)))
# Let TensorFlow grow GPU memory on demand and share the session with Keras.
config.gpu_options.allow_growth=True
set_session(tf.Session(config=config))

sess = Keras.get_session()
Keras.set_learning_phase(False)

np.random.seed(1)
tf.set_random_seed(1)

if dataset == "MNIST":
    data = MNIST()
    model = MNISTModel("../1-Models/MNIST")
    x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
elif dataset == "CIFAR":
    data = CIFAR()
    model = CIFARModel("../1-Models/CIFAR")
    x = tf.placeholder(tf.float32, shape=(None, 32, 32, 3))
else:
    # Previously fell through and failed below with an unbound `data`.
    raise ValueError("unknown dataset: " + str(dataset))

# Sanity check: accuracy of the loaded model on clean data.
training_accuracy = np.mean(np.argmax(model.model.predict(data.train_data), axis = 1) == np.argmax(data.train_labels, axis = 1))
print("Training Accuracy: " + str(training_accuracy))
testing_accuracy = np.mean(np.argmax(model.model.predict(data.test_data), axis = 1) == np.argmax(data.test_labels, axis = 1))
print("Testing Accuracy: " + str(testing_accuracy))

# Clean training images and their precomputed adversarial counterparts.
X = data.train_data
X_adv = np.load("../2-AEs/" + dataset + "/train_" + mode + ".npy")

pred_original = model.model.predict(X)
pred_adv = model.model.predict(X_adv)
# Compare predicted labels per sample.  Without axis=1, argmax returns one
# flat index for the whole prediction matrix, so the "rate" could only ever
# be 0 or 1.
print("Adversarial Success Rate: " + str(1 - np.mean(np.argmax(pred_original, axis = 1) == np.argmax(pred_adv, axis = 1))))

# Per-pixel difference between clean and adversarial images.
delta = X - X_adv
Example #29
0
def main(args):
    with tf.Session() as sess:

        random.seed(SEED)
        np.random.seed(SEED)
        tf.set_random_seed(SEED)

        class_id = args['class_id']  ### input image (natural example)
        target_id = args[
            'target_id']  ### target images id (adv example) if target attack
        arg_max_iter = args['maxiter']  ### max number of iterations
        arg_init_const = args[
            'init_const']  ### regularization prior to attack loss
        arg_kappa = args['kappa']  ### attack confidence level
        arg_q = args['q']  ### number of random direction vectors
        arg_mode = args['mode']  ### algorithm name
        arg_save_iteration = args['save_iteration']
        arg_Dataset = args["dataset"]
        arg_targeted_attack = args["targeted_attack"]
        arg_bsz = args["mini_batch_sz"]
        idx_lr = args["lr_idx"]

        ## load classofier For MNIST and CIFAR pixel value range is [-0.5,0.5]
        if (arg_Dataset == 'mnist'):
            data, model = MNIST(), MNISTModel("models/mnist", sess, True)
        elif (arg_Dataset == 'cifar10'):
            data, model = CIFAR(), CIFARModel("models/cifar", sess, True)
        elif (arg_Dataset == 'imagenet'):
            data, model = ImageNet_Universal(SEED), InceptionModel(sess, True)
            #model = InceptionModel(sess, True)
        else:
            print('Please specify a valid dataset')

        #orig_img = np.load('ori_img_backup.npy')
        orig_img = data.test_data[np.where(
            np.argmax(data.test_labels, 1) == class_id)]
        #np.save('ori_img_backup',orig_img)

        #true_label = data.test_labels[np.where(np.argmax(data.test_labels,1) == class_id)]
        _, orig_class = util.model_prediction_u(
            model, orig_img[:30]
        )  # take 30 or less images to make sure arg_bsz number of them are valid

        # filter out the images which misclassified already
        orig_img = orig_img[np.where(orig_class == class_id)]
        if orig_img.shape[0] < arg_bsz:
            assert 'no enough valid inputs'

        orig_img = orig_img[:arg_bsz]

        np.save('original_imgsID' + str(class_id), orig_img)
        #true_label = np.zeros((arg_bsz, 1001))
        #true_label[np.arange(arg_bsz), class_id] = 1
        true_label = class_id

        if arg_targeted_attack:  ### target attack
            #target_label = np.zeros((arg_bsz, 1001))
            #target_label[np.arange(arg_bsz), target_id] = 1
            target_label = target_id
        else:
            target_label = true_label

        #orig_img, target = util.generate_data(data, class_id, target_label)
        # shape of orig_img is (1,28,28,1) in [-0.5, 0.5]

        ##  parameter
        if orig_img.ndim == 3 or orig_img.shape[0] == 1:
            d = orig_img.size  # feature dim
        else:
            d = orig_img[0].size
        print("dimension = ", d)

        # mu=1/d**2  # smoothing parameter
        q = arg_q + 0
        I = arg_max_iter + 0
        kappa = arg_kappa + 0
        const = arg_init_const + 0

        ## flatten image to vec
        orig_img_vec = np.resize(orig_img, (arg_bsz, d))

        ## w adv image initialization
        if args["constraint"] == 'uncons':
            # * 0.999999 to avoid +-0.5 return +-infinity
            w_ori_img_vec = np.arctanh(
                2 * (orig_img_vec) * 0.999999
            )  # in real value, note that orig_img_vec in [-0.5, 0.5]
            w_img_vec = w_ori_img_vec.copy()
        else:
            w_ori_img_vec = orig_img_vec.copy()
            w_img_vec = w_ori_img_vec.copy()

        # ## test ##
        # for test_value in w_ori_img_vec[0, :]:
        #     if np.isnan(test_value) or np.isinf(test_value):
        #         print(test_value)

        delta_adv = np.zeros((1, d))  ### initialized adv. perturbation

        # initialize the best solution & best loss
        best_adv_img = []  # successful adv image in [-0.5, 0.5]
        best_delta = []  # best perturbation
        best_distortion = (0.5 * d)**2  # threshold for best perturbation
        total_loss = np.zeros(I)  ## I: max iters
        l2s_loss_all = np.zeros(I)
        attack_flag = False
        first_flag = True  ## record first successful attack

        # parameter setting for ZO gradient estimation
        mu = args["mu"]  ### smoothing parameter

        ## learning rate
        base_lr = args["lr"]

        if arg_mode == "ZOAdaMM":
            ## parameter initialization for AdaMM
            v_init = 1e-7  #0.00001
            v_hat = v_init * np.ones((1, d))
            v = v_init * np.ones((1, d))

            m = np.zeros((1, d))
            # momentum parameter for first and second order moment
            beta_1 = 0.9
            beta_2 = 0.3  # only used by AMSGrad
            print(beta_1, beta_2)

        #for i in tqdm(range(I)):
        for i in range(I):

            if args["decay_lr"]:
                base_lr = args["lr"] / np.sqrt(i + 1)

            ## Total loss evaluation
            if args["constraint"] == 'uncons':
                total_loss[i], l2s_loss_all[i] = function_evaluation_uncons(
                    w_img_vec, kappa, target_label, const, model, orig_img,
                    arg_targeted_attack)

            else:
                total_loss[i], l2s_loss_all[i] = function_evaluation_cons(
                    w_img_vec, kappa, target_label, const, model, orig_img,
                    arg_targeted_attack)

            ## gradient estimation w.r.t. w_img_vec
            if arg_mode == "ZOSCD":
                grad_est = grad_coord_estimation(mu, q, w_img_vec, d, kappa,
                                                 target_label, const, model,
                                                 orig_img, arg_targeted_attack,
                                                 args["constraint"])
            elif arg_mode == "ZONES":
                grad_est = gradient_estimation_NES(mu, q, w_img_vec, d, kappa,
                                                   target_label, const, model,
                                                   orig_img,
                                                   arg_targeted_attack,
                                                   args["constraint"])
            else:
                grad_est = gradient_estimation_v2(mu, q, w_img_vec, d, kappa,
                                                  target_label, const, model,
                                                  orig_img,
                                                  arg_targeted_attack,
                                                  args["constraint"])

            # if np.remainder(i,50)==0:
            # print("total loss:",total_loss[i])
            # print(np.linalg.norm(grad_est, np.inf))

            ## ZO-Attack, unconstrained optimization formulation
            if arg_mode == "ZOSGD":
                delta_adv = delta_adv - base_lr * grad_est
            if arg_mode == "ZOsignSGD":
                delta_adv = delta_adv - base_lr * np.sign(grad_est)
            if arg_mode == "ZOSCD":
                delta_adv = delta_adv - base_lr * grad_est
            if arg_mode == "ZOAdaMM":
                m = beta_1 * m + (1 - beta_1) * grad_est
                v = beta_2 * v + (1 - beta_2) * np.square(grad_est)  ### vt
                #print(np.mean(np.abs(m)),np.mean(np.sqrt(v)))
                v_hat = np.maximum(v_hat, v)
                delta_adv = delta_adv - base_lr * m / np.sqrt(v)
                if args["constraint"] == 'cons':
                    tmp = delta_adv.copy()
                    #X_temp = orig_img_vec.reshape((-1,1))
                    #V_temp2 = np.diag(np.sqrt(v_hat.reshape(-1)+1e-10))
                    V_temp = np.sqrt(v_hat.reshape(1, -1))
                    delta_adv = projection_box(tmp, orig_img_vec, V_temp, -0.5,
                                               0.5)
                    #delta_adv2 = projection_box_2(tmp, X_temp, V_temp2, -0.5, 0.5)
                # v_init = 1e-2 #0.00001
                # v = v_init * np.ones((1, d))
                # m = np.zeros((1, d))
                # # momentum parameter for first and second order moment
                # beta_1 = 0.9
                # beta_2 = 0.99  # only used by AMSGrad
                # m = beta_1 * m + (1-beta_1) * grad_est
                # v = np.maximum(beta_2 * v + (1-beta_2) * np.square(grad_est),v)
                # delta_adv = delta_adv - base_lr * m /np.sqrt(v+1e-10)
                # if args["constraint"] == 'cons':
                #     V_temp = np.diag(np.sqrt(v.reshape(-1)+1e-10))
                #     X_temp = orig_img_vec.reshape((-1,1))
                #     delta_adv = projection_box(delta_adv, X_temp, V_temp, -0.5, 0.5)
            if arg_mode == "ZOSMD":
                delta_adv = delta_adv - 0.5 * base_lr * grad_est
                # delta_adv = delta_adv - base_lr* grad_est
                if args["constraint"] == 'cons':
                    #V_temp = np.eye(orig_img_vec.size)
                    V_temp = np.ones((1, d))
                    #X_temp = orig_img_vec.reshape((-1,1))
                    delta_adv = projection_box(delta_adv, orig_img_vec, V_temp,
                                               -0.5, 0.5)
            if arg_mode == "ZOPSGD":
                delta_adv = delta_adv - base_lr * grad_est
                if args["constraint"] == 'cons':
                    #V_temp = np.eye(orig_img_vec.size)
                    V_temp = np.ones((1, d))
                    #X_temp = orig_img_vec.reshape((-1,1))
                    delta_adv = projection_box(delta_adv, orig_img_vec, V_temp,
                                               -0.5, 0.5)
            if arg_mode == "ZONES":
                delta_adv = delta_adv - base_lr * np.sign(grad_est)
                if args["constraint"] == 'cons':
                    #V_temp = np.eye(orig_img_vec.size)
                    V_temp = np.ones((1, d))
                    #X = orig_img_vec.reshape((-1,1))
                    delta_adv = projection_box(delta_adv, orig_img_vec, V_temp,
                                               -0.5, 0.5)

            # if arg_mode == "ZO-AdaFom":
            #     m = beta_1 * m + (1-beta_1) * grad_est
            #     v = v* (float(i)/(i+1)) + np.square(grad_est)/(i+1)
            #     w_img_vec = w_img_vec - base_lr * m/np.sqrt(v)
            ##

            ### adv. example update
            w_img_vec = w_ori_img_vec + delta_adv

            ## convert back to adv_img in [-0.5, 0.5]
            if args["constraint"] == 'uncons':
                adv_img_vec = 0.5 * np.tanh((w_img_vec)) / 0.999999  #
            else:
                adv_img_vec = w_img_vec.copy()

            adv_img = np.resize(adv_img_vec, orig_img.shape)

            ## update the best solution in the iterations
            attack_prob, _, _ = util.model_prediction(model, adv_img)
            target_prob = attack_prob[:, target_label]
            attack_prob_tmp = attack_prob.copy()
            attack_prob_tmp[:, target_label] = 0
            other_prob = np.amax(attack_prob_tmp, 1)

            if i % 1000 == 0 and i != 0:
                if arg_mode == "ZOAdaMM": print(beta_1, beta_2)
                print("save delta_adv")
                np.save(
                    'retimgs/' + str(i) + 'itrs' +
                    str(np.argmax(attack_prob, 1)) + arg_mode +
                    str(args["lr"]), delta_adv)

            if args["print_iteration"]:
                if np.remainder(i + 1, 20) == 0:
                    if (true_label != np.argmax(attack_prob, 1)).all():
                        print(
                            "Iter %d (Succ): ID = %d, lr = %3.7f, decay = %d, ZO = %s %s, loss = %3.5f, l2sdist = %3.5f, TL = %d, PL = %s"
                            % (i + 1, class_id, args["lr"],
                               int(args["decay_lr"]), arg_mode,
                               args["constraint"], total_loss[i],
                               l2s_loss_all[i], true_label,
                               np.argmax(attack_prob, 1)))
                    else:
                        sr = np.sum(
                            true_label != np.argmax(attack_prob, 1)) / arg_bsz
                        print(
                            "Iter %d (Fail): ID = %d, lr = %3.7f, decay = %d, ZO = %s %s, loss = %3.5f, l2sdist = %3.5f, TL = %d, PL = %s, succ rate = %.2f"
                            % (i + 1, class_id, args["lr"],
                               int(args["decay_lr"]), arg_mode,
                               args["constraint"], total_loss[i],
                               l2s_loss_all[i], true_label,
                               np.argmax(attack_prob, 1), sr))

            if arg_save_iteration:
                os.system("mkdir Examples")
                if (np.logical_or(
                        true_label != np.argmax(attack_prob, 1),
                        np.remainder(i + 1, 10) == 0)):  ## every 10 iterations
                    suffix = "id_{}_Mode_{}_True_{}_Pred_{}_Ite_{}".format(
                        class_id, arg_mode, true_label,
                        np.argmax(attack_prob, 1), i + 1)
                    # util.save_img(adv_img, "Examples/{}.png".format(suffix))

            if arg_targeted_attack:
                if ((np.log(target_prob + 1e-10) - np.log(other_prob + 1e-10))
                        >= kappa).all():  # check attack confidence
                    if (distortion(adv_img, orig_img) <
                            best_distortion):  # check distortion
                        # print('best distortion obtained at',i,'-th iteration')
                        best_adv_img = adv_img
                        best_distortion = distortion(adv_img, orig_img)
                        #best_delta = adv_img - orig_img
                        best_iteration = i + 1
                        adv_class = np.argmax(attack_prob, 1)
                        attack_flag = True
                        ## Record first attack
                        if (first_flag):
                            first_flag = False  ### once gets into this, it will no longer record the next sucessful attack
                            first_adv_img = adv_img
                            first_distortion = distortion(adv_img, orig_img)
                            #first_delta = adv_img - orig_img
                            first_class = adv_class
                            first_iteration = i + 1
            else:
                if ((np.log(other_prob + 1e-10) - np.log(target_prob + 1e-10))
                        >= kappa).all():  # check attack confidence
                    if (distortion(adv_img, orig_img) <
                            best_distortion):  # check distortion
                        # print('best distortion obtained at',i,'-th iteration')
                        best_adv_img = adv_img
                        best_distortion = distortion(adv_img, orig_img)
                        #best_delta = adv_img - orig_img
                        best_iteration = i + 1
                        adv_class = np.argmax(attack_prob, 1)
                        attack_flag = True
                        ## Record first attack
                        if (first_flag):
                            first_flag = False
                            first_adv_img = adv_img
                            first_distortion = distortion(adv_img, orig_img)
                            #first_delta = adv_img - orig_img
                            first_class = adv_class
                            first_iteration = i + 1

        if (attack_flag):
            # os.system("mkdir Results_SL")
            # ## best attack (final attack)
            # suffix = "id_{}_Mode_{}_True_{}_Pred_{}".format(class_id, arg_mode, true_label, orig_class) ## orig_class, predicted label
            # suffix2 = "id_{}_Mode_{}_True_{}_Pred_{}".format(class_id, arg_mode, true_label, adv_class)
            # suffix3 = "id_{}_Mode_{}".format(class_id, arg_mode)
            # ### save original image
            # util.save_img(orig_img, "Results_SL/id_{}.png".format(class_id))
            # util.save_img(orig_img, "Results_SL/{}_Orig.png".format(suffix))
            # ### adv. image
            # util.save_img(best_adv_img, "Results_SL/{}_Adv_best.png".format(suffix2))
            # ### adv. perturbation
            # util.save_img(best_delta, "Results_SL/{}_Delta_best.png".format(suffix3))
            #
            #
            # ## first attack
            # suffix4 = "id_{}_Mode_{}_True_{}_Pred_{}".format(class_id, arg_mode, true_label, first_class)
            # ## first adv. imag
            # util.save_img(first_adv_img, "Results_SL/{}_Adv_first.png".format(suffix4))
            # ### first adv. perturbation
            # util.save_img(first_delta, "Results_SL/{}_Delta_first.png".format(suffix3))

            ## save data
            suffix0 = "id_{}_Mode_{}_{}_lr_{}_decay_{}_case{}_ini_{}".format(
                class_id, arg_mode, args["constraint"], str(args["lr"]),
                int(args["decay_lr"]), args["exp_code"], args["init_const"])
            np.savez("{}".format(suffix0),
                     id=class_id,
                     mode=arg_mode,
                     loss=total_loss,
                     perturbation=l2s_loss_all,
                     best_distortion=best_distortion,
                     first_distortion=first_distortion,
                     first_iteration=first_iteration,
                     best_iteation=best_iteration,
                     learn_rate=args["lr"],
                     decay_lr=args["decay_lr"],
                     attack_flag=attack_flag)
            ## print
            print("It takes {} iteations to find the first attack".format(
                first_iteration))
            # print(total_loss)
        else:
            ## save data
            suffix0 = "id_{}_Mode_{}_{}_lr_{}_decay_{}_case{}_ini_{}".format(
                class_id, arg_mode, args["constraint"], str(args["lr"]),
                int(args["decay_lr"]), args["exp_code"], args["init_const"])
            np.savez("{}".format(suffix0),
                     id=class_id,
                     mode=arg_mode,
                     loss=total_loss,
                     perturbation=l2s_loss_all,
                     best_distortion=best_distortion,
                     learn_rate=args["lr"],
                     decay_lr=args["decay_lr"],
                     attack_flag=attack_flag)
            print("Attack Fails")

        sys.stdout.flush()
Example #30
0
    targets = np.array(targets)
    labels = np.array(labels)
    return inputs, targets, labels


if __name__ == "__main__":
    dataset = "mnist"  #"cifar"
    Targeted = True
    Iterations = 500
    with tf.Session() as sess:
        if dataset == "mnist":
            data = MNIST()
            model = MNISTModel("/models/mnist", sess)
        elif dataset == "cifar":
            data = CIFAR()
            model = CIFARModel("models/cifar", sess)
        else:
            raise Exception("Invalid dataset!", dataset)

        # attack = CarliniL0(sess, model,targeted = Targeted,max_iterations=1000)
        # attack = Leastpixel_attack(sess, model, max_iterations=1000)
        attack = CarliniL0_batch(sess,
                                 model,
                                 targeted=Targeted,
                                 max_iterations=1000)
        # attack = LPA_attack(sess, model, max_iterations=2000, targeted=Targeted)
        # attack = LPA_attack2(sess, model, max_iterations=4000,targeted=Targeted)
        # attack = LPA_attack_batch(sess, model, max_iterations=2000,targeted=Targeted)
        if Targeted:
            length = 10
        else: