def main(_):
    with tf.Session() as sess:
        K.set_session(sess)
        if FLAGS.dataset == 'MNIST':
            data, model = MNIST(), MNISTModel("models/mnist", sess)
        elif FLAGS.dataset == 'Cifar':
            data, model = CIFAR(), CIFARModel("models/cifar", sess)


        def _model_fn(x, logits=False):
            ybar, logits_ = model.predict(x)
            if logits:
                return ybar, logits_
            return ybar

        
        if FLAGS.dataset == 'MNIST':
            x_adv = fgsm(_model_fn, x, epochs=9, eps=0.02)
        elif FLAGS.dataset == 'Cifar':
            x_adv = fgsm(_model_fn, x, epochs=4, eps=0.01)

        X_adv_test = attack(x_adv, data.test_data, data.test_labels, sess)
        X_adv_train = attack(x_adv, data.train_data, data.train_labels, sess)

        np.save('adversarial_outputs/fgsm_train_' + FLAGS.dataset.lower() + '.npy', X_adv_train)
        np.save('adversarial_outputs/fgsm_test_' + FLAGS.dataset.lower() + '.npy', X_adv_test)
        print("Legit/Adversarial training set")
        model.evaluate(data.train_data, data.train_labels)
        model.evaluate(X_adv_train, data.train_labels)
        
        print("Legit/Adversarial test set")
        model.evaluate(data.test_data, data.test_labels)
        model.evaluate(X_adv_test, data.test_labels)
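
# A minimal sketch of the batched `attack` helper used above, which is not shown in
# this example. Assumptions: a TF1 graph `x_adv` built on an input placeholder,
# passed here as `x_ph`; the batch size is illustrative.
import numpy as np

def attack_batched(x_adv, X, y, sess, x_ph, batch_size=128):
    """Run the symbolic adversarial-example op `x_adv` over X in batches.

    `y` is accepted to mirror the call above but is not needed for untargeted FGSM.
    """
    out = []
    for i in range(0, len(X), batch_size):
        out.append(sess.run(x_adv, feed_dict={x_ph: X[i:i + batch_size]}))
    return np.concatenate(out, axis=0)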
Example #2
def main():

    batch_size = 128
    tag = "GBP_0"

    model = CIFARModel().model  # pure resnet
    data = CIFAR(tag)

    sgd = SGD(lr=0.00, momentum=0.9, nesterov=False)  # initial lr is a placeholder; the LearningRateScheduler below sets it each epoch
    schedule = LearningRateScheduler(get_lr)

    model.compile(loss=fn, optimizer=sgd, metrics=['accuracy'])

    datagen = ImageDataGenerator(rotation_range=10,
                                 width_shift_range=0.1,
                                 height_shift_range=0.1,
                                 horizontal_flip=True)

    datagen.fit(data.train_data)

    model.fit_generator(datagen.flow(data.train_data,
                                     data.train_labels,
                                     batch_size=batch_size),
                        steps_per_epoch=data.train_data.shape[0] // batch_size,
                        epochs=300,
                        verbose=1,
                        validation_data=(data.test_data, data.test_labels),
                        callbacks=[schedule])

    model.save_weights('Models/{}'.format(tag))
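
# `get_lr` (and the loss `fn`) are defined elsewhere in the original code. A typical
# step-decay schedule compatible with LearningRateScheduler(get_lr) might look like
# the sketch below; the breakpoints and rates are illustrative assumptions.
def get_lr_example(epoch):
    if epoch < 150:
        return 0.1
    if epoch < 225:
        return 0.01
    return 0.001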
def load_model_and_dataset(dataset):
    if dataset == 'mnist':
        import mnist_NiN_bn
        model = mnist_NiN_bn.NiN_Model()
        saver = tf.train.Saver()
        checkpoint = tf.train.latest_checkpoint(
            '/home/bull/home/zmn/insight/sparse-imperceivable-attacks-master/models/mnist_NiN/'
        )
        saver.restore(sess, checkpoint)
        data = MNIST()
    elif dataset == "mnist2":
        import mnist_model
        model = mnist_model.MNISTModel()
        data = MNIST()
    elif dataset == 'cifar10':
        import cifar_NiN_bn
        model = cifar_NiN_bn.NiN_Model()
        saver = tf.train.Saver()
        checkpoint = tf.train.latest_checkpoint(
            '/home/bull/home/zmn/insight/sparse-imperceivable-attacks-master/models/cifar_NiN/'
        )
        saver.restore(sess, checkpoint)

        data = CIFAR()
    else:
        raise ValueError('unknown dataset')

    return model, data
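
# Usage sketch: load_model_and_dataset resolves `sess` as a module-level global, so a
# session must exist before the call. Assumes the checkpoint directories and model
# modules referenced above are available.
import tensorflow as tf

sess = tf.Session()  # the loader above relies on this module-level session
model, data = load_model_and_dataset('cifar10')
print(data.test_data.shape, data.test_labels.shape)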
def ATTACK(attack, dataset, first_index, settype, last_index, batch_size):
    """
    Applies the saliency map attack against the specified dataset.

    Parameters
    ----------
    attack: str
        The type of attack used (either "jsma", "wjsma" or "tjsma").
    dataset: str
        The dataset attacked ("mnist"; any other value falls through to CIFAR-10).
    first_index: int
        The index of the first image attacked.
    settype: str
        The type of set used (either "train" or "test").
    last_index: int
        The index of the last image attacked.
    batch_size: int
        The size of the image batches.
    """

    if dataset == 'mnist':
        from cleverhans.dataset import MNIST

        x_set, y_set = MNIST(train_start=0, train_end=60000, test_start=0, test_end=10000).get_set(settype)
        print(x_set.shape)
        gamma = 0.155
        file_path="/models/mnist"
    #elif model in CIFAR10_SETS:
    else:
        #from cleverhans.dataset import CIFAR10
        #x_set, y_set = CIFAR10(train_start=0, train_end=50000, test_start=0, test_end=10000).get_set(settype)
        #gamma = 0.155
        from setup_cifar import CIFAR
        data = CIFAR()
        # note: this branch always uses the test set, regardless of settype
        x_set, y_set = data.test_data, data.test_labels
        print(x_set.shape)
        print(y_set)
        gamma = 0.155
        file_path = "./Least_pixel_attack/models/cifar"
    #else:
    #    raise ValueError("Invalid model: " + model)

    generate_attacks(
        save_path="./Least_pixel_attack/models/data",
        file_path=file_path,
        dataset = dataset,
        x_set=x_set,
        y_set=y_set,
        attack=attack,
        gamma=gamma,
        first_index=first_index,
        last_index=last_index,
        batch_size=batch_size
    )
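
# Usage sketch for ATTACK; the argument values below are illustrative assumptions,
# not taken from the original experiments.
if __name__ == "__main__":
    ATTACK(attack="wjsma", dataset="cifar10", first_index=0, settype="test",
           last_index=10, batch_size=5)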
Example #5
def main(args):
    if not os.path.isdir('models'):
        os.makedirs('models')

    if args['dataset'] == "mnist" or args['dataset'] == "all":
        train(MNIST(),
              "models/mnist", [32, 32, 64, 64, 200, 200],
              num_epochs=50)
    if args['dataset'] == 'cifar' or args['dataset'] == 'all':
        train(CIFAR(),
              "models/cifar", [64, 64, 128, 128, 256, 256],
              num_epochs=50)
def main(args):
    # load data
    print("Loading data", args["dataset"])
    if args["dataset"] == "mnist":
        data = MNIST()
        if args["train_data_source"]:
            print("Using data from {}".format(args["train_data_source"]))
            img = np.load("{}_data.npy".format(args["train_data_source"]))
            # labels file name assumed to pair with the data file loaded above
            labels = np.load("{}_labels.npy".format(args["train_data_source"]))
            data.validation_data = img
            data.validation_labels = labels

    elif args["dataset"] == "cifar10":
        data = CIFAR()
    elif args["dataset"] == "fe":
        data = FACIAL()
    elif args["dataset"] == "imagenet":
        # use ImageDataGenerate provided by Keras
        data = ImageNetDataGen(args["imagenet_train_dir"],
                               args["imagenet_validation_dir"],
                               data_augmentation=False)

    print("Done...")

    if args["dataset"] == "imagenet":
        data_shape = (None, 299, 299, 3)
        resize = 256
    else:
        data_shape = data.train_data.shape
        resize = None

    print("Start training autoencoder")
    codec = CODEC(img_size=data_shape[1],
                  num_channels=data_shape[3],
                  compress_mode=args["compress_mode"],
                  resize=resize)
    train_autoencoder(data,
                      codec,
                      batch_size=args["batch_size"],
                      epochs=args["epochs"],
                      saveFilePrefix=args["save_prefix"],
                      train_imagenet=(args["dataset"] == "imagenet"))
Example #7
def cw_attack(file_name, norm, sess, num_image=10, cifar = False, tinyimagenet = False):
    np.random.seed(1215)
    tf.set_random_seed(1215)
    random.seed(1215)
    if norm == '1':
        attack = EADL1
        norm_fn = lambda x: np.sum(np.abs(x),axis=(1,2,3))
    elif norm == '2':
        attack = CarliniL2
        norm_fn = lambda x: np.sum(x**2,axis=(1,2,3))
    elif norm == 'i':
        attack = CarliniLi
        norm_fn = lambda x: np.max(np.abs(x),axis=(1,2,3))

    if cifar:
        data = CIFAR()
    elif tinyimagenet:
        data = tinyImagenet()
    else:
        data = MNIST()
    model = load_model(file_name, custom_objects={'fn':loss,'tf':tf, 'ResidualStart' : ResidualStart, 'ResidualStart2' : ResidualStart2})
    inputs, targets, true_labels, true_ids, img_info = generate_data(data, samples=num_image, targeted=True, random_and_least_likely = True, target_type = 0b0010, predictor=model.predict, start=0)
    model.predict = model
    model.num_labels = 10
    if cifar:
        model.image_size = 32
        model.num_channels = 3
    elif tinyimagenet:
        model.image_size = 64
        model.num_channels = 3
        model.num_labels = 200
    else:
        model.image_size = 28
        model.num_channels = 1
        
    
    start_time = timer.time()
    attack = attack(sess, model, max_iterations = 1000)
    perturbed_input = attack.attack(inputs, targets)
    UB = np.average(norm_fn(perturbed_input-inputs))
    return UB, (timer.time()-start_time)/len(inputs)
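
# Usage sketch for cw_attack; the model file name is hypothetical, and the session
# setup mirrors the other examples in this collection.
with tf.Session() as sess:
    ub, secs_per_image = cw_attack("models/cifar_resnet.h5", norm='2', sess=sess,
                                   num_image=10, cifar=True)
    print("mean L2 distortion:", ub, "- avg seconds per image:", secs_per_image)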
Example #8
def main():

    tags = ["GBP_0", "GBP_1", "GBP_2", "GBP_3", "GBP_4", "ORI"]

    model = prepare_resnet(load_weights='random', sess=None, num_classes=100)

    for tag in tags:

        data = CIFAR(tag)

        print(
            'Accuracy on {} - Training : '.format(tag),
            np.mean(
                np.argmax(model.predict(data.train_data), axis=1) == np.argmax(
                    data.train_labels, axis=1)))

        print(
            'Accuracy on {} - Testing : '.format(tag),
            np.mean(
                np.argmax(model.predict(data.test_data), axis=1) == np.argmax(
                    data.test_labels, axis=1)))
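
# The argmax-comparison pattern above recurs in several of these examples; a small
# self-contained helper (names are illustrative) could factor it out:
import numpy as np

def top1_accuracy(model, x, y_onehot):
    """Fraction of samples whose top-1 prediction matches the one-hot label."""
    preds = np.argmax(model.predict(x), axis=1)
    return np.mean(preds == np.argmax(y_onehot, axis=1))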
Example #9
def convert(file_name, new_name, cifar=False):
    if not cifar:
        eq_weights, new_params = get_weights(file_name)
        data = MNIST()
    else:
        eq_weights, new_params = get_weights(file_name, inp_shape=(32, 32, 3))
        data = CIFAR()
    model = Sequential()
    model.add(Flatten(input_shape=data.train_data.shape[1:]))
    for param in new_params:
        model.add(Dense(param))
        model.add(Lambda(lambda x: tf.nn.relu(x)))
    model.add(Dense(10))

    for i in range(len(eq_weights)):
        try:
            print(eq_weights[i][0].shape)
        except:
            pass
        model.layers[i].set_weights(eq_weights[i])

    sgd = SGD(lr=0.01, decay=1e-5, momentum=0.9, nesterov=True)

    model.compile(loss=fn, optimizer=sgd, metrics=['accuracy'])

    model.save(new_name)
    acc = model.evaluate(data.validation_data, data.validation_labels)[1]
    printlog("Converting CNN to MLP")
    nlayer = file_name.split('_')[-3][0]
    filters = file_name.split('_')[-2]
    kernel_size = file_name.split('_')[-1]
    printlog(
        "model name = {0}, numlayer = {1}, filters = {2}, kernel size = {3}".
        format(file_name, nlayer, filters, kernel_size))
    printlog("Model accuracy: {:.3f}".format(acc))
    printlog("-----------------------------------")
    return acc
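
# Usage sketch for convert; the file names are hypothetical but follow the
# "<prefix>_<n>layer_<filters>_<kernel>" pattern parsed above.
acc = convert("models/mnist_cnn_4layer_10_3", "models/mnist_cnn_4layer_10_3_mlp")
print("converted MLP accuracy:", acc)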
Example #10
                targets.append(np.eye(data.test_labels.shape[1])[j])
        else:
            inputs.append(data.test_data[start + i])
            targets.append(data.test_labels[start + i])

    inputs = np.array(inputs)
    targets = np.array(targets)

    return inputs, targets


if __name__ == "__main__":

    with tf.Session() as sess:

        data = CIFAR("ORI")

        Model = CIFARModel(restore="Models/CIFAR10_End2End_Trainable",
                           end2end=True)

        attack = CarliniL2(sess,
                           Model,
                           batch_size=9,
                           max_iterations=1000,
                           confidence=0)

        inputs, targets = generate_data(data,
                                        samples=1,
                                        targeted=True,
                                        start=0,
                                        inception=False)
def get_labs(y):
    # header and loop reconstructed (only the last few lines survived extraction);
    # builds a random one-hot target label for each input, as used by attack() below
    l = np.zeros((len(y), 10))
    for i in range(len(y)):
        r = np.random.random_integers(0, 9)
        l[i, r] = 1
    return l

def attack(data, name):
    sess = K.get_session()
    model = load_model("models/"+name, custom_objects={'fn': fn})
    class Wrap:
        image_size = 28 if "mnist" in name else 32
        num_labels = 10
        num_channels = 1 if "mnist" in name else 3
        def predict(self, x):
            return model(x)
    attack = CarliniL2(sess, Wrap(), batch_size=100,
                       max_iterations=10000, binary_search_steps=5,
                       initial_const=1, targeted=True)
    adv = attack.attack(data.test_data[:100],
                        get_labs(data.test_labels[:100]))
    np.save("/tmp/"+name, adv)
    print(np.mean(np.sum((adv-data.test_data[:100])**2,axis=(1,2,3))**.5))
    
attack(MNIST(), "mnist")
attack(MNIST(), "mnist_brelu")
attack(MNIST(), "mnist_gaussian")
attack(MNIST(), "mnist_gaussian_brelu")

attack(CIFAR(), "cifar")
attack(CIFAR(), "cifar_brelu")
attack(CIFAR(), "cifar_gaussian")
attack(CIFAR(), "cifar_gaussian_brelu")
Example #12
def run(args, restrict=True):
    if restrict:
        # Restrict the visible GPUs to the one for this subprocess
        id = np.int(multiprocessing.current_process().name.split("-")[1])
        os.environ["CUDA_VISIBLE_DEVICES"] = str(id - 1)

    # Load Parameters
    dataset = args[0]
    epsilon = float(args[1])
    mode = args[2]
    K = int(args[3])

    fname = dataset + "/" + str(epsilon) + "_" + mode + "_" + str(K)

    # Configure Keras/Tensorflow
    Keras.clear_session()

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    set_session(tf.Session(config=config))

    sess = Keras.get_session()
    Keras.set_learning_phase(False)

    # Fix Random Seeds
    np.random.seed(1)
    tf.set_random_seed(
        1
    )  # having this before Keras.clear_session() causes it to hang for some reason

    # Load Model/Data and setup SPSA placeholders
    N = 50
    if dataset == "MNIST":
        # Base Model
        base_model = MNISTModel("../1-Models/MNIST")
        data = MNIST()
        x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
        # SPSA
        shape_spsa = (1, 28, 28, 1)
        x_spsa = tf.placeholder(tf.float32, shape=shape_spsa)
    elif dataset == "CIFAR":
        # Base Model
        base_model = CIFARModel("../1-Models/CIFAR")
        data = CIFAR()
        x = tf.placeholder(tf.float32, shape=(None, 32, 32, 3))
        # SPSA
        shape_spsa = (1, 32, 32, 3)
        x_spsa = tf.placeholder(tf.float32, shape=shape_spsa)
    y_spsa = tf.placeholder(tf.int32)

    # Load the hidden representations of the real and adversarial examples from the training set
    x_train_real = np.squeeze(
        np.load("../3-Representation/" + dataset + "/train_" + mode + ".npy"))
    x_train_adv = np.squeeze(
        np.load("../3-Representation/" + dataset + "/train_adv_" + mode +
                ".npy"))

    n_train = x_train_real.shape[0]
    n_train_adv = x_train_adv.shape[0]
    x_train = np.float32(np.vstack((x_train_real, x_train_adv)))
    #print("Bounds ", np.max(np.abs(x_train)))
    y_train = np.float32(
        np.hstack((-1.0 * np.ones(n_train), np.ones(n_train_adv))))

    # Create the defended model
    model_defended = DefendedModel(base_model, x_train, y_train, K)
    defended_logits = model_defended.get_logits(x)

    # Configure the attack
    attack = SPSA(model_defended, back="tf", sess=sess)
    with tf.name_scope("Attack") as scope:
        gen = attack.generate(x_spsa,
                              y=y_spsa,
                              epsilon=epsilon,
                              is_targeted=False,
                              num_steps=100,
                              batch_size=2048,
                              early_stop_loss_threshold=-5.0)

    # Run the attack
    f = open(fname + ".txt", "w")

    sample = np.random.choice(data.test_data.shape[0], N, replace=False)
    x_sample = data.test_data[sample]
    y_sample = np.argmax(data.test_labels[sample], axis=1)

    logits_nat = sess.run(defended_logits, {x: x_sample})
    f.write("Accuracy on Natural Images: " +
            str(np.mean(np.argmax(logits_nat, axis=1) == y_sample)) + "\n")

    pred_adv = -1.0 * np.ones((N))
    for i in range(N):
        x_real = x_sample[i].reshape(shape_spsa)
        x_adv = sess.run(gen, {x_spsa: x_real, y_spsa: y_sample[i]})
        pred_adv[i] = np.argmax(sess.run(defended_logits, {x: x_adv}))

    f.write("Accuracy on Adversarial Images: " +
            str(np.mean(pred_adv == y_sample)))
    f.close()
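
# Usage sketch: `run` derives its GPU id from the multiprocessing worker name, so it
# is normally mapped over a pool. The epsilon, mode and K values below are assumptions.
import multiprocessing

if __name__ == "__main__":
    jobs = [["MNIST", 0.1, "gap", 5], ["CIFAR", 0.03, "gap", 5]]
    pool = multiprocessing.Pool(2)
    pool.map(run, jobs)
    pool.close()
    pool.join()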
Example #13
def main(args):
    temp_encoder = encoder(level=args['level'])
    with tf.Session() as sess:
        use_log = not args['use_zvalue']
        is_inception = args['dataset'] == "imagenet"
        # load network
        print('Loading model', args['dataset'])
        if args['dataset'] == "mnist":
            data, model = MNIST(), MNISTModel("models/mnist", sess, use_log)
            # data, model =  MNIST(), MNISTModel("models/mnist-distilled-100", sess, use_log)
        elif args['dataset'] == "cifar10":
            #data, model = CIFAR(), CIFARModel("models/cifar", sess, use_log)
            # data, model = CIFAR(), CIFARModel("models/cifar-distilled-100", sess, use_log)
            data, model = CIFAR(), CIFAR_WIDE("models/wide_resnet", sess,
                                              use_log)
        elif args['dataset'] == "imagenet":
            data, model = ImageNet(), InceptionModel(sess, use_log)
        print('Done...')
        if args['numimg'] == 0:
            args['numimg'] = len(data.test_labels) - args['firstimg']
        print('Using', args['numimg'], 'test images')
        # load attack module
        if args['attack'] == "white":
            # batch size 1, optimize on 1 image at a time, rather than optimizing images jointly
            attack = CarliniL2(sess,
                               model,
                               batch_size=1,
                               max_iterations=args['maxiter'],
                               print_every=args['print_every'],
                               early_stop_iters=args['early_stop_iters'],
                               confidence=0,
                               learning_rate=args['lr'],
                               initial_const=args['init_const'],
                               binary_search_steps=args['binary_steps'],
                               targeted=not args['untargeted'],
                               use_log=use_log,
                               adam_beta1=args['adam_beta1'],
                               adam_beta2=args['adam_beta2'])
        else:
            # batch size 128, optimize on 128 coordinates of a single image
            attack = BlackBoxL2(sess,
                                model,
                                batch_size=128,
                                max_iterations=args['maxiter'],
                                print_every=args['print_every'],
                                early_stop_iters=args['early_stop_iters'],
                                confidence=0,
                                learning_rate=args['lr'],
                                initial_const=args['init_const'],
                                binary_search_steps=args['binary_steps'],
                                targeted=not args['untargeted'],
                                use_log=use_log,
                                use_tanh=args['use_tanh'],
                                use_resize=args['use_resize'],
                                adam_beta1=args['adam_beta1'],
                                adam_beta2=args['adam_beta2'],
                                reset_adam_after_found=args['reset_adam'],
                                solver=args['solver'],
                                save_ckpts=args['save_ckpts'],
                                load_checkpoint=args['load_ckpt'],
                                start_iter=args['start_iter'],
                                init_size=args['init_size'],
                                use_importance=not args['uniform'])

        random.seed(args['seed'])
        np.random.seed(args['seed'])
        print('Generate data')
        all_inputs, all_targets, all_labels, all_true_ids, encoding_all = generate_data(
            data,
            samples=args['numimg'],
            targeted=not args['untargeted'],
            start=args['firstimg'],
            inception=is_inception)
        print('Done...')
        #print('all_inputs : ', all_inputs.shape)
        #print('encoding_all : ',encoding_all.shape)
        os.system("mkdir -p {}/{}".format(args['save'], args['dataset']))
        img_no = 0
        total_success = 0
        l2_total = 0.0
        origin_correct = 0
        adv_correct = 0
        for i in range(all_true_ids.size):
            print(' adversarial_image_no: ', i)
            inputs = all_inputs[i:i + 1]
            encoding_inputs = encoding_all[i:i + 1]
            #print('encoding_inputs shape: ', encoding_inputs)
            targets = all_targets[i:i + 1]
            labels = all_labels[i:i + 1]
            print("true labels:", np.argmax(labels), labels)
            print("target:", np.argmax(targets), targets)
            # test if the image is correctly classified
            original_predict = model.model.predict(encoding_inputs)
            original_predict = np.squeeze(original_predict)
            original_prob = np.sort(original_predict)
            original_class = np.argsort(original_predict)
            print("original probabilities:", original_prob[-1:-6:-1])
            print("original classification:", original_class[-1:-6:-1])
            print("original probabilities (most unlikely):", original_prob[:6])
            print("original classification (most unlikely):",
                  original_class[:6])
            if original_class[-1] != np.argmax(labels):
                print(
                    "skip wrongly classified image no. {}, original class {}, classified as {}"
                    .format(i, np.argmax(labels), original_class[-1]))
                continue
            origin_correct += np.argmax(labels, 1) == original_class[-1]

            img_no += 1
            timestart = time.time()
            adv, const = attack.attack_batch(inputs, targets)
            if type(const) is list:
                const = const[0]
            if len(adv.shape) == 3:
                adv = adv.reshape((1, ) + adv.shape)
            timeend = time.time()
            l2_distortion = np.sum((adv - inputs)**2)**.5

            ##### llj
            encode_adv = np.transpose(adv, axes=(0, 3, 1, 2))
            channel0 = temp_encoder.tempencoding(encode_adv[:, 0, :, :])
            channel1 = temp_encoder.tempencoding(encode_adv[:, 1, :, :])
            channel2 = temp_encoder.tempencoding(encode_adv[:, 2, :, :])
            encode_adv = np.concatenate([channel0, channel1, channel2], axis=1)
            encode_adv = np.transpose(encode_adv, axes=(0, 2, 3, 1))

            #### llj
            adversarial_predict = model.model.predict(encode_adv)
            adversarial_predict = np.squeeze(adversarial_predict)
            adversarial_prob = np.sort(adversarial_predict)
            adversarial_class = np.argsort(adversarial_predict)
            print("adversarial probabilities:", adversarial_prob[-1:-6:-1])
            print("adversarial classification:", adversarial_class[-1:-6:-1])

            adv_correct += np.argmax(labels, 1) == adversarial_class[-1]

            success = False
            if args['untargeted']:
                if adversarial_class[-1] != original_class[-1]:
                    success = True
            else:
                if adversarial_class[-1] == np.argmax(targets):
                    success = True
            if l2_distortion > 20.0:
                success = False
            if success:
                total_success += 1
                l2_total += l2_distortion
            suffix = "id{}_seq{}_prev{}_adv{}_{}_dist{}".format(
                all_true_ids[i], i, original_class[-1], adversarial_class[-1],
                success, l2_distortion)
            print("Saving to", suffix)
            show(
                inputs,
                "{}/{}/{}_original_{}.png".format(args['save'],
                                                  args['dataset'], img_no,
                                                  suffix))
            show(
                adv,
                "{}/{}/{}_adversarial_{}.png".format(args['save'],
                                                     args['dataset'], img_no,
                                                     suffix))
            show(
                adv - inputs,
                "{}/{}/{}_diff_{}.png".format(args['save'], args['dataset'],
                                              img_no, suffix))
            print(
                "[STATS][L1] total = {}, seq = {}, id = {}, time = {:.3f}, success = {}, const = {:.6f}, prev_class = {}, new_class = {}, distortion = {:.5f}, success_rate = {:.3f}, l2_avg = {:.5f}"
                .format(img_no, i, all_true_ids[i], timeend - timestart,
                        success, const, original_class[-1],
                        adversarial_class[-1], l2_distortion,
                        total_success / float(img_no),
                        0 if total_success == 0 else l2_total / total_success))
            sys.stdout.flush()

        print(' origin accuracy : ',
              100.0 * origin_correct / all_true_ids.size)
        print(' adv accuracy : ', 100.0 * adv_correct / all_true_ids.size)
def run(file_name, n_samples, p_n, q_n, activation = 'relu', cifar=False, tinyimagenet=False):
    np.random.seed(1215)
    tf.set_random_seed(1215)
    random.seed(1215)
    keras_model = load_model(file_name, custom_objects={'fn':fn, 'tf':tf})
    if tinyimagenet:
        model = CNNModel(keras_model, inp_shape = (64,64,3))
    elif cifar:
        model = CNNModel(keras_model, inp_shape = (32,32,3))
    else:
        model = CNNModel(keras_model)

    #Set correct linear_bounds function
    global linear_bounds
    if activation == 'relu':
        linear_bounds = relu_linear_bounds
    elif activation == 'ada':
        linear_bounds = ada_linear_bounds
    elif activation == 'sigmoid':
        linear_bounds = sigmoid_linear_bounds
    elif activation == 'tanh':
        linear_bounds = tanh_linear_bounds
    elif activation == 'arctan':
        linear_bounds = atan_linear_bounds
    upper_bound_conv.recompile()
    lower_bound_conv.recompile()
    compute_bounds.recompile()

    if cifar:
        inputs, targets, true_labels, true_ids, img_info = generate_data(CIFAR(), samples=n_samples, targeted=True, random_and_least_likely = True, target_type = 0b0010, predictor=model.model.predict, start=0)
    elif tinyimagenet:
        inputs, targets, true_labels, true_ids, img_info = generate_data(tinyImagenet(), samples=n_samples, targeted=True, random_and_least_likely = True, target_type = 0b0010, predictor=model.model.predict, start=0)
    else:
        inputs, targets, true_labels, true_ids, img_info = generate_data(MNIST(), samples=n_samples, targeted=True, random_and_least_likely = True, target_type = 0b0010, predictor=model.model.predict, start=0)
    #0b01111 <- all
    #0b0010 <- random
    #0b0001 <- top2
    #0b0100 <- least

    steps = 15
    eps_0 = 0.05
    summation = 0
    warmup(model, inputs[0].astype(np.float32), eps_0, p_n, find_output_bounds)
        
    start_time = time.time()
    for i in range(len(inputs)):
        print('--- CNN-Cert: Computing eps for input image ' + str(i)+ '---')
        predict_label = np.argmax(true_labels[i])
        target_label = np.argmax(targets[i])
        weights = model.weights[:-1]
        biases = model.biases[:-1]
        shapes = model.shapes[:-1]
        W, b, s = model.weights[-1], model.biases[-1], model.shapes[-1]
        last_weight = (W[predict_label,:,:,:]-W[target_label,:,:,:]).reshape([1]+list(W.shape[1:]))
        weights.append(last_weight)
        biases.append(np.asarray([b[predict_label]-b[target_label]]))
        shapes.append((1,1,1))

        #Perform binary search
        log_eps = np.log(eps_0)
        log_eps_min = -np.inf
        log_eps_max = np.inf
        for j in range(steps):
            LB, UB = find_output_bounds(weights, biases, shapes, model.pads, model.strides, inputs[i].astype(np.float32), np.exp(log_eps), p_n)
            print("Step {}, eps = {:.5f}, {:.6s} <= f_c - f_t <= {:.6s}".format(j,np.exp(log_eps),str(np.squeeze(LB)),str(np.squeeze(UB))))
            if LB > 0: #Increase eps
                log_eps_min = log_eps
                log_eps = np.minimum(log_eps+1, (log_eps_max+log_eps_min)/2)
            else: #Decrease eps
                log_eps_max = log_eps
                log_eps = np.maximum(log_eps-1, (log_eps_max+log_eps_min)/2)
        
        if p_n == 105:
            str_p_n = 'i'
        else:
            str_p_n = str(p_n)
        
        print("[L1] method = CNN-Cert-{}, model = {}, image no = {}, true_id = {}, target_label = {}, true_label = {}, norm = {}, robustness = {:.5f}".format(activation,file_name, i, true_ids[i],target_label,predict_label,str_p_n,np.exp(log_eps_min)))
        summation += np.exp(log_eps_min)
    K.clear_session()
    
    eps_avg = summation/len(inputs)
    total_time = (time.time()-start_time)/len(inputs)
    print("[L0] method = CNN-Cert-{}, model = {}, total images = {}, norm = {}, avg robustness = {:.5f}, avg runtime = {:.2f}".format(activation,file_name,len(inputs),str_p_n,eps_avg,total_time))
    return eps_avg, total_time
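
# Usage sketch for the CNN-Cert run(); the model file name is hypothetical, and p_n
# selects the norm (2 for L2; 105 denotes L-infinity, as handled above).
eps_avg, avg_runtime = run("models/cifar_cnn_7layer", n_samples=10, p_n=2, q_n=1,
                           activation='relu', cifar=True)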
Example #15
def main(args):
    with tf.Session() as sess:

        random.seed(SEED)
        np.random.seed(SEED)
        tf.set_random_seed(SEED)

        class_id = args['class_id']  ### input image (natural example)
        target_id = args[
            'target_id']  ### target image id (adv example) for a targeted attack
        arg_max_iter = args['maxiter']  ### max number of iterations
        arg_init_const = args[
            'init_const']  ### regularization prior to attack loss
        arg_kappa = args['kappa']  ### attack confidence level
        arg_q = args['q']  ### number of random direction vectors
        arg_mode = args['mode']  ### algorithm name
        arg_save_iteration = args['save_iteration']
        arg_Dataset = args["dataset"]
        arg_targeted_attack = args["targeted_attack"]
        arg_bsz = args["mini_batch_sz"]
        idx_lr = args["lr_idx"]

        ## load classifier; for MNIST and CIFAR the pixel value range is [-0.5, 0.5]
        if (arg_Dataset == 'mnist'):
            data, model = MNIST(), MNISTModel("models/mnist", sess, True)
        elif (arg_Dataset == 'cifar10'):
            data, model = CIFAR(), CIFARModel("models/cifar", sess, True)
        elif (arg_Dataset == 'imagenet'):
            data, model = ImageNet_Universal(SEED), InceptionModel(sess, True)
            #model = InceptionModel(sess, True)
        else:
            print('Please specify a valid dataset')

        #orig_img = np.load('ori_img_backup.npy')
        orig_img = data.test_data[np.where(
            np.argmax(data.test_labels, 1) == class_id)]
        #np.save('ori_img_backup',orig_img)

        #true_label = data.test_labels[np.where(np.argmax(data.test_labels,1) == class_id)]
        _, orig_class = util.model_prediction_u(
            model, orig_img[:30]
        )  # take 30 or less images to make sure arg_bsz number of them are valid

        # filter out the images which misclassified already
        orig_img = orig_img[np.where(orig_class == class_id)]
        if orig_img.shape[0] < arg_bsz:
            raise ValueError('not enough correctly classified inputs for the requested batch size')

        orig_img = orig_img[:arg_bsz]

        np.save('original_imgsID' + str(class_id), orig_img)
        #true_label = np.zeros((arg_bsz, 1001))
        #true_label[np.arange(arg_bsz), class_id] = 1
        true_label = class_id

        if arg_targeted_attack:  ### target attack
            #target_label = np.zeros((arg_bsz, 1001))
            #target_label[np.arange(arg_bsz), target_id] = 1
            target_label = target_id
        else:
            target_label = true_label

        #orig_img, target = util.generate_data(data, class_id, target_label)
        # shape of orig_img is (1,28,28,1) in [-0.5, 0.5]

        ##  parameter
        if orig_img.ndim == 3 or orig_img.shape[0] == 1:
            d = orig_img.size  # feature dim
        else:
            d = orig_img[0].size
        print("dimension = ", d)

        # mu=1/d**2  # smoothing parameter
        q = arg_q + 0
        I = arg_max_iter + 0
        kappa = arg_kappa + 0
        const = arg_init_const + 0

        ## flatten image to vec
        orig_img_vec = np.resize(orig_img, (arg_bsz, d))

        ## w adv image initialization
        if args["constraint"] == 'uncons':
            # * 0.999999 to avoid +-0.5 return +-infinity
            w_ori_img_vec = np.arctanh(
                2 * (orig_img_vec) * 0.999999
            )  # in real value, note that orig_img_vec in [-0.5, 0.5]
            w_img_vec = w_ori_img_vec.copy()
        else:
            w_ori_img_vec = orig_img_vec.copy()
            w_img_vec = w_ori_img_vec.copy()

        # ## test ##
        # for test_value in w_ori_img_vec[0, :]:
        #     if np.isnan(test_value) or np.isinf(test_value):
        #         print(test_value)

        delta_adv = np.zeros((1, d))  ### initialized adv. perturbation

        # initialize the best solution & best loss
        best_adv_img = []  # successful adv image in [-0.5, 0.5]
        best_delta = []  # best perturbation
        best_distortion = (0.5 * d)**2  # threshold for best perturbation
        total_loss = np.zeros(I)  ## I: max iters
        l2s_loss_all = np.zeros(I)
        attack_flag = False
        first_flag = True  ## record first successful attack

        # parameter setting for ZO gradient estimation
        mu = args["mu"]  ### smoothing parameter

        ## learning rate
        base_lr = args["lr"]

        if arg_mode == "ZOAdaMM":
            ## parameter initialization for AdaMM
            v_init = 1e-7  #0.00001
            v_hat = v_init * np.ones((1, d))
            v = v_init * np.ones((1, d))

            m = np.zeros((1, d))
            # momentum parameter for first and second order moment
            beta_1 = 0.9
            beta_2 = 0.3  # only used by AMSGrad
            print(beta_1, beta_2)

        #for i in tqdm(range(I)):
        for i in range(I):

            if args["decay_lr"]:
                base_lr = args["lr"] / np.sqrt(i + 1)

            ## Total loss evaluation
            if args["constraint"] == 'uncons':
                total_loss[i], l2s_loss_all[i] = function_evaluation_uncons(
                    w_img_vec, kappa, target_label, const, model, orig_img,
                    arg_targeted_attack)

            else:
                total_loss[i], l2s_loss_all[i] = function_evaluation_cons(
                    w_img_vec, kappa, target_label, const, model, orig_img,
                    arg_targeted_attack)

            ## gradient estimation w.r.t. w_img_vec
            if arg_mode == "ZOSCD":
                grad_est = grad_coord_estimation(mu, q, w_img_vec, d, kappa,
                                                 target_label, const, model,
                                                 orig_img, arg_targeted_attack,
                                                 args["constraint"])
            elif arg_mode == "ZONES":
                grad_est = gradient_estimation_NES(mu, q, w_img_vec, d, kappa,
                                                   target_label, const, model,
                                                   orig_img,
                                                   arg_targeted_attack,
                                                   args["constraint"])
            else:
                grad_est = gradient_estimation_v2(mu, q, w_img_vec, d, kappa,
                                                  target_label, const, model,
                                                  orig_img,
                                                  arg_targeted_attack,
                                                  args["constraint"])

            # if np.remainder(i,50)==0:
            # print("total loss:",total_loss[i])
            # print(np.linalg.norm(grad_est, np.inf))

            ## ZO-Attack, unconstrained optimization formulation
            if arg_mode == "ZOSGD":
                delta_adv = delta_adv - base_lr * grad_est
            if arg_mode == "ZOsignSGD":
                delta_adv = delta_adv - base_lr * np.sign(grad_est)
            if arg_mode == "ZOSCD":
                delta_adv = delta_adv - base_lr * grad_est
            if arg_mode == "ZOAdaMM":
                m = beta_1 * m + (1 - beta_1) * grad_est
                v = beta_2 * v + (1 - beta_2) * np.square(grad_est)  ### vt
                #print(np.mean(np.abs(m)),np.mean(np.sqrt(v)))
                v_hat = np.maximum(v_hat, v)
                delta_adv = delta_adv - base_lr * m / np.sqrt(v)
                if args["constraint"] == 'cons':
                    tmp = delta_adv.copy()
                    #X_temp = orig_img_vec.reshape((-1,1))
                    #V_temp2 = np.diag(np.sqrt(v_hat.reshape(-1)+1e-10))
                    V_temp = np.sqrt(v_hat.reshape(1, -1))
                    delta_adv = projection_box(tmp, orig_img_vec, V_temp, -0.5,
                                               0.5)
                    #delta_adv2 = projection_box_2(tmp, X_temp, V_temp2, -0.5, 0.5)
                # v_init = 1e-2 #0.00001
                # v = v_init * np.ones((1, d))
                # m = np.zeros((1, d))
                # # momentum parameter for first and second order moment
                # beta_1 = 0.9
                # beta_2 = 0.99  # only used by AMSGrad
                # m = beta_1 * m + (1-beta_1) * grad_est
                # v = np.maximum(beta_2 * v + (1-beta_2) * np.square(grad_est),v)
                # delta_adv = delta_adv - base_lr * m /np.sqrt(v+1e-10)
                # if args["constraint"] == 'cons':
                #     V_temp = np.diag(np.sqrt(v.reshape(-1)+1e-10))
                #     X_temp = orig_img_vec.reshape((-1,1))
                #     delta_adv = projection_box(delta_adv, X_temp, V_temp, -0.5, 0.5)
            if arg_mode == "ZOSMD":
                delta_adv = delta_adv - 0.5 * base_lr * grad_est
                # delta_adv = delta_adv - base_lr* grad_est
                if args["constraint"] == 'cons':
                    #V_temp = np.eye(orig_img_vec.size)
                    V_temp = np.ones((1, d))
                    #X_temp = orig_img_vec.reshape((-1,1))
                    delta_adv = projection_box(delta_adv, orig_img_vec, V_temp,
                                               -0.5, 0.5)
            if arg_mode == "ZOPSGD":
                delta_adv = delta_adv - base_lr * grad_est
                if args["constraint"] == 'cons':
                    #V_temp = np.eye(orig_img_vec.size)
                    V_temp = np.ones((1, d))
                    #X_temp = orig_img_vec.reshape((-1,1))
                    delta_adv = projection_box(delta_adv, orig_img_vec, V_temp,
                                               -0.5, 0.5)
            if arg_mode == "ZONES":
                delta_adv = delta_adv - base_lr * np.sign(grad_est)
                if args["constraint"] == 'cons':
                    #V_temp = np.eye(orig_img_vec.size)
                    V_temp = np.ones((1, d))
                    #X = orig_img_vec.reshape((-1,1))
                    delta_adv = projection_box(delta_adv, orig_img_vec, V_temp,
                                               -0.5, 0.5)

            # if arg_mode == "ZO-AdaFom":
            #     m = beta_1 * m + (1-beta_1) * grad_est
            #     v = v* (float(i)/(i+1)) + np.square(grad_est)/(i+1)
            #     w_img_vec = w_img_vec - base_lr * m/np.sqrt(v)
            ##

            ### adv. example update
            w_img_vec = w_ori_img_vec + delta_adv

            ## covert back to adv_img in [-0.5 , 0.5]
            if args["constraint"] == 'uncons':
                adv_img_vec = 0.5 * np.tanh((w_img_vec)) / 0.999999  #
            else:
                adv_img_vec = w_img_vec.copy()

            adv_img = np.resize(adv_img_vec, orig_img.shape)

            ## update the best solution in the iterations
            attack_prob, _, _ = util.model_prediction(model, adv_img)
            target_prob = attack_prob[:, target_label]
            attack_prob_tmp = attack_prob.copy()
            attack_prob_tmp[:, target_label] = 0
            other_prob = np.amax(attack_prob_tmp, 1)

            if i % 1000 == 0 and i != 0:
                if arg_mode == "ZOAdaMM": print(beta_1, beta_2)
                print("save delta_adv")
                np.save(
                    'retimgs/' + str(i) + 'itrs' +
                    str(np.argmax(attack_prob, 1)) + arg_mode +
                    str(args["lr"]), delta_adv)

            if args["print_iteration"]:
                if np.remainder(i + 1, 20) == 0:
                    if (true_label != np.argmax(attack_prob, 1)).all():
                        print(
                            "Iter %d (Succ): ID = %d, lr = %3.7f, decay = %d, ZO = %s %s, loss = %3.5f, l2sdist = %3.5f, TL = %d, PL = %s"
                            % (i + 1, class_id, args["lr"],
                               int(args["decay_lr"]), arg_mode,
                               args["constraint"], total_loss[i],
                               l2s_loss_all[i], true_label,
                               np.argmax(attack_prob, 1)))
                    else:
                        sr = np.sum(
                            true_label != np.argmax(attack_prob, 1)) / arg_bsz
                        print(
                            "Iter %d (Fail): ID = %d, lr = %3.7f, decay = %d, ZO = %s %s, loss = %3.5f, l2sdist = %3.5f, TL = %d, PL = %s, succ rate = %.2f"
                            % (i + 1, class_id, args["lr"],
                               int(args["decay_lr"]), arg_mode,
                               args["constraint"], total_loss[i],
                               l2s_loss_all[i], true_label,
                               np.argmax(attack_prob, 1), sr))

            if arg_save_iteration:
                os.system("mkdir Examples")
                if (np.logical_or(
                        true_label != np.argmax(attack_prob, 1),
                        np.remainder(i + 1, 10) == 0)):  ## every 10 iterations
                    suffix = "id_{}_Mode_{}_True_{}_Pred_{}_Ite_{}".format(
                        class_id, arg_mode, true_label,
                        np.argmax(attack_prob, 1), i + 1)
                    # util.save_img(adv_img, "Examples/{}.png".format(suffix))

            if arg_targeted_attack:
                if ((np.log(target_prob + 1e-10) - np.log(other_prob + 1e-10))
                        >= kappa).all():  # check attack confidence
                    if (distortion(adv_img, orig_img) <
                            best_distortion):  # check distortion
                        # print('best distortion obtained at',i,'-th iteration')
                        best_adv_img = adv_img
                        best_distortion = distortion(adv_img, orig_img)
                        #best_delta = adv_img - orig_img
                        best_iteration = i + 1
                        adv_class = np.argmax(attack_prob, 1)
                        attack_flag = True
                        ## Record first attack
                        if (first_flag):
                            first_flag = False  ### once set, later successful attacks are no longer recorded as the first
                            first_adv_img = adv_img
                            first_distortion = distortion(adv_img, orig_img)
                            #first_delta = adv_img - orig_img
                            first_class = adv_class
                            first_iteration = i + 1
            else:
                if ((np.log(other_prob + 1e-10) - np.log(target_prob + 1e-10))
                        >= kappa).all():  # check attack confidence
                    if (distortion(adv_img, orig_img) <
                            best_distortion):  # check distortion
                        # print('best distortion obtained at',i,'-th iteration')
                        best_adv_img = adv_img
                        best_distortion = distortion(adv_img, orig_img)
                        #best_delta = adv_img - orig_img
                        best_iteration = i + 1
                        adv_class = np.argmax(attack_prob, 1)
                        attack_flag = True
                        ## Record first attack
                        if (first_flag):
                            first_flag = False
                            first_adv_img = adv_img
                            first_distortion = distortion(adv_img, orig_img)
                            #first_delta = adv_img - orig_img
                            first_class = adv_class
                            first_iteration = i + 1

        if (attack_flag):
            # os.system("mkdir Results_SL")
            # ## best attack (final attack)
            # suffix = "id_{}_Mode_{}_True_{}_Pred_{}".format(class_id, arg_mode, true_label, orig_class) ## orig_class, predicted label
            # suffix2 = "id_{}_Mode_{}_True_{}_Pred_{}".format(class_id, arg_mode, true_label, adv_class)
            # suffix3 = "id_{}_Mode_{}".format(class_id, arg_mode)
            # ### save original image
            # util.save_img(orig_img, "Results_SL/id_{}.png".format(class_id))
            # util.save_img(orig_img, "Results_SL/{}_Orig.png".format(suffix))
            # ### adv. image
            # util.save_img(best_adv_img, "Results_SL/{}_Adv_best.png".format(suffix2))
            # ### adv. perturbation
            # util.save_img(best_delta, "Results_SL/{}_Delta_best.png".format(suffix3))
            #
            #
            # ## first attack
            # suffix4 = "id_{}_Mode_{}_True_{}_Pred_{}".format(class_id, arg_mode, true_label, first_class)
            # ## first adv. imag
            # util.save_img(first_adv_img, "Results_SL/{}_Adv_first.png".format(suffix4))
            # ### first adv. perturbation
            # util.save_img(first_delta, "Results_SL/{}_Delta_first.png".format(suffix3))

            ## save data
            suffix0 = "id_{}_Mode_{}_{}_lr_{}_decay_{}_case{}_ini_{}".format(
                class_id, arg_mode, args["constraint"], str(args["lr"]),
                int(args["decay_lr"]), args["exp_code"], args["init_const"])
            np.savez("{}".format(suffix0),
                     id=class_id,
                     mode=arg_mode,
                     loss=total_loss,
                     perturbation=l2s_loss_all,
                     best_distortion=best_distortion,
                     first_distortion=first_distortion,
                     first_iteration=first_iteration,
                     best_iteation=best_iteration,
                     learn_rate=args["lr"],
                     decay_lr=args["decay_lr"],
                     attack_flag=attack_flag)
            ## print
            print("It takes {} iteations to find the first attack".format(
                first_iteration))
            # print(total_loss)
        else:
            ## save data
            suffix0 = "id_{}_Mode_{}_{}_lr_{}_decay_{}_case{}_ini_{}".format(
                class_id, arg_mode, args["constraint"], str(args["lr"]),
                int(args["decay_lr"]), args["exp_code"], args["init_const"])
            np.savez("{}".format(suffix0),
                     id=class_id,
                     mode=arg_mode,
                     loss=total_loss,
                     perturbation=l2s_loss_all,
                     best_distortion=best_distortion,
                     learn_rate=args["lr"],
                     decay_lr=args["decay_lr"],
                     attack_flag=attack_flag)
            print("Attack Fails")

        sys.stdout.flush()
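
# `distortion` is defined elsewhere in the original code and is used above as a
# distance between adversarial and original images; an assumed minimal L2 version:
import numpy as np

def distortion_l2(a, b):
    return np.linalg.norm(a.astype(np.float64) - b.astype(np.float64))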
Example #16
##
## This program is licenced under the BSD 2-Clause licence,
## contained in the LICENCE file in this directory.

from setup_cifar import CIFAR, CIFARModel
from setup_mnist import MNIST, MNISTModel
from setup_inception import ImageNet, InceptionModel

import tensorflow as tf
import numpy as np

BATCH_SIZE = 1

with tf.Session() as sess:
    # each assignment below overrides the previous one; keep only the dataset/model pair to test
    data, model = MNIST(), MNISTModel("models/mnist", sess)
    data, model = CIFAR(), CIFARModel("models/cifar", sess)
    data, model = ImageNet(), InceptionModel(sess)

    x = tf.placeholder(
        tf.float32,
        (None, model.image_size, model.image_size, model.num_channels))
    y = model.predict(x)

    r = []
    for i in range(0, len(data.test_data), BATCH_SIZE):
        pred = sess.run(y, {x: data.test_data[i:i + BATCH_SIZE]})
        #print(pred)
        #print('real',data.test_labels[i],'pred',np.argmax(pred))
        r.append(
            np.argmax(pred, 1) == np.argmax(data.test_labels[i:i +
                                                             BATCH_SIZE], 1))
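    # The example is cut off here; the per-batch correctness accumulated in `r`
    # would typically be reported as overall test accuracy, e.g.:
    print("test accuracy:", np.mean(np.concatenate(r)))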
Example #17
        base_lr = 0.1

    datagen.fit(data.train_data)

    model.fit_generator(datagen.flow(data.train_data,
                                     data.train_labels,
                                     batch_size=batch_size),
                        steps_per_epoch=data.train_data.shape[0] // batch_size,
                        epochs=num_epochs,
                        verbose=1,
                        validation_data=(data.validation_data,
                                         data.validation_labels),
                        callbacks=[schedule])

    print(
        'Test accuracy:',
        np.mean(
            np.argmax(model.predict(data.test_data), axis=1) == np.argmax(
                data.test_labels, axis=1)))

    if file_name != None:
        model.save_weights(file_name)

    return model


if dataset == "MNIST":
    train(MNIST(), MNISTModel, "MNIST", num_epochs=30)
elif dataset == "CIFAR":
    train(CIFAR(), CIFARModel, "CIFAR", num_epochs=300)
def run(args, restrict=True):
    if restrict:
        # Restrict the visible GPUs to the one for this subprocess
        id = np.int(multiprocessing.current_process().name.split("-")[1])
        os.environ["CUDA_VISIBLE_DEVICES"] = str(id - 1)

    # Load Parameters
    dataset = args[0]
    epsilon = float(args[1])
    mode = args[2]
    K = int(args[3])
    bias = float(args[4])

    fname = dataset + "/" + str(epsilon) + "_" + mode + "_" + str(
        K) + "_" + str(bias)

    # Configure Keras/Tensorflow
    Keras.clear_session()

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    set_session(tf.Session(config=config))

    sess = Keras.get_session()
    Keras.set_learning_phase(False)

    # Fix Random Seeds
    np.random.seed(1)
    tf.set_random_seed(
        1
    )  # having this before Keras.clear_session() causes it to hang for some reason

    # Load Model/Data and setup SPSA placeholders
    N = 1000
    if dataset == "MNIST":
        # Base Model
        base_model = MNISTModel("../1-Models/MNIST")
        data = MNIST()
        x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
        # SPSA
        shape_spsa = (1, 28, 28, 1)
        x_spsa = tf.placeholder(tf.float32, shape=shape_spsa)
    elif dataset == "CIFAR":
        # Base Model
        base_model = CIFARModel("../1-Models/CIFAR")
        data = CIFAR()
        x = tf.placeholder(tf.float32, shape=(None, 32, 32, 3))
        # SPSA
        shape_spsa = (1, 32, 32, 3)
        x_spsa = tf.placeholder(tf.float32, shape=shape_spsa)
    y_spsa = tf.placeholder(tf.int32)

    # Load the hidden representations of the real and adversarial examples from the training set
    x_train_real = np.squeeze(
        np.load("../3-Representation/" + dataset + "/train_" + mode + ".npy"))
    x_train_adv = np.squeeze(
        np.load("../3-Representation/" + dataset + "/train_adv_" + mode +
                ".npy"))

    n_train = x_train_real.shape[0]
    n_train_adv = x_train_adv.shape[0]
    x_train = np.float32(np.vstack((x_train_real, x_train_adv)))
    #print("Bounds ", np.max(np.abs(x_train)))
    y_train = np.float32(
        np.hstack((-1.0 * np.ones(n_train), np.ones(n_train_adv))))

    # Create the defended model
    model_defended = DefendedModel(base_model, x_train, y_train, K, bias=bias)
    defended_logits = model_defended.get_logits(x)

    # Get the predictions on the original images
    labels = np.argmax(data.test_labels[:N], axis=1)
    logits_real = sess.run(defended_logits, {x: data.test_data[:N]})
    fp = (np.argmax(logits_real,
                    axis=1) == 10)  #False positives of the defense
    pred_undefended = np.argmax(np.delete(logits_real, -1, axis=1),
                                axis=1)  #Original model prediction

    # Configure the attack
    attack = SPSA(model_defended, back="tf", sess=sess)
    with tf.name_scope("Attack") as scope:
        gen = attack.generate(x_spsa,
                              y_target=y_spsa,
                              epsilon=epsilon,
                              is_targeted=True,
                              num_steps=100,
                              batch_size=2048,
                              early_stop_loss_threshold=-5.0)

    # Run the attack
    pred_adv = -1.0 * np.ones((N, 10))
    for i in range(N):
        if i % 10 == 0:
            print(fname, " ", i)
            out = {}
            out["FP"] = fp
            out["Labels"] = labels
            out["UndefendedPrediction"] = pred_undefended
            out["AdversarialPredictions"] = pred_adv
            file = open(fname, "wb")
            pickle.dump(out, file)
            file.close()

        x_real = data.test_data[i].reshape(shape_spsa)

        # Try a targeted attack for each class other than the original network prediction and the adversarial class
        for y in range(10):
            if y != pred_undefended[i]:
                x_adv = sess.run(gen, {x_spsa: x_real, y_spsa: y})
                pred_adv[i,
                         y] = np.argmax(sess.run(defended_logits, {x: x_adv}))

    out = {}
    out["FP"] = fp
    out["Labels"] = labels
    out["UndefendedPrediction"] = pred_undefended
    out["AdversarialPredictions"] = pred_adv
    file = open(fname, "wb")
    pickle.dump(out, file)
    file.close()

    analysis(fname)
Example #19
    # run training with given dataset, and print progress
    model.fit(data.train_data,
              data.train_labels,
              batch_size=batch_size,
              validation_data=(data.validation_data, data.validation_labels),
              epochs=num_epochs,
              shuffle=True)

    # save model to a file
    if file_name != None:
        model.save(file_name)

    return model


if not os.path.isdir('models'):
    os.makedirs('models')

if __name__ == "__main__":
    import argparse
    ap = argparse.ArgumentParser()
    ap.add_argument('-d', '--dataset', type=str, default="mnist")
    args = vars(ap.parse_args())
    if "mnist" in args["dataset"]:
        MNIST()
    if "cifar" in args["dataset"]:
        CIFAR()
    #train(MNIST(), file_name="models/mnist_2layer", params=[1024], num_epochs=1, lr=0.1, decay=1e-3)
    #train(CIFAR(), file_name="models/cifar_2layer", params=[1024], num_epochs=1, lr=0.2, decay=1e-3)
Example #20
    # train the student model at temperature t
    student = train(data,
                    file_name,
                    params,
                    num_epochs,
                    batch_size,
                    train_temp,
                    init=file_name + "_init")

    # and finally we predict at temperature 1
    predicted = student.predict(data.train_data)

    print(predicted)


if not os.path.isdir('models'):
    os.makedirs('models')

train(CIFAR(), "models/cifar", [64, 64, 128, 128, 256, 256], num_epochs=2)
train(MNIST(), "models/mnist", [32, 32, 64, 64, 200, 200], num_epochs=2)

train_distillation(MNIST(),
                   "models/mnist-distilled-100", [32, 32, 64, 64, 200, 200],
                   num_epochs=2,
                   train_temp=100)
train_distillation(CIFAR(),
                   "models/cifar-distilled-100", [64, 64, 128, 128, 256, 256],
                   num_epochs=2,
                   train_temp=100)
Example #21
def main(args):
    with tf.Session() as sess:

        random.seed(SEED)
        np.random.seed(SEED)
        tf.set_random_seed(SEED)

        image_id_set = np.random.choice(range(1000),
                                        args["image_number"] * 3,
                                        replace=False)
        #image_id_set = np.random.randint(1, 1000, args["image_number"] )
        arg_max_iter = args['maxiter']  ### max number of iterations
        arg_init_const = args[
            'init_const']  ### regularization constant weighting the attack loss
        arg_kappa = args['kappa']  ### attack confidence level
        arg_q = args['q']  ### number of random direction vectors
        arg_mode = args['mode']  ### algorithm name
        arg_save_iteration = args['save_iteration']
        arg_Dataset = args["dataset"]
        arg_targeted_attack = args["targeted_attack"]
        arg_bsz = args["mini_batch_sz"]
        idx_lr = args["lr_idx"]

        ## load classifier; for MNIST and CIFAR the pixel value range is [-0.5, 0.5]
        if (arg_Dataset == 'mnist'):
            data, model = MNIST(), MNISTModel("models/mnist", sess, True)
        elif (arg_Dataset == 'cifar10'):
            data, model = CIFAR(), CIFARModel("models/cifar", sess, True)
        elif (arg_Dataset == 'imagenet'):
            data, model = ImageNet(SEED), InceptionModel(sess, True)
        else:
            raise ValueError('Please specify a valid dataset')

        succ_count, ii, iii = 0, 0, 0
        final_distortion_count,first_iteration_count, first_distortion_count = [], [], []
        while iii < args["image_number"]:
            ii = ii + 1
            image_id = image_id_set[ii]

            # if image_id!= 836: continue # for test only

            orig_prob, orig_class, orig_prob_str = util.model_prediction(
                model, np.expand_dims(data.test_data[image_id],
                                      axis=0))  ## orig_class: predicted label;

            if arg_targeted_attack:  ### targeted attack
                target_label = np.remainder(orig_class + 1, 10)
            else:
                target_label = orig_class

            orig_img, target = util.generate_data(data, image_id, target_label)
            # shape of orig_img is (1,28,28,1) in [-0.5, 0.5]

            true_label_list = np.argmax(data.test_labels, axis=1)
            true_label = true_label_list[image_id]

            print("Image ID:{}, infer label:{}, true label:{}".format(
                image_id, orig_class, true_label))
            if true_label != orig_class:
                print(
                    "True Label is different from the original prediction, pass!"
                )
                continue
            else:
                iii = iii + 1

            print('\n', iii, '/', args["image_number"])

            ##  parameter
            d = orig_img.size  # feature dim
            print("dimension = ", d)

            # mu=1/d**2  # smoothing parameter
            q = arg_q + 0
            I = arg_max_iter + 0
            kappa = arg_kappa + 0
            const = arg_init_const + 0

            ## flatten image to vec
            orig_img_vec = np.resize(orig_img, (1, d))
            delta_adv = np.zeros((1, d))  ### initialized adv. perturbation
            #delta_adv = np.random.uniform(-16/255,16/255,(1,d))

            ## w adv image initialization
            if args["constraint"] == 'uncons':
                # scale by 0.999999 so arctanh at +/-0.5 does not return +/-infinity
                w_ori_img_vec = np.arctanh(
                    2 * (orig_img_vec) * 0.999999
                )  # real-valued; note that orig_img_vec is in [-0.5, 0.5]
                w_img_vec = np.arctanh(
                    2 * (np.clip(orig_img_vec + delta_adv, -0.5, 0.5)) *
                    0.999999)
            else:
                w_ori_img_vec = orig_img_vec.copy()
                w_img_vec = np.clip(w_ori_img_vec + delta_adv, -0.5, 0.5)
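            # Descriptive note (added): in the unconstrained ('uncons') case the
            # image x in [-0.5, 0.5] is mapped to w = arctanh(2x), so that
            # x = 0.5 * tanh(w) is a valid image for any real-valued step taken
            # on w; the 0.999999 factor keeps arctanh away from +/-infinity at
            # the box boundary. In the constrained case, w is the image itself
            # and the box constraint is enforced later by projection.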

            # ## test ##
            # for test_value in w_ori_img_vec[0, :]:
            #     if np.isnan(test_value) or np.isinf(test_value):
            #         print(test_value)

            # initialize the best solution & best loss
            best_adv_img = []  # successful adv image in [-0.5, 0.5]
            best_delta = []  # best perturbation
            best_distortion = (0.5 * d)**2  # threshold for best perturbation
            total_loss = np.zeros(I)  ## I: max iters
            l2s_loss_all = np.zeros(I)
            attack_flag = False
            first_flag = True  ## record first successful attack

            # parameter setting for ZO gradient estimation
            mu = args["mu"]  ### smoothing parameter

            ## learning rate
            base_lr = args["lr"]

            if arg_mode == "ZOAdaMM":
                ## parameter initialization for AdaMM
                v_init = 1e-7  #0.00001
                v_hat = v_init * np.ones((1, d))
                v = v_init * np.ones((1, d))

                m = np.zeros((1, d))
                # momentum parameter for first and second order moment
                beta_1 = 0.9
                beta_2 = 0.9  # only used by AMSGrad
                print(beta_1, beta_2)

            #for i in tqdm(range(I)):
            for i in range(I):

                if args["decay_lr"]:
                    base_lr = args["lr"] / np.sqrt(i + 1)

                ## Total loss evaluation
                if args["constraint"] == 'uncons':
                    total_loss[i], l2s_loss_all[
                        i] = function_evaluation_uncons(
                            w_img_vec, kappa, target_label, const, model,
                            orig_img, arg_targeted_attack)

                else:
                    total_loss[i], l2s_loss_all[i] = function_evaluation_cons(
                        w_img_vec, kappa, target_label, const, model, orig_img,
                        arg_targeted_attack)

                ## gradient estimation w.r.t. w_img_vec
                if arg_mode == "ZOSCD":
                    grad_est = grad_coord_estimation(mu, q, w_img_vec, d,
                                                     kappa, target_label,
                                                     const, model, orig_img,
                                                     arg_targeted_attack,
                                                     args["constraint"])
                elif arg_mode == "ZONES":
                    grad_est = gradient_estimation_NES(mu, q, w_img_vec, d,
                                                       kappa, target_label,
                                                       const, model, orig_img,
                                                       arg_targeted_attack,
                                                       args["constraint"])
                else:
                    grad_est = gradient_estimation_v2(mu, q, w_img_vec, d,
                                                      kappa, target_label,
                                                      const, model, orig_img,
                                                      arg_targeted_attack,
                                                      args["constraint"])

                # if np.remainder(i,50)==0:
                # print("total loss:",total_loss[i])
                # print(np.linalg.norm(grad_est, np.inf))

                ## ZO-Attack, unconstrained optimization formulation
                if arg_mode == "ZOSGD":
                    delta_adv = delta_adv - base_lr * grad_est
                if arg_mode == "ZOsignSGD":
                    delta_adv = delta_adv - base_lr * np.sign(grad_est)
                if arg_mode == "ZOSCD":
                    delta_adv = delta_adv - base_lr * grad_est
                if arg_mode == "ZOAdaMM":
                    m = beta_1 * m + (1 - beta_1) * grad_est
                    v = beta_2 * v + (1 - beta_2) * np.square(grad_est)  ### vt
                    v_hat = np.maximum(v_hat, v)
                    #print(np.mean(v_hat))
                    delta_adv = delta_adv - base_lr * m / np.sqrt(v_hat)
                    if args["constraint"] == 'cons':
                        tmp = delta_adv.copy()
                        #X_temp = orig_img_vec.reshape((-1,1))
                        #V_temp2 = np.diag(np.sqrt(v_hat.reshape(-1)+1e-10))
                        V_temp = np.sqrt(v_hat.reshape(1, -1))
                        delta_adv = projection_box(tmp, orig_img_vec, V_temp,
                                                   -0.5, 0.5)
                        #delta_adv2 = projection_box_2(tmp, X_temp, V_temp2, -0.5, 0.5)
                    # v_init = 1e-2 #0.00001
                    # v = v_init * np.ones((1, d))
                    # m = np.zeros((1, d))
                    # # momentum parameter for first and second order moment
                    # beta_1 = 0.9
                    # beta_2 = 0.99  # only used by AMSGrad
                    # m = beta_1 * m + (1-beta_1) * grad_est
                    # v = np.maximum(beta_2 * v + (1-beta_2) * np.square(grad_est),v)
                    # delta_adv = delta_adv - base_lr * m /np.sqrt(v+1e-10)
                    # if args["constraint"] == 'cons':
                    #     V_temp = np.diag(np.sqrt(v.reshape(-1)+1e-10))
                    #     X_temp = orig_img_vec.reshape((-1,1))
                    #     delta_adv = projection_box(delta_adv, X_temp, V_temp, -0.5, 0.5)
                if arg_mode == "ZOSMD":
                    delta_adv = delta_adv - 0.5 * base_lr * grad_est
                    # delta_adv = delta_adv - base_lr* grad_est
                    if args["constraint"] == 'cons':
                        #V_temp = np.eye(orig_img_vec.size)
                        V_temp = np.ones_like(orig_img_vec)
                        #X_temp = orig_img_vec.reshape((-1,1))
                        delta_adv = projection_box(delta_adv, orig_img_vec,
                                                   V_temp, -0.5, 0.5)
                if arg_mode == "ZOPSGD":
                    delta_adv = delta_adv - base_lr * grad_est
                    if args["constraint"] == 'cons':
                        #V_temp = np.eye(orig_img_vec.size)
                        V_temp = np.ones_like(orig_img_vec)
                        #X_temp = orig_img_vec.reshape((-1,1))
                        delta_adv = projection_box(delta_adv, orig_img_vec,
                                                   V_temp, -0.5, 0.5)
                if arg_mode == "ZONES":
                    delta_adv = delta_adv - base_lr * np.sign(grad_est)
                    if args["constraint"] == 'cons':
                        #V_temp = np.eye(orig_img_vec.size)
                        V_temp = np.ones_like(orig_img_vec)
                        #X = orig_img_vec.reshape((-1,1))
                        delta_adv = projection_box(delta_adv, orig_img_vec,
                                                   V_temp, -0.5, 0.5)
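                # Descriptive note (added): in the constrained ('cons') setting
                # each update above is followed by projection_box, which maps the
                # perturbation back so that orig_img + delta_adv stays inside the
                # valid pixel box [-0.5, 0.5]; for ZOAdaMM the projection is
                # weighted by sqrt(v_hat) to match the adaptive step.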

                # if arg_mode == "ZO-AdaFom":
                #     m = beta_1 * m + (1-beta_1) * grad_est
                #     v = v* (float(i)/(i+1)) + np.square(grad_est)/(i+1)
                #     w_img_vec = w_img_vec - base_lr * m/np.sqrt(v)
                ##

                ### adv. example update
                w_img_vec = w_ori_img_vec + delta_adv

                ## convert back to adv_img in [-0.5, 0.5]
                if args["constraint"] == 'uncons':
                    adv_img_vec = 0.5 * np.tanh((w_img_vec)) / 0.999999  #
                else:
                    adv_img_vec = w_img_vec.copy()

                adv_img = np.resize(adv_img_vec, orig_img.shape)

                ## update the best solution in the iterations
                attack_prob, _, _ = util.model_prediction(model, adv_img)
                target_prob = attack_prob[0, target_label]
                attack_prob_tmp = attack_prob.copy()
                attack_prob_tmp[0, target_label] = 0
                other_prob = np.amax(attack_prob_tmp)

                if args["print_iteration"]:
                    if np.remainder(i + 1, 1) == 0:
                        if true_label != np.argmax(attack_prob):
                            print(
                                "Iter %d (Succ): ID = %d, lr = %3.5f, decay = %d, ZO = %s %s, loss = %3.5f, l2sdist = %3.5f, TL = %d, PL = %d"
                                % (i + 1, image_id, args["lr"],
                                   int(args["decay_lr"]), arg_mode,
                                   args["constraint"], total_loss[i],
                                   l2s_loss_all[i], true_label,
                                   np.argmax(attack_prob)))
                        else:
                            print(
                                "Iter %d (Fail): ID = %d, lr = %3.6f, decay = %d, ZO = %s %s, loss = %3.5f, l2sdist = %3.5f, TL = %d, PL = %d"
                                % (i + 1, image_id, args["lr"],
                                   int(args["decay_lr"]), arg_mode,
                                   args["constraint"], total_loss[i],
                                   l2s_loss_all[i], true_label,
                                   np.argmax(attack_prob)))

                if arg_save_iteration:
                    os.system("mkdir Examples")
                    if (np.logical_or(
                            true_label != np.argmax(attack_prob),
                            np.remainder(i + 1,
                                         10) == 0)):  ## every 10 iterations
                        suffix = "id_{}_Mode_{}_True_{}_Pred_{}_Ite_{}".format(
                            image_id, arg_mode, true_label,
                            np.argmax(attack_prob), i + 1)
                        # util.save_img(adv_img, "Examples/{}.png".format(suffix))

                if arg_targeted_attack:
                    if (np.log(target_prob + 1e-10) -
                            np.log(other_prob + 1e-10) >=
                            kappa):  # check attack confidence
                        if (distortion(adv_img, orig_img) <
                                best_distortion):  # check distortion
                            # print('best distortion obtained at',i,'-th iteration')
                            best_adv_img = adv_img
                            best_distortion = distortion(adv_img, orig_img)
                            best_delta = adv_img - orig_img
                            best_iteration = i + 1
                            adv_class = np.argmax(attack_prob)
                            attack_flag = True
                            ## Record first attack
                            if (first_flag):
                                first_flag = False  ### once set, later successful attacks are not recorded as the first
                                first_adv_img = adv_img
                                first_distortion = distortion(
                                    adv_img, orig_img)
                                first_delta = adv_img - orig_img
                                first_class = adv_class
                                first_iteration = i + 1
                else:
                    if (np.log(other_prob + 1e-10) -
                            np.log(target_prob + 1e-10) >=
                            kappa):  # check attack confidence
                        if (distortion(adv_img, orig_img) <
                                best_distortion):  # check distortion
                            # print('best distortion obtained at',i,'-th iteration')
                            best_adv_img = adv_img
                            best_distortion = distortion(adv_img, orig_img)
                            best_delta = adv_img - orig_img
                            best_iteration = i + 1
                            adv_class = np.argmax(attack_prob)
                            attack_flag = True
                            ## Record first attack
                            if (first_flag):
                                first_flag = False
                                first_adv_img = adv_img
                                first_distortion = distortion(
                                    adv_img, orig_img)
                                first_delta = adv_img - orig_img
                                first_class = adv_class
                                first_iteration = i + 1

            if (attack_flag):
                # os.system("mkdir Results_SL")
                # ## best attack (final attack)
                # suffix = "id_{}_Mode_{}_True_{}_Pred_{}".format(image_id, arg_mode, true_label, orig_class) ## orig_class, predicted label
                # suffix2 = "id_{}_Mode_{}_True_{}_Pred_{}".format(image_id, arg_mode, true_label, adv_class)
                # suffix3 = "id_{}_Mode_{}".format(image_id, arg_mode)
                # ### save original image
                # util.save_img(orig_img, "Results_SL/id_{}.png".format(image_id))
                # util.save_img(orig_img, "Results_SL/{}_Orig.png".format(suffix))
                # ### adv. image
                # util.save_img(best_adv_img, "Results_SL/{}_Adv_best.png".format(suffix2))
                # ### adv. perturbation
                # util.save_img(best_delta, "Results_SL/{}_Delta_best.png".format(suffix3))
                #
                #
                # ## first attack
                # suffix4 = "id_{}_Mode_{}_True_{}_Pred_{}".format(image_id, arg_mode, true_label, first_class)
                # ## first adv. imag
                # util.save_img(first_adv_img, "Results_SL/{}_Adv_first.png".format(suffix4))
                # ### first adv. perturbation
                # util.save_img(first_delta, "Results_SL/{}_Delta_first.png".format(suffix3))

                ## save data
                succ_count = succ_count + 1
                final_distortion_count.append(l2s_loss_all[-1])
                first_distortion_count.append(first_distortion)
                first_iteration_count.append(first_iteration)
                suffix0 = "retperimage2/id_{}_Mode_{}_{}_lr_{}_decay_{}_case{}_per".format(
                    image_id, arg_mode, args["constraint"], args["lr"],
                    int(args["decay_lr"]), args["exp_code"])
                np.savez("{}".format(suffix0),
                         id=image_id,
                         mode=arg_mode,
                         loss=total_loss,
                         perturbation=l2s_loss_all,
                         best_distortion=best_distortion,
                         first_distortion=first_distortion,
                         first_iteration=first_iteration,
                         best_iteration=best_iteration,
                         learn_rate=args["lr"],
                         decay_lr=args["decay_lr"],
                         attack_flag=attack_flag)
                ## print
                print("It takes {} iteations to find the first attack".format(
                    first_iteration))
                # print(total_loss)
            else:
                ## save data
                suffix0 = "retperimage2/id_{}_Mode_{}_{}_lr_{}_decay_{}_case{}_per".format(
                    image_id, arg_mode, args["constraint"], args["lr"],
                    int(args["decay_lr"]), args["exp_code"])
                np.savez("{}".format(suffix0),
                         id=image_id,
                         mode=arg_mode,
                         loss=total_loss,
                         perturbation=l2s_loss_all,
                         best_distortion=best_distortion,
                         learn_rate=args["lr"],
                         decay_lr=args["decay_lr"],
                         attack_flag=attack_flag)
                print("Attack Fails")

            sys.stdout.flush()
    print('succ rate:', succ_count / args["image_number"])
    print('average first success l2', np.mean(first_distortion_count))
    print('average first itrs', np.mean(first_iteration_count))
    print('average l2:', np.mean(final_distortion_count), ' best l2:',
          np.min(final_distortion_count), ' worst l2:',
          np.max(final_distortion_count))
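As referenced in the note inside the loop above, here is a minimal, self-contained sketch of the kind of random-direction zeroth-order gradient estimator that gradient_estimation_v2 and gradient_estimation_NES stand in for; the names, the scaling, and the averaging are assumptions, and only evaluations of the loss are used.

import numpy as np

def zo_gradient_estimate(loss_fn, w, mu=1e-3, q=10, rng=None):
    # Two-point random-direction estimate of grad loss_fn(w); w is a 1-D array.
    rng = np.random.default_rng() if rng is None else rng
    d = w.size
    grad = np.zeros(d)
    for _ in range(q):
        u = rng.standard_normal(d)
        u /= np.linalg.norm(u)                     # random unit direction
        diff = loss_fn(w + mu * u) - loss_fn(w - mu * u)
        grad += (d * diff / (2.0 * mu)) * u        # two-point finite difference
    return grad / q

# Example usage on a toy quadratic loss; the estimate is roughly 2 * w0 up to sampling noise.
f = lambda v: float(np.sum(v ** 2))
w0 = np.ones(5)
print(zo_gradient_estimate(f, w0))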
Example #22
0
def main(args):
    with tf.Session() as sess:
        print("Loading data and classification model: {}".format(
            args["dataset"]))
        if args['dataset'] == "mnist":
            data, model = MNIST(), MNISTModel("models/mnist",
                                              sess,
                                              use_softmax=True)
        elif args['dataset'] == "cifar10":
            data, model = CIFAR(), CIFARModel("models/cifar",
                                              sess,
                                              use_softmax=True)
        elif args['dataset'] == "imagenet":
            # data, model = ImageNet(data_path=args["imagenet_dir"], targetFile=args["attack_single_img"]), InceptionModel(sess, use_softmax=True)
            data, model = ImageNetDataNP(), InceptionModel(sess,
                                                           use_softmax=True)
        # elif args['dataset'] == "imagenet_np":

        if len(data.test_labels) < args["num_img"]:
            raise Exception("No enough data, only have {} but need {}".format(
                len(data.test_labels), args["num_img"]))

        if args["attack_single_img"]:
            # manually setup attack set
            # attacking only one image with a random attack
            orig_img = data.test_data
            orig_labels = data.test_labels
            orig_img_id = np.array([1])

            if args["attack_type"] == "targeted":
                target_labels = [
                    np.eye(model.num_labels)[args["single_img_target_label"]]
                ]
            else:
                target_labels = orig_labels
        else:
            # generate attack set
            if args["dataset"] == "imagenet" or args[
                    "dataset"] == "imagenet_np":
                shift_index = True
            else:
                shift_index = False

        if args["random_target"] and (args["dataset"] == "imagenet"
                                      or args["dataset"] == "imagenet_np"):
            # find all possible class
            all_class = np.unique(np.argmax(data.test_labels, 1))
            all_orig_img, all_target_labels, all_orig_labels, all_orig_img_id = util.generate_attack_data_set(
                data,
                args["num_img"],
                args["img_offset"],
                model,
                attack_type=args["attack_type"],
                random_target_class=all_class,
                shift_index=shift_index)
        elif args["random_target"]:
            # random target on all possible classes
            class_num = data.test_labels.shape[1]
            all_orig_img, all_target_labels, all_orig_labels, all_orig_img_id = util.generate_attack_data_set(
                data,
                args["num_img"],
                args["img_offset"],
                model,
                attack_type=args["attack_type"],
                random_target_class=list(range(class_num)),
                shift_index=shift_index)
        else:
            all_orig_img, all_target_labels, all_orig_labels, all_orig_img_id = util.generate_attack_data_set(
                data,
                args["num_img"],
                args["img_offset"],
                model,
                attack_type=args["attack_type"],
                shift_index=shift_index)

            # check attack data
        # for i in range(len(orig_img_id)):
        #     tar_lab = np.argmax(target_labels[i])
        #     orig_lab = np.argmax(orig_labels[i])
        #     print("{}:, target label:{}, orig_label:{}, orig_img_id:{}".format(i, tar_lab, orig_lab, orig_img_id[i]))

        # attack related settings
        if args["attack_method"] == "zoo" or args[
                "attack_method"] == "autozoom_bilin":
            if args["img_resize"] is None:
                args["img_resize"] = model.image_size
                print(
                    "Argument img_resize is not set and not using autoencoder, set to image original size:{}"
                    .format(args["img_resize"]))

        if args["attack_method"] == "zoo" or args["attack_method"] == "zoo_ae":
            if args["batch_size"] is None:
                args["batch_size"] = 128
                print(
                    "Using zoo or zoo_ae attack, and batch_size is not set.\nSet batch_size to {}."
                    .format(args["batch_size"]))

        else:
            if args["batch_size"] is not None:
                print("Argument batch_size is not used")
                args["batch_size"] = 1  # force to be 1

        if args["attack_method"] == "zoo_ae" or args[
                "attack_method"] == "autozoom_ae":
            #_, decoder = util.load_codec(args["codec_prefix"])
            if args["dataset"] == "mnist" or args["dataset"] == "cifar10":
                codec = CODEC(model.image_size,
                              model.num_channels,
                              args["compress_mode"],
                              use_tanh=False)
            else:
                codec = CODEC(128, model.num_channels, args["compress_mode"])
            print(args["codec_prefix"])
            codec.load_codec(args["codec_prefix"])
            decoder = codec.decoder
            print(decoder.input_shape)
            args["img_resize"] = decoder.input_shape[1]
            print("Using autoencoder, set the attack image size to:{}".format(
                args["img_resize"]))

        # setup attack
        if args["attack_method"] == "zoo":
            blackbox_attack = ZOO(sess, model, args)
        elif args["attack_method"] == "zoo_ae":
            blackbox_attack = ZOO_AE(sess, model, args, decoder)
        elif args["attack_method"] == "autozoom_bilin":
            blackbox_attack = AutoZOOM_BiLIN(sess, model, args)
        elif args["attack_method"] == "autozoom_ae":
            blackbox_attack = AutoZOOM_AE(sess, model, args, decoder, codec)

        save_prefix = os.path.join(args["save_path"], args["dataset"],
                                   args["attack_method"], args["attack_type"])

        os.system("mkdir -p {}".format(save_prefix))

        total_success = 0
        l2_total = 0

        for i in range(all_orig_img_id.size):
            orig_img = all_orig_img[i:i + 1]
            target = all_target_labels[i:i + 1]
            label = all_orig_labels[i:i + 1]

            target_class = np.argmax(target)
            true_class = np.argmax(label)
            test_index = all_orig_img_id[i]

            # print information
            print(
                "[Info][Start]{}: test_index:{}, true label:{}, target label:{}"
                .format(i, test_index, true_class, target_class))
            if args["attack_method"] == "zoo_ae" or args[
                    "attack_method"] == "autozoom_ae":
                #print ae info
                if args["dataset"] == "mnist" or args["dataset"] == "cifar10":
                    temp_img = all_orig_img[i:i + 1]
                else:
                    temp_img = all_orig_img[i]
                    temp_img = (temp_img + 0.5) * 255
                    temp_img = scipy.misc.imresize(temp_img, (128, 128))
                    temp_img = temp_img / 255 - 0.5
                    temp_img = np.expand_dims(temp_img, axis=0)
                encode_img = codec.encoder.predict(temp_img)
                decode_img = codec.decoder.predict(encode_img)
                diff_img = (decode_img - temp_img)
                diff_mse = np.mean(diff_img.reshape(-1)**2)
                print("[Info][AE] MSE:{:.4f}".format(diff_mse))

            timestart = time.time()
            adv_img = blackbox_attack.attack(orig_img, target)
            timeend = time.time()

            if len(adv_img.shape) == 3:
                adv_img = np.expand_dims(adv_img, axis=0)

            l2_dist = np.sum((adv_img - orig_img)**2)**.5
            adv_class = np.argmax(model.model.predict(adv_img))

            success = False
            if args["attack_type"] == "targeted":
                if adv_class == target_class:
                    success = True
            else:
                if adv_class != true_class:
                    success = True

            if success:
                total_success += 1
                l2_total += l2_dist

            print(
                "[Info][End]{}: test_index:{}, true label:{}, adv label:{}, success:{}, distortion:{:.5f}, success_rate:{:.4f}, l2_avg:{:.4f}"
                .format(i, test_index, true_class, adv_class, success, l2_dist,
                        total_success / (i + 1),
                        0 if total_success == 0 else l2_total / total_success))

            # save images
            suffix = "id{}_testIndex{}_true{}_adv{}".format(
                i, test_index, true_class, adv_class)
            # original image
            save_name = os.path.join(save_prefix, "Orig_{}.png".format(suffix))
            util.save_img(orig_img, save_name)
            save_name = os.path.join(save_prefix, "Orig_{}.npy".format(suffix))
            np.save(save_name, orig_img)

            # adv image
            save_name = os.path.join(save_prefix, "Adv_{}.png".format(suffix))
            util.save_img(adv_img, save_name)
            save_name = os.path.join(save_prefix, "Adv_{}.npy".format(suffix))
            np.save(save_name, adv_img)

            # diff image
            save_name = os.path.join(save_prefix, "Diff_{}.png".format(suffix))
            util.save_img((adv_img - orig_img) / 2, save_name)
            save_name = os.path.join(save_prefix, "Diff_{}.npy".format(suffix))
            np.save(save_name, adv_img - orig_img)
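The encoder/decoder sanity check above (and again in the later hybrid-attack example) is done inline; a small hedged sketch of the same check factored into a helper, with argument names that are assumptions:

import numpy as np

def reconstruction_mse(encoder, decoder, img_batch):
    # img_batch: array of shape (n, H, W, C), already in the model's input range.
    decoded = decoder.predict(encoder.predict(img_batch))
    diff = decoded - img_batch
    return float(np.mean(diff.reshape(-1) ** 2))

# e.g. print("[Info][AE] MSE:{:.4f}".format(reconstruction_mse(codec.encoder, codec.decoder, temp_img)))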
Example #23
    if init is not None:
        model.load_weights(init)

    def fn(correct, predicted):
        return tf.nn.softmax_cross_entropy_with_logits(labels=correct,
                                                       logits=predicted /
                                                       train_temp)

    sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)

    model.compile(loss=fn, optimizer=sgd, metrics=['accuracy'])

    model.fit(data.train_data,
              data.train_labels,
              batch_size=batch_size,
              validation_data=(data.validation_data, data.validation_labels),
              nb_epoch=num_epochs,
              shuffle=True)

    if file_name is not None:
        model.save(file_name)

    return model


if not os.path.isdir('models'):
    os.makedirs('models')

train(CIFAR(), "models/cifar", [64, 64, 128, 128, 256, 256], num_epochs=50)
train(MNIST(), "models/mnist", [32, 32, 64, 64, 200, 200], num_epochs=50)
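A small worked illustration (added, with assumed example logits) of why the loss above divides the logits by train_temp: a higher temperature flattens the softmax that the cross-entropy sees, which is what defensive distillation relies on.

import numpy as np

def softmax(z):
    e = np.exp(z - np.max(z))
    return e / e.sum()

logits = np.array([8.0, 2.0, 0.0])
print(softmax(logits))          # temperature 1:   ~[0.997, 0.002, 0.000]
print(softmax(logits / 100.0))  # temperature 100: ~[0.349, 0.329, 0.322]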
Example #24
    model.fit(data.train_data,
              data.train_labels,
              batch_size=batch_size,
              validation_data=(data.validation_data, data.validation_labels),
              epochs=num_epochs,
              shuffle=True)

    # save model to a file
    if file_name is not None:
        model.save(file_name)

    return model


if not os.path.isdir('models'):
    os.makedirs('models')

if __name__ == "__main__":
    train(MNIST(),
          file_name="models/mnist_2layer",
          params=[1024],
          num_epochs=50,
          lr=0.1,
          decay=1e-3)
    train(CIFAR(),
          file_name="models/cifar_2layer",
          params=[1024],
          num_epochs=50,
          lr=0.2,
          decay=1e-3)
Example #25
                        help='number of epochs')
    parser.add_argument('--overwrite',
                        action='store_true',
                        help='overwrite output file')
    args = parser.parse_args()
    print(args)
    nlayers = len(args.layer_parameters) + 1
    if not args.modelfile:
        file_name = args.modelpath + "/" + args.model + "_" + str(
            nlayers
        ) + "layer_" + args.activation + "_" + args.layer_parameters[0]
    else:
        file_name = args.modelfile
    print("Model will be saved to", file_name)
    if os.path.isfile(file_name) and not args.overwrite:
        raise RuntimeError("model {} exists.".format(file_name))
    if args.model == "mnist":
        data = MNIST()
    elif args.model == "cifar":
        data = CIFAR()
    train(data,
          file_name=file_name,
          params=args.layer_parameters,
          num_epochs=args.epochs,
          lr=args.lr,
          decay=args.wd,
          activation=args.activation,
          activation_param=args.leaky_slope,
          grad_reg=args.gradreg,
          dropout_rate=args.dropout)
Example #26
def main(args):
    if args["model_type"] == "normal":
        load_robust = False
    else:
        load_robust = True
    simple_target_model = args[
        "simple_target_model"]  # if true, target model is simple CIAR10 model (LeNet)
    simple_local_model = True  # if true, local models are simple CIFAR10 models (LeNet)

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(args["seed"])
    data = CIFAR()
    if not hasattr(K, "tf"):
        raise RuntimeError("This tutorial requires keras to be configured"
                           " to use the TensorFlow backend.")

    if keras.backend.image_dim_ordering() != 'tf':
        keras.backend.set_image_dim_ordering('tf')
        print("INFO: '~/.keras/keras.json' sets 'image_dim_ordering' to "
              "'th', temporarily setting to 'tf'")

    # Create TF session and set as Keras backend session
    sess = tf.Session()
    keras.backend.set_session(sess)

    x_test, y_test = data.test_data, data.test_labels  # reuse the CIFAR data loaded above

    all_trans_rate_ls = []  # store transfer rate of all seeds
    remain_trans_rate_ls = []  # store transfer rate of remaining seeds, used only in local model fine-tuning

    # Define input TF placeholders
    class_num = 10
    image_size = 32
    num_channels = 3
    test_batch_size = 100
    x = tf.placeholder(tf.float32,
                       shape=(None, image_size, image_size, num_channels))
    y = tf.placeholder(tf.float32, shape=(None, class_num))
    # required by the local robust densenet model
    is_training = tf.placeholder(tf.bool, shape=[])
    keep_prob = tf.placeholder(tf.float32)
    ########################### load the target model ##########################################
    if not load_robust:
        if simple_target_model:
            target_model_name = 'modelA'
            target_model = cifar10_models_simple(sess,test_batch_size, 0, use_softmax=True,x = x, y = y,\
            load_existing=True,model_name=target_model_name)
        else:
            target_model_name = 'densenet'
            target_model = cifar10_models(sess,0,test_batch_size = test_batch_size,use_softmax=True,x = x, y = y,\
            load_existing=True,model_name=target_model_name)
        accuracy = target_model.calcu_acc(x_test, y_test)
        print('Test accuracy of target model {}: {:.4f}'.format(
            target_model_name, accuracy))
    else:
        if args["robust_type"] == "madry":
            target_model_name = 'madry_robust'
            model_dir = "CIFAR10_models/Robust_Deep_models/Madry_robust_target_model"  # TODO: pur your own madry robust target model directory here
            target_model = Load_Madry_Model(sess,
                                            model_dir,
                                            bias=0.5,
                                            scale=255)
        elif args["robust_type"] == "zico":
            # Note: the zico cifar10 model will be added in the future
            target_model_name = 'zico_robust'
            model_dir = ""  # TODO: put your own robust zico target model directory here
            target_model = Load_Zico_Model(model_dir=model_dir,
                                           bias=0.5,
                                           scale=255)
        else:
            raise NotImplementedError
        corr_preds = target_model.correct_prediction(x_test,
                                                     np.argmax(y_test, axis=1))
        print('Test accuracy of target robust model :{:.4f}'.format(
            np.sum(corr_preds) / len(x_test)))
    ##################################### end of load target model ###################################
    local_model_names = args["local_model_names"]
    robust_indx = []
    normal_local_types = []
    for loc_model_name in local_model_names:
        if loc_model_name == "adv_densenet" or loc_model_name == "adv_vgg" or loc_model_name == "adv_resnet":
            # normal_local_types.append(0)
            robust_indx.append(1)
        else:
            robust_indx.append(0)
            if loc_model_name == "modelB":
                normal_local_types.append(1)
            elif loc_model_name == "modelD":
                normal_local_types.append(3)
            elif loc_model_name == "modelE":
                normal_local_types.append(4)
    print("robust index: ", robust_indx)
    print("normal model types:", normal_local_types)

    local_model_folder = ''
    for ii in range(len(local_model_names)):
        if ii != len(local_model_names) - 1:
            local_model_folder += local_model_names[ii] + '_'
        else:
            local_model_folder += local_model_names[ii]

    nb_imgs = args["num_img"]
    # local model attack related params
    clip_min = -0.5
    clip_max = 0.5
    li_eps = args["cost_threshold"]
    alpha = 1.0
    k = 100
    a = 0.01

    load_existing = True  # load pretrained local models; if false, randomly initialized models will be used
    with_local = args[
        "with_local"]  # if true, hybrid attack, otherwise, only baseline attacks
    if args["no_tune_local"]:
        stop_fine_tune_flag = True
        load_existing = True
    else:
        stop_fine_tune_flag = False

    if with_local:
        if load_existing:
            loc_adv = 'adv_with_tune'
        if args["no_tune_local"]:
            loc_adv = 'adv_no_tune'
    else:
        loc_adv = 'orig'

    # target type
    if args["attack_type"] == "targeted":
        is_targeted = True
    else:
        is_targeted = False

    sub_epochs = args["nb_epochs_sub"]  # epochs for local model training
    use_loc_adv_thres = args[
        "use_loc_adv_thres"]  # threshold for transfer attack success rate, it is used when we need to start from local adversarial seeds
    use_loc_adv_flag = True  # flag for using local adversarial examples
    fine_tune_freq = args[
        "fine_tune_freq"]  # fine-tune the model every K images to save total model training time

    # store the attack input files (e.g., original image, target class)
    input_file_prefix = os.path.join(args["local_path"], target_model_name,
                                     args["attack_type"])
    os.system("mkdir -p {}".format(input_file_prefix))
    # save locally generated information
    local_info_file_prefix = os.path.join(args["local_path"],
                                          target_model_name,
                                          args["attack_type"],
                                          local_model_folder,
                                          str(args["seed"]))
    os.system("mkdir -p {}".format(local_info_file_prefix))
    # attack_input_file_prefix = os.path.join(args["local_path"],target_model_name,
    # 											args["attack_type"])
    # save bbox attack information
    out_dir_prefix = os.path.join(args["save_path"], args["attack_method"],
                                  target_model_name, args["attack_type"],
                                  local_model_folder, str(args["seed"]))
    os.system("mkdir -p {}".format(out_dir_prefix))

    #### generate the original images and target classes ####
    target_ys_one_hot,orig_images,target_ys,orig_labels,_, trans_test_images = \
    generate_attack_inputs(sess,target_model,x_test,y_test,class_num,nb_imgs,\
     load_imgs=args["load_imgs"],load_robust=load_robust,\
      file_path = input_file_prefix)
    #### end of generating original images and target classes ####

    start_points = np.copy(
        orig_images)  # either start from orig seed or local advs
    # store attack statistical info
    dist_record = np.zeros(len(orig_labels), dtype=float)
    query_num_vec = np.zeros(len(orig_labels), dtype=int)
    success_vec = np.zeros(len(orig_labels), dtype=bool)
    adv_classes = np.zeros(len(orig_labels), dtype=int)

    # local model related variables
    if simple_target_model:
        local_model_file_name = "cifar10_simple"
    elif load_robust:
        local_model_file_name = "cifar10_robust"
    else:
        local_model_file_name = "cifar10"
    # save_dir = 'model/'+local_model_file_name + '/'
    callbacks_ls = []
    attacked_flag = np.zeros(len(orig_labels), dtype=bool)

    local_model_ls = []
    if with_local:
        ###################### start loading local models ###############################
        local_model_names_all = []  # help to store complete local model names
        sss = 0
        for model_name in local_model_names:
            if model_name == "adv_densenet" or model_name == "adv_vgg" or model_name == "adv_resnet":
                # tensorflow-based robust local models
                loc_model = cifar10_tf_robust_models(sess, test_batch_size = test_batch_size, x = x,y = y, is_training=is_training,keep_prob=keep_prob,\
                 load_existing = True, model_name = model_name,loss = args["loss_function"])
                accuracy = loc_model.calcu_acc(x_test, y_test)
                local_model_ls.append(loc_model)
                print('Test accuracy of model {}: {:.4f}'.format(
                    model_name, accuracy))
                sss += 1
            else:
                # keras based local normal models
                if simple_local_model:
                    type_num = normal_local_types[sss]
                if model_name == 'resnet_v1' or model_name == 'resnet_v2':
                    depth_s = [20, 50, 110]
                else:
                    depth_s = [0]
                for depth in depth_s:
                    # model_name used for loading models
                    if model_name == 'resnet_v1' or model_name == 'resnet_v2':
                        model_load_name = model_name + str(depth)
                    else:
                        model_load_name = model_name
                    local_model_names_all.append(model_load_name)
                    if not simple_local_model:
                        loc_model = cifar10_models(sess,depth,test_batch_size = test_batch_size,use_softmax = True, x = x,y = y,\
                        load_existing = load_existing, model_name = model_name,loss = args["loss_function"])
                    else:
                        loc_model = cifar10_models_simple(sess,test_batch_size,type_num,use_softmax = True, x = x,y = y,\
                        is_training=is_training,keep_prob=keep_prob,load_existing = load_existing, model_name = model_name, loss = args["loss_function"])
                    local_model_ls.append(loc_model)

                    opt = keras.optimizers.SGD(lr=0.01,
                                               decay=1e-6,
                                               momentum=0.9,
                                               nesterov=True)
                    loc_model.model.compile(loss='categorical_crossentropy',
                                            optimizer=opt,
                                            metrics=['accuracy'])
                    orig_images_nw = orig_images
                    orig_labels_nw = orig_labels
                    if args["no_save_model"]:
                        if not load_existing:
                            loc_model.model.fit(
                                orig_images_nw,
                                orig_labels_nw,
                                batch_size=args["train_batch_size"],
                                epochs=sub_epochs,
                                verbose=0,
                                validation_data=(x_test, y_test),
                                shuffle=True)
                    else:
                        print(
                            "Saving local model is yet to be implemented, please check back later, system exiting!"
                        )
                        sys.exit(0)
                        # TODO: fix the issue of loading pretrained model first and then finetune the model
                        # if load_existing:
                        # 	filepath = save_dir + model_load_name + '_pretrained.h5'
                        # else:
                        # 	filepath = save_dir + model_load_name + '.h5'
                        # checkpoint = ModelCheckpoint(filepath=filepath,
                        # 							monitor='val_acc',
                        # 							verbose=0,
                        # 							save_best_only=True)
                        # callbacks = [checkpoint]
                        # callbacks_ls.append(callbacks)
                        # if not load_existing:
                        # 	print("Train on %d data and validate on %d data" % (len(orig_labels_nw),len(y_test)))
                        # 	loc_model.model.fit(orig_images_nw, orig_labels_nw,
                        # 		batch_size=args["train_batch_size"],
                        # 		epochs=sub_epochs,
                        # 		verbose=0,
                        # 		validation_data=(x_test, y_test),
                        # 		shuffle = True,
                        # 		callbacks = callbacks)
                    scores = loc_model.model.evaluate(x_test,
                                                      y_test,
                                                      verbose=0)
                    accuracy = scores[1]
                    print('Test accuracy of model {}: {:.4f}'.format(
                        model_load_name, accuracy))
                    sss += 1
        ##################### end of loading local models ######################################

        ##################### Define Attack Graphs of local PGD attack ###############################
        local_attack_graph = LinfPGDAttack(local_model_ls,
                                           epsilon=li_eps,
                                           k=k,
                                           a=a,
                                           random_start=True,
                                           loss_func=args["loss_function"],
                                           targeted=is_targeted,
                                           robust_indx=robust_indx,
                                           x=x,
                                           y=y,
                                           is_training=is_training,
                                           keep_prob=keep_prob)

        ##################### end of defining the PGD attack graph ##########################
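        # Descriptive note (added): LinfPGDAttack presumably performs k projected
        # gradient steps of size a on the combined loss of the local models,
        # roughly x <- clip(x + a * sign(grad_x loss)), while keeping x within an
        # L_inf ball of radius epsilon (= li_eps) around the original image and
        # inside the pixel box [clip_min, clip_max].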

        ##################### generate local adversarial examples and also store the local attack information #####################
        if not args["load_local_AEs"]:
            # first do the transfer check to obtain local adversarial samples
            # generated local info can be used for batch attacks,
            # max_loss, min_loss, max_gap, min_gap etc are other metrics we explored for scheduling seeds based on local information
            if is_targeted:
                all_trans_rate, pred_labs, local_aes,pgd_cnt_mat, max_loss, min_loss, ave_loss, max_gap, min_gap, ave_gap\
                  = local_attack_in_batches(sess,start_points[np.logical_not(attacked_flag)],\
                target_ys_one_hot[np.logical_not(attacked_flag)],eval_batch_size = test_batch_size,\
                attack_graph = local_attack_graph,model = target_model,clip_min=clip_min,clip_max=clip_max,load_robust=load_robust)
            else:
                all_trans_rate, pred_labs, local_aes,pgd_cnt_mat, max_loss, min_loss, ave_loss, max_gap, min_gap, ave_gap\
                  = local_attack_in_batches(sess,start_points[np.logical_not(attacked_flag)],\
                orig_labels[np.logical_not(attacked_flag)],eval_batch_size = test_batch_size,\
                attack_graph = local_attack_graph,model = target_model,clip_min=clip_min,clip_max=clip_max,load_robust=load_robust)
            # calculate local adv loss used for scheduling seeds in batch attack...
            if is_targeted:
                adv_img_loss, free_idx = compute_cw_loss(sess,target_model,local_aes,\
                target_ys_one_hot,targeted=is_targeted,load_robust=load_robust)
            else:
                adv_img_loss, free_idx = compute_cw_loss(sess,target_model,local_aes,\
                orig_labels,targeted=is_targeted,load_robust=load_robust)

            # calculate orig img loss for scheduling seeds in baseline attack
            if is_targeted:
                orig_img_loss, free_idx = compute_cw_loss(sess,target_model,orig_images,\
                target_ys_one_hot,targeted=is_targeted,load_robust=load_robust)
            else:
                orig_img_loss, free_idx = compute_cw_loss(sess,target_model,orig_images,\
                orig_labels,targeted=is_targeted,load_robust=load_robust)

            pred_labs = np.argmax(target_model.predict_prob(local_aes), axis=1)
            if is_targeted:
                transfer_flag = np.argmax(target_ys_one_hot,
                                          axis=1) == pred_labs
            else:
                transfer_flag = np.argmax(orig_labels, axis=1) != pred_labs
            # save local aes
            np.save(local_info_file_prefix + '/local_aes.npy', local_aes)
            # store local info of local aes and original seeds: used for scheduling seeds in batch attacks
            np.savetxt(local_info_file_prefix + '/pgd_cnt_mat.txt',
                       pgd_cnt_mat)
            np.savetxt(local_info_file_prefix + '/orig_img_loss.txt',
                       orig_img_loss)
            np.savetxt(local_info_file_prefix + '/adv_img_loss.txt',
                       adv_img_loss)
            np.savetxt(local_info_file_prefix + '/ave_gap.txt', ave_gap)
        else:
            local_aes = np.load(local_info_file_prefix + '/local_aes.npy')
            if is_targeted:
                tmp_labels = target_ys_one_hot
            else:
                tmp_labels = orig_labels
            pred_labs = np.argmax(target_model.predict_prob(
                np.array(local_aes)),
                                  axis=1)
            print('correct number',
                  np.sum(pred_labs == np.argmax(tmp_labels, axis=1)))
            all_trans_rate = accuracy_score(np.argmax(tmp_labels, axis=1),
                                            pred_labs)
        ################################ end of generating local AEs and storing related information #######################################

        if not is_targeted:
            all_trans_rate = 1 - all_trans_rate
        print('** Transfer Rate: **' + str(all_trans_rate))
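        # Descriptive note (added): for targeted attacks the transfer rate is the
        # fraction of local adversarial examples that the target model classifies
        # as the intended target class; for untargeted attacks the returned rate is
        # presumably the fraction still classified correctly, so it is flipped to
        # 1 - rate just above.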

        if all_trans_rate > use_loc_adv_thres:
            print("Updated the starting points to local AEs....")
            start_points[np.logical_not(attacked_flag)] = local_aes
            use_loc_adv_flag = True

        # independent test set for checking transferability: for experimental purposes only; does not count toward query numbers
        if is_targeted:
            ind_all_trans_rate,_,_,_,_,_,_,_,_,_ = local_attack_in_batches(sess,trans_test_images,target_ys_one_hot,eval_batch_size = test_batch_size,\
            attack_graph = local_attack_graph,model = target_model,clip_min=clip_min,clip_max=clip_max,load_robust=load_robust)
        else:
            ind_all_trans_rate,_,_,_,_,_,_,_,_,_ = local_attack_in_batches(sess,trans_test_images,orig_labels,eval_batch_size = test_batch_size,\
            attack_graph = local_attack_graph,model = target_model,clip_min=clip_min,clip_max=clip_max,load_robust=load_robust)

        # record the queries spent by querying the local samples
        query_num_vec[np.logical_not(attacked_flag)] += 1
        if not is_targeted:
            ind_all_trans_rate = 1 - ind_all_trans_rate
        print('** (Independent Set) Transfer Rate: **' +
              str(ind_all_trans_rate))
        all_trans_rate_ls.append(ind_all_trans_rate)

    S = np.copy(start_points)
    S_label = target_model.predict_prob(S)
    S_label_cate = np.argmax(S_label, axis=1)
    S_label_cate = np_utils.to_categorical(S_label_cate, class_num)

    pre_free_idx = []
    candi_idx_ls = []  # store the indices of images in the order attacked

    # these parameters are used to make sure an equal number of instances from each class is selected
    # so that the diversity of the fine-tuning set is improved. However, it is not effective...
    per_cls_cnt = 0
    cls_order = 0
    change_limit = False
    max_lim_num = int(fine_tune_freq / class_num)

    # define the autozoom bbox attack graph
    if args["attack_method"] == "autozoom":
        # setup the autoencoders for autozoom attack
        codec = 0
        args["img_resize"] = 8
        # replace with your directory
        codec_dir = 'CIFAR10_models/cifar10_autoencoder/'  # TODO: replace with your own cifar10 autoencoder directory
        encoder = load_model(codec_dir + 'whole_cifar10_encoder.h5')
        decoder = load_model(codec_dir + 'whole_cifar10_decoder.h5')

        encode_img = encoder.predict(data.test_data[100:101])
        decode_img = decoder.predict(encode_img)
        diff_img = (decode_img - data.test_data[100:101])
        diff_mse = np.mean(diff_img.reshape(-1)**2)

        # diff_mse = np.mean(np.sum(diff_img.reshape(-1,784)**2,axis = 1))
        print("[Info][AE] MSE:{:.4f}".format(diff_mse))
        encode_img = encoder.predict(data.test_data[0:1])
        decode_img = decoder.predict(encode_img)
        diff_img = (decode_img - data.test_data[0:1])
        diff_mse = np.mean(diff_img.reshape(-1)**2)
        print("[Info][AE] MSE:{:.4f}".format(diff_mse))

    if args["attack_method"] == "autozoom":
        # define black-box model graph of autozoom
        autozoom_graph = AutoZOOM(sess, target_model, args, decoder, codec,
                                  num_channels, image_size, class_num)

    # main loop of hybrid attacks
    for itr in range(len(orig_labels)):
        print("#------------ Substitue training round {} ----------------#".
              format(itr))
        # compute loss functions of seeds: no query is needed here because the seeds were already queried before...
        if is_targeted:
            img_loss, free_idx = compute_cw_loss(sess,target_model,start_points,\
            target_ys_one_hot,targeted=is_targeted,load_robust=load_robust)
        else:
            img_loss, free_idx = compute_cw_loss(sess,target_model,start_points,\
            orig_labels,targeted=is_targeted,load_robust=load_robust)
        free_idx_diff = list(set(free_idx) - set(pre_free_idx))
        print("new free idx found:", free_idx_diff)
        if len(free_idx_diff) > 0:
            candi_idx_ls.extend(free_idx_diff)
        pre_free_idx = free_idx
        if with_local:
            if len(free_idx) > 0:
                # free attacks are found
                attacked_flag[free_idx] = 1
                success_vec[free_idx] = 1
                # update dist and adv class
                if args['dist_metric'] == 'l2':
                    dist = np.sum(
                        (start_points[free_idx] - orig_images[free_idx])**2,
                        axis=(1, 2, 3))**.5
                elif args['dist_metric'] == 'li':
                    dist = np.amax(np.abs(start_points[free_idx] -
                                          orig_images[free_idx]),
                                   axis=(1, 2, 3))
                # print(start_points[free_idx].shape)
                adv_class = target_model.pred_class(start_points[free_idx])
                adv_classes[free_idx] = adv_class
                dist_record[free_idx] = dist
                if np.amax(
                        dist
                ) >= args["cost_threshold"] + args["cost_threshold"] / 10:
                    print(
                        "there are some problems in setting the perturbation distance!"
                    )
                    sys.exit(0)
        print("Number of Unattacked Seeds: ",
              np.sum(np.logical_not(attacked_flag)))
        if attacked_flag.all():
            # early stop when all seeds are successfully attacked
            break

        # select the next seed to attack according to the chosen sort metric
        if args["sort_metric"] == "min":
            img_loss[attacked_flag] = 1e10
        elif args["sort_metric"] == "max":
            img_loss[attacked_flag] = -1e10
        candi_idx, per_cls_cnt, cls_order,change_limit,max_lim_num = select_next_seed(img_loss,attacked_flag,args["sort_metric"],\
        args["by_class"],fine_tune_freq,class_num,per_cls_cnt,cls_order,change_limit,max_lim_num)

        print(candi_idx)
        candi_idx_ls.append(candi_idx)

        input_img = start_points[candi_idx:candi_idx + 1]
        if args["attack_method"] == "autozoom":
            # encoder decoder performance check
            encode_img = encoder.predict(input_img)
            decode_img = decoder.predict(encode_img)
            diff_img = (decode_img - input_img)
            diff_mse = np.mean(diff_img.reshape(-1)**2)
        else:
            diff_mse = 0.0

        print("[Info][Start]: test_index:{}, true label:{}, target label:{}, MSE:{}".format(candi_idx, np.argmax(orig_labels[candi_idx]),\
         np.argmax(target_ys_one_hot[candi_idx]),diff_mse))

        ################## BEGIN: bbox attacks ############################
        if args["attack_method"] == "autozoom":
            # perform bbox attacks
            if is_targeted:
                x_s, ae, query_num = autozoom_attack(
                    autozoom_graph, input_img,
                    orig_images[candi_idx:candi_idx + 1],
                    target_ys_one_hot[candi_idx])
            else:
                x_s, ae, query_num = autozoom_attack(
                    autozoom_graph, input_img,
                    orig_images[candi_idx:candi_idx + 1],
                    orig_labels[candi_idx])
        else:
            if is_targeted:
                x_s, query_num, ae = nes_attack(args,target_model,input_img,orig_images[candi_idx:candi_idx+1],\
                 np.argmax(target_ys_one_hot[candi_idx]), lower = clip_min, upper = clip_max)
            else:
                x_s, query_num, ae = nes_attack(args,target_model,input_img,orig_images[candi_idx:candi_idx+1],\
                 np.argmax(orig_labels[candi_idx]), lower = clip_min, upper = clip_max)
            x_s = np.squeeze(np.array(x_s), axis=1)
        ################## END: bbox attacks ############################
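        # x_s holds the images queried during the black-box attack; they are
        # reused below (when with_local is set) as substitute-training data.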

        attacked_flag[candi_idx] = 1

        # fill the query info, etc
        if len(ae.shape) == 3:
            ae = np.expand_dims(ae, axis=0)
        if args['dist_metric'] == 'l2':
            dist = np.sum((ae - orig_images[candi_idx])**2)**.5
        elif args['dist_metric'] == 'li':
            dist = np.amax(np.abs(ae - orig_images[candi_idx]))
        adv_class = target_model.pred_class(ae)
        adv_classes[candi_idx] = adv_class
        dist_record[candi_idx] = dist

        if args["attack_method"] == "autozoom":
            # AutoZOOM already counts the query spent on the attack input, which was made at the beginning.
            added_query = query_num - 1
        else:
            added_query = query_num

        query_num_vec[candi_idx] += added_query
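        # Sanity check: allow 10% slack over the perturbation budget before
        # treating the attack configuration as broken.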
        if dist >= args["cost_threshold"] + args["cost_threshold"] / 10:
            print("the distance is not optimized properly")
            sys.exit(0)

        if is_targeted:
            if adv_class == np.argmax(target_ys_one_hot[candi_idx]):
                success_vec[candi_idx] = 1
        else:
            if adv_class != np.argmax(orig_labels[candi_idx]):
                success_vec[candi_idx] = 1
        if attacked_flag.all():
            print(
                "Early termination because all seeds are successfully attacked!"
            )
            break
        ##############################################################
        ## Start of the substitute training and local advs section ##
        ##############################################################
        if with_local:
            if not stop_fine_tune_flag:
                # augment the local model training data with target model labels
                print(np.array(x_s).shape)
                print(S.shape)
                S = np.concatenate((S, np.array(x_s)), axis=0)
                S_label_add = target_model.predict_prob(np.array(x_s))
                S_label_add_cate = np.argmax(S_label_add, axis=1)
                S_label_add_cate = np_utils.to_categorical(
                    S_label_add_cate, class_num)
                S_label_cate = np.concatenate((S_label_cate, S_label_add_cate),
                                              axis=0)
                # empirically, fine-tuning with the model's prediction probabilities gives slightly better results.
                # if your bbox attack is decision-based, use only the predicted labels
                S_label = np.concatenate((S_label, S_label_add), axis=0)
                # fine-tune the model
                if itr % fine_tune_freq == 0 and itr != 0:
                    if len(S_label) > args["train_inst_lim"]:
                        curr_len = len(S_label)
                        rand_idx = np.random.choice(len(S_label),
                                                    args["train_inst_lim"],
                                                    replace=False)
                        S = S[rand_idx]
                        S_label = S_label[rand_idx]
                        S_label_cate = S_label_cate[rand_idx]
                        print(
                            "current num: %d, training instance limit %d reached; randomly sampled %d examples!"
                            % (curr_len, args["train_inst_lim"], len(rand_idx)))
                    sss = 0

                    for loc_model in local_model_ls:
                        model_name = local_model_names_all[sss]
                        if args["use_mixup"]:
                            print(
                                "Updating the training data with the mixup strategy!"
                            )
                            S_nw = np.copy(S)
                            S_label_nw = np.copy(S_label)
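                            # Mixup (assumed behaviour of mixup_data): draw
                            # lam ~ Beta(alpha, alpha) and blend random pairs,
                            #   x' = lam * x_i + (1 - lam) * x_j
                            #   y' = lam * y_i + (1 - lam) * y_j
                            # which smooths the substitute's training targets.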
                            S_nw, S_label_nw, _ = mixup_data(S_nw,
                                                             S_label_nw,
                                                             alpha=alpha)
                        else:
                            S_nw = S
                            S_label_nw = S_label
                        print("Train on %d data and validate on %d data" %
                              (len(S_label_nw), len(y_test)))
                        if args["no_save_model"]:
                            loc_model.model.fit(
                                S_nw,
                                S_label_nw,
                                batch_size=args["train_batch_size"],
                                epochs=sub_epochs,
                                verbose=0,
                                validation_data=(x_test, y_test),
                                shuffle=True)
                        else:
                            print(
                                "Saving the local model is not implemented yet; exiting."
                            )
                            sys.exit(0)
                            # callbacks = callbacks_ls[sss]
                            # loc_model.model.fit(S_nw, S_label_nw,
                            # 	batch_size=args["train_batch_size"],
                            # 	epochs=sub_epochs,
                            # 	verbose=0,
                            # 	validation_data=(x_test, y_test),
                            # 	shuffle = True,
                            # 	callbacks = callbacks)
                        scores = loc_model.model.evaluate(x_test,
                                                          y_test,
                                                          verbose=0)
                        print('Test accuracy of model {}: {:.4f}'.format(
                            model_name, scores[1]))
                        sss += 1
                    if not attacked_flag.all():
                        # first, check the seeds that have not been attacked yet
                        if is_targeted:
                            remain_trans_rate, _, remain_local_aes,_, _, _, _, _, _, _\
                              = local_attack_in_batches(sess,orig_images[np.logical_not(attacked_flag)],\
                            target_ys_one_hot[np.logical_not(attacked_flag)],eval_batch_size = test_batch_size,\
                            attack_graph = local_attack_graph,model = target_model,clip_min=clip_min,clip_max=clip_max,load_robust=load_robust)
                        else:
                            remain_trans_rate, pred_labs, remain_local_aes,_, _, _, _, _, _, _\
                              = local_attack_in_batches(sess,orig_images[np.logical_not(attacked_flag)],\
                            orig_labels[np.logical_not(attacked_flag)],eval_batch_size = test_batch_size,\
                            attack_graph = local_attack_graph,model = target_model,clip_min=clip_min,clip_max=clip_max,load_robust=load_robust)
                        if not is_targeted:
                            remain_trans_rate = 1 - remain_trans_rate
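                        # For untargeted attacks the helper is assumed to return
                        # the target model's accuracy on the local AEs, so the
                        # transfer rate is its complement.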
                        print('<<Remaining Seed Transfer Rate>>: **' +
                              str(remain_trans_rate))
                        # if the remaining seeds no longer transfer, substitute training brings no further benefit
                        if remain_trans_rate <= 0 and use_loc_adv_flag:
                            print(
                                "No improvement from substitute training, stop fine-tuning!"
                            )
                            stop_fine_tune_flag = True

                        # transfer rate check with independent test examples
                        if is_targeted:
                            all_trans_rate, _, _, _, _, _, _, _, _, _\
                              = local_attack_in_batches(sess,trans_test_images,target_ys_one_hot,eval_batch_size = test_batch_size,\
                            attack_graph = local_attack_graph,model = target_model,clip_min=clip_min,clip_max=clip_max,load_robust=load_robust)
                        else:
                            all_trans_rate, _, _, _, _, _, _, _, _, _\
                              = local_attack_in_batches(sess,trans_test_images,orig_labels,eval_batch_size = test_batch_size,\
                            attack_graph = local_attack_graph,model = target_model,clip_min=clip_min,clip_max=clip_max,load_robust=load_robust)
                        if not is_targeted:
                            all_trans_rate = 1 - all_trans_rate
                        print('<<Overall Transfer Rate>>: **' +
                              str(all_trans_rate))

                        # if the transfer rate is not high enough, keep starting from the original seeds;
                        # switch to local advs only once the rate clears the threshold (useful when the local model starts from random weights)
                        if not use_loc_adv_flag:
                            if remain_trans_rate > use_loc_adv_thres:
                                use_loc_adv_flag = True
                                print("Updated the starting points....")
                                start_points[np.logical_not(
                                    attacked_flag)] = remain_local_aes
                            # record the queries spent on checking newly generated loc advs
                            query_num_vec[np.logical_not(attacked_flag)] += 1
                        else:
                            print("Updated the starting points....")
                            start_points[np.logical_not(
                                attacked_flag)] = remain_local_aes
                            # record the queries spent on checking newly generated loc advs
                            query_num_vec[np.logical_not(attacked_flag)] += 1
                        remain_trans_rate_ls.append(remain_trans_rate)
                        all_trans_rate_ls.append(all_trans_rate)
                np.set_printoptions(precision=4)
                print("all_trans_rate:")
                print(all_trans_rate_ls)
                print("remain_trans_rate")
                print(remain_trans_rate_ls)

    # save the query information of all classes
    if not args["no_save_text"]:
        save_name_file = os.path.join(out_dir_prefix,
                                      "{}_num_queries.txt".format(loc_adv))
        np.savetxt(save_name_file, query_num_vec, fmt='%d', delimiter=' ')
        save_name_file = os.path.join(out_dir_prefix,
                                      "{}_success_flags.txt".format(loc_adv))
        np.savetxt(save_name_file, success_vec, fmt='%d', delimiter=' ')
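
# A minimal sketch of how the saved per-seed statistics could be summarized
# afterwards; the file names below are hypothetical stand-ins for the
# "{}_num_queries.txt" / "{}_success_flags.txt" files written above.
import numpy as np

queries = np.loadtxt("local_adv_num_queries.txt", dtype=int)    # hypothetical name
success = np.loadtxt("local_adv_success_flags.txt", dtype=int)  # hypothetical name

success_rate = success.mean()
# Average queries over successful seeds only, a common reporting convention.
mean_queries = queries[success == 1].mean() if success.any() else float("nan")
print("success rate: {:.3f}, mean queries: {:.1f}".format(success_rate, mean_queries))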
Example #27
def main(args):
    with tf.Session() as sess:
        if (args['dataset'] == 'mnist'):
            data, model = MNIST(), MNISTModel("models/mnist", sess)
            handpick = False
            inception = False
        if (args['dataset'] == "cifar"):
            data, model = CIFAR(), CIFARModel("models/cifar", sess)
            handpick = True
            inception = False
        if (args['dataset'] == "imagenet"):
            data, model = ImageNet(args['seed_imagenet']), InceptionModel(sess)
            handpick = True
            inception = True

        if (args['adversarial'] != "none"):
            model = MNISTModel("models/mnist_cw" + str(args['adversarial']),
                               sess)

        if (args['temp'] and args['dataset'] == 'mnist'):
            model = MNISTModel("models/mnist-distilled-" + str(args['temp']),
                               sess)
        if (args['temp'] and args['dataset'] == 'cifar'):
            model = CIFARModel("models/cifar-distilled-" + str(args['temp']),
                               sess)

        inputs, targets, labels, true_ids = generate_data(
            data,
            model,
            samples=args['numimg'],
            inception=inception,
            handpick=handpick,
            train=args['train'],
            seed=args['seed'])
        timestart = time.time()
        if (args['attack'] == 'L2'):
            attack = CarliniL2(sess,
                               model,
                               batch_size=args['batch_size'],
                               max_iterations=args['maxiter'],
                               confidence=args['conf'],
                               binary_search_steps=args['binary_steps'],
                               beta=args['beta'],
                               abort_early=args['abort_early'])
            adv = attack.attack(inputs, targets)
        if (args['attack'] == 'L1'):
            attack = EADL1(sess,
                           model,
                           batch_size=args['batch_size'],
                           max_iterations=args['maxiter'],
                           confidence=args['conf'],
                           binary_search_steps=args['binary_steps'],
                           beta=args['beta'],
                           abort_early=args['abort_early'])
            adv = attack.attack(inputs, targets)
        if (args['attack'] == 'EN'):
            attack = EADEN(sess,
                           model,
                           batch_size=args['batch_size'],
                           max_iterations=args['maxiter'],
                           confidence=args['conf'],
                           binary_search_steps=args['binary_steps'],
                           beta=args['beta'],
                           abort_early=args['abort_early'])
            adv = attack.attack(inputs, targets)
        """If untargeted, pass labels instead of targets"""
        if (args['attack'] == 'FGSM'):
            attack = FGM(sess,
                         model,
                         batch_size=args['batch_size'],
                         ord=np.inf,
                         inception=inception)
            adv = attack.attack(inputs, targets)
        if (args['attack'] == 'FGML1'):
            attack = FGM(sess,
                         model,
                         batch_size=args['batch_size'],
                         ord=1,
                         inception=inception)
            adv = attack.attack(inputs, targets)
        if (args['attack'] == 'FGML2'):
            attack = FGM(sess,
                         model,
                         batch_size=args['batch_size'],
                         ord=2,
                         inception=inception)
            adv = attack.attack(inputs, targets)

        if (args['attack'] == 'IFGSM'):
            attack = IGM(sess,
                         model,
                         batch_size=args['batch_size'],
                         ord=np.inf,
                         inception=inception)
            adv = attack.attack(inputs, targets)
        if (args['attack'] == 'IFGML1'):
            attack = IGM(sess,
                         model,
                         batch_size=args['batch_size'],
                         ord=1,
                         inception=inception)
            adv = attack.attack(inputs, targets)
        if (args['attack'] == 'IFGML2'):
            attack = IGM(sess,
                         model,
                         batch_size=args['batch_size'],
                         ord=2,
                         inception=inception)
            adv = attack.attack(inputs, targets)

        timeend = time.time()
        print("Took", timeend - timestart, "seconds to run",
              len(inputs) / args['batch_size'], "random instances.")

        if (args['train']):
            np.save('labels_train.npy', labels)
            np.save(str(args['attack']) + '_train.npy', adv)
            return

        r_best = []
        d_best_l1 = []
        d_best_l2 = []
        d_best_linf = []
        r_average = []
        d_average_l1 = []
        d_average_l2 = []
        d_average_linf = []
        r_worst = []
        d_worst_l1 = []
        d_worst_l2 = []
        d_worst_linf = []

        if (args['conf'] != 0):
            model = MNISTModel("models/mnist-distilled-100", sess)

        if (args['show']):
            if not os.path.exists(
                    str(args['save']) + "/" + str(args['dataset']) + "/" +
                    str(args['attack'])):
                os.makedirs(
                    str(args['save']) + "/" + str(args['dataset']) + "/" +
                    str(args['attack']))

        for i in range(0, len(inputs), args['batch_size']):

            pred = []
            for j in range(i, i + args['batch_size']):
                if inception:
                    pred.append(
                        np.reshape(model.model.predict(adv[j:j + 1]),
                                   (data.test_labels[0:1].shape)))
                else:
                    pred.append(model.model.predict(adv[j:j + 1]))

            dist_l1 = 1e10
            dist_l2 = 1e10
            dist_linf = 1e10
            dist_l1_index = 1e10
            dist_l2_index = 1e10
            dist_linf_index = 1e10
            for k, j in enumerate(range(i, i + args['batch_size'])):
                if (np.argmax(pred[k], 1) == np.argmax(targets[j:j + 1], 1)):
                    if (np.sum(np.abs(adv[j] - inputs[j])) < dist_l1):
                        dist_l1 = np.sum(np.abs(adv[j] - inputs[j]))
                        dist_l1_index = j
                    if (np.amax(np.abs(adv[j] - inputs[j])) < dist_linf):
                        dist_linf = np.amax(np.abs(adv[j] - inputs[j]))
                        dist_linf_index = j
                    if ((np.sum((adv[j] - inputs[j])**2)**.5) < dist_l2):
                        dist_l2 = (np.sum((adv[j] - inputs[j])**2)**.5)
                        dist_l2_index = j
            if (dist_l1_index != 1e10):
                d_best_l2.append((np.sum(
                    (adv[dist_l2_index] - inputs[dist_l2_index])**2)**.5))
                d_best_l1.append(
                    np.sum(np.abs(adv[dist_l1_index] - inputs[dist_l1_index])))
                d_best_linf.append(
                    np.amax(
                        np.abs(adv[dist_linf_index] -
                               inputs[dist_linf_index])))
                r_best.append(1)
            else:
                r_best.append(0)

            rand_int = np.random.randint(i, i + args['batch_size'])
            if inception:
                pred_r = np.reshape(
                    model.model.predict(adv[rand_int:rand_int + 1]),
                    (data.test_labels[0:1].shape))
            else:
                pred_r = model.model.predict(adv[rand_int:rand_int + 1])
            if (np.argmax(pred_r,
                          1) == np.argmax(targets[rand_int:rand_int + 1], 1)):
                r_average.append(1)
                d_average_l2.append(
                    np.sum((adv[rand_int] - inputs[rand_int])**2)**.5)
                d_average_l1.append(
                    np.sum(np.abs(adv[rand_int] - inputs[rand_int])))
                d_average_linf.append(
                    np.amax(np.abs(adv[rand_int] - inputs[rand_int])))

            else:
                r_average.append(0)

            dist_l1 = 0
            dist_l1_index = 1e10
            dist_linf = 0
            dist_linf_index = 1e10
            dist_l2 = 0
            dist_l2_index = 1e10
            for k, j in enumerate(range(i, i + args['batch_size'])):
                if (np.argmax(pred[k], 1) != np.argmax(targets[j:j + 1], 1)):
                    r_worst.append(0)
                    dist_l1_index = 1e10
                    dist_l2_index = 1e10
                    dist_linf_index = 1e10
                    break
                else:
                    if (np.sum(np.abs(adv[j] - inputs[j])) > dist_l1):
                        dist_l1 = np.sum(np.abs(adv[j] - inputs[j]))
                        dist_l1_index = j
                    if (np.amax(np.abs(adv[j] - inputs[j])) > dist_linf):
                        dist_linf = np.amax(np.abs(adv[j] - inputs[j]))
                        dist_linf_index = j
                    if ((np.sum((adv[j] - inputs[j])**2)**.5) > dist_l2):
                        dist_l2 = (np.sum((adv[j] - inputs[j])**2)**.5)
                        dist_l2_index = j
            if (dist_l1_index != 1e10):
                d_worst_l2.append((np.sum(
                    (adv[dist_l2_index] - inputs[dist_l2_index])**2)**.5))
                d_worst_l1.append(
                    np.sum(np.abs(adv[dist_l1_index] - inputs[dist_l1_index])))
                d_worst_linf.append(
                    np.amax(
                        np.abs(adv[dist_linf_index] -
                               inputs[dist_linf_index])))
                r_worst.append(1)

            if (args['show']):
                for j in range(i, i + args['batch_size']):
                    target_id = np.argmax(targets[j:j + 1], 1)
                    label_id = np.argmax(labels[j:j + 1], 1)
                    prev_id = np.argmax(
                        np.reshape(model.model.predict(inputs[j:j + 1]),
                                   (data.test_labels[0:1].shape)), 1)
                    adv_id = np.argmax(
                        np.reshape(model.model.predict(adv[j:j + 1]),
                                   (data.test_labels[0:1].shape)), 1)
                    suffix = "id{}_seq{}_lbl{}_prev{}_adv{}_{}_l1_{:.3f}_l2_{:.3f}_linf_{:.3f}".format(
                        true_ids[i], target_id, label_id, prev_id,
                        adv_id, adv_id == target_id,
                        np.sum(np.abs(adv[j] - inputs[j])),
                        np.sum((adv[j] - inputs[j])**2)**.5,
                        np.amax(np.abs(adv[j] - inputs[j])))

                    show(
                        inputs[j:j + 1],
                        str(args['save']) + "/" + str(args['dataset']) + "/" +
                        str(args['attack']) +
                        "/original_{}.png".format(suffix))
                    show(
                        adv[j:j + 1],
                        str(args['save']) + "/" + str(args['dataset']) + "/" +
                        str(args['attack']) +
                        "/adversarial_{}.png".format(suffix))

        print('best_case_L1_mean', np.mean(d_best_l1))
        print('best_case_L2_mean', np.mean(d_best_l2))
        print('best_case_Linf_mean', np.mean(d_best_linf))
        print('best_case_prob', np.mean(r_best))
        print('average_case_L1_mean', np.mean(d_average_l1))
        print('average_case_L2_mean', np.mean(d_average_l2))
        print('average_case_Linf_mean', np.mean(d_average_linf))
        print('average_case_prob', np.mean(r_average))
        print('worst_case_L1_mean', np.mean(d_worst_l1))
        print('worst_case_L2_mean', np.mean(d_worst_l2))
        print('worst_case_Linf_mean', np.mean(d_worst_linf))
        print('worst_case_prob', np.mean(r_worst))
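
# A small self-contained check of the three distortion metrics reported above:
# L1 is the sum of absolute differences, L2 the Euclidean norm, and Linf the
# maximum absolute difference of the perturbation.
import numpy as np

orig = np.zeros((1, 28, 28, 1), dtype=np.float32)
adv = orig.copy()
adv[0, 0, 0, 0] = 0.3   # perturb one pixel
adv[0, 0, 1, 0] = -0.4  # and another

delta = adv - orig
print("L1  :", np.sum(np.abs(delta)))       # ~0.7
print("L2  :", np.sum(delta ** 2) ** 0.5)   # ~0.5
print("Linf:", np.amax(np.abs(delta)))      # ~0.4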
Example #28
# Signature inferred from the calls at the bottom of this example; the batch_size default is assumed.
def train_distillation(data, file_name, params, num_epochs=50, batch_size=128,
                       train_temp=1):
    # now train the teacher at the given temperature
    teacher = train(data, file_name+"_teacher", params, num_epochs, batch_size, train_temp,
                    init=file_name+"_init")

    # evaluate the labels at temperature t
    predicted = teacher.predict(data.train_data)
    with tf.Session() as sess:
        y = sess.run(tf.nn.softmax(predicted / train_temp))
        print(y)
        data.train_labels = y
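    # Soft labels at temperature T: y_i = exp(z_i / T) / sum_j exp(z_j / T);
    # a higher T spreads probability mass across classes for the student to match.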

    # train the student model at temperature t
    student = train(data, file_name, params, num_epochs, batch_size, train_temp,
                    init=file_name+"_init")

    # and finally we predict at temperature 1
    predicted = student.predict(data.train_data)

    print(predicted)
    
if not os.path.isdir('models'):
    os.makedirs('models')

train(CIFAR(), "models/cifar", [64, 64, 128, 128, 256, 256], num_epochs=50)
train(MNIST(), "models/mnist", [32, 32, 64, 64, 200, 200], num_epochs=50)

train_distillation(MNIST(), "models/mnist-distilled-100", [32, 32, 64, 64, 200, 200],
                   num_epochs=50, train_temp=100)
train_distillation(CIFAR(), "models/cifar-distilled-100", [64, 64, 128, 128, 256, 256],
                   num_epochs=50, train_temp=100)
Example #29
from setup_cifar import CIFAR, CIFARModel, CIFAR_WIDE
from setup_mnist import MNIST, MNISTModel
from setup_inception import ImageNet, InceptionModel

import tensorflow as tf
import numpy as np
from encoder import encoder

BATCH_SIZE = 1

with tf.Session() as sess:
    #data, model = MNIST(), MNISTModel("models/mnist", sess)
    #data, model = CIFAR(), CIFARModel("models/cifar", sess)
    #data, model = ImageNet(), InceptionModel(sess)
    data, model = CIFAR(), CIFAR_WIDE("models/wide_resnet", sess)
    x = tf.placeholder(
        tf.float32,
        (None, model.image_size, model.image_size, model.num_channels))
    y = model.predict(x)

    r = []
    for i in range(0, len(data.encoding_test_data), BATCH_SIZE):
        pred = sess.run(y, {x: data.encoding_test_data[i:i + BATCH_SIZE]})
        #print(pred)
        #print('real',data.test_labels[i],'pred',np.argmax(pred))
        r.append(
            np.argmax(pred, 1) == np.argmax(data.test_labels[i:i +
                                                             BATCH_SIZE], 1))
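        # running test accuracy over the batches evaluated so far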
        print(np.mean(r))
def main(args):
    with tf.Session() as sess:
        if (args['dataset'] == 'mnist'):
            data = MNIST()
            inception = False
            if (args['adversarial'] != "none"):
                model = MNISTModel(
                    "models/mnist_cw" + str(args['adversarial']), sess)
            elif (args['temp']):
                model = MNISTModel(
                    "models/mnist-distilled-" + str(args['temp']), sess)
            else:
                model = MNISTModel("models/mnist", sess)
        if (args['dataset'] == "cifar"):
            data = CIFAR()
            inception = False
            if (args['adversarial'] != "none"):
                model = CIFARModel(
                    "models/cifar_cw" + str(args['adversarial']), sess)
            elif (args['temp']):
                model = CIFARModel(
                    "models/cifar-distilled-" + str(args['temp']), sess)
            else:
                model = CIFARModel("models/cifar", sess)
        if (args['dataset'] == "imagenet"):
            data, model = ImageNet(args['seed_imagenet'],
                                   2 * args['numimg']), InceptionModel(sess)
            inception = True

        inputs, targets, labels, true_ids = generate_data(
            data,
            model,
            samples=args['numimg'],
            targeted=not args['untargeted'],
            target_num=args['targetnum'],
            inception=inception,
            train=args['train'],
            seed=args['seed'])
        timestart = time.time()
        if (args['restore_np']):
            if (args['train']):
                adv = np.load(
                    str(args['dataset']) + '_' + str(args['attack']) +
                    '_train.npy')
            else:
                adv = np.load(
                    str(args['dataset']) + '_' + str(args['attack']) + '.npy')
        else:
            if (args['attack'] == 'L2'):
                attack = CarliniL2(sess,
                                   model,
                                   batch_size=args['batch_size'],
                                   max_iterations=args['maxiter'],
                                   confidence=args['conf'],
                                   initial_const=args['init_const'],
                                   binary_search_steps=args['binary_steps'],
                                   targeted=not args['untargeted'],
                                   beta=args['beta'],
                                   abort_early=args['abort_early'])
                adv = attack.attack(inputs, targets)
            if (args['attack'] == 'L1'):
                attack = EADL1(sess,
                               model,
                               batch_size=args['batch_size'],
                               max_iterations=args['maxiter'],
                               confidence=args['conf'],
                               initial_const=args['init_const'],
                               binary_search_steps=args['binary_steps'],
                               targeted=not args['untargeted'],
                               beta=args['beta'],
                               abort_early=args['abort_early'])
                adv = attack.attack(inputs, targets)
            if (args['attack'] == 'EN'):
                attack = EADEN(sess,
                               model,
                               batch_size=args['batch_size'],
                               max_iterations=args['maxiter'],
                               confidence=args['conf'],
                               initial_const=args['init_const'],
                               binary_search_steps=args['binary_steps'],
                               targeted=not args['untargeted'],
                               beta=args['beta'],
                               abort_early=args['abort_early'])
                adv = attack.attack(inputs, targets)
            """If untargeted, pass labels instead of targets"""
            if (args['attack'] == 'FGSM'):
                attack = FGM(sess,
                             model,
                             batch_size=args['batch_size'],
                             ord=np.inf,
                             eps=args['eps'],
                             inception=inception)
                adv = attack.attack(inputs, targets)
            if (args['attack'] == 'FGML1'):
                attack = FGM(sess,
                             model,
                             batch_size=args['batch_size'],
                             ord=1,
                             eps=args['eps'],
                             inception=inception)
                adv = attack.attack(inputs, targets)
            if (args['attack'] == 'FGML2'):
                attack = FGM(sess,
                             model,
                             batch_size=args['batch_size'],
                             ord=2,
                             eps=args['eps'],
                             inception=inception)
                adv = attack.attack(inputs, targets)

            if (args['attack'] == 'IFGSM'):
                attack = IFGM(sess,
                              model,
                              batch_size=args['batch_size'],
                              ord=np.inf,
                              eps=args['eps'],
                              inception=inception)
                adv = attack.attack(inputs, targets)
            if (args['attack'] == 'IFGML1'):
                attack = IFGM(sess,
                              model,
                              batch_size=args['batch_size'],
                              ord=1,
                              eps=args['eps'],
                              inception=inception)
                adv = attack.attack(inputs, targets)
            if (args['attack'] == 'IFGML2'):
                attack = IFGM(sess,
                              model,
                              batch_size=args['batch_size'],
                              ord=2,
                              eps=args['eps'],
                              inception=inception)
                adv = attack.attack(inputs, targets)

        timeend = time.time()

        if args['untargeted']:
            num_targets = 1
        else:
            num_targets = args['targetnum']
        print("Took", timeend - timestart, "seconds to run",
              len(inputs) / num_targets, "random instances.")

        if (args['save_np']):
            if (args['train']):
                np.save(str(args['dataset']) + '_labels_train.npy', labels)
                np.save(
                    str(args['dataset']) + '_' + str(args['attack']) +
                    '_train.npy', adv)
            else:
                np.save(
                    str(args['dataset']) + '_' + str(args['attack']) + '.npy',
                    adv)

        r_best_ = []
        d_best_l1_ = []
        d_best_l2_ = []
        d_best_linf_ = []
        r_average_ = []
        d_average_l1_ = []
        d_average_l2_ = []
        d_average_linf_ = []
        r_worst_ = []
        d_worst_l1_ = []
        d_worst_l2_ = []
        d_worst_linf_ = []

        # Transferability tests
        model_ = []
        model_.append(model)
        if (args['targetmodel'] != "same"):
            if (args['targetmodel'] == "dd_100"):
                model_.append(MNISTModel("models/mnist-distilled-100", sess))
        num_models = len(model_)

        if (args['show']):
            if not os.path.exists(
                    str(args['save']) + "/" + str(args['dataset']) + "/" +
                    str(args['attack'])):
                os.makedirs(
                    str(args['save']) + "/" + str(args['dataset']) + "/" +
                    str(args['attack']))
        for m, model in enumerate(model_):
            r_best = []
            d_best_l1 = []
            d_best_l2 = []
            d_best_linf = []
            r_average = []
            d_average_l1 = []
            d_average_l2 = []
            d_average_linf = []
            r_worst = []
            d_worst_l1 = []
            d_worst_l2 = []
            d_worst_linf = []
            for i in range(0, len(inputs), num_targets):
                pred = []
                for j in range(i, i + num_targets):
                    if inception:
                        pred.append(
                            np.reshape(model.model.predict(adv[j:j + 1]),
                                       (data.test_labels[0:1].shape)))
                    else:
                        pred.append(model.model.predict(adv[j:j + 1]))

                dist_l1 = 1e10
                dist_l1_index = 1e10
                dist_linf = 1e10
                dist_linf_index = 1e10
                dist_l2 = 1e10
                dist_l2_index = 1e10
                for k, j in enumerate(range(i, i + num_targets)):
                    success = False
                    if (args['untargeted']):
                        if (np.argmax(pred[k], 1) != np.argmax(
                                targets[j:j + 1], 1)):
                            success = True
                    else:
                        if (np.argmax(pred[k],
                                      1) == np.argmax(targets[j:j + 1], 1)):
                            success = True
                    if (success):
                        if (np.sum(np.abs(adv[j] - inputs[j])) < dist_l1):
                            dist_l1 = np.sum(np.abs(adv[j] - inputs[j]))
                            dist_l1_index = j
                        if (np.amax(np.abs(adv[j] - inputs[j])) < dist_linf):
                            dist_linf = np.amax(np.abs(adv[j] - inputs[j]))
                            dist_linf_index = j
                        if ((np.sum((adv[j] - inputs[j])**2)**.5) < dist_l2):
                            dist_l2 = (np.sum((adv[j] - inputs[j])**2)**.5)
                            dist_l2_index = j
                if (dist_l1_index != 1e10):
                    d_best_l2.append((np.sum(
                        (adv[dist_l2_index] - inputs[dist_l2_index])**2)**.5))
                    d_best_l1.append(
                        np.sum(
                            np.abs(adv[dist_l1_index] -
                                   inputs[dist_l1_index])))
                    d_best_linf.append(
                        np.amax(
                            np.abs(adv[dist_linf_index] -
                                   inputs[dist_linf_index])))
                    r_best.append(1)
                else:
                    r_best.append(0)

                rand_int = np.random.randint(i, i + num_targets)
                if inception:
                    pred_r = np.reshape(
                        model.model.predict(adv[rand_int:rand_int + 1]),
                        (data.test_labels[0:1].shape))
                else:
                    pred_r = model.model.predict(adv[rand_int:rand_int + 1])
                success_average = False
                if (args['untargeted']):
                    if (np.argmax(pred_r, 1) != np.argmax(
                            targets[rand_int:rand_int + 1], 1)):
                        success_average = True
                else:
                    if (np.argmax(pred_r, 1) == np.argmax(
                            targets[rand_int:rand_int + 1], 1)):
                        success_average = True
                if success_average:
                    r_average.append(1)
                    d_average_l2.append(
                        np.sum((adv[rand_int] - inputs[rand_int])**2)**.5)
                    d_average_l1.append(
                        np.sum(np.abs(adv[rand_int] - inputs[rand_int])))
                    d_average_linf.append(
                        np.amax(np.abs(adv[rand_int] - inputs[rand_int])))

                else:
                    r_average.append(0)

                dist_l1 = 0
                dist_l1_index = 1e10
                dist_linf = 0
                dist_linf_index = 1e10
                dist_l2 = 0
                dist_l2_index = 1e10
                for k, j in enumerate(range(i, i + num_targets)):
                    failure = True
                    if (args['untargeted']):
                        if (np.argmax(pred[k], 1) != np.argmax(
                                targets[j:j + 1], 1)):
                            failure = False
                    else:
                        if (np.argmax(pred[k],
                                      1) == np.argmax(targets[j:j + 1], 1)):
                            failure = False
                    if failure:
                        r_worst.append(0)
                        dist_l1_index = 1e10
                        dist_l2_index = 1e10
                        dist_linf_index = 1e10
                        break
                    else:
                        if (np.sum(np.abs(adv[j] - inputs[j])) > dist_l1):
                            dist_l1 = np.sum(np.abs(adv[j] - inputs[j]))
                            dist_l1_index = j
                        if (np.amax(np.abs(adv[j] - inputs[j])) > dist_linf):
                            dist_linf = np.amax(np.abs(adv[j] - inputs[j]))
                            dist_linf_index = j
                        if ((np.sum((adv[j] - inputs[j])**2)**.5) > dist_l2):
                            dist_l2 = (np.sum((adv[j] - inputs[j])**2)**.5)
                            dist_l2_index = j
                if (dist_l1_index != 1e10):
                    d_worst_l2.append((np.sum(
                        (adv[dist_l2_index] - inputs[dist_l2_index])**2)**.5))
                    d_worst_l1.append(
                        np.sum(
                            np.abs(adv[dist_l1_index] -
                                   inputs[dist_l1_index])))
                    d_worst_linf.append(
                        np.amax(
                            np.abs(adv[dist_linf_index] -
                                   inputs[dist_linf_index])))
                    r_worst.append(1)

                if (args['show'] and m == (num_models - 1)):
                    for j in range(i, i + num_targets):
                        target_id = np.argmax(targets[j:j + 1], 1)
                        label_id = np.argmax(labels[j:j + 1], 1)
                        prev_id = np.argmax(
                            np.reshape(model.model.predict(inputs[j:j + 1]),
                                       (data.test_labels[0:1].shape)), 1)
                        adv_id = np.argmax(
                            np.reshape(model.model.predict(adv[j:j + 1]),
                                       (data.test_labels[0:1].shape)), 1)
                        suffix = "id{}_seq{}_lbl{}_prev{}_adv{}_{}_l1_{:.3f}_l2_{:.3f}_linf_{:.3f}".format(
                            true_ids[i], target_id, label_id, prev_id, adv_id,
                            adv_id == target_id,
                            np.sum(np.abs(adv[j] - inputs[j])),
                            np.sum((adv[j] - inputs[j])**2)**.5,
                            np.amax(np.abs(adv[j] - inputs[j])))

                        show(
                            inputs[j:j + 1],
                            str(args['save']) + "/" + str(args['dataset']) +
                            "/" + str(args['attack']) +
                            "/original_{}.png".format(suffix))
                        show(
                            adv[j:j + 1],
                            str(args['save']) + "/" + str(args['dataset']) +
                            "/" + str(args['attack']) +
                            "/adversarial_{}.png".format(suffix))
            if (m != (num_models - 1)):
                lbl = "Src_"
                if (num_models > 2):
                    lbl += str(m) + "_"
            else:
                lbl = "Tgt_"
            if (num_targets > 1):
                print(lbl + 'best_case_L1_mean', np.mean(d_best_l1))
                print(lbl + 'best_case_L2_mean', np.mean(d_best_l2))
                print(lbl + 'best_case_Linf_mean', np.mean(d_best_linf))
                print(lbl + 'best_case_prob', np.mean(r_best))
                print(lbl + 'average_case_L1_mean', np.mean(d_average_l1))
                print(lbl + 'average_case_L2_mean', np.mean(d_average_l2))
                print(lbl + 'average_case_Linf_mean', np.mean(d_average_linf))
                print(lbl + 'average_case_prob', np.mean(r_average))
                print(lbl + 'worst_case_L1_mean', np.mean(d_worst_l1))
                print(lbl + 'worst_case_L2_mean', np.mean(d_worst_l2))
                print(lbl + 'worst_case_Linf_mean', np.mean(d_worst_linf))
                print(lbl + 'worst_case_prob', np.mean(r_worst))
            else:
                print(lbl + 'L1_mean', np.mean(d_average_l1))
                print(lbl + 'L2_mean', np.mean(d_average_l2))
                print(lbl + 'Linf_mean', np.mean(d_average_linf))
                print(lbl + 'success_prob', np.mean(r_average))