def main(_):
    """Craft FGSM adversarial examples for the selected dataset and evaluate them.

    Builds the classifier chosen by ``FLAGS.dataset`` (``'MNIST'`` or
    ``'Cifar'``), constructs the FGSM graph with dataset-specific
    ``epochs``/``eps``, runs it over the train and test splits, saves the
    resulting arrays under ``adversarial_outputs/`` and calls
    ``model.evaluate`` on both the legitimate and the adversarial data.

    :param _: unused positional argument (ignored).
    :raises ValueError: if ``FLAGS.dataset`` is not a supported dataset name.
    """
    with tf.Session() as sess:
        K.set_session(sess)

        # Select the data, the model and the FGSM hyper-parameters in one
        # place so the two can never disagree.
        if FLAGS.dataset == 'MNIST':
            data, model = MNIST(), MNISTModel("models/mnist", sess)
            epochs, eps = 9, 0.02
        elif FLAGS.dataset == 'Cifar':
            data, model = CIFAR(), CIFARModel("models/cifar", sess)
            epochs, eps = 4, 0.01
        else:
            raise ValueError(
                "Unsupported dataset {!r}; expected 'MNIST' or 'Cifar'".format(
                    FLAGS.dataset))

        def _model_fn(x, logits=False):
            """Adapter exposing ``model.predict`` in the form ``fgsm`` calls."""
            ybar, logits_ = model.predict(x)
            if logits:
                return ybar, logits_
            return ybar

        # NOTE(review): `x` is not defined in this function; it is expected
        # to exist at module scope (presumably the input placeholder).
        x_adv = fgsm(_model_fn, x, epochs=epochs, eps=eps)

        X_adv_test = attack(x_adv, data.test_data, data.test_labels, sess)
        X_adv_train = attack(x_adv, data.train_data, data.train_labels, sess)

        dataset_tag = FLAGS.dataset.lower()
        np.save('adversarial_outputs/fgsm_train_' + dataset_tag + '.npy', X_adv_train)
        np.save('adversarial_outputs/fgsm_test_' + dataset_tag + '.npy', X_adv_test)

        print("Legit/Adversarial training set")
        model.evaluate(data.train_data, data.train_labels)
        model.evaluate(X_adv_train, data.train_labels)

        print("Legit/Adversarial test set")
        model.evaluate(data.test_data, data.test_labels)
        model.evaluate(X_adv_test, data.test_labels)
def main():
    """Run a zeroth-order universal-perturbation attack on the MNIST model.

    Loads the MNIST data and model, builds the attack set and objective
    function, runs the optimizer named by ``MGR.parSet['optimizer']``
    (``'ZOSVRG'`` or ``'ZOSGD'``) to obtain a single perturbation, then
    saves one adversarial image per attacked sample plus the perturbation
    itself under ``MGR.parSet['save_path']``.

    If the optimizer name is not recognised a message is printed, the log
    handler is closed and the function returns without attacking.
    """
    data, model = MNIST(), MNISTModel(restore="models/mnist", use_log=True)
    origImgs, origLabels, origImgID = util.generate_attack_data_set(data, model, MGR)

    # The perturbation starts at zero and has the shape of a single image.
    delImgAT_Init = np.zeros(origImgs[0].shape)
    objfunc = ObjectiveFunc.OBJFUNC(MGR, model, origImgs, origLabels)

    # Step size: alpha divided by the number of elements in one image.
    MGR.Add_Parameter('eta', MGR.parSet['alpha']/origImgs[0].size)
    MGR.Log_MetaData()

    optimizer = MGR.parSet['optimizer']
    if optimizer == 'ZOSVRG':
        delImgAT = svrg.ZOSVRG(delImgAT_Init, MGR, objfunc)
    elif optimizer == 'ZOSGD':
        delImgAT = sgd.ZOSGD(delImgAT_Init, MGR, objfunc)
    else:
        # Without a perturbation there is nothing to evaluate or save; stop
        # here instead of failing later on an undefined `delImgAT`.
        print('Please specify a valid optimizer')
        sys.stdout.flush()
        MGR.logHandler.close()
        return

    for idx_ImgID in range(MGR.parSet['nFunc']):
        currentID = origImgID[idx_ImgID]
        orig_prob = model.model.predict(np.expand_dims(origImgs[idx_ImgID], axis=0))
        # Apply the perturbation in arctanh space and map back with tanh/2.
        # The 1.9999999 factor presumably keeps the arctanh argument strictly
        # inside (-1, 1) for pixels in [-0.5, 0.5] -- TODO confirm pixel range.
        advImg = np.tanh(np.arctanh(origImgs[idx_ImgID]*1.9999999)+delImgAT)/2.0
        adv_prob = model.model.predict(np.expand_dims(advImg, axis=0))

        suffix = "id{}_Orig{}_Adv{}".format(currentID, np.argmax(orig_prob), np.argmax(adv_prob))
        util.save_img(advImg, "{}/Adv_{}.png".format(MGR.parSet['save_path'], suffix))
    util.save_img(np.tanh(delImgAT)/2.0, "{}/Delta.png".format(MGR.parSet['save_path']))

    sys.stdout.flush()
    MGR.logHandler.close()
def prep_bbox(sess, x, y, X_train, Y_train, X_test, Y_test, nb_epochs,
              batch_size, learning_rate):
    """
    Build the black-box "oracle" model, train or restore it, and report
    its accuracy on legitimate test data.
    :param sess: the TF session
    :param x: the input placeholder
    :param y: the output (label) placeholder
    :param X_train: the training data for the oracle
    :param Y_train: the training labels for the oracle
    :param X_test: the testing data for the oracle
    :param Y_test: the testing labels for the oracle
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training and evaluation batches
    :param learning_rate: learning rate for training
    :return: tuple ``(model, predictions, accuracy)`` -- the oracle model,
             its output tensor for ``x``, and its accuracy on the test set
    """

    # The oracle architecture follows the module-level DATASET setting:
    # the MNIST model for "mnist", the CIFAR model for anything else.
    oracle_cls = MNISTModel if DATASET == "mnist" else CIFARModel
    model = oracle_cls(use_log=True).model
    predictions = model(x)
    print("Defined TensorFlow model graph.")

    # Either train the oracle from scratch or restore saved weights.
    if not FLAGS.load_pretrain:
        model_train(sess, x, y, predictions, X_train, Y_train,
                    verbose=True,
                    save=True,
                    args={
                        'nb_epochs': nb_epochs,
                        'batch_size': batch_size,
                        'learning_rate': learning_rate
                    })
    else:
        tf_model_load(sess)

    # Accuracy of the oracle on legitimate (unperturbed) test data.
    accuracy = model_eval(sess, x, y, predictions, X_test, Y_test,
                          args={'batch_size': batch_size})
    print('Test accuracy of black-box on legitimate test examples: '
          + str(accuracy))

    return model, predictions, accuracy
def run_pca(Data, num_components=10, invert=False):
    """
    Attack a PCA-defended MNIST classifier with Carlini L2 and print statistics.

    A PCA with ``num_components`` components is fitted on the flattened
    training images. With ``invert=False`` a model built by ``make_model``
    is loaded and wrapped together with the PCA; with ``invert=True`` a CNN
    is loaded and its ``predict`` is replaced by a function that projects
    inputs onto the PCA subspace and back before classifying.

    :param Data: zero-argument callable returning an object that exposes
                 ``train_data``, ``test_data`` and ``test_labels``
    :param num_components: number of PCA components (also part of the
                           weight file name)
    :param invert: select the project-and-reconstruct CNN variant
    """
    dataset = Data()

    session = K.get_session()

    K.set_learning_phase(False)

    flat_shape = (-1, 784)

    pca = sklearn.decomposition.PCA(n_components=num_components)
    pca.fit(dataset.train_data.reshape(flat_shape))

    suffix = str(num_components)
    if not invert:
        classifier = make_model(num_components)
        classifier.load_weights("models/mnist-pca-top-" + suffix)
        classifier = Wrap(classifier, pca)
    else:
        classifier = MNISTModel("models/mnist-pca-cnn-top-" + suffix)

    # PCA statistics as graph constants, used by the in-graph projection.
    mean_t = tf.constant(pca.mean_, dtype=tf.float32)
    basis_t = tf.constant(pca.components_.T, dtype=tf.float32)

    def new_predict(images):
        """Project onto the PCA subspace, reconstruct, then classify."""
        flat = tf.reshape(images, (-1, 784))
        coords = tf.matmul(flat - mean_t, basis_t)

        restored = tf.matmul(coords, tf.transpose(basis_t)) + mean_t
        return classifier.model(tf.reshape(restored, (-1, 28, 28, 1)))

    if invert:
        classifier.predict = new_predict

    attack = CarliniL2(session, classifier,
                       batch_size=100,
                       max_iterations=3000,
                       binary_search_steps=6,
                       targeted=False,
                       initial_const=1)

    num_samples = 100
    clean_inputs = dataset.test_data[:num_samples]

    test_adv = attack.attack(clean_inputs, dataset.test_labels[:num_samples])

    # Accuracy on the full, unperturbed test set.
    true_classes = np.argmax(dataset.test_labels, axis=1)
    clean_logits = session.run(
        classifier.predict(tf.constant(dataset.test_data, dtype=np.float32)))
    print('accuracy', np.mean(np.argmax(clean_logits, axis=1) == true_classes))

    print(list(test_adv[0].flatten()))

    # Mean per-image L2 distance between adversarial and clean inputs.
    per_image_l2 = np.sum((test_adv - clean_inputs)**2, axis=(1, 2, 3))**.5
    print('dist', np.mean(per_image_l2))

    # Printed as 'success': fraction of adversarial inputs whose predicted
    # class equals the true class.
    adv_classes = np.argmax(
        session.run(classifier.predict(tf.constant(test_adv))), axis=1)
    print('success', np.mean(adv_classes == true_classes[:num_samples]))
def compare_baseline():
    """
    Attack the undefended MNIST model with Carlini L2 on the first 100 test
    images and print the mean L2 distance of the adversarial examples.
    """
    data = MNIST()
    model = MNISTModel("models/mnist")
    sess = K.get_session()

    attack_config = dict(batch_size=100,
                         max_iterations=3000,
                         binary_search_steps=4,
                         targeted=False,
                         initial_const=10)
    attack = CarliniL2(sess, model, **attack_config)

    num_samples = 100
    clean_inputs = data.test_data[:num_samples]
    test_adv = attack.attack(clean_inputs, data.test_labels[:num_samples])

    # Per-image Euclidean distance, averaged over the attacked samples.
    squared_diff = (test_adv - clean_inputs)**2
    per_image_l2 = np.sum(squared_diff, axis=(1, 2, 3))**.5
    print('dist', np.mean(per_image_l2))
def train(data, file_name, components=100, num_epochs=20, batch_size=256, pca=None, invert=False):
    """
    Train a classifier on PCA-projected data.

    Every split of ``data`` is flattened, transformed with ``pca`` and
    truncated to the first ``components`` coordinates. With ``invert=False``
    a model from ``make_model(components)`` is trained directly on those
    coordinates; with ``invert=True`` the coordinates are mapped back to
    28x28x1 images with ``pca.inverse_transform`` and an ``MNISTModel`` is
    trained on the reconstructions.

    :param data: object exposing ``train_data``/``train_labels``,
                 ``validation_data``/``validation_labels`` and
                 ``test_data``/``test_labels``; the image arrays are 4-D
    :param file_name: path to save the trained model to, or ``None`` to skip
    :param components: number of leading PCA coordinates to keep
    :param num_epochs: number of training epochs
    :param batch_size: mini-batch size
    :param pca: fitted object with ``transform`` and ``inverse_transform``;
                required despite the ``None`` default
    :param invert: train on PCA reconstructions instead of PCA coordinates
    :return: the trained model
    :raises ValueError: if ``pca`` is ``None``
    """
    # `pca` defaults to None only to keep the signature stable; fail early
    # with a clear message instead of an AttributeError further down.
    if pca is None:
        raise ValueError("train() requires a fitted `pca` object")

    shape = (-1, data.train_data.shape[1]*data.train_data.shape[2]*data.train_data.shape[3])

    def project(images):
        """Flatten, apply the PCA and keep the leading coordinates."""
        return pca.transform(images.reshape(shape))[:, :components]

    train_data = project(data.train_data)
    validation_data = project(data.validation_data)
    test_data = project(data.test_data)

    print(train_data.shape)

    if invert:
        def reconstruct(coords):
            """Map PCA coordinates back to 28x28x1 images."""
            return pca.inverse_transform(coords).reshape((-1, 28, 28, 1))

        train_data = reconstruct(train_data)
        validation_data = reconstruct(validation_data)
        test_data = reconstruct(test_data)

        model = MNISTModel(None).model
    else:
        model = make_model(components)

    def fn(correct, predicted):
        """Softmax cross-entropy; the model output is treated as logits."""
        return tf.nn.softmax_cross_entropy_with_logits(labels=correct,
                                                       logits=predicted)

    model.compile(loss=fn,
                  optimizer='adam',
                  metrics=['accuracy'])

    # NOTE(review): `nb_epoch` is the older Keras spelling of `epochs`;
    # left unchanged because the Keras version in use is not visible here.
    model.fit(train_data, data.train_labels,
              batch_size=batch_size,
              validation_data=(validation_data, data.validation_labels),
              nb_epoch=num_epochs,
              shuffle=True)

    acc = np.mean(np.argmax(model.predict(test_data), axis=1) == np.argmax(data.test_labels, axis=1))
    print("Overall accuracy on test set:", acc)

    if file_name is not None:
        model.save(file_name)

    return model
Example #7
0
def main(args):
    """Run a black-box attack (ZOO / AutoZOOM family) over a set of test images.

    Loads the dataset and classifier named by ``args["dataset"]``, builds
    the attack set with ``util.generate_attack_data_set``, instantiates the
    attack selected by ``args["attack_method"]`` and attacks each image in
    turn. For every image the original, the adversarial example and their
    difference are written as ``.png`` and ``.npy`` files under
    ``<save_path>/<dataset>/<attack_method>/<attack_type>``, and the running
    success rate and average L2 distortion are printed.

    :param args: dict of run options. Keys read here: ``dataset``,
                 ``num_img``, ``img_offset``, ``attack_single_img``,
                 ``single_img_target_label``, ``attack_type``,
                 ``random_target``, ``attack_method``, ``img_resize``,
                 ``batch_size``, ``compress_mode``, ``codec_prefix`` and
                 ``save_path``. ``img_resize`` and ``batch_size`` may be
                 overwritten in place.
    :raises ValueError: if ``dataset`` or ``attack_method`` is unsupported.
    :raises Exception: if the dataset has fewer test labels than ``num_img``.
    """
    with tf.Session() as sess:
        print("Loading data and classification model: {}".format(
            args["dataset"]))
        if args['dataset'] == "mnist":
            data, model = MNIST(), MNISTModel("models/mnist",
                                              sess,
                                              use_softmax=True)
        elif args['dataset'] == "cifar10":
            data, model = CIFAR(), CIFARModel("models/cifar",
                                              sess,
                                              use_softmax=True)
        elif args['dataset'] == "imagenet":
            data, model = ImageNetDataNP(), InceptionModel(sess,
                                                           use_softmax=True)
        else:
            # Previously fell through and failed later with a NameError.
            raise ValueError("Unsupported dataset: {!r}".format(
                args["dataset"]))

        if len(data.test_labels) < args["num_img"]:
            raise Exception("No enough data, only have {} but need {}".format(
                len(data.test_labels), args["num_img"]))

        # Computed up front because the attack-set generation below always
        # needs it, whichever branch of `attack_single_img` is taken.
        is_imagenet = args["dataset"] in ("imagenet", "imagenet_np")
        shift_index = is_imagenet

        if args["attack_single_img"]:
            # Manually set up a single-image attack set.
            # NOTE(review): none of these values are consumed by the attack
            # loop below, which iterates over the generated attack set --
            # confirm the intended single-image behaviour.
            orig_img = data.test_data
            orig_labels = data.test_labels
            orig_img_id = np.array([1])

            if args["attack_type"] == "targeted":
                target_labels = [
                    np.eye(model.num_labels)[args["single_img_target_label"]]
                ]
            else:
                target_labels = orig_labels

        # Build the attack set. A candidate target-class list is passed only
        # when random targets are requested.
        attack_set_kwargs = {
            "attack_type": args["attack_type"],
            "shift_index": shift_index,
        }
        if args["random_target"] and is_imagenet:
            # Restrict random targets to the classes present in the test set.
            attack_set_kwargs["random_target_class"] = np.unique(
                np.argmax(data.test_labels, 1))
        elif args["random_target"]:
            # Random target over every class index.
            class_num = data.test_labels.shape[1]
            attack_set_kwargs["random_target_class"] = list(range(class_num))

        all_orig_img, all_target_labels, all_orig_labels, all_orig_img_id = util.generate_attack_data_set(
            data,
            args["num_img"],
            args["img_offset"],
            model,
            **attack_set_kwargs)

        # Attack-related settings.
        uses_autoencoder = args["attack_method"] in ("zoo_ae", "autozoom_ae")

        if args["attack_method"] in ("zoo", "autozoom_bilin"):
            if args["img_resize"] is None:
                args["img_resize"] = model.image_size
                print(
                    "Argument img_resize is not set and not using autoencoder, set to image original size:{}"
                    .format(args["img_resize"]))

        if args["attack_method"] in ("zoo", "zoo_ae"):
            if args["batch_size"] is None:
                args["batch_size"] = 128
                print(
                    "Using zoo or zoo_ae attack, and batch_size is not set.\nSet batch_size to {}."
                    .format(args["batch_size"]))
        else:
            if args["batch_size"] is not None:
                print("Argument batch_size is not used")
                args["batch_size"] = 1  # force to be 1

        if uses_autoencoder:
            if args["dataset"] == "mnist" or args["dataset"] == "cifar10":
                codec = CODEC(model.image_size,
                              model.num_channels,
                              args["compress_mode"],
                              use_tanh=False)
            else:
                codec = CODEC(128, model.num_channels, args["compress_mode"])
            print(args["codec_prefix"])
            codec.load_codec(args["codec_prefix"])
            decoder = codec.decoder
            print(decoder.input_shape)
            # The attack operates at the decoder's input resolution.
            args["img_resize"] = decoder.input_shape[1]
            print("Using autoencoder, set the attack image size to:{}".format(
                args["img_resize"]))

        # Set up the attack.
        if args["attack_method"] == "zoo":
            blackbox_attack = ZOO(sess, model, args)
        elif args["attack_method"] == "zoo_ae":
            blackbox_attack = ZOO_AE(sess, model, args, decoder)
        elif args["attack_method"] == "autozoom_bilin":
            blackbox_attack = AutoZOOM_BiLIN(sess, model, args)
        elif args["attack_method"] == "autozoom_ae":
            blackbox_attack = AutoZOOM_AE(sess, model, args, decoder, codec)
        else:
            # Previously fell through and failed later with a NameError.
            raise ValueError("Unsupported attack_method: {!r}".format(
                args["attack_method"]))

        save_prefix = os.path.join(args["save_path"], args["dataset"],
                                   args["attack_method"], args["attack_type"])

        # Create the output directory without going through a shell: the
        # path is built from caller-supplied strings.
        if not os.path.isdir(save_prefix):
            os.makedirs(save_prefix)

        total_success = 0
        l2_total = 0

        for i in range(all_orig_img_id.size):
            # Slices (not indices) keep the leading batch dimension of 1.
            orig_img = all_orig_img[i:i + 1]
            target = all_target_labels[i:i + 1]
            label = all_orig_labels[i:i + 1]

            target_class = np.argmax(target)
            true_class = np.argmax(label)
            test_index = all_orig_img_id[i]

            print(
                "[Info][Start]{}: test_index:{}, true label:{}, target label:{}"
                .format(i, test_index, true_class, target_class))
            if uses_autoencoder:
                # Report the autoencoder's reconstruction error on this image.
                if args["dataset"] == "mnist" or args["dataset"] == "cifar10":
                    temp_img = all_orig_img[i:i + 1]
                else:
                    # Shift/scale to 0..255, resize to 128x128, then map back.
                    # NOTE(review): scipy.misc.imresize is absent from recent
                    # SciPy releases.
                    temp_img = all_orig_img[i]
                    temp_img = (temp_img + 0.5) * 255
                    temp_img = scipy.misc.imresize(temp_img, (128, 128))
                    temp_img = temp_img / 255 - 0.5
                    temp_img = np.expand_dims(temp_img, axis=0)
                encode_img = codec.encoder.predict(temp_img)
                decode_img = codec.decoder.predict(encode_img)
                diff_img = (decode_img - temp_img)
                diff_mse = np.mean(diff_img.reshape(-1)**2)
                print("[Info][AE] MSE:{:.4f}".format(diff_mse))

            # NOTE(review): timestart/timeend are recorded but not reported.
            timestart = time.time()
            adv_img = blackbox_attack.attack(orig_img, target)
            timeend = time.time()

            # Restore the batch dimension if the attack dropped it.
            if len(adv_img.shape) == 3:
                adv_img = np.expand_dims(adv_img, axis=0)

            l2_dist = np.sum((adv_img - orig_img)**2)**.5
            adv_class = np.argmax(model.model.predict(adv_img))

            # Targeted: must hit the target class. Untargeted: any class
            # other than the true one counts.
            if args["attack_type"] == "targeted":
                success = bool(adv_class == target_class)
            else:
                success = bool(adv_class != true_class)

            if success:
                total_success += 1
                l2_total += l2_dist

            print(
                "[Info][End]{}: test_index:{}, true label:{}, adv label:{}, success:{}, distortion:{:.5f}, success_rate:{:.4f}, l2_avg:{:.4f}"
                .format(i, test_index, true_class, adv_class, success, l2_dist,
                        total_success / (i + 1),
                        0 if total_success == 0 else l2_total / total_success))

            # Save images, each as a .png and as the raw .npy array.
            suffix = "id{}_testIndex{}_true{}_adv{}".format(
                i, test_index, true_class, adv_class)

            # original image
            save_name = os.path.join(save_prefix, "Orig_{}.png".format(suffix))
            util.save_img(orig_img, save_name)
            save_name = os.path.join(save_prefix, "Orig_{}.npy".format(suffix))
            np.save(save_name, orig_img)

            # adversarial image
            save_name = os.path.join(save_prefix, "Adv_{}.png".format(suffix))
            util.save_img(adv_img, save_name)
            save_name = os.path.join(save_prefix, "Adv_{}.npy".format(suffix))
            np.save(save_name, adv_img)

            # difference image (halved for the .png, raw for the .npy)
            save_name = os.path.join(save_prefix, "Diff_{}.png".format(suffix))
            util.save_img((adv_img - orig_img) / 2, save_name)
            save_name = os.path.join(save_prefix, "Diff_{}.npy".format(suffix))
            np.save(save_name, adv_img - orig_img)
Example #8
0
def main():
    """Restore a saved advGAN for MNIST and display a generated image.

    Reads MNIST, fills in the option fields the model needs, restores the
    white-box classifier and the advGAN checkpoint, runs the generator on
    the first training images and shows the result with matplotlib.
    """
    mnist = input_data.read_data_sets('MNIST_data', one_hot=True)

    # Pixel values are rescaled as x * 2 - 1.
    train_data = mnist.train.images * 2.0 - 1.0
    train_label = mnist.train.labels
    test_data = mnist.test.images * 2.0 - 1.0
    test_label = mnist.test.labels

    x_dim = train_data.shape[1]
    y_dim = train_label.shape[1]

    opt = opts.parse_opt()
    batch_size = opt.batch_size

    # Override the parsed options: data description first ...
    opt.input_data = "MNIST"
    opt.input_c_dim = 1
    opt.output_c_dim = 1
    opt.input_dim = x_dim
    opt.label_dim = y_dim
    # ... then the run-time arguments.
    opt.c = 1.
    opt.ld = 500.
    opt.H_lambda = 10.
    opt.cgan_flag = True
    opt.patch_flag = True
    opt.G_lambda = 10.
    opt.s_l = 0
    opt.t_l = 1

    # Run a session to load the saved models.
    with tf.Session() as sess:
        model_store = opt.model_restore
        print('MNIST model is stored at %s' % model_store)
        whitebox_model = MNISTModel(model_store)

        # Build the advGAN around the white-box model, then restore it.
        model = advGAN(whitebox_model, model_store, opt, sess)

        best_model_path = './GAN/save/best.ckpt'
        print('advGAN is stored at %s' % best_model_path)
        model.load(best_model_path)

        # We have to load a batch of images, then create the fake ones.
        # They should be identical.
        num_images = 10
        images = train_data[:num_images]
        (generated,) = sess.run([model.fake_images_sample],
                                {model.source: images})

        # NOTE(review): ten source images are fed, yet the fetched value is
        # reshaped to a single 28x28 image -- confirm the shape of
        # `fake_images_sample`.
        plt.imshow(np.reshape(generated, [28, 28]))
        plt.show()
Example #9
0
def train():
    flatten_flag = True  # flatten output of G or not?
    opt = opts.parse_opt()
    opt.input_data = "MNIST"
    # mapping [0,1] -> [-1,1]
    # load data
    # mnist = input_data.read_data_sets('MNIST_data', one_hot=True)
    # train_data = mnist.train.images * 2.0 - 1.0
    # train_label = mnist.train.labels

    # test_data = mnist.test.images * 2.0 - 1.0
    # test_label = mnist.test.labels

    loaded = np.load('MNIST_data/B.npz')
    train_data, train_label, test_data, test_label = \
        loaded['train_data'], loaded['train_label'], \
        loaded['test_data'], loaded['test_label']

    # We create the label clues here.
    if opt.cgan_gen is True:
        label_clue = np.zeros((train_label.shape[1], opt.img_dim, opt.img_dim,
                               train_label.shape[1]))
        for lbl in range(train_label.shape[1]):
            label_clue[lbl, :, :, lbl] = 1

    if opt.cgan_gen:
        output_samples, output_labels = output_sample(test_data, test_label,
                                                      True)
    else:
        output_samples = output_sample(test_data, test_label)
    print output_samples.shape

    print 'Shape of data:'
    print '\tTraining data: ' + str(train_data.shape)
    print '\tTraining label: ' + str(train_label.shape)
    print '\tTest data: ' + str(test_data.shape)
    print '\tTest label: ' + str(test_label.shape)

    x_dim = train_data.shape[1]
    y_dim = train_label.shape[1]

    opt.input_c_dim = 1
    opt.output_c_dim = 1
    opt.input_dim = x_dim
    opt.label_dim = y_dim

    batch_size = opt.batch_size

    NUM_THREADS = 2
    tf_config = tf.ConfigProto()
    tf_config.intra_op_parallelism_threads = NUM_THREADS
    tf_config.gpu_options.allow_growth = True

    with tf.Session(config=tf_config) as sess:
        # Initialize the variables, and restore the variables form checkpoint if there is.
        # and initialize the writer
        global_step = 0

        print '\tRetrieving evil model from "%s"' % opt.evil_model_path
        evil_model = MNISTModel(opt.evil_model_path)
        print '\tRetrieving good model from "%s"' % opt.good_model_path
        good_model = OddEvenMNIST(opt.good_model_path)
        # model = advGAN(whitebox_model, model_store, opt, sess)
        model = advGAN(good_model, evil_model, opt, sess)

        min_adv_accuracy = 10e10
        max_accuracy_diff = -np.inf

        # summary_dir = "logs/MNIST/g_%d_ld_%d_gl_%d_L2_%.2f_dn_%d" % (
        #     opt.G_lambda, opt.ld, opt.good_loss_coeff,
        #     opt.L2_lambda, opt.d_train_num)

        summary_dir = "logs/MNIST/dn_%d_gn_%d" % (opt.d_train_num,
                                                  opt.g_train_num)

        duplicate_num = 0
        while os.path.isdir(summary_dir + '_' + str(duplicate_num) + '/'):
            duplicate_num += 1
        summary_dir += '_' + str(duplicate_num) + '/'
        print 'Creating directory %s for logs.' % summary_dir
        os.mkdir(summary_dir)

        writer = tf.summary.FileWriter(summary_dir, sess.graph)
        loader = Dataset2(train_data, train_label)
        print 'Training data loaded.'

        best_evil_accuracy = -1.0
        best_res_epoch = -1
        best_res = None
        for epoch_num in range(opt.max_epoch):
            print 'Epoch %d' % epoch_num

            # Randomly shuffle the data.
            random_indices = np.arange(train_data.shape[0])
            np.random.shuffle(random_indices)
            train_data = train_data[random_indices, :]
            train_label = train_label[random_indices, :]

            real_buckets = []
            for lbl in range(train_label.shape[1]):
                real_buckets.append(np.where(train_label[:, lbl] == 1)[0])

            # Mini-batch Gradient Descent.
            batch_no = 0
            while (batch_no * batch_size) < train_data.shape[0]:
                head = batch_no * batch_size
                if head + batch_size <= train_data.shape[0]:
                    tail = head + batch_size
                else:
                    tail = train_data.shape[0]
                    head = train_data.shape[0] - batch_size

                feed_data = train_data[head:tail, :]
                evil_labels = train_label[head:tail, :]
                good_labels = odd_even_labels(evil_labels)

                # Finding randomly sampled real data.
                real_data = np.zeros_like(feed_data)
                # Indices of training batch with specific label.
                # label_indices[i] = indices of feed data, that have evil_label[i] == 1.
                label_indices = [np.where(evil_labels[:, lbl] == 1)[0] \
                    for lbl in range(evil_labels.shape[1])]

                for lbl in range(evil_labels.shape[1]):
                    # We take a random sample of size |label_indices[lbl]|
                    # from the real bucket of `lbl`.
                    selected_real_data = np.random.choice(
                        real_buckets[lbl], label_indices[lbl].shape[0])

                    # We put this random sample in the same index of their
                    # corresponding batch training data.
                    real_data[label_indices[lbl], :] = train_data[
                        selected_real_data, :]

                feed = {
                    model.source: feed_data,
                    model.target: real_data,
                    model.good_labels: good_labels,
                    model.evil_labels: evil_labels
                }

                # Train G.
                for _ in range(opt.g_train_num):
                    summary_str, G_loss, gan_loss, hinge_loss, l1_loss, l2_loss, \
                        good_fn_loss, evil_fn_loss, adv_loss, total_loss, _ = sess.run([
                            model.total_loss_merge_sum,
                            model.g_loss,
                            model.gan_loss,
                            model.hinge_loss,
                            model.l1_loss,
                            model.l2_loss,
                            model.good_fn_loss,
                            model.evil_fn_loss,
                            model.adv_loss,
                            model.total_loss,
                            model.G_train_op], feed)
                    writer.add_summary(summary_str, global_step)

                # Train D.
                for _ in range(opt.d_train_num):
                    summary_str, D_loss, _ = sess.run([
                        model.total_loss_merge_sum, model.d_loss,
                        model.D_pre_train_op
                    ], feed)
                    writer.add_summary(summary_str, global_step)

                global_step += 1
                batch_no += 1

            # Validation after each trainig epoch.
            print '\tD: %.4f, G: %.4f\n\thinge(%.1f): %.4f, L1(%.1f): %.4f, L2(%.1f): %.4f' % (
                D_loss, G_loss, opt.H_lambda, hinge_loss, opt.L1_lambda,
                l1_loss, opt.L2_lambda, l2_loss)
            print '\t\tGAN total loss: %.4f' % gan_loss
            print '\tGood: %.4f, Evil: %.4f' % (good_fn_loss, evil_fn_loss)
            print '\tAdv: %.4f, Total: %.4f' % (adv_loss, total_loss)

            new_pred_data = []
            head = 0
            last_batch = False
            while head < test_data.shape[0]:
                if head + batch_size <= test_data.shape[0]:
                    tail = head + batch_size
                else:
                    tail = test_data.shape[0]
                    head = test_data.shape[0] - batch_size
                    last_batch = True
                if opt.cgan_gen:
                    cur_data = sess.run(
                        model.fake_images_sample,
                        {model.evil_labels: test_label[head:tail, :]})
                else:
                    cur_data = sess.run(
                        model.fake_images_sample,
                        {model.source: test_data[head:tail, :]})

                if last_batch:
                    new_pred_data.append(
                        cur_data[-(test_data.shape[0] % batch_size):, :])
                else:
                    new_pred_data.append(cur_data)
                head += batch_size
            new_pred_data = np.concatenate(new_pred_data)

            good_pred = np.argmax(
                model.good_model.model.predict(new_pred_data), axis=1)
            evil_pred = np.argmax(
                model.evil_model.model.predict(new_pred_data), axis=1)
            evil_true = np.argmax(test_label, axis=1)
            good_true = np.argmax(odd_even_labels(test_label), axis=1)

            good_accuracy = accuracy_score(good_true, good_pred)
            evil_accuracy = accuracy_score(evil_true, evil_pred)
            total_good_confusion = confusion_matrix(good_true, good_pred)
            total_evil_confusion = confusion_matrix(evil_true,
                                                    evil_pred,
                                                    labels=range(
                                                        opt.evil_label_num))

            print '\tGood Accuracy: %.4f, Evil Accuracy: %.4f' % (
                good_accuracy, evil_accuracy)
            print '\tAccuracy diff: %f' % (good_accuracy - evil_accuracy)
            print 'Good confusion matrix:'
            print total_good_confusion
            print 'Evil confusion matrix:'
            print total_evil_confusion

            # Creating snapshots to save.
            if opt.cgan_gen:
                fake_samples = sess.run(model.fake_images_sample,
                                        {model.evil_labels: output_labels})
            else:
                fake_samples, fake_noise = sess.run(
                    [model.fake_images_sample, model.sample_noise],
                    {model.source: output_samples})
            max_accuracy_diff = good_accuracy - evil_accuracy

            fakes = merge(fake_samples[:100, :], [10, 10])
            separator = np.ones((280, 2))
            original = merge(output_samples[:100].reshape(-1, 28, 28, 1),
                             [10, 10])

            if opt.cgan_gen:
                scipy.misc.imsave(
                    'snapshot_%d.png' % epoch_num,
                    np.concatenate([fakes, separator, original], axis=1))
            else:
                noise = merge(fake_noise[:100], [10, 10])
                scipy.misc.imsave(
                    'snapshot_%d.png' % epoch_num,
                    np.concatenate([fakes, noise, original], axis=1))

            # Only for the purpose of finding best D and G training times.
            if evil_accuracy > best_evil_accuracy:
                best_evil_accuracy = evil_accuracy
                best_res_epoch = epoch_num
                if opt.cgan_gen:
                    best_res = np.concatenate([fakes, separator, original],
                                              axis=1)
                else:
                    best_res = np.concatenate([fakes, noise, original], axis=1)

        best_image_path = 'best_dn_%d_gn_%d_%d_epoch_%d.png' % \
            (opt.d_train_num, opt.g_train_num, duplicate_num, best_res_epoch)
        scipy.misc.imsave(best_image_path, best_res)

        # print 'Maximum iterations: %d' % opt.max_iteration
        # while iteration < opt.max_iteration:
        #     # this function returns (data, label, np.array(target)).
        #     # data = loader.next_batch(batch_size, negative=False)
        #     feed_data, evil_labels, real_data = loader.next_batch(
        #         batch_size, negative=False)
        #     good_labels = odd_even_labels(evil_labels)

        #     feed = {
        #         model.source: feed_data,
        #         model.target: real_data,
        #         model.good_labels: good_labels,
        #         model.evil_labels: evil_labels
        #     }

        #     # if opt.cgan_gen:
        #     #     feed[model.label_clue] = label_clue[evil_labels.argmax(axis=1)]

        #     # Training G once.
        #     # summary_str, G_loss, _ = sess.run(
        #     #     [model.total_loss_merge_sum, model.g_loss, model.G_train_op], feed)
        #     # writer.add_summary(summary_str, iteration)

        #     # Training G twice.
        #     summary_str, G_loss, gan_loss, hinge_loss, l1_loss, l2_loss, \
        #         good_fn_loss, evil_fn_loss, adv_loss, total_loss, _ = sess.run([
        #             model.total_loss_merge_sum,
        #             model.g_loss,
        #             model.gan_loss,
        #             model.hinge_loss,
        #             model.l1_loss,
        #             model.l2_loss,
        #             model.good_fn_loss,
        #             model.evil_fn_loss,
        #             model.adv_loss,
        #             model.total_loss,
        #             model.G_train_op], feed)
        #     writer.add_summary(summary_str, iteration)

        #     # Training D.
        #     for _ in range(opt.d_train_num):
        #         summary_str, D_loss, _ = sess.run(
        #             [model.total_loss_merge_sum, model.d_loss, model.D_pre_train_op], feed)
        #         writer.add_summary(summary_str, iteration)

        #     if iteration % opt.losses_log_every == 0:

        #     # if iteration != 0 and iteration % opt.save_checkpoint_every == 0:
        #         # checkpoint_path = os.path.join(opt.checkpoint_path, 'checkpoint.ckpt')
        #         # print 'Saving the model in "%s"' % checkpoint_path

        #         # model.saver.save(sess, checkpoint_path, global_step=iteration)
        #         # test_loader = Dataset2(test_data, test_label)

        #         # test_num = test_loader._num_examples
        #         # test_iter_num = (test_num - batch_size) / batch_size

        #         # total_evil_accuracy = 0.0
        #         # total_good_accuracy = 0.0

        #         # fake_samples = [[] for _ in range(test_loader._num_labels)]
        #         # fake_noise = [[] for _ in range(test_loader._num_labels)]
        #         # original_samples = [[] for _ in range(test_loader._num_labels)]

        #         # for _ in range(test_iter_num):

        #         #     # Loading the next batch of test images
        #         #     test_input_data, test_evil_labels, _ = \
        #         #         test_loader.next_batch(batch_size)
        #         #     evil_categorical_labels = np.argmax(test_evil_labels, axis=1)
        #         #     test_good_labels = odd_even_labels(test_evil_labels)
        #         #     feed = {
        #         #         model.source: test_input_data,
        #         #         model.evil_labels: test_evil_labels,
        #         #         model.good_labels: test_good_labels
        #         #     }

        #         #     # if opt.cgan_gen:
        #         #     #     feed[model.label_clue] = label_clue[test_evil_labels.argmax(axis=1)]

        #         #     evil_accuracy, good_accuracy = sess.run(
        #         #         [model.evil_accuracy, model.good_accuracy], feed)
        #         #     # We divide the total accuracy by the number of test iterations.
        #         #     total_good_accuracy += good_accuracy
        #         #     total_evil_accuracy += evil_accuracy
        #         #     # print 'Evil accuracy: %.6f\tGood accuracy: %.6f' % (
        #         #     #     evil_accuracy, good_accuracy)
        #         #     # test_accuracy, test_adv_accuracy = sess.run(
        #         #     #     [model.accuracy, model.adv_accuracy], feed)
        #         #     # test_acc += test_accuracy
        #         #     # test_adv_acc += test_adv_accuracy

        #         #     # fake_images, g_x = sess.run(
        #         #     #     [model.fake_images_sample, model.sample_noise],
        #         #     #     {model.source: test_input_data})

        #         #     # for lbl in range(test_loader._num_labels):
        #         #     #     if len(fake_samples[lbl]) < 10:
        #         #     #         idx = np.where(evil_categorical_labels == lbl)[0]
        #         #     #         if idx.shape[0] >= 10:
        #         #     #             fake_samples[lbl] = fake_images[idx[:10]]
        #         #     #             fake_noise[lbl] = g_x[idx[:10]]
        #         #     #             original_samples[lbl] = test_input_data[idx[:10]]

        #         #     # for lbl, sample, noise in zip(test_evil_labels, fake_images, fake_noise):
        #         #     #     if len(fake_samples[lbl]) > 10:
        #         #     #         continue
        #         #     #     fake_samples[lbl].append(sample)
        #         #     #     fake_noise[lbl].append(noise)

        #         #     # pdb.set_trace()
        #         #     # print fake_images.shape

        #         #     # Finding those predicted labels that are equal to the target label
        #         #     # idxs = np.where(out_predict_labels == target_label)[0]
        #         #     # save_images(samples[:100], [10, 10], 'CIFAR10/result2/test_' + str(source_idx) + str(target_idx)+  '_.png')
        #         #     # pdb.set_trace()
        #         #     # show_samples.append(samples)
        #         #     # input_samples.append(s_imgs)
        #         #     # save_samples.append(samples)
        #         #     # if opt.is_advGAN:
        #         #     #     save_samples.append(samples[idxs])
        #         #     # else:
        #         #         # We add all samples.
        #         # # show_samples = np.concatenate(show_samples, axis=0)
        #         # # save_samples = np.concatenate(save_samples, axis=0)
        #         # good_accuracy = total_good_accuracy / float(test_iter_num)
        #         # evil_accuracy = total_evil_accuracy / float(test_iter_num)
        #         # print '\tAccuracy diff: %f' % (good_accuracy - evil_accuracy)
        #         # print '\tGood accuracy %f, Evil accuracy %f' % (
        #         #     good_accuracy, evil_accuracy)

        #         # Resizing the samples to save them later on.
        #         # fake_samples = np.reshape(np.array(fake_samples), [100, -1])
        #         # original_samples = np.reshape(np.array(original_samples), [100, -1])
        #         # fake_noise = np.reshape(np.array(fake_noise), [100, -1])

        #         # if (good_accuracy - evil_accuracy) > max_accuracy_diff:
        #         # test_accuracy = test_acc / float(test_iter_num)
        #         # test_adv_accuracy = test_adv_acc / float(test_iter_num)
        #         # if (good_accuracy - evil_accuracy) > max_accuracy_diff:
        #         #     max_accuracy_diff = good_accuracy - evil_accuracy
        #         # if min_adv_accuracy > test_adv_accuracy:
        #         #     min_adv_accuracy = test_adv_accuracy
        #         # save_images(fake_images[:100], [10, 10], 'fake.png')
        #         # save_images(test_input_data[:100], [10, 10], 'real.png')
        #         # all_idx = np.arange(100)
        #         # odds = np.where((all_idx / 10) % 2 == 1)[0]
        #         # evens = np.where((all_idx / 10) % 2 == 0)[0]
        #         # order = np.concatenate((odds, evens))
        #         # save_images(fake_samples[order], [10, 10], 'best_images.png')
        #         # save_images(fake_noise[order], [10, 10], 'best_noise.png')
        #         # save_images(original_samples[order], [10, 10], 'best_original.png')

        #         # save_anything = True
        #         # Saving the best yet model.
        #         # best_model_path = os.path.join(opt.checkpoint_path, 'best.ckpt')
        #         # print 'Saving the best model yet at "%s"' % best_model_path
        #         # model.saver.save(sess, best_model_path)

        #         # if save_anything is False:
        #         #     # Nothing is saved. We save a version here.
        #         #     save_images(fake_samples[:100], [10, 10], 'last_images.png')
        #         #     save_images(fake_noise[:100], [10, 10], 'last_noise.png')
        #         #     save_anything = True

        #     iteration += 1

        # We can transform the training and test data given in the beginning here.
        # This is only half the actual data.
        if opt.save_data:
            # if opt.cgan_gen:
            raise NotImplementedError(
                'Saving data for CGAN_GEN is not yet implemented.')
Example #10
0
def _perturbation_norms(adv_img, orig_img):
    """Return the ``(L1, L2, Linf)`` norms of ``adv_img - orig_img``."""
    delta = adv_img - orig_img
    magnitude = np.abs(delta)
    return np.sum(magnitude), np.sum(delta**2)**.5, np.amax(magnitude)


def main(args):
    """Run one attack and print best/average/worst-case statistics.

    ``args`` is a mapping of run options. Keys read here: ``dataset``
    ('mnist', 'cifar' or 'imagenet'), ``attack`` ('L2', 'L1', 'EN', 'FGSM',
    'FGML1', 'FGML2', 'IFGSM', 'IFGML1' or 'IFGML2'), ``batch_size``,
    ``adversarial``, ``temp``, ``numimg``, ``train``, ``seed``, ``conf``,
    ``show``; plus ``seed_imagenet`` (imagenet only), ``maxiter``,
    ``binary_steps``, ``beta``, ``abort_early`` (L2/L1/EN only) and ``save``
    (only when ``show`` is truthy).

    When ``args['train']`` is truthy the labels and adversarial examples are
    written to ``labels_train.npy`` / ``<attack>_train.npy`` and the function
    returns. Otherwise the adversarial examples are scored batch by batch
    and the mean distortions / success rates are printed.

    Raises:
        ValueError: if ``args['dataset']`` or ``args['attack']`` is not one
            of the names listed above.
    """
    dataset = args['dataset']
    attack_name = args['attack']
    batch_size = args['batch_size']

    # ``ord`` value passed to FGM / IGM for each attack name.
    fgm_orders = {'FGSM': np.inf, 'FGML1': 1, 'FGML2': 2}
    igm_orders = {'IFGSM': np.inf, 'IFGML1': 1, 'IFGML2': 2}
    optimisation_attacks = ('L2', 'L1', 'EN')

    # Fail fast: an unknown name used to fall through every branch and only
    # surface later as a NameError on ``data`` / ``adv``.
    if dataset not in ('mnist', 'cifar', 'imagenet'):
        raise ValueError("unknown dataset: {!r}".format(dataset))
    if not (attack_name in optimisation_attacks
            or attack_name in fgm_orders or attack_name in igm_orders):
        raise ValueError("unknown attack: {!r}".format(attack_name))

    with tf.Session() as sess:
        if dataset == 'mnist':
            data, model = MNIST(), MNISTModel("models/mnist", sess)
            handpick = False
            inception = False
        elif dataset == 'cifar':
            data, model = CIFAR(), CIFARModel("models/cifar", sess)
            handpick = True
            inception = False
        else:
            data, model = ImageNet(args['seed_imagenet']), InceptionModel(sess)
            handpick = True
            inception = True

        # NOTE(review): this override always loads an MNIST model, whatever
        # the dataset -- confirm that is intended.
        if args['adversarial'] != "none":
            model = MNISTModel("models/mnist_cw" + str(args['adversarial']),
                               sess)

        if args['temp'] and dataset == 'mnist':
            model = MNISTModel("models/mnist-distilled-" + str(args['temp']),
                               sess)
        if args['temp'] and dataset == 'cifar':
            model = CIFARModel("models/cifar-distilled-" + str(args['temp']),
                               sess)

        inputs, targets, labels, true_ids = generate_data(
            data,
            model,
            samples=args['numimg'],
            inception=inception,
            handpick=handpick,
            train=args['train'],
            seed=args['seed'])

        timestart = time.time()
        if attack_name in optimisation_attacks:
            if attack_name == 'L2':
                attack_cls = CarliniL2
            elif attack_name == 'L1':
                attack_cls = EADL1
            else:
                attack_cls = EADEN
            attack = attack_cls(sess,
                                model,
                                batch_size=batch_size,
                                max_iterations=args['maxiter'],
                                confidence=args['conf'],
                                binary_search_steps=args['binary_steps'],
                                beta=args['beta'],
                                abort_early=args['abort_early'])
        elif attack_name in fgm_orders:
            attack = FGM(sess,
                         model,
                         batch_size=batch_size,
                         ord=fgm_orders[attack_name],
                         inception=inception)
        else:
            attack = IGM(sess,
                         model,
                         batch_size=batch_size,
                         ord=igm_orders[attack_name],
                         inception=inception)
        # If untargeted, pass labels instead of targets.
        adv = attack.attack(inputs, targets)
        timeend = time.time()
        print("Took", timeend - timestart, "seconds to run",
              len(inputs) / batch_size, "random instances.")

        if args['train']:
            np.save('labels_train.npy', labels)
            np.save(str(attack_name) + '_train.npy', adv)
            return

        r_best = []
        d_best_l1 = []
        d_best_l2 = []
        d_best_linf = []
        r_average = []
        d_average_l1 = []
        d_average_l2 = []
        d_average_linf = []
        r_worst = []
        d_worst_l1 = []
        d_worst_l2 = []
        d_worst_linf = []

        # NOTE(review): with a non-zero confidence the examples are scored
        # against a hard-coded distilled MNIST model -- confirm this is
        # intended for the other datasets.
        if args['conf'] != 0:
            model = MNISTModel("models/mnist-distilled-100", sess)

        if args['show']:
            out_dir = (str(args['save']) + "/" + str(args['dataset']) + "/" +
                       str(args['attack']))
            if not os.path.exists(out_dir):
                os.makedirs(out_dir)

        num_inputs = len(inputs)
        for i in range(0, num_inputs, batch_size):
            # Clamp so a final partial batch does not index past the data.
            batch_end = min(i + batch_size, num_inputs)

            # Score every example of the batch once: did the model predict
            # the target, and how large is the perturbation.
            success = []
            norms = []
            for j in range(i, batch_end):
                pred = model.model.predict(adv[j:j + 1])
                if inception:
                    pred = np.reshape(pred, data.test_labels[0:1].shape)
                hit = np.argmax(pred, 1) == np.argmax(targets[j:j + 1], 1)
                success.append(bool(hit))
                norms.append(_perturbation_norms(adv[j], inputs[j]))

            # Best case: smallest distortion among the successful examples.
            hits = [n for ok, n in zip(success, norms) if ok]
            if hits:
                d_best_l1.append(min(n[0] for n in hits))
                d_best_l2.append(min(n[1] for n in hits))
                d_best_linf.append(min(n[2] for n in hits))
                r_best.append(1)
            else:
                r_best.append(0)

            # Average case: one example of the batch drawn at random. Its
            # prediction was already computed above, so it is reused.
            rand_int = np.random.randint(i, batch_end)
            if success[rand_int - i]:
                l1, l2, linf = norms[rand_int - i]
                r_average.append(1)
                d_average_l1.append(l1)
                d_average_l2.append(l2)
                d_average_linf.append(linf)
            else:
                r_average.append(0)

            # Worst case: only counts when every example succeeded; then
            # the largest distortion is recorded. Exactly one entry is added
            # to ``r_worst`` per batch.
            if all(success):
                d_worst_l1.append(max(n[0] for n in norms))
                d_worst_l2.append(max(n[1] for n in norms))
                d_worst_linf.append(max(n[2] for n in norms))
                r_worst.append(1)
            else:
                r_worst.append(0)

            if args['show']:
                for j in range(i, batch_end):
                    target_id = np.argmax(targets[j:j + 1], 1)
                    label_id = np.argmax(labels[j:j + 1], 1)
                    prev_id = np.argmax(
                        np.reshape(model.model.predict(inputs[j:j + 1]),
                                   (data.test_labels[0:1].shape)), 1)
                    adv_id = np.argmax(
                        np.reshape(model.model.predict(adv[j:j + 1]),
                                   (data.test_labels[0:1].shape)), 1)
                    l1, l2, linf = norms[j - i]
                    # NOTE(review): the id is taken at the batch start (i),
                    # not at j -- confirm against generate_data whether
                    # true_ids[j] is what is wanted.
                    suffix = ("id{}_seq{}_lbl{}_prev{}_adv{}_{}"
                              "_l1_{:.3f}_l2_{:.3f}_linf_{:.3f}").format(
                                  true_ids[i], target_id, label_id, prev_id,
                                  adv_id, adv_id == target_id, l1, l2, linf)

                    show(inputs[j:j + 1],
                         out_dir + "/original_{}.png".format(suffix))
                    show(adv[j:j + 1],
                         out_dir + "/adversarial_{}.png".format(suffix))

        print('best_case_L1_mean', np.mean(d_best_l1))
        print('best_case_L2_mean', np.mean(d_best_l2))
        print('best_case_Linf_mean', np.mean(d_best_linf))
        print('best_case_prob', np.mean(r_best))
        print('average_case_L1_mean', np.mean(d_average_l1))
        print('average_case_L2_mean', np.mean(d_average_l2))
        print('average_case_Linf_mean', np.mean(d_average_linf))
        print('average_case_prob', np.mean(r_average))
        print('worst_case_L1_mean', np.mean(d_worst_l1))
        print('worst_case_L2_mean', np.mean(d_worst_l2))
        print('worst_case_Linf_mean', np.mean(d_worst_linf))
        print('worst_case_prob', np.mean(r_worst))
    def load_model(self,
                   dataset="mnist",
                   model_name="2-layer",
                   activation="relu",
                   model=None,
                   batch_size=0,
                   compute_slope=False,
                   order=1):
        """
        Select (or accept) a network and build the gradient ops on top of it.

        dataset: "mnist", "cifar" or "imagenet"; only consulted when model is None.
        model_name: "2-layer", "normal", "brelu" or "distilled" for mnist/cifar;
            any other value is parsed as "N,M,opts" and loads an NLayerModel.
            For imagenet the value is forwarded to ImageNetModel.
        activation: with model_name "normal", "relu" loads the default weights
            and any other value loads the "cnn_7layer_<activation>" weights.
        model: an already constructed model; when given, nothing is loaded.
        batch_size: replaces the per-dataset default when non-zero.
        compute_slope: stored on self unchanged.
        order: when 2, the Hessian-vector-product ops and the two
            eigenvalue-estimation loops are built as well.

        Returns the image placeholder and the model output tensor.
        Raises RuntimeError for an unknown dataset or a malformed "N,M,opts".
        """
        from setup_cifar import CIFAR, CIFARModel, TwoLayerCIFARModel
        from setup_mnist import MNIST, MNISTModel, TwoLayerMNISTModel
        from nlayer_model import NLayerModel
        from setup_imagenet import ImageNet, ImageNetModel

        # With output_logits set, models are asked for the logit layer rather
        # than probabilities (larger, more stable gradients).
        output_logits = True
        use_softmax = not output_logits
        self.dataset = dataset
        self.model_name = model_name

        def flatten_per_image(tensor):
            # Keep the leading dimension, collapse everything else.
            return tf.reshape(tensor, (tf.shape(tensor)[0], -1))

        def build_nlayer(path_template, **extra_kwargs):
            # model_name is expected to look like "N,M,opts".
            fields = model_name.split(",")
            if len(fields) < 3:
                raise RuntimeError("incorrect model option" + model_name)
            depth = int(fields[0])
            width = int(fields[1])
            weights_path = path_template.format(depth, width, fields[2])
            print("loading", weights_path)
            return NLayerModel([width] * (depth - 1), weights_path,
                               **extra_kwargs)

        if model is None:
            print('Loading model...')
            if dataset == "mnist":
                self.batch_size = 1024
                if model_name == "2-layer":
                    model = TwoLayerMNISTModel("models/mnist_2layer",
                                               self.sess, use_softmax)
                elif model_name == "normal" and activation == "relu":
                    model = MNISTModel("models/mnist", self.sess, use_softmax)
                elif model_name == "normal":
                    print("actviation = {}".format(activation))
                    model = MNISTModel("models/mnist_cnn_7layer_" + activation,
                                       self.sess,
                                       use_softmax,
                                       activation=activation)
                    time.sleep(5)
                elif model_name == "brelu":
                    model = MNISTModel("models/mnist_brelu",
                                       self.sess,
                                       use_softmax,
                                       use_brelu=True)
                elif model_name == "distilled":
                    model = MNISTModel("models/mnist-distilled-100", self.sess,
                                       use_softmax)
                else:
                    model = build_nlayer("models/mnist_{}layer_relu_{}_{}")
            elif dataset == "cifar":
                self.batch_size = 1024
                if model_name == "2-layer":
                    model = TwoLayerCIFARModel("models/cifar_2layer",
                                               self.sess, use_softmax)
                elif model_name == "normal" and activation == "relu":
                    model = CIFARModel("models/cifar", self.sess, use_softmax)
                elif model_name == "normal":
                    model = CIFARModel("models/cifar_cnn_7layer_" + activation,
                                       self.sess,
                                       use_softmax,
                                       activation=activation)
                elif model_name == "brelu":
                    model = CIFARModel("models/cifar_brelu",
                                       self.sess,
                                       use_softmax,
                                       use_brelu=True)
                elif model_name == "distilled":
                    model = CIFARModel("models/cifar-distilled-100", self.sess,
                                       use_softmax)
                else:
                    model = build_nlayer("models/cifar_{}layer_relu_{}_{}",
                                         image_size=32,
                                         image_channel=3)
            elif dataset == "imagenet":
                self.batch_size = 32
                model = ImageNetModel(self.sess,
                                      use_softmax=use_softmax,
                                      model_name=model_name,
                                      create_prediction=False)
            else:
                raise RuntimeError("dataset unknown")

        self.model = model
        self.compute_slope = compute_slope
        if batch_size != 0:
            self.batch_size = batch_size

        # Placeholders: image batch plus scalar true/target class indices.
        image_shape = [
            None, model.image_size, model.image_size, model.num_channels
        ]
        self.img = tf.placeholder(shape=image_shape, dtype=tf.float32)
        self.output = model.predict(self.img)
        self.true_label = tf.placeholder(dtype=tf.int32, shape=[])
        self.target_label = tf.placeholder(dtype=tf.int32, shape=[])

        # Objective: output of the true class minus output of the target
        # class, differentiated with respect to the input image.
        true_score = self.output[:, self.true_label]
        target_score = self.output[:, self.target_label]
        self.objective = true_score - target_score
        self.grad_op = tf.gradients(self.objective, self.img)[0]

        # Per-image gradient norms, computed inside the graph.
        flat_grad = flatten_per_image(self.grad_op)
        self.grad_2_norm_op = tf.norm(flat_grad, axis=1)
        self.grad_1_norm_op = tf.norm(flat_grad, ord=1, axis=1)
        self.grad_inf_norm_op = tf.norm(flat_grad, ord=np.inf, axis=1)

        if order != 2:
            return self.img, self.output

        # ---- second-order ops: Hessian-vector product with self.randv ----
        # self.randv has the same shape as self.img.
        self.randv = tf.placeholder(shape=image_shape, dtype=tf.float32)
        hess_vec = gradients_impl._hessian_vector_product(
            self.objective, [self.img], [self.randv])[0]
        flat_hess_vec = flatten_per_image(hess_vec)
        # keepdims so the division below broadcasts per image.
        self.hv_norm_op = tf.norm(flat_hess_vec, axis=1, keepdims=True)
        unit_hess_vec = flat_hess_vec / self.hv_norm_op
        # Hv / ||Hv||, reshaped back to the image shape.
        self.hv_op = tf.reshape(unit_hess_vec, tf.shape(hess_vec))

        flat_randv = flatten_per_image(self.randv)
        self.randv_norm_op = tf.norm(flat_randv, axis=1)
        # v'Hv uses the un-normalised Hv.
        self.vhv_op = tf.reduce_sum(tf.multiply(flat_randv, flat_hess_vec),
                                    axis=1)
        # Rayleigh quotient v'Hv / v'v.
        self.eig_est = self.vhv_op / tf.square(self.randv_norm_op)

        def keep_iterating(step, vec, eig_now, eig_before, shift):
            # Continue while under 500 steps and the estimate still moves.
            change = tf.norm(eig_now - eig_before, axis=0)
            return tf.logical_and(step < 500, change > 0.001)

        def iterate(step, vec, eig_now, eig_before, shift):
            # One step on (H - shift * I): next vector is the normalised
            # product, the estimate is the Rayleigh quotient of ``vec``.
            shifted = gradients_impl._hessian_vector_product(
                self.objective, [self.img], [vec])[0] - tf.multiply(
                    shift, vec)
            flat_shifted = flatten_per_image(shifted)
            shifted_norm = tf.norm(flat_shifted, axis=1, keepdims=True)
            next_vec = tf.reshape(flat_shifted / shifted_norm,
                                  tf.shape(shifted))

            flat_vec = flatten_per_image(vec)
            vec_norm = tf.norm(flat_vec, axis=1)
            quad_form = tf.reduce_sum(tf.multiply(flat_vec, flat_shifted),
                                      axis=1)
            next_eig = quad_form / tf.square(vec_norm)

            return (step + 1, next_vec, next_eig, eig_now, shift)

        it = tf.constant(0)
        # Loop with shift 0: largest-magnitude eigenvalue estimate.
        zero_shift = tf.constant(0.0)
        abs_loop = tf.while_loop(
            keep_iterating, iterate,
            [it, self.randv, self.vhv_op, self.eig_est, zero_shift])
        # Loop with a caller-fed shift: largest negative eigenvalue estimate.
        self.shiftconst = tf.placeholder(shape=(), dtype=tf.float32)
        neg_loop = tf.while_loop(
            keep_iterating, iterate,
            [it, self.randv, self.vhv_op, self.eig_est, self.shiftconst])

        self.it = abs_loop[0]
        self.while_hv_op = abs_loop[1]
        self.while_eig = abs_loop[2]

        self.it_1 = neg_loop[0]
        # Add the shift back onto the shifted loop's estimate.
        self.while_eig_1 = tf.add(neg_loop[2], neg_loop[4])

        # Flip to True to dump the tensors defined above.
        show_tensor_op = False
        if show_tensor_op:
            print("====================")
            print("Define hessian_vector_product operator: ")
            print("hv_op_tmp = {}".format(hess_vec))
            print("hv_op_rs = {}".format(flat_hess_vec))
            print("self.hv_norm_op = {}".format(self.hv_norm_op))
            print("hv_op_rs_normalize = {}".format(unit_hess_vec))
            print("self.hv_op = {}".format(self.hv_op))
            print("self.grad_op = {}".format(self.grad_op))
            print("randv_rs = {}".format(flat_randv))
            print("self.randv_norm_op = {}".format(self.randv_norm_op))
            print("self.vhv_op = {}".format(self.vhv_op))
            print("self.eig_est = {}".format(self.eig_est))
            print("====================")

        return self.img, self.output
Example #12
0
def main(args):
    """Run a targeted attack and print how often predictions hit target/original labels.

    The function loads a dataset and model, generates attack inputs with
    ``generate_data_ST``, runs the selected attack, and then evaluates the
    resulting perturbation through the ``evaluate_perturbation_kb*`` helpers.

    Args:
        args: dict of run options. Keys read here: 'dataset', 'seed_imagenet',
            'adversarial', 'temp', 'numimg', 'numimgT', 'seed', 'attack',
            'batch_size', 'maxiter', 'conf', 'binary_steps', 'abort_early',
            'layer_number', 'use_kernel', 'iteration_steps', 'ro' and
            'kernel_bias'. The whole dict is also forwarded to the evaluators.

    Raises:
        ValueError: if args['dataset'] or args['attack'] is not a supported name
            (previously this surfaced later as a NameError).
        NotImplementedError: if args['kernel_bias'] is falsy; the statistics
            below are only produced by the kernel/bias evaluators (previously
            this surfaced as a NameError on ``scores``).
    """

    def _hits(score_rows, reference):
        """Return a 0/1 list: 1 where argmax of a score row equals argmax of its reference row."""
        return [int(np.argmax(row) == np.argmax(reference[idx]))
                for idx, row in enumerate(score_rows)]

    with tf.Session() as sess:
        if args['dataset'] == 'mnist':
            data, model = MNIST(), MNISTModel("models/mnist", sess)
            handpick = False
            inception = False
        elif args['dataset'] == "cifar":
            data, model = CIFAR(), CIFARModel("models/cifar", sess)
            handpick = True
            inception = False
        elif args['dataset'] == "imagenet":
            data, model = ImageNet(args['seed_imagenet']), InceptionModel(sess)
            handpick = True
            inception = True
        else:
            raise ValueError("unsupported dataset: {!r}".format(args['dataset']))

        # NOTE(review): these overrides always build MNIST/CIFAR models from
        # fixed path prefixes, whatever dataset was selected above - confirm.
        if args['adversarial'] != "none":
            model = MNISTModel("models/mnist_cwl2_admm" + str(args['adversarial']), sess)

        if args['temp'] and args['dataset'] == 'mnist':
            model = MNISTModel("models/mnist-distilled-" + str(args['temp']), sess)
        if args['temp'] and args['dataset'] == 'cifar':
            model = CIFARModel("models/cifar-distilled-" + str(args['temp']), sess)

        inputs, targets, labels, true_ids = generate_data_ST(data, model, samples=args['numimg'],
                                                             samplesT=args['numimgT'], targeted=True,
                                                             start=0, inception=inception,
                                                             handpick=handpick, seed=args['seed'])

        if args['attack'] == 'L2C':
            attack = CarliniL2(sess, model, batch_size=args['batch_size'], max_iterations=args['maxiter'],
                               confidence=args['conf'],
                               binary_search_steps=args['binary_steps'],
                               abort_early=args['abort_early'])
        elif args['attack'] == 'L2LA2':
            attack = LADMML2re(sess, model, batch_size=args['batch_size'], max_iterations=args['maxiter'],
                               layernum=args['layer_number'], use_kernel=args['use_kernel'],
                               confidence=args['conf'], binary_search_steps=args['iteration_steps'],
                               ro=args['ro'],
                               abort_early=args['abort_early'])
        else:
            raise ValueError("unsupported attack: {!r}".format(args['attack']))

        timestart = time.time()
        adv = attack.attack(inputs, targets)
        timeend = time.time()

        print("Took", timeend - timestart, "seconds to run", len(inputs), "samples.\n")

        if args['conf'] != 0:
            model = MNISTModel("models/mnist-distilled-100", sess)

        if not args['kernel_bias']:
            # Only the kernel/bias evaluators produce scores, scores2, the
            # test-set scores and l2; without them none of the statistics
            # below can be computed, so fail with a clear message.
            raise NotImplementedError(
                "evaluation statistics are only implemented for args['kernel_bias']")

        EP = evaluate_perturbation_kb(args, sess, model, inputs)
        scores, l2 = EP(inputs, targets, adv)
        EPT = evaluate_perturbation_testset(args, sess, model, data.test_data)
        test_scores = EPT(data.test_data, data.test_labels)
        EP2 = evaluate_perturbation_kb_restore(args, sess, model, inputs)
        scores2 = EP2(inputs, targets, adv)
        EPT2 = evaluate_perturbation_testset(args, sess, model, data.test_data)
        test_scores2 = EPT2(data.test_data, data.test_labels)

        # scores vs. target labels; score_count4 restricts the same flags to
        # the first args['numimg'] entries.
        score_count = _hits(scores, targets)
        score_count4 = [hit for e, hit in enumerate(score_count) if e < args['numimg']]
        # scores / scores2 vs. the original labels.
        score_count3 = _hits(scores, labels)
        score_count2 = _hits(scores2, labels)

        test_score_count = _hits(test_scores, data.test_labels)
        test_score_count2 = _hits(test_scores2, data.test_labels)

        l0s = np.count_nonzero(adv)
        successrate = np.mean(score_count)
        successrate2 = np.mean(score_count2)
        successrate3 = np.mean(score_count3)
        test_successrate = np.mean(test_score_count)
        test_successrate2 = np.mean(test_score_count2)

        print('original model, success rate of T images for the original labels:', successrate2)
        print('modified model, success rate of T images for the original labels:', successrate3)
        print('modified model, success rate of T images for the target labels:', successrate)
        print('modified model, success rate of S imges for the target labels:', np.mean(score_count4))

        print('modified model, success rate of test set for the original labels:', test_successrate)
        print('original model, success rate of test set for the original labels:', test_successrate2)
        print('l0 distance:', l0s)
        print('l2 distance:', l2)
# Script fragment: parses two numeric command-line options, creates the
# TensorFlow/Keras session, loads a dataset with its pre-trained model, prints
# the model's accuracy and loads pre-computed adversarial training examples.
# NOTE(review): `dataset` and `mode` are used below but assigned outside this
# fragment - presumably from sys.argv[1] and sys.argv[2]; confirm.
K = int(sys.argv[3])
bias = float(sys.argv[4])

# Let TensorFlow grow GPU memory on demand and hand that session to Keras.
config = tf.ConfigProto()
config.gpu_options.allow_growth=True
set_session(tf.Session(config=config))

sess = Keras.get_session()
Keras.set_learning_phase(False)

# Fixed seeds for NumPy and TensorFlow.
np.random.seed(1)
tf.set_random_seed(1)

# Pick the dataset, its model and an input placeholder of matching shape.
# NOTE(review): any other `dataset` value leaves data/model/x undefined and
# the accuracy computation below fails with a NameError.
if dataset == "MNIST":
    data = MNIST()
    model = MNISTModel("../1-Models/MNIST")
    x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
elif dataset == "CIFAR":
    data = CIFAR()
    model = CIFARModel("../1-Models/CIFAR")
    x = tf.placeholder(tf.float32, shape=(None, 32, 32, 3))

# Accuracy = fraction of samples whose predicted argmax equals the label argmax.
training_accuracy = np.mean(np.argmax(model.model.predict(data.train_data), axis = 1) == np.argmax(data.train_labels, axis = 1))
print("Training Accuracy: " + str(training_accuracy))
testing_accuracy = np.mean(np.argmax(model.model.predict(data.test_data), axis = 1) == np.argmax(data.test_labels, axis = 1))
print("Testing Accuracy: " + str(testing_accuracy))

# Clean training inputs and the adversarial array stored for this dataset/mode.
X = data.train_data
X_adv = np.load("../2-AEs/" + dataset + "/train_" + mode + ".npy")

# Model outputs on the clean training inputs.
pred_original = model.model.predict(X)
Example #14
0
def main(args):
    """Run the selected attack on generated samples and hand the result to the distance reports.

    Args:
        args: dict of run options. Keys read here: 'dataset', 'seed_imagenet',
            'adversarial', 'temp', 'numimg', 'targeted', 'seed', 'attack',
            'batch_size', 'maxiter', 'conf', 'binary_steps', 'abort_early',
            'iteration_steps', 'ro', 'gama', 'epi', 'alpha' and 'train'.
            The whole dict is also forwarded to the reporting helpers.

    Raises:
        ValueError: if args['dataset'] or args['attack'] is not a supported name
            (previously this surfaced later as a NameError on an unbound local).
    """
    with tf.Session() as sess:
        if args['dataset'] == 'mnist':
            data, model = MNIST(), MNISTModel("models/mnist", sess)
            handpick = False
            inception = False
        elif args['dataset'] == "cifar":
            data, model = CIFAR(), CIFARModel("models/cifar", sess)
            handpick = True
            inception = False
        elif args['dataset'] == "imagenet":
            data, model = ImageNet(args['seed_imagenet']), InceptionModel(sess)
            handpick = True
            inception = True
        else:
            raise ValueError("unsupported dataset: {!r}".format(
                args['dataset']))

        # NOTE(review): these overrides always build MNIST/CIFAR models from
        # fixed path prefixes, whatever dataset was selected above - confirm.
        if args['adversarial'] != "none":
            model = MNISTModel(
                "models/mnist_cwl2_admm" + str(args['adversarial']), sess)

        if args['temp'] and args['dataset'] == 'mnist':
            model = MNISTModel("models/mnist-distilled-" + str(args['temp']),
                               sess)
        if args['temp'] and args['dataset'] == 'cifar':
            model = CIFARModel("models/cifar-distilled-" + str(args['temp']),
                               sess)

        inputs, targets, labels, true_ids = generate_data(
            data,
            model,
            samples=args['numimg'],
            targeted=args['targeted'],
            start=0,
            inception=inception,
            handpick=handpick,
            seed=args['seed'])

        if args['attack'] == 'L2C':
            attack = CarliniL2(sess,
                               model,
                               batch_size=args['batch_size'],
                               max_iterations=args['maxiter'],
                               confidence=args['conf'],
                               targeted=args['targeted'],
                               binary_search_steps=args['binary_steps'],
                               abort_early=args['abort_early'])
        elif args['attack'] == 'L2BB':
            # score-based ZO-ADMM attack
            attack = LADMMBB(sess,
                             model,
                             batch_size=args['batch_size'],
                             max_iterations=args['maxiter'],
                             targeted=args['targeted'],
                             confidence=args['conf'],
                             binary_search_steps=args['iteration_steps'],
                             ro=args['ro'],
                             abort_early=args['abort_early'],
                             gama=args['gama'],
                             epi=args['epi'],
                             alpha=args['alpha'])
        else:
            raise ValueError("unsupported attack: {!r}".format(
                args['attack']))

        timestart = time.time()
        # The attack is expected to return the adversarial examples together
        # with two query statistics that the reports below consume.
        adv, querycount, queryl2 = attack.attack(inputs, targets)
        timeend = time.time()
        print("Took", timeend - timestart, "seconds to run", len(inputs),
              "samples.\n")

        if args['train']:
            np.save('labels_train.npy', labels)
            np.save(str(args['attack']) + '_train.npy', adv)

        if (args['conf'] != 0):
            model = MNISTModel("models/mnist-distilled-100", sess)

        if args['targeted']:
            l1_l2_li_computation(args, data, model, adv, inception, inputs,
                                 targets, labels, true_ids, querycount,
                                 queryl2)
        else:
            l2_computation(args, data, model, adv, inception, inputs, targets,
                           labels, true_ids, querycount, queryl2)
Example #15
0
    assert args.lsa ^ args.dsa, "Select either 'lsa' or 'dsa'"
    print(args)

    if args.d == "mnist":
        #(x_train, y_train), (x_test, y_test) = mnist.load_data()
        data = MNIST()
        x_train = data.train_data
        y_train = data.train_labels
        x_test = data.test_data
        y_test = data.test_labels
        x_train = x_train.reshape(-1, 28, 28, 1)
        x_test = x_test.reshape(-1, 28, 28, 1)

        # Load pre-trained model.
        #model = load_model("./model/model_mnist.h5")
        model = MNISTModel("./models/mnist")
        model = model.model
        model.summary()

        # You can select some layers you want to test.
        # layer_names = ["activation_1"]
        # layer_names = ["activation_2"]
        layer_names = ["activation_3"]

        # Load target set.
        #x_target = np.load("./adv/adv_mnist_{}.npy".format(args.target))

        #x_target = []
        #for i in range(1, 10):
        #    target_img = imread("/tmp/adv_result_{}_to_0.jpg".format(i))
        #    x_target.append(target_img)
Example #16
0
def main():
    """Compare four classifiers on original versus perturbed data and print their accuracies.

    Command-line options select the model directory, the two ``.npz`` archives
    (each must provide 'train_data', 'test_data', 'train_label' and
    'test_label'), the image size and the channel count. For every model the
    accuracy on the original and on the perturbed images is printed.

    Raises:
        NotImplementedError: if ``--train_new`` is given.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--model_path',
        type=str,
        default="models/",
        help="Path to save trained model.")
    parser.add_argument(
        '--pert_data',
        type=str,
        default='./MNIST_data/perturbed.npz',
        help='Path to LFW perturbed data.')
    parser.add_argument(
        '--orig_data',
        type=str,
        default='MNIST_data/B.npz',
        help="Path to original data."
    )
    parser.add_argument(
        '--image_size', type=int, default=28, help='Size of input images.')
    parser.add_argument(
        '--num_channels',
        type=int,
        default=1,
        help='Number of channels in input images.')
    parser.add_argument(
        '--train_new',
        dest='train_new',
        action='store_true',
        help='Train a new classifier.')
    parser.set_defaults(train_new=False)
    args = parser.parse_args()

    def _load_archive(path):
        """Return (images, evil class indices, good class indices) for one archive."""
        # The context manager closes the archive once the arrays are in memory.
        with np.load(path) as archive:
            images = np.concatenate(
                (archive['train_data'], archive['test_data']))
            raw_labels = np.concatenate(
                (archive['train_label'], archive['test_label']))
        images = images.reshape(
            images.shape[0], args.image_size, args.image_size, -1)
        good = odd_even_labels(raw_labels).argmax(axis=1)
        evil = np.argmax(raw_labels, axis=1)
        return images, evil, good

    pert_data, pert_evil_label, pert_good_label = _load_archive(args.pert_data)
    orig_data, orig_evil_label, orig_good_label = _load_archive(args.orig_data)
    # Single-argument print calls produce the same text on Python 2 and 3.
    print('Original data shape: %s' % (orig_data.shape,))

    good_used = OddEvenMNIST(args.model_path + 'A_odd_even')
    good_left = OddEvenMNIST(args.model_path + 'C_odd_even')
    evil_used = MNISTModel(args.model_path + 'A_digits')
    evil_left = MNISTModel(args.model_path + 'C_digits')

    evil_pair = (orig_evil_label, pert_evil_label)
    good_pair = (orig_good_label, pert_good_label)

    for model, label_pair, name in zip(
            [evil_used, good_used, evil_left, good_left],
            [evil_pair, good_pair, evil_pair, good_pair],
            ['Used Evil', 'Used Good', 'Left-out Evil', 'Left-out Good']):

        org_true, pert_true = label_pair
        print(name + ':')
        org_pred = np.argmax(model.model.predict(orig_data), axis=1)
        print(org_pred.shape)
        print(org_true.shape)
        org_acc = accuracy_score(org_true, org_pred)
        print('\tOriginal Accuracy: %.4f' % org_acc)
        dst_pred = np.argmax(model.model.predict(pert_data), axis=1)
        dst_acc = accuracy_score(pert_true, dst_pred)
        print('\tPerturbed Accuracy: %.4f' % dst_acc)

    if args.train_new:
        # Train a new classifier with the new training data, test with original test data.
        raise NotImplementedError(
            'Training new classifier is not yet implemented.')
Example #17
0
    def load_model(self,
                   dataset="mnist",
                   model_name="2-layer",
                   model=None,
                   batch_size=0,
                   compute_slope=False):
        """
        Pick (or accept) a classifier and build the gradient graph around it.

        dataset: "mnist", "cifar" or "imagenet"; only consulted when model is None.
        model_name: for mnist/cifar one of "2-layer", "normal", "brelu" or
            "distilled"; for imagenet it is forwarded to ImageNetModel.
        model: an already constructed model to use directly; None means one is
            built from dataset and model_name.
        batch_size: when non-zero, replaces the per-dataset batch size.
        compute_slope: stored on the instance unchanged.

        Returns the image placeholder and the model's output tensor.
        Raises RuntimeError for an unknown dataset or model_name.
        """
        import tensorflow as tf
        from setup_cifar import CIFAR, CIFARModel, TwoLayerCIFARModel
        from setup_mnist import MNIST, MNISTModel, TwoLayerMNISTModel
        from setup_imagenet import ImageNet, ImageNetModel

        # Work on the logit layer instead of probabilities: its gradients are
        # usually larger and more stable.
        output_logits = True
        softmax_flag = not output_logits
        self.dataset = dataset
        self.model_name = model_name

        if model is None:
            print('Loading model...')
            # Each entry: (model_name, constructor, weights path, extra kwargs).
            if dataset == "mnist":
                variants = (
                    ("2-layer", TwoLayerMNISTModel, "models/mnist_2layer", {}),
                    ("normal", MNISTModel, "models/mnist", {}),
                    ("brelu", MNISTModel, "models/mnist_brelu",
                     {"use_brelu": True}),
                    ("distilled", MNISTModel, "models/mnist-distilled-100", {}),
                )
            elif dataset == "cifar":
                variants = (
                    ("2-layer", TwoLayerCIFARModel, "models/cifar_2layer", {}),
                    ("normal", CIFARModel, "models/cifar", {}),
                    ("brelu", CIFARModel, "models/cifar_brelu",
                     {"use_brelu": True}),
                    ("distilled", CIFARModel, "models/cifar-distilled-100", {}),
                )
            elif dataset == "imagenet":
                variants = None
            else:
                raise (RuntimeError("dataset unknown"))

            if variants is None:
                self.batch_size = 32
                model = ImageNetModel(self.sess,
                                      use_softmax=softmax_flag,
                                      model_name=model_name,
                                      create_prediction=False)
            else:
                self.batch_size = 1024
                for option, factory, weights, extra in variants:
                    if model_name == option:
                        model = factory(weights, self.sess, softmax_flag,
                                        **extra)
                        break
                else:
                    raise (RuntimeError("incorrect model option"))

        self.model = model
        self.compute_slope = compute_slope
        if batch_size != 0:
            # an explicit batch size wins over the per-dataset value
            self.batch_size = batch_size

        # placeholder for the image input
        input_shape = [
            None, model.image_size, model.image_size, model.num_channels
        ]
        self.img = tf.placeholder(shape=input_shape, dtype=tf.float32)
        # output tensor of the entire network
        self.output = model.predict(self.img)
        # scalar placeholders for the true label and the target label
        self.true_label = tf.placeholder(dtype=tf.int32, shape=[])
        self.target_label = tf.placeholder(dtype=tf.int32, shape=[])
        true_column = self.output[:, self.true_label]
        target_column = self.output[:, self.target_label]
        # objective: true-label output minus target-label output
        self.objective = true_column - target_column
        # gradient of the objective with respect to the input image
        self.grad_op = tf.gradients(self.objective, self.img)[0]
        # per-sample gradient norms, taken over the flattened gradient
        flat_grad = tf.reshape(self.grad_op, (tf.shape(self.grad_op)[0], -1))
        self.grad_2_norm_op = tf.norm(flat_grad, axis=1)
        self.grad_1_norm_op = tf.norm(flat_grad, ord=1, axis=1)
        self.grad_inf_norm_op = tf.norm(flat_grad, ord=np.inf, axis=1)

        return self.img, self.output
Example #18
0
if __name__ == "__main__":
    # Runs the unsupervised MINE attack on the MNIST training images and
    # stores the two arrays returned by the attack as .npy files.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    # directory the attack outputs are written to
    # NOTE(review): the directory is not created here; np.save fails if it is missing.
    dir_adv = 'unsupervised_attack/'

    # path of the autoencoder model to load
    dir_model = 'models/MNIST/convAE'

    data = MNIST()
    inputs = data.train_data

    tf.reset_default_graph()
    with tf.Session(config=config) as sess:

        model = MNISTModel(dir_model, sess)
        attack = MINE_unsupervised(sess,
                                   model,
                                   batch_size=1,
                                   max_iterations=40,
                                   confidence=0,
                                   epsilon=1.0,
                                   mine_batch='conv')
        adv, Mi = attack.attack(inputs)

        # Save the attack result itself; the previous code passed `image`,
        # which this script never defines, and left `adv` unused.
        np.save(dir_adv + 'adv.npy', adv)
        np.save(dir_adv + 'mi.npy', Mi)

    tf.get_default_graph().finalize()
Example #19
0
def main(args):
    """Attack test images one at a time, save the result images and print running statistics.

    For every generated sample the model first classifies the encoded input;
    misclassified samples are skipped. The remaining ones are attacked, the
    adversarial image is re-encoded channel by channel with
    ``temp_encoder.tempencoding`` and classified again. Original, adversarial
    and difference images are written under ``<save>/<dataset>/``.

    Args:
        args: dict of run options. Keys read here: 'level', 'use_zvalue',
            'dataset', 'numimg', 'firstimg', 'attack', 'maxiter',
            'print_every', 'early_stop_iters', 'lr', 'init_const',
            'binary_steps', 'untargeted', 'adam_beta1', 'adam_beta2',
            'use_tanh', 'use_resize', 'reset_adam', 'solver', 'save_ckpts',
            'load_ckpt', 'start_iter', 'init_size', 'uniform', 'seed', 'save'.
            args['numimg'] is overwritten in place when it is 0.

    Raises:
        ValueError: if args['dataset'] is not "mnist", "cifar10" or "imagenet"
            (previously this surfaced later as a NameError).
        OSError: if the output directory cannot be created.
    """
    temp_encoder = encoder(level=args['level'])
    with tf.Session() as sess:
        use_log = not args['use_zvalue']
        is_inception = args['dataset'] == "imagenet"
        # load network
        print('Loading model', args['dataset'])
        if args['dataset'] == "mnist":
            data, model = MNIST(), MNISTModel("models/mnist", sess, use_log)
        elif args['dataset'] == "cifar10":
            data, model = CIFAR(), CIFAR_WIDE("models/wide_resnet", sess,
                                              use_log)
        elif args['dataset'] == "imagenet":
            data, model = ImageNet(), InceptionModel(sess, use_log)
        else:
            raise ValueError("unsupported dataset: {!r}".format(
                args['dataset']))
        print('Done...')
        if args['numimg'] == 0:
            # 0 means "all remaining test images from firstimg onwards"
            args['numimg'] = len(data.test_labels) - args['firstimg']
        print('Using', args['numimg'], 'test images')
        # load attack module
        if args['attack'] == "white":
            # batch size 1, optimize on 1 image at a time, rather than optimizing images jointly
            attack = CarliniL2(sess,
                               model,
                               batch_size=1,
                               max_iterations=args['maxiter'],
                               print_every=args['print_every'],
                               early_stop_iters=args['early_stop_iters'],
                               confidence=0,
                               learning_rate=args['lr'],
                               initial_const=args['init_const'],
                               binary_search_steps=args['binary_steps'],
                               targeted=not args['untargeted'],
                               use_log=use_log,
                               adam_beta1=args['adam_beta1'],
                               adam_beta2=args['adam_beta2'])
        else:
            # batch size 128, optimize on 128 coordinates of a single image
            attack = BlackBoxL2(sess,
                                model,
                                batch_size=128,
                                max_iterations=args['maxiter'],
                                print_every=args['print_every'],
                                early_stop_iters=args['early_stop_iters'],
                                confidence=0,
                                learning_rate=args['lr'],
                                initial_const=args['init_const'],
                                binary_search_steps=args['binary_steps'],
                                targeted=not args['untargeted'],
                                use_log=use_log,
                                use_tanh=args['use_tanh'],
                                use_resize=args['use_resize'],
                                adam_beta1=args['adam_beta1'],
                                adam_beta2=args['adam_beta2'],
                                reset_adam_after_found=args['reset_adam'],
                                solver=args['solver'],
                                save_ckpts=args['save_ckpts'],
                                load_checkpoint=args['load_ckpt'],
                                start_iter=args['start_iter'],
                                init_size=args['init_size'],
                                use_importance=not args['uniform'])

        random.seed(args['seed'])
        np.random.seed(args['seed'])
        print('Generate data')
        all_inputs, all_targets, all_labels, all_true_ids, encoding_all = generate_data(
            data,
            samples=args['numimg'],
            targeted=not args['untargeted'],
            start=args['firstimg'],
            inception=is_inception)
        print('Done...')

        # Create the output directory without going through a shell: no word
        # splitting or interpolation of the path, and failures are reported.
        out_dir = "{}/{}".format(args['save'], args['dataset'])
        try:
            os.makedirs(out_dir)
        except OSError:
            # an already existing directory is fine; anything else is an error
            if not os.path.isdir(out_dir):
                raise

        img_no = 0
        total_success = 0
        l2_total = 0.0
        # NOTE: the comparisons added to these counters yield length-1 boolean
        # arrays, so both counters end up as NumPy arrays rather than ints.
        origin_correct = 0
        adv_correct = 0
        for i in range(all_true_ids.size):
            print(' adversarial_image_no: ', i)
            inputs = all_inputs[i:i + 1]
            encoding_inputs = encoding_all[i:i + 1]
            targets = all_targets[i:i + 1]
            labels = all_labels[i:i + 1]
            print("true labels:", np.argmax(labels), labels)
            print("target:", np.argmax(targets), targets)
            # test if the image is correctly classified
            original_predict = model.model.predict(encoding_inputs)
            original_predict = np.squeeze(original_predict)
            original_prob = np.sort(original_predict)
            original_class = np.argsort(original_predict)
            print("original probabilities:", original_prob[-1:-6:-1])
            print("original classification:", original_class[-1:-6:-1])
            print("original probabilities (most unlikely):", original_prob[:6])
            print("original classification (most unlikely):",
                  original_class[:6])
            if original_class[-1] != np.argmax(labels):
                print(
                    "skip wrongly classified image no. {}, original class {}, classified as {}"
                    .format(i, np.argmax(labels), original_class[-1]))
                continue
            origin_correct += np.argmax(labels, 1) == original_class[-1]

            img_no += 1
            timestart = time.time()
            adv, const = attack.attack_batch(inputs, targets)
            if isinstance(const, list):
                const = const[0]
            if len(adv.shape) == 3:
                # restore the leading batch axis
                adv = adv.reshape((1, ) + adv.shape)
            timeend = time.time()
            l2_distortion = np.sum((adv - inputs)**2)**.5

            # Re-encode the adversarial image: move channels to axis 1, encode
            # each of the three channels separately, then stack them back and
            # return to channels-last order.
            # NOTE(review): indexing channels 0..2 requires a 3-channel image;
            # this looks incompatible with the "mnist" option - confirm.
            channels_first = np.transpose(adv, axes=(0, 3, 1, 2))
            encoded_channels = [
                temp_encoder.tempencoding(channels_first[:, c, :, :])
                for c in range(3)
            ]
            encode_adv = np.concatenate(encoded_channels, axis=1)
            encode_adv = np.transpose(encode_adv, axes=(0, 2, 3, 1))

            adversarial_predict = model.model.predict(encode_adv)
            adversarial_predict = np.squeeze(adversarial_predict)
            adversarial_prob = np.sort(adversarial_predict)
            adversarial_class = np.argsort(adversarial_predict)
            print("adversarial probabilities:", adversarial_prob[-1:-6:-1])
            print("adversarial classification:", adversarial_class[-1:-6:-1])

            adv_correct += np.argmax(labels, 1) == adversarial_class[-1]

            # Untargeted: the top class must change. Targeted: the top class
            # must equal the target. Either way a distortion above 20.0 is
            # counted as a failure.
            if args['untargeted']:
                success = adversarial_class[-1] != original_class[-1]
            else:
                success = adversarial_class[-1] == np.argmax(targets)
            success = bool(success) and not l2_distortion > 20.0
            if success:
                total_success += 1
                l2_total += l2_distortion
            suffix = "id{}_seq{}_prev{}_adv{}_{}_dist{}".format(
                all_true_ids[i], i, original_class[-1], adversarial_class[-1],
                success, l2_distortion)
            print("Saving to", suffix)
            show(
                inputs,
                "{}/{}/{}_original_{}.png".format(args['save'],
                                                  args['dataset'], img_no,
                                                  suffix))
            show(
                adv,
                "{}/{}/{}_adversarial_{}.png".format(args['save'],
                                                     args['dataset'], img_no,
                                                     suffix))
            show(
                adv - inputs,
                "{}/{}/{}_diff_{}.png".format(args['save'], args['dataset'],
                                              img_no, suffix))
            print(
                "[STATS][L1] total = {}, seq = {}, id = {}, time = {:.3f}, success = {}, const = {:.6f}, prev_class = {}, new_class = {}, distortion = {:.5f}, success_rate = {:.3f}, l2_avg = {:.5f}"
                .format(img_no, i, all_true_ids[i], timeend - timestart,
                        success, const, original_class[-1],
                        adversarial_class[-1], l2_distortion,
                        total_success / float(img_no),
                        0 if total_success == 0 else l2_total / total_success))
            sys.stdout.flush()

        # Both percentages are relative to all generated samples, including
        # the ones skipped above.
        print(' origin accuracy : ',
              100.0 * origin_correct / all_true_ids.size)
        print(' adv accuracy : ', 100.0 * adv_correct / all_true_ids.size)
Example #20
0
def main(args):
    """Run a zeroth-order (ZO) adversarial attack on a set of test images.

    For every selected test image that the model classifies correctly, a
    perturbation ``delta_adv`` is optimised for ``args['maxiter']`` steps
    using gradient estimates only (no back-propagation through the model).
    Per-image results are written with ``np.savez`` under ``retperimage2/``
    and summary statistics are printed at the end.

    Args:
        args: dict-like configuration. Keys read by this function:
            ``image_number`` (number of correctly classified images to
            attack), ``maxiter`` (max number of iterations), ``init_const``
            (regularization prior to attack loss), ``kappa`` (attack
            confidence level), ``q`` (number of random direction vectors),
            ``mode`` (algorithm name: ZOSGD, ZOsignSGD, ZOSCD, ZOAdaMM,
            ZOSMD, ZOPSGD or ZONES), ``save_iteration``, ``dataset``
            ('mnist', 'cifar10' or 'imagenet'), ``targeted_attack``,
            ``constraint`` ('uncons' or 'cons'), ``mu`` (smoothing
            parameter), ``lr``, ``decay_lr``, ``print_iteration`` and
            ``exp_code``.

    Raises:
        ValueError: if ``args['dataset']`` is not a supported dataset name.
    """
    with tf.Session() as sess:

        random.seed(SEED)
        np.random.seed(SEED)
        tf.set_random_seed(SEED)

        # Draw three candidate ids per requested image so that images the
        # model misclassifies can be skipped.
        image_id_set = np.random.choice(range(1000),
                                        args["image_number"] * 3,
                                        replace=False)
        max_iter = args['maxiter']  # max number of iterations
        const = args['init_const']  # regularization prior to attack loss
        kappa = args['kappa']  # attack confidence level
        q = args['q']  # number of random direction vectors
        arg_mode = args['mode']  # algorithm name
        arg_save_iteration = args['save_iteration']
        arg_Dataset = args["dataset"]
        arg_targeted_attack = args["targeted_attack"]
        constraint = args["constraint"]

        # Load classifier. For MNIST and CIFAR the pixel range is [-0.5, 0.5].
        if arg_Dataset == 'mnist':
            data, model = MNIST(), MNISTModel("models/mnist", sess, True)
        elif arg_Dataset == 'cifar10':
            data, model = CIFAR(), CIFARModel("models/cifar", sess, True)
        elif arg_Dataset == 'imagenet':
            data, model = ImageNet(SEED), InceptionModel(sess, True)
        else:
            # Fail here instead of hitting a NameError on `data`/`model` later.
            raise ValueError('Please specify a valid dataset')

        # Ground-truth labels do not change between images: decode them once.
        true_label_list = np.argmax(data.test_labels, axis=1)

        succ_count, ii, iii = 0, 0, 0
        final_distortion_count = []
        first_iteration_count = []
        first_distortion_count = []
        while iii < args["image_number"]:
            # `ii` is advanced before indexing, so candidate 0 is never used;
            # kept that way so a fixed SEED selects the same images as before.
            ii = ii + 1
            if ii >= len(image_id_set):
                print("Ran out of candidate images after {} valid ones, "
                      "stopping early".format(iii))
                break
            image_id = image_id_set[ii]

            # orig_class: label predicted by the model for the clean image.
            _, orig_class, _ = util.model_prediction(
                model, np.expand_dims(data.test_data[image_id], axis=0))

            if arg_targeted_attack:
                # NOTE(review): the modulus 10 hard-codes a 10-class label
                # space; confirm this is intended when dataset is 'imagenet'.
                target_label = np.remainder(orig_class + 1, 10)
            else:
                target_label = orig_class

            # For MNIST orig_img has shape (1, 28, 28, 1) in [-0.5, 0.5].
            orig_img, _ = util.generate_data(data, image_id, target_label)

            true_label = true_label_list[image_id]

            print("Image ID:{}, infer label:{}, true label:{}".format(
                image_id, orig_class, true_label))
            if true_label != orig_class:
                print(
                    "True Label is different from the original prediction, pass!"
                )
                continue
            iii = iii + 1

            print('\n', iii, '/', args["image_number"])

            d = orig_img.size  # feature dimension
            print("dimension = ", d)

            # Flatten the image to a row vector; start from a zero perturbation.
            orig_img_vec = np.resize(orig_img, (1, d))
            delta_adv = np.zeros((1, d))

            if constraint == 'uncons':
                # Optimise in arctanh space; the 0.999999 factor keeps pixels
                # at +-0.5 from mapping to +-infinity.
                w_ori_img_vec = np.arctanh(2 * (orig_img_vec) * 0.999999)
                w_img_vec = np.arctanh(
                    2 * (np.clip(orig_img_vec + delta_adv, -0.5, 0.5)) *
                    0.999999)
                evaluate_loss = function_evaluation_uncons
            else:
                w_ori_img_vec = orig_img_vec.copy()
                w_img_vec = np.clip(w_ori_img_vec + delta_adv, -0.5, 0.5)
                evaluate_loss = function_evaluation_cons

            # Best-solution bookkeeping.
            best_distortion = (0.5 * d)**2  # threshold for best perturbation
            total_loss = np.zeros(max_iter)
            l2s_loss_all = np.zeros(max_iter)
            attack_flag = False
            first_flag = True  # True until the first successful attack

            mu = args["mu"]  # smoothing parameter for ZO gradient estimation
            base_lr = args["lr"]

            if arg_mode == "ZOAdaMM":
                # Moment estimates for AdaMM.
                v_init = 1e-7
                v_hat = v_init * np.ones((1, d))
                v = v_init * np.ones((1, d))
                m = np.zeros((1, d))
                # momentum parameters for first and second order moment
                beta_1 = 0.9
                beta_2 = 0.9  # only used by AMSGrad
                print(beta_1, beta_2)

            # The gradient estimator depends only on the mode: pick it once.
            if arg_mode == "ZOSCD":
                estimate_gradient = grad_coord_estimation
            elif arg_mode == "ZONES":
                estimate_gradient = gradient_estimation_NES
            else:
                estimate_gradient = gradient_estimation_v2

            # NOTE: per-iteration image saving is currently disabled; only
            # the output directory is prepared (once, not every iteration).
            if arg_save_iteration and not os.path.exists("Examples"):
                os.makedirs("Examples")

            for i in range(max_iter):

                if args["decay_lr"]:
                    base_lr = args["lr"] / np.sqrt(i + 1)

                # Total loss and l2 term at the current iterate.
                total_loss[i], l2s_loss_all[i] = evaluate_loss(
                    w_img_vec, kappa, target_label, const, model, orig_img,
                    arg_targeted_attack)

                # Gradient estimate w.r.t. w_img_vec.
                grad_est = estimate_gradient(mu, q, w_img_vec, d, kappa,
                                             target_label, const, model,
                                             orig_img, arg_targeted_attack,
                                             constraint)

                # Perturbation update; modes are mutually exclusive, and an
                # unrecognised mode leaves delta_adv unchanged.
                if arg_mode in ("ZOSGD", "ZOSCD"):
                    delta_adv = delta_adv - base_lr * grad_est
                elif arg_mode == "ZOsignSGD":
                    delta_adv = delta_adv - base_lr * np.sign(grad_est)
                elif arg_mode == "ZOAdaMM":
                    m = beta_1 * m + (1 - beta_1) * grad_est
                    v = beta_2 * v + (1 - beta_2) * np.square(grad_est)
                    v_hat = np.maximum(v_hat, v)
                    delta_adv = delta_adv - base_lr * m / np.sqrt(v_hat)
                    if constraint == 'cons':
                        tmp = delta_adv.copy()
                        V_temp = np.sqrt(v_hat.reshape(1, -1))
                        delta_adv = projection_box(tmp, orig_img_vec, V_temp,
                                                   -0.5, 0.5)
                elif arg_mode in ("ZOSMD", "ZOPSGD", "ZONES"):
                    if arg_mode == "ZOSMD":
                        delta_adv = delta_adv - 0.5 * base_lr * grad_est
                    elif arg_mode == "ZOPSGD":
                        delta_adv = delta_adv - base_lr * grad_est
                    else:
                        delta_adv = delta_adv - base_lr * np.sign(grad_est)
                    if constraint == 'cons':
                        V_temp = np.ones_like(orig_img_vec)
                        delta_adv = projection_box(delta_adv, orig_img_vec,
                                                   V_temp, -0.5, 0.5)

                # Adversarial example update.
                w_img_vec = w_ori_img_vec + delta_adv

                # Convert back to an image in [-0.5, 0.5].
                if constraint == 'uncons':
                    adv_img_vec = 0.5 * np.tanh((w_img_vec)) / 0.999999
                else:
                    adv_img_vec = w_img_vec.copy()

                adv_img = np.resize(adv_img_vec, orig_img.shape)

                # Model output on the current adversarial image.
                attack_prob, _, _ = util.model_prediction(model, adv_img)
                target_prob = attack_prob[0, target_label]
                attack_prob_tmp = attack_prob.copy()
                attack_prob_tmp[0, target_label] = 0
                other_prob = np.amax(attack_prob_tmp)
                pred_label = np.argmax(attack_prob)

                if args["print_iteration"]:
                    # The two messages deliberately keep their original
                    # (slightly different) lr formats.
                    if true_label != pred_label:
                        fmt = "Iter %d (Succ): ID = %d, lr = %3.5f, decay = %d, ZO = %s %s, loss = %3.5f, l2sdist = %3.5f, TL = %d, PL = %d"
                    else:
                        fmt = "Iter %d (Fail): ID = %d, lr = %3.6f, decay = %d, ZO = %s %s, loss = %3.5f, l2sdist = %3.5f, TL = %d, PL = %d"
                    print(fmt % (i + 1, image_id, args["lr"],
                                 int(args["decay_lr"]), arg_mode, constraint,
                                 total_loss[i], l2s_loss_all[i], true_label,
                                 pred_label))

                # Attack confidence: log-probability margin in the direction
                # the attack is pushing.
                log_target = np.log(target_prob + 1e-10)
                log_other = np.log(other_prob + 1e-10)
                if arg_targeted_attack:
                    margin = log_target - log_other
                else:
                    margin = log_other - log_target

                if margin >= kappa:
                    cur_distortion = distortion(adv_img, orig_img)
                    if cur_distortion < best_distortion:
                        best_distortion = cur_distortion
                        best_iteration = i + 1
                        attack_flag = True
                        if first_flag:
                            # Only the first successful attack is recorded.
                            first_flag = False
                            first_distortion = cur_distortion
                            first_iteration = i + 1

            # Persist the per-image record; make sure the target dir exists.
            if not os.path.exists("retperimage2"):
                os.makedirs("retperimage2")
            suffix0 = "retperimage2/id_{}_Mode_{}_{}_lr_{}_decay_{}_case{}_per".format(
                image_id, arg_mode, constraint, args["lr"],
                int(args["decay_lr"]), args["exp_code"])
            record = dict(id=image_id,
                          mode=arg_mode,
                          loss=total_loss,
                          perturbation=l2s_loss_all,
                          best_distortion=best_distortion,
                          learn_rate=args["lr"],
                          decay_lr=args["decay_lr"],
                          attack_flag=attack_flag)

            if attack_flag:
                succ_count = succ_count + 1
                final_distortion_count.append(l2s_loss_all[-1])
                first_distortion_count.append(first_distortion)
                first_iteration_count.append(first_iteration)
                # The key spelling 'best_iteation' is kept so existing
                # readers of these archives keep working.
                record.update(first_distortion=first_distortion,
                              first_iteration=first_iteration,
                              best_iteation=best_iteration)

            np.savez(suffix0, **record)

            if attack_flag:
                print("It takes {} iteations to find the first attack".format(
                    first_iteration))
            else:
                print("Attack Fails")

            sys.stdout.flush()
    print('succ rate:', succ_count / args["image_number"])
    if succ_count:
        print('average first success l2', np.mean(first_distortion_count))
        print('average first itrs', np.mean(first_iteration_count))
        print('average l2:', np.mean(final_distortion_count), ' best l2:',
              np.min(final_distortion_count), ' worst l2:',
              np.max(final_distortion_count))
    else:
        # np.min / np.max raise on an empty sequence, so skip the statistics.
        print('No successful attack, distortion statistics are unavailable')
def main(args):
    with tf.Session() as sess:
        if (args['dataset'] == 'mnist'):
            data = MNIST()
            inception = False
            if (args['adversarial'] != "none"):
                model = MNISTModel(
                    "models/mnist_cw" + str(args['adversarial']), sess)
            elif (args['temp']):
                model = MNISTModel(
                    "models/mnist-distilled-" + str(args['temp']), sess)
            else:
                model = MNISTModel("models/mnist", sess)
        if (args['dataset'] == "cifar"):
            data = CIFAR()
            inception = False
            if (args['adversarial'] != "none"):
                model = CIFARModel(
                    "models/cifar_cw" + str(args['adversarial']), sess)
            elif (args['temp']):
                model = CIFARModel(
                    "models/cifar-distilled-" + str(args['temp']), sess)
            else:
                model = CIFARModel("models/cifar", sess)
        if (args['dataset'] == "imagenet"):
            data, model = ImageNet(args['seed_imagenet'],
                                   2 * args['numimg']), InceptionModel(sess)
            inception = True

        inputs, targets, labels, true_ids = generate_data(
            data,
            model,
            samples=args['numimg'],
            targeted=not args['untargeted'],
            target_num=args['targetnum'],
            inception=inception,
            train=args['train'],
            seed=args['seed'])
        timestart = time.time()
        if (args['restore_np']):
            if (args['train']):
                adv = np.load(
                    str(args['dataset']) + '_' + str(args['attack']) +
                    '_train.npy')
            else:
                adv = np.load(
                    str(args['dataset']) + '_' + str(args['attack']) + '.npy')
        else:
            if (args['attack'] == 'L2'):
                attack = CarliniL2(sess,
                                   model,
                                   batch_size=args['batch_size'],
                                   max_iterations=args['maxiter'],
                                   confidence=args['conf'],
                                   initial_const=args['init_const'],
                                   binary_search_steps=args['binary_steps'],
                                   targeted=not args['untargeted'],
                                   beta=args['beta'],
                                   abort_early=args['abort_early'])
                adv = attack.attack(inputs, targets)
            if (args['attack'] == 'L1'):
                attack = EADL1(sess,
                               model,
                               batch_size=args['batch_size'],
                               max_iterations=args['maxiter'],
                               confidence=args['conf'],
                               initial_const=args['init_const'],
                               binary_search_steps=args['binary_steps'],
                               targeted=not args['untargeted'],
                               beta=args['beta'],
                               abort_early=args['abort_early'])
                adv = attack.attack(inputs, targets)
            if (args['attack'] == 'EN'):
                attack = EADEN(sess,
                               model,
                               batch_size=args['batch_size'],
                               max_iterations=args['maxiter'],
                               confidence=args['conf'],
                               initial_const=args['init_const'],
                               binary_search_steps=args['binary_steps'],
                               targeted=not args['untargeted'],
                               beta=args['beta'],
                               abort_early=args['abort_early'])
                adv = attack.attack(inputs, targets)
            """If untargeted, pass labels instead of targets"""
            if (args['attack'] == 'FGSM'):
                attack = FGM(sess,
                             model,
                             batch_size=args['batch_size'],
                             ord=np.inf,
                             eps=args['eps'],
                             inception=inception)
                adv = attack.attack(inputs, targets)
            if (args['attack'] == 'FGML1'):
                attack = FGM(sess,
                             model,
                             batch_size=args['batch_size'],
                             ord=1,
                             eps=args['eps'],
                             inception=inception)
                adv = attack.attack(inputs, targets)
            if (args['attack'] == 'FGML2'):
                attack = FGM(sess,
                             model,
                             batch_size=args['batch_size'],
                             ord=2,
                             eps=args['eps'],
                             inception=inception)
                adv = attack.attack(inputs, targets)

            if (args['attack'] == 'IFGSM'):
                attack = IFGM(sess,
                              model,
                              batch_size=args['batch_size'],
                              ord=np.inf,
                              eps=args['eps'],
                              inception=inception)
                adv = attack.attack(inputs, targets)
            if (args['attack'] == 'IFGML1'):
                attack = IFGM(sess,
                              model,
                              batch_size=args['batch_size'],
                              ord=1,
                              eps=args['eps'],
                              inception=inception)
                adv = attack.attack(inputs, targets)
            if (args['attack'] == 'IFGML2'):
                attack = IFGM(sess,
                              model,
                              batch_size=args['batch_size'],
                              ord=2,
                              eps=args['eps'],
                              inception=inception)
                adv = attack.attack(inputs, targets)

        timeend = time.time()

        if args['untargeted']:
            num_targets = 1
        else:
            num_targets = args['targetnum']
        print("Took", timeend - timestart, "seconds to run",
              len(inputs) / num_targets, "random instances.")

        if (args['save_np']):
            if (args['train']):
                np.save(str(args['dataset']) + '_labels_train.npy', labels)
                np.save(
                    str(args['dataset']) + '_' + str(args['attack']) +
                    '_train.npy', adv)
            else:
                np.save(
                    str(args['dataset']) + '_' + str(args['attack'] + '.npy'),
                    adv)

        r_best_ = []
        d_best_l1_ = []
        d_best_l2_ = []
        d_best_linf_ = []
        r_average_ = []
        d_average_l1_ = []
        d_average_l2_ = []
        d_average_linf_ = []
        r_worst_ = []
        d_worst_l1_ = []
        d_worst_l2_ = []
        d_worst_linf_ = []

        #Transferability Tests
        model_ = []
        model_.append(model)
        if (args['targetmodel'] != "same"):
            if (args['targetmodel'] == "dd_100"):
                model_.append(MNISTModel("models/mnist-distilled-100", sess))
        num_models = len(model_)

        if (args['show']):
            if not os.path.exists(
                    str(args['save']) + "/" + str(args['dataset']) + "/" +
                    str(args['attack'])):
                os.makedirs(
                    str(args['save']) + "/" + str(args['dataset']) + "/" +
                    str(args['attack']))
        for m, model in enumerate(model_):
            r_best = []
            d_best_l1 = []
            d_best_l2 = []
            d_best_linf = []
            r_average = []
            d_average_l1 = []
            d_average_l2 = []
            d_average_linf = []
            r_worst = []
            d_worst_l1 = []
            d_worst_l2 = []
            d_worst_linf = []
            for i in range(0, len(inputs), num_targets):
                pred = []
                for j in range(i, i + num_targets):
                    if inception:
                        pred.append(
                            np.reshape(model.model.predict(adv[j:j + 1]),
                                       (data.test_labels[0:1].shape)))
                    else:
                        pred.append(model.model.predict(adv[j:j + 1]))

                dist_l1 = 1e10
                dist_l1_index = 1e10
                dist_linf = 1e10
                dist_linf_index = 1e10
                dist_l2 = 1e10
                dist_l2_index = 1e10
                for k, j in enumerate(range(i, i + num_targets)):
                    success = False
                    if (args['untargeted']):
                        if (np.argmax(pred[k], 1) != np.argmax(
                                targets[j:j + 1], 1)):
                            success = True
                    else:
                        if (np.argmax(pred[k],
                                      1) == np.argmax(targets[j:j + 1], 1)):
                            success = True
                    if (success):
                        if (np.sum(np.abs(adv[j] - inputs[j])) < dist_l1):
                            dist_l1 = np.sum(np.abs(adv[j] - inputs[j]))
                            dist_l1_index = j
                        if (np.amax(np.abs(adv[j] - inputs[j])) < dist_linf):
                            dist_linf = np.amax(np.abs(adv[j] - inputs[j]))
                            dist_linf_index = j
                        if ((np.sum((adv[j] - inputs[j])**2)**.5) < dist_l2):
                            dist_l2 = (np.sum((adv[j] - inputs[j])**2)**.5)
                            dist_l2_index = j
                if (dist_l1_index != 1e10):
                    d_best_l2.append((np.sum(
                        (adv[dist_l2_index] - inputs[dist_l2_index])**2)**.5))
                    d_best_l1.append(
                        np.sum(
                            np.abs(adv[dist_l1_index] -
                                   inputs[dist_l1_index])))
                    d_best_linf.append(
                        np.amax(
                            np.abs(adv[dist_linf_index] -
                                   inputs[dist_linf_index])))
                    r_best.append(1)
                else:
                    r_best.append(0)

                rand_int = np.random.randint(i, i + num_targets)
                if inception:
                    pred_r = np.reshape(
                        model.model.predict(adv[rand_int:rand_int + 1]),
                        (data.test_labels[0:1].shape))
                else:
                    pred_r = model.model.predict(adv[rand_int:rand_int + 1])
                success_average = False
                if (args['untargeted']):
                    if (np.argmax(pred_r, 1) != np.argmax(
                            targets[rand_int:rand_int + 1], 1)):
                        success_average = True
                else:
                    if (np.argmax(pred_r, 1) == np.argmax(
                            targets[rand_int:rand_int + 1], 1)):
                        success_average = True
                if success_average:
                    r_average.append(1)
                    d_average_l2.append(
                        np.sum((adv[rand_int] - inputs[rand_int])**2)**.5)
                    d_average_l1.append(
                        np.sum(np.abs(adv[rand_int] - inputs[rand_int])))
                    d_average_linf.append(
                        np.amax(np.abs(adv[rand_int] - inputs[rand_int])))

                else:
                    r_average.append(0)

                dist_l1 = 0
                dist_l1_index = 1e10
                dist_linf = 0
                dist_linf_index = 1e10
                dist_l2 = 0
                dist_l2_index = 1e10
                for k, j in enumerate(range(i, i + num_targets)):
                    failure = True
                    if (args['untargeted']):
                        if (np.argmax(pred[k], 1) != np.argmax(
                                targets[j:j + 1], 1)):
                            failure = False
                    else:
                        if (np.argmax(pred[k],
                                      1) == np.argmax(targets[j:j + 1], 1)):
                            failure = False
                    if failure:
                        r_worst.append(0)
                        dist_l1_index = 1e10
                        dist_l2_index = 1e10
                        dist_linf_index = 1e10
                        break
                    else:
                        if (np.sum(np.abs(adv[j] - inputs[j])) > dist_l1):
                            dist_l1 = np.sum(np.abs(adv[j] - inputs[j]))
                            dist_l1_index = j
                        if (np.amax(np.abs(adv[j] - inputs[j])) > dist_linf):
                            dist_linf = np.amax(np.abs(adv[j] - inputs[j]))
                            dist_linf_index = j
                        if ((np.sum((adv[j] - inputs[j])**2)**.5) > dist_l2):
                            dist_l2 = (np.sum((adv[j] - inputs[j])**2)**.5)
                            dist_l2_index = j
                if (dist_l1_index != 1e10):
                    d_worst_l2.append((np.sum(
                        (adv[dist_l2_index] - inputs[dist_l2_index])**2)**.5))
                    d_worst_l1.append(
                        np.sum(
                            np.abs(adv[dist_l1_index] -
                                   inputs[dist_l1_index])))
                    d_worst_linf.append(
                        np.amax(
                            np.abs(adv[dist_linf_index] -
                                   inputs[dist_linf_index])))
                    r_worst.append(1)

                if (args['show'] and m == (num_models - 1)):
                    for j in range(i, i + num_targets):
                        target_id = np.argmax(targets[j:j + 1], 1)
                        label_id = np.argmax(labels[j:j + 1], 1)
                        prev_id = np.argmax(
                            np.reshape(model.model.predict(inputs[j:j + 1]),
                                       (data.test_labels[0:1].shape)), 1)
                        adv_id = np.argmax(
                            np.reshape(model.model.predict(adv[j:j + 1]),
                                       (data.test_labels[0:1].shape)), 1)
                        suffix = "id{}_seq{}_lbl{}_prev{}_adv{}_{}_l1_{:.3f}_l2_{:.3f}_linf_{:.3f}".format(
                            true_ids[i], target_id, label_id, prev_id, adv_id,
                            adv_id == target_id,
                            np.sum(np.abs(adv[j] - inputs[j])),
                            np.sum((adv[j] - inputs[j])**2)**.5,
                            np.amax(np.abs(adv[j] - inputs[j])))

                        show(
                            inputs[j:j + 1],
                            str(args['save']) + "/" + str(args['dataset']) +
                            "/" + str(args['attack']) +
                            "/original_{}.png".format(suffix))
                        show(
                            adv[j:j + 1],
                            str(args['save']) + "/" + str(args['dataset']) +
                            "/" + str(args['attack']) +
                            "/adversarial_{}.png".format(suffix))
            if (m != (num_models - 1)):
                lbl = "Src_"
                if (num_models > 2):
                    lbl += str(m) + "_"
            else:
                lbl = "Tgt_"
            if (num_targets > 1):
                print(lbl + 'best_case_L1_mean', np.mean(d_best_l1))
                print(lbl + 'best_case_L2_mean', np.mean(d_best_l2))
                print(lbl + 'best_case_Linf_mean', np.mean(d_best_linf))
                print(lbl + 'best_case_prob', np.mean(r_best))
                print(lbl + 'average_case_L1_mean', np.mean(d_average_l1))
                print(lbl + 'average_case_L2_mean', np.mean(d_average_l2))
                print(lbl + 'average_case_Linf_mean', np.mean(d_average_linf))
                print(lbl + 'average_case_prob', np.mean(r_average))
                print(lbl + 'worst_case_L1_mean', np.mean(d_worst_l1))
                print(lbl + 'worst_case_L2_mean', np.mean(d_worst_l2))
                print(lbl + 'worst_case_Linf_mean', np.mean(d_worst_linf))
                print(lbl + 'worst_case_prob', np.mean(r_worst))
            else:
                print(lbl + 'L1_mean', np.mean(d_average_l1))
                print(lbl + 'L2_mean', np.mean(d_average_l2))
                print(lbl + 'Linf_mean', np.mean(d_average_linf))
                print(lbl + 'success_prob', np.mean(r_average))
Example #22
0
def main(args):
    """Run the selected attack on a dataset and hand the results to reporting.

    ``args`` is a dict-like of run options.  Keys read directly here:
    ``dataset`` (``'mnist'``, ``'cifar'`` or ``'imagenet'``), ``attack``
    (``'L2C'`` or ``'L0A'``), ``adversarial``, ``temp``, ``numimg``,
    ``seed``, ``batch_size``, ``maxiter``, ``conf``, ``abort_early``,
    ``train`` and ``seed_imagenet`` (ImageNet only); ``binary_steps`` for
    ``'L2C'``; ``iteration_steps`` and ``ro`` for ``'L0A'``.  The whole dict
    is also forwarded to ``l1_l2_li_computation`` / ``l0_computation``.

    Raises:
        ValueError: if ``args['dataset']`` or ``args['attack']`` is not one
            of the values this function knows how to set up.
    """
    dataset = args['dataset']
    attack_name = args['attack']

    # Fail fast, before a TensorFlow session is opened.  Previously an
    # unrecognised dataset or attack only surfaced later as an
    # UnboundLocalError on ``data`` / ``attack``.
    if dataset not in ('mnist', 'cifar', 'imagenet'):
        raise ValueError("Unsupported dataset: {!r}".format(dataset))
    if attack_name not in ('L2C', 'L0A'):
        raise ValueError("Unsupported attack: {!r}".format(attack_name))

    with tf.Session() as sess:
        if dataset == 'mnist':
            data, model = MNIST(), MNISTModel("models/mnist", sess)
            handpick = False
            inception = False
        elif dataset == 'cifar':
            data, model = CIFAR(), CIFARModel("models/cifar", sess)
            handpick = True
            inception = False
        else:  # 'imagenet'
            data, model = ImageNet(args['seed_imagenet']), InceptionModel(sess)
            handpick = True
            inception = True

        if args['adversarial'] != "none":
            # NOTE(review): an MNIST checkpoint is loaded here whatever the
            # dataset is -- confirm this is intended for cifar/imagenet runs.
            model = MNISTModel(
                "models/mnist_cwl2_admm" + str(args['adversarial']), sess)

        # A truthy 'temp' swaps in the distilled checkpoint for that value.
        if args['temp']:
            if dataset == 'mnist':
                model = MNISTModel(
                    "models/mnist-distilled-" + str(args['temp']), sess)
            elif dataset == 'cifar':
                model = CIFARModel(
                    "models/cifar-distilled-" + str(args['temp']), sess)

        inputs, targets, labels, true_ids = generate_data(
            data,
            model,
            samples=args['numimg'],
            targeted=True,
            start=0,
            inception=inception,
            handpick=handpick,
            seed=args['seed'])

        if attack_name == 'L2C':
            attack = CarliniL2(sess,
                               model,
                               batch_size=args['batch_size'],
                               max_iterations=args['maxiter'],
                               confidence=args['conf'],
                               binary_search_steps=args['binary_steps'],
                               abort_early=args['abort_early'])
        else:  # 'L0A'
            attack = ADMML0(sess,
                            model,
                            batch_size=args['batch_size'],
                            max_iterations=args['maxiter'],
                            confidence=args['conf'],
                            binary_search_steps=args['iteration_steps'],
                            ro=args['ro'],
                            abort_early=args['abort_early'])

        timestart = time.time()
        adv = attack.attack(inputs, targets)
        timeend = time.time()

        print("Took", timeend - timestart, "seconds to run", len(inputs),
              "samples.\n")

        if args['train']:
            np.save('labels_train.npy', labels)
            np.save(str(args['attack']) + '_train.npy', adv)

        if args['conf'] != 0:
            # With a non-zero confidence the examples are scored against the
            # distilled MNIST model instead of the model that was attacked.
            # NOTE(review): MNIST-specific regardless of dataset -- confirm.
            model = MNISTModel("models/mnist-distilled-100", sess)

        # The L0-family attack names go to the L0 report; everything else to
        # the L1/L2/Linf report.  The full list is kept so the dispatch stays
        # right if more attacks are wired in above.
        if attack_name not in ('L0A', 'L0AE', 'L0C', 'L0AE2'):
            l1_l2_li_computation(args, data, model, adv, inception, inputs,
                                 targets, labels, true_ids)
        else:
            l0_computation(args, data, model, adv, inception, inputs, targets,
                           labels, true_ids)
Example #23
0
def run(args, restrict=True):
    """Attack a defended model with untargeted SPSA and log the accuracies.

    Args:
        args: sequence ``(dataset, epsilon, mode, K)``.  ``dataset`` must be
            ``"MNIST"`` or ``"CIFAR"``; ``epsilon`` is converted with
            ``float`` and ``K`` with ``int``.
        restrict: when true, set ``CUDA_VISIBLE_DEVICES`` to the number that
            follows the first ``-`` in the current multiprocessing process
            name, minus one.

    Side effects:
        Writes ``<dataset>/<epsilon>_<mode>_<K>.txt`` with the accuracy of
        the defended model on ``N`` randomly sampled test images and on the
        adversarial examples generated from them.

    Raises:
        ValueError: if ``dataset`` is neither ``"MNIST"`` nor ``"CIFAR"``.
    """
    if restrict:
        # Restrict the visible GPUs to the one for this subprocess.
        # Builtin ``int`` replaces ``np.int``, an alias removed in NumPy 1.24.
        worker_id = int(multiprocessing.current_process().name.split("-")[1])
        os.environ["CUDA_VISIBLE_DEVICES"] = str(worker_id - 1)

    # Load Parameters
    dataset = args[0]
    epsilon = float(args[1])
    mode = args[2]
    K = int(args[3])

    # Reject unknown datasets up front instead of failing later on an
    # undefined ``base_model`` / ``data``.
    if dataset not in ("MNIST", "CIFAR"):
        raise ValueError("Unsupported dataset: {!r}".format(dataset))

    fname = dataset + "/" + str(epsilon) + "_" + mode + "_" + str(K)

    # Configure Keras/Tensorflow
    Keras.clear_session()

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    set_session(tf.Session(config=config))

    sess = Keras.get_session()
    Keras.set_learning_phase(False)

    # Fix Random Seeds
    np.random.seed(1)
    # Having this before Keras.clear_session() causes it to hang for some
    # reason.
    tf.set_random_seed(1)

    # Load Model/Data and set up the SPSA placeholders
    N = 50
    if dataset == "MNIST":
        base_model = MNISTModel("../1-Models/MNIST")
        data = MNIST()
        image_shape = (28, 28, 1)
    else:  # "CIFAR"
        base_model = CIFARModel("../1-Models/CIFAR")
        data = CIFAR()
        image_shape = (32, 32, 3)
    # Batched input for evaluating the defended model.
    x = tf.placeholder(tf.float32, shape=(None,) + image_shape)
    # SPSA is fed one image at a time.
    shape_spsa = (1,) + image_shape
    x_spsa = tf.placeholder(tf.float32, shape=shape_spsa)
    y_spsa = tf.placeholder(tf.int32)

    # Load the hidden representations of the real and adversarial examples
    # from the training set
    x_train_real = np.squeeze(
        np.load("../3-Representation/" + dataset + "/train_" + mode + ".npy"))
    x_train_adv = np.squeeze(
        np.load("../3-Representation/" + dataset + "/train_adv_" + mode +
                ".npy"))

    n_train = x_train_real.shape[0]
    n_train_adv = x_train_adv.shape[0]
    x_train = np.float32(np.vstack((x_train_real, x_train_adv)))
    # Real representations are labelled -1, adversarial ones +1.
    y_train = np.float32(
        np.hstack((-1.0 * np.ones(n_train), np.ones(n_train_adv))))

    # Create the defended model
    model_defended = DefendedModel(base_model, x_train, y_train, K)
    defended_logits = model_defended.get_logits(x)

    # Configure the attack
    attack = SPSA(model_defended, back="tf", sess=sess)
    with tf.name_scope("Attack"):
        gen = attack.generate(x_spsa,
                              y=y_spsa,
                              epsilon=epsilon,
                              is_targeted=False,
                              num_steps=100,
                              batch_size=2048,
                              early_stop_loss_threshold=-5.0)

    # Run the attack.  The context manager closes the log file even if the
    # attack raises part-way through.
    with open(fname + ".txt", "w") as f:
        sample = np.random.choice(data.test_data.shape[0], N, replace=False)
        x_sample = data.test_data[sample]
        y_sample = np.argmax(data.test_labels[sample], axis=1)

        logits_nat = sess.run(defended_logits, {x: x_sample})
        f.write("Accuracy on Natural Images: " +
                str(np.mean(np.argmax(logits_nat, axis=1) == y_sample)) + "\n")

        # -1 marks "no prediction yet"; every entry is overwritten below.
        pred_adv = np.full(N, -1.0)
        for i in range(N):
            x_real = x_sample[i].reshape(shape_spsa)
            x_adv = sess.run(gen, {x_spsa: x_real, y_spsa: y_sample[i]})
            pred_adv[i] = np.argmax(sess.run(defended_logits, {x: x_adv}))

        f.write("Accuracy on Adversarial Images: " +
                str(np.mean(pred_adv == y_sample)))
def train_sub(sess, x, y, bbox_preds, X_sub, Y_sub, nb_classes,
              nb_epochs_s, batch_size, learning_rate, data_aug, lmbda):
    """
    This function creates the substitute by alternately
    augmenting the training data and training the substitute.

    The substitute architecture is chosen from the module-level ``DATASET``
    (``"mnist"`` selects ``MNISTModel``, anything else ``CIFARModel``).
    When ``FLAGS.cached_aug`` is set, augmented data is loaded from
    ``sub_saved/<DATASET>-aug-<rho>.npz``; otherwise it is computed and
    written to that path.

    :param sess: TF session
    :param x: input TF placeholder
    :param y: output TF placeholder
    :param bbox_preds: output of black-box model predictions
    :param X_sub: initial substitute training data
    :param Y_sub: initial substitute training labels
    :param nb_classes: number of output classes
    :param nb_epochs_s: number of epochs to train substitute model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :param data_aug: number of times substitute training data is augmented
    :param lmbda: lambda from arxiv.org/abs/1602.02697
    :return: tuple ``(model_sub, preds_sub)`` -- the substitute model and
             its output tensor on ``x``
    """
    # Define TF model graph (for the substitute model)
    if DATASET == "mnist":
        model_sub = MNISTModel(use_log=True).model
    else:
        model_sub = CIFARModel(use_log=True).model
    preds_sub = model_sub(x)
    print("Defined TensorFlow model graph for the substitute.")

    # Define the Jacobian symbolically using TensorFlow
    grads = jacobian_graph(preds_sub, x, nb_classes)

    # Train the substitute and augment dataset alternately.
    # ``range`` (not ``xrange``) so this runs on Python 3 without a shim.
    for rho in range(data_aug):
        print("Substitute training epoch #" + str(rho))
        train_params = {
            'nb_epochs': nb_epochs_s,
            'batch_size': batch_size,
            'learning_rate': learning_rate
        }
        model_train(sess, x, y, preds_sub, X_sub, to_categorical(Y_sub),
                    init_all=False, verbose=False, args=train_params)

        # The last substitute training iteration needs no further data.
        if rho >= data_aug - 1:
            continue

        cache_path = 'sub_saved/{}-aug-{}.npz'.format(DATASET, rho)
        if FLAGS.cached_aug:
            augs = np.load(cache_path)
            X_sub = augs['X_sub']
            Y_sub = augs['Y_sub']
        else:
            print("Augmenting substitute training data.")
            # Perform the Jacobian augmentation
            X_sub = jacobian_augmentation(sess, x, X_sub, Y_sub, grads, lmbda)

            print("Labeling substitute training data.")
            # Label the newly generated synthetic points using the black-box.
            # Y_sub is duplicated first so its second half can be overwritten
            # with the black-box labels of the second half of X_sub.
            Y_sub = np.hstack([Y_sub, Y_sub])
            half = len(X_sub) // 2
            X_sub_prev = X_sub[half:]
            eval_params = {'batch_size': batch_size}
            bbox_val = batch_eval(sess, [x], [bbox_preds], [X_sub_prev],
                                  args=eval_params)[0]
            # Note here that we take the argmax because the adversary
            # only has access to the label (not the probabilities) output
            # by the black-box model
            Y_sub[half:] = np.argmax(bbox_val, axis=1)
            # Cache the augmentation; this branch only runs when the cache
            # was not used, so no extra flag check is needed.
            np.savez(cache_path, X_sub=X_sub, Y_sub=Y_sub)

    return model_sub, preds_sub
Example #25
0
def main(args):
    """Attack one MNIST test image with AEADEN and save the resulting images.

    ``args`` is a dict-like; keys read: ``img_id``, ``maxiter``,
    ``binary_steps``, ``init_const``, ``mode``, ``kappa``, ``beta`` and
    ``gamma``.

    The model's own prediction for the image is used as the attack target
    label.  The original image, the adversarial image and the shifted
    absolute difference are written under
    ``Results/<mode>_ID<img_id>_Gamma_<gamma>/``, and a one-line summary is
    printed.
    """
    with tf.Session() as sess:
        random.seed(121)
        np.random.seed(1211)

        image_id = args['img_id']
        arg_max_iter = args['maxiter']
        arg_b = args['binary_steps']
        arg_init_const = args['init_const']
        arg_mode = args['mode']
        arg_kappa = args['kappa']
        arg_beta = args['beta']
        arg_gamma = args['gamma']

        AE_model = util.load_AE("mnist_AE_1")
        data, model = MNIST(), MNISTModel("models/mnist", sess, False)

        # Only the predicted class and the formatted probability string are
        # used below; the raw probability is discarded.
        _, orig_class, orig_prob_str = util.model_prediction(
            model, np.expand_dims(data.test_data[image_id], axis=0))
        target_label = orig_class
        print("Image:{}, infer label:{}".format(image_id, target_label))
        orig_img, target = util.generate_data(data, image_id, target_label)

        attack = AEADEN(sess,
                        model,
                        mode=arg_mode,
                        AE=AE_model,
                        batch_size=1,
                        kappa=arg_kappa,
                        init_learning_rate=1e-2,
                        binary_search_steps=arg_b,
                        max_iterations=arg_max_iter,
                        initial_const=arg_init_const,
                        beta=arg_beta,
                        gamma=arg_gamma)

        adv_img = attack.attack(orig_img, target)

        _, adv_class, adv_prob_str = util.model_prediction(model, adv_img)
        # The perturbation itself is also fed through the classifier.
        _, delta_class, delta_prob_str = util.model_prediction(
            model, orig_img - adv_img)

        INFO = "[INFO]id:{}, kappa:{}, Orig class:{}, Adv class:{}, Delta class: {}, Orig prob:{}, Adv prob:{}, Delta prob:{}".format(
            image_id, arg_kappa, orig_class, adv_class, delta_class,
            orig_prob_str, adv_prob_str, delta_prob_str)
        print(INFO)

        suffix = "id{}_kappa{}_Orig{}_Adv{}_Delta{}".format(
            image_id, arg_kappa, orig_class, adv_class, delta_class)
        arg_save_dir = "{}_ID{}_Gamma_{}".format(arg_mode, image_id, arg_gamma)
        # Create the output directory without going through a shell: the
        # name is built from argument values, so a shell string would be
        # both injectable and wrong for names containing spaces.
        os.makedirs("Results/{}".format(arg_save_dir), exist_ok=True)
        util.save_img(
            orig_img,
            "Results/{}/Orig_original{}.png".format(arg_save_dir, orig_class))
        util.save_img(adv_img,
                      "Results/{}/Adv_{}.png".format(arg_save_dir, suffix))
        util.save_img(
            np.absolute(orig_img - adv_img) - 0.5,
            "Results/{}/Delta_{}.png".format(arg_save_dir, suffix))

        sys.stdout.flush()
def run(args, restrict=True):
    """Run targeted SPSA attacks against a defended model and pickle results.

    Args:
        args: sequence ``(dataset, epsilon, mode, K, bias)``.  ``dataset``
            must be ``"MNIST"`` or ``"CIFAR"``; ``epsilon`` and ``bias`` are
            converted with ``float`` and ``K`` with ``int``.
        restrict: when true, set ``CUDA_VISIBLE_DEVICES`` to the number that
            follows the first ``-`` in the current multiprocessing process
            name, minus one.

    Side effects:
        Pickles a dict with keys ``"FP"``, ``"Labels"``,
        ``"UndefendedPrediction"`` and ``"AdversarialPredictions"`` to
        ``<dataset>/<epsilon>_<mode>_<K>_<bias>`` every 10 images and once
        more at the end, then calls ``analysis`` on that path.

    Raises:
        ValueError: if ``dataset`` is neither ``"MNIST"`` nor ``"CIFAR"``.
    """
    if restrict:
        # Restrict the visible GPUs to the one for this subprocess.
        # Builtin ``int`` replaces ``np.int``, an alias removed in NumPy 1.24.
        worker_id = int(multiprocessing.current_process().name.split("-")[1])
        os.environ["CUDA_VISIBLE_DEVICES"] = str(worker_id - 1)

    # Load Parameters
    dataset = args[0]
    epsilon = float(args[1])
    mode = args[2]
    K = int(args[3])
    bias = float(args[4])

    # Reject unknown datasets up front instead of failing later on an
    # undefined ``base_model`` / ``data``.
    if dataset not in ("MNIST", "CIFAR"):
        raise ValueError("Unsupported dataset: {!r}".format(dataset))

    fname = dataset + "/" + str(epsilon) + "_" + mode + "_" + str(
        K) + "_" + str(bias)

    # Configure Keras/Tensorflow
    Keras.clear_session()

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    set_session(tf.Session(config=config))

    sess = Keras.get_session()
    Keras.set_learning_phase(False)

    # Fix Random Seeds
    np.random.seed(1)
    # Having this before Keras.clear_session() causes it to hang for some
    # reason.
    tf.set_random_seed(1)

    # Load Model/Data and set up the SPSA placeholders
    N = 1000
    if dataset == "MNIST":
        base_model = MNISTModel("../1-Models/MNIST")
        data = MNIST()
        image_shape = (28, 28, 1)
    else:  # "CIFAR"
        base_model = CIFARModel("../1-Models/CIFAR")
        data = CIFAR()
        image_shape = (32, 32, 3)
    # Batched input for evaluating the defended model.
    x = tf.placeholder(tf.float32, shape=(None,) + image_shape)
    # SPSA is fed one image at a time.
    shape_spsa = (1,) + image_shape
    x_spsa = tf.placeholder(tf.float32, shape=shape_spsa)
    y_spsa = tf.placeholder(tf.int32)

    # Load the hidden representations of the real and adversarial examples
    # from the training set
    x_train_real = np.squeeze(
        np.load("../3-Representation/" + dataset + "/train_" + mode + ".npy"))
    x_train_adv = np.squeeze(
        np.load("../3-Representation/" + dataset + "/train_adv_" + mode +
                ".npy"))

    n_train = x_train_real.shape[0]
    n_train_adv = x_train_adv.shape[0]
    x_train = np.float32(np.vstack((x_train_real, x_train_adv)))
    # Real representations are labelled -1, adversarial ones +1.
    y_train = np.float32(
        np.hstack((-1.0 * np.ones(n_train), np.ones(n_train_adv))))

    # Create the defended model
    model_defended = DefendedModel(base_model, x_train, y_train, K, bias=bias)
    defended_logits = model_defended.get_logits(x)

    # Get the predictions on the original images
    labels = np.argmax(data.test_labels[:N], axis=1)
    logits_real = sess.run(defended_logits, {x: data.test_data[:N]})
    # False positives of the defense: clean images assigned to index 10.
    fp = (np.argmax(logits_real, axis=1) == 10)
    # Original model prediction: argmax with the last logit column removed.
    pred_undefended = np.argmax(np.delete(logits_real, -1, axis=1), axis=1)

    # Configure the attack
    attack = SPSA(model_defended, back="tf", sess=sess)
    with tf.name_scope("Attack"):
        gen = attack.generate(x_spsa,
                              y_target=y_spsa,
                              epsilon=epsilon,
                              is_targeted=True,
                              num_steps=100,
                              batch_size=2048,
                              early_stop_loss_threshold=-5.0)

    # -1 marks (image, target) pairs that have not been attacked.
    pred_adv = -1.0 * np.ones((N, 10))

    def _dump_results():
        # Pickle the results gathered so far; the context manager closes
        # the file even if pickling fails.
        out = {}
        out["FP"] = fp
        out["Labels"] = labels
        out["UndefendedPrediction"] = pred_undefended
        out["AdversarialPredictions"] = pred_adv
        with open(fname, "wb") as out_file:
            pickle.dump(out, out_file)

    # Run the attack
    for i in range(N):
        if i % 10 == 0:
            # Periodic checkpoint so partial results survive an interruption.
            print(fname, " ", i)
            _dump_results()

        x_real = data.test_data[i].reshape(shape_spsa)

        # Try a targeted attack for each of the 10 classes other than the
        # original network prediction.
        for y in range(10):
            if y != pred_undefended[i]:
                x_adv = sess.run(gen, {x_spsa: x_real, y_spsa: y})
                pred_adv[i, y] = np.argmax(
                    sess.run(defended_logits, {x: x_adv}))

    _dump_results()

    analysis(fname)
Example #27
0
## Copyright (C) 2016, Nicholas Carlini <*****@*****.**>.
##
## This program is licenced under the BSD 2-Clause licence,
## contained in the LICENCE file in this directory.

from setup_cifar import CIFAR, CIFARModel
from setup_mnist import MNIST, MNISTModel
from setup_inception import ImageNet, InceptionModel

import tensorflow as tf
import numpy as np

BATCH_SIZE = 1

with tf.Session() as sess:
    data, model = MNIST(), MNISTModel("models/mnist", sess)
    data, model = CIFAR(), CIFARModel("models/cifar", sess)
    data, model = ImageNet(), InceptionModel(sess)

    x = tf.placeholder(
        tf.float32,
        (None, model.image_size, model.image_size, model.num_channels))
    y = model.predict(x)

    r = []
    for i in range(0, len(data.test_data), BATCH_SIZE):
        pred = sess.run(y, {x: data.test_data[i:i + BATCH_SIZE]})
        #print(pred)
        #print('real',data.test_labels[i],'pred',np.argmax(pred))
        r.append(
            np.argmax(pred, 1) == np.argmax(data.test_labels[i:i +

def expandImage(image_data):
    """Return a new array equal to ``(image_data + 0.5) * 255``.

    The input is copied into a NumPy array first, so the argument itself is
    never modified.  Presumably this maps pixels from a [-0.5, 0.5] range
    back to [0, 255] -- confirm against the callers.
    """
    shifted = np.array(image_data) + 0.5
    return shifted * 255


# In[4]:

# Script entry point: build an MNIST model, configure an untargeted
# CarliniLi attack and initialise the counters used for the evaluation.
if __name__ == "__main__":
    # Let TensorFlow grow GPU memory on demand instead of reserving it all.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        # nn_robust_attack_root is defined elsewhere; it is used as a raw
        # prefix, so it presumably ends with a path separator -- TODO confirm.
        modelPath = '%smodels/mnist' % (nn_robust_attack_root)
        data, model = MNIST(), MNISTModel(modelPath, sess)

        attack = CarliniLi(sess, model, max_iterations=1000, targeted=False)

        # Request 1000 untargeted samples, starting from offset 5500.
        inputs, targets = generate_data(data,
                                        samples=1000,
                                        targeted=False,
                                        start=5500,
                                        inception=False)

        original_classified_wrong_number = 0  #number of benign samples that are misclassified
        disturbed_failure_number = 0  #number of samples that failed to craft corresponding adversarial samples
        test_number = 0  #number of adversarial samples that we generate
        # Tally counters; presumably true-positive / false-negative style
        # counts -- their use is not visible in this fragment.
        TTP = 0
        TP = 0
        FN = 0
Example #29
0
def main(args):
    with tf.Session() as sess:

        random.seed(SEED)
        np.random.seed(SEED)
        tf.set_random_seed(SEED)

        class_id = args['class_id']  ### input image (natural example)
        target_id = args[
            'target_id']  ### target images id (adv example) if target attack
        arg_max_iter = args['maxiter']  ### max number of iterations
        arg_init_const = args[
            'init_const']  ### regularization prior to attack loss
        arg_kappa = args['kappa']  ### attack confidence level
        arg_q = args['q']  ### number of random direction vectors
        arg_mode = args['mode']  ### algorithm name
        arg_save_iteration = args['save_iteration']
        arg_Dataset = args["dataset"]
        arg_targeted_attack = args["targeted_attack"]
        arg_bsz = args["mini_batch_sz"]
        idx_lr = args["lr_idx"]

        ## Load the classifier. For MNIST and CIFAR the pixel value range is [-0.5, 0.5].
        if (arg_Dataset == 'mnist'):
            data, model = MNIST(), MNISTModel("models/mnist", sess, True)
        elif (arg_Dataset == 'cifar10'):
            data, model = CIFAR(), CIFARModel("models/cifar", sess, True)
        elif (arg_Dataset == 'imagenet'):
            data, model = ImageNet_Universal(SEED), InceptionModel(sess, True)
            #model = InceptionModel(sess, True)
        else:
            print('Please specify a valid dataset')

        #orig_img = np.load('ori_img_backup.npy')
        orig_img = data.test_data[np.where(
            np.argmax(data.test_labels, 1) == class_id)]
        #np.save('ori_img_backup',orig_img)

        #true_label = data.test_labels[np.where(np.argmax(data.test_labels,1) == class_id)]
        _, orig_class = util.model_prediction_u(
            model, orig_img[:30]
        )  # take 30 or less images to make sure arg_bsz number of them are valid

        # filter out the images which misclassified already
        orig_img = orig_img[np.where(orig_class == class_id)]
        if orig_img.shape[0] < arg_bsz:
            assert 'no enough valid inputs'

        orig_img = orig_img[:arg_bsz]

        np.save('original_imgsID' + str(class_id), orig_img)
        #true_label = np.zeros((arg_bsz, 1001))
        #true_label[np.arange(arg_bsz), class_id] = 1
        true_label = class_id

        if arg_targeted_attack:  ### target attack
            #target_label = np.zeros((arg_bsz, 1001))
            #target_label[np.arange(arg_bsz), target_id] = 1
            target_label = target_id
        else:
            target_label = true_label

        #orig_img, target = util.generate_data(data, class_id, target_label)
        # shape of orig_img is (1,28,28,1) in [-0.5, 0.5]

        ##  parameter
        if orig_img.ndim == 3 or orig_img.shape[0] == 1:
            d = orig_img.size  # feature dim
        else:
            d = orig_img[0].size
        print("dimension = ", d)

        # mu=1/d**2  # smoothing parameter
        q = arg_q + 0
        I = arg_max_iter + 0
        kappa = arg_kappa + 0
        const = arg_init_const + 0

        ## flatten image to vec
        orig_img_vec = np.resize(orig_img, (arg_bsz, d))

        ## w adv image initialization
        if args["constraint"] == 'uncons':
            # * 0.999999 to avoid +-0.5 return +-infinity
            w_ori_img_vec = np.arctanh(
                2 * (orig_img_vec) * 0.999999
            )  # in real value, note that orig_img_vec in [-0.5, 0.5]
            w_img_vec = w_ori_img_vec.copy()
        else:
            w_ori_img_vec = orig_img_vec.copy()
            w_img_vec = w_ori_img_vec.copy()

        # ## test ##
        # for test_value in w_ori_img_vec[0, :]:
        #     if np.isnan(test_value) or np.isinf(test_value):
        #         print(test_value)

        delta_adv = np.zeros((1, d))  ### initialized adv. perturbation

        # initialize the best solution & best loss
        best_adv_img = []  # successful adv image in [-0.5, 0.5]
        best_delta = []  # best perturbation
        best_distortion = (0.5 * d)**2  # threshold for best perturbation
        total_loss = np.zeros(I)  ## I: max iters
        l2s_loss_all = np.zeros(I)
        attack_flag = False
        first_flag = True  ## record first successful attack

        # parameter setting for ZO gradient estimation
        mu = args["mu"]  ### smoothing parameter

        ## learning rate
        base_lr = args["lr"]

        if arg_mode == "ZOAdaMM":
            ## parameter initialization for AdaMM
            v_init = 1e-7  #0.00001
            v_hat = v_init * np.ones((1, d))
            v = v_init * np.ones((1, d))

            m = np.zeros((1, d))
            # momentum parameter for first and second order moment
            beta_1 = 0.9
            beta_2 = 0.3  # only used by AMSGrad
            print(beta_1, beta_2)

        #for i in tqdm(range(I)):
        for i in range(I):

            if args["decay_lr"]:
                base_lr = args["lr"] / np.sqrt(i + 1)

            ## Total loss evaluation
            if args["constraint"] == 'uncons':
                total_loss[i], l2s_loss_all[i] = function_evaluation_uncons(
                    w_img_vec, kappa, target_label, const, model, orig_img,
                    arg_targeted_attack)

            else:
                total_loss[i], l2s_loss_all[i] = function_evaluation_cons(
                    w_img_vec, kappa, target_label, const, model, orig_img,
                    arg_targeted_attack)

            ## gradient estimation w.r.t. w_img_vec
            if arg_mode == "ZOSCD":
                grad_est = grad_coord_estimation(mu, q, w_img_vec, d, kappa,
                                                 target_label, const, model,
                                                 orig_img, arg_targeted_attack,
                                                 args["constraint"])
            elif arg_mode == "ZONES":
                grad_est = gradient_estimation_NES(mu, q, w_img_vec, d, kappa,
                                                   target_label, const, model,
                                                   orig_img,
                                                   arg_targeted_attack,
                                                   args["constraint"])
            else:
                grad_est = gradient_estimation_v2(mu, q, w_img_vec, d, kappa,
                                                  target_label, const, model,
                                                  orig_img,
                                                  arg_targeted_attack,
                                                  args["constraint"])

            # if np.remainder(i,50)==0:
            # print("total loss:",total_loss[i])
            # print(np.linalg.norm(grad_est, np.inf))

            ## ZO-Attack, unconstrained optimization formulation
            if arg_mode == "ZOSGD":
                delta_adv = delta_adv - base_lr * grad_est
            if arg_mode == "ZOsignSGD":
                delta_adv = delta_adv - base_lr * np.sign(grad_est)
            if arg_mode == "ZOSCD":
                delta_adv = delta_adv - base_lr * grad_est
            if arg_mode == "ZOAdaMM":
                m = beta_1 * m + (1 - beta_1) * grad_est
                v = beta_2 * v + (1 - beta_2) * np.square(grad_est)  ### vt
                #print(np.mean(np.abs(m)),np.mean(np.sqrt(v)))
                v_hat = np.maximum(v_hat, v)
                delta_adv = delta_adv - base_lr * m / np.sqrt(v)
                if args["constraint"] == 'cons':
                    tmp = delta_adv.copy()
                    #X_temp = orig_img_vec.reshape((-1,1))
                    #V_temp2 = np.diag(np.sqrt(v_hat.reshape(-1)+1e-10))
                    V_temp = np.sqrt(v_hat.reshape(1, -1))
                    delta_adv = projection_box(tmp, orig_img_vec, V_temp, -0.5,
                                               0.5)
                    #delta_adv2 = projection_box_2(tmp, X_temp, V_temp2, -0.5, 0.5)
                # v_init = 1e-2 #0.00001
                # v = v_init * np.ones((1, d))
                # m = np.zeros((1, d))
                # # momentum parameter for first and second order moment
                # beta_1 = 0.9
                # beta_2 = 0.99  # only used by AMSGrad
                # m = beta_1 * m + (1-beta_1) * grad_est
                # v = np.maximum(beta_2 * v + (1-beta_2) * np.square(grad_est),v)
                # delta_adv = delta_adv - base_lr * m /np.sqrt(v+1e-10)
                # if args["constraint"] == 'cons':
                #     V_temp = np.diag(np.sqrt(v.reshape(-1)+1e-10))
                #     X_temp = orig_img_vec.reshape((-1,1))
                #     delta_adv = projection_box(delta_adv, X_temp, V_temp, -0.5, 0.5)
            if arg_mode == "ZOSMD":
                delta_adv = delta_adv - 0.5 * base_lr * grad_est
                # delta_adv = delta_adv - base_lr* grad_est
                if args["constraint"] == 'cons':
                    #V_temp = np.eye(orig_img_vec.size)
                    V_temp = np.ones((1, d))
                    #X_temp = orig_img_vec.reshape((-1,1))
                    delta_adv = projection_box(delta_adv, orig_img_vec, V_temp,
                                               -0.5, 0.5)
            if arg_mode == "ZOPSGD":
                delta_adv = delta_adv - base_lr * grad_est
                if args["constraint"] == 'cons':
                    #V_temp = np.eye(orig_img_vec.size)
                    V_temp = np.ones((1, d))
                    #X_temp = orig_img_vec.reshape((-1,1))
                    delta_adv = projection_box(delta_adv, orig_img_vec, V_temp,
                                               -0.5, 0.5)
            if arg_mode == "ZONES":
                delta_adv = delta_adv - base_lr * np.sign(grad_est)
                if args["constraint"] == 'cons':
                    #V_temp = np.eye(orig_img_vec.size)
                    V_temp = np.ones((1, d))
                    #X = orig_img_vec.reshape((-1,1))
                    delta_adv = projection_box(delta_adv, orig_img_vec, V_temp,
                                               -0.5, 0.5)

            # if arg_mode == "ZO-AdaFom":
            #     m = beta_1 * m + (1-beta_1) * grad_est
            #     v = v* (float(i)/(i+1)) + np.square(grad_est)/(i+1)
            #     w_img_vec = w_img_vec - base_lr * m/np.sqrt(v)
            ##

            ### adv. example update
            w_img_vec = w_ori_img_vec + delta_adv

            ## convert back to adv_img in [-0.5 , 0.5]
            if args["constraint"] == 'uncons':
                adv_img_vec = 0.5 * np.tanh((w_img_vec)) / 0.999999  #
            else:
                adv_img_vec = w_img_vec.copy()

            adv_img = np.resize(adv_img_vec, orig_img.shape)

            ## update the best solution in the iterations
            attack_prob, _, _ = util.model_prediction(model, adv_img)
            target_prob = attack_prob[:, target_label]
            attack_prob_tmp = attack_prob.copy()
            attack_prob_tmp[:, target_label] = 0
            other_prob = np.amax(attack_prob_tmp, 1)

            if i % 1000 == 0 and i != 0:
                if arg_mode == "ZOAdaMM": print(beta_1, beta_2)
                print("save delta_adv")
                np.save(
                    'retimgs/' + str(i) + 'itrs' +
                    str(np.argmax(attack_prob, 1)) + arg_mode +
                    str(args["lr"]), delta_adv)

            if args["print_iteration"]:
                if np.remainder(i + 1, 20) == 0:
                    if (true_label != np.argmax(attack_prob, 1)).all():
                        print(
                            "Iter %d (Succ): ID = %d, lr = %3.7f, decay = %d, ZO = %s %s, loss = %3.5f, l2sdist = %3.5f, TL = %d, PL = %s"
                            % (i + 1, class_id, args["lr"],
                               int(args["decay_lr"]), arg_mode,
                               args["constraint"], total_loss[i],
                               l2s_loss_all[i], true_label,
                               np.argmax(attack_prob, 1)))
                    else:
                        sr = np.sum(
                            true_label != np.argmax(attack_prob, 1)) / arg_bsz
                        print(
                            "Iter %d (Fail): ID = %d, lr = %3.7f, decay = %d, ZO = %s %s, loss = %3.5f, l2sdist = %3.5f, TL = %d, PL = %s, succ rate = %.2f"
                            % (i + 1, class_id, args["lr"],
                               int(args["decay_lr"]), arg_mode,
                               args["constraint"], total_loss[i],
                               l2s_loss_all[i], true_label,
                               np.argmax(attack_prob, 1), sr))

            if arg_save_iteration:
                os.system("mkdir Examples")
                if (np.logical_or(
                        true_label != np.argmax(attack_prob, 1),
                        np.remainder(i + 1, 10) == 0)):  ## every 10 iterations
                    suffix = "id_{}_Mode_{}_True_{}_Pred_{}_Ite_{}".format(
                        class_id, arg_mode, true_label,
                        np.argmax(attack_prob, 1), i + 1)
                    # util.save_img(adv_img, "Examples/{}.png".format(suffix))

            if arg_targeted_attack:
                if ((np.log(target_prob + 1e-10) - np.log(other_prob + 1e-10))
                        >= kappa).all():  # check attack confidence
                    if (distortion(adv_img, orig_img) <
                            best_distortion):  # check distortion
                        # print('best distortion obtained at',i,'-th iteration')
                        best_adv_img = adv_img
                        best_distortion = distortion(adv_img, orig_img)
                        #best_delta = adv_img - orig_img
                        best_iteration = i + 1
                        adv_class = np.argmax(attack_prob, 1)
                        attack_flag = True
                        ## Record first attack
                        if (first_flag):
                            first_flag = False  ### once gets into this, it will no longer record the next sucessful attack
                            first_adv_img = adv_img
                            first_distortion = distortion(adv_img, orig_img)
                            #first_delta = adv_img - orig_img
                            first_class = adv_class
                            first_iteration = i + 1
            else:
                if ((np.log(other_prob + 1e-10) - np.log(target_prob + 1e-10))
                        >= kappa).all():  # check attack confidence
                    if (distortion(adv_img, orig_img) <
                            best_distortion):  # check distortion
                        # print('best distortion obtained at',i,'-th iteration')
                        best_adv_img = adv_img
                        best_distortion = distortion(adv_img, orig_img)
                        #best_delta = adv_img - orig_img
                        best_iteration = i + 1
                        adv_class = np.argmax(attack_prob, 1)
                        attack_flag = True
                        ## Record first attack
                        if (first_flag):
                            first_flag = False
                            first_adv_img = adv_img
                            first_distortion = distortion(adv_img, orig_img)
                            #first_delta = adv_img - orig_img
                            first_class = adv_class
                            first_iteration = i + 1

        if (attack_flag):
            # os.system("mkdir Results_SL")
            # ## best attack (final attack)
            # suffix = "id_{}_Mode_{}_True_{}_Pred_{}".format(class_id, arg_mode, true_label, orig_class) ## orig_class, predicted label
            # suffix2 = "id_{}_Mode_{}_True_{}_Pred_{}".format(class_id, arg_mode, true_label, adv_class)
            # suffix3 = "id_{}_Mode_{}".format(class_id, arg_mode)
            # ### save original image
            # util.save_img(orig_img, "Results_SL/id_{}.png".format(class_id))
            # util.save_img(orig_img, "Results_SL/{}_Orig.png".format(suffix))
            # ### adv. image
            # util.save_img(best_adv_img, "Results_SL/{}_Adv_best.png".format(suffix2))
            # ### adv. perturbation
            # util.save_img(best_delta, "Results_SL/{}_Delta_best.png".format(suffix3))
            #
            #
            # ## first attack
            # suffix4 = "id_{}_Mode_{}_True_{}_Pred_{}".format(class_id, arg_mode, true_label, first_class)
            # ## first adv. imag
            # util.save_img(first_adv_img, "Results_SL/{}_Adv_first.png".format(suffix4))
            # ### first adv. perturbation
            # util.save_img(first_delta, "Results_SL/{}_Delta_first.png".format(suffix3))

            ## save data
            suffix0 = "id_{}_Mode_{}_{}_lr_{}_decay_{}_case{}_ini_{}".format(
                class_id, arg_mode, args["constraint"], str(args["lr"]),
                int(args["decay_lr"]), args["exp_code"], args["init_const"])
            np.savez("{}".format(suffix0),
                     id=class_id,
                     mode=arg_mode,
                     loss=total_loss,
                     perturbation=l2s_loss_all,
                     best_distortion=best_distortion,
                     first_distortion=first_distortion,
                     first_iteration=first_iteration,
                     best_iteation=best_iteration,
                     learn_rate=args["lr"],
                     decay_lr=args["decay_lr"],
                     attack_flag=attack_flag)
            ## print
            print("It takes {} iteations to find the first attack".format(
                first_iteration))
            # print(total_loss)
        else:
            ## save data
            suffix0 = "id_{}_Mode_{}_{}_lr_{}_decay_{}_case{}_ini_{}".format(
                class_id, arg_mode, args["constraint"], str(args["lr"]),
                int(args["decay_lr"]), args["exp_code"], args["init_const"])
            np.savez("{}".format(suffix0),
                     id=class_id,
                     mode=arg_mode,
                     loss=total_loss,
                     perturbation=l2s_loss_all,
                     best_distortion=best_distortion,
                     learn_rate=args["lr"],
                     decay_lr=args["decay_lr"],
                     attack_flag=attack_flag)
            print("Attack Fails")

        sys.stdout.flush()
Example #30
0
        #print(h%10)
        #print(sess.run(model.predict(new_img)[h]))
        #print(np.argmax(label_batch,1)[h])
        #print(loss_instant[h])
        #print(np.argmax(sess.run(model.predict(tf.tanh(img)*0.5+0.5))[h]))
        #print(sess.run(attack_pixel[h]))

        #label_acc.append(sum(np.argmax(sess.run(model.predict(new_img)),1)==np.argmax(label_batch,1))/90)
        #label_disloss.append(sum(lowest_dist[lowest_dist<1000])/len(lowest_dist<1000))
        #print(lowest_dist)
    #print(label_disloss)
    #print(label_acc)
    #print(label_disloss)
    print(sum(const_c) / 90, min(const_c), max(const_c))


# Driver script: load the dataset, build one batch, restore the MNIST model
# and print whatever attack2 returns for that batch.
# Only the training split is consumed in the lines below; test_data and
# test_label are bound but not referenced again in this part of the file.

train_data, train_label, test_data, test_label = load_data()

# presumably selects the images/labels to attack — batch_loader is defined elsewhere; verify
data_batch, label_batch = batch_loader(train_data, train_label)

# Restore the model inside a TF session; the session is closed on exit of the
# with-block, so every call that needs it must stay inside.
with tf.Session() as sess:

    model = MNISTModel('models/mnist', sess)

    # Map the batch through arctanh. The 0.999 factor shrinks the argument so
    # that, assuming data_batch lies in [0, 1] (TODO confirm), it stays
    # strictly inside (-1, 1) where arctanh is finite.
    # NOTE(review): original_img is not read again in the visible script —
    # confirm whether attack2 relies on it as a global before removing it.
    original_img = np.arctanh((data_batch - 0.5) * 2 * 0.999)

    # The third argument (90) is passed straight to attack2; presumably the
    # batch size, matching the "/ 90" average printed above — verify.
    print(attack2(data_batch, label_batch, 90, model, sess))