def __init__(self, model, x=None, y=None):
    """Build logits, softmax probabilities and cross-entropy loss for a model.

    :param model: Keras model; it is wrapped in ``KerasModelWrapper`` to
        obtain its logits.
    :param x: placeholder for inputs
    :param y: placeholder for labels
    """
    self.keras_model = model
    logits = KerasModelWrapper(model).get_logits(x)
    self.predictions = logits
    self.probs = tf.nn.softmax(logits=logits)
    self.loss = tf.nn.softmax_cross_entropy_with_logits(
        labels=y, logits=logits)
def test_get_logits(self):
    """Check that ``get_probs`` equals a softmax applied to ``get_logits``."""
    import tensorflow as tf

    wrapper = KerasModelWrapper(self.model)
    x = tf.placeholder(tf.float32, shape=(None, 100))
    probs_op = wrapper.get_probs(x)
    logits_op = wrapper.get_logits(x)

    batch = np.random.rand(2, 100)
    tf.global_variables_initializer().run(session=self.sess)
    probs_val, logits_val = self.sess.run([probs_op, logits_op],
                                          feed_dict={x: batch})

    # Reference softmax computed in NumPy from the fetched logits.
    exp_logits = np.exp(logits_val)
    expected = exp_logits / np.sum(exp_logits, axis=1, keepdims=True)
    self.assertTrue(np.allclose(probs_val, expected, atol=1e-6))
def test_get_logits(self):
    """Verify the wrapper's probabilities are the softmax of its logits."""
    import tensorflow as tf

    model = KerasModelWrapper(self.model)
    inputs = tf.placeholder(tf.float32, shape=(None, 100))
    # Probabilities first, then logits: same op-creation order as the fetches.
    fetches = [model.get_probs(inputs), model.get_logits(inputs)]

    samples = np.random.rand(2, 100)
    tf.global_variables_initializer().run(session=self.sess)
    p_val, logit_vals = self.sess.run(fetches, feed_dict={inputs: samples})

    # Ground-truth softmax recomputed from the evaluated logits.
    unnormalized = np.exp(logit_vals)
    p_gt = unnormalized / np.sum(unnormalized, axis=1, keepdims=True)
    self.assertTrue(np.allclose(p_val, p_gt, atol=1e-6))
def train_sub(data_aug, sess, x_sub, lmbda, target_model,
              aug_batch_size=AUG_BATCH_SIZE):
    """Train a substitute model on data labelled by ``target_model``.

    Each round fits the substitute on the current data; every round except
    the last then grows ``x_sub`` with ``jacobian_augmentation`` and labels
    the second half of the grown set with ``bbox_predict``.

    :param data_aug: number of training rounds
    :param sess: session passed to the augmentation and bbox helpers
    :param x_sub: initial substitute training inputs
    :param lmbda: step size; its sign is flipped per round (see below)
    :param target_model: model queried through ``bbox_predict`` for labels
    :param aug_batch_size: batch size forwarded to ``jacobian_augmentation``
    :return: the ``KerasModelWrapper`` around the trained substitute
    """
    sub_shape = (None, SUB_IMAGE_SIZE, SUB_IMAGE_SIZE, NUM_OF_CHANNELS)
    bbox_shape = (None, BBOX_IMAGE_SIZE, BBOX_IMAGE_SIZE, NUM_OF_CHANNELS)
    placeholder_sub = tf.placeholder(tf.float32, shape=sub_shape)
    placeholder_bbox = tf.placeholder(tf.float32, shape=bbox_shape)

    print("Loading substitute model...")
    model = get_model_category_by_id(SUBSTITUTE_MODEL_ID, NB_SUB_CLASSES,
                                     metric='accuracy')
    # The id '-1' selects the simple vanilla CNN, which is compiled here.
    if SUBSTITUTE_MODEL_ID == '-1':
        model = get_simple_model(NB_SUB_CLASSES, SUB_IMAGE_SIZE)
        model.compile(optimizer=Adam(lr=0.1, decay=1e-6),
                      loss="categorical_crossentropy",
                      metrics=['accuracy'])
    model_sub = KerasModelWrapper(model)
    preds_sub = model_sub.get_logits(placeholder_sub)
    print("Subsitute model loaded.")

    # Symbolic Jacobian of the substitute's logits w.r.t. its input.
    print("Defining jacobian graph...")
    grads = jacobian_graph(preds_sub, placeholder_sub, NB_SUB_CLASSES)
    print("Jacobian graph defined.")

    # Initial labels come from the black box.
    y_sub = bbox_predict(target_model, x_sub, sess, placeholder_bbox,
                         batch_size=1)
    train_gen = TransferGenerator(x_sub, labels=y_sub,
                                  num_classes=NB_SUB_CLASSES,
                                  batch_size=BATCH_SIZE,
                                  image_size=SUB_IMAGE_SIZE)

    last_round = data_aug - 1
    for rho in xrange(data_aug):
        print("Substitute training epoch #" + str(rho))
        train_gen.reinitialize(x_sub, y_sub, BATCH_SIZE, SUB_IMAGE_SIZE)
        print("Fitting the generator with the labels: ")
        print(train_gen.labels)
        model_sub.model.fit_generator(generator=train_gen, epochs=NUM_EPOCHS)

        # No augmentation after the final round.
        if rho >= last_round:
            continue

        print("Augmenting substitute training data...")
        # -1 while int(rho / 3) == 0, +1 afterwards.
        lmbda_coef = 1 if int(rho / 3) != 0 else -1
        x_sub = jacobian_augmentation(sess, placeholder_sub, x_sub, y_sub,
                                      grads, lmbda_coef * lmbda,
                                      aug_batch_size)
        print("Substitute training data augmented.")

        print("Labeling substitute training data using bbox...")
        half = int(len(x_sub) / 2)
        y_sub = np.hstack([y_sub, y_sub])
        # Only the second half of the grown set is relabelled.
        x_sub_new = x_sub[half:]
        y_sub[half:] = bbox_predict(target_model, x_sub_new, sess,
                                    placeholder_bbox)

    return model_sub
def __init__(self, model, preprocess, x=None, y=None):
    """Build the prediction, probability and loss tensors for a Keras model.

    :param model: Keras model, wrapped in ``KerasModelWrapper`` for logits
    :param preprocess: callable applied to ``x`` before the model sees it
    :param x: placeholder for inputs
    :param y: placeholder for labels
    """
    self.x = x
    self.y = y
    self.keras_model = model
    wrapped = KerasModelWrapper(model)

    self.preprocess = preprocess
    self.processed_image = self.preprocess(self.x)

    # Logits of the wrapped model on the preprocessed input.
    self.predictions = wrapped.get_logits(self.processed_image)
    self.eval_preds = tf.argmax(self.predictions, 1)
    self.probs = tf.nn.softmax(logits=self.predictions)
    self.loss = tf.nn.softmax_cross_entropy_with_logits(
        labels=self.y, logits=self.predictions)
def prep_bbox(sess, x, y, X_train, Y_train, X_test, Y_test,
              nb_epochs, batch_size, learning_rate,
              rng, nb_classes=10, img_rows=28, img_cols=28, nchannels=1):
    """
    Define and train a model that simulates the "remote"
    black-box oracle described in the original paper.

    :param sess: the TF session
    :param x: the input placeholder for MNIST
    :param y: the output placeholder for MNIST
    :param X_train: the training data for the oracle
    :param Y_train: the training labels for the oracle
    :param X_test: the testing data for the oracle
    :param Y_test: the testing labels for the oracle
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :param rng: numpy.random.RandomState
    :param nb_classes: number of classes passed to the model and its wrapper
    :param img_rows: not read by this function
    :param img_cols: not read by this function
    :param nchannels: not read by this function
    :return: tuple ``(wrapped model, logits tensor, test accuracy)``
    """
    # Keras-based TF graph for the black box, wrapped in KerasModelWrapper.
    keras_cnn = cnn_model(nb_filters=64, nb_classes=nb_classes)
    model = KerasModelWrapper(keras_cnn, nb_classes)
    loss = LossCrossEntropy(model, smoothing=0.1)
    predictions = model.get_logits(x)
    print("Defined TensorFlow model graph.")

    # Train the oracle.
    train_params = dict(nb_epochs=nb_epochs,
                        batch_size=batch_size,
                        learning_rate=learning_rate)
    train(sess, loss, x, y, X_train, Y_train, args=train_params, rng=rng)

    # Report the accuracy on legitimate data.
    accuracy = model_eval(sess, x, y, predictions, X_test, Y_test,
                          args={'batch_size': batch_size})
    print('Test accuracy of black-box on legitimate test examples: '
          + str(accuracy))

    return model, predictions, accuracy
def __init__(self, sess, type=1, use_softmax=True, x=None, y=None,
             load_existing=False, model_name='modelA', loss='cw'):
    """Build the model, its loss and its evaluation tensors.

    :param sess: TF session, stored on the instance
    :param type: model type forwarded to ``model_mnist`` when a new model
        is built
    :param use_softmax: not read by this method
    :param x: placeholder for inputs
    :param y: placeholder for labels; it is multiplied with the
        probabilities, so presumably one-hot -- confirm against callers
    :param load_existing: when true, load ``<model_name>.h5`` from
        ``MNIST_models/normal_models/`` instead of building a new model
    :param model_name: base name of the saved model file
    :param loss: ``'cw'`` (log-probability margin) or ``'xent'``
        (softmax cross-entropy)
    :raises NotImplementedError: if ``loss`` is neither ``'cw'`` nor
        ``'xent'``
    """
    self.x = x
    self.y = y
    self.sess = sess

    if load_existing:
        save_dir = 'MNIST_models/normal_models/'
        filepath = os.path.join(save_dir, model_name + '.h5')
        model = load_model(filepath)
        self.model = model
        model = KerasModelWrapper(model)
        self.predictions = model.get_logits(self.x)
    else:
        model, preds = model_mnist(input_ph=x, type=type)
        self.model = model
        self.predictions = preds

    self.probs = tf.nn.softmax(logits=self.predictions)

    # The loss is built exactly once, in the branch that is selected. An
    # earlier unconditional cross-entropy op was always overwritten (or
    # followed by the raise below) and only left an unused node in the graph.
    if loss == 'cw':
        target_probs = tf.reduce_sum(self.y * self.probs, 1)
        # Subtracting 10000 at the target position keeps the target class
        # from being picked by the max over the other classes.
        other_probs = tf.reduce_max(
            (1 - self.y) * self.probs - (self.y * 10000), 1)
        # The 1e-30 offset keeps the logarithms finite at zero probability.
        self.loss = tf.log(other_probs + 1e-30) - tf.log(target_probs + 1e-30)
    elif loss == 'xent':
        self.loss = tf.nn.softmax_cross_entropy_with_logits(
            labels=self.y, logits=self.predictions)
    else:
        raise NotImplementedError(
            "unsupported loss %r; expected 'cw' or 'xent'" % (loss,))

    self.eval_preds = tf.argmax(self.predictions, 1)
    self.y_target = tf.placeholder(tf.int64, shape=None)  # tensor.shape (?,)
    # Element-wise comparison of predicted class and target class.
    self.eval_percent_adv = tf.equal(self.eval_preds, self.y_target)
def prep_bbox(sess, x, y, x_train, y_train, x_test, y_test,
              nb_epochs, batch_size, learning_rate,
              rng, nb_classes=10, img_rows=28, img_cols=28, nchannels=1):
    """
    Load the pre-trained Keras model that simulates the "remote"
    black-box oracle described in the original paper, and evaluate it.

    Nothing is trained here: the model is read from ``MODEL_PATH``. The
    training-related parameters are kept so the signature matches callers
    but are not read by this function.

    :param sess: the TF session; also installed as the Keras session
    :param x: the input placeholder for MNIST
    :param y: the output placeholder for MNIST
    :param x_train: not read by this function
    :param y_train: not read by this function
    :param x_test: the testing data for the oracle
    :param y_test: the testing labels for the oracle
    :param nb_epochs: not read by this function
    :param batch_size: batch size used for evaluation
    :param learning_rate: not read by this function
    :param rng: not read by this function
    :param nb_classes: not read by this function
    :param img_rows: not read by this function
    :param img_cols: not read by this function
    :param nchannels: not read by this function
    :return: tuple ``(wrapped oracle, logits tensor, test accuracy)``
    :raises FileNotFoundError: if the model cannot be loaded from
        ``MODEL_PATH``; the underlying error is attached as ``__cause__``
    """
    import errno
    import os

    keras.layers.core.K.set_learning_phase(1)
    keras.backend.set_session(sess)

    model_path = MODEL_PATH
    try:
        keras_model = load_model(model_path)
    except Exception as err:
        # `Exception` rather than a bare `except:` so KeyboardInterrupt and
        # SystemExit are no longer converted into FileNotFoundError. The
        # raised type is unchanged for callers; the real cause is chained
        # so it is visible in the traceback.
        raise FileNotFoundError(errno.ENOENT,
                                os.strerror(errno.ENOENT),
                                model_path) from err
    # Wrapping happens outside the try so a wrapper failure is not
    # misreported as a missing file.
    oracle = KerasModelWrapper(keras_model)

    # NOTE(review): `loss` is not used below; the call is kept because the
    # constructor's side effects cannot be confirmed from this file.
    loss = CrossEntropy(oracle, smoothing=0.1)
    predictions = oracle.get_logits(x)
    print("Loaded well-trained Keras oracle.")

    # Print out the accuracy on legitimate data
    eval_params = {'batch_size': batch_size}
    accuracy = model_eval(sess, x, y, predictions, x_test, y_test,
                          args=eval_params)
    print('Test accuracy of black-box on legitimate test '
          'examples: ' + str(accuracy))

    return oracle, predictions, accuracy
class VGG16(Model):
    """Model built on the Keras VGG16 application with ImageNet weights.

    Inputs to the logits/probabilities/layer accessors are always passed
    through the Keras VGG16 ``preprocess_input`` function first.
    """
    __metaclass__ = ABCMeta

    def __init__(self):
        """Load the Keras VGG16 network and wrap it in ``KerasModelWrapper``."""
        Model.__init__(self)
        # Imported under an alias so the Keras class does not shadow this one.
        from keras.applications.vgg16 import VGG16 as KerasVGG16
        from keras.applications.vgg16 import preprocess_input
        from keras.applications.vgg16 import decode_predictions

        self.keras_model = KerasVGG16(weights='imagenet')
        self.model = KerasModelWrapper(self.keras_model)
        self.preprocess_input = preprocess_input
        self.decode_predictions = decode_predictions

    def get_logits(self, x):
        """Return the wrapped model's logits for the preprocessed ``x``."""
        prepared = self.preprocess_input(x)
        return self.model.get_logits(prepared)

    def get_probs(self, x):
        """Return the wrapped model's probabilities for the preprocessed ``x``."""
        prepared = self.preprocess_input(x)
        return self.model.get_probs(prepared)

    def get_layer(self, x, layer):
        """Return the output stored under ``layer`` by the wrapper's ``fprop``.

        :raises NoSuchLayerError: if ``layer`` is not a key of the result
        """
        outputs = self.model.fprop(self.preprocess_input(x))
        try:
            return outputs[layer]
        except KeyError:
            raise NoSuchLayerError()

    def get_layer_names(self):
        """
        :return: Names of all the layers kept by Keras
        """
        return [keras_layer.name for keras_layer in self.keras_model.layers]

    def predict(self, x, preprocess=False):
        """Run the Keras model's ``predict``, optionally preprocessing ``x``."""
        if preprocess:
            x = self.preprocess_input(x)
        return self.keras_model.predict(x)
def __init__(self, sess, test_batch_size, type=1, use_softmax=True,
             x=None, y=None, is_training=None,
             keep_prob=None, load_existing=False,
             model_name='modelA', loss='cw'):
    """Build the model, its evaluation tensors and its loss.

    :param sess: TF session, stored on the instance
    :param test_batch_size: stored on the instance
    :param type: model type forwarded to ``model_cifar10`` when a new
        model is built
    :param use_softmax: not read by this method
    :param x: placeholder for inputs
    :param y: placeholder for labels
    :param is_training: stored on the instance
    :param keep_prob: stored on the instance
    :param load_existing: when true, load ``<model_name>.h5`` from the
        saved-model directory instead of building a new model
    :param model_name: base name of the saved model file
    :param loss: ``'cw'`` (logit margin) or ``'xent'`` (cross-entropy)
    :raises NotImplementedError: for any other ``loss`` value
    """
    self.x, self.y = x, y
    self.sess = sess
    self.is_training = is_training
    self.keep_prob = keep_prob
    self.test_batch_size = test_batch_size

    if not load_existing:
        built_model, logits = model_cifar10(input_ph=x, type=type)
        self.model = built_model
        self.predictions = logits
    else:
        # TODO: put your own ROOT directory of simple cifar10 models
        save_dir = 'CIFAR10_models/Normal_simple_models'
        keras_model = load_model(os.path.join(save_dir, model_name + '.h5'))
        self.model = keras_model
        self.predictions = KerasModelWrapper(keras_model).get_logits(self.x)

    self.probs = tf.nn.softmax(logits=self.predictions)
    self.eval_preds = tf.argmax(self.predictions, 1)
    self.y_target = tf.placeholder(tf.int64, shape=None)  # tensor.shape (?,)
    # Element-wise comparison of predicted class and target class.
    self.eval_percent_adv = tf.equal(self.eval_preds, self.y_target)

    if loss == 'xent':
        self.loss = tf.nn.softmax_cross_entropy_with_logits(
            labels=self.y, logits=self.predictions)
    elif loss == 'cw':
        self.target_logits = tf.reduce_sum(self.y * self.predictions, 1)
        # The 10000 offset removes the target class from the max.
        masked_logits = (1 - self.y) * self.predictions - (self.y * 10000)
        self.other_logits = tf.reduce_max(masked_logits, 1)
        self.loss = self.other_logits - self.target_logits
    else:
        raise NotImplementedError
def train_sub(data_aug, sess, x_sub, y_sub, lmbda, target_model,
              aug_batch_size=1):
    """Train a substitute model with per-sample Jacobian augmentation.

    Each round fits the substitute for one epoch. Every round except the
    last then doubles the data: each sample is augmented on its own through
    ``jacobian_augmentation`` and the new samples are labelled by
    ``bbox_predict``.

    After an augmentation round the data is laid out as
    ``[previous samples..., augmented samples...]``. That layout is what the
    label update relies on: it duplicates ``y_sub`` and overwrites only the
    second half with black-box predictions.

    :param data_aug: number of training rounds
    :param sess: session passed to the augmentation and bbox helpers
    :param x_sub: initial substitute training inputs
    :param y_sub: initial labels, one per row of ``x_sub``
    :param lmbda: augmentation step size; its sign is flipped per round
    :param target_model: model queried through ``bbox_predict`` for labels
    :param aug_batch_size: batch size forwarded to ``jacobian_augmentation``
    :return: the ``KerasModelWrapper`` around the trained substitute
    """
    x = tf.placeholder(tf.float32,
                       shape=(None, IMAGE_SIZE, IMAGE_SIZE, NUM_OF_CHANNELS))

    print("Loading substitute model...")
    model = get_model("InceptionResNetV2")
    model.compile(optimizer=Adam(), loss="categorical_crossentropy",
                  metrics=[age_mae])
    model_sub = KerasModelWrapper(model)
    preds_sub = model_sub.get_logits(x)
    print("Subsitute model loaded.")

    # Define the Jacobian symbolically using TensorFlow
    print("Defining jacobian graph...")
    grads = jacobian_graph(preds_sub, x, NB_CLASSES)
    print("Jacobian graph defined.")

    # NOTE(review): num_classes is hard-coded to 101 while the Jacobian uses
    # NB_CLASSES -- confirm the two agree.
    train_gen = TransferGenerator(x_sub, y_sub, num_classes=101,
                                  batch_size=BATCH_SIZE,
                                  image_size=IMAGE_SIZE)

    # One-sample scratch buffer reused across rounds; every use overwrites
    # it completely before it is read.
    input_sample = np.empty(
        shape=(1, IMAGE_SIZE, IMAGE_SIZE, NUM_OF_CHANNELS), dtype=np.float32)

    # `range` instead of the Python-2-only `xrange`; the inner loop already
    # used `range`.
    for rho in range(data_aug):
        print("Substitute training epoch #" + str(rho))
        train_gen.reinitialize(data=x_sub, labels=y_sub,
                               batch_size=BATCH_SIZE, image_size=IMAGE_SIZE)
        model_sub.model.fit_generator(generator=train_gen, epochs=1)

        if rho < data_aug - 1:
            print("Augmenting substitute training data...")
            # Perform the Jacobian augmentation
            # -1 while int(rho / 3) == 0, +1 afterwards.
            lmbda_coef = 2 * int(int(rho / 3) != 0) - 1
            nb_prev = len(x_sub)
            x_sub_tmp = np.vstack([x_sub, x_sub])
            for i in range(0, len(y_sub)):
                input_sample[0, :, :, :] = x_sub[i]
                adv = jacobian_augmentation(sess=sess, x=x,
                                            X_sub_prev=input_sample,
                                            Y_sub=[y_sub[i]], grads=grads,
                                            lmbda=lmbda_coef * lmbda,
                                            aug_batch_size=aug_batch_size)
                # adv[0] goes back to position i and adv[1] into the second
                # half. Interleaving them (2*i, 2*i + 1) would misalign the
                # first-half samples with the labels kept in y_sub[:nb_prev].
                x_sub_tmp[i] = adv[0, :, :, :]
                x_sub_tmp[nb_prev + i] = adv[1, :, :, :]
            x_sub = x_sub_tmp
            print("Substitute training data augmented.")

            print("Labeling substitute training data using bbox...")
            y_sub = np.hstack([y_sub, y_sub])
            half = int(len(x_sub) / 2)
            # Only the newly generated half is labelled by the black box.
            x_sub_prev = x_sub[half:]
            predictions = bbox_predict(target_model, x_sub_prev, sess, x)
            y_sub[half:] = predictions

    return model_sub
class KerasModel():
    """Holder for a Keras model, its TF session and its cleverhans wrapper.

    NOTE(review): several methods read ``self.weight_path``, which is never
    assigned in this class -- presumably a subclass or caller sets it.
    """
    WEIGHT_DIR = ""

    def __init__(self, name):
        """Store ``name`` and start with no session, model or wrapper."""
        self.name = name
        self._model = dict(preprocess=unify_preprocess, default_input_size=299)
        self.sess = None
        self.model = None
        self.cleverhans_model = None

    def load_weight(self, sess=None, checkpoint_path=''):
        """Load the model from ``self.weight_path``, or reload its weights.

        ``sess`` and ``checkpoint_path`` are not read by this method.
        """
        if self.model is not None:
            self.model.load_weights(self.weight_path)
            print("loaded keras model weights from ", self.weight_path)
        else:
            self._load_model(self.weight_path)

    def _load_model(self, path):
        """Load the model at ``path`` if it exists and return ``self.model``.

        When ``path`` does not exist a message is printed and the current
        ``self.model`` is returned unchanged.
        """
        if not os.path.exists(path):
            print("keras model path not exit", path)
            return self.model
        self.model = load_model(path)
        self.cleverhans_model = KerasModelWrapper(self.model)
        print("loaded keras model from ", path)
        return self.model

    def _input_resize(self, imgs):
        """Resize ``imgs`` to the model's square default input size."""
        side = self._model['default_input_size']
        return tf.image.resize_images(imgs, [side, side])

    def output_resize(self, imgs, size):
        """Resize ``imgs`` to ``size`` x ``size``."""
        return tf.image.resize_images(imgs, [size, size])

    def preprocess(self, imgs):
        """Resize ``imgs``, then apply the configured preprocess function."""
        resized = self._input_resize(imgs)
        return self._model['preprocess'](resized)

    def predict_create_graph(self, batch_shape=None, use_prob=True, TOP_K=1):
        """Open a fresh session and load the model inside it.

        Any existing session is cleared first. ``batch_shape`` and ``TOP_K``
        are not read by this method.
        """
        if use_prob == False:  # noqa: E712 -- comparison kept as written
            print("Keras Model, use_prob==False not implemented!!")
        if self.sess:
            self.clear_session()
        self.sess = tf.Session(config=gpu_session_config())
        with self.sess.as_default():
            self.load_weight(self.sess)

    def predict_batch(self, X, Y=None):
        """Preprocess ``X`` and return the model's predictions for it.

        :return: the predictions alone when ``Y`` is ``None``, otherwise the
            tuple ``(predictions, None, None)``
        """
        with self.sess.as_default():
            batch = self.preprocess(X)
            ypred = self.model.predict_on_batch(batch)
            if Y is None:
                return ypred
            return ypred, None, None

    def evaluate_generator(self, generator, batch_shape=None, use_prob=True):
        """Predict every batch of ``generator`` and compute the accuracy.

        ``generator`` yields triples whose second and third items are the
        inputs and the labels; both predictions and labels are compared via
        ``argmax(1)``.

        :return: tuple ``(concatenated predictions, None, accuracy)``
        """
        batch_preds = []
        n_correct = 0
        n_seen = 0
        p = Profile(self.name + 'evaluate_generator ')
        self.predict_create_graph()
        for _, X, Y in generator:
            ypred = self.predict_batch(X)
            batch_preds.append(ypred)
            hits = ypred.argmax(1) == Y.argmax(1)
            n_correct += X[hits].shape[0]
            n_seen += X.shape[0]
        total_accuracy = float(n_correct / n_seen)
        p.stop()
        return np.concatenate(batch_preds), None, total_accuracy

    def clear_session(self):
        """Drop the model, close the session and reset Keras/TF graph state."""
        K.clear_session()
        del self.model
        self.model = None
        if self.sess:
            self.sess.close()
        self.sess = None
        tf.reset_default_graph()

    def reload(self):
        """Clear the current session and load the model again."""
        self.clear_session()
        self.model = self._load_model(self.weight_path)

    def get_logits(self, x, nb_classes):
        """Return the cleverhans wrapper's logits; ``nb_classes`` is unused."""
        return self.cleverhans_model.get_logits(x)

    def get_probs(self, x, nb_classes):
        """Return the cleverhans wrapper's probabilities; ``nb_classes`` is unused."""
        return self.cleverhans_model.get_probs(x)
def mnist_ae(train_start=0, train_end=60000, test_start=0, test_end=10000, nb_epochs=NB_EPOCHS, batch_size=BATCH_SIZE, learning_rate=LEARNING_RATE, clean_train=CLEAN_TRAIN, testing=False, backprop_through_attack=BACKPROP_THROUGH_ATTACK, num_threads=None, label_smoothing=0.1): report = AccuracyReport() # Set TF random seed to improve reproducibility tf.set_random_seed(1234) rng = np.random.RandomState() source_samples = 10 # Create TF session sess = tf.Session() print("Created TensorFlow session.") set_log_level(logging.DEBUG) if num_threads: config_args = dict(intra_op_parallelism_threads=1) else: config_args = {} sess = tf.Session(config=tf.ConfigProto(**config_args)) # Get CIFAR10 data data = CIFAR10(train_start=train_start, train_end=train_end, test_start=test_start, test_end=test_end) dataset_size = data.x_train.shape[0] dataset_train = data.to_tensorflow()[0] dataset_train = dataset_train.map( lambda x, y: (random_shift(random_horizontal_flip(x)), y), 4) dataset_train = dataset_train.batch(batch_size) dataset_train = dataset_train.prefetch(16) x_train, y_train = data.get_set('train') x_test, y_test = data.get_set('test') nb_latent_size = 100 # Get MNIST test data # Obtain Image Parameters img_rows, img_cols, nchannels = x_train.shape[1:4] nb_classes = y_train.shape[1] print("img_Rows, img_cols, nchannels: ", img_rows, img_cols, nchannels) # Define input TF placeholder x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels)) x_t = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels)) y = tf.placeholder(tf.float32, shape=(None, nb_classes)) y_t = tf.placeholder( tf.float32, shape=(None, nb_classes)) #z = tf.placeholder(tf.float32, shape = (None, nb_latent_size)) #z_t = tf.placeholder(tf.float32, shape = (None, nb_latent_size)) ''' save_dir= 'models' model_name = 'cifar10_AE.h5' model_path_ae = os.path.join(save_dir, model_name) ''' #model_ae= ae_model(x, img_rows=img_rows, img_cols=img_cols, # channels=nchannels) #recon = 
model_ae(x) #print("recon: ",recon) wrap_ae = ModelVAE('wrap_ae') recon = wrap_ae.get_layer(x,'RECON') print("Defined TensorFlow model graph.") def evaluate_ae(): # Evaluate the accuracy of the MNIST model on legitimate test examples eval_params = {'batch_size': 128} noise, d1, d2, dist_diff, avg_dist_lat = model_eval_ae(sess, x, x_t,recon, x_train, x_train, args=eval_params) print("reconstruction distance: ", d1) # Train an MNIST model train_params = { 'nb_epochs': nb_epochs, 'batch_size': batch_size, 'learning_rate': learning_rate, #'train_dir': train_dir_ae, #'filename': filename } rng = np.random.RandomState([2017, 8, 30]) #if not os.path.exists(train_dir_ae): # os.mkdir(train_dir_ae) #ckpt = tf.train.get_checkpoint_state(train_dir_ae) #print(train_dir_ae, ckpt) #ckpt_path = False if ckpt is None else ckpt.model_checkpoint_path if clean_train_vae==True: print("Training VAE") loss = vae_loss(wrap_ae) train_ae(sess, loss, x_train, x_train, tf.train.AdamOptimizer(learning_rate=0.0002, beta1=0.5), evaluate=evaluate_ae, args=train_params, rng=rng, var_list = wrap_ae.get_params()) saver = tf.train.Saver() saver.save(sess, "train_dir/model_vae_fgsm.ckpt") print("saved model") else: print("Loading VAE") saver = tf.train.Saver() #print(ckpt_path) saver.restore(sess, "train_dir/model_vae.ckpt") evaluate_ae() if(train_further): train_params = { 'nb_epochs': 10, 'batch_size': batch_size, 'learning_rate': 0.0002, } #training with the saved model as starting point loss = SquaredError(wrap_ae) train_ae(sess, loss, x_train, x_train, optimizer = tf.train.AdamOptimizer(learning_rate=0.0002, beta1=0.5), evaluate=evaluate_ae, args=train_params, rng=rng) saver = tf.train.Saver() saver.save(sess, "train_dir/model_vae_fgsm.ckpt") evaluate_ae() print("Model loaded and trained for more epochs") num_classes = 10 ''' save_dir= 'models' model_name = 'cifar10_CNN.h5' model_path_cls = os.path.join(save_dir, model_name) ''' cl_model = cnn_cl_model(img_rows=img_rows, img_cols=img_cols, 
channels=nchannels, nb_filters=64, nb_classes=nb_classes) preds_cl = cl_model(x) def do_eval_cls(preds, x_set, y_set, x_tar_set,report_key, is_adv = None): acc = model_eval(sess, x, y, preds, x_t, x_set, y_set, x_tar_set, args=eval_params_cls) def evaluate(): # Evaluate the accuracy of the MNIST model on legitimate test examples eval_params = {'batch_size': batch_size} acc = model_eval(sess, x, y, preds_cl,x_t, x_test, y_test, x_test,args=eval_params) report.clean_train_clean_eval = acc # assert X_test.shape[0] == test_end - test_start, X_test.shape print('Test accuracy on legitimate examples: %0.4f' % acc) train_params = { 'nb_epochs': 100, 'batch_size': batch_size, 'learning_rate': learning_rate, #'train_dir': train_dir_cl, #'filename': filename } rng = np.random.RandomState([2017, 8, 30]) wrap_cl = KerasModelWrapper(cl_model) if clean_train_cl == True: train_params = { 'nb_epochs': 5, 'batch_size': batch_size, 'learning_rate': learning_rate, #'train_dir': train_dir_cl, #'filename': filename } print("Training CNN Classifier") ''' datagen = ImageDataGenerator( rotation_range=15, width_shift_range=0.1, height_shift_range=0.1, horizontal_flip=True, ) datagen.fit(x_train) ''' loss_cl = CrossEntropy(wrap_cl, smoothing=label_smoothing) #for x_batch, y_batch in datagen.flow(x_train, y_train, batch_size = 128): # train(sess, loss_cl, x_batch, y_batch, tf.train.AdamOptimizer(learning_rate=0.0002, beta1=0.5), evaluate=evaluate, # args=train_params, rng=rng) train(sess, loss_cl, x_train, y_train, evaluate=evaluate, optimizer = tf.train.RMSPropOptimizer(learning_rate = 0.0001, decay = 1e-6), args=train_params, rng=rng) saver = tf.train.Saver() saver.save(sess, "train_dir/model_cnn_cl.ckpt") print("saved model at ", "train_dir/model_cnn_cl_fgsm.ckpt") else: print("Loading CNN Classifier") saver = tf.train.Saver() #print(ckpt_path) saver.restore(sess, "train_dir/model_cnn_cl.ckpt") evaluate() if(train_further): train_params = { 'nb_epochs': 10, 'batch_size': batch_size, 
'learning_rate': learning_rate, 'train_dir': train_dir_cl, 'filename': filename } loss_cl = CrossEntropy(wrap_cl, smoothing=label_smoothing) train(sess, loss_cl, x_train, y_train, evaluate=evaluate, optimizer = tf.train.RMSPropOptimizer(learning_rate = 0.0001, decay = 1e-6), args=train_params, rng=rng) saver = tf.train.Saver() saver.save(sess, "train_dir/model_cl_fgsm.ckpt") print("Model loaded and trained further") evaluate() ########################################################################### # Craft adversarial examples using Carlini and Wagner's approach ########################################################################### nb_adv_per_sample = str(nb_classes - 1) if targeted else '1' print('Crafting ' + str(source_samples) + ' * ' + nb_adv_per_sample + ' adversarial examples') print("This could take some time ...") # Instantiate a CW attack object #cw = CarliniWagnerAE(wrap_ae,wrap_cl, sess=sess) if viz_enabled: assert source_samples == nb_classes idxs = [np.where(np.argmax(y_test, axis=1) == i)[0][0] for i in range(nb_classes)] if targeted: if viz_enabled: # Initialize our array for grid visualization grid_shape = (nb_classes, nb_classes, img_rows, img_cols, nchannels) grid_viz_data = np.zeros(grid_shape, dtype='f') grid_viz_data_1 = np.zeros(grid_shape, dtype='f') adv_inputs = np.array( [[instance] * (nb_classes-1) for instance in x_test[idxs]], dtype=np.float32) #adv_input_y = np.array([[instance]*(nb_classes-1) for instance in y_test[idxs]]) adv_input_y = [] for curr_num in range(nb_classes): targ = [] for id in range(nb_classes-1): targ.append(y_test[idxs[curr_num]]) adv_input_y.append(targ) adv_input_y = np.array(adv_input_y) adv_target_y = [] for curr_num in range(nb_classes): targ = [] for id in range(nb_classes): if(id!=curr_num): targ.append(y_test[idxs[id]]) adv_target_y.append(targ) adv_target_y = np.array(adv_target_y) #print("adv_input_y: \n", adv_input_y) #print("adv_target_y: \n", adv_target_y) adv_input_targets = [] for curr_num in 
range(nb_classes): targ = [] for id in range(nb_classes): if(id!=curr_num): targ.append(x_test[idxs[id]]) adv_input_targets.append(targ) adv_input_targets = np.array(adv_input_targets) adv_inputs = adv_inputs.reshape( (source_samples * (nb_classes-1), img_rows, img_cols, nchannels)) adv_input_targets = adv_input_targets.reshape( (source_samples * (nb_classes-1), img_rows, img_cols, nchannels)) adv_input_y = adv_input_y.reshape(source_samples*(nb_classes-1), 10) adv_target_y = adv_target_y.reshape(source_samples*(nb_classes-1), 10) one_hot = np.zeros((nb_classes, nb_classes)) one_hot[np.arange(nb_classes), np.arange(nb_classes)] = 1 adv_ys = np.array([one_hot] * source_samples, dtype=np.float32).reshape((source_samples * nb_classes, nb_classes)) yname = "y_target" fgsm_params = { 'eps': 0.3, 'clip_min': 0., 'clip_max': 1. } fgsm = FastGradientMethodAe(wrap_ae, sess=sess) adv = fgsm.generate(x,x_t, **fgsm_params) adv = sess.run(adv, {x: adv_inputs, x_t: adv_input_targets}) recon_orig = wrap_ae.get_layer(x, 'RECON') recon_orig = sess.run(recon_orig, feed_dict = {x: adv_inputs}) recon_adv = wrap_ae.get_layer(x, 'RECON') recon_adv = sess.run(recon_adv, feed_dict = {x: adv}) pred_adv_recon = wrap_cl.get_logits(x) pred_adv_recon = sess.run(pred_adv_recon, {x:recon_adv}) #scores1 = cl_model.evaluate(recon_adv, adv_input_y, verbose=1) #scores2 = cl_model.evaluate(recon_adv, adv_target_y, verbose = 1) #acc_1 = model_eval(sess, x, y, pred_adv_recon, x_t, adv_inputs, adv_target_y, adv_input_targets, args=eval_params_cls) #acc_2 = model_eval(sess, x, y, pred_adv_recon, x_t, adv_inputs, adv_input_y, adv_input_targets, args=eval_params_cls) shape = np.shape(adv_inputs) noise = np.sum(np.square(adv-adv_inputs))/(np.shape(adv)[0]) noise = pow(noise,0.5) d1 = np.sum(np.square(recon_adv-adv_inputs))/(np.shape(adv_inputs)[0]) d2 = np.sum(np.square(recon_adv-adv_input_targets))/(np.shape(adv_inputs)[0]) acc_1 = (sum(np.argmax(pred_adv_recon, axis=-1)== np.argmax(adv_target_y, 
axis=-1)))/(np.shape(adv_target_y)[0]) acc_2 = (sum(np.argmax(pred_adv_recon, axis=-1)== np.argmax(adv_input_y, axis=-1)))/(np.shape(adv_target_y)[0]) print("noise: ", noise) print("d1: ", d1) print("d2: ", d2) print("classifier acc_target: ", acc_1) print("classifier acc_true: ", acc_2) #print("recon_adv[0]\n", recon_adv[0,:,:,0]) curr_class = 0 if viz_enabled: for j in range(nb_classes): if targeted: for i in range(nb_classes): #grid_viz_data[i, j] = adv[j * (nb_classes-1) + i] if(i==j): grid_viz_data[i,j] = recon_orig[curr_class*9] grid_viz_data_1[i,j] = adv_inputs[curr_class*9] curr_class = curr_class+1 else: if(j>i): grid_viz_data[i,j] = recon_adv[i*(nb_classes-1) + j-1] grid_viz_data_1[i,j] = adv[i*(nb_classes-1)+j-1] else: grid_viz_data[i,j] = recon_adv[i*(nb_classes-1) + j] grid_viz_data_1[i,j] = adv[i*(nb_classes-1)+j] #rint(grid_viz_data.shape) print('--------------------------------------') # Compute the number of adversarial examples that were successfully found # Compute the average distortion introduced by the algorithm percent_perturbed = np.mean(np.sum((adv - adv_inputs)**2, axis=(1, 2, 3))**.5) print('Avg. 
L_2 norm of perturbations {0:.4f}'.format(percent_perturbed)) # Finally, block & display a grid of all the adversarial examples if viz_enabled: plt.ioff() figure = plt.figure() figure.canvas.set_window_title('Cleverhans: Grid Visualization') # Add the images to the plot num_cols = grid_viz_data.shape[0] num_rows = grid_viz_data.shape[1] num_channels = grid_viz_data.shape[4] for yy in range(num_rows): for xx in range(num_cols): figure.add_subplot(num_rows, num_cols, (xx + 1) + (yy * num_cols)) plt.axis('off') plt.imshow(grid_viz_data[xx, yy, :, :, :]) # Draw the plot and return plt.savefig('cifar10_fgsm_vae_fig1') figure = plt.figure() figure.canvas.set_window_title('Cleverhans: Grid Visualization') for yy in range(num_rows): for xx in range(num_cols): figure.add_subplot(num_rows, num_cols, (xx + 1) + (yy * num_cols)) plt.axis('off') plt.imshow(grid_viz_data_1[xx, yy, :, :, :]) # Draw the plot and return plt.savefig('cifar10_fgsm_vae_fig2') if adversarial_training: print("starting adversarial training") index_shuf = list(range(len(x_train))) x_train_target = x_train[index_shuf] y_train_target = y_train[index_shuf] # Randomly repeat a few training examples each epoch to avoid # having a too-small batch ''' while len(index_shuf) % batch_size != 0: index_shuf.append(rng.randint(len(x_train))) nb_batches = len(index_shuf) // batch_size rng.shuffle(index_shuf) # Shuffling here versus inside the loop doesn't seem to affect # timing very much, but shuffling here makes the code slightly # easier to read ''' print("len of x_train_target and x_train: ", len(x_train_target), len(x_train)) for ind in range (0, len(x_train)): r_ind = -1 while(np.argmax(y_train_target[ind])==np.argmax(y_train[ind])): r_ind = rng.randint(0,len(x_train)) y_train_target[ind] = y_train[r_ind] if r_ind>-1: x_train_target[ind] = x_train[r_ind] wrap_ae2 = ModelVAE('wrap_ae2') fgsm2 = FastGradientMethodAe(wrap_ae2, sess=sess) adv2 = fgsm.generate(x,x_t, **fgsm_params) adv_set = sess.run(adv2, {x: 
x_train, x_t: x_train_target}) x_train_aim = np.append(x_train, x_train, axis = 0) x_train_app = np.append(x_train, adv_set, axis = 0) loss2 = vae_loss(wrap_ae2) train_params = { 'nb_epochs': 5, 'batch_size': batch_size, 'learning_rate': learning_rate} train_ae(sess, loss2, x_train_app, x_train_aim, tf.train.AdamOptimizer(learning_rate=0.0002, beta1=0.5), args=train_params, rng=rng, var_list = wrap_ae2.get_params()) evaluate_ae() adv3 = fgsm2.generate(x, x_t, **fgsm_params) adv3 = sess.run(adv3, {x: adv_inputs, x_t: adv_input_targets}) recon_orig2 = wrap_ae2.get_layer(x, 'RECON') recon_orig2 = sess.run(recon_orig2, feed_dict = {x: adv_inputs}) recon_adv2 = wrap_ae2.get_layer(x, 'RECON') recon_adv2 = sess.run(recon_adv2, feed_dict = {x: adv3}) pred_adv_recon2 = wrap_cl.get_logits(x) pred_adv_recon2 = sess.run(pred_adv_recon2, {x:recon_adv2}) shape = np.shape(adv_inputs) noise = np.sum(np.square(adv3-adv_inputs))/(np.shape(adv3)[0]) noise = pow(noise,0.5) d1 = np.sum(np.square(recon_adv2-adv_inputs))/(np.shape(adv_inputs)[0]) d2 = np.sum(np.square(recon_adv2-adv_input_targets))/(np.shape(adv_inputs)[0]) acc_1 = (sum(np.argmax(pred_adv_recon2, axis=-1)== np.argmax(adv_target_y, axis=-1)))/(np.shape(adv_target_y)[0]) acc_2 = (sum(np.argmax(pred_adv_recon2, axis=-1)== np.argmax(adv_input_y, axis=-1)))/(np.shape(adv_target_y)[0]) print("noise: ", noise) print("d1: ", d1) print("d2: ", d2) print("classifier acc_target: ", acc_1) print("classifier acc_true: ", acc_2) #print("recon_adv[0]\n", recon_adv[0,:,:,0]) curr_class = 0 if viz_enabled: for j in range(nb_classes): if targeted: for i in range(nb_classes): #grid_viz_data[i, j] = adv[j * (nb_classes-1) + i] if(i==j): grid_viz_data[i,j] = recon_orig2[curr_class*9] grid_viz_data_1[i,j] = adv_inputs[curr_class*9] curr_class = curr_class+1 else: if(j>i): grid_viz_data[i,j] = recon_adv2[i*(nb_classes-1) + j-1] grid_viz_data_1[i,j] = adv3[i*(nb_classes-1)+j-1] else: grid_viz_data[i,j] = recon_adv2[i*(nb_classes-1) + j] 
grid_viz_data_1[i,j] = adv3[i*(nb_classes-1)+j] #rint(grid_viz_data.shape) print('--------------------------------------') # Compute the number of adversarial examples that were successfully found # Compute the average distortion introduced by the algorithm percent_perturbed = np.mean(np.sum((adv - adv_inputs)**2, axis=(1, 2, 3))**.5) print('Avg. L_2 norm of perturbations {0:.4f}'.format(percent_perturbed)) # Finally, block & display a grid of all the adversarial examples if viz_enabled: plt.ioff() figure = plt.figure() figure.canvas.set_window_title('Cleverhans: Grid Visualization') # Add the images to the plot num_cols = grid_viz_data.shape[0] num_rows = grid_viz_data.shape[1] num_channels = grid_viz_data.shape[4] for yy in range(num_rows): for xx in range(num_cols): figure.add_subplot(num_rows, num_cols, (xx + 1) + (yy * num_cols)) plt.axis('off') plt.imshow(grid_viz_data[xx, yy, :, :, :]) # Draw the plot and return plt.savefig('cifar10_vae_fgsm_adv_fig1') figure = plt.figure() figure.canvas.set_window_title('Cleverhans: Grid Visualization') for yy in range(num_rows): for xx in range(num_cols): figure.add_subplot(num_rows, num_cols, (xx + 1) + (yy * num_cols)) plt.axis('off') plt.imshow(grid_viz_data_1[xx, yy, :, :, :]) # Draw the plot and return plt.savefig('cifar10_vae_fgsm_adv_fig2') #return report if binarization: print("----------------") print("BINARIZATION") adv[adv>0.5] = 1.0 adv[adv<=0.5] = 0.0 recon_orig = wrap_ae.get_layer(x, 'RECON') recon_adv = wrap_ae.get_layer(x, 'RECON') #pred_adv = wrap_cl.get_logits(x) recon_orig = sess.run(recon_orig, {x: adv_inputs}) recon_adv = sess.run(recon_adv, {x: adv}) #pred_adv = sess.run(pred_adv, {x: recon_adv}) pred_adv_recon = wrap_cl.get_logits(x) pred_adv_recon = sess.run(pred_adv_recon, {x:recon_adv}) eval_params = {'batch_size': 90} if targeted: noise = np.sum(np.square(adv-adv_inputs))/(np.shape(adv)[0]) noise = pow(noise,0.5) d1 = np.sum(np.square(recon_adv-adv_inputs))/(np.shape(adv_inputs)[0]) d2 = 
np.sum(np.square(recon_adv-adv_input_targets))/(np.shape(adv_inputs)[0]) acc_1 = (sum(np.argmax(pred_adv_recon, axis=-1)== np.argmax(adv_target_y, axis=-1)))/(np.shape(adv_target_y)[0]) acc_2 = (sum(np.argmax(pred_adv_recon, axis=-1)== np.argmax(adv_input_y, axis=-1)))/(np.shape(adv_target_y)[0]) print("noise: ", noise) print("d1: ", d1) print("d2: ", d2) print("classifier acc_target: ", acc_1) print("classifier acc_true: ", acc_2) curr_class = 0 if viz_enabled: for j in range(nb_classes): for i in range(nb_classes): #grid_viz_data[i, j] = adv[j * (nb_classes-1) + i] if(i==j): grid_viz_data[i,j] = recon_orig[curr_class*9] grid_viz_data_1[i,j] = adv_inputs[curr_class*9] curr_class = curr_class+1 else: if(j>i): grid_viz_data[i,j] = recon_adv[i*(nb_classes-1) + j-1] grid_viz_data_1[i,j] = adv[i*(nb_classes-1)+j-1] else: grid_viz_data[i,j] = recon_adv[i*(nb_classes-1) + j] grid_viz_data_1[i,j] = adv[i*(nb_classes-1)+j] plt.ioff() figure = plt.figure() figure.canvas.set_window_title('Cleverhans: Grid Visualization') # Add the images to the plot num_cols = grid_viz_data.shape[0] num_rows = grid_viz_data.shape[1] num_channels = grid_viz_data.shape[4] for yy in range(num_rows): for xx in range(num_cols): figure.add_subplot(num_rows, num_cols, (xx + 1) + (yy* num_cols)) plt.axis('off') if num_channels == 1: plt.imshow(grid_viz_data[xx, yy, :, :, 0]) else: plt.imshow(grid_viz_data[xx, yy, :, :, :]) # Draw the plot and return plt.savefig('cifar10_fgsm_vae_fig1_bin') figure = plt.figure() figure.canvas.set_window_title('Cleverhans: Grid Visualization') for yy in range(num_rows): for xx in range(num_cols): figure.add_subplot(num_rows, num_cols, (xx + 1) + (yy * num_cols)) plt.axis('off') if num_channels == 1: plt.imshow(grid_viz_data_1[xx, yy, :, :, 0]) else: plt.imshow(grid_viz_data_1[xx, yy, :, :, :]) # Draw the plot and return plt.savefig('cifar10_fgsm_vae_fig2_bin') if(mean_filtering ==True): print("----------------") print("MEAN FILTERING") adv = uniform_filter(adv, 2) 
recon_orig = wrap_ae.get_layer(x, 'RECON') recon_adv = wrap_ae.get_layer(x, 'RECON') pred_adv_recon = wrap_cl.get_logits(x) recon_orig = sess.run(recon_orig, {x: adv_inputs}) recon_adv = sess.run(recon_adv, {x: adv}) pred_adv_recon = sess.run(pred_adv_recon, {x: recon_adv}) eval_params = {'batch_size': 90} noise = np.sum(np.square(adv-adv_inputs))/(np.shape(adv)[0]) noise = pow(noise,0.5) d1 = np.sum(np.square(recon_adv-adv_inputs))/(np.shape(adv_inputs)[0]) d2 = np.sum(np.square(recon_adv-adv_input_targets))/(np.shape(adv_inputs)[0]) acc_1 = (sum(np.argmax(pred_adv_recon, axis=-1)== np.argmax(adv_target_y, axis=-1)))/(np.shape(adv_target_y)[0]) acc_2 = (sum(np.argmax(pred_adv_recon, axis=-1)== np.argmax(adv_input_y, axis=-1)))/(np.shape(adv_target_y)[0]) print("noise: ", noise) print("d1: ", d1) print("d2: ", d2) print("classifier acc_target: ", acc_1) print("classifier acc_true: ", acc_2) curr_class = 0 if viz_enabled: for j in range(nb_classes): for i in range(nb_classes): #grid_viz_data[i, j] = adv[j * (nb_classes-1) + i] if(i==j): grid_viz_data[i,j] = recon_orig[curr_class*9] grid_viz_data_1[i,j] = adv_inputs[curr_class*9] curr_class = curr_class+1 else: if(j>i): grid_viz_data[i,j] = recon_adv[i*(nb_classes-1) + j-1] grid_viz_data_1[i,j] = adv[i*(nb_classes-1)+j-1] else: grid_viz_data[i,j] = recon_adv[i*(nb_classes-1) + j] grid_viz_data_1[i,j] = adv[i*(nb_classes-1)+j] plt.ioff() figure = plt.figure() figure.canvas.set_window_title('Cleverhans: Grid Visualization') # Add the images to the plot num_cols = grid_viz_data.shape[0] num_rows = grid_viz_data.shape[1] num_channels = grid_viz_data.shape[4] for yy in range(num_rows): for xx in range(num_cols): figure.add_subplot(num_rows, num_cols, (xx + 1) + (yy* num_cols)) plt.axis('off') if num_channels == 1: plt.imshow(grid_viz_data[xx, yy, :, :, 0]) else: plt.imshow(grid_viz_data[xx, yy, :, :, :]) # Draw the plot and return plt.savefig('cifar10_fgsm_vae_fig1_mean') figure = plt.figure() 
figure.canvas.set_window_title('Cleverhans: Grid Visualization') for yy in range(num_rows): for xx in range(num_cols): figure.add_subplot(num_rows, num_cols, (xx + 1) + (yy * num_cols)) plt.axis('off') if num_channels == 1: plt.imshow(grid_viz_data_1[xx, yy, :, :, 0]) else: plt.imshow(grid_viz_data_1[xx, yy, :, :, :]) # Draw the plot and return plt.savefig('cifar10_fgsm_vae_fig2_mean')
return i < args.num_noises _, losses = tf.while_loop( condition, body, [0, losses], shape_invariants=[tf.TensorShape(None), tf.TensorShape([None])]) # Define the update func loss = w * losses optimizer = tf.train.AdamOptimizer(learning_rate=args.lr) train_step = optimizer.minimize(loss) # Test acc on legit data logits = wrap.get_logits(x[0]) acc, acc_op = tf.metrics.accuracy( labels=tf.argmax(y, 1), predictions=tf.argmax(logits, 1)) # Define adv attack deepfool = DeepFool(wrap, sess=sess) deepfool_params = {'eps': args.noise_eps, 'clip_min': 0., 'clip_max': 1.} # Attack images x_deepfool = deepfool.generate(x[0], **deepfool_params) # Consider the attack to be constant x_deepfool = tf.stop_gradient(x_deepfool) # Evaluate predictions on adv attacks preds_deepfool = model(x_deepfool) acc_deepfool, acc_op_deepfool = tf.metrics.accuracy(
def _targeted_pair_indices(idxs):
    """Return aligned (source, target) dataset indices for all ordered class pairs.

    ``idxs[c]`` is the dataset index of the exemplar chosen for class ``c``.
    For every source class the targets are all *other* classes in increasing
    order, so both returned arrays have ``n * (n - 1)`` entries for
    ``n = len(idxs)``.
    """
    idxs = np.asarray(idxs)
    # Off-diagonal positions, enumerated row by row: row = source class,
    # column = target class.
    src_cls, tgt_cls = np.nonzero(~np.eye(len(idxs), dtype=bool))
    return idxs[src_cls], idxs[tgt_cls]


def _reconstruct_and_classify(sess, x, wrap_ae, wrap_cl, clean, adv):
    """Run the auto-encoder on ``clean`` and ``adv`` and classify the latter.

    Returns ``(recon_orig, recon_adv, pred_adv_recon)`` as evaluated arrays:
    the 'RECON' layer output for ``clean``, the 'RECON' layer output for
    ``adv``, and the classifier logits for the reconstruction of ``adv``.
    """
    recon_orig = sess.run(wrap_ae.get_layer(x, 'RECON'), feed_dict={x: clean})
    recon_adv = sess.run(wrap_ae.get_layer(x, 'RECON'), feed_dict={x: adv})
    pred_adv_recon = sess.run(wrap_cl.get_logits(x), feed_dict={x: recon_adv})
    return recon_orig, recon_adv, pred_adv_recon


def _print_attack_metrics(adv, adv_inputs, adv_input_targets, recon_adv,
                          pred_adv_recon, adv_input_y, adv_target_y):
    """Print perturbation size, reconstruction distances and classifier hits.

    * noise: square root of (total squared perturbation / number of samples)
    * d1 / d2: total squared distance of the adversarial reconstruction to the
      source images / to the target images, divided by the number of samples
    * acc_target / acc_true: fraction of reconstructions whose arg-max class
      equals the target label / the source label
    """
    n_samples = np.shape(adv_inputs)[0]
    noise = np.sqrt(np.sum(np.square(adv - adv_inputs)) / np.shape(adv)[0])
    d1 = np.sum(np.square(recon_adv - adv_inputs)) / n_samples
    d2 = np.sum(np.square(recon_adv - adv_input_targets)) / n_samples
    pred_cls = np.argmax(pred_adv_recon, axis=-1)
    acc_1 = np.mean(pred_cls == np.argmax(adv_target_y, axis=-1))
    acc_2 = np.mean(pred_cls == np.argmax(adv_input_y, axis=-1))
    print("noise: ", noise)
    print("d1: ", d1)
    print("d2: ", d2)
    print("classifier acc_target: ", acc_1)
    print("classifier acc_true: ", acc_2)


def _fill_viz_grids(grid_recon, grid_adv, recon_orig, recon_adv, adv_inputs,
                    adv, nb_classes):
    """Fill the two ``nb_classes x nb_classes`` visualization grids in place.

    Diagonal cell (c, c) shows the clean image of class c (``grid_adv``) and
    its reconstruction (``grid_recon``).  Off-diagonal cell (i, j) shows the
    adversarial example crafted from class i towards class j and its
    reconstruction.
    """
    per_source = nb_classes - 1  # adversarial examples crafted per source
    for i in range(nb_classes):
        for j in range(nb_classes):
            if i == j:
                # First copy of the clean source image of class i.
                grid_recon[i, j] = recon_orig[i * per_source]
                grid_adv[i, j] = adv_inputs[i * per_source]
            else:
                # Targets skip the source class, so columns right of the
                # diagonal are shifted down by one.
                k = i * per_source + (j - 1 if j > i else j)
                grid_recon[i, j] = recon_adv[k]
                grid_adv[i, j] = adv[k]


def _save_grid_figure(grid, out_name):
    """Render a 5-d image grid (cols, rows, H, W, C) and save it as ``out_name``."""
    figure = plt.figure()
    figure.canvas.set_window_title('Cleverhans: Grid Visualization')
    num_cols = grid.shape[0]
    num_rows = grid.shape[1]
    num_channels = grid.shape[4]
    for yy in range(num_rows):
        for xx in range(num_cols):
            figure.add_subplot(num_rows, num_cols, (xx + 1) + (yy * num_cols))
            plt.axis('off')
            if num_channels == 1:
                # Single-channel images are drawn from the 2-d slice.
                plt.imshow(grid[xx, yy, :, :, 0])
            else:
                plt.imshow(grid[xx, yy, :, :, :])
    plt.savefig(out_name)


def cifar10_cw_recon(train_start=0,
                     train_end=60000,
                     test_start=0,
                     test_end=10000,
                     viz_enabled=VIZ_ENABLED,
                     nb_epochs=NB_EPOCHS,
                     batch_size=BATCH_SIZE,
                     source_samples=SOURCE_SAMPLES,
                     learning_rate=LEARNING_RATE,
                     attack_iterations=ATTACK_ITERATIONS,
                     model_path=MODEL_PATH,
                     model_path_cls=MODEL_PATH,
                     targeted=TARGETED,
                     num_threads=None,
                     label_smoothing=0.1,
                     nb_filters=NB_FILTERS,
                     filename=FILENAME,
                     train_dir_ae=TRAIN_DIR_AE,
                     train_dir_cl=TRAIN_DIR_CL):
    """Attack a CIFAR-10 VAE + classifier pipeline with a targeted CW attack.

    Steps performed:
      1. train or restore the VAE (``ModelVAE``) and the CNN classifier;
      2. craft, for every ordered pair of classes, an adversarial example
         with ``CarliniWagnerAE`` and report reconstruction / classification
         metrics, optionally saving image grids;
      3. optionally (module flag ``adv_train``) train a second auto-encoder on
         clean + adversarial data and re-run the attack against it;
      4. optionally (module flags ``binarization_defense`` /
         ``mean_filtering``) post-process the adversarial examples and report
         the same metrics again.

    Besides its parameters the function reads these module-level names, which
    are not visible in this part of the file: ``clean_train_vae``,
    ``train_further``, ``clean_train_cl``, ``adv_train``,
    ``binarization_defense``, ``mean_filtering`` and ``CW_LEARNING_RATE``.

    NOTE(review): the nesting of this function was reconstructed from a
    whitespace-collapsed source; confirm the branch boundaries against the
    original layout.
    NOTE(review): everything from the attack onwards needs ``targeted`` and
    ``viz_enabled`` to be truthy (the source images are selected through
    ``idxs``, which only exists when ``viz_enabled`` is set).

    :param train_start: index of the first training example
    :param train_end: index one past the last training example
    :param test_start: index of the first test example
    :param test_end: index one past the last test example
    :param viz_enabled: build and save the image grids
    :param nb_epochs: number of epochs for VAE training
    :param batch_size: training / evaluation batch size
    :param source_samples: number of source images (must equal the number of
        classes when ``viz_enabled`` is set)
    :param learning_rate: learning rate stored in the training parameters
    :param attack_iterations: ``max_iterations`` of the CW attack
    :param model_path: unused
    :param model_path_cls: unused
    :param targeted: run the targeted variant of the attack
    :param num_threads: when truthy, limit TF intra-op parallelism to 1 thread
    :param label_smoothing: label smoothing for the classifier loss
    :param nb_filters: unused; the classifier is built with ``nb_filters=64``
    :param filename: checkpoint file name stored in the training parameters
    :param train_dir_ae: directory created for auto-encoder training
    :param train_dir_cl: directory created for classifier training
    :return: None
    """
    # Object used to keep track of key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Create exactly one TF session (a second, unconfigured session used to
    # be created first and was never closed).
    if num_threads:
        config_args = dict(intra_op_parallelism_threads=1)
    else:
        config_args = {}
    sess = tf.Session(config=tf.ConfigProto(**config_args))
    print("Created TensorFlow session.")

    set_log_level(logging.DEBUG)

    # Get CIFAR10 data
    data = CIFAR10(train_start=train_start, train_end=train_end,
                   test_start=test_start, test_end=test_end)
    # NOTE(review): this augmented tf.data pipeline is built but not consumed
    # anywhere below; training uses the numpy arrays instead.
    dataset_train = data.to_tensorflow()[0]
    dataset_train = dataset_train.map(
        lambda x, y: (random_shift(random_horizontal_flip(x)), y), 4)
    dataset_train = dataset_train.batch(batch_size)
    dataset_train = dataset_train.prefetch(16)
    x_train, y_train = data.get_set('train')
    x_test, y_test = data.get_set('test')

    # Obtain Image Parameters
    img_rows, img_cols, nchannels = x_train.shape[1:4]
    nb_classes = y_train.shape[1]
    print("img_Rows, img_cols, nchannels: ", img_rows, img_cols, nchannels)

    # Define input TF placeholders
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels))
    x_t = tf.placeholder(tf.float32,
                         shape=(None, img_rows, img_cols, nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))
    y_t = tf.placeholder(tf.float32, shape=(None, nb_classes))

    wrap_vae = ModelVAE('wrap_vae')
    recon = wrap_vae.get_layer(x, 'RECON')
    print("Defined TensorFlow model graph.")

    def evaluate_ae():
        # Report the reconstruction distance on the training images.  `recon`
        # is looked up at call time, so after adversarial training rebinds it
        # this evaluates the adversarially trained auto-encoder.
        eval_params = {'batch_size': 128}
        noise, d1, d2, dist_diff, avg_dist_lat = model_eval_ae(
            sess, x, x_t, recon, x_train, x_train, args=eval_params)
        print("reconstruction distance: ", d1)

    # ------------------------------------------------------------------
    # Train or restore the VAE
    # ------------------------------------------------------------------
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'train_dir': train_dir_ae,
        'filename': filename
    }
    rng = np.random.RandomState([2017, 8, 30])
    if not os.path.exists(train_dir_ae):
        os.mkdir(train_dir_ae)

    if clean_train_vae == True:
        print("Training VAE")
        loss = vae_loss(wrap_vae)
        train_ae(sess, loss, x_train, x_train, evaluate=evaluate_ae,
                 args=train_params, rng=rng, var_list=wrap_vae.get_params())
        saver = tf.train.Saver()
        saver.save(sess, "train_dir/model_vae.ckpt")
        print("saved model")
    else:
        print("Loading VAE")
        saver = tf.train.Saver()
        saver.restore(sess, "train_dir/model_vae.ckpt")
        evaluate_ae()
        if (train_further):
            train_params = {
                'nb_epochs': 10,
                'batch_size': batch_size,
                'learning_rate': learning_rate,
                'train_dir': train_dir_ae,
                'filename': filename
            }
            # Training with the saved model as starting point.  The callback
            # is `evaluate_ae`; the previously passed `evaluate_vae` is not
            # defined in this function.
            loss = SquaredError(wrap_vae)
            train_ae(sess, loss, x_train, x_train, evaluate=evaluate_ae,
                     args=train_params, rng=rng)
            saver = tf.train.Saver()
            saver.save(sess, "train_dir/model_ae_final.ckpt")
            evaluate_ae()
            print("Model loaded and trained for more epochs")

    # ------------------------------------------------------------------
    # Train or restore the CNN classifier
    # ------------------------------------------------------------------
    cl_model = cnn_cl_model(img_rows=img_rows, img_cols=img_cols,
                            channels=nchannels, nb_filters=64,
                            nb_classes=nb_classes)
    preds_cl = cl_model(x)

    def evaluate():
        # Evaluate the classifier accuracy on legitimate test examples
        eval_params = {'batch_size': batch_size}
        acc = model_eval(sess, x, y, preds_cl, x_t, x_test, y_test, x_test,
                         args=eval_params)
        report.clean_train_clean_eval = acc
        print('Test accuracy on legitimate examples: %0.4f' % acc)

    # These parameters are also reused for adversarial AE training below.
    train_params = {
        'nb_epochs': 3,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'train_dir': train_dir_cl,
        'filename': filename
    }
    rng = np.random.RandomState([2017, 8, 30])
    if not os.path.exists(train_dir_cl):
        os.mkdir(train_dir_cl)

    wrap_cl = KerasModelWrapper(cl_model)
    cl_ckpt_path = "train_dir/model_cnn_cl_vae.ckpt"

    if clean_train_cl == True:
        print("Training CNN Classifier")
        loss_cl = CrossEntropy(wrap_cl, smoothing=label_smoothing)
        train(sess, loss_cl, x_train, y_train, evaluate=evaluate,
              optimizer=tf.train.RMSPropOptimizer(learning_rate=0.0001,
                                                  decay=1e-6),
              args=train_params, rng=rng)
        saver = tf.train.Saver()
        saver.save(sess, cl_ckpt_path)
        # Report the path that was actually written.
        print("saved model at ", cl_ckpt_path)
    else:
        print("Loading CNN Classifier")
        saver = tf.train.Saver()
        saver.restore(sess, cl_ckpt_path)
        print("Model loaded")
        evaluate()

    ###########################################################################
    # Craft adversarial examples using Carlini and Wagner's approach
    ###########################################################################
    nb_adv_per_sample = str(nb_classes - 1) if targeted else '1'
    print('Crafting ' + str(source_samples) + ' * ' + nb_adv_per_sample +
          ' adversarial examples')
    print("This could take some time ...")

    # Instantiate a CW attack object
    cw = CarliniWagnerAE(wrap_vae, wrap_cl, sess=sess)

    if viz_enabled:
        assert source_samples == nb_classes
        # First test example of every class
        idxs = [
            np.where(np.argmax(y_test, axis=1) == i)[0][0]
            for i in range(nb_classes)
        ]

    if targeted:
        if viz_enabled:
            # Initialize our arrays for grid visualization
            grid_shape = (nb_classes, nb_classes, img_rows, img_cols,
                          nchannels)
            grid_viz_data = np.zeros(grid_shape, dtype='f')
            grid_viz_data_1 = np.zeros(grid_shape, dtype='f')

        # Row k pairs one source exemplar with one exemplar of another class;
        # each source is repeated (nb_classes - 1) times.
        src_idx, tgt_idx = _targeted_pair_indices(idxs)
        adv_inputs = x_test[src_idx].astype(np.float32)
        adv_input_targets = x_test[tgt_idx]
        adv_input_y = y_test[src_idx]
        adv_target_y = y_test[tgt_idx]

        # NOTE(review): adv_ys has source_samples * nb_classes rows while the
        # attacked batch has source_samples * (nb_classes - 1); confirm how
        # CarliniWagnerAE consumes 'y_target'.
        adv_ys = np.tile(np.eye(nb_classes, dtype=np.float32),
                         (source_samples, 1))
        yname = "y_target"

    cw_params_batch_size = source_samples * (nb_classes - 1)
    cw_params = {
        'binary_search_steps': 1,
        yname: adv_ys,
        'max_iterations': attack_iterations,
        'learning_rate': CW_LEARNING_RATE,
        'batch_size': cw_params_batch_size,
        'initial_const': 1
    }

    adv = cw.generate_np(adv_inputs, adv_input_targets, **cw_params)

    recon_orig, recon_adv, pred_adv_recon = _reconstruct_and_classify(
        sess, x, wrap_vae, wrap_cl, adv_inputs, adv)
    _print_attack_metrics(adv, adv_inputs, adv_input_targets, recon_adv,
                          pred_adv_recon, adv_input_y, adv_target_y)

    if viz_enabled and targeted:
        _fill_viz_grids(grid_viz_data, grid_viz_data_1, recon_orig, recon_adv,
                        adv_inputs, adv, nb_classes)

    print('--------------------------------------')

    # Compute the average distortion introduced by the algorithm
    percent_perturbed = np.mean(
        np.sum((adv - adv_inputs)**2, axis=(1, 2, 3))**.5)
    print('Avg. L_2 norm of perturbations {0:.4f}'.format(percent_perturbed))

    # Save grids of all the adversarial examples
    if viz_enabled:
        plt.ioff()
        _save_grid_figure(grid_viz_data, 'cifar10_vae_fig1')
        _save_grid_figure(grid_viz_data_1, 'cifar10_vae_fig2')

    # ------------------------------------------------------------------
    # Adversarial training
    # ------------------------------------------------------------------
    if (adv_train == True):
        print("starting adversarial training")
        adv_input_set = []
        adv_input_target_set = []
        for _ in range(20):
            # Reshuffle the training set so that a different exemplar is
            # picked for every class in each round.
            indices = np.arange(np.shape(x_train)[0])
            np.random.shuffle(indices)
            print("indices: ", indices[1:10])
            x_train = x_train[indices]
            y_train = y_train[indices]
            train_idxs = [
                np.where(np.argmax(y_train, axis=1) == i)[0][0]
                for i in range(nb_classes)
            ]
            src_idx, tgt_idx = _targeted_pair_indices(train_idxs)
            adv_input_set.append(x_train[src_idx].astype(np.float32))
            adv_input_target_set.append(x_train[tgt_idx])

        # Stack as (rounds, pairs, H, W, C).  A stray trailing comma used to
        # wrap the first array in a 1-tuple, which a 6-d reshape then undid.
        adv_input_set = np.array(adv_input_set)
        adv_input_target_set = np.array(adv_input_target_set)
        print("shape of adv_input_set: ", np.shape(adv_input_set))
        print("shape of adv_input_target_set: ",
              np.shape(adv_input_target_set))
        # Merge the round and pair axes into one batch axis.
        adv_input_set = adv_input_set.reshape(
            (-1,) + adv_input_set.shape[2:])
        adv_input_target_set = adv_input_target_set.reshape(
            (-1,) + adv_input_target_set.shape[2:])
        print("generated adversarial training set")

        adv_set = cw.generate_np(adv_input_set, adv_input_target_set,
                                 **cw_params)
        # Inputs: clean + adversarial images; reconstruction targets: clean
        # images + the clean sources of the adversarial images.
        x_train_aim = np.append(x_train, adv_input_set, axis=0)
        x_train_app = np.append(x_train, adv_set, axis=0)

        model_ae_adv = ae_model(x, img_rows=img_rows, img_cols=img_cols,
                                channels=nchannels)
        recon = model_ae_adv(x)
        wrap_vae_adv = KerasModelWrapper(model_ae_adv)

        print("Training Adversarial AE")
        loss = SquaredError(wrap_vae_adv)
        # Pass the loss built above (`loss_2` was never defined here).
        train_ae(sess, loss, x_train_app, x_train_aim, evaluate=evaluate_ae,
                 args=train_params, rng=rng)
        saver = tf.train.Saver()
        saver.save(sess, "train_dir/model_ae_adv.ckpt")
        print("saved model")

        # Attack the adversarially trained auto-encoder
        cw2 = CarliniWagnerAE(wrap_vae_adv, wrap_cl, sess=sess)
        adv_2 = cw2.generate_np(adv_inputs, adv_input_targets, **cw_params)

        # The logits returned here are used as-is; they are no longer
        # overwritten by a `cl_model.get_layer(recon_adv)` call, which passed
        # an image array where Keras expects a layer name.
        recon_orig, recon_adv, pred_adv_recon = _reconstruct_and_classify(
            sess, x, wrap_vae_adv, wrap_cl, adv_inputs, adv_2)
        # Metrics are computed on adv_2, the examples crafted against the
        # retrained auto-encoder (the noise used to be taken from `adv`).
        _print_attack_metrics(adv_2, adv_inputs, adv_input_targets, recon_adv,
                              pred_adv_recon, adv_input_y, adv_target_y)

        if viz_enabled:
            _fill_viz_grids(grid_viz_data, grid_viz_data_1, recon_orig,
                            recon_adv, adv_inputs, adv_2, nb_classes)

        print('--------------------------------------')

        # Compute the average distortion introduced by the algorithm
        percent_perturbed = np.mean(
            np.sum((adv_2 - adv_inputs)**2, axis=(1, 2, 3))**.5)
        print('Avg. L_2 norm of perturbations {0:.4f}'.format(
            percent_perturbed))

        if viz_enabled:
            plt.ioff()
            _save_grid_figure(grid_viz_data, 'cifar10_fig1_vae_adv_trained')
            _save_grid_figure(grid_viz_data_1, 'cifar10_fig2_vae_adv_trained')

    # ------------------------------------------------------------------
    # Binarization defense
    # ------------------------------------------------------------------
    if (binarization_defense == True):
        print("BINARIZATION")
        print("---------------------------")
        # Threshold in place: `adv` stays binarized for the next defense.
        adv[adv > 0.5] = 1.0
        adv[adv <= 0.5] = 0.0

        recon_orig, recon_adv, pred_adv_recon = _reconstruct_and_classify(
            sess, x, wrap_vae, wrap_cl, adv_inputs, adv)
        if targeted:
            _print_attack_metrics(adv, adv_inputs, adv_input_targets,
                                  recon_adv, pred_adv_recon, adv_input_y,
                                  adv_target_y)

        if viz_enabled:
            _fill_viz_grids(grid_viz_data, grid_viz_data_1, recon_orig,
                            recon_adv, adv_inputs, adv, nb_classes)
            plt.ioff()
            _save_grid_figure(grid_viz_data, 'cifar10_vae_fig1_bin')
            _save_grid_figure(grid_viz_data_1, 'cifar10_vae_fig2_bin')

    # ------------------------------------------------------------------
    # Mean-filtering defense
    # ------------------------------------------------------------------
    if (mean_filtering == True):
        print("MEAN FILTERING")
        print("---------------------------")
        # Smooth each image spatially with a 2x2 window.  A scalar size would
        # also average across the batch and channel axes (assumes this is
        # scipy.ndimage.uniform_filter and `adv` is (N, H, W, C)).
        adv = uniform_filter(adv, size=(1, 2, 2, 1))

        recon_orig, recon_adv, pred_adv_recon = _reconstruct_and_classify(
            sess, x, wrap_vae, wrap_cl, adv_inputs, adv)
        _print_attack_metrics(adv, adv_inputs, adv_input_targets, recon_adv,
                              pred_adv_recon, adv_input_y, adv_target_y)

        if viz_enabled:
            _fill_viz_grids(grid_viz_data, grid_viz_data_1, recon_orig,
                            recon_adv, adv_inputs, adv, nb_classes)
            plt.ioff()
            _save_grid_figure(grid_viz_data, 'cifar10_vae_fig1_mean')
            _save_grid_figure(grid_viz_data_1, 'cifar10_vae_fig2_mean')
def main():
    """Run one targeted attack on a single image and save the results.

    The source image is either random noise (``FLAGS.generate_random``) or
    loaded from ``FLAGS.image`` and preprocessed by the ``GTSRB`` dataset
    object.  The attack selected by ``FLAGS.attack`` is run against the Keras
    model loaded from ``FLAGS.model_folder`` / ``FLAGS.model`` with target
    class ``FLAGS.target``.  The original image is always written to
    ``FLAGS.outdir``; the adversarial image, the prediction summary and a
    ``.conf`` file holding the command line are written only when the model
    classifies the adversarial image as the target class.
    """
    dataset = GTSRB(FLAGS.random_seed)
    set_log_level(logging.DEBUG)

    if FLAGS.generate_random:
        print("Using random noise")
        img = np.random.rand(FLAGS.img_size, FLAGS.img_size, 3)
    else:
        print("Loading image from", FLAGS.image)
        with Image.open(FLAGS.image) as img:
            img = dataset.preprocess(img)

    # setup session
    sess = tf.Session()
    K.set_session(sess)

    # setup tf input placeholder
    x = tf.placeholder(tf.float32,
                       shape=(None, dataset.img_size, dataset.img_size,
                              dataset.n_channels))

    # load model
    tf_model = load_model(os.path.join(FLAGS.model_folder, FLAGS.model),
                          compile=False)
    model = KerasModelWrapper(tf_model)
    n_classes = tf_model.output_shape[1]

    # symbolic model predictions
    logits = model.get_logits(x)

    print("Target: ", FLAGS.target, remote_map[FLAGS.target])

    # Batch of one image and its one-hot target label
    adv_inputs = np.array([img])
    adv_targets = np.expand_dims(np.eye(n_classes)[FLAGS.target], axis=0)

    # attack dict
    # TODO: maybe put this in a separate config file
    attacks = {
        'cwl2': CarliniWagnerL2,
        'fgsm': FastGradientMethod,
        'lbfgs': LBFGS,
        'spsa': SPSA,
        'pgd': ProjectedGradientDescent,
        'jsma': SaliencyMapMethod,
        'physical': Physical,
        'robust_cwl2': CarliniL2Robust
    }

    attack_params = {
        'cwl2': {
            'y_target': adv_targets,
            'max_iterations': FLAGS.max_iterations,
            'binary_search_steps': FLAGS.binary_search_steps,
            'learning_rate': 0.01,
            'batch_size': 1,
            'initial_const': 10,
            'confidence': FLAGS.confidence,
            'clip_min': FLAGS.boxmin,
            'clip_max': FLAGS.boxmax
        },
        'fgsm': {
            'y_target': adv_targets,
            'eps': 0.3,
            'ord': np.inf,
            'clip_min': FLAGS.boxmin,
            'clip_max': FLAGS.boxmax
        },
        'lbfgs': {
            'y_target': adv_targets,
            'max_iterations': FLAGS.max_iterations,
            'binary_search_steps': FLAGS.binary_search_steps,
            'batch_size': 1,
            'initial_const': 1e-2,
            'clip_min': FLAGS.boxmin,
            'clip_max': FLAGS.boxmax
        },
        'spsa': {},
        'pgd': {},
        'jsma': {
            'y_target': adv_targets,
            'theta': 1,
            'gamma': 0.1,
            'clip_min': FLAGS.boxmin,
            'clip_max': FLAGS.boxmax
        },
        'physical': {
            'y_target': adv_targets,
            'mask_path': FLAGS.mask_image,
            'max_iterations': FLAGS.max_iterations,
            'num_labels': n_classes
        },
        'robust_cwl2': {
            'y_target': adv_targets,
            'max_iterations': FLAGS.max_iterations,
            'binary_search_steps': FLAGS.binary_search_steps,
            'learning_rate': 0.01,
            'batch_size': 1,
            'initial_const': 10,
            'confidence': FLAGS.confidence,
            'clip_min': FLAGS.boxmin,
            'clip_max': FLAGS.boxmax,
            'num_labels': n_classes,
            'outdir': FLAGS.outdir
        }
    }

    # setup the attack
    # TODO: port physical to cleverhans interface
    attack = attacks[FLAGS.attack](model, sess=sess)
    attack_kwargs = attack_params[FLAGS.attack]

    print("Starting attack")
    print("Parameters: ")
    for k, v in attack_kwargs.items():
        print(k, ":", v)
    print("")

    # attack images
    with Timer("Attack (n_images=" + str(len(adv_inputs)) + ")"):
        adv = attack.generate_np(adv_inputs, **attack_kwargs)
    print("Attack finished")

    # prepare img data for writing to file
    inputs_img = np.rint(adv_inputs * 255).astype('uint8')
    adv_img = np.rint(adv * 255).astype('uint8')

    outdir = FLAGS.outdir
    # exist_ok avoids the check-then-create race of a separate exists() test
    os.makedirs(outdir, exist_ok=True)

    for i in range(len(adv)):
        filepath = os.path.join(outdir, FLAGS.attack + "_")
        print(filepath)

        # Original image
        img = Image.fromarray(inputs_img[i], 'RGB')
        img.save(filepath + "original.png")

        orig_y = model_logits(sess, x, logits, adv_inputs[i:i + 1])
        pred_input_i = np.argmax(orig_y, axis=-1)
        adv_y = model_logits(sess, x, logits, adv[i:i + 1])
        pred_adv_i = np.argmax(adv_y, axis=-1)

        # Only keep examples that the model classifies as the target class
        if pred_adv_i != FLAGS.target:
            print("No adv: ", remote_map[pred_input_i],
                  remote_map[pred_adv_i])
            continue

        # Adversarial images
        adv_image_path = filepath + str(pred_adv_i) + "adv.png"
        img = Image.fromarray(adv_img[i], 'RGB')
        img.save(adv_image_path)

        if not os.path.exists(adv_image_path):
            print("Saving file failed... retrying")
            # Retry with the same uint8 image: the float array `adv[i]` is
            # not valid pixel data for an 'RGB' image.
            img = Image.fromarray(adv_img[i], 'RGB')
            img.save(adv_image_path)
            if not os.path.exists(adv_image_path):
                print("Saving file failed again")
                print("Saving to pickle:")
                print(adv_image_path)
                with open(adv_image_path + ".pickle", "wb") as f:
                    # Last resort: keep the raw adversarial array.
                    # pickle.dump needs the object as its first argument.
                    pickle.dump(adv[i], f)

        print(remote_map[pred_input_i], "->", remote_map[pred_adv_i])
        print("Classification (original/target):", pred_input_i, "/",
              pred_adv_i)

        orig_softmax_y = softmax(orig_y)
        adv_softmax_y = softmax(adv_y)

        # Confidence in the original and the adversarial class, before and
        # after the attack
        print("Original image: ")
        print(remote_map[pred_input_i], orig_softmax_y[pred_input_i], "\t",
              remote_map[pred_adv_i], orig_softmax_y[pred_adv_i])

        print("Adversarial image: ")
        print(remote_map[pred_input_i], adv_softmax_y[pred_input_i], "\t",
              remote_map[pred_adv_i], adv_softmax_y[pred_adv_i])

        print("Total distortion:", np.sum((adv[i] - adv_inputs[i])**2)**.5)

        # Record the command line that produced this example
        with open(adv_image_path + ".conf", "w") as f:
            f.write("python3 " + " ".join(sys.argv))
def __init__(self,
             sess,
             depth,
             test_batch_size,
             use_softmax=True,
             x=None,
             y=None,
             load_existing=False,
             model_name='vgg16',
             loss='cw'):
    """Build or load a CIFAR-10 Keras classifier and define its TF loss.

    :param sess: TensorFlow session; stored as ``self.sess``.
    :param depth: network depth. Only read for the ``resnet_v1`` /
        ``resnet_v2`` variants (constructor argument and file name).
    :param test_batch_size: stored as ``self.test_batch_size``.
    :param use_softmax: accepted for interface compatibility; not read
        inside this method.
    :param x: placeholder for inputs.
    :param y: placeholder for labels. It is multiplied element-wise with
        the class probabilities, so presumably one-hot -- TODO confirm.
    :param load_existing: if True, load ``cifar10_<name>_model.h5`` from
        the hard-coded ``save_dir``; otherwise build and compile a fresh
        model.
    :param model_name: one of ``'vgg16'``, ``'densenet'``,
        ``'resnet_v1'``, ``'resnet_v2'`` when building a fresh model.
    :param loss: ``'cw'`` (log-probability margin) or ``'xent'``
        (softmax cross-entropy).
    :raises ValueError: if a fresh model is requested with an unknown
        ``model_name`` (previously: printed a message and exited the
        interpreter with status 0).
    :raises NotImplementedError: if ``loss`` is neither ``'cw'`` nor
        ``'xent'``.
    """
    # "depth", "version" are required for resnet
    self.x = x
    self.y = y
    self.sess = sess
    self.test_batch_size = test_batch_size

    input_shape = (32, 32, 3)
    if load_existing:
        # TODO: replace with your own ROOT directory for normal cifar10
        # models
        save_dir = 'CIFAR10_models/Normal_deep_models/'
        if model_name == 'resnet_v1':
            model_load_name = 'cifar10_' + 'ResNet' + str(depth) + 'v1_model'
        elif model_name == 'resnet_v2':
            model_load_name = 'cifar10_' + 'ResNet' + str(depth) + 'v2_model'
        else:
            model_load_name = 'cifar10_' + model_name + '_model'
        filepath = os.path.join(save_dir, model_load_name + '.h5')
        model = load_model(filepath)
    else:
        if model_name == 'vgg16':
            model = vgg16_model(input_shape=input_shape)
        elif model_name == 'densenet':
            model = densenet(input_shape=input_shape)
        elif model_name == 'resnet_v1':
            model = resnet_v1(input_shape=input_shape, depth=depth)
        elif model_name == 'resnet_v2':
            model = resnet_v2(input_shape=input_shape, depth=depth)
        else:
            # A bad model name is a configuration error: fail loudly
            # instead of terminating the process with a success status.
            raise ValueError(
                "please provide a valid model name! (got %r)" % (model_name,))

        # NOTE(review): `opt` is not defined in this method; presumably a
        # module-level optimizer -- confirm it exists in the file.
        model.compile(loss='categorical_crossentropy',
                      optimizer=opt,
                      metrics=['accuracy'])

    self.model = model
    model = KerasModelWrapper(model)
    self.predictions = model.get_logits(self.x)
    self.probs = tf.nn.softmax(logits=self.predictions)
    self.eval_preds = tf.argmax(self.predictions, 1)
    self.y_target = tf.placeholder(tf.int64, shape=None)  # tensor.shape (?,)
    # one-to-one comparison of predicted class against the target class
    self.eval_percent_adv = tf.equal(self.eval_preds, self.y_target)

    if loss == 'cw':
        # Probability assigned to the labelled class ...
        target_probs = tf.reduce_sum(self.y * self.probs, 1)
        # ... and the largest probability among the other classes; the
        # labelled class is pushed out of the max by the -10000 offset.
        other_probs = tf.reduce_max(
            (1 - self.y) * self.probs - (self.y * 10000), 1)
        # 1e-30 keeps both logarithms finite.
        self.loss = (tf.log(other_probs + 1e-30) -
                     tf.log(target_probs + 1e-30))
    elif loss == 'xent':
        self.loss = tf.nn.softmax_cross_entropy_with_logits(
            labels=self.y, logits=self.predictions)
    else:
        raise NotImplementedError
# Randomly drop some ratio of pixels np.random.seed(args.seed) input_indices = np.random.choice(num_elements, num_inputs, replace=False) X_train = X_train_org[:, input_indices] X_test = X_test_org[:, input_indices] # Define placeholders x = tf.placeholder(tf.float32, shape=(None, num_inputs)) y = tf.placeholder(tf.float32, shape=(None, num_classes)) # Wrap log reg model for applying adversarial examples model = logistic_regression_model(num_inputs) wrap = KerasModelWrapper(model) # Define the objective logits = wrap.get_logits(x) loss = tf.nn.softmax_cross_entropy_with_logits_v2(labels=y, logits=logits) # Define the update func optimizer = tf.train.AdamOptimizer(learning_rate=args.lr) train_step = optimizer.minimize(loss) if args.verbose: print('verbose: train_step done') acc, acc_op = tf.metrics.accuracy( labels=tf.argmax(y, 1), predictions=tf.argmax(logits, 1)) if args.verbose: print('verbose: acc={}, acc_op={}'.format(acc, acc_op))
def cifar10_train_on_untargeted(train_start=0,
                                train_end=60000,
                                test_start=0,
                                test_end=10000,
                                nb_epochs=NB_EPOCHS,
                                batch_size=BATCH_SIZE,
                                learning_rate=LEARNING_RATE,
                                testing=True,
                                adv_training=False,
                                backprop_through_attack=BACKPROP_THROUGH_ATTACK,
                                num_threads=None,
                                threat_model='white_box',
                                model_key='model_1_a',
                                attacker_key='clean',
                                label_smoothing=0.1):
    """
    CIFAR10 cleverhans adversarial training of a stored Keras model.

    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :param testing: when adversarial training is disabled and this is true,
                    evaluate clean and adversarial accuracy on the training
                    set and print the report
    :param adv_training: if True, run the adversarial training loop and save
                         a new model plus a new meta-data entry after every
                         training round
    :param backprop_through_attack: If True, backprop through adversarial
                                    example construction process during
                                    adversarial training.
    :param num_threads: if truthy, the TF session is created with
                        intra_op_parallelism_threads=1 (the value itself is
                        not used)
    :param threat_model: 'white_box' (full training set), 'black_box_A'
                         (first half) or 'black_box_B' (second half)
    :param model_key: key of the model entry in the meta-data
    :param attacker_key: key of the attacker entry in the meta-data
    :param label_smoothing: float, amount of label smoothing for cross entropy
    :return: an AccuracyReport object
    :raises NotImplementedError: for an unknown ``threat_model``
    :raises ValueError: for an unknown attack type in the attacker meta-data
    """

    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Set logging level to see debug information
    set_log_level(logging.DEBUG)

    # Create TF session
    # NOTE(review): the thread count is fixed to 1 whenever num_threads is
    # truthy; the passed value is never used -- confirm this is intended.
    if num_threads:
        config_args = dict(intra_op_parallelism_threads=1)
    else:
        config_args = {}
    sess = tf.Session(config=tf.ConfigProto(**config_args))
    K.set_learning_phase(0)

    # Set the TF session as Keras backend session
    K.set_session(sess)

    # Look up the model to harden and the attacker to harden it against
    meta = read_from_meta()
    attacker_meta = meta['attacker'][attacker_key]
    model_meta = meta['model'][model_key]
    attack_type = attacker_meta['attack_type']

    if threat_model == 'black_box_A':
        print('Using training set A')
        train_end = int(train_end / 2)
        assert 'black_box_A' in meta['model'][model_key]['threat_models']
        dataset_section = 'A'
    elif threat_model == 'black_box_B':
        print('Using training set B')
        train_start = int(train_end / 2)
        dataset_section = 'B'
        assert 'black_box_B' in meta['model'][model_key]['threat_models']
    elif threat_model == 'white_box':
        print('Using full training set')
        dataset_section = ''
    else:
        raise NotImplementedError

    # Get CIFAR10 data
    data = CIFAR10(train_start=train_start, train_end=train_end,
                   test_start=test_start, test_end=test_end)
    dataset_size = data.x_train.shape[0]
    dataset_train = data.to_tensorflow()[0]
    dataset_train = dataset_train.map(
        lambda x, y: (random_shift(random_horizontal_flip(x)), y), 4)
    dataset_train = dataset_train.batch(batch_size)
    dataset_train = dataset_train.prefetch(16)
    x_train, y_train = data.get_set('train')
    x_test, y_test = data.get_set('test')

    # Use Image Parameters
    img_rows, img_cols, nchannels = x_test.shape[1:4]
    nb_classes = y_test.shape[1]

    # Define input TF placeholder
    x = tf.placeholder(tf.float32,
                       shape=(None, img_rows, img_cols, nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    # Attacker-specific parameters override the defaults.
    attack_params = {}
    attack_params.update(meta['attacker']['default']['attack_params'])
    attack_params.update(attacker_meta['attack_params'])
    for k, v in list(attack_params.items()):
        if isinstance(v, str):
            # SECURITY NOTE(review): eval() executes whatever string is
            # stored in the meta-data; only safe while that file is trusted.
            attack_params[k] = eval(v)
    if 'meta_key' in attacker_meta.keys() and attack_type == 'advgan':
        folderpath = meta['advgan'][
            attacker_meta['meta_key']]['train_params']['output_folder']
        attack_params.update(
            {'generator_filepath': os.path.join(folderpath, 'generator.hd5')})

    model_filename = model_meta['file_name']
    if 'black_box' in threat_model:
        model_filename = model_filename.replace('cifar10', 'cifar10B')
    model_filepath = model_meta['folder_path'] + '/' + model_filename

    keras_model = tf.keras.models.load_model(
        filepath=model_filepath,
        custom_objects=custom_object())
    model = KerasModelWrapper(keras_model)

    def attack_statistics(x_true, x_adv):
        # calculate average L1,L2,Linf norms
        # as well as % of pixels modified
        L1 = tf.reduce_mean(K.sum(K.abs(x_adv - x_true), axis=(-1, -2, -3)))
        L2 = tf.reduce_mean(
            K.sqrt(K.sum(K.square(x_adv - x_true), axis=(-1, -2, -3))))
        Linf = tf.reduce_mean(K.max(K.abs(x_true - x_adv), axis=(-1, -2, -3)))
        # A value counts as modified when it moved by more than 1/255.
        eps = tf.constant(1 / 255, shape=x_true.shape.as_list()[1:])
        mod_perc = 100 * tf.reduce_mean(
            K.cast(K.greater(K.abs(x_true - x_adv), eps), dtype='float'))
        return {'L1': L1, 'L2': L2, 'Linf': Linf, '%pix': mod_perc}

    def do_eval(preds, x_set, y_set, report_key, is_adv=None):
        # Evaluate `preds` on (x_set, y_set) and store the accuracy on the
        # report under `report_key`.
        eval_params = {'batch_size': batch_size}
        acc = model_eval(sess, x, y, preds, x_set, y_set, args=eval_params)
        setattr(report, report_key, acc)
        if is_adv is None:
            report_text = None
        elif is_adv:
            report_text = 'adversarial'
        else:
            report_text = 'legitimate'
        if report_text:
            print('Test accuracy on %s examples: %0.4f' % (report_text, acc))

    def report_as_strings():
        # Put accuracies in a dictionary for json serializability: keep only
        # the float-valued report attributes, truncated to 10 characters.
        float_types = (float, np.float32, np.float64)
        return {attr: str(getattr(report, attr))[:10]
                for attr in dir(report)
                if isinstance(getattr(report, attr), float_types)}

    # define attacker
    if attack_type == 'cwl2':
        from cleverhans.attacks import CarliniWagnerL2
        attacker = CarliniWagnerL2(model, sess=sess)
    elif attack_type == 'fgsm':
        from cleverhans.attacks import FastGradientMethod
        attacker = FastGradientMethod(model, sess=sess)
    elif attack_type == 'pgd':
        from cleverhans.attacks import MadryEtAl
        attacker = MadryEtAl(model, sess=sess)
    elif attack_type == 'advgan':
        from cleverhans.attacks.adversarial_gan import AdvGAN
        attacker = AdvGAN(model, sess=sess)
    elif attack_type is None or attack_type == 'clean':
        attacker = None
    else:
        # Previously this only printed the message and left `attacker`
        # unbound, which surfaced later as a NameError.
        raise ValueError(attack_type + ' is not a valid attack type')

    def attack(x):
        if attacker:
            print('attack_params', attack_params)
            return attacker.generate(x, **attack_params)
        else:
            return x

    loss = CrossEntropy(model, smoothing=label_smoothing, attack=attack)
    preds = model.get_logits(x)
    adv_x = attack(x)

    if not backprop_through_attack:
        # For the fgsm attack used in this tutorial, the attack has zero
        # gradient so enabling this flag does not change the gradient.
        # For some other attacks, enabling this flag increases the cost of
        # training, but gives the defender the ability to anticipate how
        # the attacker will change their strategy in response to updates to
        # the defender's parameters.
        adv_x = tf.stop_gradient(adv_x)
    preds_adv = model.get_logits(adv_x)

    def evaluate():
        # Accuracy of adversarially trained model on legitimate test inputs
        do_eval(preds, x_test, y_test, 'adv_train_clean_eval', False)
        # Accuracy of the adversarially trained model on adversarial examples
        do_eval(preds_adv, x_test, y_test, 'adv_train_adv_eval', True)

    # Print attack info, measured on the first test batch.
    with sess.as_default():
        # str() keeps this working when the attack type is None.
        print('attack type: ' + str(attack_type))
        attack_stats = attack_statistics(x, adv_x)
        feed_dict = {x: x_test[:batch_size], y: y_test[:batch_size]}
        attack_stats_eval = sess.run(attack_stats, feed_dict=feed_dict)
        attack_stats_eval = {
            k: str(v)[:10] for k, v in attack_stats_eval.items()}
        print(attack_stats_eval)

    if adv_training:
        # Train a CIFAR10 model in up to `reeval_breaks` rounds, saving a
        # checkpoint and adapting the learning rate after each round.
        reeval_breaks = 10
        train_params = {
            'batch_size': batch_size,
            'learning_rate': learning_rate
        }
        nb_e = nb_epochs  # epochs still to be trained
        prev_acc = 0
        # Perform and evaluate adversarial training
        for rb in range(reeval_breaks, 0, -1):
            # Split the remaining epochs evenly over the remaining rounds.
            # The old `nb_e < n < 0` guard is gone: it could only fire for
            # a negative epoch count.
            train_params.update({'nb_epochs': int(np.ceil(nb_e / rb))})
            print("Starting training {} of {}".format(nb_epochs - nb_e,
                                                      nb_epochs))
            # NOTE(review): `rng` is not defined in this function;
            # presumably a module-level random state -- confirm.
            train(sess, loss, None, None,
                  dataset_train=dataset_train, dataset_size=dataset_size,
                  evaluate=evaluate, args=train_params, rng=rng)
            nb_e -= train_params['nb_epochs']

            report_dict = report_as_strings()
            print(report_dict)

            # save to meta
            new_meta = read_from_meta()
            new_model = deepcopy(model_meta)
            new_model.update({
                'adv_training': True,
                'attacker_key': attacker_key,
                'parent_key': model_key,
                'threat_models': [threat_model],
                'attack_stats': attack_stats_eval,
                'report': report_dict,
                'train_params': {
                    'batch_size': batch_size,
                    'learning_rate': learning_rate,
                    'nb_epochs': nb_epochs - nb_e,
                },
                'reeval': False
            })
            if nb_e > 0:
                new_model.update({
                    'training_finished': False,
                    'file_name': model_meta['file_name'].replace(
                        'clean',
                        attacker_key + '_train_epoch_' +
                        str(new_model['train_params']['nb_epochs']))
                })
            else:
                new_model.update({
                    'training_finished': True,
                    'file_name': model_meta['file_name'].replace(
                        'clean', attacker_key + '_train')
                })
            new_model_key = get_new_key(model_key, meta)
            new_meta['model'].update({new_model_key: new_model})
            write_to_meta(new_meta)

            save_filename = new_model['file_name']
            if 'black_box' in threat_model:
                save_filename = save_filename.replace(
                    'cifar10', 'cifar10' + dataset_section)
            save_model(keras_model,
                       filepath=new_model['folder_path'] + '/' + save_filename)

            if report.adv_train_adv_eval >= 0.9:
                break
            elif report.adv_train_adv_eval <= 0.01:
                # increase_lr
                lr = train_params['learning_rate']
                train_params.update({'learning_rate': lr * 1.5})
                print('no learning! Increasing learning rate to {}'
                      .format(train_params['learning_rate']))
            elif prev_acc <= report.adv_train_adv_eval:
                # update_lr
                lr = train_params['learning_rate']
                train_params.update({'learning_rate': lr * 0.8})
                print('decreasing learning rate to {}'
                      .format(train_params['learning_rate']))
            prev_acc = copy(report.adv_train_adv_eval)
            if nb_e <= 0:
                break

    # Calculate training errors
    elif testing:
        do_eval(preds, x_train, y_train, 'train_adv_train_clean_eval')
        do_eval(preds_adv, x_train, y_train, 'train_adv_train_adv_eval')
        report_dict = report_as_strings()
        print('report_dict')
        print(report_dict)

    return report
def train_substitute(modelname='testmodel',
                     lmbda=0.1,
                     tau=2,
                     n_jac_iteration=5,
                     modeltype='cnn_model',
                     n_per_class=1,
                     batch_size=64,
                     descent_only=False):
    """Train a substitute model with Jacobian-based dataset augmentation.

    Each round builds a fresh model in a fresh TF session, labels the
    current data set with the remote model (labels are cached by sample
    index), fits the substitute, saves it to ``<modeldir>/<modelname>.h5``
    and then grows the data set via ``jacobian_augmentation``.

    :param modelname: file name (without ``.h5``) of the saved model
    :param lmbda: augmentation step size; multiplied by the per-round sign
    :param tau: period of the sign alternation ``(-1) ** (rho % tau)``
        (ignored when ``descent_only`` is set)
    :param n_jac_iteration: NOTE(review): currently not honoured, see the
        note at ``n_iterations`` below
    :param modeltype: key into ``MODEL_TYPES`` selecting the architecture
    :param n_per_class: images per class in the initial training set
    :param batch_size: batch size for fitting; also decides when a
        validation split is used (more than ``5 * batch_size`` samples)
    :param descent_only: if True, start from high-confidence images
        (threshold 0.95) and always use a negative augmentation sign
    :raises RuntimeError: if ``modeltype`` is not a key of ``MODEL_TYPES``
    """
    print("initializing training")

    if modeltype not in MODEL_TYPES:
        raise RuntimeError("Unknown model type: " + str(modeltype))
    build_model = MODEL_TYPES[modeltype]

    modeldir = init_modeldir()
    modelpath = os.path.join(modeldir, modelname + '.h5')

    set_log_level(logging.DEBUG)
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.95)
    sess_kwargs = dict(gpu_options=gpu_options)

    gtsrb = GTSRB(random_seed=42)
    n_classes = int(len(remote_map) / 2)

    # (0) init constants
    # `lmbda`, `tau` and `batch_size` are now taken from the arguments; they
    # used to be shadowed by hard-coded duplicates of their default values.
    # NOTE(review): the round count stays hard-coded at 4 because the
    # `n_jac_iteration` default is 5, so honouring the argument would change
    # what a default call does -- decide which value is intended.
    n_iterations = 4
    cache = {}  # sample index -> remote prediction

    # (1) select initial training set
    # With descent_only we select only a few images per class but these
    # should be classified with high confidence by the remote model.
    confidence_threshold = 0.95 if descent_only else 0
    X, _ = get_initial_set(gtsrb,
                           hot_encoded=False,
                           n_per_class=n_per_class,
                           confidence_threshold=confidence_threshold)

    def lr_schedule(epoch):
        # 0.01, divided by 10 every 25 epochs
        return 0.01 * (0.1**int(epoch / 25))

    for rho in range(n_iterations):
        print("=" * 5, "jacobian iteration: ", rho, "training set size: ",
              len(X))

        # for memory reasons, we must reinitialize the session in each
        # iteration
        sess = tf.Session(config=tf.ConfigProto(**sess_kwargs))
        K.set_session(sess)

        wrap = KerasModelWrapper(build_model(gtsrb.img_size, n_classes))
        op = SGD(lr=1e-2, decay=1e-6, momentum=0.9, nesterov=True)
        wrap.model.compile(loss='categorical_crossentropy',
                           optimizer=op,
                           metrics=['accuracy'])

        # (2) specify architecture (already specified)
        x = tf.placeholder(tf.float32, shape=(None, *X.shape[1:]))
        initial_weights = wrap.model.get_weights()

        if descent_only:
            # own approach
            lmbda_coef = -1
        else:
            # lambda as described in the paper
            lmbda_coef = (-1)**(rho % tau)

        # (3) label data
        Y = np.zeros(shape=(len(X), n_classes))
        for i, sample in enumerate(X):
            # take known labels from cache, query the remote model otherwise
            if i not in cache:
                cache[i] = fetch_single_prediction(sample,
                                                   remote_map,
                                                   n_classes,
                                                   delay=1)
            Y[i] = cache[i]

        # (4) fit model on current set
        wrap.model.set_weights(initial_weights)
        wrap.model.fit(
            X,
            Y,
            batch_size=batch_size,
            epochs=(rho + 1) * 20,
            validation_split=0.2 if len(X) > batch_size * 5 else 0,
            verbose=2,
            callbacks=[
                LearningRateScheduler(lr_schedule),
                ModelCheckpoint(modelpath, save_best_only=True)
            ])

        # (5) augment data
        # The augmented set is only consumed by the next round, so the
        # augmentation is skipped after the last one.
        if rho < n_iterations - 1:
            logits = wrap.get_logits(x)
            jacobian = jacobian_graph(logits, x, n_classes)
            Y_sub = np.argmax(Y, axis=1)
            X = jacobian_augmentation(sess,
                                      x,
                                      X,
                                      Y_sub,
                                      jacobian,
                                      lmbda=(lmbda * lmbda_coef))

        # Replace whatever the checkpoint callback wrote with the final
        # weights of this round.
        if os.path.exists(modelpath):
            os.remove(modelpath)
        wrap.model.save(modelpath)

        K.clear_session()
        del sess
        # free as much memory as we can
        gc.collect()
def cifar10_eval_attacks(train_start=0,
                         train_end=60000,
                         test_start=0,
                         test_end=10000,
                         sweep_eps=SWEEP_EPS,
                         targeted=TARGETED,
                         model_key='model_1_a',
                         attacker_keys='clean',
                         eval_model_keys=None,
                         threat_model='white_box',
                         generate_examples=True):
    """
    CIFAR10 cleverhans evaluation of stored models under stored attacks.

    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param sweep_eps: if true (and the attack is not 'clean'), evaluate over
                      a range of epsilons instead of the configured one
    :param targeted: if true, run targeted attacks and report accuracy
                     matrices indexed by [target class, original class]
    :param model_key: name of the keras model to be loaded and attacked
    :param attacker_keys: name or list of names of the attackers to be loaded
                          and used to attack the model
    :param eval_model_keys: list of model names to evaluate the adversarial
                            examples on; required for 'black_box', replaced
                            by [model_key] for 'white_box'
    :param threat_model: 'white_box' or 'black_box'
    :param generate_examples: if true, generate adversarial examples as
                              numpy arrays (cached as pickle files) and
                              evaluate on those
    :return: a dict with one report entry per attacker key
    :raises ValueError: for an unknown threat model or attack type, or when
                        eval_model_keys is not a list for 'black_box'
    """
    if threat_model == 'white_box':
        eval_model_keys = [
            model_key,
        ]
        attacker_partition = ''
        defender_partition = ''
    elif threat_model == 'black_box':
        attacker_partition = 'A'
        defender_partition = 'B'
        if not isinstance(eval_model_keys, list):
            raise ValueError('eval_model_keys must be list for black_box')
    else:
        # Previously fell through and failed later on an unbound name.
        raise ValueError('unknown threat_model: ' + str(threat_model))

    # A single attacker name is accepted as well as a list of names;
    # list('clean') would split the name into characters.
    if isinstance(attacker_keys, str):
        attacker_keys = [attacker_keys]
    else:
        attacker_keys = list(attacker_keys)

    # TODO: add white-box info to meta-data. Sketch of the intended layout:
    #
    #   "model_1_g": {                      <- the eval model
    #     "advgan_b->model_1_e": {          <- the surrogate model
    #       "model_acc": "saved_models/model_1_cifar10_ResNet20_v2\\pickle\\model_1_g_advgan_b_model_acc.p",
    #       "target_acc": "saved_models/model_1_cifar10_ResNet20_v2\\pickle\\model_1_g_advgan_b_target_acc.p",
    #       "attack_stats": {
    #         "L1": 127.04542236328125,
    #         "L2": 2.9744277954101563,
    #         "Linf": 0.2539639711380005,
    #         "%pix": 93.39645385742188,
    #         "num_batches": 20,
    #         "time": "97.7us"
    #       "threat_model": "black_box"

    def load_or_create_pickle(path, create):
        # Return the object pickled at `path`; when the file does not exist,
        # build it with `create()` and cache it there first.
        if os.path.exists(path):
            with open(path, "rb") as f:
                return pickle.load(f)
        obj = create()
        with open(path, "wb") as f:
            pickle.dump(obj, f)
        return obj

    def dump_pickle(obj, path):
        with open(path, "wb") as f:
            pickle.dump(obj, f)

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Set logging level to see debug information
    set_log_level(logging.DEBUG)

    # Create TF session
    sess = tf.Session()
    K.set_learning_phase(0)

    # Set the TF session as Keras backend session
    K.set_session(sess)

    # Get CIFAR10 data
    data = CIFAR10(train_start=train_start,
                   train_end=train_end,
                   test_start=test_start,
                   test_end=test_end)
    # NOTE(review): the next two values are not read below; kept so that
    # graph construction matches the original.
    dataset_size = data.x_train.shape[0]
    dataset_train = data.to_tensorflow()[0]
    x_test, y_test = data.get_set('test')
    nb_test = x_test.shape[0]

    # Use Image Parameters
    img_rows, img_cols, nchannels = x_test.shape[1:4]
    nb_classes = y_test.shape[1]

    # Define input TF placeholder
    x = tf.placeholder(tf.float32,
                       shape=(None, img_rows, img_cols, nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))
    y_target = tf.placeholder(tf.float32, shape=(None, nb_classes))

    # Load the model that the attacks are generated against.
    meta = read_from_meta()
    model_meta = meta['model'][model_key]
    filename = model_meta['file_name'].replace('CIFAR10',
                                               'CIFAR10' + attacker_partition)
    keras_model = tf.keras.models.load_model(
        filepath=model_meta['folder_path'] + '/' + filename,
        custom_objects=custom_object())
    model = KerasModelWrapper(keras_model)

    report = dict()
    for attacker_key in attacker_keys:
        attacker_meta = meta['attacker'][attacker_key]
        attack_type = attacker_meta['attack_type']

        # Attacker-specific parameters override the defaults.
        attack_params = {}
        attack_params.update(meta['attacker']['default']['attack_params'])
        attack_params.update(attacker_meta['attack_params'])
        if 'spsa' in attacker_key:
            eval_par = {'batch_size': 1}
        else:
            eval_par = {'batch_size': attack_params['batch_size']}
        for k, v in list(attack_params.items()):
            if isinstance(v, str):
                # SECURITY NOTE(review): eval() executes whatever string is
                # stored in the meta-data; only safe while it is trusted.
                attack_params[k] = eval(v)

        # define attacker
        uses_generator = attack_type is not None and (
            attack_type == 'advgan' or 'g+' in attack_type)
        if uses_generator:
            if 'meta_key' in attacker_meta.keys():
                folderpath = meta['advgan'][
                    attacker_meta['meta_key']]['train_params']['output_folder']
                attack_params.update({
                    'generator_filepath':
                    os.path.join(folderpath, 'generator.hd5'),
                    'custom_objects':
                    custom_object()
                })
            else:
                raise NotImplementedError(
                    "Must provide attacker meta with existing meta_key")

        standard_attackers = {
            'cwl2': cha.CarliniWagnerL2,
            'fgsm': cha.FastGradientMethod,
            'pgd': cha.MadryEtAl,
            'jsma': cha.SaliencyMapMethod,
            'stm': cha.SpatialTransformationMethod,
            'advgan': cha.AdvGAN,
            'spsa': cha.SPSA,
            'g+pgd': cha.GanInformedPGD,
            'g+spsa': cha.GanInformedSPSA
            #'g+fgsm':cha.GanInformedFGM
        }
        if attack_type in standard_attackers:
            attacker = standard_attackers[attack_type](model, sess=sess)
        elif attack_type is None or attack_type == 'clean':
            attacker = None
        else:
            # Previously only printed, leaving `attacker` unbound or still
            # pointing at the attacker of the previous loop iteration.
            raise ValueError(attack_type + ' is not a valid attack type')

        pkl_folderpath = os.path.join(model_meta['folder_path'], 'pickle',
                                      attacker_key)
        os.makedirs(pkl_folderpath, exist_ok=True)

        if targeted:
            # get target labels: every test image is paired with every class
            # as target, then grouped by (target t, original o).
            target_test = np.repeat(range(nb_classes), nb_test)
            x_test_shuf = np.array(np.tile(x_test, (nb_classes, 1, 1, 1)))
            y_test_shuf = np.array(np.tile(y_test, (nb_classes, 1)))
            y_target_test_shuf = tf.keras.utils.to_categorical(
                target_test, nb_classes)
            # do not shuffle
            x_test_by_t_o = [[None] * nb_classes for n in range(nb_classes)]
            y_test_by_t_o = [[None] * nb_classes for n in range(nb_classes)]
            y_target_test_by_t_o = [[None] * nb_classes
                                    for n in range(nb_classes)]
            # Sample counts per (t, o); the extra last row / column hold the
            # column / row totals.
            nb_test_by_t_o = np.zeros((nb_classes + 1, nb_classes + 1))
            print(y_target_test_shuf)
            for t in range(nb_classes):
                for o in range(nb_classes):
                    if t == o:
                        continue
                    index = np.logical_and(y_target_test_shuf[:, t],
                                           y_test_shuf[:, o])
                    nb_test_by_t_o[t, o] = np.count_nonzero(index)
                    x_test_by_t_o[t][o] = x_test_shuf[index]
                    y_test_by_t_o[t][o] = y_test_shuf[index]
                    y_target_test_by_t_o[t][o] = y_target_test_shuf[index]
            np.testing.assert_array_equal(y_target_test_by_t_o[0][1],
                                          y_target_test_by_t_o[0][2],
                                          err_msg='',
                                          verbose=True)
            nb_test_by_t_o[nb_classes, :] = np.sum(nb_test_by_t_o, axis=0)
            nb_test_by_t_o[:, nb_classes] = np.sum(nb_test_by_t_o, axis=1)
            attack_params.update({'y_target': y_target})

            def model_eval_wrapper(preds,
                                   acc_target='original_class',
                                   adv_x=None):
                # Accuracy of `preds` per (target, original) pair, measured
                # against the original or the target labels. The last row /
                # column hold sample-weighted aggregates and the corner cell
                # the overall value.
                if acc_target == 'original_class':
                    acc_target = y_test_by_t_o
                elif acc_target == 'target_class':
                    acc_target = y_target_test_by_t_o
                else:
                    raise ValueError('invalid value for accuracy_target: ' +
                                     acc_target)
                accuracy_by_t_o = np.zeros((nb_classes + 1, nb_classes + 1))
                orig_accuracy_by_t_o = np.zeros(
                    (nb_classes + 1, nb_classes + 1))
                for t in range(nb_classes + 1):
                    for o in range(nb_classes):
                        if t == o:
                            continue
                        row_scale = (nb_test_by_t_o[t, o] /
                                     nb_test_by_t_o[t, nb_classes])
                        col_scale = (nb_test_by_t_o[t, o] /
                                     nb_test_by_t_o[nb_classes, o])
                        if t < nb_classes:
                            feed = {
                                y_target:
                                y_target_test_by_t_o[t][o]
                                [:eval_par['batch_size'], :]
                            }
                            if generate_examples:
                                assert adv_x is not None, 'adv_x tensor must be supplied when generating examples'
                                pickle_x_file = os.path.join(
                                    pkl_folderpath, pickle_file_head +
                                    "x_test_targeted_{}_{}.p".format(t, o))
                                adv_x_test = load_or_create_pickle(
                                    pickle_x_file,
                                    lambda: gen_np(sess, x_test_by_t_o[t][o],
                                                   x, adv_x,
                                                   y_target_test_by_t_o[t][o],
                                                   y_target))
                                accuracy_by_t_o[t, o] = model_eval(
                                    sess,
                                    adv_x,
                                    y,
                                    preds,
                                    adv_x_test,
                                    acc_target[t][o],
                                    args=eval_par)
                                orig_accuracy_by_t_o[t, o] = model_eval(
                                    sess,
                                    adv_x,
                                    y,
                                    preds,
                                    x_test_by_t_o[t][o],
                                    acc_target[t][o],
                                    args=eval_par)
                            else:
                                accuracy_by_t_o[t, o] = model_eval(
                                    sess,
                                    x,
                                    y,
                                    preds,
                                    x_test_by_t_o[t][o],
                                    acc_target[t][o],
                                    feed=feed,
                                    args=eval_par)
                            accuracy_by_t_o[nb_classes, o] += (
                                accuracy_by_t_o[t, o] * col_scale)
                            orig_accuracy_by_t_o[nb_classes, o] += (
                                orig_accuracy_by_t_o[t, o] * col_scale)
                        # NOTE(review): the flattened source does not show
                        # whether these row aggregates sat inside the branch
                        # above. They are applied to the aggregate row
                        # (t == nb_classes) too, because the corner cell
                        # that is printed later is only filled there.
                        accuracy_by_t_o[t, nb_classes] += (
                            accuracy_by_t_o[t, o] * row_scale)
                        orig_accuracy_by_t_o[t, nb_classes] += (
                            orig_accuracy_by_t_o[t, o] * row_scale)
                if adv_x is not None:
                    # fill diagonal with original accuracies
                    for o in range(nb_classes):
                        accuracy_by_t_o[o, o] = orig_accuracy_by_t_o[
                            nb_classes, o]
                return accuracy_by_t_o
        else:
            x_test_shuf = x_test
            y_test_shuf = y_test

        def attack(x, attack_params=attack_params):
            if attacker:
                return attacker.generate(x, **attack_params)
            else:
                return x

        def gen_np(sess, X, x, adv_x, Y_target=None, y_target=None):
            # inputs:
            #  sess (required)     : tf session
            #  X (required)        : numpy input data
            #  x (required)        : placeholder for model input
            #  adv_x (required)    : tensor for generator output
            #  Y_target (optional) : numpy array specifying the target class
            #  y_target (optional) : placeholder for the target inputs
            # outputs:
            #  numpy array of adversarial examples, generated batch by batch
            if not attacker:
                # Without an attacker the examples are the inputs themselves
                # (this used to return the placeholder `x` by mistake).
                return X
            with sess.as_default():
                _batch_size = eval_par['batch_size']
                nb_x = X.shape[0]
                # The empty seed array keeps the result dtype what repeated
                # concatenation onto it used to give.
                batches = [np.zeros((0, ) + X.shape[1:], dtype=X.dtype)]
                for start in range(0, nb_x, _batch_size):
                    end = min(nb_x, start + _batch_size)
                    feed_dict = {x: X[start:end]}
                    if Y_target is not None:
                        feed_dict.update({y_target: Y_target[start:end]})
                    batches.append(adv_x.eval(feed_dict=feed_dict))
                # Concatenate once instead of once per batch.
                return np.concatenate(batches, axis=0)

        def attack_stats_eval(x, adv_x, num_batches=1):
            # Return attack info: the attack statistics averaged over
            # `num_batches` test batches plus the process time per example
            # of evaluating `adv_x`.
            with sess.as_default():
                _batch_size = eval_par['batch_size']
                _as_eval = dict()
                cum_time = 0.
                attack_stats = attack_statistics(x, adv_x)
                for batch in range(num_batches):
                    lo = batch * _batch_size
                    hi = (batch + 1) * _batch_size
                    feed_dict = {
                        x: x_test_shuf[lo:hi],
                        y: y_test_shuf[lo:hi]
                    }
                    if targeted:
                        feed_dict.update(
                            {y_target: y_target_test_shuf[lo:hi]})
                    _as = sess.run(attack_stats, feed_dict=feed_dict)
                    if batch == 0:
                        _as_eval = deepcopy(_as)
                    else:
                        _as_eval = {k: v + _as[k] for k, v in _as_eval.items()}
                    t_1 = time.process_time()
                    adv_x.eval(feed_dict=feed_dict)
                    t_2 = time.process_time()
                    cum_time += t_2 - t_1
            cum_time /= num_batches * _batch_size
            _as_eval = {k: v / num_batches for k, v in _as_eval.items()}
            _as_eval.update({
                'num_batches': num_batches,
                'time': metric_convert(cum_time, 's')
            })
            return _as_eval

        report.update({attacker_key: {'model_acc': {}}})

        for eval_model_key in eval_model_keys:
            # Sweep over models to evaluate on. "White Box" attacks
            # only have one eval_model_key "Black Box" attack may
            # have several eval_model_key "defenses"
            report_view = report[attacker_key]

            if threat_model == 'white_box':
                assert model_key == eval_model_key, (
                    'for white_box attacks, '
                    'generating model and eval model must be the same')
                eval_model = model
            elif threat_model == 'black_box':
                # add black box eval model to report and update report head
                report_view.setdefault('black_box', {})[eval_model_key] = {
                    'model_acc': {}
                }
                report_view = report_view['black_box'][eval_model_key]

                # load eval model trained on defense dataset
                eval_model_meta = meta['model'][eval_model_key]
                filename = eval_model_meta['file_name'].replace(
                    'CIFAR10', 'CIFAR10' + defender_partition)
                keras_model = tf.keras.models.load_model(
                    filepath=eval_model_meta['folder_path'] + '/' + filename,
                    custom_objects=custom_object())
                eval_model = KerasModelWrapper(keras_model)

            # evaluate model on clean examples
            preds = eval_model.get_logits(x)
            model_acc = model_eval(sess,
                                   x,
                                   y,
                                   preds,
                                   x_test,
                                   y_test,
                                   args=eval_par)
            print('Test accuracy on clean examples %0.4f\n' % model_acc)
            report_view.update({'clean_model_acc': model_acc})

            t1 = 0
            # sweep epsilon
            if sweep_eps and attack_type != 'clean':
                max_eps = 2 * attack_params['eps']
                if 'eps_iter' in attack_params:
                    max_eps_iter = 2 * attack_params['eps_iter']
                epsilons = np.linspace(1 / 255, max_eps,
                                       min(int(max_eps * 255), 16))
                sweep_e = dict()
                for e in epsilons:
                    scaled_e = str(int(e * 255))
                    t1 = time.time()
                    attack_params.update({'eps': e})
                    if 'eps_iter' in attack_params:
                        attack_params.update(
                            {'eps_iter': max_eps_iter * e / max_eps})
                    adv_x = attack(x, attack_params)
                    attack_stats_cur = attack_stats_eval(x, adv_x, 1)
                    preds_adv = eval_model.get_probs(adv_x)
                    # Must be set before anything below builds a pickle path
                    # from it (it used to be assigned too late, and not at
                    # all in the untargeted branch).
                    pickle_file_head = '{}_{}_{}_'.format(
                        model_key, attacker_key, e)
                    if targeted:
                        model_acc = model_eval_wrapper(
                            preds_adv,
                            acc_target='original_class',
                            adv_x=adv_x)
                        target_acc = model_eval_wrapper(
                            preds_adv, acc_target='target_class', adv_x=adv_x)
                        pickle_m_file = os.path.join(
                            pkl_folderpath, pickle_file_head + "model_acc.p")
                        pickle_t_file = os.path.join(
                            pkl_folderpath, pickle_file_head + "target_acc.p")
                        dump_pickle(model_acc, pickle_m_file)
                        dump_pickle(target_acc, pickle_t_file)
                        sweep_e.update({
                            scaled_e: {
                                'model_acc': pickle_m_file,
                                'target_acc': pickle_t_file,
                                'attack_stats': attack_stats_cur
                            }
                        })
                        acc_to_print = model_acc[nb_classes, nb_classes]
                    else:
                        # Adversarial accuracy needs the predictions built
                        # on adv_x (`preds_adv`), as in the single-epsilon
                        # branch; the clean `preds` were used here before.
                        if generate_examples:
                            pickle_x_file = os.path.join(
                                pkl_folderpath,
                                pickle_file_head + "x_test_untargeted.p")
                            adv_x_test = load_or_create_pickle(
                                pickle_x_file,
                                lambda: gen_np(sess, x_test, x, adv_x))
                            model_acc = model_eval(sess,
                                                   adv_x,
                                                   y,
                                                   preds_adv,
                                                   adv_x_test,
                                                   y_test,
                                                   args=eval_par)
                        else:
                            model_acc = model_eval(sess,
                                                   x,
                                                   y,
                                                   preds_adv,
                                                   x_test,
                                                   y_test,
                                                   args=eval_par)
                        sweep_e.update({
                            scaled_e: {
                                'model_acc': model_acc,
                                'attack_stats': attack_stats_cur
                            }
                        })
                        acc_to_print = model_acc
                    print('Epsilon %.2f, accuracy on adversarial' % e,
                          'examples %0.4f\n' % acc_to_print)
                    print(sweep_e[scaled_e])
                report_view.update({'sweep_eps': sweep_e})
                t2 = time.time()
            else:
                if 'eps' in attack_params:
                    cond_eps = attack_params['eps']
                else:
                    cond_eps = 'N/A'
                print('evaluating {}->{} examples on {} (single epsilon: {})'.
                      format(attacker_key, model_key, eval_model_key,
                             cond_eps))

                t1 = time.time()
                adv_x = attack(x, attack_params)
                preds_adv = eval_model.get_probs(adv_x)
                pickle_file_head = '{}_{}_'.format(model_key, attacker_key)
                if targeted:
                    model_acc = model_eval_wrapper(preds_adv,
                                                   acc_target='original_class',
                                                   adv_x=adv_x)
                    target_acc = model_eval_wrapper(preds_adv,
                                                    acc_target='target_class',
                                                    adv_x=adv_x)
                    if threat_model == 'black_box':
                        pickle_m_file = os.path.join(
                            pkl_folderpath, pickle_file_head + eval_model_key +
                            "_model_acc.p")
                        pickle_t_file = os.path.join(
                            pkl_folderpath, pickle_file_head + eval_model_key +
                            "_target_acc.p")
                    else:
                        pickle_m_file = os.path.join(
                            pkl_folderpath, pickle_file_head + "_model_acc.p")
                        pickle_t_file = os.path.join(
                            pkl_folderpath, pickle_file_head + "_target_acc.p")
                    dump_pickle(model_acc, pickle_m_file)
                    dump_pickle(target_acc, pickle_t_file)
                    report_view.update({
                        'model_acc': pickle_m_file,
                        'target_acc': pickle_t_file,
                        'attack_stats': attack_stats_eval(x, adv_x, 20)
                    })
                else:
                    if generate_examples:
                        pickle_x_file = os.path.join(
                            pkl_folderpath,
                            pickle_file_head + "x_test_untargeted.p")
                        adv_x_test = load_or_create_pickle(
                            pickle_x_file,
                            lambda: gen_np(sess, x_test, x, adv_x))
                        # evaluate on self and, if black box, all other eval
                        # models
                        model_acc = model_eval(sess,
                                               adv_x,
                                               y,
                                               preds_adv,
                                               adv_x_test,
                                               y_test,
                                               args=eval_par)
                    else:
                        model_acc = model_eval(sess,
                                               x,
                                               y,
                                               preds_adv,
                                               x_test,
                                               y_test,
                                               args=eval_par)
                    report_view.update({
                        'model_acc': model_acc,
                        'attack_stats': attack_stats_eval(x, adv_x, 20)
                    })
                t2 = time.time()

            if targeted:
                print('Test accuracy on adversarial examples %0.4f\n' %
                      model_acc[nb_classes, nb_classes])
                print('Target accuracy on adversarial examples %0.4f\n' %
                      target_acc[nb_classes, nb_classes])
            else:
                print('Test accuracy on adversarial examples %0.4f\n' %
                      model_acc)

            print("Took", t2 - t1, "seconds")

    return report