def train_sub(sess, x, y, bbox_preds, X_sub, Y_sub, nb_classes,
              nb_epochs_s, batch_size, learning_rate, data_aug, lmbda,
              rng):
    """
    This function creates the substitute by alternatively augmenting the
    training data and training the substitute.
    :param sess: TF session
    :param x: input TF placeholder
    :param y: output TF placeholder
    :param bbox_preds: output of black-box model predictions
    :param X_sub: initial substitute training data
    :param Y_sub: initial substitute training labels
    :param nb_classes: number of output classes
    :param nb_epochs_s: number of epochs to train substitute model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :param data_aug: number of times substitute training data is augmented
    :param lmbda: lambda from arxiv.org/abs/1602.02697
    :param rng: numpy.random.RandomState instance
    :return:
    """
    # Define TF model graph (for the substitute model)
    model_sub = substitute_model()
    preds_sub = model_sub(x)
    log_raw.info("Defined TensorFlow model graph for the substitute.")

    # Define the Jacobian symbolically using TensorFlow
    grads = jacobian_graph(preds_sub, x, nb_classes)

    # Train the substitute and augment dataset alternatively
    for rho in xrange(data_aug):
        log_raw.info("Substitute training epoch #" + str(rho))
        train_params = {
            'nb_epochs': nb_epochs_s,
            'batch_size': batch_size,
            'learning_rate': learning_rate
        }
        model_train(sess, x, y, preds_sub, X_sub, to_categorical(Y_sub),
                    init_all=False, verbose=False, args=train_params,
                    rng=rng)

        # If we are not at last substitute training iteration, augment dataset
        if rho < data_aug - 1:
            log_raw.info("Augmenting substitute training data.")
            # Perform the Jacobian augmentation
            X_sub = jacobian_augmentation(sess, x, X_sub, Y_sub, grads, lmbda)

            log_raw.info("Labeling substitute training data.")
            # Label the newly generated synthetic points using the black-box
            Y_sub = np.hstack([Y_sub, Y_sub])
            X_sub_prev = X_sub[int(len(X_sub) / 2):]
            eval_params = {'batch_size': batch_size}
            bbox_val = batch_eval(sess, [x], [bbox_preds], [X_sub_prev],
                                  args=eval_params)[0]
            # Note here that we take the argmax because the adversary
            # only has access to the label (not the probabilities) output
            # by the black-box model
            Y_sub[int(len(X_sub) / 2):] = np.argmax(bbox_val, axis=1)

    return model_sub, preds_sub
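# A minimal, self-contained sketch of one Jacobian augmentation round as used
# in train_sub above. The tiny softmax "substitute" and the random labels are
# placeholder assumptions (not the original tutorial's models); cleverhans
# v2/v3 with TF1-style graph execution is assumed.
import numpy as np
import tensorflow as tf
from cleverhans.attacks_tf import jacobian_graph, jacobian_augmentation

nb_classes = 10
x = tf.placeholder(tf.float32, shape=(None, 784))
W = tf.Variable(tf.random_normal([784, nb_classes]))
preds = tf.nn.softmax(tf.matmul(x, W))  # stand-in substitute model

# jacobian_graph returns one gradient tensor d(preds[:, c])/dx per class c
grads = jacobian_graph(preds, x, nb_classes)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    X_sub = np.random.rand(50, 784).astype(np.float32)
    Y_sub = np.random.randint(0, nb_classes, size=50)  # placeholder labels
    # One round doubles the set: x' = x + lmbda * sign(dF_y(x)/dx)
    X_aug = jacobian_augmentation(sess, x, X_sub, Y_sub, grads, lmbda=0.1)
    assert X_aug.shape[0] == 2 * X_sub.shape[0]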
def test_jsma_batch_with_feed(self):
    with tf.Session() as sess:
        X = np.random.rand(1, 13)

        # construct a simple graph that will require extra placeholders
        x = tf.placeholder('float', shape=(None, 13))
        keep_prob = tf.placeholder('float')
        W = tf.Variable(tf.random_normal([13, 10]))
        b = tf.Variable(tf.random_normal([10]))
        logits = tf.nn.dropout(tf.add(tf.matmul(x, W), b),
                               keep_prob=keep_prob)
        sess.run(tf.global_variables_initializer())

        # jsma should work without generating an error
        jacobian = attacks_tf.jacobian_graph(logits, x, 10)
        attacks_tf.jsma_batch(sess, x, logits, jacobian, X, theta=1.,
                              gamma=0.25, clip_min=0, clip_max=1,
                              nb_classes=10, feed={keep_prob: 1.0})
def saliency_map_method(sess, model, X, Y, theta, gamma, clip_min=None,
                        clip_max=None):
    """
    Craft a JSMA adversarial example for each sample in X, using a random
    target class that differs from the sample's label.
    :param sess: TF session
    :param model: model callable returning post-softmax predictions
    :param X: input samples
    :param Y: one-hot labels for X
    :param theta: perturbation introduced to modified components
    :param gamma: maximum fraction of features perturbed
    :param clip_min: minimum component value for clipping
    :param clip_max: maximum component value for clipping
    :return: array of adversarial examples
    """
    nb_classes = Y.shape[1]
    # Define TF placeholder for the input
    x = tf.placeholder(tf.float32, shape=(None,) + X.shape[1:])
    # Define model predictions and gradients once, outside the loop
    preds = model(x)
    grads = jacobian_graph(preds, x, nb_classes)
    X_adv = np.zeros_like(X)
    for i in tqdm(range(len(X))):
        current_class = int(np.argmax(Y[i]))
        target_class = np.random.choice(other_classes(nb_classes,
                                                      current_class))
        X_adv[i], _, _ = jsma(
            sess, x, preds, grads, X[i:(i + 1)], target_class,
            theta=theta, gamma=gamma, increase=True, nb_classes=nb_classes,
            clip_min=clip_min, clip_max=clip_max
        )
    return X_adv
def generate(self, x, **kwargs):
    """
    Generate symbolic graph for adversarial examples and return.
    :param x: The model's symbolic inputs.
    :param kwargs: See `parse_params`
    """
    assert self.sess is not None, \
        'Cannot use `generate` when no `sess` was provided'
    # Parse and save attack-specific parameters
    assert self.parse_params(**kwargs)

    # Define graph wrt to this input placeholder
    logits = self.model.get_logits(x)
    self.nb_classes = logits.get_shape().as_list()[-1]
    assert self.nb_candidate <= self.nb_classes, \
        'nb_candidate should not be greater than nb_classes'
    preds = tf.reshape(
        tf.nn.top_k(logits, k=self.nb_candidate)[0],
        [-1, self.nb_candidate])
    # grads will be the shape [batch_size, nb_candidate, image_size]
    grads = tf.stack(jacobian_graph(preds, x, self.nb_candidate), axis=1)

    # Define graph
    def deepfool_wrap(x_val):
        return deepfool_batch(self.sess, x, preds, logits, grads, x_val,
                              self.nb_candidate, self.overshoot,
                              self.max_iter, self.clip_min, self.clip_max,
                              self.nb_classes, self.Lp_norm)

    wrap = tf.py_func(deepfool_wrap, [x], self.tf_dtype)
    wrap.set_shape(x.get_shape())
    return wrap
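# `generate` above wraps a batch DeepFool implementation in tf.py_func; for
# context, a hedged usage sketch through the public CleverHans attack API
# (stock DeepFool, without the Lp_norm extension this variant adds; `model`,
# `sess`, and `X_test` are assumed to exist):
from cleverhans.attacks import DeepFool

deepfool = DeepFool(model, sess=sess)
adv_x_np = deepfool.generate_np(X_test[:10], nb_candidate=10,
                                overshoot=0.02, max_iter=50,
                                clip_min=0., clip_max=1.)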
def train_substitute(sess, x, y, bbox_preds, X_sub, Y_sub):
    """
    This function creates the substitute by alternatively augmenting the
    training data and training the substitute.
    :param sess: TF session
    :param x: input TF placeholder
    :param y: output TF placeholder
    :param bbox_preds: output of black-box model predictions
    :param X_sub: initial substitute training data
    :param Y_sub: initial substitute training labels
    :return:
    """
    # Define TF model graph (for the substitute model)
    model_sub = substitute_model()
    preds_sub = model_sub(x)

    # Define the Jacobian symbolically using TensorFlow
    grads = jacobian_graph(preds_sub, x, FLAGS.nb_classes)

    # Train the substitute and augment dataset alternatively
    for rho in range(FLAGS.data_aug):
        print("Epoch #" + str(rho))
        train_params = {
            'nb_epochs': FLAGS.nb_epochs_s,
            'batch_size': FLAGS.batch_size,
            'learning_rate': FLAGS.learning_rate
        }
        model_train(sess, x, y, preds_sub, X_sub, to_categorical(Y_sub),
                    init_all=False, verbose=False, args=train_params)

        # If we are not at last substitute training iteration, augment dataset
        if rho < FLAGS.data_aug - 1:
            # Perform the Jacobian augmentation
            X_sub = jacobian_augmentation(sess, x, X_sub, Y_sub, grads,
                                          FLAGS.lmbda)

            # Label the newly generated synthetic points using the black-box
            Y_sub = np.hstack([Y_sub, Y_sub])
            X_sub_prev = X_sub[int(len(X_sub) / 2):]

            # First feed forward a denoising autoencoder.
            if args.ae:
                print("Denoising...")
                num_data = X_sub_prev.shape[0]
                autoencoder.visualize(sess, X_sub_prev.reshape(num_data, -1),
                                      "sub{}".format(rho))
                filtered_data = autoencoder.run(
                    sess, X_sub_prev.reshape(num_data, -1))
                X_sub_prev = filtered_data.reshape(num_data, 28, 28, 1)

            if args.alg == "cnn":
                eval_params = {'batch_size': FLAGS.batch_size}
                bbox_val = batch_eval(sess, [x], [bbox_preds], [X_sub_prev],
                                      args=eval_params)[0]
                # Note here that we take the argmax because the adversary
                # only has access to the label (not the probabilities) output
                # by the black-box model
                Y_sub_prev = np.argmax(bbox_val, axis=1)
            elif is_not_nn():
                x_sub_prev = X_sub_prev.reshape(X_sub_prev.shape[0], -1)
                Y_sub_prev = bbox_preds.predict(x_sub_prev)

            Y_sub[int(len(X_sub) / 2):] = Y_sub_prev

    return model_sub, preds_sub
def main(_):
    # Images for inception classifier are normalized to be in [-1, 1]
    # interval, eps is a difference between pixels so it should be in
    # [0, 2] interval. Renormalizing epsilon from [0, 255] to [0, 2].
    eps = 2.0 * FLAGS.max_epsilon / 255.0
    batch_shape = [FLAGS.batch_size, FLAGS.image_height, FLAGS.image_width, 3]
    num_classes = 1001
    BUILD_MODEL = True

    from cleverhans.attacks_tf import jacobian_graph, jsma_batch

    tf.logging.set_verbosity(tf.logging.INFO)

    with tf.Graph().as_default() as d_graph:
        # Prepare graph
        model = InceptionModel(num_classes)
        if BUILD_MODEL:
            print("Build the model and try to save the current graph")
            x_input = tf.placeholder(tf.float32, shape=batch_shape)
            preds = model(x_input)
            grads = jacobian_graph(preds, x_input, num_classes)
            saver = tf.train.Saver(slim.get_model_variables())
            tf.add_to_collection("x_input", x_input)
            tf.add_to_collection("preds", preds)
            tf.add_to_collection("grads", grads)
        else:
            saver = tf.train.Saver(
                filename='model/saliency_map_model-1000.meta')

        # Run computation
        with tf.Session() as sess:
            if BUILD_MODEL:
                saver.save(sess, 'saliency_map_model', global_step=1000)
            else:
                saver.restore(sess, "model/saliency_map_model-1000")
                x_input = tf.get_collection('x_input')[0]
                preds = tf.get_collection('preds')[0]
                grads = tf.get_collection('grads')[0]
            for filenames, images in load_images(FLAGS.input_dir,
                                                 batch_shape):
                adv_images = jsma_batch(sess, x_input, preds, grads, images,
                                        1, 0.1, -1, 1, num_classes,
                                        y_target=None)
                save_images(adv_images, filenames, FLAGS.output_dir)
def train_sub(sess, x, y, bbox_preds, X_sub, Y_sub):
    """
    This function creates the substitute by alternatively augmenting the
    training data and training the substitute.
    :param sess: TF session
    :param x: input TF placeholder
    :param y: output TF placeholder
    :param bbox_preds: output of black-box model predictions
    :param X_sub: initial substitute training data
    :param Y_sub: initial substitute training labels
    :return:
    """
    # Define TF model graph (for the substitute model)
    model_sub = substitute_model()
    preds_sub = model_sub(x)
    print("Defined TensorFlow model graph for the substitute.")

    # Define the Jacobian symbolically using TensorFlow
    grads = jacobian_graph(preds_sub, x, FLAGS.nb_classes)

    # Train the substitute and augment dataset alternatively
    for rho in xrange(FLAGS.data_aug):
        print("Substitute training epoch #" + str(rho))
        train_params = {
            'nb_epochs': FLAGS.nb_epochs_s,
            'batch_size': FLAGS.batch_size,
            'learning_rate': FLAGS.learning_rate
        }
        model_train(sess, x, y, preds_sub, X_sub, to_categorical(Y_sub),
                    init_all=False, verbose=False, args=train_params)

        # If we are not at last substitute training iteration, augment dataset
        if rho < FLAGS.data_aug - 1:
            print("Augmenting substitute training data.")
            # Perform the Jacobian augmentation
            X_sub = jacobian_augmentation(sess, x, X_sub, Y_sub, grads,
                                          FLAGS.lmbda)

            print("Labeling substitute training data.")
            # Label the newly generated synthetic points using the black-box
            Y_sub = np.hstack([Y_sub, Y_sub])
            X_sub_prev = X_sub[int(len(X_sub) / 2):]
            eval_params = {'batch_size': FLAGS.batch_size}
            bbox_val = batch_eval(sess, [x], [bbox_preds], [X_sub_prev],
                                  args=eval_params)[0]
            # Note here that we take the argmax because the adversary
            # only has access to the label (not the probabilities) output
            # by the black-box model
            Y_sub[int(len(X_sub) / 2):] = np.argmax(bbox_val, axis=1)

    return model_sub, preds_sub
def train_sub(sess, x, y, bbox, X_sub, Y_sub):
    """
    This function creates the substitute by alternatively augmenting the
    training data and training the substitute.
    :param sess: TF session
    :param x: input TF placeholder
    :param y: output TF placeholder
    :param bbox: black-box model
    :param X_sub: initial substitute training data
    :param Y_sub: initial substitute training labels
    :return:
    """
    # Define TF model graph (for the substitute model)
    model_sub = substitute_model()
    preds_sub = model_sub(x)
    print("Defined TensorFlow model graph for the substitute.")

    # Define the Jacobian symbolically using TensorFlow
    grads = jacobian_graph(preds_sub, x, FLAGS.nb_classes)

    # Train the substitute and augment dataset alternatively
    for rho in range(FLAGS.data_aug):
        print("Substitute training epoch #" + str(rho))
        train_params = {
            'nb_epochs': FLAGS.nb_epochs_s,
            'batch_size': FLAGS.batch_size,
            'learning_rate': FLAGS.learning_rate
        }
        model_train(sess, x, y, preds_sub, X_sub, to_categorical(Y_sub),
                    init_all=False, verbose=False, args=train_params)

        # If we are not at last substitute training iteration, augment dataset
        if rho < FLAGS.data_aug - 1:
            print("Augmenting substitute training data.")
            # Perform the Jacobian augmentation
            X_sub = jacobian_augmentation(sess, x, X_sub, Y_sub, grads,
                                          FLAGS.lmbda)

            print("Labeling substitute training data.")
            # Label the newly generated synthetic points using the black-box
            Y_sub = np.hstack([Y_sub, Y_sub])
            X_sub_prev = X_sub[int(len(X_sub) / 2):]
            eval_params = {'batch_size': FLAGS.batch_size}
            # Query the black-box XGBoost model for labels directly
            x_sub_prev = X_sub_prev.reshape(X_sub_prev.shape[0], -1)
            xg_sub = xgb.DMatrix(x_sub_prev)
            Y_sub_prev = bbox.predict(xg_sub)
            Y_sub[int(len(X_sub) / 2):] = Y_sub_prev

    return model_sub, preds_sub
def train_sub(sess, x, y, bb_model, X_sub, Y_sub, nb_classes,
              nb_epochs_s, batch_size, learning_rate, data_aug, lmbda,
              rng):
    """
    This function creates the substitute by alternatively augmenting the
    training data and training the substitute.
    :param sess: TF session
    :param x: input TF placeholder
    :param y: output TF placeholder
    :param bb_model: black-box model, queried through its predict method
    :param X_sub: initial substitute training data
    :param Y_sub: initial substitute training labels
    :param nb_classes: number of output classes
    :param nb_epochs_s: number of epochs to train substitute model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :param data_aug: number of times substitute training data is augmented
    :param lmbda: lambda from arxiv.org/abs/1602.02697
    :param rng: numpy.random.RandomState instance
    :return:
    """
    # Define TF model graph (for the substitute model)
    model_sub = substitute_model(img_cols=X_sub.shape[1])
    preds_sub = model_sub(x)
    print("Defined TensorFlow model graph for the substitute.")

    # Define the Jacobian symbolically using TensorFlow
    grads = jacobian_graph(preds_sub, x, nb_classes)

    # Train the substitute and augment dataset alternatively
    for rho in xrange(data_aug):
        print("Substitute training epoch #" + str(rho))
        train_params = {
            'nb_epochs': nb_epochs_s,
            'batch_size': batch_size,
            'learning_rate': learning_rate
        }
        with TemporaryLogLevel(logging.WARNING, "cleverhans.utils.tf"):
            model_train(sess, x, y, preds_sub, X_sub,
                        to_categorical(Y_sub, nb_classes),
                        init_all=False, args=train_params, rng=rng)

        # If we are not at last substitute training iteration, augment dataset
        if rho < data_aug - 1:
            print("Augmenting substitute training data.")
            # Perform the Jacobian augmentation
            lmbda_coef = 2 * int(int(rho / 3) != 0) - 1
            X_sub = jacobian_augmentation(sess, x, X_sub, Y_sub, grads,
                                          lmbda_coef * lmbda)

            print("Labeling substitute training data.")
            # Label the newly generated synthetic points using the black-box
            Y_sub = numpy.hstack([Y_sub, Y_sub])
            # The dataset was doubled, so prev = the newly generated half
            X_sub_prev = X_sub[int(len(X_sub) / 2):]
            eval_params = {'batch_size': batch_size}
            bbox_val = bb_model.predict(X_sub_prev)
            Y_sub[int(len(X_sub) / 2):] = numpy.argmax(bbox_val, axis=1)

    return model_sub, preds_sub
def train_sub(data_aug, sess, x_sub, lmbda, target_model,
              aug_batch_size=AUG_BATCH_SIZE):
    placeholder_sub = tf.placeholder(
        tf.float32,
        shape=(None, SUB_IMAGE_SIZE, SUB_IMAGE_SIZE, NUM_OF_CHANNELS))
    placeholder_bbox = tf.placeholder(
        tf.float32,
        shape=(None, BBOX_IMAGE_SIZE, BBOX_IMAGE_SIZE, NUM_OF_CHANNELS))

    print("Loading substitute model...")
    model = get_model_category_by_id(SUBSTITUTE_MODEL_ID, NB_SUB_CLASSES,
                                     metric='accuracy')
    # simple vanilla cnn
    if SUBSTITUTE_MODEL_ID == '-1':
        model = get_simple_model(NB_SUB_CLASSES, SUB_IMAGE_SIZE)
    model.compile(optimizer=Adam(lr=0.1, decay=1e-6),
                  loss="categorical_crossentropy", metrics=['accuracy'])
    model_sub = KerasModelWrapper(model)
    preds_sub = model_sub.get_logits(placeholder_sub)
    print("Substitute model loaded.")

    # Define the Jacobian symbolically using TensorFlow
    print("Defining jacobian graph...")
    grads = jacobian_graph(preds_sub, placeholder_sub, NB_SUB_CLASSES)
    print("Jacobian graph defined.")

    y_sub = bbox_predict(target_model, x_sub, sess, placeholder_bbox,
                         batch_size=1)
    train_gen = TransferGenerator(x_sub, labels=y_sub,
                                  num_classes=NB_SUB_CLASSES,
                                  batch_size=BATCH_SIZE,
                                  image_size=SUB_IMAGE_SIZE)
    for rho in xrange(data_aug):
        print("Substitute training epoch #" + str(rho))
        train_gen.reinitialize(x_sub, y_sub, BATCH_SIZE, SUB_IMAGE_SIZE)
        print("Fitting the generator with the labels: ")
        print(train_gen.labels)
        model_sub.model.fit_generator(generator=train_gen, epochs=NUM_EPOCHS)

        # print("Saving substitute model that is trained so far")
        # path = Path(__file__).resolve().parent.parent.joinpath(
        #     "resources/models")
        # save_model(str(path) + "sub_model_after_epoch" + str(rho) + ".h5",
        #            model_sub.model)
        # input_sample = np.empty(
        #     shape=(1, IMAGE_SIZE_SUB, IMAGE_SIZE_SUB, NUM_OF_CHANNELS),
        #     dtype=np.float32)

        if rho < data_aug - 1:
            print("Augmenting substitute training data...")
            # Perform the Jacobian augmentation
            lmbda_coef = 2 * int(int(rho / 3) != 0) - 1
            x_sub = jacobian_augmentation(sess, placeholder_sub, x_sub,
                                          y_sub, grads, lmbda_coef * lmbda,
                                          aug_batch_size)
            print("Substitute training data augmented.")

            print("Labeling substitute training data using bbox...")
            y_sub = np.hstack([y_sub, y_sub])
            x_sub_new = x_sub[int(len(x_sub) / 2):]
            y_sub[int(len(x_sub) / 2):] = bbox_predict(
                target_model, x_sub_new, sess, placeholder_bbox)
    return model_sub
def generate_jsma_examples(self, sess, X, y, targets=None,
                           clip_min=None, clip_max=None,
                           theta=1., gamma=0.25):
    """Wrapper around Cleverhans' underlying JSMA generation code"""
    from cleverhans import attacks_tf

    if clip_min is None:
        clip_min = np.min(X)
    if clip_max is None:
        clip_max = np.max(X)

    if targets is None:
        targets = onehot((np.argmax(y, 1) + 1) % self.num_classes,
                         self.num_classes)
    elif isint(targets):
        targets = onehot([targets] * len(X), self.num_classes)

    jacobian = attacks_tf.jacobian_graph(self.logits, self.X,
                                         self.num_classes)
    return attacks_tf.jsma_batch(sess, self.X, self.logits, jacobian, X,
                                 theta=theta, gamma=gamma,
                                 clip_min=clip_min, clip_max=clip_max,
                                 nb_classes=self.num_classes,
                                 y_target=targets)
def main(_):
    # Images for inception classifier are normalized to be in [-1, 1]
    # interval, eps is a difference between pixels so it should be in
    # [0, 2] interval. Renormalizing epsilon from [0, 255] to [0, 2].
    eps = 2.0 * FLAGS.max_epsilon / 255.0
    batch_shape = [FLAGS.batch_size, FLAGS.image_height, FLAGS.image_width, 3]
    num_classes = 1001

    from cleverhans.attacks_tf import jacobian_graph, jsma_batch

    tf.logging.set_verbosity(tf.logging.INFO)

    with tf.Graph().as_default():
        # Prepare graph
        x_input = tf.placeholder(tf.float32, shape=batch_shape)
        model = InceptionModel(num_classes)
        preds = model(x_input)
        grads = jacobian_graph(preds, x_input, num_classes)

        # Run computation
        saver = tf.train.Saver(slim.get_model_variables())
        session_creator = tf.train.ChiefSessionCreator(
            scaffold=tf.train.Scaffold(saver=saver),
            checkpoint_filename_with_path=FLAGS.checkpoint_path,
            master=FLAGS.master)

        with tf.train.MonitoredSession(
                session_creator=session_creator) as sess:

            def jsma_wrap(x_val):
                # Feed the graph's x_input placeholder and return the batch
                # of adversarial examples so tf.py_func has an output
                return jsma_batch(sess, x_input, preds, grads, x_val, 1, 0.1,
                                  -1, 1, num_classes, y_target=None)

            x_adv = tf.py_func(jsma_wrap, [x_input], tf.float32)

            for filenames, images in load_images(FLAGS.input_dir,
                                                 batch_shape):
                adv_images = sess.run(x_adv, feed_dict={x_input: images})
                save_images(adv_images, filenames, FLAGS.output_dir)
def train_sub(sess, model, x, y, denoise_model, X_sub, Y_sub):
    # model_sub = substitute_model()
    model_sub = substitute_model_D_on_paper()
    preds_sub = model_sub(x)
    print("Train substitute model")

    # Define the Jacobian symbolically using TensorFlow
    grads = jacobian_graph(preds_sub, x, FLAGS.nb_classes)

    # Train the substitute and augment dataset alternatively
    for rho in range(FLAGS.data_aug):
        print("Substitute training epoch #" + str(rho))
        train_params = {
            'nb_epochs': FLAGS.nb_epochs_s,
            'batch_size': FLAGS.batch_size,
            'learning_rate': FLAGS.learning_rate
        }
        model_train(sess, x, y, preds_sub, X_sub,
                    to_categorical(Y_sub, num_classes=FLAGS.nb_classes),
                    init_all=False, verbose=False, args=train_params)

        # If we are not at last substitute training iteration, augment dataset
        if rho < FLAGS.data_aug - 1:
            print("Augmenting substitute training data.")
            # Perform the Jacobian augmentation
            X_sub = jacobian_augmentation(sess, x, X_sub, Y_sub, grads,
                                          FLAGS.lmbda)

            print("Labeling substitute training data.")
            # Label the newly generated synthetic points using the black-box
            Y_sub = np.hstack([Y_sub, Y_sub])
            X_sub_prev = X_sub[int(len(X_sub) / 2):]
            if DENOISE:
                X_sub_prev = denoise_model.predict(
                    X_sub_prev, verbose=1, batch_size=FLAGS.batch_size)
            bbox_val = model.predict(X_sub_prev)
            Y_sub[int(len(X_sub) / 2):] = np.argmax(bbox_val, axis=1)

    return model_sub, preds_sub
def batch_jsma_with_perturbation_rate(self, batch, max_gamma, theta):
    # Make random targets
    x, _ = self.cnn_model.make_inputs()
    preds = self.cnn_model.get_probs(x)
    grads = jacobian_graph(preds, x, self.num_classes)
    results = []
    for i, data in enumerate(batch):
        sample = np.reshape(data[:-1],
                            [self.image_size, self.image_size,
                             self.channels]) * (1. / 255)
        sample = np.expand_dims(sample, axis=0)
        target = data[-1]
        while target == data[-1]:
            target = random.randint(0, self.num_classes - 1)

        # Use JSMA and try to make one single adv example for given sample
        (adv_x, perturbation_rate, confused_at, success_at, orig_predict,
         adv_predict) = self.jsma(
            sess=self.cnn_model.sess,
            x=x,
            predictions=preds,
            grads=grads,
            sample=sample,
            target=target,
            theta=theta,
            gamma=max_gamma,
            clip_min=0.,
            clip_max=1.,
        )
        print('perturbation_rate={}, confused_at={}, success_at={}'.format(
            perturbation_rate, confused_at, success_at))
        results.append({
            'adv_x': adv_x,
            'perturbation_rate': perturbation_rate,
            'target': target,
            'confused_at': confused_at,
            'success_at': success_at,
            'orig_predict': orig_predict,
            'adv_predict': adv_predict,
        })
    return results
def augmentSubstituteData(self, X, Y, dqn, batchSize, lmbdas, verbose=False):
    grads = jacobian_graph(self.logits, self.inputs, self.env.action_space.n)
    epNum = len(lmbdas)
    Xa = X
    Ya = Y
    for ep in range(epNum):
        mbInds = utils.getMinibatchInds(batchSize, np.arange(X.shape[0]))
        lmbda = lmbdas[ep]
        for i, mbi in enumerate(mbInds):
            mbX = X[mbi]
            mbY = Y[mbi]
            mbXa = jacobian_augmentation(self.sess, self.inputs, mbX, mbY,
                                         grads, lmbda)
            # Keep only the newly synthesized half of the minibatch
            mbXa = mbXa[mbX.shape[0]:]
            # Label the synthetic points with the black-box DQN
            mbYa = dqn.run(mbXa)
            mbYa = np.argmax(mbYa, axis=1)
            Xa = np.vstack([Xa, mbXa])
            Ya = np.hstack([Ya, mbYa])
            del mbXa
            del mbYa
            if verbose:
                print("Finished minibatch " + str(i) + " / " +
                      str(len(mbInds)) + " in epoch " + str(ep) +
                      ". Num examples = " + str(Xa.shape[0]))
        if verbose:
            print("Finished epoch " + str(ep))
    return Xa, Ya
def saliency_map_method(sess, model, X, Y, theta, gamma, clip_min=None,
                        clip_max=None):
    """
    Craft a JSMA adversarial example for each sample in X (SVHN), caching
    each crafted example to disk so the attack can be resumed.
    :param sess: TF session
    :param model: model callable returning post-softmax predictions
    :param X: input samples
    :param Y: one-hot labels for X
    :param theta: perturbation introduced to modified components
    :param gamma: maximum fraction of features perturbed
    :param clip_min: minimum component value for clipping
    :param clip_max: maximum component value for clipping
    :return: array of adversarial examples
    """
    nb_classes = Y.shape[1]
    # Define TF placeholder for the input
    x = tf.placeholder(tf.float32, shape=(None,) + X.shape[1:])
    # Define model predictions and gradients once, outside the loop
    preds = model(x)
    grads = jacobian_graph(preds, x, nb_classes)
    X_adv = np.zeros_like(X)
    for i in tqdm(range(len(X))):
        PATH_DATA = "data/svhn/jsma"
        npyfile = os.path.join(PATH_DATA, 'Adv_svhn_jsma_%s.npy' % str(i))
        if os.path.exists(npyfile):
            X_adv[i] = np.load(npyfile)
        else:
            current_class = int(np.argmax(Y[i]))
            target_class = np.random.choice(other_classes(nb_classes,
                                                          current_class))
            X_adv[i], _, _ = jsma(
                sess, x, preds, grads, X[i:(i + 1)], target_class,
                theta=theta, gamma=gamma, increase=True,
                nb_classes=nb_classes, clip_min=clip_min, clip_max=clip_max
            )
            np.save(npyfile, X_adv[i])
    return X_adv
def train_sub(sess, x, y, bbox_preds, X_sub, Y_sub, nb_classes, nb_epochs_s,
              batch_size, learning_rate, data_aug, lmbda, rng,
              model_arch_sub, merged, opt_type, blocking_option):
    """
    This function creates the substitute by alternatively augmenting the
    training data and training the substitute.
    """
    # Define TF model graph (for the substitute model)
    model_sub = substitute_model(model_arch_sub=model_arch_sub,
                                 blocking_option=blocking_option)
    preds_sub = model_sub(x)
    print("Defined TensorFlow model graph for the substitute.")

    # Define the Jacobian symbolically using TensorFlow
    grads = jacobian_graph(preds_sub, x, nb_classes)

    # Train the substitute and augment dataset alternatively
    for rho in xrange(data_aug):
        print("Substitute training epoch #" + str(rho))
        train_params = {
            'nb_epochs': nb_epochs_s,
            'batch_size': batch_size,
            'learning_rate': learning_rate
        }
        model_train(
            sess, x, y, preds_sub, X_sub, to_categorical(Y_sub),
            init_all=False, args=train_params, rng=rng,
            opt_type=opt_type,
            # summary=merged
        )

        # If we are not at last substitute training iteration, augment dataset
        if rho < data_aug - 1:
            print("Augmenting substitute training data.")
            # Perform the Jacobian augmentation
            lmbda_coef = 2 * int(int(rho / 3) != 0) - 1
            X_sub = jacobian_augmentation(sess, x, X_sub, Y_sub, grads,
                                          lmbda_coef * lmbda)

            print("Labeling substitute training data.")
            # Label the newly generated synthetic points using the black-box
            Y_sub = np.hstack([Y_sub, Y_sub])
            X_sub_prev = X_sub[int(len(X_sub) / 2):]
            eval_params = {'batch_size': batch_size}
            bbox_val = batch_eval(sess, [x], [bbox_preds], [X_sub_prev],
                                  args=eval_params)[0]
            # Note here that we take the argmax because the adversary
            # only has access to the label (not the probabilities) output
            # by the black-box model
            Y_sub[int(len(X_sub) / 2):] = np.argmax(bbox_val, axis=1)

    return model_sub, preds_sub
def train_sub(sess, x, y, bbox_preds, x_sub, y_sub, nb_classes,
              nb_epochs_s, batch_size, learning_rate, data_aug, lmbda,
              aug_batch_size, rng, img_rows=28, img_cols=28, nchannels=1):
    """
    This function creates the substitute by alternatively augmenting the
    training data and training the substitute.
    :param sess: TF session
    :param x: input TF placeholder
    :param y: output TF placeholder
    :param bbox_preds: output of black-box model predictions
    :param x_sub: initial substitute training data
    :param y_sub: initial substitute training labels
    :param nb_classes: number of output classes
    :param nb_epochs_s: number of epochs to train substitute model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :param data_aug: number of times substitute training data is augmented
    :param lmbda: lambda from arxiv.org/abs/1602.02697
    :param aug_batch_size: batch size used during Jacobian augmentation
    :param rng: numpy.random.RandomState instance
    :return:
    """
    # Define TF model graph (for the substitute model)
    model_sub = ModelSubstitute('model_s', nb_classes)
    preds_sub = model_sub.get_logits(x)
    loss_sub = CrossEntropy(model_sub, smoothing=0)
    print("Defined TensorFlow model graph for the substitute.")

    # Define the Jacobian symbolically using TensorFlow
    grads = jacobian_graph(preds_sub, x, nb_classes)

    # Train the substitute and augment dataset alternatively
    for rho in xrange(data_aug):
        print("Substitute training epoch #" + str(rho))
        train_params = {
            'nb_epochs': nb_epochs_s,
            'batch_size': batch_size,
            'learning_rate': learning_rate
        }
        with TemporaryLogLevel(logging.WARNING, "cleverhans.utils.tf"):
            train(sess, loss_sub, x, y, x_sub,
                  to_categorical(y_sub, nb_classes),
                  init_all=False, args=train_params, rng=rng,
                  var_list=model_sub.get_params())

        # If we are not at last substitute training iteration, augment dataset
        if rho < data_aug - 1:
            print("Augmenting substitute training data.")
            # Perform the Jacobian augmentation
            lmbda_coef = 2 * int(int(rho / 3) != 0) - 1
            x_sub = jacobian_augmentation(sess, x, x_sub, y_sub, grads,
                                          lmbda_coef * lmbda, aug_batch_size)

            print("Labeling substitute training data.")
            # Label the newly generated synthetic points using the black-box
            y_sub = np.hstack([y_sub, y_sub])
            x_sub_prev = x_sub[int(len(x_sub) / 2):]
            eval_params = {'batch_size': batch_size}
            bbox_val = batch_eval(sess, [x], [bbox_preds], [x_sub_prev],
                                  args=eval_params)[0]
            # Note here that we take the argmax because the adversary
            # only has access to the label (not the probabilities) output
            # by the black-box model
            y_sub[int(len(x_sub) / 2):] = np.argmax(bbox_val, axis=1)

    return model_sub, preds_sub
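# The `lmbda_coef` expression above implements a sign schedule for the
# augmentation step size (a variant of the periodic step size
# lambda * (-1)^floor(rho/tau) discussed in arxiv.org/abs/1602.02697).
# A quick check of the values it produces, assuming the same expression:
for rho in range(6):
    lmbda_coef = 2 * int(int(rho / 3) != 0) - 1
    print(rho, lmbda_coef)  # rho 0..2 -> -1; rho >= 3 -> +1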
    )

    # ============== Evaluation of the model with actual instances ==============
    print("Performance when using actual testing instances")
    mlp_model_eval(X_test, Y_test, history, 1)

    # ============== Generate adversarial samples for all test datapoints ==============
    source_samples = X_test.shape[0]

    # Jacobian-based Saliency Map
    results = np.zeros((1, source_samples), dtype=float)
    perturbations = np.zeros((1, source_samples), dtype=float)
    grads = jacobian_graph(predictions, X_placeholder, 1)
    X_adv = np.zeros((source_samples, X_test.shape[1]))

    for sample_ind in range(0, source_samples):
        # We want to find an adversarial example for each possible target
        # class (i.e. all classes that differ from the label given in the
        # dataset)
        current_class = int(np.argmax(Y_test[sample_ind]))
        # Target the benign class
        for target in [0]:
            if current_class == 0:
                break

            # This call runs the Jacobian-based saliency map approach
            adv_x, res, percent_perturb = SaliencyMapMethod(
                sess, X_placeholder, predictions, grads,
def train_sub(sess, x, y, bbox_preds, X_sub, Y_sub, nb_classes, nb_epochs_s,
              batch_size, learning_rate, data_aug, lmbda, rng,
              substitute_model=None, dataset_name=None):
    """This function trains the substitute model as described in
    arxiv.org/abs/1602.02697

    Args:
        sess: TF session
        x: input TF placeholder
        y: output TF placeholder
        bbox_preds: output of black-box model predictions
        X_sub: initial substitute training data
        Y_sub: initial substitute training labels
        nb_classes: number of output classes
        nb_epochs_s: number of epochs to train substitute model
        batch_size: size of training batches
        learning_rate: learning rate for training
        data_aug: number of times substitute training data is augmented
        lmbda: lambda from arxiv.org/abs/1602.02697
        rng: numpy.random.RandomState instance

    Returns:
        model_sub: The substitute model function.
        preds_sub: The substitute prediction tensor.
    """
    model_sub = substitute_model
    used_vars = model_sub.get_params()

    if FLAGS.load_sub_model:
        try:
            path = tf.train.latest_checkpoint(
                'classifiers/sub_model/{}'.format(dataset_name))
            saver = tf.train.Saver(var_list=used_vars)
            saver.restore(sess, path)
            print('[+] Sub model loaded successfully ...')
            pred_eval = model_sub.get_logits(x)
            return model_sub, pred_eval
        except:
            pass

    pred_train = model_sub.get_logits(x, dropout=True)
    pred_eval = model_sub.get_logits(x)
    print("Defined TensorFlow model graph for the substitute.")

    # Define the Jacobian symbolically using TensorFlow.
    grads = jacobian_graph(pred_eval, x, nb_classes)

    train_params = {
        'nb_epochs': nb_epochs_s,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'train_dir': 'classifiers/sub_model/{}'.format(dataset_name),
        'filename': 'model_{}'.format(FLAGS.sub_model)
    }

    # Train the substitute and augment dataset alternatively.
    for rho in xrange(data_aug):
        print("Substitute training epoch #" + str(rho))
        model_train(sess, x, y, pred_train, X_sub,
                    convert_to_onehot(Y_sub),
                    init_all=False, args=train_params, rng=rng, save=True)

        # If we are not at last substitute training iteration, augment dataset.
        if rho < data_aug - 1:
            print("Augmenting substitute training data.")
            # Perform the Jacobian augmentation.
            X_sub = jacobian_augmentation(sess, x, X_sub, Y_sub, grads, lmbda)

            print("Labeling substitute training data.")
            # Label the newly generated synthetic points using the black-box.
            Y_sub = np.hstack([Y_sub, Y_sub])
            X_sub_prev = X_sub[int(len(X_sub) / 2):]
            eval_params = {'batch_size': batch_size}
            # To initialize the local variables of Defense-GAN.
            sess.run(tf.local_variables_initializer())
            bbox_val = batch_eval(sess, [x], [bbox_preds], [X_sub_prev],
                                  args=eval_params)[0]
            # Note here that we take the argmax because the adversary
            # only has access to the label (not the probabilities) output
            # by the black-box model.
            Y_sub[int(len(X_sub) / 2):] = np.argmax(bbox_val, axis=1)

    return model_sub, pred_eval
def train_sub(sess, x, y, bbox_preds, x_sub, y_sub, nb_classes,
              nb_epochs_s, batch_size, learning_rate, data_aug, lmbda,
              aug_batch_size, rng, img_rows=48, img_cols=48, nchannels=3):
    """
    This function creates the substitute by alternatively augmenting the
    training data and training the substitute.
    :param sess: TF session
    :param x: input TF placeholder
    :param y: output TF placeholder
    :param bbox_preds: output of black-box model predictions
    :param x_sub: initial substitute training data
    :param y_sub: initial substitute training labels
    :param nb_classes: number of output classes
    :param nb_epochs_s: number of epochs to train substitute model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :param data_aug: number of times substitute training data is augmented
    :param lmbda: lambda from arxiv.org/abs/1602.02697
    :param rng: numpy.random.RandomState instance
    :return:
    """
    assert y_sub.shape[1] > 1

    # NOTE: `saver` is only defined below; when no checkpoint exists this
    # raises and falls through to retraining via the bare except.
    try:
        saver.restore(sess, "./model.ckpt")
        model_sub = tf.get_variable("logits", shape=[1])
        preds_sub = tf.get_variable("probs", shape=[1])
        return model_sub, preds_sub
    except:
        print("Model ckpt is not found. Retrain substitute starts.")

    # Define TF model graph (for the substitute model)
    model_sub = ModelSubstitute('model_s', nb_classes, session=sess,
                                istrain=True)
    logits = model_sub.get_logits(x)
    loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits, labels=y))
    optimiser = tf.train.AdamOptimizer().minimize(loss)
    preds_sub = tf.nn.softmax(logits=logits)
    saver = tf.train.Saver()
    print("Defined TensorFlow model graph for the substitute.")

    # Define the Jacobian symbolically using TensorFlow
    grads = jacobian_graph(preds_sub, x, nb_classes)
    sess.run(tf.global_variables_initializer())

    def evaluate():
        acc = model_eval(sess, x, y, preds_sub, x_sub, y_sub,
                         args=eval_params)
        print('Test accuracy on test examples: %0.4f' % acc)

    # Train the substitute and augment dataset alternatively
    for rho in xrange(data_aug):
        print("Substitute training epoch #" + str(rho))
        for s in range(batch_size):
            batch_xs = x_sub[s * batch_size: (s + 1) * batch_size]
            batch_ys = y_sub[s * batch_size: (s + 1) * batch_size]
            feed_dict = {x: batch_xs, y: batch_ys}
            op, lval, pre = sess.run([optimiser, loss, preds_sub],
                                     feed_dict=feed_dict)
            print("rho = {0}. loss: {1}".format(
                rho, sess.run(loss, feed_dict={x: batch_xs, y: batch_ys})))

        # If we are not at last substitute training iteration, augment dataset
        if 0:  # rho < data_aug - 1:
            print("Augmenting substitute training data.")
            # Perform the Jacobian augmentation
            lmbda_coef = 2 * int(int(rho / 3) != 0) - 1
            y_sub_labels = np.argmax(y_sub, axis=1).reshape(-1, 1)
            x_sub = jacobian_augmentation(sess, x, x_sub, y_sub_labels,
                                          grads, lmbda_coef * lmbda,
                                          aug_batch_size)

            # Label the newly generated synthetic points using the black-box
            new_y_sub_labels = np.vstack((y_sub_labels, y_sub_labels))
            x_sub_prev = x_sub[int(len(x_sub) / 2):]
            eval_params = {'batch_size': batch_size}
            tmp = batch_eval(sess, [x], [bbox_preds], [x_sub_prev],
                             batch_size=batch_size)
            bbox_val = tmp[0]
            # Note here that we take the argmax because the adversary
            # only has access to the label (not the probabilities) output
            # by the black-box model
            tmp1 = np.argmax(bbox_val, axis=1)
            tmp2 = y_sub_labels[int(len(x_sub) / 2):]
            new_y_sub_labels[int(len(x_sub) / 2):] = \
                np.argmax(bbox_val, axis=1).reshape(-1, 1)
            y_sub = to_categorical(new_y_sub_labels, nb_classes)

    save_path = saver.save(sess, "./model.ckpt")
    print("Model saved in path: %s" % save_path)
    print(preds_sub.shape)
    return model_sub, preds_sub
def train_sub(data_aug, sess, x_sub, y_sub, lmbda, target_model,
              aug_batch_size=1):
    x = tf.placeholder(tf.float32, shape=(None, IMAGE_SIZE, IMAGE_SIZE,
                                          NUM_OF_CHANNELS))

    print("Loading substitute model...")
    model = get_model("InceptionResNetV2")
    # model = get_simple_model(num_classes=NB_CLASSES, image_size=IMAGE_SIZE)
    model.compile(optimizer=Adam(), loss="categorical_crossentropy",
                  metrics=[age_mae])
    model_sub = KerasModelWrapper(model)
    preds_sub = model_sub.get_logits(x)
    print("Substitute model loaded.")

    # Define the Jacobian symbolically using TensorFlow
    print("Defining jacobian graph...")
    grads = jacobian_graph(preds_sub, x, NB_CLASSES)
    print("Jacobian graph defined.")

    train_gen = TransferGenerator(x_sub, y_sub, num_classes=101,
                                  batch_size=BATCH_SIZE,
                                  image_size=IMAGE_SIZE)
    for rho in xrange(data_aug):
        print("Substitute training epoch #" + str(rho))
        train_gen.reinitialize(data=x_sub, labels=y_sub,
                               batch_size=BATCH_SIZE, image_size=IMAGE_SIZE)
        model_sub.model.fit_generator(generator=train_gen, epochs=1)

        input_sample = np.empty(shape=(1, IMAGE_SIZE, IMAGE_SIZE,
                                       NUM_OF_CHANNELS), dtype=np.float32)
        if rho < data_aug - 1:
            print("Augmenting substitute training data...")
            # Perform the Jacobian augmentation, one sample at a time
            lmbda_coef = 2 * int(int(rho / 3) != 0) - 1
            x_sub_tmp = np.vstack([x_sub, x_sub])
            for i in range(0, len(y_sub)):
                input_sample[0, :, :, :] = x_sub[i]
                adv = jacobian_augmentation(sess=sess, x=x,
                                            X_sub_prev=input_sample,
                                            Y_sub=[y_sub[i]], grads=grads,
                                            lmbda=lmbda_coef * lmbda,
                                            aug_batch_size=aug_batch_size)
                x_sub_tmp[2 * i] = adv[0, :, :, :]
                x_sub_tmp[2 * i + 1] = adv[1, :, :, :]
            x_sub = x_sub_tmp
            print("Substitute training data augmented.")

            print("Labeling substitute training data using bbox...")
            y_sub = np.hstack([y_sub, y_sub])
            x_sub_prev = x_sub[int(len(x_sub) / 2):]
            predictions = bbox_predict(target_model, x_sub_prev, sess, x)
            y_sub[int(len(x_sub) / 2):] = predictions
    return model_sub
def train_sub(sess, x, y, bbox_preds, X_sub, Y_sub, nb_classes, nb_epochs_s,
              batch_size, learning_rate, data_aug, lmbda, rng,
              substitute_model=None):
    """This function trains the substitute model as described in
    arxiv.org/abs/1602.02697

    Args:
        sess: TF session
        x: input TF placeholder
        y: output TF placeholder
        bbox_preds: output of black-box model predictions
        X_sub: initial substitute training data
        Y_sub: initial substitute training labels
        nb_classes: number of output classes
        nb_epochs_s: number of epochs to train substitute model
        batch_size: size of training batches
        learning_rate: learning rate for training
        data_aug: number of times substitute training data is augmented
        lmbda: lambda from arxiv.org/abs/1602.02697
        rng: numpy.random.RandomState instance

    Returns:
        model_sub: The substitute model function.
        preds_sub: The substitute prediction tensor.
    """
    # Define TF model graph (for the substitute model).
    model_sub = substitute_model
    preds_sub = model_sub(x)
    print("Defined TensorFlow model graph for the substitute.")

    # Define the Jacobian symbolically using TensorFlow.
    grads = jacobian_graph(preds_sub, x, nb_classes)

    # Train the substitute and augment dataset alternatively.
    for rho in xrange(data_aug):
        print("Substitute training epoch #" + str(rho))
        train_params = {
            'nb_epochs': nb_epochs_s,
            'batch_size': batch_size,
            'learning_rate': learning_rate
        }
        model_train(sess, x, y, preds_sub, X_sub, to_categorical(Y_sub),
                    init_all=False, args=train_params, rng=rng,
                    feed={K.learning_phase(): 1})

        # If we are not at last substitute training iteration, augment dataset.
        if rho < data_aug - 1:
            print("Augmenting substitute training data.")
            # Perform the Jacobian augmentation.
            X_sub = jacobian_augmentation(sess, x, X_sub, Y_sub, grads,
                                          lmbda,
                                          feed={K.learning_phase(): 0})

            print("Labeling substitute training data.")
            # Label the newly generated synthetic points using the black-box.
            Y_sub = np.hstack([Y_sub, Y_sub])
            X_sub_prev = X_sub[int(len(X_sub) / 2):]
            eval_params = {'batch_size': batch_size}
            # To initialize the local variables of Defense-GAN.
            sess.run(tf.local_variables_initializer())
            bbox_val = batch_eval(sess, [x], [bbox_preds], [X_sub_prev],
                                  args=eval_params,
                                  feed={K.learning_phase(): 0})[0]
            # Note here that we take the argmax because the adversary
            # only has access to the label (not the probabilities) output
            # by the black-box model.
            Y_sub[int(len(X_sub) / 2):] = np.argmax(bbox_val, axis=1)

    return model_sub, preds_sub
    # DEBUG
    # for c in [RandomForestClassifier, NaiveBayes]:
    #     rf = c()
    #     rf.fit(X_train_scaled, y_train)
    #     train_pred = rf.predict(X_train_scaled)
    #     print("RF acc training set", accuracy_score(y_train, train_pred))

    model_train(sess, x, y, predictions, X_train_scaled, y_train,
                evaluate=evaluate, args=train_params)

    # Generate adversarial samples for all test datapoints
    source_samples = X_test_scaled.shape[0]

    # Jacobian-based Saliency Map
    results = np.zeros((FLAGS.nb_classes, source_samples), dtype='i')
    perturbations = np.zeros((FLAGS.nb_classes, source_samples), dtype='f')
    grads = jacobian_graph(predictions, x, FLAGS.nb_classes)
    X_adv = np.zeros((source_samples, X_test_scaled.shape[1]))

    print(type(model))  # <class 'keras.engine.sequential.Sequential'>
    wrap = KerasModelWrapper(model)
    jsma = SaliencyMapMethod(wrap, sess=sess)
    """
    Take in a dictionary of parameters and applies attack-specific checks
    before saving them as attributes.

    Attack-specific parameters:
    :param theta: (optional float) Perturbation introduced to modified
        components (can be positive or negative)
def main(argv=None):
    """
    MNIST tutorial for the Jacobian-based saliency map approach (JSMA)
    :return:
    """
    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    ###########################################################################
    # Define the dataset and model
    ###########################################################################

    # Image dimensions ordering should follow the TensorFlow convention
    if keras.backend.image_dim_ordering() != 'tf':
        keras.backend.set_image_dim_ordering('tf')
        print("INFO: '~/.keras/keras.json' sets 'image_dim_ordering' "
              "to 'th', temporarily setting to 'tf'")

    # Create TF session and set as Keras backend session
    sess = tf.Session()
    keras.backend.set_session(sess)
    print("Created TensorFlow session and set Keras backend.")

    # Get STL10 test data
    X_train, Y_train, X_test, Y_test = data_stl10()
    print("Loaded STL10 test data")
    # print("Loaded CIFAR10 test data")
    # print("Loaded MNIST test data.")

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, 96, 96, 3))
    # x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
    y = tf.placeholder(tf.float32, shape=(None, 10))

    # Define TF model graph
    # model = cnn_model()
    # predictions = model(x)
    model = vgg19((96, 96, 3))
    predictions = model(x)
    print("Defined TensorFlow model graph.")

    ###########################################################################
    # Training the model using TensorFlow
    ###########################################################################

    # Train the model if it does not exist in the train_dir folder
    saver = tf.train.Saver()
    save_path = os.path.join(FLAGS.train_dir, FLAGS.filename)
    if os.path.isfile(save_path):
        saver.restore(sess, os.path.join(FLAGS.train_dir, FLAGS.filename))
    else:
        train_params = {
            'nb_epochs': FLAGS.nb_epochs,
            'batch_size': FLAGS.batch_size,
            'learning_rate': FLAGS.learning_rate
        }
        model_train(sess, x, y, predictions, X_train, Y_train,
                    args=train_params)
        saver.save(sess, save_path)

    # Evaluate the accuracy of the model on legitimate test examples
    eval_params = {'batch_size': FLAGS.batch_size}
    accuracy = model_eval(sess, x, y, predictions, X_test, Y_test,
                          args=eval_params)
    # assert X_test.shape[0] == 10000, X_test.shape
    print('Test accuracy on legitimate test examples: {0}'.format(accuracy))

    ###########################################################################
    # Craft adversarial examples using the Jacobian-based saliency map approach
    ###########################################################################
    print('Crafting ' + str(FLAGS.source_samples) + ' * ' +
          str(FLAGS.nb_classes - 1) + ' adversarial examples')

    # This array indicates whether an adversarial example was found for each
    # test set sample and target class
    results = np.zeros((FLAGS.nb_classes, FLAGS.source_samples), dtype='i')

    # This array contains the fraction of perturbed features for each test set
    # sample and target class
    perturbations = np.zeros((FLAGS.nb_classes, FLAGS.source_samples),
                             dtype='f')

    # Define the TF graph for the model's Jacobian
    grads = jacobian_graph(predictions, x, FLAGS.nb_classes)

    # Initialize our array for grid visualization
    grid_shape = (FLAGS.nb_classes, FLAGS.nb_classes, FLAGS.img_rows,
                  FLAGS.img_cols, FLAGS.nb_channels)
    grid_viz_data = np.zeros(grid_shape, dtype='f')

    # Loop over the samples we want to perturb into adversarial examples
    for sample_ind in xrange(0, FLAGS.source_samples):
        # We want to find an adversarial example for each possible target
        # class (i.e. all classes that differ from the label given in the
        # dataset)
        current_class = int(np.argmax(Y_test[sample_ind]))
        target_classes = other_classes(FLAGS.nb_classes, current_class)

        # For the grid visualization, keep original images along the diagonal
        grid_viz_data[current_class, current_class, :, :, :] = np.reshape(
            X_test[sample_ind:(sample_ind + 1)],
            (FLAGS.img_rows, FLAGS.img_cols, FLAGS.nb_channels))

        # Loop over all target classes
        for target in target_classes:
            print('--------------------------------------')
            print('Creating adv. example for target class ' + str(target))

            # This call runs the Jacobian-based saliency map approach
            adv_x, res, percent_perturb = jsma(
                sess, x, predictions, grads,
                X_test[sample_ind:(sample_ind + 1)], target,
                theta=1, gamma=0.1, increase=True, back='tf',
                clip_min=0, clip_max=1)

            # Display the original and adversarial images side-by-side
            # if FLAGS.viz_enabled:
            #     if 'figure' not in vars():
            #         figure = pair_visual(
            #             np.reshape(X_test[sample_ind:(sample_ind + 1)],
            #                        (FLAGS.img_rows, FLAGS.img_cols,
            #                         FLAGS.nb_channels)),
            #             np.reshape(adv_x,
            #                        (FLAGS.img_rows, FLAGS.img_cols,
            #                         FLAGS.nb_channels)))
            #     else:
            #         figure = pair_visual(
            #             np.reshape(X_test[sample_ind:(sample_ind + 1)],
            #                        (FLAGS.img_rows, FLAGS.img_cols,
            #                         FLAGS.nb_channels)),
            #             np.reshape(adv_x,
            #                        (FLAGS.img_rows, FLAGS.img_cols,
            #                         FLAGS.nb_channels)), figure)

            # Add our adversarial example to our grid data
            grid_viz_data[target, current_class, :, :, :] = np.reshape(
                adv_x, (FLAGS.img_rows, FLAGS.img_cols, FLAGS.nb_channels))

            # Sanity check: bail out if the adversarial example is all zeros
            if np.sum(adv_x) == 0.0:
                print('HEY')
                quit()

            # Update the arrays for later analysis
            results[target, sample_ind] = res
            perturbations[target, sample_ind] = percent_perturb

    # Compute the number of adversarial examples that were successfully found
    nb_targets_tried = ((FLAGS.nb_classes - 1) * FLAGS.source_samples)
    succ_rate = float(np.sum(results)) / nb_targets_tried
    print('Avg. rate of successful adv. examples {0:.2f}'.format(succ_rate))

    # Compute the average distortion introduced by the algorithm
    percent_perturbed = np.mean(perturbations)
    print('Avg. rate of perturbed features {0:.2f}'.format(percent_perturbed))

    # Compute the average distortion introduced for successful samples only
    percent_perturb_succ = np.mean(perturbations * (results == 1))
    print('Avg. rate of perturbed features for successful '
          'adversarial examples {0:.2f}'.format(percent_perturb_succ))

    # Close TF session
    sess.close()

    # Finally, block & display a grid of all the adversarial examples
    if FLAGS.viz_enabled:
        _ = grid_visual(grid_viz_data)
def train_sub(sess, x, y, bbox_preds, X_sub, Y_sub, nb_classes=10,
              nb_epochs_s=250, batch_size=128, learning_rate=0.001,
              data_aug=6, lmbda=0.1, rng=None):
    """
    This function creates the substitute by alternatively augmenting the
    training data and training the substitute.
    :param sess: TF session
    :param x: input TF placeholder
    :param y: output TF placeholder
    :param bbox_preds: output of black-box model predictions
    :param X_sub: initial substitute training data
    :param Y_sub: initial substitute training labels
    :param nb_classes: number of output classes
    :param nb_epochs_s: number of epochs to train substitute model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :param data_aug: number of times substitute training data is augmented
    :param lmbda: lambda from arxiv.org/abs/1602.02697
    :param rng: numpy.random.RandomState instance
    :return:
    """
    # Define TF model graph (for the substitute model)
    model_wrapper = cifar10vgg(empty_model=True)
    model_sub = model_wrapper.model
    preds_sub = model_sub(x)
    print("Defined TensorFlow model graph for the substitute.")

    # Define the Jacobian symbolically using TensorFlow
    grads = jacobian_graph(preds_sub, x, nb_classes)

    # Train the substitute and augment dataset alternatively
    for rho in range(data_aug):
        print("Substitute training epoch #" + str(rho))
        train_params = {
            'nb_epochs': nb_epochs_s,
            'batch_size': batch_size,
            'learning_rate': learning_rate
        }
        with TemporaryLogLevel(tf.logging.WARNING, "cleverhans.utils.tf"):
            model_train(sess, x, y, preds_sub, X_sub, Y_sub,
                        init_all=False, args=train_params)

        # If we are not at last substitute training iteration, augment dataset
        if rho < data_aug - 1:
            print("Augmenting substitute training data.")
            # Perform the Jacobian augmentation
            lmbda_coef = 2 * int(int(rho / 3) != 0) - 1
            X_sub = jacobian_augmentation(sess, x, X_sub, Y_sub, grads,
                                          lmbda_coef * lmbda)

            print("Labeling substitute training data.")
            # Label the newly generated synthetic points using the black-box
            Y_sub = np.hstack([Y_sub, Y_sub])
            X_sub_prev = X_sub[int(len(X_sub) / 2):]
            eval_params = {'batch_size': batch_size}
            bbox_val = batch_eval(sess, [x], [bbox_preds], [X_sub_prev],
                                  args=eval_params)[0]
            # Note here that we take the argmax because the adversary
            # only has access to the label (not the probabilities) output
            # by the black-box model
            Y_sub[int(len(X_sub) / 2):] = np.argmax(bbox_val, axis=1)

    return model_sub, preds_sub
def train_sub(sess, logits_scalar, x, y, bbox_preds, X_sub, Y_sub,
              nb_classes, nb_epochs_s, batch_size, learning_rate, data_aug,
              lmbda, rng, binary=False, phase=None, model_path=None):
    """
    This function creates the substitute by alternatively augmenting the
    training data and training the substitute.
    :param sess: TF session
    :param x: input TF placeholder
    :param y: output TF placeholder
    :param bbox_preds: output of black-box model predictions
    :param X_sub: initial substitute training data
    :param Y_sub: initial substitute training labels
    :param nb_classes: number of output classes
    :param nb_epochs_s: number of epochs to train substitute model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :param data_aug: number of times substitute training data is augmented
    :param lmbda: lambda from arxiv.org/abs/1602.02697
    :param rng: numpy.random.RandomState instance
    :param phase: placeholder for batch_norm phase (training or testing)
    :param phase_val: True if training, False if testing
    :return:
    """
    # Define TF model graph (for the substitute model)
    model_sub = substitute_model()
    preds_sub = model_sub(x)
    print("Defined TensorFlow model graph for the substitute.")

    # Define the Jacobian symbolically using TensorFlow
    grads = jacobian_graph(preds_sub, x, nb_classes)

    train_params = {
        'binary': False,
        'nb_epochs': nb_epochs_s,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'filename': 'sub_model',
        'train_scope': 'sub_model',
        'reuse_global_step': False,
        'is_training': True
    }

    # Train the substitute and augment dataset alternatively
    for rho in xrange(data_aug):
        print("Substitute training epoch #" + str(rho))
        if rho > 0:
            train_params.update({'reuse_global_step': True})
        if model_path is not None:
            train_params.update({'log_dir': model_path})
            model_train(sess, x, y, preds_sub, X_sub, to_categorical(Y_sub),
                        phase=phase, save=True, init_all=False,
                        args=train_params, rng=rng)
        else:
            model_train(sess, x, y, preds_sub, X_sub, to_categorical(Y_sub),
                        phase=phase, init_all=False, args=train_params,
                        rng=rng)

        # If we are not at last substitute training iteration, augment dataset
        if rho < data_aug - 1:
            print("Augmenting substitute training data.")
            # Perform the Jacobian augmentation
            X_sub = jacobian_augmentation(sess, x, X_sub, Y_sub, grads, lmbda)

            print("Labeling substitute training data.")
            # Label the newly generated synthetic points using the black-box
            Y_sub = np.hstack([Y_sub, Y_sub])
            X_sub_prev = X_sub[int(len(X_sub) / 2):]
            eval_params = {'batch_size': batch_size}
            bbox_val = batch_eval(sess, [x], [bbox_preds], [X_sub_prev],
                                  feed={phase: False}, args=eval_params)[0]
            # Note here that we take the argmax because the adversary
            # only has access to the label (not the probabilities) output
            # by the black-box model
            Y_sub[int(len(X_sub) / 2):] = np.argmax(bbox_val, axis=1)

    return model_sub, preds_sub
def train_sub(sess, x, y, bbox_preds, X_sub, Y_sub, nb_classes,
              nb_epochs_s, batch_size, learning_rate, data_aug, lmbda,
              aug_batch_size, rng, img_rows=28, img_cols=28, nchannels=1):
    """
    This function creates the substitute by alternatively augmenting the
    training data and training the substitute.
    :param sess: TF session
    :param x: input TF placeholder
    :param y: output TF placeholder
    :param bbox_preds: output of black-box model predictions
    :param X_sub: initial substitute training data
    :param Y_sub: initial substitute training labels
    :param nb_classes: number of output classes
    :param nb_epochs_s: number of epochs to train substitute model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :param data_aug: number of times substitute training data is augmented
    :param lmbda: lambda from arxiv.org/abs/1602.02697
    :param rng: numpy.random.RandomState instance
    :return:
    """
    # Define TF model graph (for the substitute model)
    model_sub = ModelSubstitute('model_s', nb_classes)
    preds_sub = model_sub.get_logits(x)
    loss_sub = LossCrossEntropy(model_sub, smoothing=0)
    print("Defined TensorFlow model graph for the substitute.")

    # Define the Jacobian symbolically using TensorFlow
    grads = jacobian_graph(preds_sub, x, nb_classes)

    # Train the substitute and augment dataset alternatively
    for rho in xrange(data_aug):
        print("Substitute training epoch #" + str(rho))
        train_params = {
            'nb_epochs': nb_epochs_s,
            'batch_size': batch_size,
            'learning_rate': learning_rate
        }
        with TemporaryLogLevel(logging.WARNING, "cleverhans.utils.tf"):
            train(sess, loss_sub, x, y, X_sub,
                  to_categorical(Y_sub, nb_classes),
                  init_all=False, args=train_params, rng=rng,
                  var_list=model_sub.get_params())

        # If we are not at last substitute training iteration, augment dataset
        if rho < data_aug - 1:
            print("Augmenting substitute training data.")
            # Perform the Jacobian augmentation
            lmbda_coef = 2 * int(int(rho / 3) != 0) - 1
            X_sub = jacobian_augmentation(sess, x, X_sub, Y_sub, grads,
                                          lmbda_coef * lmbda, aug_batch_size)

            print("Labeling substitute training data.")
            # Label the newly generated synthetic points using the black-box
            Y_sub = np.hstack([Y_sub, Y_sub])
            X_sub_prev = X_sub[int(len(X_sub) / 2):]
            eval_params = {'batch_size': batch_size}
            bbox_val = batch_eval(sess, [x], [bbox_preds], [X_sub_prev],
                                  args=eval_params)[0]
            # Note here that we take the argmax because the adversary
            # only has access to the label (not the probabilities) output
            # by the black-box model
            Y_sub[int(len(X_sub) / 2):] = np.argmax(bbox_val, axis=1)

    return model_sub, preds_sub
def main(argv=None):
    """
    MNIST cleverhans tutorial for the Jacobian-based saliency map
    approach (JSMA)
    :return:
    """
    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    ###########################################################################
    # Define the dataset and model
    ###########################################################################

    # Image dimensions ordering should follow the Theano convention
    if keras.backend.image_dim_ordering() != 'th':
        keras.backend.set_image_dim_ordering('th')
        print("INFO: '~/.keras/keras.json' sets 'image_dim_ordering' to "
              "'tf', temporarily setting to 'th'")

    # Create TF session and set as Keras backend session
    sess = tf.Session()
    keras.backend.set_session(sess)
    print("Created TensorFlow session and set Keras backend.")

    # Get MNIST data
    X_train, Y_train, X_test, Y_test = data_mnist()
    print("Loaded MNIST data.")

    # Define input and output TF placeholders
    x = tf.placeholder(tf.float32, shape=(None, 1, 28, 28))
    y = tf.placeholder(tf.float32, shape=(None, 10))

    # Define TF model graph
    model = model_mnist()
    predictions = model(x)
    print("Defined TensorFlow model graph.")

    ###########################################################################
    # Training the model using TensorFlow
    ###########################################################################

    # Train an MNIST model if it does not exist in the train_dir folder
    saver = tf.train.Saver()
    save_path = os.path.join(FLAGS.train_dir, FLAGS.filename)
    if os.path.isfile(save_path):
        saver.restore(sess, save_path)
    else:
        tf_model_train(sess, x, y, predictions, X_train, Y_train)
        saver.save(sess, save_path)

    # Evaluate the accuracy of the MNIST model on legitimate test examples
    accuracy = tf_model_eval(sess, x, y, predictions, X_test, Y_test)
    assert X_test.shape[0] == 10000, X_test.shape
    print('Test accuracy on legitimate test examples: {0}'.format(accuracy))

    ###########################################################################
    # Craft adversarial examples using the Jacobian-based saliency map approach
    ###########################################################################
    print('Crafting ' + str(FLAGS.source_samples) + ' * '
          + str(FLAGS.nb_classes - 1) + ' adversarial examples')

    # This array indicates whether an adversarial example was found for each
    # test set sample and target class
    results = np.zeros((FLAGS.nb_classes, FLAGS.source_samples), dtype='i')

    # This array contains the fraction of perturbed features for each test set
    # sample and target class
    perturbations = np.zeros((FLAGS.nb_classes, FLAGS.source_samples),
                             dtype='f')

    # Define the TF graph for the model's Jacobian
    grads = jacobian_graph(predictions, x, FLAGS.nb_classes)

    # Loop over the samples we want to perturb into adversarial examples
    for sample_ind in xrange(FLAGS.source_samples):
        # We want to find an adversarial example for each possible target
        # class (i.e. all classes that differ from the label given in the
        # dataset)
        target_classes = other_classes(FLAGS.nb_classes,
                                       int(np.argmax(Y_test[sample_ind])))

        # Loop over all target classes
        for target in target_classes:
            print('--------------------------------------')
            print('Creating adversarial example for target class '
                  + str(target))

            # This call runs the Jacobian-based saliency map approach
            _, result, percentage_perturb = jsma(
                sess, x, predictions, grads,
                X_test[sample_ind:(sample_ind + 1)], target,
                theta=1, gamma=0.1, increase=True, back='tf',
                clip_min=0, clip_max=1)

            # Update the arrays for later analysis
            results[target, sample_ind] = result
            perturbations[target, sample_ind] = percentage_perturb

    # Compute the number of adversarial examples that were successfully found
    success_rate = float(np.sum(results)) / (
        (FLAGS.nb_classes - 1) * FLAGS.source_samples)
    print('Avg. rate of successful misclassifications {0}'.format(
        success_rate))

    # Compute the average distortion introduced by the algorithm
    percentage_perturbed = np.mean(perturbations)
    print('Avg. rate of perturbed features {0}'.format(percentage_perturbed))

    # Close TF session
    sess.close()
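The tutorial above reads several FLAGS values (train_dir, filename, nb_classes, source_samples) that are defined elsewhere in the script. A plausible set of definitions using the standard TF1 flags API, with illustrative defaults, would be:

    # Assumed flag definitions; names match the FLAGS uses above,
    # default values are illustrative only.
    flags = tf.app.flags
    FLAGS = flags.FLAGS

    flags.DEFINE_string('train_dir', '/tmp', 'Directory storing the saved model')
    flags.DEFINE_string('filename', 'mnist.ckpt', 'Filename to save model under')
    flags.DEFINE_integer('nb_classes', 10, 'Number of classification classes')
    flags.DEFINE_integer('source_samples', 10,
                         'Number of test set examples to attack')

    if __name__ == '__main__':
        tf.app.run()  # invokes main(argv)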
def generate(self, x, **kwargs):
    """
    Generate symbolic graph for adversarial examples and return.

    :param x: The model's symbolic inputs.
    :param kwargs: See `parse_params`
    """
    # Parse and save attack-specific parameters
    assert self.parse_params(**kwargs)

    if self.symbolic_impl:
        from cleverhans.attacks_tf import jsma_symbolic

        # Create random targets if y_target was not provided
        if self.y_target is None:
            from random import randint

            def random_targets(gt):
                result = gt.copy()
                nb_s = gt.shape[0]
                nb_classes = gt.shape[1]

                for i in range(nb_s):
                    result[i, :] = np.roll(result[i, :],
                                           randint(1, nb_classes - 1))

                return result

            labels, nb_classes = self.get_or_guess_labels(x, kwargs)
            self.y_target = tf.py_func(random_targets, [labels],
                                       self.tf_dtype)
            self.y_target.set_shape([None, nb_classes])

        x_adv = jsma_symbolic(
            x,
            model=self.model,
            y_target=self.y_target,
            theta=self.theta,
            gamma=self.gamma,
            clip_min=self.clip_min,
            clip_max=self.clip_max)
    else:
        assert self.sess is not None, \
            'Cannot use `generate` when no `sess` was provided'
        from cleverhans.attacks_tf import jacobian_graph, jsma_batch

        # Define Jacobian graph with respect to this input placeholder
        preds = self.model.get_probs(x)
        nb_classes = preds.get_shape().as_list()[-1]
        grads = jacobian_graph(preds, x, nb_classes)

        # Define appropriate graph (targeted / random target labels)
        if self.y_target is not None:
            def jsma_wrap(x_val, y_target):
                return jsma_batch(
                    self.sess, x, preds, grads, x_val,
                    self.theta, self.gamma, self.clip_min, self.clip_max,
                    nb_classes, y_target=y_target)

            # Attack is targeted, target placeholder will need to be fed
            x_adv = tf.py_func(jsma_wrap, [x, self.y_target],
                               self.tf_dtype)
        else:
            def jsma_wrap(x_val):
                return jsma_batch(
                    self.sess, x, preds, grads, x_val,
                    self.theta, self.gamma, self.clip_min, self.clip_max,
                    nb_classes, y_target=None)

            # Attack is untargeted, target values will be chosen at random
            x_adv = tf.py_func(jsma_wrap, [x], self.tf_dtype)

        x_adv.set_shape(x.get_shape())

    return x_adv
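In practice this method is reached through the attack's public API rather than called directly. A minimal sketch against a cleverhans-style model wrapper follows; the wrapped Keras model and the parameter values are assumptions for illustration:

    from cleverhans.attacks import SaliencyMapMethod
    from cleverhans.utils_keras import KerasModelWrapper

    # Wrap an existing Keras model (assumed trained) for cleverhans
    wrapped = KerasModelWrapper(keras_model)
    jsma = SaliencyMapMethod(wrapped, sess=sess)
    jsma_params = {'theta': 1., 'gamma': 0.1,
                   'clip_min': 0., 'clip_max': 1.,
                   'y_target': None}

    adv_x = jsma.generate(x, **jsma_params)            # symbolic graph
    adv_np = jsma.generate_np(X_test[:10], **jsma_params)  # concrete examples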
def generate_images():

    print('==> Preparing data..')
    if not hasattr(backend, "tf"):
        raise RuntimeError("This tutorial requires keras to be configured"
                           " to use the TensorFlow backend.")

    # Image dimensions ordering should follow the TensorFlow convention
    if keras.backend.image_dim_ordering() != 'tf':
        keras.backend.set_image_dim_ordering('tf')
        print("INFO: '~/.keras/keras.json' sets 'image_dim_ordering' to "
              "'th', temporarily setting to 'tf'")

    # Create TF session and set as Keras backend session
    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 0.5
    sess = tf.Session(config=config)
    keras.backend.set_session(sess)

    print("==> Beginning Session")

    # Get CIFAR10 data
    X_train, Y_train, X_test, Y_test = data_cifar10()

    assert Y_train.shape[1] == 10.
    label_smooth = .1
    Y_train = Y_train.clip(label_smooth / 9., 1. - label_smooth)

    x = tf.placeholder(tf.float32, shape=(None, 32, 32, 3))
    y = tf.placeholder(tf.float32, shape=(None, 10))

    # Load model
    print("==> loading vgg model")
    args = load_args()

    if args.model == 'vgg6':
        model = vggbn(top=True, pool=args.pool)
    elif args.model == 'vgg15':
        model = vgg15(top=True, pool=args.pool)
    elif args.model == 'generic':
        model = generic(top=True, pool=args.pool)
    elif args.model == 'resnet18':
        model = resnet.build_resnet_18(args.pool)

    predictions = model(x)

    model.load_weights(args.load)

    eval_params = {'batch_size': FLAGS.batch_size}
    accuracy = model_eval(sess, x, y, predictions, X_test, Y_test,
                          args=eval_params)
    print('==> Accuracy : {}'.format(accuracy))

    def evaluate():
        # Evaluate the accuracy of the CIFAR10 model on legitimate test
        # examples
        eval_params = {'batch_size': FLAGS.batch_size}
        accuracy = model_eval(sess, x, y, predictions, X_test, Y_test,
                              args=eval_params)
        assert X_test.shape[0] == 10000, X_test.shape
        print('Test accuracy on legitimate test examples: ' + str(accuracy))

    # Train a CIFAR10 model
    train_params = {
        'nb_epochs': FLAGS.nb_epochs,
        'batch_size': FLAGS.batch_size,
        'learning_rate': FLAGS.learning_rate
    }

    im_base = '/im_'
    model_name = args.model + '_p' + str(args.pool)

    if args.attack == 'fgsm' or args.attack == 'FGSM':

        result_dir = os.getcwd() + '/images/fgsm/'
        print("==> creating fgsm adversarial wrapper")
        adv_x = fgsm(x, predictions, eps=0.3)

        print("==> sending to batch evaluator to finalize adversarial images")
        eval_params = {'batch_size': FLAGS.batch_size}
        X_train_adv, = batch_eval(sess, [x], [adv_x], [X_train],
                                  args=eval_params)

        i = 0
        if not os.path.exists(result_dir + model_name):
            os.makedirs(result_dir + model_name)
        print("==> saving images to {}".format(result_dir + model_name))
        for ad in X_train_adv:
            scipy.misc.imsave(
                result_dir + model_name + im_base + str(i) + '.png', ad)
            i += 1

        sess.close()

    """ JSMA """
    if args.attack == 'jsma' or args.attack == 'JSMA':

        result_dir = os.getcwd() + '/images/jsma/trial_single_adv'
        print('Crafting ' + str(FLAGS.source_samples) + ' * '
              + str(FLAGS.nb_classes - 1) + ' adversarial examples')

        # This array indicates whether an adversarial example was found for
        # each test set sample and target class
        results = np.zeros((FLAGS.nb_classes, FLAGS.source_samples),
                           dtype='i')

        # This array contains the fraction of perturbed features for each
        # test set sample and target class
        perturbations = np.zeros((FLAGS.nb_classes, FLAGS.source_samples),
                                 dtype='f')

        # Define the TF graph for the model's Jacobian
        grads = jacobian_graph(predictions, x, FLAGS.nb_classes)

        # Initialize our array for grid visualization
        grid_shape = (FLAGS.nb_classes, FLAGS.nb_classes, FLAGS.img_rows,
                      FLAGS.img_cols, FLAGS.nb_channels)
        grid_viz_data = np.zeros(grid_shape, dtype='f')

        i_saved = 0
        n_image = 0
        # Loop over the samples we want to perturb into adversarial examples
print "==> saving images to {}".format(result_dir + model_name) for sample_ind in xrange(7166, FLAGS.source_samples): # We want to find an adversarial example for each possible target class current_class = int(np.argmax(Y_train[sample_ind])) target_classes = other_classes(FLAGS.nb_classes, current_class) # For the grid visualization, keep original images along the diagonal grid_viz_data[current_class, current_class, :, :, :] = np.reshape( X_train[sample_ind:(sample_ind + 1)], (FLAGS.img_rows, FLAGS.img_cols, FLAGS.nb_channels)) # Loop over all target classes adversarials = [] for idx, target in enumerate(target_classes): print "image {}".format(sample_ind) # here we hold all successful adversarials for this iteration # since we dont want 500k images, we will uniformly sample an image to save after each target print('--------------------------------------') print('Creating adv. example for target class ' + str(target)) # This call runs the Jacobian-based saliency map approach adv_x, res, percent_perturb = jsma( sess, x, predictions, grads, X_train[sample_ind:(sample_ind + 1)], target, theta=1, gamma=0.1, increase=True, back='tf', clip_min=0, clip_max=1) # Display the original and adversarial images side-by-side adversarial = np.reshape( adv_x, (FLAGS.img_rows, FLAGS.img_cols, FLAGS.nb_channels)) original = np.reshape( X_train[sample_ind:(sample_ind + 1)], (FLAGS.img_rows, FLAGS.img_cols, FLAGS.nb_channels)) if FLAGS.viz_enabled: if 'figure' not in vars(): figure = pair_visual(original, adversarial) else: figure = pair_visual(original, adversarial, figure) if not os.path.exists(result_dir + model_name): os.makedirs(result_dir + model_name) if res == 1: adversarials.append(adversarial) if idx == FLAGS.nb_classes - 2: try: if len(adversarials) == 1: idx_uniform = 0 else: idx_uniform = np.random.randint( 0, len(adversarials) - 1) print idx_uniform scipy.misc.imsave( result_dir + model_name + im_base + str(sample_ind) + '.png', adversarials[idx_uniform]) i_saved += 1 print "==> images saved: {}".format(i_saved) except: print "No adversarials generated" # Add our adversarial example to our grid data grid_viz_data[target, current_class, :, :, :] = np.reshape( adv_x, (FLAGS.img_rows, FLAGS.img_cols, FLAGS.nb_channels)) # Update the arrays for later analysis results[target, sample_ind] = res perturbations[target, sample_ind] = percent_perturb n_image += 1 # Compute the number of adversarial examples that were successfuly found nb_targets_tried = ((FLAGS.nb_classes - 1) * FLAGS.source_samples) succ_rate = float(np.sum(results)) / nb_targets_tried print( 'Avg. rate of successful adv. examples {0:.2f}'.format(succ_rate)) # Compute the average distortion introduced by the algorithm percent_perturbed = np.mean(perturbations) print('Avg. rate of perturbed features {0:.2f}'.format( percent_perturbed)) # Compute the average distortion introduced for successful samples only percent_perturb_succ = np.mean(perturbations * (results == 1)) print( 'Avg. rate of perturbed features for successful ' 'adversarial examples {0:.2f}'.format(percent_perturb_succ)) # Close TF session sess.close() # Finally, block & display a grid of all the adversarial examples if FLAGS.viz_enabled: _ = grid_visual(grid_viz_data)