def mnist_tutorial(train_start=0, train_end=60000, test_start=0,
                   test_end=10000, nb_epochs=NB_EPOCHS, batch_size=BATCH_SIZE,
                   learning_rate=LEARNING_RATE, clean_train=CLEAN_TRAIN,
                   testing=False,
                   backprop_through_attack=BACKPROP_THROUGH_ATTACK,
                   nb_filters=64, num_threads=None, label_smoothing=0.1):
    """
    MNIST cleverhans tutorial
    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :param clean_train: perform normal training on clean examples only
                        before performing adversarial training.
    :param testing: if true, complete an AccuracyReport for unit tests
                    to verify that performance is adequate
    :param backprop_through_attack: If True, backprop through adversarial
                                    example construction process during
                                    adversarial training.
    :param label_smoothing: float, amount of label smoothing for cross entropy
    :return: an AccuracyReport object
    """
    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Set logging level to see debug information
    set_log_level(logging.DEBUG)

    # Create TF session
    if num_threads:
        config_args = dict(intra_op_parallelism_threads=num_threads)
    else:
        config_args = {}
    sess = tf.Session(config=tf.ConfigProto(**config_args))

    # Get MNIST test data
    x_train, y_train, x_test, y_test = data_mnist(train_start=train_start,
                                                  train_end=train_end,
                                                  test_start=test_start,
                                                  test_end=test_end)

    # Use Image Parameters
    img_rows, img_cols, nchannels = x_train.shape[1:4]
    nb_classes = y_train.shape[1]

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols,
                                          nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate
    }
    eval_params = {'batch_size': batch_size}
    fgsm_params = {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.}
    rng = np.random.RandomState([2017, 8, 30])

    def do_eval(preds, x_set, y_set, report_key, is_adv=None):
        acc = model_eval(sess, x, y, preds, x_set, y_set, args=eval_params)
        setattr(report, report_key, acc)
        if is_adv is None:
            report_text = None
        elif is_adv:
            report_text = 'adversarial'
        else:
            report_text = 'legitimate'
        if report_text:
            print('Test accuracy on %s examples: %0.4f' % (report_text, acc))

    if clean_train:
        model = ModelBasicCNN('model1', nb_classes, nb_filters)
        preds = model.get_logits(x)
        loss = CrossEntropy(model, smoothing=label_smoothing)

        def evaluate():
            do_eval(preds, x_test, y_test, 'clean_train_clean_eval', False)

        train(sess, loss, x, y, x_train, y_train, evaluate=evaluate,
              args=train_params, rng=rng, var_list=model.get_params())

        # Calculate training error
        if testing:
            do_eval(preds, x_train, y_train, 'train_clean_train_clean_eval')

        # Initialize the Fast Gradient Sign Method (FGSM) attack object and
        # graph
        fgsm = FastGradientMethod(model, sess=sess)
        adv_x = fgsm.generate(x, **fgsm_params)
        preds_adv = model.get_logits(adv_x)

        # Evaluate the accuracy of the MNIST model on adversarial examples
        do_eval(preds_adv, x_test, y_test, 'clean_train_adv_eval', True)

        # Calculate training error
        if testing:
            do_eval(preds_adv, x_train, y_train, 'train_clean_train_adv_eval')

        print('Repeating the process, using adversarial training')

    # Create a new model and train it to be robust to FastGradientMethod
    model2 = ModelBasicCNN('model2', nb_classes, nb_filters)
    fgsm2 = FastGradientMethod(model2, sess=sess)

    def attack(x):
        return fgsm2.generate(x, **fgsm_params)

    loss2 = CrossEntropy(model2, smoothing=label_smoothing, attack=attack)
    preds2 = model2.get_logits(x)
    adv_x2 = attack(x)

    if not backprop_through_attack:
        # For the fgsm attack used in this tutorial, the attack has zero
        # gradient so enabling this flag does not change the gradient.
        # For some other attacks, enabling this flag increases the cost of
        # training, but gives the defender the ability to anticipate how
        # the attacker will change their strategy in response to updates to
        # the defender's parameters.
        adv_x2 = tf.stop_gradient(adv_x2)
    preds2_adv = model2.get_logits(adv_x2)

    def evaluate2():
        # Accuracy of adversarially trained model on legitimate test inputs
        do_eval(preds2, x_test, y_test, 'adv_train_clean_eval', False)
        # Accuracy of the adversarially trained model on adversarial examples
        do_eval(preds2_adv, x_test, y_test, 'adv_train_adv_eval', True)

    # Perform and evaluate adversarial training
    train(sess, loss2, x, y, x_train, y_train, evaluate=evaluate2,
          args=train_params, rng=rng, var_list=model2.get_params())

    # Calculate training errors
    if testing:
        do_eval(preds2, x_train, y_train, 'train_adv_train_clean_eval')
        do_eval(preds2_adv, x_train, y_train, 'train_adv_train_adv_eval')

    return report
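

# A minimal driver for the tutorial above (a sketch; the upstream cleverhans
# tutorials wrap this in a main() driven by command-line flags). NB_EPOCHS,
# BATCH_SIZE, LEARNING_RATE, CLEAN_TRAIN and BACKPROP_THROUGH_ATTACK are
# assumed to be module-level constants, as the default arguments above imply.
if __name__ == '__main__':
    report = mnist_tutorial(nb_epochs=NB_EPOCHS, batch_size=BATCH_SIZE,
                            learning_rate=LEARNING_RATE,
                            clean_train=CLEAN_TRAIN,
                            backprop_through_attack=BACKPROP_THROUGH_ATTACK)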
def mnist_tutorial(train_start=0, train_end=60000, test_start=0,
                   test_end=10000, nb_epochs=6, batch_size=128,
                   learning_rate=0.001, clean_train=True, testing=False,
                   backprop_through_attack=False, nb_filters=64,
                   num_threads=None, label_smoothing=0.1):
    """
    MNIST cleverhans tutorial
    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :param clean_train: perform normal training on clean examples only
                        before performing adversarial training.
    :param testing: if true, complete an AccuracyReport for unit tests
                    to verify that performance is adequate
    :param backprop_through_attack: If True, backprop through adversarial
                                    example construction process during
                                    adversarial training.
    :param label_smoothing: float, amount of label smoothing for cross entropy
    :return: an AccuracyReport object
    """
    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Set logging level to see debug information
    set_log_level(logging.DEBUG)

    # Create TF session (this variant pins the session to a single GPU)
    if num_threads:
        config_args = dict(intra_op_parallelism_threads=num_threads)
    else:
        config_args = {}
    sess = tf.Session(config=tf.ConfigProto(device_count={'GPU': 1}))

    # Get MNIST test data. `file`, `save_dir`, `filename`, `model_path2`,
    # `save_dir2` and `filename2` are assumed to be defined at module level.
    x_train, y_train, x_test, y_test = data_mnist(file,
                                                  train_start=train_start,
                                                  train_end=train_end,
                                                  test_start=test_start,
                                                  test_end=test_end)

    # Use Image Parameters
    img_rows, img_cols, nchannels = x_train.shape[1:4]
    nb_classes = y_train.shape[1]

    rng = np.random.RandomState([2017, 8, 30])

    # ---------------- color training initialization ----------------
    color_training_epochs = 5000
    color_learning_rate = 0.1
    colorCategory = [
        [0.0, 0.4],  # Black
        [0.3, 0.7],  # Grey
        [0.6, 1.0]   # White
    ]
    numOfPRModel = 20
    minColorEpoch = 300
    maxColorEpoch = 3000

    numColorInput = 1
    numColorOutput = len(colorCategory)

    # Pixel-intensity input of shape (batch, 1)
    color_x = tf.placeholder(tf.float32, [None, numColorInput])
    # One-hot color-category target of shape (batch, numColorOutput)
    color_y = tf.placeholder(tf.float32, [None, numColorOutput])

    # Set multiple models' weights and biases
    color_W = {}
    color_b = {}
    color_pred_out = {}
    color_cost = {}
    color_optimizer = {}
    color_argmax = {}
    color_correct_prediction = {}
    color_accuracy = {}
    for i in range(numOfPRModel):
        color_W["w" + str(i)] = tf.Variable(
            tf.random_normal([numColorInput, numColorOutput]))
        color_b["b" + str(i)] = tf.Variable(
            tf.random_normal([numColorOutput]))
        color_pred_out["out" + str(i)] = tf.matmul(
            color_x, color_W["w" + str(i)]) + color_b["b" + str(i)]
        # Softmax cross entropy
        color_cost["cost" + str(i)] = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(
                logits=color_pred_out["out" + str(i)], labels=color_y))
        # Gradient Descent
        color_optimizer["opt" + str(i)] = tf.train.GradientDescentOptimizer(
            color_learning_rate).minimize(color_cost["cost" + str(i)])
        # Test model
        color_argmax["argmax" + str(i)] = tf.argmax(
            color_pred_out["out" + str(i)], 1)
        color_correct_prediction["pred" + str(i)] = tf.equal(
            tf.argmax(color_pred_out["out" + str(i)], 1),
            tf.argmax(color_y, 1))
        # Calculate accuracy
        color_accuracy["acc" + str(i)] = tf.reduce_mean(
            tf.cast(color_correct_prediction["pred" + str(i)], tf.float32))

    # Graph for re-generating the original image into a new image by using
    # the trained color models
    n_input = img_rows * img_cols  # flattened image size (28*28=784)
    # Flattened image pixels, shape (batch, n_input, 1)
    pr_model_x = tf.placeholder(tf.float32, [None, n_input, numColorInput])
    # Per-example color-model weights, shape (batch, 1, numColorOutput)
    pr_model_W = tf.placeholder(tf.float32,
                                [None, numColorInput, numColorOutput])
    # Per-example color-model biases, shape (batch, 1, numColorOutput)
    pr_model_b = tf.placeholder(tf.float32,
                                [None, numColorInput, numColorOutput])
    pr_model_output = tf.one_hot(
        tf.argmax((tf.matmul(pr_model_x, pr_model_W) + pr_model_b), 2),
        numColorOutput)

    # Merge the randomly generated output for the new image based on the
    # colorCategory
    randomColorCategory = []
    for i in range(len(colorCategory)):
        tmp = []
        tmpRandomColorCategory = my_tf_round(
            tf.random_uniform(tf.shape(pr_model_x), colorCategory[i][0],
                              colorCategory[i][1], dtype=tf.float32), 2)
        tmp.append(tmpRandomColorCategory)
        randomColorCategory.append(tf.concat(tmp, 1))
    random_merge = tf.reshape(tf.concat(randomColorCategory, -1),
                              [-1, n_input, numColorOutput])
    random_color_set = tf.reduce_sum(
        tf.multiply(pr_model_output, random_merge), 2)

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols,
                                          nchannels))
    # x = tf.reshape(random_color_set,
    #                shape=(-1, img_rows, img_cols, nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))
    print(random_color_set)

    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'train_dir': save_dir,
        'filename': filename,
        'numColorOutput': numColorOutput
    }
    eval_params = {'batch_size': batch_size,
                   'numColorOutput': numColorOutput}
    fgsm_params = {'eps': 8 / 256, 'clip_min': 0., 'clip_max': 1.}

    def do_eval(preds, x_set, y_set, report_key, is_adv=None, pred2=None,
                c_w=None, c_b=None, pr_model_x=None, random_color_set=None,
                pr_model_W=None, pr_model_b=None, pr_model_output=None,
                ae=None):
        acc = model_eval(sess, x, y, preds, x_set, y_set, args=eval_params,
                         pred2=pred2, c_w=c_w, c_b=c_b,
                         pr_model_x=pr_model_x,
                         random_color_set=random_color_set,
                         pr_model_W=pr_model_W, pr_model_b=pr_model_b,
                         pr_model_output=pr_model_output, is_adv=is_adv,
                         ae=ae)
        setattr(report, report_key, acc)
        if is_adv is None:
            report_text = None
        elif is_adv:
            report_text = 'adversarial'
        else:
            report_text = 'legitimate'
        if report_text:
            print('Test accuracy on %s examples: %0.4f' % (report_text, acc))

    with sess.as_default():
        if hasattr(tf, "global_variables_initializer"):
            tf.global_variables_initializer().run()
        else:
            warnings.warn("Update your copy of tensorflow; future versions "
                          "of CleverHans may drop support for this version.")
            sess.run(tf.initialize_all_variables())

    # ---------------- color training ----------------
    print("Trying to load pr model from: " + model_path2)
    if os.path.exists(model_path2 + ".meta"):
        tf_model_load(sess, model_path2)
        c_w, c_b = sess.run([color_W, color_b])
        print("Load color trained model in training")
    else:
        # Training the PR models
        c_w = {}
        c_b = {}
        for modelcount in range(numOfPRModel):
            color_training_epochs = np.random.randint(minColorEpoch,
                                                      maxColorEpoch)
            for epoch in range(color_training_epochs):
                outputColorY = []
                p1 = np.random.random(100)
                for i in range(len(p1)):
                    outputOverlapColorY = []
                    for j in range(len(colorCategory)):
                        if (p1[i] >= colorCategory[j][0]
                                and p1[i] <= colorCategory[j][1]):
                            colorIndexSeq = []
                            for k in range(len(colorCategory)):
                                if j == k:
                                    colorIndexSeq.append(1)
                                else:
                                    colorIndexSeq.append(0)
                            outputOverlapColorY.append(colorIndexSeq)
                            # break
                    # Randomly choose the output for color Y if
                    # outputOverlapColorY has more than 1 item
                    outputColorY.append(outputOverlapColorY[
                        np.random.randint(0, len(outputOverlapColorY))])

                inputColorX = p1.reshape(100, 1)
                _, c, _c_w, _c_b = sess.run(
                    [color_optimizer["opt" + str(modelcount)],
                     color_cost["cost" + str(modelcount)],
                     color_W["w" + str(modelcount)],
                     color_b["b" + str(modelcount)]],
                    feed_dict={color_x: inputColorX, color_y: outputColorY})
                avg_cost = c

                # Evaluating the color model
                outputColorY = []
                p1 = np.random.random(100)
                # Generate output for random color inputs (test case)
                for i in range(len(p1)):
                    for j in range(len(colorCategory)):
                        outputOverlapColorY = []
                        if (p1[i] >= colorCategory[j][0]
                                and p1[i] <= colorCategory[j][1]):
                            colorIndexSeq = []
                            for k in range(len(colorCategory)):
                                if j == k:
                                    colorIndexSeq.append(1)
                                else:
                                    colorIndexSeq.append(0)
                            outputOverlapColorY.append(colorIndexSeq)
                            break
                    # Randomly choose the output for color Y if
                    # outputOverlapColorY has more than 1 item
                    outputColorY.append(outputOverlapColorY[
                        np.random.randint(0, len(outputOverlapColorY))])

                inputColorX = p1.reshape(100, 1)
                acc, argmax = sess.run(
                    [color_accuracy["acc" + str(modelcount)],
                     color_argmax["argmax" + str(modelcount)]],
                    feed_dict={color_x: inputColorX, color_y: outputColorY})
                print(str(modelcount + 1) + ") Epoch:",
                      '%04d' % (epoch + 1) + "/" +
                      str(color_training_epochs) +
                      ", Cost= " + "{:.9f}".format(avg_cost) +
                      ", Training Accuracy= " + "{:.5f}".format(acc))

            c_w["w" + str(modelcount)] = _c_w
            c_b["b" + str(modelcount)] = _c_b
            # print(c_w)

        save_path = os.path.join(save_dir2, filename2)
        saver = tf.train.Saver()
        saver.save(sess, save_path)
    # ---------------- end of color training ----------------

    # ---------------- model training ----------------
    if clean_train:
        model = ModelBasicCNN('model1', nb_classes, nb_filters)
        preds = model.get_logits(x)
        loss = LossCrossEntropy(model, smoothing=label_smoothing)

        # Initialize the Fast Gradient Sign Method (FGSM) attack object and
        # graph
        saveFileNum = 50
        # saveFileNum = 500
        # saveFileNum = 1000
        model_path = os.path.join(save_dir,
                                  filename + "-" + str(saveFileNum))
        fgsm = FastGradientMethod(model)
        # fgsm = BasicIterativeMethod(model)
        # fgsm = MomentumIterativeMethod(model)
        adv_x = fgsm.generate(x, **fgsm_params)
        preds_adv = model.get_logits(adv_x)

        def evaluate():
            do_eval(preds, x_test, y_test, 'clean_train_clean_eval', False,
                    pred2=preds, c_w=c_w, c_b=c_b, pr_model_x=pr_model_x,
                    random_color_set=random_color_set,
                    pr_model_W=pr_model_W, pr_model_b=pr_model_b)
            # do_eval(preds, x_test, y_test, 'clean_train_adv_eval', True,
            #         pred2=preds, c_w=c_w, c_b=c_b, ae=adv_x,
            #         pr_model_x=pr_model_x,
            #         random_color_set=random_color_set,
            #         pr_model_W=pr_model_W, pr_model_b=pr_model_b,
            #         pr_model_output=pr_model_output)

        print("Trying to load trained model from: " + model_path)
        if os.path.exists(model_path + ".meta"):
            tf_model_load(sess, model_path)
            print("Load trained model")
        else:
            train(sess, loss, x, y, x_train, y_train, evaluate=evaluate,
                  args=train_params, rng=rng, var_list=model.get_params(),
                  save=True, c_w=c_w, c_b=c_b, pr_model_x=pr_model_x,
                  random_color_set=random_color_set,
                  pr_model_W=pr_model_W, pr_model_b=pr_model_b)

        # Calculate training error
        if testing:
            do_eval(preds, x_train, y_train, 'train_clean_train_clean_eval')

        # Evaluate the accuracy of the MNIST model on adversarial examples
        do_eval(preds, x_test, y_test, 'clean_train_adv_eval', True,
                pred2=preds, c_w=c_w, c_b=c_b, ae=adv_x,
                pr_model_x=pr_model_x, random_color_set=random_color_set,
                pr_model_W=pr_model_W, pr_model_b=pr_model_b)

        # Calculate training error
        if testing:
            do_eval(preds_adv, x_train, y_train,
                    'train_clean_train_adv_eval')
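

# my_tf_round is called above but not defined in this excerpt. A common
# implementation (a sketch, assuming the intent is element-wise rounding of a
# tensor to a fixed number of decimal places) is:
def my_tf_round(x, decimals=0):
    # Scale up, round to the nearest integer, then scale back down.
    multiplier = tf.constant(10 ** decimals, dtype=x.dtype)
    return tf.round(x * multiplier) / multiplier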
def mnist_tutorial(train_start=0, train_end=1000, test_start=0,
                   test_end=1666, nb_epochs=6, batch_size=128,
                   learning_rate=0.001, clean_train=True, testing=False,
                   backprop_through_attack=False, nb_filters=64,
                   num_threads=None):
    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Set logging level to see debug information
    set_log_level(logging.DEBUG)

    # Create TF session
    if num_threads:
        config_args = dict(intra_op_parallelism_threads=num_threads)
    else:
        config_args = {}
    sess = tf.Session(config=tf.ConfigProto(**config_args))

    # Get MNIST test data
    x_train, y_train, x_test, y_test = data_mnist(train_start=train_start,
                                                  train_end=train_end,
                                                  test_start=test_start,
                                                  test_end=test_end)

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
    y = tf.placeholder(tf.float32, shape=(None, 10))

    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate
    }
    eval_params = {'batch_size': batch_size}
    fgsm_params = {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.}
    rng = np.random.RandomState([2017, 8, 30])

    def do_eval(preds, x_set, y_set, report_key, is_adv=None):
        acc = model_eval(sess, x, y, preds, x_set, y_set, args=eval_params)
        setattr(report, report_key, acc)
        if is_adv is None:
            report_text = None
        elif is_adv:
            report_text = 'adversarial'
            # added by hhkim
            # print('cur:', y_set)
            # feed_dict = {x: x_set}
            # probabilities = sess.run(preds, feed_dict)
            # print(probabilities)
        else:
            report_text = 'legitimate'
        if report_text:
            print('Test accuracy on %s examples: %0.4f' % (report_text, acc))

    if clean_train:
        model = ModelBasicCNN('model1', 10, nb_filters)
        preds = model.get_logits(x)
        loss = LossCrossEntropy(model, smoothing=0.1)

        def evaluate():
            do_eval(preds, x_test, y_test, 'clean_train_clean_eval', False)

        train(sess, loss, x, y, x_train, y_train, evaluate=evaluate,
              args=train_params, rng=rng, var_list=model.get_params())

        # Calculate training error
        if testing:
            do_eval(preds, x_train, y_train, 'train_clean_train_clean_eval')

        # Initialize the Fast Gradient Sign Method (FGSM) attack object and
        # graph
        fgsm = FastGradientMethod(model, sess=sess)
        adv_x = fgsm.generate(x, **fgsm_params)
        preds_adv = model.get_logits(adv_x)
        print('adv_x shape:', adv_x.shape)

        # Get array of output
        # updated by hak hyun kim
        feed_dict = {x: x_test[:1]}
        probabilities = sess.run(preds_adv, feed_dict)
        print(probabilities)
        print('original answer :', y_test[:1])

        # Evaluate the accuracy of the MNIST model on adversarial examples
        do_eval(preds_adv, x_test[:1], y_test[:1], 'clean_train_adv_eval',
                True)

        # Calculate training error
        if testing:
            do_eval(preds_adv, x_train, y_train,
                    'train_clean_train_adv_eval')

    print('Repeating the process, using adversarial training')
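

# For reference, FastGradientMethod.generate above builds a graph equivalent
# to the fast gradient sign method of Goodfellow et al.:
# x_adv = clip(x + eps * sign(dJ/dx)). A minimal hand-rolled sketch, assuming
# a cleverhans-style `model` and the placeholders `x`, `y` used above:
def fgsm_by_hand(model, x, y, eps=0.3, clip_min=0., clip_max=1.):
    logits = model.get_logits(x)
    # Cross-entropy loss of the model's prediction against the given labels
    loss = tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=logits)
    # Gradient of the loss with respect to the input image
    grad, = tf.gradients(loss, x)
    # One signed step of size eps, then clip back to the valid pixel range
    adv_x = x + eps * tf.sign(grad)
    return tf.clip_by_value(adv_x, clip_min, clip_max)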
print("STEP 3: Start training model...") x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels)) sess = tf.Session(config=tf.ConfigProto(**config_args)) model = ModelBasicCNN('model1', nb_classes, NB_FILTERS) preds = model.get_logits(x) loss = CrossEntropy(model, smoothing=0.1) train(sess, loss, x_train, y_train, evaluate=None, args=train_params, rng=rng, var_list=model.get_params()) fgsm = FastGradientMethod(model, sess=sess) adv_x = fgsm.generate(x, **fgsm_params) preds_adv = model.get_logits(adv_x) adv_image = adv_x.eval(session=sess, feed_dict={x: my_data}) # 快速显示,debug用 # plt.imshow(adv_image[0,:,:,0]) # plt.show() # 是否生成对抗图片 print("STEP 4: Build melicious data...") printimage = True name_num = 0 directory = "image_adv_" + str(datetime.datetime.now()) if printimage is True:
def mnist_tutorial(train_start=0, train_end=60000, test_start=0,
                   test_end=10000, nb_epochs=NB_EPOCHS,
                   batch_size=BATCH_SIZE, learning_rate=LEARNING_RATE,
                   clean_train=CLEAN_TRAIN, testing=False,
                   backprop_through_attack=BACKPROP_THROUGH_ATTACK,
                   nb_filters=NB_FILTERS, num_threads=None,
                   label_smoothing=0.1):
    """
    MNIST cleverhans tutorial
    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :param clean_train: perform normal training on clean examples only
                        before performing adversarial training.
    :param testing: if true, complete an AccuracyReport for unit tests
                    to verify that performance is adequate
    :param backprop_through_attack: If True, backprop through adversarial
                                    example construction process during
                                    adversarial training.
    :param label_smoothing: float, amount of label smoothing for cross entropy
    :return: an AccuracyReport object
    """
    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Set logging level to see debug information
    set_log_level(logging.DEBUG)

    # Create TF session
    if num_threads:
        config_args = dict(intra_op_parallelism_threads=num_threads)
    else:
        config_args = {}
    sess = tf.Session(config=tf.ConfigProto(**config_args))

    # Get MNIST data
    mnist = MNIST(train_start=train_start, train_end=train_end,
                  test_start=test_start, test_end=test_end)
    x_train, y_train = mnist.get_set('train')
    x_test, y_test = mnist.get_set('test')

    # Use Image Parameters
    img_rows, img_cols, nchannels = x_train.shape[1:4]
    nb_classes = y_train.shape[1]

    # Define input TF placeholders
    # (x is a float32 Tensor of shape (?, 28, 28, 1))
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols,
                                          nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate
    }
    eval_params = {'batch_size': batch_size}
    fgsm_params = {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.}
    rng = np.random.RandomState([2017, 8, 30])

    def do_eval(preds, x_set, y_set, report_key, is_adv=None):
        acc = model_eval(sess, x, y, preds, x_set, y_set, args=eval_params)
        setattr(report, report_key, acc)
        if is_adv is None:
            report_text = None
        elif is_adv:
            report_text = 'adversarial'
        else:
            report_text = 'legitimate'
        if report_text:
            print('Test accuracy on %s examples: %0.4f' % (report_text, acc))
        return acc

    if clean_train:
        model = ModelBasicCNN('model1', nb_classes, nb_filters)
        preds = model.get_logits(x)
        loss = CrossEntropy(model, smoothing=label_smoothing)

        def evaluate():
            do_eval(preds, x_test, y_test, 'clean_train_clean_eval', False)

        train(sess, loss, x_train, y_train, evaluate=evaluate,
              args=train_params, rng=rng, var_list=model.get_params())

        # Calculate training error
        if testing:
            do_eval(preds, x_train, y_train, 'train_clean_train_clean_eval')

        # Initialize the attack object and graph. Note: despite the variable
        # name, this variant swaps in ProjectedGradientDescent for
        # FastGradientMethod.
        fgsm = ProjectedGradientDescent(model, sess=sess)  # TODO
        start = time.time()
        adv_x = fgsm.generate(x, **fgsm_params)
        # imagetest = np.squeeze(adv_x)
        # plt.imshow(imagetest)
        preds_adv = model.get_logits(adv_x)
        end = time.time()
        # Note: this measures graph-construction time only; the attack itself
        # runs later, inside model_eval.
        a = end - start
        print("Attack time = ")
        print(a)
        print("")

        # Evaluate the accuracy of the MNIST model on adversarial examples
        start = time.time()
        acc_result = do_eval(preds_adv, x_test, y_test,
                             'clean_train_adv_eval', True)
        end = time.time()
        b = end - start
        print("")
        print("Inference function time = ")
        print(b)
        print("")

        values = [b, acc_result * 100, 0, 0, 0]
        x_labels = ['Time(s)', 'Accuracy(%)', '', 'Method2 Time(s)',
                    'Method2 Accuracy(%)']
        plt.bar(x_labels, values)
        plt.show()

        # Calculate training error
        if testing:
            do_eval(preds_adv, x_train, y_train,
                    'train_clean_train_adv_eval')

    print("END!")
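

# Note: ProjectedGradientDescent accepts iterative-attack parameters beyond
# the FGSM-style fgsm_params used above. A sketch of a more typical PGD
# configuration (parameter names as in cleverhans' ProjectedGradientDescent;
# the values are illustrative assumptions):
pgd_params = {
    'eps': 0.3,        # total L-inf perturbation budget
    'eps_iter': 0.05,  # step size per iteration
    'nb_iter': 10,     # number of gradient steps
    'clip_min': 0.,
    'clip_max': 1.,
}
# Inside the function above, one could then call:
# adv_x = fgsm.generate(x, **pgd_params)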
def mnist_tutorial(train_start=0, train_end=60000, test_start=0,
                   test_end=10000, nb_epochs=6, batch_size=128,
                   learning_rate=0.001, clean_train=True, testing=False,
                   backprop_through_attack=False, nb_filters=64,
                   num_threads=None, label_smoothing=True):
    """
    MNIST cleverhans tutorial
    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :param clean_train: perform normal training on clean examples only
                        before performing adversarial training.
    :param testing: if true, complete an AccuracyReport for unit tests
                    to verify that performance is adequate
    :param backprop_through_attack: If True, backprop through adversarial
                                    example construction process during
                                    adversarial training.
    :param label_smoothing: if true, clip the training labels to apply
                            label smoothing
    :return: an AccuracyReport object
    """
    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Set logging level to see debug information
    set_log_level(logging.DEBUG)

    # Create TF session
    if num_threads:
        config_args = dict(intra_op_parallelism_threads=num_threads)
    else:
        config_args = {}
    sess = tf.Session(config=tf.ConfigProto(**config_args))

    # Get MNIST test data
    x_train, y_train, x_test, y_test = data_mnist(train_start=train_start,
                                                  train_end=train_end,
                                                  test_start=test_start,
                                                  test_end=test_end)

    # Use Image Parameters
    img_rows, img_cols, nchannels = x_train.shape[1:4]
    nb_classes = y_train.shape[1]

    if label_smoothing:
        # Note: the losses below also apply smoothing=0.1, so smoothing is
        # effectively applied twice in this variant.
        label_smooth = .1
        y_train = y_train.clip(label_smooth / (nb_classes - 1),
                               1. - label_smooth)

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols,
                                          nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate
    }
    eval_params = {'batch_size': batch_size}
    fgsm_params = {
        'eps': 0.3,
        'clip_min': 0.,
        'clip_max': 1.
    }
    rng = np.random.RandomState([2017, 8, 30])

    def do_eval(preds, x_set, y_set, report_key, is_adv=None):
        acc = model_eval(sess, x, y, preds, x_set, y_set, args=eval_params)
        setattr(report, report_key, acc)
        if is_adv is None:
            report_text = None
        elif is_adv:
            report_text = 'adversarial'
        else:
            report_text = 'legitimate'
        if report_text:
            print('Test accuracy on %s examples: %0.4f' % (report_text, acc))

    if clean_train:
        model = ModelBasicCNN('model1', nb_classes, nb_filters)
        preds = model.get_logits(x)
        loss = LossCrossEntropy(model, smoothing=0.1)

        def evaluate():
            do_eval(preds, x_test, y_test, 'clean_train_clean_eval', False)

        train(sess, loss, x, y, x_train, y_train, evaluate=evaluate,
              args=train_params, rng=rng, var_list=model.get_params())

        # Calculate training error
        if testing:
            do_eval(preds, x_train, y_train, 'train_clean_train_clean_eval')

        # Initialize the Fast Gradient Sign Method (FGSM) attack object and
        # graph
        fgsm = FastGradientMethod(model, sess=sess)
        adv_x = fgsm.generate(x, **fgsm_params)
        preds_adv = model.get_logits(adv_x)

        # Evaluate the accuracy of the MNIST model on adversarial examples
        do_eval(preds_adv, x_test, y_test, 'clean_train_adv_eval', True)

        # Calculate training error
        if testing:
            do_eval(preds_adv, x_train, y_train,
                    'train_clean_train_adv_eval')

        print('Repeating the process, using adversarial training')

    # Create a new model and train it to be robust to FastGradientMethod
    model2 = ModelBasicCNN('model2', nb_classes, nb_filters)
    fgsm2 = FastGradientMethod(model2, sess=sess)

    def attack(x):
        return fgsm2.generate(x, **fgsm_params)

    loss2 = LossCrossEntropy(model2, smoothing=0.1, attack=attack)
    preds2 = model2.get_logits(x)
    adv_x2 = attack(x)

    if not backprop_through_attack:
        # For the fgsm attack used in this tutorial, the attack has zero
        # gradient so enabling this flag does not change the gradient.
        # For some other attacks, enabling this flag increases the cost of
        # training, but gives the defender the ability to anticipate how
        # the attacker will change their strategy in response to updates to
        # the defender's parameters.
        adv_x2 = tf.stop_gradient(adv_x2)
    preds2_adv = model2.get_logits(adv_x2)

    def evaluate2():
        # Accuracy of adversarially trained model on legitimate test inputs
        do_eval(preds2, x_test, y_test, 'adv_train_clean_eval', False)
        # Accuracy of the adversarially trained model on adversarial examples
        do_eval(preds2_adv, x_test, y_test, 'adv_train_adv_eval', True)

    # Perform and evaluate adversarial training
    train(sess, loss2, x, y, x_train, y_train, evaluate=evaluate2,
          args=train_params, rng=rng, var_list=model2.get_params())

    # Calculate training errors
    if testing:
        do_eval(preds2, x_train, y_train, 'train_adv_train_clean_eval')
        do_eval(preds2_adv, x_train, y_train, 'train_adv_train_adv_eval')

    return report
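

# The backprop_through_attack branch above relies on tf.stop_gradient treating
# the adversarial example as a constant during the backward pass. A minimal
# standalone sketch of that semantics (illustrative only, not part of the
# tutorial itself):
w = tf.Variable(2.0)
attack_out = w * w                       # stands in for the attack graph
blocked = tf.stop_gradient(attack_out)   # treated as a constant w.r.t. w
g_through, = tf.gradients(attack_out, w)  # a tensor computing 2*w
g_blocked = tf.gradients(blocked, w)[0]   # None: no gradient path through w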