class AdverseCNN:
    """CNN trained adversarially against FGSM on MNIST.

    NOTE(review): the training/eval methods rely on module-level globals
    (`sess`, `x`, `do_eval`, `train`, CleverHans classes) being defined
    before they are called — confirm against the surrounding script.
    """

    def __init__(self, nb_epochs, batch_size, learning_rate, eps=0.3,
                 clip_min=0, clip_max=1):
        # Hyper-parameter bundles handed to CleverHans train/eval/attack APIs.
        self.train_params = dict(
            nb_epochs=nb_epochs,
            batch_size=batch_size,
            learning_rate=learning_rate,
        )
        self.eval_params = dict(batch_size=batch_size)
        self.fgsm_params = dict(eps=eps, clip_min=clip_min, clip_max=clip_max)
        # Dataset slots, populated by get_data().
        self.x_train = self.y_train = None
        self.x_test = self.y_test = None
        # Fixed-seed RNG so training is reproducible.
        self.range = np.random.RandomState([2019, 11, 25])
        # Model/graph slots, populated by adverse_train().
        self.model = self.preds = self.loss = None
        self.img_rows = self.img_cols = self.nchannels = None
        self.nb_classes = None
        self.preds_adv = None

    def get_data(self, train_start, train_end, test_start, test_end):
        """Load the requested MNIST slices and record image/label shapes."""
        dataset = MNIST(train_start=train_start, train_end=train_end,
                        test_start=test_start, test_end=test_end)
        self.x_train, self.y_train = dataset.get_set('train')
        self.x_test, self.y_test = dataset.get_set('test')
        self.img_rows, self.img_cols, self.nchannels = self.x_train.shape[1:4]
        self.nb_classes = self.y_train.shape[1]

    def adverse_train(self, nb_filters, label_smoothing):
        """Build 'model2' and train it on FGSM adversarial examples."""
        self.model = ModelBasicCNN('model2', self.nb_classes, nb_filters)
        fgsm_attack = FastGradientMethod(self.model, sess=sess)

        def attack(inputs):
            # Fresh adversarial examples for whatever tensor is passed in.
            return fgsm_attack.generate(inputs, **self.fgsm_params)

        self.preds = self.model.get_logits(x)
        # The attack callback makes CrossEntropy train on adversarial inputs.
        self.loss = CrossEntropy(self.model, smoothing=label_smoothing,
                                 attack=attack)
        self.preds_adv = self.model.get_logits(attack(x))
        train(sess, self.loss, self.x_train, self.y_train,
              evaluate=self.evaluate, args=self.train_params,
              rng=self.range, var_list=self.model.get_params())

    def evaluate(self):
        """Training callback: report clean and adversarial test accuracy."""
        do_eval(self.preds, self.eval_params, self.x_test, self.y_test,
                'adv_train_clean_eval', False)
        do_eval(self.preds_adv, self.eval_params, self.x_test, self.y_test,
                'adv_train_adv_eval', True)

    def test(self):
        """Report clean and adversarial accuracy on the training split."""
        do_eval(self.preds, self.eval_params, self.x_train, self.y_train,
                'train_adv_train_clean_eval')
        do_eval(self.preds_adv, self.eval_params, self.x_train, self.y_train,
                'train_adv_train_adv_eval')
def mnist_tutorial(train_start=0, train_end=60000, test_start=0,
                   test_end=10000, nb_epochs=NB_EPOCHS,
                   batch_size=BATCH_SIZE, learning_rate=LEARNING_RATE,
                   clean_train=CLEAN_TRAIN, testing=False,
                   backprop_through_attack=BACKPROP_THROUGH_ATTACK,
                   nb_filters=NB_FILTERS, num_threads=None,
                   label_smoothing=0.1):
    """Train a CNN on MNIST, then generate FGSM adversarial examples for
    every train/test sample and save them to disk as PNG images.

    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :param clean_train: perform normal training on clean examples only
                        before performing adversarial training
    :param testing: if true, complete an AccuracyReport for unit tests
                    to verify that performance is adequate
    :param backprop_through_attack: If True, backprop through adversarial
                                    example construction process during
                                    adversarial training (unused here)
    :param nb_filters: number of convolutional filters in the model
    :param num_threads: if set, limit TF intra-op parallelism to this count
    :param label_smoothing: float, amount of label smoothing for cross entropy
    :return: an AccuracyReport object
    """
    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Set logging level to see debug information
    set_log_level(logging.DEBUG)

    # Create TF session
    if num_threads:
        # BUG FIX: the thread count was hard-coded to 1, so the
        # num_threads argument was silently ignored.
        config_args = dict(intra_op_parallelism_threads=num_threads)
    else:
        config_args = {'allow_soft_placement': True}
    sess = tf.Session(config=tf.ConfigProto(**config_args))

    # Get MNIST data
    mnist = MNIST(train_start=train_start, train_end=train_end,
                  test_start=test_start, test_end=test_end)
    x_train, y_train = mnist.get_set('train')
    x_test, y_test = mnist.get_set('test')

    # Use Image Parameters
    img_rows, img_cols, nchannels = x_train.shape[1:4]
    nb_classes = y_train.shape[1]

    # Define input TF placeholders
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols,
                                          nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    # Training / evaluation / attack hyper-parameters
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate
    }
    eval_params = {'batch_size': batch_size}
    fgsm_params = {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.}
    rng = np.random.RandomState([2017, 8, 30])

    def do_eval(preds, x_set, y_set, report_key, is_adv=None):
        """Evaluate `preds` on (x_set, y_set), record accuracy on `report`
        under `report_key`, and print it when is_adv is not None."""
        acc = model_eval(sess, x, y, preds, x_set, y_set, args=eval_params)
        setattr(report, report_key, acc)
        if is_adv is None:
            report_text = None
        elif is_adv:
            report_text = 'adversarial'
        else:
            report_text = 'legitimate'
        if report_text:
            print('Test accuracy on %s examples: %0.4f' % (report_text, acc))

    if clean_train:
        model = ModelBasicCNN('model1', nb_classes, nb_filters)
        preds = model.get_logits(x)
        loss = CrossEntropy(model, smoothing=label_smoothing)

        def evaluate():
            do_eval(preds, x_test, y_test, 'clean_train_clean_eval', False)

        train(sess, loss, x_train, y_train, evaluate=evaluate,
              args=train_params, rng=rng, var_list=model.get_params())

        # Calculate training error
        if testing:
            do_eval(preds, x_train, y_train, 'train_clean_train_clean_eval')

        # Initialize the Fast Gradient Sign Method (FGSM) attack object and
        # graph.  NOTE(review): the image-saving code below needs `model`,
        # so it can only run when clean_train is True.
        fgsm = FastGradientMethod(model, sess=sess)
        adv_fgsm_x = fgsm.generate(x, **fgsm_params)
        preds_adv_fgsm = model.get_logits(adv_fgsm_x)

        def save_adv_images(x_set, y_set, out_dir, tag):
            """Generate one FGSM adversarial example per sample and save it
            as an indexed-color PNG named <label>_<uuid4>.png in out_dir."""
            for index in range(len(y_set)):
                print(tag + ' ' + str(index))
                x_ = x_set[index]
                label = np.argmax(y_set[index])
                raw_data = (fgsm.generate_np(x_.reshape(
                    (1, 28, 28, 1)), **fgsm_params).reshape(
                        (28, 28)) * 255).astype('uint8')
                im = Image.fromarray(raw_data, mode='P')
                im.save(out_dir + str(label) + '_' + str(uuid.uuid4()) +
                        '.png')

        # Generate fgsm adversarial examples and save to disk.
        # (was: `dir = ...`, shadowing the builtin, plus four stepwise
        # os.mkdir calls and two copy-pasted save loops)
        base_dir = 'images/fgsm_adv/'
        os.makedirs(base_dir + 'train/', exist_ok=True)
        os.makedirs(base_dir + 'test/', exist_ok=True)
        save_adv_images(x_test, y_test, base_dir + 'test/', 'test')
        save_adv_images(x_train, y_train, base_dir + 'train/', 'train')

    return report
def mnist_tutorial(train_start=0, train_end=60000, test_start=0,
                   test_end=10000, nb_epochs=NB_EPOCHS,
                   batch_size=BATCH_SIZE, learning_rate=LEARNING_RATE,
                   clean_train=CLEAN_TRAIN, testing=False,
                   backprop_through_attack=BACKPROP_THROUGH_ATTACK,
                   nb_filters=NB_FILTERS, num_threads=None,
                   label_smoothing=0.1):
    """MNIST cleverhans tutorial: train a clean model, attack it with FGSM,
    then adversarially train a second model against FGSM.

    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :param clean_train: perform normal training on clean examples only
                        before performing adversarial training
    :param testing: if true, complete an AccuracyReport for unit tests
                    to verify that performance is adequate
    :param backprop_through_attack: If True, backprop through adversarial
                                    example construction process during
                                    adversarial training
    :param nb_filters: number of convolutional filters in the model
    :param num_threads: if set, limit TF intra-op parallelism to this count
    :param label_smoothing: float, amount of label smoothing for cross entropy
    :return: an AccuracyReport object
    """
    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Set logging level to see debug information
    set_log_level(logging.DEBUG)

    # Create TF session
    if num_threads:
        # BUG FIX: honor the requested thread count instead of the
        # hard-coded 1 that silently ignored `num_threads`.
        config_args = dict(intra_op_parallelism_threads=num_threads)
    else:
        config_args = {}
    sess = tf.Session(config=tf.ConfigProto(**config_args))

    # Get MNIST data
    mnist = MNIST(train_start=train_start, train_end=train_end,
                  test_start=test_start, test_end=test_end)
    x_train, y_train = mnist.get_set('train')
    x_test, y_test = mnist.get_set('test')

    # Use Image Parameters
    img_rows, img_cols, nchannels = x_train.shape[1:4]
    nb_classes = y_train.shape[1]

    # Define input TF placeholders
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols,
                                          nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate
    }
    eval_params = {'batch_size': batch_size}
    fgsm_params = {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.}
    rng = np.random.RandomState([2017, 8, 30])

    def do_eval(preds, x_set, y_set, report_key, is_adv=None):
        """Evaluate `preds` on (x_set, y_set), record accuracy on `report`
        under `report_key`, and print it when is_adv is not None."""
        acc = model_eval(sess, x, y, preds, x_set, y_set, args=eval_params)
        setattr(report, report_key, acc)
        if is_adv is None:
            report_text = None
        elif is_adv:
            report_text = 'adversarial'
        else:
            report_text = 'legitimate'
        if report_text:
            print('Test accuracy on %s examples: %0.4f' % (report_text, acc))

    if clean_train:
        model = ModelBasicCNN('model1', nb_classes, nb_filters)
        preds = model.get_logits(x)
        loss = CrossEntropy(model, smoothing=label_smoothing)

        def evaluate():
            do_eval(preds, x_test, y_test, 'clean_train_clean_eval', False)

        train(sess, loss, x_train, y_train, evaluate=evaluate,
              args=train_params, rng=rng, var_list=model.get_params())

        # Calculate training error
        if testing:
            do_eval(preds, x_train, y_train, 'train_clean_train_clean_eval')

        # Initialize the Fast Gradient Sign Method (FGSM) attack object and
        # graph
        fgsm = FastGradientMethod(model, sess=sess)
        adv_x = fgsm.generate(x, **fgsm_params)
        preds_adv = model.get_logits(adv_x)

        # Evaluate the accuracy of the MNIST model on adversarial examples
        do_eval(preds_adv, x_test, y_test, 'clean_train_adv_eval', True)

        # Calculate training error
        if testing:
            do_eval(preds_adv, x_train, y_train, 'train_clean_train_adv_eval')

        print('Repeating the process, using adversarial training')

    # Create a new model and train it to be robust to FastGradientMethod
    model2 = ModelBasicCNN('model2', nb_classes, nb_filters)
    fgsm2 = FastGradientMethod(model2, sess=sess)

    def attack(x):
        return fgsm2.generate(x, **fgsm_params)

    loss2 = CrossEntropy(model2, smoothing=label_smoothing, attack=attack)
    preds2 = model2.get_logits(x)
    adv_x2 = attack(x)

    if not backprop_through_attack:
        # For the fgsm attack used in this tutorial, the attack has zero
        # gradient so enabling this flag does not change the gradient.
        # For some other attacks, enabling this flag increases the cost of
        # training, but gives the defender the ability to anticipate how
        # the attacker will change their strategy in response to updates to
        # the defender's parameters.
        adv_x2 = tf.stop_gradient(adv_x2)
    preds2_adv = model2.get_logits(adv_x2)

    def evaluate2():
        # Accuracy of adversarially trained model on legitimate test inputs
        do_eval(preds2, x_test, y_test, 'adv_train_clean_eval', False)
        # Accuracy of the adversarially trained model on adversarial examples
        do_eval(preds2_adv, x_test, y_test, 'adv_train_adv_eval', True)

    # Perform and evaluate adversarial training
    train(sess, loss2, x_train, y_train, evaluate=evaluate2,
          args=train_params, rng=rng, var_list=model2.get_params())

    # Calculate training errors
    if testing:
        do_eval(preds2, x_train, y_train, 'train_adv_train_clean_eval')
        do_eval(preds2_adv, x_train, y_train, 'train_adv_train_adv_eval')

    return report
def mnist_tutorial(train_start=0, train_end=60000, test_start=0,
                   test_end=10000, nb_epochs=NB_EPOCHS,
                   batch_size=BATCH_SIZE, learning_rate=LEARNING_RATE,
                   clean_train=CLEAN_TRAIN, testing=False,
                   backprop_through_attack=BACKPROP_THROUGH_ATTACK,
                   nb_filters=NB_FILTERS, num_threads=None,
                   label_smoothing=0.1):
    """MNIST tutorial variant: load (or train) a series of checkpointed
    models and record accuracy/distortion per checkpoint.

    NOTE(review): this function reads several names that are not defined
    in this view and must be module-level globals: `file`, `save_dir`,
    `filename`, `eps`, `save`, `type`, `retrain`, `discretizeColor`,
    plus the helpers `train_with_noise` and `tf_model_load` — confirm
    against the full script.

    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :param clean_train: perform normal training on clean examples only
                        before performing adversarial training
    :param testing: if true, complete an AccuracyReport for unit tests
    :param backprop_through_attack: If True, backprop through adversarial
                                    example construction (unused here)
    :param nb_filters: number of convolutional filters in the model
    :param num_threads: if truthy, cap TF intra-op parallelism (at 1)
    :param label_smoothing: float, amount of label smoothing for cross entropy
    :return: an AccuracyReport object
    """
    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Set logging level to see debug information
    set_log_level(logging.DEBUG)

    # Create TF session.
    # NOTE(review): when num_threads is set, parallelism is capped at 1
    # regardless of the value passed — presumably intentional here, but
    # worth confirming.
    if num_threads:
        config_args = dict(intra_op_parallelism_threads=1)
    else:
        config_args = {}
    sess = tf.Session(config=tf.ConfigProto(**config_args))

    # Get MNIST data.
    # NOTE(review): `file` must be a module-level global holding the
    # dataset path (the Python 2 builtin of that name does not exist in
    # Python 3) — confirm.
    mnist = MNIST(path=file, train_start=train_start, train_end=train_end,
                  test_start=test_start, test_end=test_end)
    x_train, y_train = mnist.get_set('train')
    x_test, y_test = mnist.get_set('test')

    # x_train = x_train[0:1].reshape(784)
    # k = np.unique(x_train.reshape(-1, 784))
    # k = list(set(x_train.reshape(784)))
    # nk = [k.index(x_train[x]) for x in len(x_train)]
    # print(k, np.shape(k), nk)

    ###############################
    # Transform image to uniimage #
    ###############################
    # x_train = convert_uniimage(x_train)
    # x_test = transform_4_in_1(x_test)
    # trans_x_text = np.copy(x_test)
    # x_test = convert_uniimage(x_test)
    # uni_x_test = np.copy(x_test)

    # Use Image Parameters
    img_rows, img_cols, nchannels = x_train.shape[1:4]
    nb_classes = y_train.shape[1]

    # Define input TF placeholders
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols,
                                          nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    # Train an MNIST model.  `save_dir`/`filename` locate checkpoints.
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'train_dir': save_dir,
        'filename': filename,
    }
    eval_params = {'batch_size': batch_size}
    # `eps` comes from module scope, not a parameter of this function.
    fgsm_params = {'eps': eps, 'clip_min': 0., 'clip_max': 1.}
    rng = np.random.RandomState([2017, 8, 30])

    def do_eval(preds, x_set, y_set, report_key, is_adv=None, ae=None,
                type=None, datasetName=None, discretizeColor=1):
        """Evaluate `preds`, store accuracy on `report`, and return the
        (accuracy, distortion) pair from this project's extended
        model_eval.  The `type` parameter shadows the builtin."""
        accuracy, distortion = model_eval(sess, x, y, preds, x_set, y_set,
                                          args=eval_params, is_adv=is_adv,
                                          ae=ae, type=type,
                                          datasetName=datasetName,
                                          discretizeColor=discretizeColor)
        setattr(report, report_key, accuracy)
        if is_adv is None:
            report_text = None
        elif is_adv:
            report_text = 'adversarial'
        else:
            report_text = 'legitimate'
        if report_text:
            print('Test accuracy on %s examples: %0.4f' % (report_text,
                                                           accuracy))
        return accuracy, distortion

    if clean_train:
        model = ModelBasicCNN('model1', nb_classes, nb_filters)
        preds = model.get_logits(x)
        loss = CrossEntropy(model, smoothing=label_smoothing)

        def evaluate():
            # NOTE(review): `type=type` passes the *global* (or builtin)
            # `type`, not do_eval's parameter — confirm this is intended.
            do_eval(preds, x_test, y_test, 'clean_train_clean_eval', False,
                    type=type, datasetName="MNIST",
                    discretizeColor=discretizeColor)

        # Checkpoint step numbers to evaluate: 50, 100, ..., 1000.
        saveFileNumArr = []
        # saveFileNumArr = [50, 500, 1000]
        count = 0
        appendNum = 50
        while count < 1000:
            count = count + appendNum
            saveFileNumArr.append(count)

        distortionArr = []
        accuracyArr = []
        for i in range(len(saveFileNumArr)):
            saveFileNum = saveFileNumArr[i]
            model_path = os.path.join(save_dir,
                                      filename + "-" + str(saveFileNum))
            print("Trying to load trained model from: " + model_path)
            if os.path.exists(model_path + ".meta"):
                # Reuse an existing checkpoint instead of retraining.
                tf_model_load(sess, model_path)
                print("Load trained model")
            else:
                # train(sess, loss, x_train, y_train, evaluate=evaluate,
                #       args=train_params, rng=rng,
                #       var_list=model.get_params())
                train_with_noise(sess, loss, x_train, y_train,
                                 evaluate=evaluate, args=train_params,
                                 rng=rng, var_list=model.get_params(),
                                 save=save, type=type, datasetName="MNIST",
                                 retrain=retrain,
                                 discretizeColor=discretizeColor)

            # Calculate training error
            accuracy, distortion = do_eval(preds, x_test, y_test,
                                           'train_clean_train_clean_eval',
                                           False, type=type,
                                           datasetName="MNIST",
                                           discretizeColor=discretizeColor)

            # Initialize the Fast Gradient Sign Method (FGSM) attack object
            # and graph
            fgsm = FastGradientMethod(model, sess=sess)
            # fgsm = BasicIterativeMethod(model, sess=sess)
            # fgsm = MomentumIterativeMethod(model, sess=sess)
            # fgsm_params = {
            #     'clip_min': 0.,
            #     'clip_max': 1.,
            #     'verbose': False
            # }
            # fgsm = HopSkipJumpAttack(model, sess=sess)
            adv_x = fgsm.generate(x, **fgsm_params)
            # adv_x = fgsm.generate_np(x, **fgsm_params)
            # adv = sess.run(adv_x, feed_dict={x: x_test})
            preds_adv = model.get_logits(adv_x)
            # print(sess.run(preds_adv, feed_dict={x: x_test}))

            #############################
            # Create adversarial images #
            #############################
            # We have to produce adversarial image 1 by 1 by using
            # HopSkipJumpAttack
            # adv_test = []
            # for i in range(len(x_test)):
            #     tmp_adv_test = sess.run(adv_x, feed_dict={x: [x_test[i]]})
            #     adv_test.append(tmp_adv_test[0])
            #     if (i+1) % 100 == 0:
            #         print((i+1), "/", len(x_test), " adversarial images")
            #
            # adv_test = np.array(adv_test)
            # print(np.shape(adv_test))

            # Evaluate the accuracy of the MNIST model on adversarial
            # examples
            # do_eval(preds_adv, x_test, y_test, 'clean_train_adv_eval', True)
            # accuracy, distortion = do_eval(preds, x_test, y_test,
            #     'clean_train_adv_eval', True, ae=adv_x, type=type,
            #     datasetName="MNIST", discretizeColor=discretizeColor)
            # do_eval(preds, adv_test, y_test, 'clean_train_adv_eval', True)

            # NOTE(review): with the adversarial evals commented out, the
            # values appended here are the *clean* accuracy/distortion from
            # above — confirm that is the intent.
            distortionArr.append(distortion)
            accuracyArr.append(accuracy)
            # print(str(accuracy))
            # print(str(distortion))

        print("accuracy:")
        for accuracy in accuracyArr:
            print(accuracy)

        print("distortion:")
        for distortion in distortionArr:
            print(distortion)

    return report
class CleanCNN:
    """CNN trained on clean EMNIST 'byclass' data (62 classes), with an
    FGSM probe for adversarial robustness.

    NOTE(review): the methods rely on module-level globals (`sess`, `x`,
    `do_eval`, `train`, CleverHans classes, `extract_training_samples`,
    `oneHotEncodeY`) being defined before they run.
    """

    def __init__(self, nb_epochs, batch_size, learning_rate, eps=0.3,
                 clip_min=0, clip_max=1):
        # Hyper-parameter bundles handed to CleverHans train/eval/attack APIs.
        self.train_params = dict(
            nb_epochs=nb_epochs,
            batch_size=batch_size,
            learning_rate=learning_rate,
        )
        self.eval_params = dict(batch_size=batch_size)
        self.fgsm_params = dict(eps=eps, clip_min=clip_min, clip_max=clip_max)
        # Dataset slots, populated by get_data().
        self.x_train = self.y_train = None
        self.x_test = self.y_test = None
        # Fixed-seed RNG so training is reproducible.
        self.range = np.random.RandomState([2019, 11, 25])
        # Model/graph slots, populated by train().
        self.model = self.preds = self.loss = None
        self.img_rows = self.img_cols = self.nchannels = None
        self.nb_classes = None

    def get_data(self):
        """Load EMNIST 'byclass': images become float32 NHWC arrays scaled
        to [0, 1] with shape (n, 28, 28, 1); labels become one-hot float32
        vectors over 62 classes."""
        raw_train_x, raw_train_y = extract_training_samples('byclass')
        raw_test_x, raw_test_y = extract_test_samples('byclass')
        self.y_train = oneHotEncodeY(raw_train_y, 62).astype('float32')
        self.y_test = oneHotEncodeY(raw_test_y, 62).astype('float32')
        self.x_train = np.reshape(raw_train_x.astype('float32') / 255.,
                                  (raw_train_x.shape[0], 28, 28, 1))
        self.x_test = np.reshape(raw_test_x.astype('float32') / 255.,
                                 (raw_test_x.shape[0], 28, 28, 1))
        self.img_rows, self.img_cols, self.nchannels = self.x_train.shape[1:4]
        self.nb_classes = 62

    def train(self, nb_filters, label_smoothing):
        """Build 'model1' and fit it on the clean training data.

        The call to train(...) below resolves to the module-level helper;
        the method name does not shadow it inside this body.
        """
        self.model = ModelBasicCNN('model1', self.nb_classes, nb_filters)
        self.preds = self.model.get_logits(x)
        self.loss = CrossEntropy(self.model, smoothing=label_smoothing)
        train(sess, self.loss, self.x_train, self.y_train,
              evaluate=self.evaluate, args=self.train_params,
              rng=self.range, var_list=self.model.get_params())

    def evaluate(self):
        """Training callback: report clean accuracy on the test split."""
        do_eval(self.preds, self.eval_params, self.x_test, self.y_test,
                'clean_train_clean_eval', False)

    def test(self):
        """Report clean accuracy on the training split."""
        do_eval(self.preds, self.eval_params, self.x_train, self.y_train,
                'train_clean_train_clean_eval')

    def adverserial_testing(self):
        """Attack the trained model with FGSM and report adversarial
        accuracy on both splits.  (Method-name typo kept so existing
        callers keep working.)"""
        attacker = FastGradientMethod(self.model, sess=sess)
        perturbed = attacker.generate(x, **self.fgsm_params)
        adv_logits = self.model.get_logits(perturbed)
        do_eval(adv_logits, self.eval_params, self.x_test, self.y_test,
                'clean_train_adv_eval', True)
        do_eval(adv_logits, self.eval_params, self.x_train, self.y_train,
                'train_clean_train_adv_eval')
        print('Repeating the process, using adversarial training')
class AdverseCNN:
    # NOTE(review): this redefines AdverseCNN; in a single module the later
    # definition shadows the earlier MNIST-based one — confirm intent.
    """CNN adversarially trained against FGSM on EMNIST 'byclass'.

    Relies on module-level globals (`sess`, `x`, `do_eval`, `train`,
    CleverHans classes, `extract_training_samples`, `oneHotEncodeY`)
    being defined before the methods run — confirm against the script.
    """

    def __init__(self, nb_epochs, batch_size, learning_rate, eps=0.3,
                 clip_min=0, clip_max=1):
        # Hyper-parameter bundles for the CleverHans train/eval/attack APIs.
        self.train_params = {
            'nb_epochs': nb_epochs,
            'batch_size': batch_size,
            'learning_rate': learning_rate
        }
        self.eval_params = {'batch_size': batch_size}
        self.fgsm_params = {
            'eps': eps,
            'clip_min': clip_min,
            'clip_max': clip_max
        }
        # Dataset slots, populated by get_data().
        self.x_train = None
        self.y_train = None
        self.x_test = None
        self.y_test = None
        # Fixed-seed RNG so training is reproducible.
        self.range = np.random.RandomState([2019, 11, 25])
        # Model/graph slots, populated by adverse_train().
        self.model = None
        self.preds = None
        self.loss = None
        self.img_rows = None
        self.img_cols = None
        self.nchannels = None
        self.nb_classes = None
        self.preds_adv = None

    def get_data(self):
        """Load EMNIST 'byclass', normalize images to [0, 1] float32 NHWC
        (n, 28, 28, 1) and one-hot encode labels over 62 classes."""
        self.x_train, self.y_train = extract_training_samples('byclass')
        self.x_test, self.y_test = extract_test_samples('byclass')
        self.y_test = oneHotEncodeY(self.y_test, 62)
        self.y_train = oneHotEncodeY(self.y_train, 62)
        self.x_train = self.x_train.astype('float32')
        self.y_train = self.y_train.astype('float32')
        self.x_test = self.x_test.astype('float32')
        self.y_test = self.y_test.astype('float32')
        print(np.amax(self.y_train))
        print(self.x_train.shape, self.y_train.shape, self.x_test.shape,
              self.y_test.shape)
        # Scale pixel values into [0, 1]; the y assignments are no-ops.
        self.x_train = self.x_train / 255.
        self.y_train = self.y_train
        self.x_test = self.x_test / 255.
        self.y_test = self.y_test
        self.x_train = np.reshape(self.x_train,
                                  (self.x_train.shape[0], 28, 28, 1))
        self.x_test = np.reshape(self.x_test,
                                 (self.x_test.shape[0], 28, 28, 1))
        #self.y_test = np.reshape(self.y_test,(self.y_test.shape[0],1))
        #self.y_train = np.reshape(self.y_train,(self.y_train.shape[0],1))
        self.img_rows, self.img_cols, self.nchannels = self.x_train.shape[1:4]
        #images = np.reshape(images,(images.shape[0], 28, 28, 1))
        #self.x_train, self.y_train = mnist.get_set('train')
        #self.x_test, self.y_test = mnist.get_set('test')
        print("//////////////////////////////")
        #print(type(images))
        #print(images.shape[1:4])
        #print(labels.shape)
        #print(images.shape)
        # The triple-quoted block below is a dead (no-op) string statement
        # left over from the MNIST version.
        '''
        self.x_train, self.y_train = mnist.get_set('train')
        self.x_test, self.y_test = mnist.get_set('test')
        self.img_rows, self.img_cols, self.nchannels = self.x_train.shape[1:4]
        self.nb_classes = self.y_train.shape[1]
        print(np.amax(self.y_train))
        '''
        # EMNIST 'byclass' has 62 classes (digits + upper/lower letters).
        self.nb_classes = 62
        #self.x_sub = self.x_test[:s0]
        #self.y_sub = np.argmax(self.y_test[:s0], axis=1)
        #self.x_test = self.x_test[s0:]
        #self.y_test = self.y_test[s0:]

    def adverse_train(self, nb_filters, label_smoothing):
        """Build 'model2' and adversarially train it: the attack callback
        makes CrossEntropy compute the loss on FGSM examples."""
        self.model = ModelBasicCNN('model2', self.nb_classes, nb_filters)
        fgsm = FastGradientMethod(self.model, sess=sess)

        def attack(x):
            # Parameter `x` shadows the module-level placeholder here.
            return fgsm.generate(x, **self.fgsm_params)

        # Outside attack(), `x` is the module-level input placeholder.
        self.preds = self.model.get_logits(x)
        self.loss = CrossEntropy(self.model, smoothing=label_smoothing,
                                 attack=attack)
        adv_x = attack(x)
        self.preds_adv = self.model.get_logits(adv_x)
        train(sess, self.loss, self.x_train, self.y_train,
              evaluate=self.evaluate, args=self.train_params,
              rng=self.range, var_list=self.model.get_params())

    def evaluate(self):
        """Training callback: report clean and adversarial test accuracy."""
        do_eval(self.preds, self.eval_params, self.x_test, self.y_test,
                'adv_train_clean_eval', False)
        do_eval(self.preds_adv, self.eval_params, self.x_test, self.y_test,
                'adv_train_adv_eval', True)

    def test(self):
        """Report clean and adversarial accuracy on the training split."""
        do_eval(self.preds, self.eval_params, self.x_train, self.y_train,
                'train_adv_train_clean_eval')
        do_eval(self.preds_adv, self.eval_params, self.x_train,
                self.y_train, 'train_adv_train_adv_eval')
def mnist_tutorial(train_start=0, train_end=60000, test_start=0,
                   test_end=10000, nb_epochs=NB_EPOCHS,
                   batch_size=BATCH_SIZE, learning_rate=LEARNING_RATE,
                   clean_train=CLEAN_TRAIN, testing=False,
                   backprop_through_attack=BACKPROP_THROUGH_ATTACK,
                   nb_filters=NB_FILTERS, num_threads=None,
                   label_smoothing=0.1):
    """Train a CNN on one dataset (optionally interleaved 50/50 with a
    second dataset loaded via FLAGS.train2) and report accuracies.

    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :param clean_train: perform normal training on clean examples only
                        before performing adversarial training
    :param testing: if true, complete an AccuracyReport for unit tests
                    to verify that performance is adequate
    :param backprop_through_attack: If True, backprop through adversarial
                                    example construction (unused here)
    :param nb_filters: number of convolutional filters in the model
    :param num_threads: if set, limit TF intra-op parallelism to this count
    :param label_smoothing: float, amount of label smoothing for cross entropy
    :return: an AccuracyReport object
    """
    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Set logging level to see debug information
    set_log_level(logging.DEBUG)

    # Create TF session
    if num_threads:
        # BUG FIX: honor the requested thread count instead of the
        # hard-coded 1 that silently ignored `num_threads`.
        config_args = dict(intra_op_parallelism_threads=num_threads)
    else:
        config_args = {}
    sess = tf.Session(config=tf.ConfigProto(**config_args))

    # Load the primary dataset.
    x_train1, y_train1 = get_train(FLAGS.train1)
    x_test1, y_test1 = get_test(FLAGS.test1)
    x_train, y_train = x_train1, y_train1
    x_test, y_test = x_test1, y_test1
    if FLAGS.train2:
        # BUG FIX: get_train returns a (x, y) pair (see the FLAGS.train1
        # call above) — the original unpacked four values from a single
        # call, which would raise ValueError at runtime.
        # NOTE(review): assumes a FLAGS.test2 flag exists alongside
        # FLAGS.train2 — confirm against the flag definitions.
        x_train2, y_train2 = get_train(FLAGS.train2)
        x_test2, y_test2 = get_test(FLAGS.test2)
        x_train, y_train = zip_trains(x_train1, y_train1,
                                      x_train2, y_train2, 0.5)
        x_test, y_test = zip_tests(x_test1, y_test1,
                                   x_test2, y_test2, 0.5)

    # Use Image Parameters
    img_rows, img_cols, nchannels = x_train.shape[1:4]
    nb_classes = y_train.shape[1]

    # Define input TF placeholders
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols,
                                          nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))
    print(x)
    print(y)

    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate
    }
    eval_params = {'batch_size': batch_size}
    rng = np.random.RandomState([2017, 8, 30])

    def do_eval(preds, x_set, y_set, report_key, is_adv=None):
        """Evaluate `preds` on (x_set, y_set), record accuracy on `report`
        under `report_key`, and print it when is_adv is not None."""
        acc = model_eval(sess, x, y, preds, x_set, y_set, args=eval_params)
        setattr(report, report_key, acc)
        if is_adv is None:
            report_text = None
        elif is_adv:
            report_text = 'adversarial'
        else:
            report_text = 'legitimate'
        if report_text:
            print('Test accuracy on %s examples: %0.4f' % (report_text, acc))

    if clean_train:
        model = ModelBasicCNN('model1', nb_classes, nb_filters)
        preds = model.get_logits(x)
        loss = CrossEntropy(model, smoothing=label_smoothing)

        def evaluate():
            do_eval(preds, x_test, y_test, 'clean_train_clean_eval', False)

        train(sess, loss, x_train, y_train, evaluate=evaluate,
              args=train_params, rng=rng, var_list=model.get_params())

        # Calculate training error
        if testing:
            do_eval(preds, x_train, y_train, 'train_clean_train_clean_eval')

    return report
def main(argv=None):
    """Run the full train / attack / counter-attack experiment for the
    dataset selected by CONFIG.DATASET ('mnist', 'cifar10', 'moon' or
    'dims'): train (or load) a model, generate Carlini-Wagner adversarial
    examples, attack them a second time, and write accuracy results,
    intermediate arrays and plots under CONFIG.SAVE_PATH.

    :param argv: unused; present for the TF app.run() entry-point convention.
    :return: an AccuracyReport object
    """
    from cleverhans_tutorials import check_installation
    check_installation(__file__)

    # Create the output directory tree.  Note the shared model directory
    # lives one level above SAVE_PATH ('../all/<dataset>/') so different
    # runs can reuse the same trained model.
    if not os.path.exists( CONFIG.SAVE_PATH ):
        os.makedirs( CONFIG.SAVE_PATH )
    save_path_data = CONFIG.SAVE_PATH + 'data/'
    if not os.path.exists( save_path_data ):
        os.makedirs( save_path_data )
    model_path = CONFIG.SAVE_PATH + '../all/' + CONFIG.DATASET + '/'
    if not os.path.exists( model_path ):
        os.makedirs( model_path )
        os.makedirs( model_path + 'data/' )

    nb_epochs = FLAGS.nb_epochs
    batch_size = FLAGS.batch_size
    learning_rate = FLAGS.learning_rate
    nb_filters = FLAGS.nb_filters
    # Half of the test set: the first half is kept clean, the second half
    # is attacked (see points_x / points_x_bar below).
    len_x = int(CONFIG.NUM_TEST/2)

    start = time.time()

    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set seeds to improve reproducibility
    if CONFIG.DATASET == 'mnist' or CONFIG.DATASET == 'cifar10':
        tf.set_random_seed(1234)
        np.random.seed(1234)
        rd.seed(1234)
    elif CONFIG.DATASET == 'moon' or CONFIG.DATASET == 'dims':
        tf.set_random_seed(13)
        np.random.seed(1234)
        rd.seed(0)

    # Set logging level to see debug information
    set_log_level(logging.DEBUG)

    # Create TF session (capped at 20% of GPU memory so several runs can
    # share one device).
    tf_config = tf.ConfigProto(allow_soft_placement=True,log_device_placement=True)
    tf_config.gpu_options.per_process_gpu_memory_fraction = 0.2
    sess = tf.Session(config=tf_config)

    if CONFIG.DATASET == 'mnist':
        # Get MNIST data
        mnist = MNIST(train_start=0, train_end=CONFIG.NUM_TRAIN,
                      test_start=0, test_end=CONFIG.NUM_TEST)
        x_train, y_train = mnist.get_set('train')
        x_test, y_test = mnist.get_set('test')
    elif CONFIG.DATASET == 'cifar10':
        # Get CIFAR10 data; training uses an augmented tf.data pipeline
        # (random shift + horizontal flip), evaluation uses the raw arrays.
        data = CIFAR10(train_start=0, train_end=CONFIG.NUM_TRAIN,
                       test_start=0, test_end=CONFIG.NUM_TEST)
        dataset_size = data.x_train.shape[0]
        dataset_train = data.to_tensorflow()[0]
        dataset_train = dataset_train.map(
            lambda x, y: (random_shift(random_horizontal_flip(x)), y), 4)
        dataset_train = dataset_train.batch(batch_size)
        dataset_train = dataset_train.prefetch(16)
        x_train, y_train = data.get_set('train')
        x_test, y_test = data.get_set('test')
    elif CONFIG.DATASET == 'moon':
        # Create a two moon example
        X, y = make_moons(n_samples=(CONFIG.NUM_TRAIN+CONFIG.NUM_TEST),
                          noise=0.2, random_state=0)
        X = StandardScaler().fit_transform(X)
        x_train1, x_test1, y_train1, y_test1 = train_test_split(
            X, y,
            test_size=(CONFIG.NUM_TEST/(CONFIG.NUM_TRAIN+CONFIG.NUM_TEST)),
            random_state=0)
        x_train, y_train, x_test, y_test = normalize_reshape_inputs_2d(
            model_path, x_train1, y_train1, x_test1, y_test1)
    elif CONFIG.DATASET == 'dims':
        # Same two-moon data as above, but lifted into CONFIG.NUM_DIMS
        # dimensions via add_noise_and_QR.
        X, y = make_moons(n_samples=(CONFIG.NUM_TRAIN+CONFIG.NUM_TEST),
                          noise=0.2, random_state=0)
        X = StandardScaler().fit_transform(X)
        x_train1, x_test1, y_train1, y_test1 = train_test_split(
            X, y,
            test_size=(CONFIG.NUM_TEST/(CONFIG.NUM_TRAIN+CONFIG.NUM_TEST)),
            random_state=0)
        x_train2, y_train, x_test2, y_test = normalize_reshape_inputs_2d(
            model_path, x_train1, y_train1, x_test1, y_test1)
        x_train, x_test = add_noise_and_QR(x_train2, x_test2, CONFIG.NUM_DIMS)
        np.save(os.path.join(save_path_data, 'x_test'), x_test)
        np.save(os.path.join(save_path_data, 'y_test'), y_test)

    # Use Image Parameters
    img_rows, img_cols, nchannels = x_train.shape[1:4]
    nb_classes = y_train.shape[1]

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    # Train an model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate
    }
    # NOTE(review): evaluation runs with batch_size 1 — presumably required
    # by the extended model_eval below; confirm before changing.
    eval_params = {'batch_size': 1}
    rng = np.random.RandomState([2017, 8, 30])

    # All accuracy lines are appended to acc_param.txt for the whole run.
    with open(CONFIG.SAVE_PATH + 'acc_param.txt', 'a') as fi:

        def do_eval(adv_x, preds, x_set, y_set, report_key):
            """Evaluate `preds` on (x_set, y_set), record accuracy on the
            report under `report_key` and log it to acc_param.txt.

            Returns (pred_np, adv_x_np) — this project's model_eval variant
            also returns per-example predictions and the materialised
            (possibly adversarial) inputs fed through `adv_x`.
            """
            acc, pred_np, adv_x_np = model_eval(sess, x, y, preds, adv_x,
                                                nb_classes, x_set, y_set,
                                                args=eval_params)
            setattr(report, report_key, acc)
            if report_key:
                print('Accuracy on %s examples: %0.4f' % (report_key, acc),
                      file=fi)
            return pred_np, adv_x_np

        # Pick the model architecture and checkpoint location per dataset.
        if CONFIG.DATASET == 'mnist':
            trained_model_path = model_path + 'data/trained_model'
            model = ModelBasicCNN('model1', nb_classes, nb_filters)
        elif CONFIG.DATASET == 'cifar10':
            trained_model_path = model_path + 'data/trained_model'
            model = ModelAllConvolutional('model1', nb_classes, nb_filters,
                                          input_shape=[32, 32, 3])
        elif CONFIG.DATASET == 'moon':
            trained_model_path = model_path + 'data/trained_model'
            model = ModelMLP('model1', nb_classes)
        elif CONFIG.DATASET == 'dims':
            # 'dims' checkpoints are run-local (depend on CONFIG.NUM_DIMS),
            # unlike the shared model_path used by the other datasets.
            trained_model_path = save_path_data + 'trained_model'
            model = ModelMLP_dyn('model1', nb_classes, CONFIG.NUM_DIMS)

        preds = model.get_logits(x)
        loss = CrossEntropy(model, smoothing=0.1)

        def evaluate():
            # Periodic test-set evaluation callback used during training.
            _, _ = do_eval(x, preds, x_test, y_test, 'test during train')

        # Reuse an existing checkpoint when present; otherwise train and save.
        if os.path.isfile( trained_model_path + '.index' ):
            tf_model_load(sess, trained_model_path)
        else:
            if CONFIG.DATASET == 'mnist':
                train(sess, loss, x_train, y_train, evaluate=evaluate,
                      args=train_params, rng=rng,
                      var_list=model.get_params())
            elif CONFIG.DATASET == 'cifar10':
                train(sess, loss, None, None, dataset_train=dataset_train,
                      dataset_size=dataset_size, evaluate=evaluate,
                      args=train_params, rng=rng,
                      var_list=model.get_params())
            elif CONFIG.DATASET == 'moon':
                train_2d(sess, loss, x, y, x_train, y_train, save=False,
                         evaluate=evaluate, args=train_params, rng=rng,
                         var_list=model.get_params())
            elif CONFIG.DATASET == 'dims':
                train_2d(sess, loss, x, y, x_train, y_train,
                         evaluate=evaluate, args=train_params, rng=rng,
                         var_list=model.get_params())
            saver = tf.train.Saver()
            saver.save(sess, trained_model_path)

        # Evaluate the accuracy on test examples
        # (cached to disk; logits_0 = predictions on the unattacked test set)
        if os.path.isfile( save_path_data + 'logits_zero_attacked.npy' ):
            logits_0 = np.load(save_path_data + 'logits_zero_attacked.npy')
        else:
            _, _ = do_eval(x, preds, x_train, y_train, 'train')
            logits_0, _ = do_eval(x, preds, x_test, y_test, 'test')
            np.save(os.path.join(save_path_data, 'logits_zero_attacked'),
                    logits_0)

        if CONFIG.DATASET == 'moon':
            # Dense 2-D grid over [0,1]^2 used to visualise the decision
            # surface and (below) to derive attack constants.
            num_grid_points = 5000
            if os.path.isfile( model_path + 'data/images_mesh'
                               + str(num_grid_points) + '.npy' ):
                x_mesh = np.load(model_path + 'data/images_mesh'
                                 + str(num_grid_points) + '.npy')
                logits_mesh = np.load(model_path + 'data/logits_mesh'
                                      + str(num_grid_points) + '.npy')
            else:
                xx, yy = np.meshgrid(np.linspace(0, 1, num_grid_points),
                                     np.linspace(0, 1, num_grid_points))
                x_mesh1 = np.stack([np.ravel(xx), np.ravel(yy)]).T
                # Dummy labels: only the predicted logits are of interest.
                y_mesh1 = np.ones((x_mesh1.shape[0]),dtype='int64')
                # NOTE(review): called with 2 data args here vs. 4 in the
                # dataset setup above — presumably the test split is
                # optional in normalize_reshape_inputs_2d; confirm.
                x_mesh, y_mesh, _, _ = normalize_reshape_inputs_2d(
                    model_path, x_mesh1, y_mesh1)
                logits_mesh, _ = do_eval(x, preds, x_mesh, y_mesh, 'mesh')
                x_mesh = np.squeeze(x_mesh)
                np.save(os.path.join(model_path,
                                     'data/images_mesh'+str(num_grid_points)),
                        x_mesh)
                np.save(os.path.join(model_path,
                                     'data/logits_mesh'+str(num_grid_points)),
                        logits_mesh)

        # First half of the test set stays clean; second half gets attacked.
        points_x = x_test[:len_x]
        points_y = y_test[:len_x]
        points_x_bar = x_test[len_x:]
        points_y_bar = y_test[len_x:]

        # Initialize the CW attack object and graph
        cw = CarliniWagnerL2(model, sess=sess)

        # first attack
        attack_params = {
            'learning_rate': CONFIG.CW_LEARNING_RATE,
            'max_iterations': CONFIG.CW_MAX_ITERATIONS
        }

        if CONFIG.DATASET == 'moon':
            # Attack constants derived from the decision-surface polytopes.
            out_a = compute_polytopes_a(x_mesh, logits_mesh, model_path)
            attack_params['const_a_min'] = out_a
            attack_params['const_a_max'] = 100

        adv_x = cw.generate(x, **attack_params)

        if os.path.isfile( save_path_data + 'images_once_attacked.npy' ):
            adv_img_1 = np.load(save_path_data + 'images_once_attacked.npy')
            logits_1 = np.load(save_path_data + 'logits_once_attacked.npy')
        else:
            # Evaluate the accuracy on adversarial examples
            preds_adv = model.get_logits(adv_x)
            logits_1, adv_img_1 = do_eval(adv_x, preds_adv, points_x_bar,
                                          points_y_bar, 'test once attacked')
            np.save(os.path.join(save_path_data, 'images_once_attacked'),
                    adv_img_1)
            np.save(os.path.join(save_path_data, 'logits_once_attacked'),
                    logits_1)

        # counter attack (attack the adversarial examples a second time)
        attack_params['max_iterations'] = 1024

        if CONFIG.DATASET == 'moon':
            # Step size from the epsilon-ball analysis of the first attack.
            out_alpha2 = compute_epsilons_balls_alpha(x_mesh,
                                                      np.squeeze(x_test),
                                                      np.squeeze(adv_img_1),
                                                      model_path,
                                                      CONFIG.SAVE_PATH)
            attack_params['learning_rate'] = out_alpha2
            attack_params['const_a_min'] = -1
            attack_params['max_iterations'] = 2048
            plot_data(np.squeeze(adv_img_1), logits_1,
                      CONFIG.SAVE_PATH+'data_pred1.png', x_mesh, logits_mesh)

        adv_adv_x = cw.generate(x, **attack_params)

        # Counter-attack input: clean first half + once-attacked second half,
        # labelled with the first attack's predictions.
        x_k = np.concatenate((points_x, adv_img_1), axis=0)
        y_k = np.concatenate((points_y, logits_1), axis=0)

        if os.path.isfile( save_path_data + 'images_twice_attacked.npy' ):
            adv_img_2 = np.load(save_path_data + 'images_twice_attacked.npy')
            logits_2 = np.load(save_path_data + 'logits_twice_attacked.npy')
        else:
            # Evaluate the accuracy on adversarial examples
            preds_adv_adv = model.get_logits(adv_adv_x)
            logits_2, adv_img_2 = do_eval(adv_adv_x, preds_adv_adv, x_k, y_k,
                                          'test twice attacked')
            np.save(os.path.join(save_path_data, 'images_twice_attacked'),
                    adv_img_2)
            np.save(os.path.join(save_path_data, 'logits_twice_attacked'),
                    logits_2)

        if CONFIG.DATASET == 'moon':
            plot_data(np.squeeze(adv_img_2[:len_x]), logits_2[:len_x],
                      CONFIG.SAVE_PATH+'data_pred2.png', x_mesh, logits_mesh)
            plot_data(np.squeeze(adv_img_2[len_x:]), logits_2[len_x:],
                      CONFIG.SAVE_PATH+'data_pred12.png', x_mesh, logits_mesh)
            test_balls(np.squeeze(x_k), np.squeeze(adv_img_2), logits_0,
                       logits_1, logits_2, CONFIG.SAVE_PATH)

        compute_returnees(logits_0[len_x:], logits_1, logits_2[len_x:],
                          logits_0[:len_x], logits_2[:len_x],
                          CONFIG.SAVE_PATH)

        # Sum over all non-batch axes (images have a channel axis, the
        # 2-D datasets do not).
        if x_test.shape[-1] > 1:
            num_axis=(1,2,3)
        else:
            num_axis=(1,2)

        # Per-example L2 distances: clean -> counter-attacked (D_p) and
        # once-attacked -> counter-attacked (D_p_p).
        D_p = np.squeeze(np.sqrt(np.sum(np.square(points_x-adv_img_2[:len_x]),
                                        axis=num_axis)))
        D_p_p = np.squeeze(np.sqrt(np.sum(np.square(adv_img_1
                                                    - adv_img_2[len_x:]),
                                          axis=num_axis)))
        D_p_mod, D_p_p_mod = modify_D(D_p, D_p_p, logits_0[len_x:], logits_1,
                                      logits_2[len_x:], logits_0[:len_x],
                                      logits_2[:len_x])

        # Only analyse distances when both filtered sets are non-empty.
        if D_p_mod != [] and D_p_p_mod != []:
            plot_violins(D_p_mod, D_p_p_mod, CONFIG.SAVE_PATH)
            threshold_evaluation(D_p_mod, D_p_p_mod, CONFIG.SAVE_PATH)
            _ = compute_auroc(D_p_mod, D_p_p_mod, CONFIG.SAVE_PATH)

        plot_results_models(len_x, CONFIG.DATASET, CONFIG.SAVE_PATH)

    print('Time needed:', time.time()-start)

    return report