def __init__(self, filename=None, absolute=False, tolerance=0.1):
    try:
        if filename is not None:
            with open(filename, 'r') as f:
                # Get the scan's pose (stored in cartesian form)
                l = f.readline()
                scan_loc = reg.findall(l)[0]
                self.posx = float(scan_loc[0])
                self.posy = float(scan_loc[1])
                self.rot = float(scan_loc[2])
                self.scan_points = []
                first_dist = float(scan_loc[3])
                first_angle = float(scan_loc[4])
                if absolute:
                    self.scan_points.append(polar2origincartesian(self, first_dist, first_angle))
                else:
                    self.scan_points.append(polar2cartesian(first_dist, first_angle))
                line = f.readline()
                # Data is in polar form
                while line:
                    coords = [float(c) for c in reg.findall(line)[0][5:7]]
                    if absolute:
                        self.scan_points.append(polar2origincartesian(self, coords[0], coords[1]))
                    else:
                        self.scan_points.append(polar2cartesian(coords[0], coords[1]))
                    line = f.readline()
    except ValueError as e:
        print("Error in file:", filename)
        raise
    self.scan_points = subsample(self.scan_points, tolerance)
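# NOTE: polar2cartesian and polar2origincartesian are defined elsewhere in this
# project. A minimal sketch of what they might look like, assuming readings are
# (range, bearing) pairs and that polar2origincartesian additionally transforms
# the point into the global frame using the scan's pose (posx, posy, rot):
import math

def polar2cartesian(dist, angle):
    # Convert a (range, bearing) reading to sensor-frame (x, y).
    return (dist * math.cos(angle), dist * math.sin(angle))

def polar2origincartesian(scan, dist, angle):
    # Convert a reading to global-frame (x, y) using the scan's pose.
    x, y = polar2cartesian(dist, angle + scan.rot)
    return (scan.posx + x, scan.posy + y)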
def subsample_and_serialize(data_root, in_folder, out_folder):
    """Read MATLAB files, optionally attenuate and subsample, and write TFRecords files.

    NOTE: place test set files in a different folder than the training .mat files.

    Arguments:
    data_root -- root folder for the project's data
    in_folder -- folder where the training set .mat files are located
    out_folder -- folder where the *.train and *.valid protobuf files will be written
    """
    raw_folder = os.path.join(data_root, in_folder)
    file_names = filter(lambda file_name: file_name.endswith(".mat"),
                        os.listdir(raw_folder))
    preprocessed_dir = os.path.join(data_root, out_folder)
    if not os.path.exists(preprocessed_dir):
        os.mkdir(preprocessed_dir)
    for mat_file_name in file_names:
        train_file_name = os.path.join(preprocessed_dir,
                                       mat_file_name.replace(".mat", ".train"))
        valid_file_name = os.path.join(preprocessed_dir,
                                       mat_file_name.replace(".mat", ".valid"))
        if os.path.exists(train_file_name) and os.path.exists(valid_file_name):
            print("Skipping existing file:", train_file_name)
            print("Skipping existing file:", valid_file_name)
            continue
        label = get_label(mat_file_name)
        try:
            data = mat_to_data(os.path.join(raw_folder, mat_file_name))
        except ValueError:
            print("Skipping broken file:", mat_file_name)
            continue
        xs = data["data"]
        xs = normalize(xs)
        if SUBSAMPLE:
            xs = subsample(xs, channels=CHANNELS, rate=SUBSAMPLE_RATE)
        num_windows = xs.shape[0] // WINDOW_SIZE
        xs = np.reshape(xs, (num_windows, WINDOW_SIZE, CHANNELS))
        train_writer = tf.python_io.TFRecordWriter(train_file_name)
        valid_writer = tf.python_io.TFRecordWriter(valid_file_name)
        print("Writing file:", train_file_name)
        print("Writing file:", valid_file_name)
        # Every 20th window goes to the validation set; the rest go to training.
        for idx, x in enumerate(xs):
            example = to_example_proto(x, label)
            if idx % 20 == 0:
                valid_writer.write(example.SerializeToString())
            else:
                train_writer.write(example.SerializeToString())
        train_writer.close()
        valid_writer.close()
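# NOTE: to_example_proto is defined elsewhere. A plausible minimal version,
# assuming each window is serialized as a flat float feature plus an integer
# label (the feature names "data" and "label" are assumptions):
import numpy as np
import tensorflow as tf

def to_example_proto(x, label):
    # Pack one (WINDOW_SIZE, CHANNELS) window and its label into a tf.train.Example.
    return tf.train.Example(features=tf.train.Features(feature={
        "data": tf.train.Feature(
            float_list=tf.train.FloatList(value=x.astype(np.float32).ravel())),
        "label": tf.train.Feature(
            int64_list=tf.train.Int64List(value=[int(label)])),
    }))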
def _sample_from_m_matches(m):
    indicators = tf.equal(num_matches, tf.cast(m, tf.float32))
    ### debug
    # hist = tf.bincount(tf.cast(num_matches, tf.int32), minlength=n, maxlength=n)
    # indicators = tf.Print(indicators, [m, self._k, num_matches, hist],
    #                       summarize=1000)
    ###
    if self._subsample_hard_examples:
        return util.topk_or_pad_inds_with_resampling(
            indicators, difficulties, num_samples)
    else:
        return util.subsample(indicators, num_samples)
def __init__(self, filename, numpoints, samplesize=-1, tolerance=0.1):
    self.points = []
    self.minX = sys.maxsize
    self.minY = sys.maxsize
    self.maxX = -sys.maxsize - 1
    self.maxY = -sys.maxsize - 1
    try:
        if filename is not None:
            with open(filename, 'r') as f:
                l = f.readline()
                while l:
                    point = [float(v) for v in l.rstrip('\n').split(",")]
                    # Track the bounding box; a point may set both a min and a max.
                    if point[0] < self.minX:
                        self.minX = point[0]
                    if point[0] > self.maxX:
                        self.maxX = point[0]
                    if point[1] < self.minY:
                        self.minY = point[1]
                    if point[1] > self.maxY:
                        self.maxY = point[1]
                    self.points.append(point)
                    l = f.readline()  # Data is in polar form
    except ValueError as e:
        print("Error in file:", filename)
        raise
    self.points = subsample(self.points, tolerance)
    dimX = self.maxX - self.minX
    dimY = self.maxY - self.minY
    # Choose a grid with roughly numpoints square cells covering the bounding box.
    Xpoints = int(
        math.sqrt(((dimX * numpoints) / dimY) +
                  (math.pow(dimX - dimY, 2) / (4 * (dimY**2)))) -
        ((dimX - dimY) / (2 * dimY)))
    Ypoints = int(numpoints / Xpoints)
    self.Xstep = dimX / (Xpoints - 1)
    self.Ystep = dimY / (Ypoints - 1)
    self.grid = []
    for x in trange(Xpoints):
        row = []
        for y in range(Ypoints):
            # Offset by the bounding-box minimum so cells align with the data.
            Xrange = (self.minX + x * self.Xstep, self.minX + (x + 1) * self.Xstep)
            Yrange = (self.minY + y * self.Ystep, self.minY + (y + 1) * self.Ystep)
            hasPoint = False
            for point in self.points:
                if inRange(point[0], Xrange) and inRange(point[1], Yrange):
                    hasPoint = True
                    break
            row.append(1 if hasPoint else 0)
        self.grid.append(row)
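# Where the Xpoints expression comes from: requiring square cells, Xstep == Ystep,
# i.e. dimX / (Xpoints - 1) == dimY / (Ypoints - 1) with Ypoints = numpoints / Xpoints,
# yields the quadratic
#     dimY * X^2 + (dimX - dimY) * X - dimX * numpoints = 0,
# whose positive root is
#     X = sqrt(dimX * numpoints / dimY + (dimX - dimY)^2 / (4 * dimY^2))
#         - (dimX - dimY) / (2 * dimY),
# which is the expression computed above.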
def __init__(self, filename=None, samplesize=-1, tolerance=0.1):
    self.points = []
    try:
        if filename is not None:
            with open(filename, 'r') as f:
                l = f.readline()
                while l:
                    point = l.rstrip('\n').split(",")
                    self.points.append([float(v) for v in point])
                    l = f.readline()  # Data is in polar form
    except ValueError as e:
        print("Error in file:", filename)
        raise
    self.points = subsample(self.points, tolerance)
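# NOTE: the subsample(points, tolerance) helper shared by these loaders is not
# shown. A minimal sketch of one common interpretation, assuming tolerance is
# the cell size of a grid filter that keeps one point per occupied cell:
def subsample(points, tolerance):
    # Keep at most one point per (tolerance x tolerance) grid cell.
    seen = set()
    kept = []
    for p in points:
        cell = (int(p[0] // tolerance), int(p[1] // tolerance))
        if cell not in seen:
            seen.add(cell)
            kept.append(p)
    return kept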
def generate_test_segment(data_root, test_folder="test"):
    """Emit a preprocessed segment along with its filename.

    Segments are already chopped into windows, ready to send into a feed dict.
    """
    test_path = os.path.join(data_root, test_folder)
    file_names = filter(lambda x: x.endswith(".mat"), os.listdir(test_path))
    for file_name in file_names:
        file_path = os.path.join(test_path, file_name)
        data = mat_to_data(file_path)
        segment = data["data"]
        segment = normalize(segment)
        if SUBSAMPLE:
            segment = subsample(segment, channels=CHANNELS, rate=SUBSAMPLE_RATE)
        num_windows = segment.shape[0] // WINDOW_SIZE
        segment = np.reshape(segment, (num_windows, WINDOW_SIZE, CHANNELS))
        yield segment, file_name
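# A sketch of how the generator might be consumed at inference time; sess,
# probs_op, and inputs are assumed names, not part of the original code:
predictions = {}
for segment, file_name in generate_test_segment("data"):
    # segment has shape (num_windows, WINDOW_SIZE, CHANNELS); score all windows at once.
    probs = sess.run(probs_op, feed_dict={inputs: segment})
    predictions[file_name] = probs.mean(axis=0)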
val_result = np.load('dataset/val_result_mtrx.npy')
annotations_val = '/root/MedleyDB_selected/Annotations/Melody_Annotations/MELODY1/val/'
val_set = PitchEstimationDataSet(annotations_val, '/root/data/val/',
                                 sr_ratio=2, audio_type='MIX')

# For each frame, keep the pitch of the candidate with the highest score.
val_pitches = []
for i in range(val_result.shape[0]):
    pitch_frame = val_result[i]
    best_score = -1
    best_pitch = 0
    for j in range(val_result.shape[1]):
        if pitch_frame[j][0] > best_score:
            best_score = pitch_frame[j][0]
            best_pitch = pitch_frame[j][1]
    val_pitches.append(best_pitch)
val_pitches = np.asarray(val_pitches)

val_labels = []
for pitches in val_set.pitches:
    val_labels += pitches
val_labels = np.asarray(val_labels)

sampled_val_pitches = util.subsample(val_pitches, val_labels)

labels = list(range(109))
cnf_matrix = confusion_matrix(val_labels, sampled_val_pitches, labels=labels)
plot_confusion_matrix(cnf_matrix, title='cnf matrix')
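# The per-frame argmax loop above can be vectorized; a sketch assuming
# val_result has shape (frames, candidates, 2) with scores in column 0 and
# pitches in column 1:
best = np.argmax(val_result[:, :, 0], axis=1)  # top-scoring candidate per frame
val_pitches = val_result[np.arange(val_result.shape[0]), best, 1]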
def main(argv):
    print("Start Main")
    # Set arguments: Save_Dir Structure Learning_Rate Early_Stopping Batch_Size Data_Dir
    epochs = FLAGS.epochs
    data_dir = FLAGS.data_dir
    save_dir = FLAGS.save_dir
    learning_rate = FLAGS.lr
    early_stop = FLAGS.early_stop
    batch_size = FLAGS.batch_size
    reg_coeff = FLAGS.reg_coeff
    split = FLAGS.split
    master = FLAGS.master
    checkpoint_path = FLAGS.checkpoint_path
    input_dir = FLAGS.input_dir
    output_dir = FLAGS.output_dir
    image_width = FLAGS.image_width
    image_height = FLAGS.image_height
    num_classes = FLAGS.num_classes
    eps = FLAGS.eps
    batch_shape = [batch_size, image_height, image_width, 3]
    num_ens = FLAGS.num_ens

    tf.logging.set_verbosity(tf.logging.INFO)

    (x_train, y_train), (x_test, y_test) = mnist.load_data()
    if K.image_data_format() == 'channels_first':
        x_train = x_train.reshape(x_train.shape[0], 1, image_width, image_height)
        x_test = x_test.reshape(x_test.shape[0], 1, image_width, image_height)
        input_shape = (1, image_width, image_height)
    else:
        x_train = x_train.reshape(x_train.shape[0], image_width, image_height, 1)
        x_test = x_test.reshape(x_test.shape[0], image_width, image_height, 1)
        input_shape = (image_width, image_height, 1)
    x_train = x_train.astype('float32')
    x_test = x_test.astype('float32')
    x_train /= 255
    x_test /= 255

    # Our model architecture for the MNIST dataset
    def model_arch():
        model = Sequential()
        model.add(Conv2D(32, kernel_size=(3, 3), activation='relu',
                         input_shape=input_shape))
        model.add(Conv2D(64, (3, 3), activation='relu'))
        model.add(MaxPooling2D(pool_size=(2, 2)))
        model.add(Dropout(0.25))
        model.add(Flatten())
        model.add(Dense(128, activation='relu'))
        model.add(Dropout(0.5))
        model.add(Dense(num_classes, activation='softmax'))
        model.compile(loss=keras.losses.categorical_crossentropy,
                      optimizer=keras.optimizers.Adadelta(),
                      metrics=['accuracy'])
        return model

    y_train = keras.utils.to_categorical(y_train, num_classes)
    y_test = keras.utils.to_categorical(y_test, num_classes)

    sess = tf.Session()
    keras.backend.set_session(sess)

    x_noisy = util.add_gaussian_noise(x_train, 0, 64)  # Add gaussian noise to all images

    # preds_ens stores the predictions of each model in the ensemble (10), one
    # per column; max_vote_ens stores the majority vote across all models.
    preds_ens = np.zeros((x_test.shape[0], 10))
    max_vote_ens = np.zeros(x_test.shape[0])
    for i in range(num_ens):
        # Build a fresh model for every member of the ensemble.
        model = model_arch()
        # Subsample from the entire data set (bagging).
        sub_imgs, sub_labels = util.subsample(x_noisy, y_train)
        model.fit(sub_imgs, sub_labels, batch_size=batch_size,
                  epochs=epochs, verbose=1)
        model.save("models/mnist/" + str(i) + ".h5")
        # Store this model's predictions in the ith column of preds_ens.
        ans = sess.run(tf.argmax(model.predict(x_test), axis=1))
        preds_ens[:, i] = ans.reshape((x_test.shape[0]))
        del model

    # preds_ens now holds the test-set predictions of every model in the
    # ensemble; the ith column contains the predictions of the ith model.
    ens_acc = np.zeros(num_ens)
    for i in range(num_ens):
        for j in range(preds_ens.shape[0]):
            # Majority vote over the first i+1 models for this test instance.
            b = Counter(preds_ens[j][0:i + 1])
            max_vote_ens[j] = b.most_common(1)[0][0]
        # Accuracy of the ensemble using the first i+1 models.
        ens_acc_i = sess.run(
            tf.reduce_mean(
                tf.cast(tf.equal(max_vote_ens, tf.argmax(y_test, axis=1)),
                        tf.float32)))
        ens_acc[i] = ens_acc_i
    # TODO print the nonperturbed test accuracy to the output file.

    # Build a model for normal training on the entire noisy data.
    model = model_arch()
    model.fit(x_noisy, y_train, batch_size=batch_size, epochs=epochs, verbose=1)
    acc = model.evaluate(x_test, y_test, verbose=0)
    acc_noisy_normal = acc[1]  # accuracy of the normal model on noisy train data
    del model

    # Build a new model for normal training (without ensemble) on the entire
    # train data (without bagging or noise).
    model = model_arch()
    model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, verbose=1)
    acc = model.evaluate(x_test, y_test, verbose=0)
    model.save("models/original_model.h5")
    acc_normal = acc[1]  # accuracy of the normal model

    # Generate FGSM adversarial examples on the test data.
    adv_fgsm = util.fgsm_attack(x_test, model, sess)
    acc_fgsm = model.evaluate(adv_fgsm, y_test, verbose=0)
    acc_fgsm = acc_fgsm[1]  # accuracy of the normal model on FGSM adversarial examples

    # Generate BIM adversarial examples on the test data.
    adv_bim = util.bim_attack(x_test, model, sess)
    acc_bim = model.evaluate(adv_bim, y_test, verbose=0)
    acc_bim = acc_bim[1]  # accuracy of the normal model on BIM adversarial examples

    # Generate L-BFGS adversarial examples on the test data; the target class is 6.
    adv_lbfgs = util.lbfgs_attack(x_test, model, sess, 6)
    acc_lbfgs = model.evaluate(adv_lbfgs, y_test, verbose=0)
    acc_lbfgs = acc_lbfgs[1]  # accuracy of the normal model on L-BFGS adversarial examples

    # Per-attack prediction and majority-vote buffers for the ensemble
    # (one column per model in the ensemble).
    preds_ens_fgsm = np.zeros((x_test.shape[0], 10))
    max_vote_ens_fgsm = np.zeros(x_test.shape[0])
    preds_ens_bim = np.zeros((x_test.shape[0], 10))
    max_vote_ens_bim = np.zeros(x_test.shape[0])
    preds_ens_lbfgs = np.zeros((x_test.shape[0], 10))
    max_vote_ens_lbfgs = np.zeros(x_test.shape[0])
    del model

    for i in range(num_ens):
        model = load_model("models/mnist/" + str(i) + ".h5")
        # Predictions of model i on the FGSM adversarial examples.
        ans = sess.run(tf.argmax(model.predict(adv_fgsm), axis=1))
        preds_ens_fgsm[:, i] = ans.reshape((adv_fgsm.shape[0]))
        # Predictions of model i on the BIM adversarial examples.
        ans = sess.run(tf.argmax(model.predict(adv_bim), axis=1))
        preds_ens_bim[:, i] = ans.reshape((adv_bim.shape[0]))
        # Predictions of model i on the L-BFGS adversarial examples.
        ans = sess.run(tf.argmax(model.predict(adv_lbfgs), axis=1))
        preds_ens_lbfgs[:, i] = ans.reshape((adv_lbfgs.shape[0]))
        del model

    # preds_ens_fgsm now holds the FGSM-adversarial predictions of every model;
    # the ith column contains the predictions of the ith model.
    ens_acc_fgsm = np.zeros(num_ens)
    for i in range(num_ens):
        for j in range(preds_ens_fgsm.shape[0]):
            # Majority vote over the first i+1 models for this instance.
            b = Counter(preds_ens_fgsm[j][0:i + 1])
            max_vote_ens_fgsm[j] = b.most_common(1)[0][0]
        # Accuracy of the ensemble on the FGSM adversarial examples.
        ens_acc_fgsm_i = sess.run(
            tf.reduce_mean(
                tf.cast(tf.equal(max_vote_ens_fgsm, tf.argmax(y_test, axis=1)),
                        tf.float32)))
        ens_acc_fgsm[i] = ens_acc_fgsm_i

    # Same majority-vote evaluation for the BIM adversarial predictions.
    ens_acc_bim = np.zeros(num_ens)
    for i in range(num_ens):
        for j in range(preds_ens_bim.shape[0]):
            b = Counter(preds_ens_bim[j][0:i + 1])
            max_vote_ens_bim[j] = b.most_common(1)[0][0]
        # Accuracy of the ensemble on the BIM adversarial examples.
        ens_acc_bim_i = sess.run(
            tf.reduce_mean(
                tf.cast(tf.equal(max_vote_ens_bim, tf.argmax(y_test, axis=1)),
                        tf.float32)))
        ens_acc_bim[i] = ens_acc_bim_i

    # Same majority-vote evaluation for the L-BFGS adversarial predictions.
    ens_acc_lbfgs = np.zeros(num_ens)
    for i in range(num_ens):
        for j in range(preds_ens_lbfgs.shape[0]):
            b = Counter(preds_ens_lbfgs[j][0:i + 1])
            max_vote_ens_lbfgs[j] = b.most_common(1)[0][0]
        # Accuracy of the ensemble on the L-BFGS adversarial examples.
        ens_acc_lbfgs_i = sess.run(
            tf.reduce_mean(
                tf.cast(tf.equal(max_vote_ens_lbfgs, tf.argmax(y_test, axis=1)),
                        tf.float32)))
        ens_acc_lbfgs[i] = ens_acc_lbfgs_i
    # ----------------------------- Adversarial Training -----------------------------
    # First, adversarial examples are generated from the train data; the model is then
    # trained on train_data + adv_train_data and tested on both the normal test data
    # and the adversarial test data. Adversarial examples are therefore generated
    # twice: once on the train data and once on the test data.
    model = load_model("models/original_model.h5")
    wrap = KerasModelWrapper(model)

    # Generate adversarial examples on the train data.
    adv_fgsm_train = util.fgsm_attack(x_train, model, sess)
    adv_bim_train = util.bim_attack(x_train, model, sess)
    adv_lbfgs_train = util.lbfgs_attack(x_train, model, sess, 6)
    train_plus_adv_fgsm = np.concatenate([x_train, adv_fgsm_train])
    y_train_plus_adv_fgsm = np.concatenate([y_train, y_train])
    train_plus_adv_bim = np.concatenate([x_train, adv_bim_train])
    y_train_plus_adv_bim = np.concatenate([y_train, y_train])
    train_plus_adv_lbfgs = np.concatenate([x_train, adv_lbfgs_train])
    y_train_plus_adv_lbfgs = np.concatenate([y_train, y_train])
    del model

    # FGSM TRAINING: build a fresh model for FGSM adversarial training.
    model = model_arch()
    wrap = KerasModelWrapper(model)
    model.fit(train_plus_adv_fgsm, y_train_plus_adv_fgsm,
              batch_size=batch_size, epochs=epochs, verbose=1)
    model.save("models/mnist_fgsm_model.h5")
    fgsm_acc_train = model.evaluate(x_test, y_test, verbose=0)
    print("Accuracy of adversarially trained model on clean examples\n"
          + str(fgsm_acc_train[1]))
    # Generate adversarial examples against the adversarially trained model.
    adv_fgsm_test = util.fgsm_attack(x_test, model, sess)
    fgsm_adv_acc_train = model.evaluate(adv_fgsm_test, y_test, verbose=0)
    print("Accuracy of adversarially trained model on adv_test images\n"
          + str(fgsm_adv_acc_train[1]))
    del model

    # BIM TRAINING: build a fresh model for BIM adversarial training.
    model = model_arch()
    wrap = KerasModelWrapper(model)
    model.fit(train_plus_adv_bim, y_train_plus_adv_bim,
              batch_size=batch_size, epochs=epochs, verbose=1)
    bim_acc_train = model.evaluate(x_test, y_test, verbose=0)
    print("Accuracy of adversarially trained model on clean examples\n"
          + str(bim_acc_train[1]))
    # Generate adversarial examples against the adversarially trained model.
    adv_bim_test = util.bim_attack(x_test, model, sess)
    bim_adv_acc_train = model.evaluate(adv_bim_test, y_test, verbose=0)
    print("Accuracy of adversarially trained model on adv_test images\n"
          + str(bim_adv_acc_train[1]))
    del model

    # LBFGS TRAINING: build a fresh model for L-BFGS adversarial training.
    model = model_arch()
    wrap = KerasModelWrapper(model)
    model.fit(train_plus_adv_lbfgs, y_train_plus_adv_lbfgs,
              batch_size=batch_size, epochs=epochs, verbose=1)
    lbfgs_acc_train = model.evaluate(x_test, y_test, verbose=0)
    print("Accuracy of adversarially trained model on clean examples\n"
          + str(lbfgs_acc_train[1]))
    adv_lbfgs_test = util.lbfgs_attack(x_test, model, sess, 6)
    lbfgs_adv_acc_train = model.evaluate(adv_lbfgs_test, y_test, verbose=0)
    print("Accuracy of adversarially trained model on adv_test images\n"
          + str(lbfgs_adv_acc_train[1]))
    del model
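# NOTE: util.fgsm_attack, util.bim_attack, and util.lbfgs_attack are project
# helpers; given the KerasModelWrapper usage above they presumably wrap
# CleverHans attacks. A minimal sketch of what fgsm_attack might look like
# (the eps value and clipping range are assumptions):
from cleverhans.attacks import FastGradientMethod
from cleverhans.utils_keras import KerasModelWrapper

def fgsm_attack(x, model, sess, eps=0.3):
    # Generate FGSM adversarial examples for a Keras classifier.
    fgsm = FastGradientMethod(KerasModelWrapper(model), sess=sess)
    return fgsm.generate_np(x, eps=eps, clip_min=0.0, clip_max=1.0)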
def main(argv):
    print("Start Main")
    # Set arguments: Save_Dir Structure Learning_Rate Early_Stopping Batch_Size Data_Dir
    data_dir = FLAGS.data_dir
    save_dir = FLAGS.save_dir
    learning_rate = FLAGS.lr
    early_stop = FLAGS.early_stop
    batch_size = FLAGS.batch_size
    reg_coeff = FLAGS.reg_coeff
    split = FLAGS.split
    master = FLAGS.master
    checkpoint_path = FLAGS.checkpoint_path
    input_dir = FLAGS.input_dir
    output_dir = FLAGS.output_dir
    image_width = FLAGS.image_width
    image_height = FLAGS.image_height
    num_classes = FLAGS.num_classes
    eps = FLAGS.eps
    batch_shape = [batch_size, image_height, image_width, 3]
    input_shape = [image_height, image_width, 3]

    tf.logging.set_verbosity(tf.logging.INFO)

    def model_arch():
        model = Sequential()
        model.add(Conv2D(50, kernel_size=(5, 5), activation='relu',
                         input_shape=input_shape))
        model.add(MaxPooling2D(pool_size=(2, 2)))
        model.add(Conv2D(100, (5, 5), activation='relu'))
        model.add(MaxPooling2D(pool_size=(2, 2)))
        model.add(Conv2D(200, (3, 3), activation='relu'))
        model.add(MaxPooling2D(pool_size=(2, 2)))
        model.add(Dropout(0.5))
        model.add(Flatten())
        model.add(Dense(400, activation='relu'))
        model.add(Dropout(0.5))
        model.add(Dense(200, activation='relu'))
        model.add(Dropout(0.5))
        model.add(Dense(num_classes, activation='softmax'))
        model.compile(loss=keras.losses.categorical_crossentropy,
                      optimizer=keras.optimizers.Adadelta(),
                      metrics=['accuracy'])
        return model

    model = model_arch()

    # Load training data.
    imgs, labels, names = util.load_training_images('tiny-imagenet-200/train/')
    print("Training Images Loaded")

    # Retype and resize the training data (only the first 100 images here).
    imgs = imgs[0:100]
    labels = labels[0:100]
    names = names[0:100]
    imgs_large = np.ndarray(shape=[imgs.shape[0], 299, 299, 3])
    for i in range(imgs.shape[0]):
        imgs_large[i, :, :, :] = util.rescale(imgs[i])
    imgs_large = imgs_large.astype('uint8')

    imgs_noisy = np.ndarray(shape=imgs_large.shape)
    for i in range(imgs_large.shape[0]):
        imgs_noisy[i, :, :, :] = util.noisy(1, imgs_large[i])
    imgs_noisy = imgs_noisy.astype('uint8')

    sub_imgs, sub_labels = util.subsample(imgs_noisy, labels)
    batch_shape = [20, 299, 299, 3]
    num_classes = 200
def main(argv):
    print("Start Main")
    # Set arguments: Save_Dir Structure Learning_Rate Early_Stopping Batch_Size Data_Dir
    data_dir = FLAGS.data_dir
    save_dir = FLAGS.save_dir
    learning_rate = FLAGS.lr
    early_stop = FLAGS.early_stop
    batch_size = FLAGS.batch_size
    epochs = FLAGS.epochs
    reg_coeff = FLAGS.reg_coeff
    split = FLAGS.split
    master = FLAGS.master
    checkpoint_path = FLAGS.checkpoint_path
    input_dir = FLAGS.input_dir
    output_dir = FLAGS.output_dir
    image_width = FLAGS.image_width
    image_height = FLAGS.image_height
    num_classes = FLAGS.num_classes
    eps = FLAGS.eps
    batch_shape = [batch_size, image_height, image_width, 3]
    input_shape = [image_height, image_width, 3]
    num_ens = FLAGS.num_ens

    tf.logging.set_verbosity(tf.logging.INFO)

    def model_arch():
        model = Sequential()
        model.add(Conv2D(50, kernel_size=(5, 5), activation='relu',
                         input_shape=input_shape))
        model.add(MaxPooling2D(pool_size=(2, 2)))
        model.add(Conv2D(100, (5, 5), activation='relu'))
        model.add(MaxPooling2D(pool_size=(2, 2)))
        model.add(Conv2D(200, (3, 3), activation='relu'))
        model.add(MaxPooling2D(pool_size=(2, 2)))
        model.add(Dropout(0.5))
        model.add(Flatten())
        model.add(Dense(400, activation='relu'))
        model.add(Dropout(0.5))
        model.add(Dense(200, activation='relu'))
        model.add(Dropout(0.5))
        model.add(Dense(num_classes, activation='softmax'))
        model.compile(loss=keras.losses.categorical_crossentropy,
                      optimizer=keras.optimizers.Adadelta(),
                      metrics=['accuracy'])
        return model

    model = model_arch()

    # Load training and testing data.
    x_train, y_train, train_names = util.load_training_images('tiny-imagenet-200/train/')
    print("Training Images Loaded")
    x_test, y_test, test_names = util.load_training_images('tiny-imagenet-200/test/')
    print("Testing Images Loaded")

    # Retype and resize the training data (only the first 100 images here).
    x_train = x_train[0:100]
    y_train = y_train[0:100]
    train_names = train_names[0:100]
    x_train_large = np.ndarray(shape=[x_train.shape[0], 299, 299, 3])
    for i in range(x_train.shape[0]):
        x_train_large[i, :, :, :] = util.rescale(x_train[i])
    x_train_large = x_train_large.astype('uint8')

    x_train_noisy = np.ndarray(shape=x_train_large.shape)
    for i in range(x_train_large.shape[0]):
        x_train_noisy[i, :, :, :] = util.noisy(1, x_train_large[i])
    x_train_noisy = x_train_noisy.astype('uint8')

    x_train_sub, y_train_sub = util.subsample(x_train_noisy, y_train)
    batch_shape = [20, 299, 299, 3]
    num_classes = 200

    y_train = keras.utils.to_categorical(y_train, num_classes)
    y_test = keras.utils.to_categorical(y_test, num_classes)

    sess = tf.Session()
    keras.backend.set_session(sess)

    x_noisy = util.add_gaussian_noise(x_train, 0, 64)  # Add gaussian noise to all images

    # preds_ens stores the predictions of each model in the ensemble (10), one
    # per column; max_vote_ens stores the majority vote across all models.
    preds_ens = np.zeros((x_test.shape[0], 10))
    max_vote_ens = np.zeros(x_test.shape[0])
    for i in range(num_ens):
        # Build a fresh model for every member of the ensemble.
        model = model_arch()
        # Subsample from the entire data set (bagging).
        x_train_sub, y_train_sub = util.subsample(x_train_noisy, y_train)
        model.fit(x_train_sub, y_train_sub, batch_size=batch_size,
                  epochs=epochs, verbose=1)
        model.save("models/imgnet/" + str(i) + ".h5")
        # Store this model's predictions in the ith column of preds_ens.
        ans = sess.run(tf.argmax(model.predict(x_test), axis=1))
        preds_ens[:, i] = ans.reshape((x_test.shape[0]))
        del model

    # preds_ens now holds the test-set predictions of every model in the
    # ensemble; the ith column contains the predictions of the ith model.
    # Go through every row.
    print("Ensemble method Clean")
    ens_acc = np.zeros(num_ens)
    for i in range(num_ens):
        for j in range(preds_ens.shape[0]):
            # Majority vote over the first i+1 models for this test instance.
            b = Counter(preds_ens[j][0:i + 1])
            max_vote_ens[j] = b.most_common(1)[0][0]
        # Accuracy of the ensemble using the first i+1 models.
        ens_acc_i = sess.run(
            tf.reduce_mean(
                tf.cast(tf.equal(max_vote_ens, tf.argmax(y_test, axis=1)),
                        tf.float32)))
        ens_acc[i] = ens_acc_i
    # TODO print the nonperturbed test accuracy to the output file.
    print("Accuracy : " + str(np.mean(ens_acc)))

    # Build a model for normal training on the entire noisy data.
    model = model_arch()
    model.fit(x_train_noisy, y_train, batch_size=batch_size, epochs=epochs, verbose=1)
    acc = model.evaluate(x_test, y_test, verbose=0)
    acc_noisy_normal = acc[1]  # accuracy of the normal model on noisy train data
    del model

    # Build a new model for normal training (without ensemble) on the entire
    # train data (without bagging or noise).
    model = model_arch()
    model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, verbose=1)
    acc = model.evaluate(x_test, y_test, verbose=0)
    model.save("models/imgnet/original_model.h5")
    acc_normal = acc[1]  # accuracy of the normal model
    print("accuracy of normal model : " + str(acc_normal))
    print("accuracy of normal model on noisy train data : " + str(acc_noisy_normal))

    # Generate FGSM adversarial examples on the test data.
    adv_fgsm = util.fgsm_attack(x_test, model, sess)
    acc_fgsm = model.evaluate(adv_fgsm, y_test, verbose=0)
    acc_fgsm = acc_fgsm[1]
    print("accuracy of normal model on fgsm adversarial examples : " + str(acc_fgsm))

    # Generate BIM adversarial examples on the test data.
    adv_bim = util.bim_attack(x_test, model, sess)
    acc_bim = model.evaluate(adv_bim, y_test, verbose=0)
    acc_bim = acc_bim[1]
    print("accuracy of normal model on bim adversarial examples : " + str(acc_bim))

    # Generate L-BFGS adversarial examples on the test data; the target class is 6.
    adv_lbfgs = util.lbfgs_attack(x_test, model, sess, 6)
    acc_lbfgs = model.evaluate(adv_lbfgs, y_test, verbose=0)
    acc_lbfgs = acc_lbfgs[1]
    print("accuracy of normal model on lbfgs adversarial examples : " + str(acc_lbfgs))

    # Per-attack prediction and majority-vote buffers for the ensemble
    # (one column per model in the ensemble).
    preds_ens_fgsm = np.zeros((x_test.shape[0], 10))
    max_vote_ens_fgsm = np.zeros(x_test.shape[0])
    preds_ens_bim = np.zeros((x_test.shape[0], 10))
    max_vote_ens_bim = np.zeros(x_test.shape[0])
    preds_ens_lbfgs = np.zeros((x_test.shape[0], 10))
    max_vote_ens_lbfgs = np.zeros(x_test.shape[0])
    del model

    for i in range(num_ens):
        model = load_model("models/imgnet/" + str(i) + ".h5")
        # Predictions of model i on the FGSM adversarial examples.
        ans = sess.run(tf.argmax(model.predict(adv_fgsm), axis=1))
        preds_ens_fgsm[:, i] = ans.reshape((adv_fgsm.shape[0]))
        # Predictions of model i on the BIM adversarial examples.
        ans = sess.run(tf.argmax(model.predict(adv_bim), axis=1))
        preds_ens_bim[:, i] = ans.reshape((adv_bim.shape[0]))
        # Predictions of model i on the L-BFGS adversarial examples.
        ans = sess.run(tf.argmax(model.predict(adv_lbfgs), axis=1))
        preds_ens_lbfgs[:, i] = ans.reshape((adv_lbfgs.shape[0]))
        del model

    print("Now the variable pred_ens consists of the predictions of all fgsm "
          "adversarial test_data for each model in ensemble.")
    # The ith column contains the predictions of the ith model; go through every row.
    ens_acc_fgsm = np.zeros(num_ens)
    for i in range(num_ens):
        for j in range(preds_ens_fgsm.shape[0]):
            # Majority vote over the first i+1 models for this instance.
            b = Counter(preds_ens_fgsm[j][0:i + 1])
            max_vote_ens_fgsm[j] = b.most_common(1)[0][0]
        # Accuracy of the ensemble on the FGSM adversarial examples.
        ens_acc_fgsm_i = sess.run(
            tf.reduce_mean(
                tf.cast(tf.equal(max_vote_ens_fgsm, tf.argmax(y_test, axis=1)),
                        tf.float32)))
        ens_acc_fgsm[i] = ens_acc_fgsm_i
    print(str(np.mean(ens_acc_fgsm)))

    print("Now the variable pred_ens consists of the predictions of all bim "
          "adversarial test_data for each model in ensemble.")
    ens_acc_bim = np.zeros(num_ens)
    for i in range(num_ens):
        for j in range(preds_ens_bim.shape[0]):
            b = Counter(preds_ens_bim[j][0:i + 1])
            max_vote_ens_bim[j] = b.most_common(1)[0][0]
        # Accuracy of the ensemble on the BIM adversarial examples.
        ens_acc_bim_i = sess.run(
            tf.reduce_mean(
                tf.cast(tf.equal(max_vote_ens_bim, tf.argmax(y_test, axis=1)),
                        tf.float32)))
        ens_acc_bim[i] = ens_acc_bim_i
    print(str(np.mean(ens_acc_bim)))

    print("Now the variable pred_ens consists of the predictions of all lbfgs "
          "adversarial test_data for each model in ensemble.")
    ens_acc_lbfgs = np.zeros(num_ens)
    for i in range(num_ens):
        for j in range(preds_ens_lbfgs.shape[0]):
            b = Counter(preds_ens_lbfgs[j][0:i + 1])
            max_vote_ens_lbfgs[j] = b.most_common(1)[0][0]
        # Accuracy of the ensemble on the L-BFGS adversarial examples.
        ens_acc_lbfgs_i = sess.run(
            tf.reduce_mean(
                tf.cast(tf.equal(max_vote_ens_lbfgs, tf.argmax(y_test, axis=1)),
                        tf.float32)))
        ens_acc_lbfgs[i] = ens_acc_lbfgs_i
    print(str(np.mean(ens_acc_lbfgs)))
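# The vote-and-score loops above (clean, FGSM, BIM, and L-BFGS, in both the
# MNIST and Tiny ImageNet scripts) are identical except for their inputs. A
# refactoring sketch, not part of the original code, that computes the
# incremental majority-vote accuracy for any prediction matrix in NumPy:
import numpy as np
from collections import Counter

def ensemble_vote_accuracy(preds, y_true_idx):
    # preds[:, i] holds model i's predicted classes; returns the accuracy of
    # the majority vote over the first i+1 models, for each i.
    num_ens = preds.shape[1]
    accs = np.zeros(num_ens)
    for i in range(num_ens):
        votes = np.array([Counter(row[:i + 1]).most_common(1)[0][0]
                          for row in preds])
        accs[i] = np.mean(votes == y_true_idx)
    return accs

# Example: ens_acc_fgsm = ensemble_vote_accuracy(preds_ens_fgsm, np.argmax(y_test, axis=1))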
labels = util.make_justpot(labels)
if args.pot_part:
    partitions = []
    no_pot_imgs = util.filter_no_pot(labels, image_paths)
    for color, subjs in util.pot_map.items():
        subj_imgs = []
        subj_imgs.extend([
            x for x in image_paths
            if os.path.basename(x).startswith(tuple(subjs))
        ])
        imgs = list(set(no_pot_imgs + subj_imgs))
        if args.make_uniform:
            imgs = util.make_uniform(imgs, labels)
        train, test = util.subsample(imgs, percent=args.samp_percent)
        partitions.append((color, train, test))
    for p in partitions:
        labels = util.read_labels(os.path.join(annot_input_dir, "labels"))
        gen_and_write(output_dir, exp_num, p[1], labels,
                      mode=f"{p[0]}_train", args=args)
        gen_and_write(output_dir, exp_num, p[2], labels,
                      # The original snippet is cut off here; the "_test" mode
                      # name is an assumption mirroring the _train call above.
                      mode=f"{p[0]}_test", args=args)
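# NOTE: in this snippet util.subsample has yet another signature, a
# percent-based train/test split. A minimal sketch under that assumption
# (whether percent is a fraction or a 0-100 value is unknown; a fraction
# is assumed here):
import random

def subsample(items, percent):
    # Randomly split items, putting `percent` of them in the train partition.
    items = list(items)
    random.shuffle(items)
    cut = int(len(items) * percent)
    return items[:cut], items[cut:]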
def main(argv):
    print("Start Main")
    # Set arguments: Save_Dir Structure Learning_Rate Early_Stopping Batch_Size Data_Dir
    data_dir = FLAGS.data_dir
    save_dir = FLAGS.save_dir
    learning_rate = FLAGS.lr
    early_stop = FLAGS.early_stop
    batch_size = FLAGS.batch_size
    epochs = FLAGS.epochs
    reg_coeff = FLAGS.reg_coeff
    split = FLAGS.split
    master = FLAGS.master
    checkpoint_path = FLAGS.checkpoint_path
    input_dir = FLAGS.input_dir
    output_dir = FLAGS.output_dir
    image_width = FLAGS.image_width
    image_height = FLAGS.image_height
    num_classes = FLAGS.num_classes
    eps = FLAGS.eps
    batch_shape = [batch_size, image_height, image_width, 3]
    input_shape = [image_height, image_width, 3]
    num_ens = FLAGS.num_ens

    tf.logging.set_verbosity(tf.logging.INFO)

    def model_arch():
        model = Sequential()
        model.add(Conv2D(50, kernel_size=(5, 5), activation='relu',
                         input_shape=input_shape))
        model.add(MaxPooling2D(pool_size=(2, 2)))
        model.add(Conv2D(100, (5, 5), activation='relu'))
        model.add(MaxPooling2D(pool_size=(2, 2)))
        model.add(Conv2D(200, (3, 3), activation='relu'))
        model.add(MaxPooling2D(pool_size=(2, 2)))
        model.add(Dropout(0.5))
        model.add(Flatten())
        model.add(Dense(400, activation='relu'))
        model.add(Dropout(0.5))
        model.add(Dense(200, activation='relu'))
        model.add(Dropout(0.5))
        model.add(Dense(num_classes, activation='softmax'))
        model.compile(loss=keras.losses.categorical_crossentropy,
                      optimizer=keras.optimizers.Adadelta(),
                      metrics=['accuracy'])
        return model

    model = model_arch()

    # Load training and testing data.
    x_train, y_train, train_names = util.load_training_images('tiny-imagenet-200/train/')
    print("Training Images Loaded")
    x_test, y_test, test_names = util.load_training_images('tiny-imagenet-200/test/')
    print("Testing Images Loaded")

    # Retype and resize the training data (only the first 100 images here).
    x_train = x_train[0:100]
    y_train = y_train[0:100]
    train_names = train_names[0:100]
    x_train_large = np.ndarray(shape=[x_train.shape[0], 299, 299, 3])
    for i in range(x_train.shape[0]):
        x_train_large[i, :, :, :] = util.rescale(x_train[i])
    x_train_large = x_train_large.astype('uint8')

    x_train_noisy = np.ndarray(shape=x_train_large.shape)
    for i in range(x_train_large.shape[0]):
        x_train_noisy[i, :, :, :] = util.noisy(1, x_train_large[i])
    x_train_noisy = x_train_noisy.astype('uint8')

    x_train_sub, y_train_sub = util.subsample(x_train_noisy, y_train)
    batch_shape = [20, 299, 299, 3]
    num_classes = 200

    y_train = keras.utils.to_categorical(y_train, num_classes)
    y_test = keras.utils.to_categorical(y_test, num_classes)

    sess = tf.Session()
    keras.backend.set_session(sess)

    # ----------------------------- Adversarial Training -----------------------------
    # First, adversarial examples are generated from the train data; the model is then
    # trained on train_data + adv_train_data and tested on both the normal test data
    # and the adversarial test data. Adversarial examples are therefore generated
    # twice: once on the train data and once on the test data.
    model = load_model("models/imgnet/original_model.h5")
    wrap = KerasModelWrapper(model)

    # Generate adversarial examples on the train data.
    adv_fgsm_train = util.fgsm_attack(x_train, model, sess)
    adv_bim_train = util.bim_attack(x_train, model, sess)
    adv_lbfgs_train = util.lbfgs_attack(x_train, model, sess, 6)
    train_plus_adv_fgsm = np.concatenate([x_train, adv_fgsm_train])
    y_train_plus_adv_fgsm = np.concatenate([y_train, y_train])
    train_plus_adv_bim = np.concatenate([x_train, adv_bim_train])
    y_train_plus_adv_bim = np.concatenate([y_train, y_train])
    train_plus_adv_lbfgs = np.concatenate([x_train, adv_lbfgs_train])
    y_train_plus_adv_lbfgs = np.concatenate([y_train, y_train])
    del model

    print("FGSM TRAINING")
    # Build a fresh model for FGSM adversarial training.
    model = model_arch()
    wrap = KerasModelWrapper(model)
    model.fit(train_plus_adv_fgsm, y_train_plus_adv_fgsm,
              batch_size=batch_size, epochs=epochs, verbose=1)
    model.save("models/imgnet/fgsm_model.h5")
    fgsm_acc_train = model.evaluate(x_test, y_test, verbose=0)
    print("Accuracy of adversarially trained model on clean examples\n"
          + str(fgsm_acc_train[1]))
    # Generate adversarial examples against the adversarially trained model.
    adv_fgsm_test = util.fgsm_attack(x_test, model, sess)
    fgsm_adv_acc_train = model.evaluate(adv_fgsm_test, y_test, verbose=0)
    print("Accuracy of adversarially trained model on adv_test images\n"
          + str(fgsm_adv_acc_train[1]))
    del model

    print("BIM TRAINING")
    # Build a fresh model for BIM adversarial training.
    model = model_arch()
    wrap = KerasModelWrapper(model)
    model.fit(train_plus_adv_bim, y_train_plus_adv_bim,
              batch_size=batch_size, epochs=epochs, verbose=1)
    bim_acc_train = model.evaluate(x_test, y_test, verbose=0)
    print("Accuracy of adversarially trained model on clean examples\n"
          + str(bim_acc_train[1]))
    # Generate adversarial examples against the adversarially trained model.
    adv_bim_test = util.bim_attack(x_test, model, sess)
    bim_adv_acc_train = model.evaluate(adv_bim_test, y_test, verbose=0)
    print("Accuracy of adversarially trained model on adv_test images\n"
          + str(bim_adv_acc_train[1]))
    del model

    print("LBFGS TRAINING")
    # Build a fresh model for L-BFGS adversarial training.
    model = model_arch()
    wrap = KerasModelWrapper(model)
    model.fit(train_plus_adv_lbfgs, y_train_plus_adv_lbfgs,
              batch_size=batch_size, epochs=epochs, verbose=1)
    print("Accuracy of adversarially trained model on clean examples")
    lbfgs_acc_train = model.evaluate(x_test, y_test, verbose=0)
    print(str(lbfgs_acc_train[1]))
    print("Accuracy of adversarially trained model on lbfgs examples")
    adv_lbfgs_test = util.lbfgs_attack(x_test, model, sess, 6)
    lbfgs_adv_acc_train = model.evaluate(adv_lbfgs_test, y_test, verbose=0)
    print(str(lbfgs_adv_acc_train[1]))
    del model