def load_app_globals():
    """Initialise the app's global TF state.

    Creates the default graph and a session, imports the frozen Facenet
    graph (whitebox model) into it, loads the MTCNN face detector and the
    ResNet-50 substitute model onto the app object, and registers session
    cleanup to run at interpreter exit.
    """
    app.graph = tf.get_default_graph()
    app.sess = tf.Session(graph=app.graph)

    # Import the serialized Facenet GraphDef into the app session.
    with app.sess.as_default():
        facenet_path = os.path.expanduser(FACENET_WEIGHTS_PATH)
        with gfile.FastGFile(facenet_path, 'rb') as weights_file:
            facenet_graph_def = tf.GraphDef()
            facenet_graph_def.ParseFromString(weights_file.read())
            tf.import_graph_def(facenet_graph_def, name='facenet')
    set_session(app.sess)

    # Face detector and substitute model used by the service endpoints.
    app.mtcnn = MTCNN()
    app.substitute_model = load_model(
        'resnet50', weights_path=params.RESNET50_WEIGHTS_PATH)

    # Release the session when the process shuts down.
    atexit.register(webservice_cleanup, app.sess)
def evaluate_attack(sub_architecture, sub_weights, sub_classes, sub_label,
                    eval_dataset, blackbox_architecture, batch_size,
                    step_size, max_iter, normalize_images):
    """Craft FGSM adversarial examples on a substitute model and evaluate
    their transfer to the blackbox model, writing per-image stats to CSV.

    Args:
        sub_architecture: substitute model architecture name.
        sub_weights: path to the substitute model weights.
        sub_classes: number of output classes of the substitute.
        sub_label: label appended to the output directory name.
        eval_dataset: directory of evaluation images (one subdir per class).
        blackbox_architecture: blackbox architecture; defaults to
            `sub_architecture` when None (disallowed for 'squeezenet').
        batch_size: evaluation batch size.
        step_size: FGSM step size, scaled by the attack-bound range.
        max_iter: maximum attack iterations per batch.
        normalize_images: when True, pixel values are scaled to [0, 1].

    Raises:
        ValueError: if `blackbox_architecture` is None while the substitute
            is 'squeezenet'.
    """
    # NOTE(review): this mutates the module-level DESTINATION_PATH, so a
    # second call appends the label twice — confirm single-call usage.
    global DESTINATION_PATH
    DESTINATION_PATH = f'{DESTINATION_PATH}-{sub_label}'
    if blackbox_architecture is None:
        if sub_architecture == 'squeezenet':
            raise ValueError(
                'Blackbox architecture must be specified for squeezenet substitute'
            )
        blackbox_architecture = sub_architecture
    datagen = tf.keras.preprocessing.image.ImageDataGenerator()
    val_it = datagen.flow_from_directory(eval_dataset,
                                         shuffle=False,
                                         class_mode='sparse',
                                         batch_size=batch_size,
                                         target_size=(224, 224))
    # Ceiling division: one extra (partial) batch when n % batch_size != 0.
    nbatches = val_it.n // batch_size
    if nbatches * batch_size < val_it.n:
        nbatches += 1
    # Class subdirs look like 'n<digits>'; map generator index -> int id.
    class_map = {
        idx: int(name[1:])
        for name, idx in val_it.class_indices.items()
    }
    vectorized_get = np.vectorize(class_map.get)
    lpips = PerceptualLoss(model='net-lin', net='alex',
                           use_gpu=torch.cuda.is_available())

    def gen():
        """Yield (images, mapped labels, file basenames) batches forever."""
        while True:
            # FIX: use builtin next() instead of the legacy .next() method.
            x_val, y_unmapped = next(val_it)
            y_mapped = vectorized_get(y_unmapped)
            idx = (val_it.batch_index - 1) * (val_it.batch_size)
            # NOTE(review): when idx <= 0 the slice end is None, returning
            # every remaining filename — presumably meant for the final
            # (wrapped) batch; verify filenames stay aligned with images.
            batch_filenames = [
                os.path.basename(filename)
                for filename in val_it.filenames[idx:(
                    idx + val_it.batch_size if idx > 0 else None)]
            ]
            # FIX: np.int was removed in NumPy 1.24; builtin int is the
            # documented equivalent.
            yield x_val.astype(np.float32) / (255.0 if normalize_images else 1.0), \
                y_mapped.astype(int), batch_filenames

    train_ds = gen()
    attack_bounds = (0., 255. / (255. if normalize_images else 1.))
    substitute = load_model(sub_architecture, num_classes=sub_classes,
                            trained=True, weights_path=sub_weights)
    blackbox = load_model('blackbox', architecture=blackbox_architecture)
    with sess:
        batch_ph = tf.placeholder(tf.float32, shape=[None, 224, 224, 3],
                                  name="batch_in")
        label_ph = tf.placeholder(tf.int32, shape=[None], name="pred_in")
        print(f"Evaluating using step size {step_size:.3f}")
        stat_rows = []
        stat_index = []
        adv_im_batch = fgsm(substitute, batch_ph, label_ph,
                            step_size * (attack_bounds[1] - attack_bounds[0]))
        for step_num, (batch, labels, filenames) in enumerate(train_ds):
            if step_num >= nbatches:
                break
            print(f"Evaluating batch #{step_num + 1}/{nbatches}")
            batch_rows, batch_index = evaluate_fgsm(
                substitute, blackbox, batch, labels, filenames, batch_ph,
                label_ph, adv_im_batch, lpips,
                preprocess_func=standardize_batch, eps=step_size,
                num_iter=max_iter, attack_bounds=attack_bounds)
            stat_rows += batch_rows
            stat_index += batch_index
        df = pd.DataFrame(stat_rows,
                          columns=[
                              'iter_num', 'bb_conf', 'bb_pred', 'bb_is_same',
                              'sub_conf', 'sub_pred', 'sub_is_same',
                              'bb_init_pred', 'bb_init_conf', 'lpips_dist',
                              'l1_dist', 'l2_dist', 'file_name',
                          ],
                          index=stat_index)
        res_file_name = f'eps_{step_size:.3f}_res.csv'
        df.to_csv(os.path.join(DESTINATION_PATH, res_file_name))
        print(f"Created result file {res_file_name}")
        analyze_statistics(df, DESTINATION_PATH, sub_label)
def train(oracle, substitute_type, nepochs_substitute, nepochs_training,
          batch_size):
    """Train substitute models against the blackbox `oracle` with
    Jacobian-based dataset augmentation between rounds, then save the final
    model and loss/accuracy scatter plots.

    Args:
        oracle: blackbox model used to label augmented training samples.
        substitute_type: architecture name passed to models.load_model.
        nepochs_substitute: number of substitute rounds; each round retrains
            from scratch on the (augmented) working dataset.
        nepochs_training: training epochs per substitute round.
        batch_size: training/validation batch size.

    Returns:
        The last trained substitute model.
    """
    assert nepochs_substitute > 0 and nepochs_training > 0
    train_set_dir = params.TRAIN_SET
    validation_dir = params.VALIDATION_SET
    train_dir = params.TRAIN_SET_WORKING
    print("1) Preprocess dataset - acquire oracle predictions and prune")
    # oracle_classify_and_save(oracle, train_set_dir, train_dir, batch_size,
    #                          prune_threshold=10)
    model = None
    train_losses = []
    train_accuracies = []
    val_losses = []
    val_accuracies = []
    for epoch_sub in range(1, nepochs_substitute + 1):
        print(
            f"2) Training substitute model #{epoch_sub}/{nepochs_substitute}")
        train_ds, nsteps_train, num_classes, class_indices = get_train_set(
            train_dir, batch_size)
        model = models.load_model(model_type=substitute_type, trained=False,
                                  num_classes=num_classes)
        for epoch in range(nepochs_training):
            print(f"2.1) Training epoch #{epoch + 1}")
            epoch_start_time = perf_counter()
            epoch_loss = 0.
            epoch_acc = 0.
            step_train = 0
            for im_batch, label_batch in train_ds:
                if step_train >= nsteps_train:
                    break
                [loss, acc] = model.train_on_batch(im_batch, label_batch)
                epoch_loss += loss
                epoch_acc += acc
                step_train += 1
                print(
                    f"Step {step_train}/{nsteps_train} ({100 * (step_train) / nsteps_train:.2f}%) "
                    f"- Loss={loss}, accuracy={acc}; ",
                    end="")
                # Linear ETA estimate from the mean step duration so far.
                time_now = perf_counter()
                time_elapsed = time_now - epoch_start_time
                time_per_step = time_elapsed / step_train
                steps_remaining = nsteps_train - step_train
                time_remaining = steps_remaining * time_per_step
                print(
                    f"Est. time remaining for epoch: {timedelta(seconds=time_remaining)}"
                )
            epoch_loss /= nsteps_train
            epoch_acc /= nsteps_train
            train_losses.append(epoch_loss)
            train_accuracies.append(epoch_acc)
            print(
                f"Average training loss: {epoch_loss} ; Average accuracy: {epoch_acc}"
            )
            # TODO Validation dir is assumed to be mapped using oracle predictions
            print(f"2.2) Validation epoch #{epoch + 1}")
            validation_ds, nsteps_val = get_validation_set(
                validation_dir, class_indices, batch_size)
            step_val = 0
            validation_loss = 0.
            validation_acc = 0.
            for im_batch, label_batch in validation_ds:
                if step_val >= nsteps_val:
                    break
                [loss, acc] = model.test_on_batch(im_batch, label_batch)
                validation_loss += loss
                validation_acc += acc
                step_val += 1
                print(
                    f"Step {step_val}/{nsteps_val} ({100 * step_val / nsteps_val:.2f}%) "
                    f"- Loss={loss}, accuracy={acc}")
            validation_loss /= nsteps_val
            validation_acc /= nsteps_val
            val_losses.append(validation_loss)
            val_accuracies.append(validation_acc)
            print(
                f"Validation loss for epoch: {validation_loss} ; Validation accuracy: {validation_acc}"
            )
            # FIX: step label was "2.2)", duplicating the validation label.
            print("2.3) Save checkpoint")
            models.save_model(model, substitute_type, override=True)
        if epoch_sub < nepochs_substitute:
            print("3) Augment dataset")
            augmented_images_dir = augment_dataset(model, train_dir,
                                                   params.LAMBDA)
            print("4) Acquire oracle predictions for new samples")
            oracle_classify_and_save(oracle, augmented_images_dir, train_dir,
                                     batch_size)
    print(
        f"Number of output classes in model #{nepochs_substitute}: {num_classes}"
    )
    models.save_model(model, substitute_type, override=False)
    print("=" * 50)
    print("\n\nFinished training, generating graphs")
    graphs_path = os.path.join(params.PROJECT_DIR, "outputs",
                               "graphs_resnet50", "accuracies",
                               datetime.now().strftime("%Y%m%d%H%M%S%f"))
    os.makedirs(graphs_path)
    best_train_accuracy_idx = int(np.argmax(train_accuracies))
    best_val_accuracy_idx = int(np.argmax(val_accuracies))
    print(
        f"Epoch with best training accuracy: epoch #{best_train_accuracy_idx + 1}, value = {train_accuracies[best_train_accuracy_idx]:.3f}"
    )
    print(
        f"Epoch with best validation accuracy: epoch #{best_val_accuracy_idx + 1}, value = {val_accuracies[best_val_accuracy_idx]:.3f}"
    )
    # FIX: the metric lists accumulate nepochs_substitute * nepochs_training
    # entries, so the x-axis must match their actual length — the previous
    # hard-coded arange(1, nepochs_training + 1) made scatter() raise a
    # size-mismatch ValueError whenever nepochs_substitute > 1.
    # FIX: np.int was removed in NumPy 1.24; np.arange over ints is already
    # integer-typed, so the astype call is dropped.
    train_axis = np.arange(1, len(train_losses) + 1)
    val_axis = np.arange(1, len(val_losses) + 1)
    plt.figure()
    plt.title(f"Training loss over {nepochs_training} epochs")
    plt.xlabel("# Epochs")
    plt.ylabel("Crossentropy Loss")
    plt.scatter(train_axis, train_losses)
    plt.savefig(os.path.join(graphs_path, "train_loss.jpg"))
    plt.figure()
    plt.xlabel("# Epochs")
    plt.ylabel("Accuracy")
    plt.title(f"Training accuracy over {nepochs_training} epochs")
    plt.scatter(train_axis, train_accuracies)
    plt.savefig(os.path.join(graphs_path, "train_accuracy.jpg"))
    plt.figure()
    plt.xlabel("# Epochs")
    plt.ylabel("Crossentropy Loss")
    plt.title(f"Validation loss over {nepochs_training} epochs")
    plt.scatter(val_axis, val_losses)
    plt.savefig(os.path.join(graphs_path, "val_loss.jpg"))
    plt.figure()
    plt.xlabel("# Epochs")
    plt.ylabel("Accuracy")
    plt.title(f"Validation accuracy over {nepochs_training} epochs")
    plt.scatter(val_axis, val_accuracies)
    plt.savefig(os.path.join(graphs_path, "val_accuracy.jpg"))
    return model
import os
import sys

import attacks.blackbox.params as params
from attacks.blackbox import models
from attacks.blackbox.utilities import oracle_classify_and_save

os.umask(2)

if __name__ == '__main__':
    # CLI: [prune_threshold] [source_dir] [destination_dir]
    argc = len(sys.argv)
    threshold = float(sys.argv[1]) if argc > 1 else 0.5
    dst = sys.argv[3] if argc > 3 else params.TRAIN_SET_WORKING
    if argc > 2:
        src = sys.argv[2]
    else:
        # No source given: fall back to the newest augmentation folder.
        # Assumes folder names sort lexicographically by recency
        # (timestamp-style names) — TODO confirm naming scheme.
        base_dir = params.AUGMENTATION_BASE_PATH
        newest = sorted(os.listdir(base_dir))[-1]
        src = os.path.join(base_dir, newest)
    os.makedirs(dst, exist_ok=True)
    oracle = models.load_model(model_type='blackbox', architecture='resnet50')
    print(f"Predicting dataset {src} and sorting into directory {dst}")
    oracle_classify_and_save(oracle, src, dst, params.BATCH_SIZE,
                             prune_threshold=threshold, min_num_samples=2)
# NOTE(review): this chunk begins mid-function — the enclosing `def` (which
# binds `pred`, `label_orig`, `batch_tensor` and `scale`) starts above the
# visible region, so the statements below keep function-body indentation.
    # Select the logit/probability of the original label for sample 0.
    label_pred = pred[0, label_orig]
    print("b) calculating Jacobian")
    # Shape size is (batch_size, num_classes, 224, 224, 3)
    jacobians = tf.gradients(label_pred, batch_tensor)[0]
    # Shape size is (batch_size, 224, 224, 3)
    print("c) Augmenting")
    # Jacobian-based augmentation step: lambda * sign(dF/dx).
    # NOTE(review): tf.scalar_mul may not accept a `name` kwarg in TF1 —
    # confirm against the installed TensorFlow version.
    diff = tf.scalar_mul(scale, tf.sign(jacobians, name="SignJacobians"),
                         name="ScaleJacobians")
    return diff


print(f"Successfully loaded module {__file__}")

# Standalone entry point: augment the working training set once using a
# trained ResNet-50 substitute.
if __name__ == '__main__':
    # `sess` is a module-level session created elsewhere in this file.
    tf.keras.backend.set_session(sess)
    datagen = tf.keras.preprocessing.image.ImageDataGenerator()
    input_dir_it = datagen.flow_from_directory(params.TRAIN_SET_WORKING,
                                               class_mode='sparse',
                                               batch_size=4,
                                               shuffle=True,
                                               target_size=(224, 224))
    print(f"Augmenting images found in {input_dir_it.directory}")
    model = load_model('resnet50', trained=True,
                       num_classes=input_dir_it.num_classes)
    augment_dataset(model, params.TRAIN_SET_WORKING, params.LAMBDA)
    sess.close()