import json
import os
import random

import h5py
import numpy as np
import tensorflow as tf
from cleverhans.attacks import ProjectedGradientDescent
from scipy.stats import rankdata
from tqdm import tqdm

# Project-local names used below (import paths omitted because they are
# repo-specific): DataPaths, hdf5utils, tfutils, preprocess_input,
# load_labels_by_name, ATTACK_STRENGTHS, InceptionV1Model,
# get_one_hot_encoded_targets, compute_raw_edge_influences,
# compute_influence_matrix, save_activation_scores,
# calculate_neuron_vulnerabilities.


def compute_benign_raw_edge_influences(class_name):
    input_filepath = DataPaths.get_benign_images_datapath(class_name)
    output_filepath = \
        DataPaths.get_benign_raw_edge_influences_datapath(class_name)
    compute_raw_edge_influences(input_filepath, output_filepath)

def compute_attacked_influence_matrix(original_class, target_class,
                                      attack_name, attack_strength):
    input_filepath = DataPaths.get_attacked_raw_edge_influences_datapath(
        original_class, target_class, attack_name, attack_strength)
    output_filepath = DataPaths.get_attacked_influence_matrix_datapath(
        original_class, target_class, attack_name, attack_strength)
    compute_influence_matrix(input_filepath, output_filepath)

def save_benign_images(class_name: str):
    output_filepath = DataPaths.get_benign_images_datapath(class_name)
    if output_filepath.exists():
        # Already extracted; this function is idempotent.
        return
    print('Saving images for class %s to %s' % (class_name, output_filepath))
    dataset = hdf5utils.create_image_dataset(h5py.File(output_filepath, 'w'),
                                             dataset_name='images')
    tfrecords_filepaths = DataPaths.imagenet_tfrecords_filepaths
    labels_by_name = load_labels_by_name()
    # Stream only the images whose TFRecord label matches this class.
    tf_dataset = tfutils.make_dataset(
        tfrecords_filepaths,
        preprocessing_fn=preprocess_input,
        filter_label=labels_by_name[class_name].tfrecord_label)
    iterator = tf_dataset.make_one_shot_iterator()
    x, _ = iterator.get_next()
    with tf.Session() as sess:
        pbar = tqdm(unit='imgs')
        try:
            while True:
                img = sess.run(x)
                hdf5utils.add_image_to_dataset(img, dataset)
                pbar.update()
        except tf.errors.OutOfRangeError:
            # The one-shot iterator is exhausted; all images are saved.
            pass
    print(dataset)

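# Sketch (assumption about hdf5utils' internals, inferred from its usage
# above): create_image_dataset presumably creates a resizable HDF5 dataset
# and add_image_to_dataset appends along axis 0. With plain h5py the append
# would look roughly like this:
def _append_image_sketch(h5file, img, name='images'):
    if name not in h5file:
        h5file.create_dataset(name,
                              shape=(0, ) + img.shape,
                              maxshape=(None, ) + img.shape,
                              dtype=img.dtype)
    ds = h5file[name]
    ds.resize(ds.shape[0] + 1, axis=0)  # grow by one image slot
    ds[-1] = img
    return ds
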
def save_attacked_activations(original_class, target_class, attack_name,
                              attack_strength):
    save_activation_scores(
        str(
            DataPaths.get_attacked_images_datapath(original_class,
                                                   target_class, attack_name,
                                                   attack_strength)))

def preprocess_scores(original_class, target_class, attack_name='pgd'):
    keep_top_n = 100
    attack_strengths = list(ATTACK_STRENGTHS)

    # The input must already exist; outputs must not, so reruns never
    # clobber previous results.
    neuron_scores_data_path = DataPaths.get_neuron_data_datapath(
        original_class, target_class, attack_name)
    assert os.path.exists(neuron_scores_data_path)
    vulnerabilities_data_path = DataPaths.get_neuron_vulnerabilities_datapath(
        original_class, target_class, attack_name)
    assert not os.path.exists(vulnerabilities_data_path)
    top_neurons_data_path = DataPaths.get_top_neurons_datapath(
        original_class, target_class, attack_name)
    assert not os.path.exists(top_neurons_data_path)

    with open(neuron_scores_data_path, 'r') as f:
        neuron_scores_data = json.load(f)
    layers = list(neuron_scores_data.keys())

    top_neurons = dict()
    vulnerabilities = dict()
    for layer in layers:
        neurons_set = set()
        top_neurons[layer] = dict()
        vulnerabilities[layer] = dict()
        # Top neurons for the benign original-class images.
        top_neurons[layer]['original'] = list(
            map(
                lambda x: x[0],
                sorted(neuron_scores_data[layer].items(),
                       key=lambda x: x[1]['original'][
                           'median_activation_percentile'],
                       reverse=True)[:keep_top_n]))
        neurons_set.update(top_neurons[layer]['original'])
        # Top neurons for the benign target-class images.
        top_neurons[layer]['target'] = list(
            map(
                lambda x: x[0],
                sorted(neuron_scores_data[layer].items(),
                       key=lambda x: x[1]['target'][
                           'median_activation_percentile'],
                       reverse=True)[:keep_top_n]))
        neurons_set.update(top_neurons[layer]['target'])
        # Top neurons under each attack strength.
        for strength in attack_strengths:
            key = 'attacked-%s-%0.02f' % (attack_name, strength)
            top_neurons[layer][key] = list(
                map(
                    lambda x: x[0],
                    sorted(neuron_scores_data[layer].items(),
                           key=lambda x: x[1][key][
                               'median_activation_percentile'],
                           reverse=True)[:keep_top_n]))
            neurons_set.update(top_neurons[layer][key])
        # Score every neuron that made any of this layer's top-n lists.
        for neuron in neurons_set:
            vulnerabilities[layer][neuron] = calculate_neuron_vulnerabilities(
                neuron_scores_data[layer][neuron], attack_name,
                attack_strengths)

    with open(vulnerabilities_data_path, 'w') as f:
        json.dump(vulnerabilities, f, indent=2)
    with open(top_neurons_data_path, 'w') as f:
        json.dump(top_neurons, f, indent=2)

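# Sketch (hypothetical helper, not used by the pipeline): the sort-and-
# truncate pattern repeated three times in preprocess_scores, factored out.
# layer_scores mirrors neuron_scores_data[layer]; condition is 'original',
# 'target', or an 'attacked-<attack>-<strength>' key.
def _top_n_neurons_sketch(layer_scores, condition, n=100):
    ranked = sorted(
        layer_scores.items(),
        key=lambda kv: kv[1][condition]['median_activation_percentile'],
        reverse=True)
    return [neuron_id for neuron_id, _ in ranked[:n]]
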
def save_pgd_attacked_images(original_class,
                             target_class,
                             attack_strength,
                             nb_iter=50,
                             seed=1000):
    # Seed every RNG in play so the attack is reproducible.
    random.seed(seed)
    np.random.seed(seed)
    tf.set_random_seed(seed)
    eps = attack_strength

    labels_by_name = load_labels_by_name()
    target_label = labels_by_name[target_class].lucid_label

    benign_dataset_path = DataPaths.get_benign_images_datapath(original_class)
    assert benign_dataset_path.exists()
    attacked_dataset_path = DataPaths.get_attacked_images_datapath(
        original_class, target_class, attack_name='pgd', attack_strength=eps)
    assert not attacked_dataset_path.exists()
    print('Saving attacked images to %s' % attacked_dataset_path)

    img_dataset = hdf5utils.load_image_dataset_from_file(benign_dataset_path)
    output_file = h5py.File(attacked_dataset_path, 'w')
    out_dataset = hdf5utils.create_image_dataset(output_file,
                                                 dataset_name='images')
    # Records which benign images produced a successful attack.
    indices_dataset = hdf5utils.create_dataset(output_file,
                                               data_shape=(1, ),
                                               dataset_name='indices')

    graph = tf.Graph()
    with graph.as_default():
        model = InceptionV1Model()
        x = model.default_input_placeholder
        y_pred = model.get_predicted_class(x)
        with tf.Session(graph=graph) as sess:
            attack = ProjectedGradientDescent(model, sess=sess)
            target_one_hot_encoded = get_one_hot_encoded_targets(target_label)
            x_adv = attack.generate(x,
                                    eps=eps,
                                    nb_iter=nb_iter,
                                    clip_min=-1,
                                    clip_max=1,
                                    eps_iter=(eps / 5),
                                    ord=2,
                                    y_target=target_one_hot_encoded)

            num_attack_success = 0
            pbar = tqdm(unit='imgs', total=len(img_dataset))
            for i, img in enumerate(img_dataset):
                ben_img = np.array(img)
                adv_img = sess.run(x_adv, feed_dict={x: [ben_img]})
                attack_pred = sess.run(y_pred, feed_dict={x: adv_img})
                adv_img = adv_img[0]
                attack_pred = attack_pred[0]
                assert not np.any(np.isnan(adv_img))
                assert not np.isnan(attack_pred)
                # Keep only images the targeted attack actually flipped.
                if attack_pred == target_label:
                    index = np.array([i])
                    num_attack_success += 1
                    hdf5utils.add_image_to_dataset(adv_img, out_dataset)
                    hdf5utils.add_item_to_dataset(index, indices_dataset)
                pbar.set_postfix(num_attack_success=num_attack_success)
                pbar.update()

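# Sketch (assumption: get_one_hot_encoded_targets is defined elsewhere in the
# repo; a minimal version for a targeted attack would look roughly like this).
# The 1008-class output size matches Lucid's InceptionV1 head, but treat the
# constant as an assumption.
def _one_hot_target_sketch(target_label, num_classes=1008):
    y = np.zeros((1, num_classes), dtype=np.float32)
    y[0, target_label] = 1.0  # all probability mass on the target class
    return y
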
def save_neuron_importances_to_db(original_class: str, target_class: str,
                                  attack_name: str):
    def _calculate_importances_from_scores(scores):
        # scores has shape (num_images, num_neurons).
        num_images, num_neurons = scores.shape
        median_activations = np.median(scores, axis=0)
        # Percentile rank of each neuron's median activation in its layer.
        median_activation_percentiles = \
            rankdata(median_activations) / num_neurons
        return median_activations, median_activation_percentiles

    attack_strengths = list(ATTACK_STRENGTHS)
    original_activation_scores_filepath = \
        DataPaths.get_benign_activations_datapath(original_class)
    target_activation_scores_filepath = \
        DataPaths.get_benign_activations_datapath(target_class)

    data = dict()
    model_klass = InceptionV1Model

    # Save original neuron importances
    original_activation_scores = \
        hdf5utils.load_activation_scores_datasets_from_file(
            original_activation_scores_filepath, model_klass.LAYERS)
    for layer in model_klass.LAYERS:
        data[layer] = dict()
        median_activations, median_activation_percentiles = \
            _calculate_importances_from_scores(
                original_activation_scores[layer])
        for i in range(model_klass.LAYER_SIZES[layer]):
            neuron = '%s-%d' % (layer, i)
            data[layer][neuron] = dict()
            data[layer][neuron]['original'] = {
                'median_activation': float(median_activations[i]),
                'median_activation_percentile':
                    float(median_activation_percentiles[i]),
            }

    # Save target neuron importances
    target_activation_scores = \
        hdf5utils.load_activation_scores_datasets_from_file(
            target_activation_scores_filepath, model_klass.LAYERS)
    for layer in model_klass.LAYERS:
        median_activations, median_activation_percentiles = \
            _calculate_importances_from_scores(
                target_activation_scores[layer])
        for i in range(model_klass.LAYER_SIZES[layer]):
            neuron = '%s-%d' % (layer, i)
            data[layer][neuron]['target'] = {
                'median_activation': float(median_activations[i]),
                'median_activation_percentile':
                    float(median_activation_percentiles[i]),
            }

    # Save attacked neuron importances, one entry per attack strength
    for eps in attack_strengths:
        attacked_activation_scores_filepath = \
            DataPaths.get_attacked_activations_datapath(
                original_class, target_class, attack_name,
                attack_strength=eps)
        attacked_activation_scores = \
            hdf5utils.load_activation_scores_datasets_from_file(
                attacked_activation_scores_filepath, model_klass.LAYERS)
        for layer in model_klass.LAYERS:
            median_activations, median_activation_percentiles = \
                _calculate_importances_from_scores(
                    attacked_activation_scores[layer])
            for i in range(model_klass.LAYER_SIZES[layer]):
                neuron = '%s-%d' % (layer, i)
                key = 'attacked-%s-%0.02f' % (attack_name, eps)
                data[layer][neuron][key] = {
                    'median_activation': float(median_activations[i]),
                    'median_activation_percentile':
                        float(median_activation_percentiles[i]),
                }

    neuron_importances_filepath = DataPaths.get_neuron_data_datapath(
        original_class, target_class, attack_name)
    with open(neuron_importances_filepath, 'w') as f:
        json.dump(data, f, indent=2)

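# Toy check (illustrative only): how the percentile ranking in
# _calculate_importances_from_scores behaves. rankdata assigns ranks 1..n
# (averaging ties), so percentiles land in (0, 1].
def _demo_importance_ranking():
    scores = np.array([[0.1, 2.0, 0.5],
                       [0.3, 1.5, 0.4]])  # 2 images x 3 neurons
    medians = np.median(scores, axis=0)  # -> [0.2, 1.75, 0.45]
    percentiles = rankdata(medians) / medians.size  # -> [1/3, 1, 2/3]
    return medians, percentiles
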
def save_benign_activations(class_name):
    save_activation_scores(
        str(DataPaths.get_benign_images_datapath(class_name)))

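# Intended end-to-end order (hedged: inferred from the existence asserts and
# data-path dependencies above, not from project docs; class names are
# placeholders):
#
#     save_benign_images('orig')
#     save_benign_activations('orig')
#     save_benign_images('target')
#     save_benign_activations('target')
#     for eps in ATTACK_STRENGTHS:
#         save_pgd_attacked_images('orig', 'target', eps)
#         save_attacked_activations('orig', 'target', 'pgd', eps)
#     save_neuron_importances_to_db('orig', 'target', 'pgd')
#     preprocess_scores('orig', 'target', attack_name='pgd')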