def run_membership_probability_analysis(
        attack_input: AttackInputData,
        slicing_spec: SlicingSpec = None) -> MembershipProbabilityResults:
    """Compute membership probabilities for every requested slice of the data.

    Args:
      attack_input: data the membership probabilities are computed from. It is
        validated before any slicing takes place.
      slicing_spec: describes which slices of attack_input to analyse. When
        omitted, only the entire dataset is analysed.

    Returns:
      A MembershipProbabilityResults holding one entry per slice, in the order
      in which the slices were generated.
    """
    attack_input.validate()

    spec = slicing_spec
    if spec is None:
        spec = SlicingSpec(entire_dataset=True)

    # The class count is only looked up when per-class slices are requested.
    class_count = attack_input.num_classes if spec.by_class else None

    per_slice_results = [
        _compute_membership_probability(get_slice(attack_input, one_slice))
        for one_slice in get_single_slice_specs(spec, class_count)
    ]
    return MembershipProbabilityResults(
        membership_prob_results=per_slice_results)
def test_slice_by_correcness(self):
    """Correctness slicing gives two slices; the first is 'correctly classified'."""
    spec = SlicingSpec(
        entire_dataset=False,
        by_classification_correctness=True,
    )
    first_expected = SingleSliceSpec(SlicingFeature.CORRECTLY_CLASSIFIED, True)

    slices = get_single_slice_specs(spec)

    self.assertLen(slices, 2)
    # Only the first slice is compared field by field.
    self.assertTrue(_are_all_fields_equal(slices[0], first_expected))
def main(unused_argv):
    """Train the small CNN and report membership inference attack metrics.

    Loads the data via load_cifar10(), trains the model returned by small_cnn()
    with a MembershipInferenceCallback attached, then runs one more attack on
    the trained model and prints every flattened attack metric.

    Args:
      unused_argv: not used; presumably the argv forwarded by the app runner.
    """
    # Batch size handed to the attack code (callback and final attack alike).
    attack_batch_size = 2048

    # Load training and test data.
    x_train, y_train, x_test, y_test = load_cifar10()

    # Get model, optimizer and specify loss.
    model = small_cnn()
    # `learning_rate` is the supported keyword; the `lr` alias is deprecated
    # and rejected by newer Keras releases.
    optimizer = tf.keras.optimizers.SGD(
        learning_rate=FLAGS.learning_rate, momentum=0.9)
    loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
    model.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])

    # Get callback for membership inference attack.
    mia_callback = MembershipInferenceCallback(
        (x_train, y_train), (x_test, y_test),
        slicing_spec=SlicingSpec(entire_dataset=True, by_class=True),
        attack_types=[
            AttackType.THRESHOLD_ATTACK, AttackType.K_NEAREST_NEIGHBORS
        ],
        tensorboard_dir=FLAGS.model_dir,
        tensorboard_merge_classifiers=FLAGS.tensorboard_merge_classifiers,
        is_logit=True,
        batch_size=attack_batch_size)

    # Train model with Keras
    model.fit(
        x_train,
        y_train,
        epochs=FLAGS.epochs,
        validation_data=(x_test, y_test),
        batch_size=FLAGS.batch_size,
        callbacks=[mia_callback],
        verbose=2)

    print('End of training attack:')
    attack_results = run_attack_on_keras_model(
        model, (x_train, y_train), (x_test, y_test),
        slicing_spec=SlicingSpec(entire_dataset=True, by_class=True),
        attack_types=[
            AttackType.THRESHOLD_ATTACK, AttackType.K_NEAREST_NEIGHBORS
        ],
        is_logit=True,
        batch_size=attack_batch_size)

    att_types, att_slices, att_metrics, att_values = (
        get_flattened_attack_metrics(attack_results))
    # One line per metric, labelled "slice, attack type, metric".
    print('\n'.join([
        ' %s: %.4f' % (', '.join([s, t, m]), v)
        for t, s, m, v in zip(att_types, att_slices, att_metrics, att_values)
    ]))
def test_run_attack_data_size(self):
    """Each attack result records the train/test sizes of its own slice."""
    attack_input = get_test_input(100, 80)
    outcome = mia.run_attacks(
        attack_input,
        SlicingSpec(by_class=True),
        (AttackType.THRESHOLD_ATTACK,),
    )
    per_slice = outcome.single_attack_results

    # First result: sizes match the full 100/80 input.
    self.assertEqual(per_slice[0].data_size, DataSize(ntrain=100, ntest=80))
    # Fourth result: a smaller slice of the same input.
    self.assertEqual(per_slice[3].data_size, DataSize(ntrain=20, ntest=16))
def test_run_attack_by_slice(self):
    """Per-class slicing produces six results; the fourth is for class 2."""
    attack_input = get_test_input(100, 100)
    outcome = mia.run_attacks(
        attack_input,
        SlicingSpec(by_class=True),
        (AttackType.THRESHOLD_ATTACK,),
    )

    self.assertLen(outcome.single_attack_results, 6)

    class_two_slice = SingleSliceSpec(SlicingFeature.CLASS, 2)
    fourth_result = outcome.single_attack_results[3]
    self.assertEqual(fourth_result.slice_spec, class_two_slice)
def test_slice_by_percentiles(self):
    """Percentile slicing yields ten slices; spot-check the 1st and the 6th."""
    spec = SlicingSpec(entire_dataset=False, by_percentiles=True)
    first_bucket = SingleSliceSpec(SlicingFeature.PERCENTILE, (0, 10))
    sixth_bucket = SingleSliceSpec(SlicingFeature.PERCENTILE, (50, 60))

    slices = get_single_slice_specs(spec)

    self.assertLen(slices, 10)
    self.assertTrue(_are_all_fields_equal(slices[0], first_bucket))
    self.assertTrue(_are_all_fields_equal(slices[5], sixth_bucket))
def test_slice_by_classes(self):
    """Per-class slicing yields the entire-dataset slice, then one per class."""
    spec = SlicingSpec(by_class=True)
    class_count = 5

    # Entire-dataset slice first, followed by classes 0..class_count-1.
    wanted = [self.ENTIRE_DATASET_SLICE]
    wanted.extend(
        SingleSliceSpec(SlicingFeature.CLASS, label)
        for label in range(class_count))

    slices = get_single_slice_specs(spec, class_count)

    self.assertTrue(_are_lists_equal(slices, wanted))
def test_slicing_by_multiple_features(self):
    """Enabling every slicing feature yields the sum of all per-feature slices."""
    spec = SlicingSpec(
        entire_dataset=True,
        by_class=True,
        by_percentiles=True,
        by_classification_correctness=True,
    )
    class_count = 10

    slice_counts = (
        class_count,  # one slice per class
        1,            # entire dataset slice
        10,           # percentile slices
        2,            # classification correctness slices
    )
    wanted_total = sum(slice_counts)

    slices = get_single_slice_specs(spec, class_count)

    self.assertLen(slices, wanted_total)
def run_attacks(attack_input: AttackInputData,
                slicing_spec: SlicingSpec = None,
                attack_types: Iterable[AttackType] = (
                    AttackType.THRESHOLD_ATTACK,),
                privacy_report_metadata: PrivacyReportMetadata = None,
                balance_attacker_training: bool = True,
                min_num_samples: int = 1) -> AttackResults:
    """Runs membership inference attacks on a classification model.

    It runs attacks specified by attack_types on each attack_input slice which
    is specified by slicing_spec.

    Args:
      attack_input: input data for running an attack. It is validated before
        anything else happens.
      slicing_spec: specifies attack_input slices to run attack on. When
        omitted, only the entire dataset is attacked.
      attack_types: attacks to run. Any iterable is accepted, including a
        one-shot iterator; it is consumed exactly once.
      privacy_report_metadata: the metadata of the model under attack.
      balance_attacker_training: Whether the training and test sets for the
        membership inference attacker should have a balanced (roughly equal)
        number of samples from the training and test sets used to develop the
        model under attack.
      min_num_samples: minimum number of examples in either training or test
        data.

    Returns:
      the attack result: one entry per (slice, attack type) pair for which the
      attack produced a result, plus the privacy report metadata.
    """
    attack_input.validate()

    # attack_types is re-traversed for every slice below. Materialise it once so
    # that a generator/iterator argument is not exhausted after the first slice
    # (which would silently skip all attacks on the remaining slices).
    attack_types = tuple(attack_types)

    attack_results = []

    if slicing_spec is None:
        slicing_spec = SlicingSpec(entire_dataset=True)

    # The number of classes is only needed when slicing by class.
    num_classes = None
    if slicing_spec.by_class:
        num_classes = attack_input.num_classes
    input_slice_specs = get_single_slice_specs(slicing_spec, num_classes)

    for single_slice_spec in input_slice_specs:
        attack_input_slice = get_slice(attack_input, single_slice_spec)
        for attack_type in attack_types:
            attack_result = _run_attack(attack_input_slice, attack_type,
                                        balance_attacker_training,
                                        min_num_samples)
            # _run_attack may return None; such results are left out.
            if attack_result is not None:
                attack_results.append(attack_result)

    privacy_report_metadata = _compute_missing_privacy_report_metadata(
        privacy_report_metadata, attack_input)

    return AttackResults(single_attack_results=attack_results,
                         privacy_report_metadata=privacy_report_metadata)
def test_trained_attacks_logits_only_size(self):
    """A single trained attack on logits-only input yields exactly one result."""
    logits_only_input = get_test_input_logits_only(100, 100)
    outcome = mia.run_attacks(
        logits_only_input,
        SlicingSpec(),
        (AttackType.LOGISTIC_REGRESSION,),
    )

    self.assertLen(outcome.single_attack_results, 1)
def test_run_attacks_size(self):
    """Two attack types on the default slicing spec give two results."""
    attack_input = get_test_input(100, 100)
    requested_attacks = (
        AttackType.THRESHOLD_ATTACK,
        AttackType.LOGISTIC_REGRESSION,
    )

    outcome = mia.run_attacks(attack_input, SlicingSpec(), requested_attacks)

    self.assertLen(outcome.single_attack_results, 2)
def main(unused_argv):
    """Train the Estimator model and report membership inference attack metrics.

    Loads the data via load_cifar10(), trains an Estimator built from
    small_cnn_fn one epoch at a time with a MembershipInferenceTrainingHook
    attached, prints the test accuracy after every epoch, then runs one more
    attack on the trained model and prints every flattened attack metric.

    Args:
      unused_argv: not used; presumably the argv forwarded by the app runner.
    """
    # tf.get_logger() returns a standard logging.Logger, whose method is
    # `setLevel` (there is no `set_level`). The level is given by name so the
    # call does not depend on which `logging` module the file imports.
    logger = tf.get_logger()
    logger.setLevel('ERROR')

    # Load training and test data.
    x_train, y_train, x_test, y_test = load_cifar10()

    # Instantiate the tf.Estimator.
    classifier = tf_estimator.Estimator(model_fn=small_cnn_fn,
                                        model_dir=FLAGS.model_dir)

    # A function to construct input_fn given (data, label), to be used by the
    # membership inference training hook.
    def input_fn_constructor(x, y):
        return tf_compat_v1_estimator.inputs.numpy_input_fn(x={'x': x},
                                                            y=y,
                                                            shuffle=False)

    # Get hook for membership inference attack.
    mia_hook = MembershipInferenceTrainingHook(
        classifier, (x_train, y_train), (x_test, y_test),
        input_fn_constructor,
        slicing_spec=SlicingSpec(entire_dataset=True, by_class=True),
        attack_types=[
            AttackType.THRESHOLD_ATTACK, AttackType.K_NEAREST_NEIGHBORS
        ],
        tensorboard_dir=FLAGS.model_dir,
        tensorboard_merge_classifiers=FLAGS.tensorboard_merge_classifiers)

    # Create tf.Estimator input functions for the training and test data.
    train_input_fn = tf_compat_v1_estimator.inputs.numpy_input_fn(
        x={'x': x_train},
        y=y_train,
        batch_size=FLAGS.batch_size,
        num_epochs=FLAGS.epochs,
        shuffle=True)
    eval_input_fn = tf_compat_v1_estimator.inputs.numpy_input_fn(
        x={'x': x_test}, y=y_test, num_epochs=1, shuffle=False)

    # Training loop.
    # NOTE(review): 50000 is hard-coded; presumably the number of training
    # examples returned by load_cifar10() -- confirm.
    steps_per_epoch = 50000 // FLAGS.batch_size
    for epoch in range(1, FLAGS.epochs + 1):
        # Train the model, with the membership inference hook.
        classifier.train(input_fn=train_input_fn,
                         steps=steps_per_epoch,
                         hooks=[mia_hook])

        # Evaluate the model and print results
        eval_results = classifier.evaluate(input_fn=eval_input_fn)
        test_accuracy = eval_results['accuracy']
        print('Test accuracy after %d epochs is: %.3f' %
              (epoch, test_accuracy))

    print('End of training attack')
    attack_results = run_attack_on_tf_estimator_model(
        classifier, (x_train, y_train), (x_test, y_test),
        input_fn_constructor,
        slicing_spec=SlicingSpec(entire_dataset=True, by_class=True),
        attack_types=[
            AttackType.THRESHOLD_ATTACK, AttackType.K_NEAREST_NEIGHBORS
        ])

    att_types, att_slices, att_metrics, att_values = (
        get_flattened_attack_metrics(attack_results))
    # One line per metric, labelled "slice, attack type, metric".
    print('\n'.join([
        ' %s: %.4f' % (', '.join([s, t, m]), v)
        for t, s, m, v in zip(att_types, att_slices, att_metrics, att_values)
    ]))
def test_entire_dataset(self):
    """A default SlicingSpec yields only the entire-dataset slice."""
    default_spec = SlicingSpec()
    wanted = [self.ENTIRE_DATASET_SLICE]

    slices = get_single_slice_specs(default_spec)

    self.assertTrue(_are_lists_equal(slices, wanted))
def test_no_slices(self):
    """Disabling the entire-dataset slice with nothing else yields no slices."""
    empty_spec = SlicingSpec(entire_dataset=False)
    wanted = []

    slices = get_single_slice_specs(empty_spec)

    self.assertTrue(_are_lists_equal(slices, wanted))