def run_membership_probability_analysis(
    attack_input: AttackInputData,
    slicing_spec: SlicingSpec = None) -> MembershipProbabilityResults:
  """Perform membership probability analysis on all given slice types.

  Args:
    attack_input: input data for computing membership probabilities
    slicing_spec: specifies attack_input slices

  Returns:
    the membership probability results.
  """
  attack_input.validate()
  membership_prob_results = []

  if slicing_spec is None:
    slicing_spec = SlicingSpec(entire_dataset=True)
  num_classes = None
  if slicing_spec.by_class:
    num_classes = attack_input.num_classes
  input_slice_specs = get_single_slice_specs(slicing_spec, num_classes)
  for single_slice_spec in input_slice_specs:
    attack_input_slice = get_slice(attack_input, single_slice_spec)
    membership_prob_results.append(
        _compute_membership_probability(attack_input_slice))

  return MembershipProbabilityResults(
      membership_prob_results=membership_prob_results)
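
A minimal calling sketch for the function above (not part of the original example set): loss_train, loss_test, labels_train and labels_test are placeholder arrays assumed to hold per-example losses and labels already computed for the model under attack, and the summary() call assumes the helper available on MembershipProbabilityResults in recent versions of the library.

# Hypothetical inputs: per-example losses and labels for members (train) and
# non-members (test) of the model under attack.
attack_input = AttackInputData(
    loss_train=loss_train,
    loss_test=loss_test,
    labels_train=labels_train,
    labels_test=labels_test)

membership_prob_results = run_membership_probability_analysis(
    attack_input,
    slicing_spec=SlicingSpec(entire_dataset=True, by_class=True))

# Report the fraction of examples whose estimated membership probability
# exceeds each threshold (summary() is assumed from recent library versions).
print(membership_prob_results.summary(threshold_list=[0.9, 0.8]))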
Example #2
 def test_slice_by_correctness(self):
     input_data = SlicingSpec(entire_dataset=False,
                              by_classification_correctness=True)
     expected = SingleSliceSpec(SlicingFeature.CORRECTLY_CLASSIFIED, True)
     output = get_single_slice_specs(input_data)
     self.assertLen(output, 2)
     self.assertTrue(_are_all_fields_equal(output[0], expected))
Example #3
def main(unused_argv):
  # Load training and test data.
  x_train, y_train, x_test, y_test = load_cifar10()

  # Get model, optimizer and specify loss.
  model = small_cnn()
  optimizer = tf.keras.optimizers.SGD(
      learning_rate=FLAGS.learning_rate, momentum=0.9)
  loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
  model.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])

  # Get callback for membership inference attack.
  mia_callback = MembershipInferenceCallback(
      (x_train, y_train), (x_test, y_test),
      slicing_spec=SlicingSpec(entire_dataset=True, by_class=True),
      attack_types=[
          AttackType.THRESHOLD_ATTACK, AttackType.K_NEAREST_NEIGHBORS
      ],
      tensorboard_dir=FLAGS.model_dir,
      tensorboard_merge_classifiers=FLAGS.tensorboard_merge_classifiers,
      is_logit=True,
      batch_size=2048)

  # Train model with Keras
  model.fit(
      x_train,
      y_train,
      epochs=FLAGS.epochs,
      validation_data=(x_test, y_test),
      batch_size=FLAGS.batch_size,
      callbacks=[mia_callback],
      verbose=2)

  print('End of training attack:')
  attack_results = run_attack_on_keras_model(
      model, (x_train, y_train), (x_test, y_test),
      slicing_spec=SlicingSpec(entire_dataset=True, by_class=True),
      attack_types=[
          AttackType.THRESHOLD_ATTACK, AttackType.K_NEAREST_NEIGHBORS
      ],
      is_logit=True,
      batch_size=2048)
  att_types, att_slices, att_metrics, att_values = get_flattened_attack_metrics(
      attack_results)
  print('\n'.join([
      '  %s: %.4f' % (', '.join([s, t, m]), v)
      for t, s, m, v in zip(att_types, att_slices, att_metrics, att_values)
  ]))
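
The Keras example above reads learning_rate, batch_size, epochs, model_dir and tensorboard_merge_classifiers from FLAGS without showing their definitions. A plausible sketch of those absl flag definitions follows; the defaults and help strings are illustrative assumptions, not taken from the original script.

# Sketch of the flag definitions the example above relies on; default values
# and help strings here are placeholders.
from absl import flags

FLAGS = flags.FLAGS
flags.DEFINE_float('learning_rate', 0.02, 'Learning rate for SGD training.')
flags.DEFINE_integer('batch_size', 250, 'Training batch size.')
flags.DEFINE_integer('epochs', 100, 'Number of training epochs.')
flags.DEFINE_string('model_dir', None,
                    'Directory for checkpoints and TensorBoard summaries.')
flags.DEFINE_bool('tensorboard_merge_classifiers', False,
                  'If True, plot the different attack classifiers for the '
                  'same slice in a single TensorBoard figure.')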
Example #4
 def test_run_attack_data_size(self):
   result = mia.run_attacks(
       get_test_input(100, 80), SlicingSpec(by_class=True),
       (AttackType.THRESHOLD_ATTACK,))
   self.assertEqual(result.single_attack_results[0].data_size,
                    DataSize(ntrain=100, ntest=80))
   self.assertEqual(result.single_attack_results[3].data_size,
                    DataSize(ntrain=20, ntest=16))
Example #5
  def test_run_attack_by_slice(self):
    result = mia.run_attacks(
        get_test_input(100, 100), SlicingSpec(by_class=True),
        (AttackType.THRESHOLD_ATTACK,))

    self.assertLen(result.single_attack_results, 6)
    expected_slice = SingleSliceSpec(SlicingFeature.CLASS, 2)
    self.assertEqual(result.single_attack_results[3].slice_spec, expected_slice)
Example #6
 def test_slice_by_percentiles(self):
     input_data = SlicingSpec(entire_dataset=False, by_percentiles=True)
     expected0 = SingleSliceSpec(SlicingFeature.PERCENTILE, (0, 10))
     expected5 = SingleSliceSpec(SlicingFeature.PERCENTILE, (50, 60))
     output = get_single_slice_specs(input_data)
     self.assertLen(output, 10)
     self.assertTrue(_are_all_fields_equal(output[0], expected0))
     self.assertTrue(_are_all_fields_equal(output[5], expected5))
Example #7
 def test_slice_by_classes(self):
     input_data = SlicingSpec(by_class=True)
     n_classes = 5
     expected = [self.ENTIRE_DATASET_SLICE] + [
         SingleSliceSpec(SlicingFeature.CLASS, c) for c in range(n_classes)
     ]
     output = get_single_slice_specs(input_data, n_classes)
     self.assertTrue(_are_lists_equal(output, expected))
Example #8
 def test_slicing_by_multiple_features(self):
     input_data = SlicingSpec(entire_dataset=True,
                              by_class=True,
                              by_percentiles=True,
                              by_classification_correctness=True)
     n_classes = 10
     expected_slices = n_classes
     expected_slices += 1  # entire dataset slice
     expected_slices += 10  # percentiles slices
      expected_slices += 2  # classification correctness slices
     output = get_single_slice_specs(input_data, n_classes)
     self.assertLen(output, expected_slices)
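
To make the slice expansion exercised by the test above concrete, here is a small sketch of calling get_single_slice_specs directly; the printed string form of each SingleSliceSpec depends on the library version.

# Expand a multi-feature SlicingSpec into individual single-feature slices.
spec = SlicingSpec(entire_dataset=True,
                   by_class=True,
                   by_percentiles=True,
                   by_classification_correctness=True)
single_slices = get_single_slice_specs(spec, 10)  # 10 classes

# 1 (entire dataset) + 10 (classes) + 10 (percentile deciles)
# + 2 (correctly / incorrectly classified) = 23 single-slice specs.
print(len(single_slices))
for single_slice in single_slices[:3]:
  print(single_slice)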
def run_attacks(attack_input: AttackInputData,
                slicing_spec: SlicingSpec = None,
                attack_types: Iterable[AttackType] = (
                    AttackType.THRESHOLD_ATTACK,),
                privacy_report_metadata: PrivacyReportMetadata = None,
                balance_attacker_training: bool = True,
                min_num_samples: int = 1) -> AttackResults:
  """Runs membership inference attacks on a classification model.

  It runs attacks specified by attack_types on each attack_input slice which is
  specified by slicing_spec.

  Args:
    attack_input: input data for running an attack
    slicing_spec: specifies attack_input slices to run attack on
    attack_types: attacks to run
    privacy_report_metadata: the metadata of the model under attack.
    balance_attacker_training: Whether the training and test sets for the
      membership inference attacker should have a balanced (roughly equal)
      number of samples from the training and test sets used to develop the
      model under attack.
    min_num_samples: minimum number of examples in either training or test
      data.

  Returns:
    the attack result.
  """
  attack_input.validate()
  attack_results = []

  if slicing_spec is None:
    slicing_spec = SlicingSpec(entire_dataset=True)
  num_classes = None
  if slicing_spec.by_class:
    num_classes = attack_input.num_classes
  input_slice_specs = get_single_slice_specs(slicing_spec, num_classes)
  for single_slice_spec in input_slice_specs:
    attack_input_slice = get_slice(attack_input, single_slice_spec)
    for attack_type in attack_types:
      attack_result = _run_attack(attack_input_slice, attack_type,
                                  balance_attacker_training, min_num_samples)
      if attack_result is not None:
        attack_results.append(attack_result)

  privacy_report_metadata = _compute_missing_privacy_report_metadata(
      privacy_report_metadata, attack_input)

  return AttackResults(
      single_attack_results=attack_results,
      privacy_report_metadata=privacy_report_metadata)
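
A minimal calling sketch for run_attacks (not part of the original example set): logits_train, logits_test, labels_train and labels_test are placeholder arrays assumed to be already computed from the model under attack, and the get_result_with_max_auc()/get_auc() helpers are assumed from recent versions of the library.

# Hypothetical inputs: logits and labels for train (member) and test
# (non-member) examples of the model under attack.
attack_input = AttackInputData(
    logits_train=logits_train,
    logits_test=logits_test,
    labels_train=labels_train,
    labels_test=labels_test)

attack_results = run_attacks(
    attack_input,
    slicing_spec=SlicingSpec(entire_dataset=True, by_class=True),
    attack_types=(AttackType.THRESHOLD_ATTACK,
                  AttackType.LOGISTIC_REGRESSION))

# Inspect the strongest attack found across all slices and attack types.
best_result = attack_results.get_result_with_max_auc()
print('Best attack AUC: %.3f' % best_result.get_auc())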
Example #10
  def test_trained_attacks_logits_only_size(self):
    result = mia.run_attacks(
        get_test_input_logits_only(100, 100), SlicingSpec(),
        (AttackType.LOGISTIC_REGRESSION,))

    self.assertLen(result.single_attack_results, 1)
Example #11
  def test_run_attacks_size(self):
    result = mia.run_attacks(
        get_test_input(100, 100), SlicingSpec(),
        (AttackType.THRESHOLD_ATTACK, AttackType.LOGISTIC_REGRESSION))

    self.assertLen(result.single_attack_results, 2)
def main(unused_argv):
    logger = tf.get_logger()
    logger.set_level(logging.ERROR)

    # Load training and test data.
    x_train, y_train, x_test, y_test = load_cifar10()

    # Instantiate the tf.Estimator.
    classifier = tf_estimator.Estimator(model_fn=small_cnn_fn,
                                        model_dir=FLAGS.model_dir)

    # A function to construct input_fn given (data, label), to be used by the
    # membership inference training hook.
    def input_fn_constructor(x, y):
        return tf_compat_v1_estimator.inputs.numpy_input_fn(x={'x': x},
                                                            y=y,
                                                            shuffle=False)

    # Get hook for membership inference attack.
    mia_hook = MembershipInferenceTrainingHook(
        classifier, (x_train, y_train), (x_test, y_test),
        input_fn_constructor,
        slicing_spec=SlicingSpec(entire_dataset=True, by_class=True),
        attack_types=[
            AttackType.THRESHOLD_ATTACK, AttackType.K_NEAREST_NEIGHBORS
        ],
        tensorboard_dir=FLAGS.model_dir,
        tensorboard_merge_classifiers=FLAGS.tensorboard_merge_classifiers)

    # Create tf.Estimator input functions for the training and test data.
    train_input_fn = tf_compat_v1_estimator.inputs.numpy_input_fn(
        x={'x': x_train},
        y=y_train,
        batch_size=FLAGS.batch_size,
        num_epochs=FLAGS.epochs,
        shuffle=True)
    eval_input_fn = tf_compat_v1_estimator.inputs.numpy_input_fn(
        x={'x': x_test}, y=y_test, num_epochs=1, shuffle=False)

    # Training loop.
    # CIFAR-10 has 50000 training examples, so each call trains one epoch.
    steps_per_epoch = 50000 // FLAGS.batch_size
    for epoch in range(1, FLAGS.epochs + 1):
        # Train the model, with the membership inference hook.
        classifier.train(input_fn=train_input_fn,
                         steps=steps_per_epoch,
                         hooks=[mia_hook])

        # Evaluate the model and print results
        eval_results = classifier.evaluate(input_fn=eval_input_fn)
        test_accuracy = eval_results['accuracy']
        print('Test accuracy after %d epochs is: %.3f' %
              (epoch, test_accuracy))

    print('End of training attack')
    attack_results = run_attack_on_tf_estimator_model(
        classifier, (x_train, y_train), (x_test, y_test),
        input_fn_constructor,
        slicing_spec=SlicingSpec(entire_dataset=True, by_class=True),
        attack_types=[
            AttackType.THRESHOLD_ATTACK, AttackType.K_NEAREST_NEIGHBORS
        ])
    att_types, att_slices, att_metrics, att_values = get_flattened_attack_metrics(
        attack_results)
    print('\n'.join([
        '  %s: %.4f' % (', '.join([s, t, m]), v)
        for t, s, m, v in zip(att_types, att_slices, att_metrics, att_values)
    ]))
Example #13
 def test_entire_dataset(self):
     input_data = SlicingSpec()
     expected = [self.ENTIRE_DATASET_SLICE]
     output = get_single_slice_specs(input_data)
     self.assertTrue(_are_lists_equal(output, expected))
Example #14
 def test_no_slices(self):
     input_data = SlicingSpec(entire_dataset=False)
     expected = []
     output = get_single_slice_specs(input_data)
     self.assertTrue(_are_lists_equal(output, expected))