# Example 1
    def __init__(self, *args, **kwargs):
        """Builds the attack-result collections shared by the tests."""
        super(AttackResultsCollectionTest, self).__init__(*args, **kwargs)

        # A single threshold-attack result with a diagonal ROC (tpr == fpr),
        # reused by every fixture below.
        self.some_attack_result = SingleAttackResult(
            slice_spec=SingleSliceSpec(None),
            attack_type=AttackType.THRESHOLD_ATTACK,
            roc_curve=RocCurve(
                tpr=np.array([0.0, 0.5, 1.0]),
                fpr=np.array([0.0, 0.5, 1.0]),
                thresholds=np.array([0, 1, 2])))

        def _results_for_epoch(train_acc, test_acc, epoch):
            # Wraps the shared result with 'default'-variant report metadata.
            return AttackResults(
                single_attack_results=[self.some_attack_result],
                privacy_report_metadata=PrivacyReportMetadata(
                    accuracy_train=train_acc,
                    accuracy_test=test_acc,
                    epoch_num=epoch,
                    model_variant_label='default'))

        self.results_epoch_10 = _results_for_epoch(0.4, 0.3, 10)
        self.results_epoch_15 = _results_for_epoch(0.5, 0.4, 15)

        # Results with no report metadata attached at all.
        self.attack_results_no_metadata = AttackResults(
            single_attack_results=[self.some_attack_result])

        self.collection_with_metadata = AttackResultsCollection(
            [self.results_epoch_10, self.results_epoch_15])

        self.collection_no_metadata = AttackResultsCollection(
            [self.attack_results_no_metadata, self.attack_results_no_metadata])
  def __init__(self, *args, **kwargs):
    """Builds attack-result fixtures shared by the privacy-report tests."""
    super(PrivacyReportTest, self).__init__(*args, **kwargs)

    def _threshold_result(tpr, fpr):
      # Builds a whole-dataset threshold-attack result for the given ROC
      # points (thresholds and data size are the same for every fixture).
      return SingleAttackResult(
          slice_spec=SingleSliceSpec(None),
          attack_type=AttackType.THRESHOLD_ATTACK,
          roc_curve=RocCurve(
              tpr=np.array(tpr),
              fpr=np.array(fpr),
              thresholds=np.array([0, 1, 2])),
          data_size=DataSize(ntrain=1, ntest=1))

    # Classifier that achieves an AUC of 0.5.
    self.imperfect_classifier_result = _threshold_result(
        [0.0, 0.5, 1.0], [0.0, 0.5, 1.0])

    # Classifier that achieves an AUC of 1.0.
    self.perfect_classifier_result = _threshold_result(
        [0.0, 1.0, 1.0], [1.0, 1.0, 0.0])

    def _epoch_results(result, train_acc, test_acc, epoch, label):
      # Wraps a single result with per-epoch report metadata.
      return AttackResults(
          single_attack_results=[result],
          privacy_report_metadata=PrivacyReportMetadata(
              accuracy_train=train_acc,
              accuracy_test=test_acc,
              epoch_num=epoch,
              model_variant_label=label))

    self.results_epoch_0 = _epoch_results(
        self.imperfect_classifier_result, 0.4, 0.3, 0, 'default')
    self.results_epoch_10 = _epoch_results(
        self.imperfect_classifier_result, 0.4, 0.3, 10, 'default')
    self.results_epoch_15 = _epoch_results(
        self.perfect_classifier_result, 0.5, 0.4, 15, 'default')
    self.results_epoch_15_model_2 = _epoch_results(
        self.perfect_classifier_result, 0.6, 0.7, 15, 'model 2')

    self.attack_results_no_metadata = AttackResults(
        single_attack_results=[self.perfect_classifier_result])
# Example 3
    def test_save_load(self):
        """Round-trips AttackResults through save()/load() unchanged."""
        original = AttackResults(
            [self.perfect_classifier_result, self.random_classifier_result])

        with tempfile.TemporaryDirectory() as tmpdirname:
            pickle_path = os.path.join(tmpdirname, 'results.pickle')
            original.save(pickle_path)
            restored = AttackResults.load(pickle_path)

        # repr comparison: equal representations imply an equivalent reload.
        self.assertEqual(repr(original), repr(restored))
# Example 4
 def test_summary_without_slices(self):
     """summary(by_slices=False) lists only the overall best attacks."""
     attack_results = AttackResults(
         [self.perfect_classifier_result, self.random_classifier_result])
     # Implicit string concatenation; the resulting text is unchanged.
     expected_summary = (
         'Best-performing attacks over all slices\n'
         '  THRESHOLD_ATTACK achieved an AUC of 1.00 '
         'on slice CORRECTLY_CLASSIFIED=True\n'
         '  THRESHOLD_ATTACK achieved an advantage of 1.00 '
         'on slice CORRECTLY_CLASSIFIED=True')
     self.assertEqual(attack_results.summary(by_slices=False),
                      expected_summary)
# Example 5
 def test_calculate_pd_dataframe(self):
     """The dataframe has one row per attack with its slice and metrics."""
     attack_results = AttackResults(
         [self.perfect_classifier_result, self.random_classifier_result])
     expected_frame = pd.DataFrame({
         'slice feature': ['correctly_classified', 'Entire dataset'],
         'slice value': ['True', ''],
         'attack type': ['THRESHOLD_ATTACK', 'THRESHOLD_ATTACK'],
         'Attacker advantage': [1.0, 0.0],
         'AUC': [1.0, 0.5]
     })
     pd.testing.assert_frame_equal(
         attack_results.calculate_pd_dataframe(), expected_frame)
 def test_calculate_pd_dataframe(self):
   """calculate_pd_dataframe emits one row per attack result."""
   attack_results = AttackResults(
       [self.perfect_classifier_result, self.random_classifier_result])
   actual_frame = attack_results.calculate_pd_dataframe()
   # NOTE(review): 'correctly_classfied' / 'threshold' mirror the column
   # values produced by this library version — confirm before "fixing".
   expected_frame = pd.DataFrame({
       'slice feature': ['correctly_classfied', 'entire_dataset'],
       'slice value': ['True', ''],
       'attack type': ['threshold', 'threshold'],
       'Attacker advantage': [1.0, 0.0],
       'AUC': [1.0, 0.5]
   })
   self.assertTrue(actual_frame.equals(expected_frame))
# Example 7
 def test_summary_by_slices(self):
     """summary(by_slices=True) appends per-slice best-attack sections."""
     attack_results = AttackResults(
         [self.perfect_classifier_result, self.random_classifier_result])
     # Implicit string concatenation; the resulting text is unchanged.
     expected_summary = (
         'Best-performing attacks over all slices\n'
         '  THRESHOLD_ATTACK achieved an AUC of 1.00 '
         'on slice CORRECTLY_CLASSIFIED=True\n'
         '  THRESHOLD_ATTACK achieved an advantage of 1.00 '
         'on slice CORRECTLY_CLASSIFIED=True\n\n'
         'Best-performing attacks over slice: "CORRECTLY_CLASSIFIED=True"\n'
         '  THRESHOLD_ATTACK achieved an AUC of 1.00\n'
         '  THRESHOLD_ATTACK achieved an advantage of 1.00\n\n'
         'Best-performing attacks over slice: "Entire dataset"\n'
         '  THRESHOLD_ATTACK achieved an AUC of 0.50\n'
         '  THRESHOLD_ATTACK achieved an advantage of 0.00')
     self.assertEqual(attack_results.summary(by_slices=True),
                      expected_summary)
# Example 8
def run_seq2seq_attack(
        attack_input: Seq2SeqAttackInputData,
        privacy_report_metadata: PrivacyReportMetadata = None,
        balance_attacker_training: bool = True) -> AttackResults:
    """Runs a membership inference attack on a seq2seq model.

    Args:
      attack_input: input data for running an attack.
      privacy_report_metadata: the metadata of the model under attack; a
        fresh instance is created when omitted.
      balance_attacker_training: whether the training and test sets for the
        membership inference attacker should have a balanced (roughly equal)
        number of samples from the training and test sets used to develop
        the model under attack.

    Returns:
      The attack result.
    """
    attack_input.validate()

    # Membership is decided from a single number per record (the average
    # rank of a seq2seq dataset record), so only Logistic Regression is
    # supported — it makes the most sense for single-number features.
    lr_attacker = models.LogisticRegressionAttacker()

    # Build attacker data; this also populates privacy_report_metadata.
    privacy_report_metadata = privacy_report_metadata or PrivacyReportMetadata(
    )
    attacker_data = create_seq2seq_attacker_data(
        attack_input_data=attack_input,
        balance=balance_attacker_training,
        privacy_report_metadata=privacy_report_metadata)

    lr_attacker.train_model(attacker_data.features_train,
                            attacker_data.is_training_labels_train)

    # Score the (permuted) test examples and derive the ROC curve.
    test_scores = lr_attacker.predict(attacker_data.features_test)
    fpr, tpr, thresholds = metrics.roc_curve(
        attacker_data.is_training_labels_test, test_scores)

    seq2seq_result = SingleAttackResult(
        slice_spec=SingleSliceSpec(),
        attack_type=AttackType.LOGISTIC_REGRESSION,
        roc_curve=RocCurve(tpr=tpr, fpr=fpr, thresholds=thresholds),
        data_size=attacker_data.data_size)

    return AttackResults(single_attack_results=[seq2seq_result],
                         privacy_report_metadata=privacy_report_metadata)
def run_attacks(attack_input: AttackInputData,
                slicing_spec: SlicingSpec = None,
                attack_types: Iterable[AttackType] = (
                    AttackType.THRESHOLD_ATTACK, ),
                privacy_report_metadata: PrivacyReportMetadata = None,
                balance_attacker_training: bool = True,
                min_num_samples: int = 1) -> AttackResults:
    """Runs membership inference attacks on a classification model.

    Every attack in attack_types is run on each attack_input slice described
    by slicing_spec.

    Args:
      attack_input: input data for running an attack.
      slicing_spec: specifies attack_input slices to run the attacks on;
        defaults to the entire dataset.
      attack_types: attacks to run.
      privacy_report_metadata: the metadata of the model under attack.
      balance_attacker_training: whether the training and test sets for the
        membership inference attacker should have a balanced (roughly equal)
        number of samples from the training and test sets used to develop
        the model under attack.
      min_num_samples: minimum number of examples in either training or test
        data.

    Returns:
      The attack result.
    """
    attack_input.validate()

    if slicing_spec is None:
        slicing_spec = SlicingSpec(entire_dataset=True)
    # Per-class slicing is the only mode that needs the class count.
    num_classes = attack_input.num_classes if slicing_spec.by_class else None

    attack_results = []
    for single_slice_spec in get_single_slice_specs(slicing_spec, num_classes):
        sliced_input = get_slice(attack_input, single_slice_spec)
        for attack_type in attack_types:
            # Keep only the attacks that produced a result (non-None).
            result = _run_attack(sliced_input, attack_type,
                                 balance_attacker_training, min_num_samples)
            if result is not None:
                attack_results.append(result)

    privacy_report_metadata = _compute_missing_privacy_report_metadata(
        privacy_report_metadata, attack_input)

    return AttackResults(single_attack_results=attack_results,
                         privacy_report_metadata=privacy_report_metadata)
# Example 10
def run_attacks(
    attack_input: AttackInputData,
    slicing_spec: SlicingSpec = None,
    attack_types: Iterable[AttackType] = (AttackType.THRESHOLD_ATTACK, )
) -> AttackResults:
    """Run all attacks."""
    attack_input.validate()

    if slicing_spec is None:
        slicing_spec = SlicingSpec(entire_dataset=True)
    slice_specs = get_single_slice_specs(slicing_spec,
                                         attack_input.num_classes)
    attack_results = []
    for spec in slice_specs:
        # Slice once per spec, then run every requested attack on it.
        sliced_input = get_slice(attack_input, spec)
        attack_results.extend(
            run_attack(sliced_input, attack_type)
            for attack_type in attack_types)

    return AttackResults(single_attack_results=attack_results)
# Example 11
def run_attacks(
        attack_input: AttackInputData,
        slicing_spec: SlicingSpec = None,
        attack_types: Iterable[AttackType] = (AttackType.THRESHOLD_ATTACK, ),
        privacy_report_metadata: PrivacyReportMetadata = None
) -> AttackResults:
    """Runs membership inference attacks on a classification model.

    Every attack in attack_types is run on each attack_input slice described
    by slicing_spec.

    Args:
      attack_input: input data for running an attack.
      slicing_spec: specifies attack_input slices to run the attacks on;
        defaults to the entire dataset.
      attack_types: attacks to run.
      privacy_report_metadata: the metadata of the model under attack.

    Returns:
      The attack result.
    """
    attack_input.validate()

    if slicing_spec is None:
        slicing_spec = SlicingSpec(entire_dataset=True)
    slice_specs = get_single_slice_specs(slicing_spec,
                                         attack_input.num_classes)
    attack_results = []
    for spec in slice_specs:
        # Slice once per spec, then run every requested attack on it.
        sliced_input = get_slice(attack_input, spec)
        attack_results.extend(
            _run_attack(sliced_input, attack_type)
            for attack_type in attack_types)

    privacy_report_metadata = _compute_missing_privacy_report_metadata(
        privacy_report_metadata, attack_input)

    return AttackResults(single_attack_results=attack_results,
                         privacy_report_metadata=privacy_report_metadata)
# Example 12
 def test_get_result_with_max_attacker_advantage_second(self):
     """Finds the perfect classifier even when it is listed second."""
     attack_results = AttackResults(
         [self.random_classifier_result, self.perfect_classifier_result])
     best = attack_results.get_result_with_max_attacker_advantage()
     self.assertEqual(best, self.perfect_classifier_result)
# Example 13
 def test_get_result_with_max_auc_first(self):
     """Finds the perfect classifier when it is listed first."""
     attack_results = AttackResults(
         [self.perfect_classifier_result, self.random_classifier_result])
     best = attack_results.get_result_with_max_auc()
     self.assertEqual(best, self.perfect_classifier_result)
# Example 14
# Run membership inference attacks over the entire dataset and per class.
attack_results = mia.run_attacks(
    AttackInputData(
        labels_train=training_labels,
        labels_test=test_labels,
        logits_train=training_pred,
        logits_test=test_pred,
        loss_train=crossentropy(training_labels, training_pred),
        loss_test=crossentropy(test_labels, test_pred)),
    SlicingSpec(entire_dataset=True, by_class=True),
    attack_types=(AttackType.THRESHOLD_ATTACK, AttackType.LOGISTIC_REGRESSION))

# Example of saving the results to the file and loading them back.
with tempfile.TemporaryDirectory() as tmpdirname:
  filepath = os.path.join(tmpdirname, "results.pickle")
  attack_results.save(filepath)
  loaded_results = AttackResults.load(filepath)

# Print attack metrics
for attack_result in attack_results.single_attack_results:
  print("Slice: %s" % attack_result.slice_spec)
  print("Attack type: %s" % attack_result.attack_type)
  print("AUC: %.2f" % attack_result.roc_curve.get_auc())

  print("Attacker advantage: %.2f\n" %
        attack_result.roc_curve.get_attacker_advantage())

# Fix: use get_result_with_max_auc() here — the original called
# get_result_with_max_attacker_advantage(), contradicting the message below.
max_auc_attacker = attack_results.get_result_with_max_auc()
print("Attack type with max AUC: %s, AUC of %.2f" %
      (max_auc_attacker.attack_type, max_auc_attacker.roc_curve.get_auc()))

max_advantage_attacker = attack_results.get_result_with_max_attacker_advantage()
# Fix: the original computed max_advantage_attacker but never reported it.
print("Attack type with max advantage: %s, Attacker advantage of %.2f" %
      (max_advantage_attacker.attack_type,
       max_advantage_attacker.roc_curve.get_attacker_advantage()))
# Example 15
def main(unused_argv):
    """Trains two models incrementally, attacks them after every training
    round, and prints/plots privacy reports and attack summaries.

    Relies on module-level state: two_layer_model / three_layer_model,
    the training/test features and labels, and num_clusters.
    """
    # One AttackResults entry is appended per (model variant, checkpoint).
    epoch_results = AttackResultsCollection([])

    num_epochs = 2
    models = {
        "two layer model": two_layer_model,
        "three layer model": three_layer_model,
    }
    for model_name in models:
        # Incrementally train the model and store privacy metrics every num_epochs.
        for i in range(1, 6):
            models[model_name].fit(
                training_features,
                to_categorical(training_labels, num_clusters),
                validation_data=(test_features,
                                 to_categorical(test_labels, num_clusters)),
                batch_size=64,
                epochs=num_epochs,
                shuffle=True)

            training_pred = models[model_name].predict(training_features)
            test_pred = models[model_name].predict(test_features)

            # Add metadata to generate a privacy report.
            privacy_report_metadata = PrivacyReportMetadata(
                accuracy_train=metrics.accuracy_score(
                    training_labels, np.argmax(training_pred, axis=1)),
                accuracy_test=metrics.accuracy_score(
                    test_labels, np.argmax(test_pred, axis=1)),
                epoch_num=num_epochs * i,
                model_variant_label=model_name)

            # Attack over the entire dataset and per class, with two attack types.
            attack_results = mia.run_attacks(
                AttackInputData(labels_train=training_labels,
                                labels_test=test_labels,
                                probs_train=training_pred,
                                probs_test=test_pred,
                                loss_train=crossentropy(
                                    training_labels, training_pred),
                                loss_test=crossentropy(test_labels,
                                                       test_pred)),
                SlicingSpec(entire_dataset=True, by_class=True),
                attack_types=(AttackType.THRESHOLD_ATTACK,
                              AttackType.LOGISTIC_REGRESSION),
                privacy_report_metadata=privacy_report_metadata)
            epoch_results.append(attack_results)

    # Generate privacy reports
    epoch_figure = privacy_report.plot_by_epochs(
        epoch_results, [PrivacyMetric.ATTACKER_ADVANTAGE, PrivacyMetric.AUC])
    epoch_figure.show()
    privacy_utility_figure = privacy_report.plot_privacy_vs_accuracy_single_model(
        epoch_results, [PrivacyMetric.ATTACKER_ADVANTAGE, PrivacyMetric.AUC])
    privacy_utility_figure.show()

    # Example of saving the results to the file and loading them back.
    # NOTE(review): from here on, attack_results is the loop variable left
    # over from the last model/epoch, so only that final run is saved,
    # printed and summarized below.
    with tempfile.TemporaryDirectory() as tmpdirname:
        filepath = os.path.join(tmpdirname, "results.pickle")
        attack_results.save(filepath)
        loaded_results = AttackResults.load(filepath)
        print(loaded_results.summary(by_slices=False))

    # Print attack metrics
    for attack_result in attack_results.single_attack_results:
        print("Slice: %s" % attack_result.slice_spec)
        print("Attack type: %s" % attack_result.attack_type)
        print("AUC: %.2f" % attack_result.roc_curve.get_auc())

        print("Attacker advantage: %.2f\n" %
              attack_result.roc_curve.get_attacker_advantage())

    max_auc_attacker = attack_results.get_result_with_max_auc()
    print("Attack type with max AUC: %s, AUC of %.2f" %
          (max_auc_attacker.attack_type, max_auc_attacker.roc_curve.get_auc()))

    max_advantage_attacker = attack_results.get_result_with_max_attacker_advantage(
    )
    print("Attack type with max advantage: %s, Attacker advantage of %.2f" %
          (max_advantage_attacker.attack_type,
           max_advantage_attacker.roc_curve.get_attacker_advantage()))

    # Print summary
    print("Summary without slices: \n")
    print(attack_results.summary(by_slices=False))

    print("Summary by slices: \n")
    print(attack_results.summary(by_slices=True))

    # Print pandas data frame
    print("Pandas frame: \n")
    pd.set_option("display.max_rows", None, "display.max_columns", None)
    print(attack_results.calculate_pd_dataframe())

    # Example of ROC curve plotting.
    figure = plotting.plot_roc_curve(
        attack_results.single_attack_results[0].roc_curve)
    figure.show()
    plt.show()