Beispiel #1
0
 def test_summary_without_slices(self):
   """Checks the summary text returned when by_slices is False."""
   attack_results = AttackResults(
       [self.perfect_classifier_result, self.random_classifier_result])
   actual_summary = attack_results.summary(by_slices=False)
   # Adjacent literals are joined by the parser into one expected string.
   expected_summary = (
       'Best-performing attacks over all slices\n'
       '  THRESHOLD_ATTACK (with 1 training and 1 test examples) achieved an'
       ' AUC of 1.00 on slice CORRECTLY_CLASSIFIED=True\n'
       '  THRESHOLD_ATTACK (with 1 training and 1 test examples) achieved an'
       ' advantage of 1.00 on slice CORRECTLY_CLASSIFIED=True')
   self.assertEqual(actual_summary, expected_summary)
Beispiel #2
0
  def test_save_load(self):
    """Checks that saving and then loading results keeps the same repr."""
    original = AttackResults(
        [self.perfect_classifier_result, self.random_classifier_result])

    # The temporary directory (and the file in it) is removed on exit.
    with tempfile.TemporaryDirectory() as scratch_dir:
      pickle_path = os.path.join(scratch_dir, 'results.pickle')
      original.save(pickle_path)
      restored = AttackResults.load(pickle_path)

    self.assertEqual(repr(original), repr(restored))
    def __init__(self, *args, **kwargs):
        """Creates the attack-result fixtures stored on the instance.

        All positional and keyword arguments are forwarded unchanged to the
        parent initializer.
        """
        super().__init__(*args, **kwargs)

        def threshold_result(tpr_points, fpr_points):
            # Threshold-attack result with SingleSliceSpec(None) and one
            # training / one test example, for the given ROC points.
            return SingleAttackResult(
                slice_spec=SingleSliceSpec(None),
                attack_type=AttackType.THRESHOLD_ATTACK,
                roc_curve=RocCurve(tpr=np.array(tpr_points),
                                   fpr=np.array(fpr_points),
                                   thresholds=np.array([0, 1, 2])),
                data_size=DataSize(ntrain=1, ntest=1))

        def report(single_result, train_acc, test_acc, epoch, label):
            # AttackResults wrapping one single result plus its metadata.
            return AttackResults(
                single_attack_results=[single_result],
                privacy_report_metadata=PrivacyReportMetadata(
                    accuracy_train=train_acc,
                    accuracy_test=test_acc,
                    epoch_num=epoch,
                    model_variant_label=label))

        # Classifier that achieves an AUC of 0.5.
        self.imperfect_classifier_result = threshold_result(
            [0.0, 0.5, 1.0], [0.0, 0.5, 1.0])

        # Classifier that achieves an AUC of 1.0.
        self.perfect_classifier_result = threshold_result(
            [0.0, 1.0, 1.0], [1.0, 1.0, 0.0])

        imperfect = self.imperfect_classifier_result
        perfect = self.perfect_classifier_result

        self.results_epoch_0 = report(imperfect, 0.4, 0.3, 0, 'default')
        self.results_epoch_10 = report(imperfect, 0.4, 0.3, 10, 'default')
        self.results_epoch_15 = report(perfect, 0.5, 0.4, 15, 'default')
        self.results_epoch_15_model_2 = report(perfect, 0.6, 0.7, 15,
                                               'model 2')

        # Same perfect result, but without any privacy report metadata.
        self.attack_results_no_metadata = AttackResults(
            single_attack_results=[perfect])
Beispiel #4
0
 def test_calculate_pd_dataframe(self):
   """Checks the DataFrame computed from two single attack results."""
   attack_results = AttackResults(
       [self.perfect_classifier_result, self.random_classifier_result])
   actual_frame = attack_results.calculate_pd_dataframe()

   # One list entry per single attack result, in the order passed above.
   expected_columns = {
       'slice feature': ['correctly_classified', 'Entire dataset'],
       'slice value': ['True', ''],
       'train size': [1, 1],
       'test size': [1, 1],
       'attack type': ['THRESHOLD_ATTACK', 'THRESHOLD_ATTACK'],
       'Attacker advantage': [1.0, 0.0],
       'AUC': [1.0, 0.5]
   }
   expected_frame = pd.DataFrame(expected_columns)

   pd.testing.assert_frame_equal(actual_frame, expected_frame)
def run_attacks(attack_input: AttackInputData,
                slicing_spec: SlicingSpec = None,
                attack_types: Iterable[AttackType] = (
                    AttackType.THRESHOLD_ATTACK, ),
                privacy_report_metadata: PrivacyReportMetadata = None,
                balance_attacker_training: bool = True,
                min_num_samples: int = 1) -> AttackResults:
    """Runs membership inference attacks on a classification model.

    It runs attacks specified by attack_types on each attack_input slice which
    is specified by slicing_spec.

    Args:
      attack_input: input data for running an attack
      slicing_spec: specifies attack_input slices to run attack on. When None,
        a SlicingSpec covering only the entire dataset is used.
      attack_types: attacks to run. Any iterable is accepted, including a
        one-shot iterator; it is consumed exactly once.
      privacy_report_metadata: the metadata of the model under attack.
      balance_attacker_training: Whether the training and test sets for the
        membership inference attacker should have a balanced (roughly equal)
        number of samples from the training and test sets used to develop the
        model under attack.
      min_num_samples: minimum number of examples in either training or test
        data.

    Returns:
      the attack result.
    """
    attack_input.validate()

    # attack_types is traversed once per slice below. Materialize it so that a
    # one-shot iterable (e.g. a generator) is not exhausted after the first
    # slice, which would silently skip every attack on the remaining slices.
    attack_types = tuple(attack_types)

    if slicing_spec is None:
        slicing_spec = SlicingSpec(entire_dataset=True)

    # The number of classes is only looked up when slicing by class.
    num_classes = attack_input.num_classes if slicing_spec.by_class else None

    attack_results = []
    for single_slice_spec in get_single_slice_specs(slicing_spec,
                                                    num_classes):
        attack_input_slice = get_slice(attack_input, single_slice_spec)
        for attack_type in attack_types:
            attack_result = _run_attack(attack_input_slice, attack_type,
                                        balance_attacker_training,
                                        min_num_samples)
            # _run_attack may return None; such results are left out.
            if attack_result is not None:
                attack_results.append(attack_result)

    privacy_report_metadata = _compute_missing_privacy_report_metadata(
        privacy_report_metadata, attack_input)

    return AttackResults(single_attack_results=attack_results,
                         privacy_report_metadata=privacy_report_metadata)
Beispiel #6
0
  def __init__(self, *args, **kwargs):
    """Creates the attack results and collections stored on the instance.

    All positional and keyword arguments are forwarded unchanged to the
    parent initializer.
    """
    super().__init__(*args, **kwargs)

    def results_at_epoch(epoch, train_accuracy, test_accuracy):
      # AttackResults for the shared single result, labelled 'default'.
      return AttackResults(
          single_attack_results=[self.some_attack_result],
          privacy_report_metadata=PrivacyReportMetadata(
              accuracy_train=train_accuracy,
              accuracy_test=test_accuracy,
              epoch_num=epoch,
              model_variant_label='default'))

    # Single threshold-attack result reused by every fixture below.
    self.some_attack_result = SingleAttackResult(
        slice_spec=SingleSliceSpec(None),
        attack_type=AttackType.THRESHOLD_ATTACK,
        roc_curve=RocCurve(
            tpr=np.array([0.0, 0.5, 1.0]),
            fpr=np.array([0.0, 0.5, 1.0]),
            thresholds=np.array([0, 1, 2])),
        data_size=DataSize(ntrain=1, ntest=1))

    self.results_epoch_10 = results_at_epoch(10, 0.4, 0.3)
    self.results_epoch_15 = results_at_epoch(15, 0.5, 0.4)

    # Results built without any privacy report metadata.
    self.attack_results_no_metadata = AttackResults(
        single_attack_results=[self.some_attack_result])

    with_metadata = [self.results_epoch_10, self.results_epoch_15]
    self.collection_with_metadata = AttackResultsCollection(with_metadata)

    # The same metadata-free results object appears twice in this collection.
    without_metadata = [self.attack_results_no_metadata] * 2
    self.collection_no_metadata = AttackResultsCollection(without_metadata)
Beispiel #7
0
 def test_get_result_with_max_attacker_advantage_second(self):
   """Expects the perfect result to be returned when it is listed second."""
   candidates = [self.random_classifier_result, self.perfect_classifier_result]
   attack_results = AttackResults(candidates)
   best = attack_results.get_result_with_max_attacker_advantage()
   self.assertEqual(best, self.perfect_classifier_result)
Beispiel #8
0
 def test_get_result_with_max_auc_first(self):
   """Expects the perfect result to be returned when it is listed first."""
   candidates = [self.perfect_classifier_result, self.random_classifier_result]
   attack_results = AttackResults(candidates)
   best = attack_results.get_result_with_max_auc()
   self.assertEqual(best, self.perfect_classifier_result)