def test_run_attack_data_size(self):
    result = mia.run_attacks(
        get_test_input(100, 80), SlicingSpec(by_class=True),
        (AttackType.THRESHOLD_ATTACK,))
    self.assertEqual(result.single_attack_results[0].data_size,
                     DataSize(ntrain=100, ntest=80))
    self.assertEqual(result.single_attack_results[3].data_size,
                     DataSize(ntrain=20, ntest=16))
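
The assertions above are consistent with the whole-dataset slice keeping the full 100/80 split while each by-class slice gets only its class's share. A minimal arithmetic sketch, assuming get_test_input(100, 80) builds a balanced 5-class dataset (an assumption about the test helper, which is not shown here):

# Sketch only: assumes a balanced 5-class dataset, so each by-class slice
# holds one fifth of the train/test examples.
ntrain, ntest, num_classes = 100, 80, 5
assert (ntrain // num_classes, ntest // num_classes) == (20, 16)
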
def __init__(self, *args, **kwargs):
    super(PrivacyReportTest, self).__init__(*args, **kwargs)

    # Classifier that achieves an AUC of 0.5.
    self.imperfect_classifier_result = SingleAttackResult(
        slice_spec=SingleSliceSpec(None),
        attack_type=AttackType.THRESHOLD_ATTACK,
        roc_curve=RocCurve(
            tpr=np.array([0.0, 0.5, 1.0]),
            fpr=np.array([0.0, 0.5, 1.0]),
            thresholds=np.array([0, 1, 2])),
        data_size=DataSize(ntrain=1, ntest=1))

    # Classifier that achieves an AUC of 1.0.
    self.perfect_classifier_result = SingleAttackResult(
        slice_spec=SingleSliceSpec(None),
        attack_type=AttackType.THRESHOLD_ATTACK,
        roc_curve=RocCurve(
            tpr=np.array([0.0, 1.0, 1.0]),
            fpr=np.array([1.0, 1.0, 0.0]),
            thresholds=np.array([0, 1, 2])),
        data_size=DataSize(ntrain=1, ntest=1))

    self.results_epoch_0 = AttackResults(
        single_attack_results=[self.imperfect_classifier_result],
        privacy_report_metadata=PrivacyReportMetadata(
            accuracy_train=0.4,
            accuracy_test=0.3,
            epoch_num=0,
            model_variant_label='default'))

    self.results_epoch_10 = AttackResults(
        single_attack_results=[self.imperfect_classifier_result],
        privacy_report_metadata=PrivacyReportMetadata(
            accuracy_train=0.4,
            accuracy_test=0.3,
            epoch_num=10,
            model_variant_label='default'))

    self.results_epoch_15 = AttackResults(
        single_attack_results=[self.perfect_classifier_result],
        privacy_report_metadata=PrivacyReportMetadata(
            accuracy_train=0.5,
            accuracy_test=0.4,
            epoch_num=15,
            model_variant_label='default'))

    self.results_epoch_15_model_2 = AttackResults(
        single_attack_results=[self.perfect_classifier_result],
        privacy_report_metadata=PrivacyReportMetadata(
            accuracy_train=0.6,
            accuracy_test=0.7,
            epoch_num=15,
            model_variant_label='model 2'))

    self.attack_results_no_metadata = AttackResults(
        single_attack_results=[self.perfect_classifier_result])
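
A quick standalone check (a sketch, not part of the test class above) that the two hand-built ROC curves really have AUC 0.5 and 1.0; sklearn.metrics.auc accepts a monotonically decreasing fpr array such as [1.0, 1.0, 0.0]:

import numpy as np
from sklearn import metrics

# Standalone sanity check (sketch), not part of the test class above.
# Diagonal curve: AUC 0.5 (random classifier).
random_auc = metrics.auc(np.array([0.0, 0.5, 1.0]), np.array([0.0, 0.5, 1.0]))
# Curve hugging the axes: AUC 1.0 (perfect classifier).
perfect_auc = metrics.auc(np.array([1.0, 1.0, 0.0]), np.array([0.0, 1.0, 1.0]))
assert np.isclose(random_auc, 0.5) and np.isclose(perfect_auc, 1.0)
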
Example #3
def create_seq2seq_attacker_data(
    attack_input_data: Seq2SeqAttackInputData,
    test_fraction: float = 0.25,
    balance: bool = True,
    privacy_report_metadata: PrivacyReportMetadata = PrivacyReportMetadata()
) -> AttackerData:
    """Prepares Seq2SeqAttackInputData to train ML attackers.

  Uses logits and losses to generate ranks and performs a random train-test
  split.

  Also computes metadata (loss, accuracy) for the model under attack
  and populates respective fields of PrivacyReportMetadata.

  Args:
    attack_input_data: Original Seq2SeqAttackInputData
    test_fraction: Fraction of the dataset to include in the test split.
    balance: Whether the training and test sets for the membership inference
      attacker should have a balanced (roughly equal) number of samples from the
      training and test sets used to develop the model under attack.
    privacy_report_metadata: the metadata of the model under attack.

  Returns:
    AttackerData.
  """
    attack_input_train, loss_train, accuracy_train = _get_attack_features_and_metadata(
        attack_input_data.logits_train, attack_input_data.labels_train)
    attack_input_test, loss_test, accuracy_test = _get_attack_features_and_metadata(
        attack_input_data.logits_test, attack_input_data.labels_test)

    if balance:
        min_size = min(len(attack_input_train), len(attack_input_test))
        attack_input_train = _sample_multidimensional_array(
            attack_input_train, min_size)
        attack_input_test = _sample_multidimensional_array(
            attack_input_test, min_size)

    features_all = np.concatenate((attack_input_train, attack_input_test))
    ntrain, ntest = attack_input_train.shape[0], attack_input_test.shape[0]

    # Reshape for classifying one-dimensional features
    features_all = features_all.reshape(-1, 1)

    labels_all = np.concatenate((np.zeros(ntrain), np.ones(ntest)))

    # Perform a train-test split
    features_train, features_test, \
    is_training_labels_train, is_training_labels_test = \
      model_selection.train_test_split(
          features_all, labels_all, test_size=test_fraction, stratify=labels_all)

    # Populate accuracy, loss fields in privacy report metadata
    privacy_report_metadata.loss_train = loss_train
    privacy_report_metadata.loss_test = loss_test
    privacy_report_metadata.accuracy_train = accuracy_train
    privacy_report_metadata.accuracy_test = accuracy_test

    return AttackerData(features_train, is_training_labels_train,
                        features_test, is_training_labels_test,
                        DataSize(ntrain=ntrain, ntest=ntest))
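
The two key steps in create_seq2seq_attacker_data, down-sampling the larger split so the attacker sees roughly equal numbers of members and non-members and then doing a stratified train/test split, can be sketched with plain numpy and sklearn (illustrative only; the names below are not library APIs):

import numpy as np
from sklearn import model_selection

# Illustrative sketch; these variable names are not library APIs.
rng = np.random.default_rng(0)
member_feats = rng.normal(size=(120, 1))     # stand-ins for per-example ranks/losses
nonmember_feats = rng.normal(size=(80, 1))

# Balance: subsample the larger group down to the size of the smaller one.
min_size = min(len(member_feats), len(nonmember_feats))
member_feats = member_feats[rng.choice(len(member_feats), min_size, replace=False)]
nonmember_feats = nonmember_feats[rng.choice(len(nonmember_feats), min_size, replace=False)]

# Label members 0 and non-members 1, then split with stratification so both
# attacker splits keep the same member/non-member ratio.
features = np.concatenate((member_feats, nonmember_feats))
labels = np.concatenate((np.zeros(min_size), np.ones(min_size)))
x_train, x_test, y_train, y_test = model_selection.train_test_split(
    features, labels, test_size=0.25, stratify=labels)
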
    def __init__(self, *args, **kwargs):
        super(AttackResultsCollectionTest, self).__init__(*args, **kwargs)

        self.some_attack_result = SingleAttackResult(
            slice_spec=SingleSliceSpec(None),
            attack_type=AttackType.THRESHOLD_ATTACK,
            roc_curve=RocCurve(tpr=np.array([0.0, 0.5, 1.0]),
                               fpr=np.array([0.0, 0.5, 1.0]),
                               thresholds=np.array([0, 1, 2])),
            data_size=DataSize(ntrain=1, ntest=1))

        self.results_epoch_10 = AttackResults(
            single_attack_results=[self.some_attack_result],
            privacy_report_metadata=PrivacyReportMetadata(
                accuracy_train=0.4,
                accuracy_test=0.3,
                epoch_num=10,
                model_variant_label='default'))

        self.results_epoch_15 = AttackResults(
            single_attack_results=[self.some_attack_result],
            privacy_report_metadata=PrivacyReportMetadata(
                accuracy_train=0.5,
                accuracy_test=0.4,
                epoch_num=15,
                model_variant_label='default'))

        self.attack_results_no_metadata = AttackResults(
            single_attack_results=[self.some_attack_result])

        self.collection_with_metadata = AttackResultsCollection(
            [self.results_epoch_10, self.results_epoch_15])

        self.collection_no_metadata = AttackResultsCollection(
            [self.attack_results_no_metadata, self.attack_results_no_metadata])
    def test_attacker_advantage_random_classifier(self):
        roc = RocCurve(tpr=np.array([0.0, 0.5, 1.0]),
                       fpr=np.array([0.0, 0.5, 1.0]),
                       thresholds=np.array([0, 1, 2]))

        result = SingleAttackResult(roc_curve=roc,
                                    slice_spec=SingleSliceSpec(None),
                                    attack_type=AttackType.THRESHOLD_ATTACK,
                                    data_size=DataSize(ntrain=1, ntest=1))

        self.assertEqual(result.get_attacker_advantage(), 0.0)
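
For the diagonal ROC curve, tpr equals fpr at every threshold, so the membership advantage is zero under the usual max-over-thresholds definition. A sketch of that arithmetic (not the library's get_attacker_advantage implementation):

import numpy as np

# Advantage as the largest gap between true- and false-positive rates.
tpr = np.array([0.0, 0.5, 1.0])
fpr = np.array([0.0, 0.5, 1.0])
assert np.max(np.abs(tpr - fpr)) == 0.0
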
    def __init__(self, *args, **kwargs):
        super(AttackResultsTest, self).__init__(*args, **kwargs)

        # ROC curve of a perfect classifier
        self.perfect_classifier_result = SingleAttackResult(
            slice_spec=SingleSliceSpec(SlicingFeature.CORRECTLY_CLASSIFIED,
                                       True),
            attack_type=AttackType.THRESHOLD_ATTACK,
            roc_curve=RocCurve(tpr=np.array([0.0, 1.0, 1.0]),
                               fpr=np.array([1.0, 1.0, 0.0]),
                               thresholds=np.array([0, 1, 2])),
            data_size=DataSize(ntrain=1, ntest=1))

        # ROC curve of a random classifier
        self.random_classifier_result = SingleAttackResult(
            slice_spec=SingleSliceSpec(None),
            attack_type=AttackType.THRESHOLD_ATTACK,
            roc_curve=RocCurve(tpr=np.array([0.0, 0.5, 1.0]),
                               fpr=np.array([0.0, 0.5, 1.0]),
                               thresholds=np.array([0, 1, 2])),
            data_size=DataSize(ntrain=1, ntest=1))
def _run_threshold_attack(attack_input: AttackInputData):
    ntrain, ntest = attack_input.get_train_size(), attack_input.get_test_size()
    fpr, tpr, thresholds = metrics.roc_curve(
        np.concatenate((np.zeros(ntrain), np.ones(ntest))),
        np.concatenate(
            (attack_input.get_loss_train(), attack_input.get_loss_test())))

    roc_curve = RocCurve(tpr=tpr, fpr=fpr, thresholds=thresholds)

    return SingleAttackResult(
        slice_spec=_get_slice_spec(attack_input),
        data_size=DataSize(ntrain=ntrain, ntest=ntest),
        attack_type=AttackType.THRESHOLD_ATTACK,
        membership_scores_train=-attack_input.get_loss_train(),
        membership_scores_test=-attack_input.get_loss_test(),
        roc_curve=roc_curve)
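
_run_threshold_attack reduces to a single roc_curve call: training examples are labeled 0, test examples 1, and the per-example loss is used as the score, so the resulting AUC measures how separable the two loss distributions are. A self-contained sketch with synthetic losses (illustrative data, not library code):

import numpy as np
from sklearn import metrics

# Illustrative synthetic data; not library code.
rng = np.random.default_rng(0)
loss_train = rng.gamma(shape=2.0, scale=0.5, size=1000)   # members: lower loss
loss_test = rng.gamma(shape=2.0, scale=1.0, size=1000)    # non-members: higher loss

labels = np.concatenate((np.zeros(1000), np.ones(1000)))
scores = np.concatenate((loss_train, loss_test))
fpr, tpr, thresholds = metrics.roc_curve(labels, scores)
print('threshold-attack AUC:', metrics.auc(fpr, tpr))     # well above 0.5
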
Example #8
def create_attacker_data(attack_input_data: AttackInputData,
                         test_fraction: float = 0.25,
                         balance: bool = True) -> AttackerData:
    """Prepare AttackInputData to train ML attackers.

  Combines logits and losses and performs a random train-test split.

  Args:
    attack_input_data: Original AttackInputData
    test_fraction: Fraction of the dataset to include in the test split.
    balance: Whether the training and test sets for the membership inference
              attacker should have a balanced (roughly equal) number of samples
              from the training and test sets used to develop the model
              under attack.

  Returns:
    AttackerData.
  """
    attack_input_train = _column_stack(attack_input_data.logits_or_probs_train,
                                       attack_input_data.get_loss_train())
    attack_input_test = _column_stack(attack_input_data.logits_or_probs_test,
                                      attack_input_data.get_loss_test())

    if balance:
        min_size = min(attack_input_data.get_train_size(),
                       attack_input_data.get_test_size())
        attack_input_train = _sample_multidimensional_array(
            attack_input_train, min_size)
        attack_input_test = _sample_multidimensional_array(
            attack_input_test, min_size)
    ntrain, ntest = attack_input_train.shape[0], attack_input_test.shape[0]

    features_all = np.concatenate((attack_input_train, attack_input_test))

    labels_all = np.concatenate((np.zeros(ntrain), np.ones(ntest)))

    # Perform a train-test split
    features_train, features_test, \
    is_training_labels_train, is_training_labels_test = \
      model_selection.train_test_split(
          features_all, labels_all, test_size=test_fraction, stratify=labels_all)
    return AttackerData(features_train, is_training_labels_train,
                        features_test, is_training_labels_test,
                        DataSize(ntrain=ntrain, ntest=ntest))
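
Here each attack feature vector is the model's logits (or probabilities) for one example with that example's loss appended as an extra column. A minimal sketch of that construction; np.column_stack stands in for the library-internal _column_stack helper:

import numpy as np

# np.column_stack used here in place of the library-internal _column_stack.
logits = np.array([[2.0, -1.0, 0.5],
                   [0.1, 0.3, 1.2]])        # shape (n_examples, n_classes)
loss = np.array([0.4, 1.7])                 # shape (n_examples,)
features = np.column_stack((logits, loss))  # shape (n_examples, n_classes + 1)
assert features.shape == (2, 4)
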
Example #9
def _run_threshold_attack(attack_input: AttackInputData):
    """Runs a threshold attack on loss."""
    ntrain, ntest = attack_input.get_train_size(), attack_input.get_test_size()
    loss_train = attack_input.get_loss_train()
    loss_test = attack_input.get_loss_test()
    if loss_train is None or loss_test is None:
        raise ValueError(
            'Not possible to run threshold attack without losses.')
    fpr, tpr, thresholds = metrics.roc_curve(
        np.concatenate((np.zeros(ntrain), np.ones(ntest))),
        np.concatenate((loss_train, loss_test)))

    roc_curve = RocCurve(tpr=tpr, fpr=fpr, thresholds=thresholds)

    return SingleAttackResult(
        slice_spec=_get_slice_spec(attack_input),
        data_size=DataSize(ntrain=ntrain, ntest=ntest),
        attack_type=AttackType.THRESHOLD_ATTACK,
        membership_scores_train=-loss_train,
        membership_scores_test=-loss_test,
        roc_curve=roc_curve)
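
Since the membership score is just the negated loss, a higher score means the threshold attack is more confident the example was in the training set. A small sketch of ranking examples by that score:

import numpy as np

# Sketch only: indices ordered from most to least likely training member
# (lowest loss first).
losses = np.array([0.2, 1.5, 0.05, 0.9])
scores = -losses
print(np.argsort(-scores))   # [2 0 3 1]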