Ejemplo n.º 1
0
  def test_attacker_advantage_perfect_classifier(self):
    """get_auc() is 1.0 for the ROC points of a perfect classifier."""
    # NOTE(review): the test name mentions attacker advantage, but the
    # assertion below checks the AUC -- confirm which metric is intended.
    true_positive_rates = np.array([0.0, 1.0, 1.0])
    false_positive_rates = np.array([1.0, 1.0, 0.0])
    perfect_curve = RocCurve(
        tpr=true_positive_rates,
        fpr=false_positive_rates,
        thresholds=np.array([0, 1, 2]))

    measured_auc = perfect_curve.get_auc()
    self.assertEqual(measured_auc, 1.0)
Ejemplo n.º 2
0
  def test_auc_random_classifier(self):
    """get_auc() is 0.5 when tpr equals fpr at every threshold."""
    true_positive_rates = np.array([0.0, 0.5, 1.0])
    false_positive_rates = np.array([0.0, 0.5, 1.0])
    diagonal_curve = RocCurve(
        tpr=true_positive_rates,
        fpr=false_positive_rates,
        thresholds=np.array([0, 1, 2]))

    measured_auc = diagonal_curve.get_auc()
    self.assertEqual(measured_auc, 0.5)
  def __init__(self, *args, **kwargs):
    """Builds the attack-result fixtures stored on this test case.

    Two single attack results are created (one with ROC points on the
    diagonal, one with the perfect-classifier ROC points) and wrapped into
    AttackResults objects with different epoch / model-variant metadata, plus
    one AttackResults without any metadata.
    """
    super(PrivacyReportTest, self).__init__(*args, **kwargs)

    # Classifier that achieves an AUC of 0.5.
    diagonal_roc = RocCurve(
        tpr=np.array([0.0, 0.5, 1.0]),
        fpr=np.array([0.0, 0.5, 1.0]),
        thresholds=np.array([0, 1, 2]))
    self.imperfect_classifier_result = SingleAttackResult(
        slice_spec=SingleSliceSpec(None),
        attack_type=AttackType.THRESHOLD_ATTACK,
        roc_curve=diagonal_roc,
        data_size=DataSize(ntrain=1, ntest=1))

    # Classifier that achieves an AUC of 1.0.
    perfect_roc = RocCurve(
        tpr=np.array([0.0, 1.0, 1.0]),
        fpr=np.array([1.0, 1.0, 0.0]),
        thresholds=np.array([0, 1, 2]))
    self.perfect_classifier_result = SingleAttackResult(
        slice_spec=SingleSliceSpec(None),
        attack_type=AttackType.THRESHOLD_ATTACK,
        roc_curve=perfect_roc,
        data_size=DataSize(ntrain=1, ntest=1))

    # 'default' model variant at epochs 0 and 10: imperfect classifier.
    metadata_epoch_0 = PrivacyReportMetadata(
        accuracy_train=0.4,
        accuracy_test=0.3,
        epoch_num=0,
        model_variant_label='default')
    self.results_epoch_0 = AttackResults(
        single_attack_results=[self.imperfect_classifier_result],
        privacy_report_metadata=metadata_epoch_0)

    metadata_epoch_10 = PrivacyReportMetadata(
        accuracy_train=0.4,
        accuracy_test=0.3,
        epoch_num=10,
        model_variant_label='default')
    self.results_epoch_10 = AttackResults(
        single_attack_results=[self.imperfect_classifier_result],
        privacy_report_metadata=metadata_epoch_10)

    # Epoch 15 uses the perfect classifier, for two different model variants.
    metadata_epoch_15 = PrivacyReportMetadata(
        accuracy_train=0.5,
        accuracy_test=0.4,
        epoch_num=15,
        model_variant_label='default')
    self.results_epoch_15 = AttackResults(
        single_attack_results=[self.perfect_classifier_result],
        privacy_report_metadata=metadata_epoch_15)

    metadata_epoch_15_model_2 = PrivacyReportMetadata(
        accuracy_train=0.6,
        accuracy_test=0.7,
        epoch_num=15,
        model_variant_label='model 2')
    self.results_epoch_15_model_2 = AttackResults(
        single_attack_results=[self.perfect_classifier_result],
        privacy_report_metadata=metadata_epoch_15_model_2)

    # Same perfect-classifier result, but with no privacy report metadata.
    self.attack_results_no_metadata = AttackResults(
        single_attack_results=[self.perfect_classifier_result])
Ejemplo n.º 4
0
    def __init__(self, *args, **kwargs):
        """Builds attack results and the two collections stored on this test.

        One collection holds results that carry privacy report metadata
        (epochs 10 and 15); the other holds the same metadata-free result
        twice.
        """
        super(AttackResultsCollectionTest, self).__init__(*args, **kwargs)

        # A single threshold-attack result shared by every AttackResults below.
        diagonal_roc = RocCurve(
            tpr=np.array([0.0, 0.5, 1.0]),
            fpr=np.array([0.0, 0.5, 1.0]),
            thresholds=np.array([0, 1, 2]))
        self.some_attack_result = SingleAttackResult(
            slice_spec=SingleSliceSpec(None),
            attack_type=AttackType.THRESHOLD_ATTACK,
            roc_curve=diagonal_roc)

        metadata_epoch_10 = PrivacyReportMetadata(
            accuracy_train=0.4,
            accuracy_test=0.3,
            epoch_num=10,
            model_variant_label='default')
        self.results_epoch_10 = AttackResults(
            single_attack_results=[self.some_attack_result],
            privacy_report_metadata=metadata_epoch_10)

        metadata_epoch_15 = PrivacyReportMetadata(
            accuracy_train=0.5,
            accuracy_test=0.4,
            epoch_num=15,
            model_variant_label='default')
        self.results_epoch_15 = AttackResults(
            single_attack_results=[self.some_attack_result],
            privacy_report_metadata=metadata_epoch_15)

        self.attack_results_no_metadata = AttackResults(
            single_attack_results=[self.some_attack_result])

        results_with_metadata = [self.results_epoch_10, self.results_epoch_15]
        self.collection_with_metadata = AttackResultsCollection(
            results_with_metadata)

        # The metadata-free result is deliberately listed twice.
        results_without_metadata = [
            self.attack_results_no_metadata,
            self.attack_results_no_metadata,
        ]
        self.collection_no_metadata = AttackResultsCollection(
            results_without_metadata)
Ejemplo n.º 5
0
def run_trained_attack(attack_input: AttackInputData, attack_type: AttackType):
    """Trains an attacker model of the requested type and evaluates it.

    Args:
      attack_input: data handed to models.create_attacker_data to build the
        attacker's train/test features and labels.
      attack_type: selects which attacker model is instantiated.

    Returns:
      A SingleAttackResult holding the ROC curve computed from the attacker's
      predictions on its test features.

    Raises:
      NotImplementedError: if attack_type is not one of the supported types.
    """
    supported_attackers = (
        (AttackType.LOGISTIC_REGRESSION, models.LogisticRegressionAttacker),
        (AttackType.MULTI_LAYERED_PERCEPTRON,
         models.MultilayerPerceptronAttacker),
        (AttackType.RANDOM_FOREST, models.RandomForestAttacker),
        (AttackType.K_NEAREST_NEIGHBORS, models.KNearestNeighborsAttacker),
    )
    for supported_type, make_attacker in supported_attackers:
        if attack_type == supported_type:
            attacker = make_attacker()
            break
    else:
        raise NotImplementedError('Attack type %s not implemented yet.' %
                                  attack_type)

    attacker_data = models.create_attacker_data(attack_input)
    attacker.train_model(attacker_data.features_train,
                         attacker_data.is_training_labels_train)

    # Score the attacker's test features, then turn the scores into a ROC
    # curve against the test membership labels.
    test_predictions = attacker.predict(attacker_data.features_test)
    fpr, tpr, thresholds = metrics.roc_curve(
        attacker_data.is_training_labels_test, test_predictions)
    attack_roc = RocCurve(tpr=tpr, fpr=fpr, thresholds=thresholds)

    return SingleAttackResult(
        slice_spec=_get_slice_spec(attack_input),
        attack_type=attack_type,
        roc_curve=attack_roc)
Ejemplo n.º 6
0
    def __init__(self, *args, **kwargs):
        """Creates the perfect- and random-classifier attack result fixtures."""
        super(AttackResultsTest, self).__init__(*args, **kwargs)

        # ROC curve of a perfect classifier; the slice spec uses the
        # CORRECTLY_CLASSIFIED feature with value True.
        perfect_roc = RocCurve(
            tpr=np.array([0.0, 1.0, 1.0]),
            fpr=np.array([1.0, 1.0, 0.0]),
            thresholds=np.array([0, 1, 2]))
        self.perfect_classifier_result = SingleAttackResult(
            slice_spec=SingleSliceSpec(SlicingFeature.CORRECTLY_CLASSIFIED,
                                       True),
            attack_type=AttackType.THRESHOLD_ATTACK,
            roc_curve=perfect_roc)

        # ROC curve of a random classifier (tpr equals fpr at every point).
        random_roc = RocCurve(
            tpr=np.array([0.0, 0.5, 1.0]),
            fpr=np.array([0.0, 0.5, 1.0]),
            thresholds=np.array([0, 1, 2]))
        self.random_classifier_result = SingleAttackResult(
            slice_spec=SingleSliceSpec(None),
            attack_type=AttackType.THRESHOLD_ATTACK,
            roc_curve=random_roc)
Ejemplo n.º 7
0
    def test_attacker_advantage_random_classifier(self):
        """Attacker advantage is 0.0 when tpr equals fpr at every threshold."""
        diagonal_curve = RocCurve(
            tpr=np.array([0.0, 0.5, 1.0]),
            fpr=np.array([0.0, 0.5, 1.0]),
            thresholds=np.array([0, 1, 2]))
        attack_result = SingleAttackResult(
            roc_curve=diagonal_curve,
            slice_spec=SingleSliceSpec(None),
            attack_type=AttackType.THRESHOLD_ATTACK)

        advantage = attack_result.get_attacker_advantage()
        self.assertEqual(advantage, 0.0)
    def test_auc_random_classifier(self):
        """The result's AUC is 0.5 when tpr equals fpr at every threshold."""
        diagonal_curve = RocCurve(
            tpr=np.array([0.0, 0.5, 1.0]),
            fpr=np.array([0.0, 0.5, 1.0]),
            thresholds=np.array([0, 1, 2]))
        attack_result = SingleAttackResult(
            roc_curve=diagonal_curve,
            slice_spec=SingleSliceSpec(None),
            attack_type=AttackType.THRESHOLD_ATTACK,
            data_size=DataSize(ntrain=1, ntest=1))

        measured_auc = attack_result.get_auc()
        self.assertEqual(measured_auc, 0.5)
Ejemplo n.º 9
0
def run_seq2seq_attack(
        attack_input: Seq2SeqAttackInputData,
        privacy_report_metadata: PrivacyReportMetadata = None,
        balance_attacker_training: bool = True) -> AttackResults:
    """Runs a membership inference attack on a seq2seq model.

    Args:
      attack_input: input data for the attack; validated before use.
      privacy_report_metadata: metadata of the model under attack. A fresh
        PrivacyReportMetadata is created when none is supplied.
      balance_attacker_training: whether the attacker's training and test
        sets should contain a balanced (roughly equal) number of samples from
        the training and test sets of the model under attack.

    Returns:
      AttackResults containing one logistic-regression SingleAttackResult and
      the privacy report metadata.
    """
    attack_input.validate()

    # Each seq2seq record is reduced to one number (its average rank), so
    # logistic regression is the only attacker supported here: it is the most
    # sensible model for single-number features.
    attacker = models.LogisticRegressionAttacker()

    # Building the attacker data also populates fields of the metadata, so a
    # metadata object has to exist first.
    if not privacy_report_metadata:
        privacy_report_metadata = PrivacyReportMetadata()
    attacker_data = create_seq2seq_attacker_data(
        attack_input_data=attack_input,
        balance=balance_attacker_training,
        privacy_report_metadata=privacy_report_metadata)

    attacker.train_model(attacker_data.features_train,
                         attacker_data.is_training_labels_train)

    # Score the attacker's test features, then derive the ROC curve from
    # those scores and the test membership labels.
    test_predictions = attacker.predict(attacker_data.features_test)
    fpr, tpr, thresholds = metrics.roc_curve(
        attacker_data.is_training_labels_test, test_predictions)
    attack_roc = RocCurve(tpr=tpr, fpr=fpr, thresholds=thresholds)

    single_result = SingleAttackResult(
        slice_spec=SingleSliceSpec(),
        attack_type=AttackType.LOGISTIC_REGRESSION,
        roc_curve=attack_roc,
        data_size=attacker_data.data_size)

    return AttackResults(single_attack_results=[single_result],
                         privacy_report_metadata=privacy_report_metadata)
Ejemplo n.º 10
0
def _run_threshold_entropy_attack(attack_input: AttackInputData):
    """Runs a threshold attack that uses entropy values as the score.

    Training examples are labelled 0 and test examples 1 when the ROC curve is
    computed.

    Args:
      attack_input: provides the train/test sizes and entropy values.

    Returns:
      A SingleAttackResult of type THRESHOLD_ENTROPY_ATTACK with the ROC curve.
    """
    membership_labels = np.concatenate(
        (np.zeros(attack_input.get_train_size()),
         np.ones(attack_input.get_test_size())))
    entropy_scores = np.concatenate(
        (attack_input.get_entropy_train(), attack_input.get_entropy_test()))
    fpr, tpr, thresholds = metrics.roc_curve(membership_labels, entropy_scores)

    entropy_roc = RocCurve(tpr=tpr, fpr=fpr, thresholds=thresholds)

    return SingleAttackResult(
        slice_spec=_get_slice_spec(attack_input),
        attack_type=AttackType.THRESHOLD_ENTROPY_ATTACK,
        roc_curve=entropy_roc)
def _run_threshold_attack(attack_input: AttackInputData):
    """Runs a threshold attack that uses the per-example loss as the score.

    Training examples are labelled 0 and test examples 1 when the ROC curve is
    computed; the membership scores stored on the result are the negated
    losses.

    Args:
      attack_input: provides the train/test sizes and loss values.

    Returns:
      A SingleAttackResult of type THRESHOLD_ATTACK with the ROC curve, data
      size and membership scores.

    Raises:
      ValueError: if the train or test losses are not available.
    """
    ntrain, ntest = attack_input.get_train_size(), attack_input.get_test_size()

    # Fetch each loss array once and reuse it for both the ROC curve and the
    # membership scores instead of calling the getters twice.
    loss_train = attack_input.get_loss_train()
    loss_test = attack_input.get_loss_test()
    if loss_train is None or loss_test is None:
        raise ValueError(
            'Not possible to run threshold attack without losses.')

    fpr, tpr, thresholds = metrics.roc_curve(
        np.concatenate((np.zeros(ntrain), np.ones(ntest))),
        np.concatenate((loss_train, loss_test)))

    roc_curve = RocCurve(tpr=tpr, fpr=fpr, thresholds=thresholds)

    return SingleAttackResult(
        slice_spec=_get_slice_spec(attack_input),
        data_size=DataSize(ntrain=ntrain, ntest=ntest),
        attack_type=AttackType.THRESHOLD_ATTACK,
        membership_scores_train=-loss_train,
        membership_scores_test=-loss_test,
        roc_curve=roc_curve)
def _run_trained_attack(attack_input: AttackInputData,
                        attack_type: AttackType,
                        balance_attacker_training: bool = True):
    """Trains an attacker model of the requested type and evaluates it.

    Args:
      attack_input: data handed to models.create_attacker_data to build the
        attacker's train/test features and labels.
      attack_type: selects which attacker model is instantiated.
      balance_attacker_training: forwarded as `balance` to
        models.create_attacker_data.

    Returns:
      A SingleAttackResult holding the ROC curve computed from the attacker's
      predictions on its test features, plus the attacker data size.

    Raises:
      NotImplementedError: if attack_type is not one of the supported types.
    """
    supported_attackers = (
        (AttackType.LOGISTIC_REGRESSION, models.LogisticRegressionAttacker),
        (AttackType.MULTI_LAYERED_PERCEPTRON,
         models.MultilayerPerceptronAttacker),
        (AttackType.RANDOM_FOREST, models.RandomForestAttacker),
        (AttackType.K_NEAREST_NEIGHBORS, models.KNearestNeighborsAttacker),
    )
    for supported_type, make_attacker in supported_attackers:
        if attack_type == supported_type:
            attacker = make_attacker()
            break
    else:
        raise NotImplementedError('Attack type %s not implemented yet.' %
                                  attack_type)

    attacker_data = models.create_attacker_data(
        attack_input, balance=balance_attacker_training)
    attacker.train_model(attacker_data.features_train,
                         attacker_data.is_training_labels_train)

    # Score the attacker's test features, then turn the scores into a ROC
    # curve against the test membership labels.
    test_predictions = attacker.predict(attacker_data.features_test)
    fpr, tpr, thresholds = metrics.roc_curve(
        attacker_data.is_training_labels_test, test_predictions)
    attack_roc = RocCurve(tpr=tpr, fpr=fpr, thresholds=thresholds)

    # NOTE: Membership scores cannot be produced for every sample in this
    # setup, because some samples were used to train the attacker. Training
    # several attackers so that each sample is left out of exactly one of them
    # (i.e. cross-validation) would fix this.
    # TODO(b/175870479): Implement membership scores for predicted attackers.

    return SingleAttackResult(
        slice_spec=_get_slice_spec(attack_input),
        data_size=attacker_data.data_size,
        attack_type=attack_type,
        roc_curve=attack_roc)
Ejemplo n.º 13
0
def _run_threshold_attack(attack_input: AttackInputData):
    """Runs a threshold attack that uses the per-example loss as the score.

    Training examples are labelled 0 and test examples 1 when the ROC curve is
    computed; the membership scores stored on the result are the negated
    losses.

    Args:
      attack_input: provides the train/test sizes and loss values.

    Returns:
      A SingleAttackResult of type THRESHOLD_ATTACK with the ROC curve, data
      size and membership scores.

    Raises:
      ValueError: if the train or test losses are not available.
    """
    ntrain, ntest = attack_input.get_train_size(), attack_input.get_test_size()
    loss_train = attack_input.get_loss_train()
    loss_test = attack_input.get_loss_test()
    if loss_train is None or loss_test is None:
        raise ValueError(
            'Not possible to run threshold attack without losses.')
    fpr, tpr, thresholds = metrics.roc_curve(
        np.concatenate((np.zeros(ntrain), np.ones(ntest))),
        np.concatenate((loss_train, loss_test)))

    roc_curve = RocCurve(tpr=tpr, fpr=fpr, thresholds=thresholds)

    # Reuse the already-validated loss arrays rather than calling the getters
    # a second time.
    return SingleAttackResult(
        slice_spec=_get_slice_spec(attack_input),
        data_size=DataSize(ntrain=ntrain, ntest=ntest),
        attack_type=AttackType.THRESHOLD_ATTACK,
        membership_scores_train=-loss_train,
        membership_scores_test=-loss_test,
        roc_curve=roc_curve)