Example #1
def run_trained_attack(attack_input: AttackInputData, attack_type: AttackType):
    """Classification attack done by ML models."""
    attacker = None

    if attack_type == AttackType.LOGISTIC_REGRESSION:
        attacker = models.LogisticRegressionAttacker()
    elif attack_type == AttackType.MULTI_LAYERED_PERCEPTRON:
        attacker = models.MultilayerPerceptronAttacker()
    elif attack_type == AttackType.RANDOM_FOREST:
        attacker = models.RandomForestAttacker()
    elif attack_type == AttackType.K_NEAREST_NEIGHBORS:
        attacker = models.KNearestNeighborsAttacker()
    else:
        raise NotImplementedError('Attack type %s not implemented yet.' %
                                  attack_type)

    prepared_attacker_data = models.create_attacker_data(attack_input)

    attacker.train_model(prepared_attacker_data.features_train,
                         prepared_attacker_data.is_training_labels_train)

    # Run the attacker on (permuted) test examples.
    predictions_test = attacker.predict(prepared_attacker_data.features_test)

    # Generate ROC curves with predictions.
    fpr, tpr, thresholds = metrics.roc_curve(
        prepared_attacker_data.is_training_labels_test, predictions_test)

    roc_curve = RocCurve(tpr=tpr, fpr=fpr, thresholds=thresholds)

    return SingleAttackResult(slice_spec=_get_slice_spec(attack_input),
                              attack_type=attack_type,
                              roc_curve=roc_curve)
Example #2
    def test_create_attacker_data_loss_and_logits(self):
        attack_input = AttackInputData(logits_train=np.array([[1, 2], [5, 6]]),
                                       logits_test=np.array([[10, 11],
                                                             [14, 15]]),
                                       loss_train=np.array([3, 7]),
                                       loss_test=np.array([12, 16]))
        attacker_data = models.create_attacker_data(attack_input, 0.25)
        self.assertLen(attacker_data.features_test, 1)
        self.assertLen(attacker_data.features_train, 3)

        for i, feature in enumerate(attacker_data.features_train):
            self.assertLen(feature,
                           3)  # each feature has two logits and one loss
            expected = feature[:2] not in attack_input.logits_train
            self.assertEqual(attacker_data.is_training_labels_train[i],
                             expected)
Example #3
def _run_trained_attack(attack_input: AttackInputData,
                        attack_type: AttackType,
                        balance_attacker_training: bool = True):
    """Classification attack done by ML models."""
    attacker = None

    if attack_type == AttackType.LOGISTIC_REGRESSION:
        attacker = models.LogisticRegressionAttacker()
    elif attack_type == AttackType.MULTI_LAYERED_PERCEPTRON:
        attacker = models.MultilayerPerceptronAttacker()
    elif attack_type == AttackType.RANDOM_FOREST:
        attacker = models.RandomForestAttacker()
    elif attack_type == AttackType.K_NEAREST_NEIGHBORS:
        attacker = models.KNearestNeighborsAttacker()
    else:
        raise NotImplementedError('Attack type %s not implemented yet.' %
                                  attack_type)

    prepared_attacker_data = models.create_attacker_data(
        attack_input, balance=balance_attacker_training)

    attacker.train_model(prepared_attacker_data.features_train,
                         prepared_attacker_data.is_training_labels_train)

    # Run the attacker on (permuted) test examples.
    predictions_test = attacker.predict(prepared_attacker_data.features_test)

    # Generate ROC curves with predictions.
    fpr, tpr, thresholds = metrics.roc_curve(
        prepared_attacker_data.is_training_labels_test, predictions_test)

    roc_curve = RocCurve(tpr=tpr, fpr=fpr, thresholds=thresholds)

    # NOTE: In the current setup we can't obtain membership scores for all
    # samples, since some of them were used to train the attacker. This can be
    # fixed by training several attackers to ensure each sample was left out
    # in exactly one attacker (basically, this means performing cross-validation).
    # TODO(b/175870479): Implement membership scores for predicted attackers.

    return SingleAttackResult(slice_spec=_get_slice_spec(attack_input),
                              data_size=prepared_attacker_data.data_size,
                              attack_type=attack_type,
                              roc_curve=roc_curve)
Example #4
    def test_create_attacker_data_loss_only(self):
        attack_input = AttackInputData(loss_train=np.array([1]),
                                       loss_test=np.array([2]))
        attacker_data = models.create_attacker_data(attack_input, 0.5)
        self.assertLen(attacker_data.features_test, 1)
        self.assertLen(attacker_data.features_train, 1)
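With only losses supplied, each attacker feature presumably reduces to the single loss value, so the combined pool has just two one-row examples, and a test fraction of 0.5 leaves one example on each side, which is exactly what the two assertions check. A minimal illustration of that arithmetic, assuming the same splitting convention as the sketch earlier in this listing:

import numpy as np

# Loss-only pool: one member (loss 1) and one non-member (loss 2), one value per row.
features = np.concatenate((np.array([1.]), np.array([2.])))[:, np.newaxis]
n_test = int(round(0.5 * len(features)))   # 0.5 * 2 -> 1 example for the attacker's test split
n_train = len(features) - n_test           # the remaining example trains the attacker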