Example #1
0
    def __init__(self, *args, **kwargs):
        """Builds the shared attack-result fixtures used by the collection tests."""
        super(AttackResultsCollectionTest, self).__init__(*args, **kwargs)

        # One minimal threshold-attack result over the whole-dataset slice;
        # its ROC curve is the diagonal (no attacker advantage).
        diagonal_roc = RocCurve(tpr=np.array([0.0, 0.5, 1.0]),
                                fpr=np.array([0.0, 0.5, 1.0]),
                                thresholds=np.array([0, 1, 2]))
        self.some_attack_result = SingleAttackResult(
            slice_spec=SingleSliceSpec(None),
            attack_type=AttackType.THRESHOLD_ATTACK,
            roc_curve=diagonal_roc)

        # Two result sets for the same model variant at different epochs.
        self.results_epoch_10 = AttackResults(
            single_attack_results=[self.some_attack_result],
            privacy_report_metadata=PrivacyReportMetadata(
                accuracy_train=0.4,
                accuracy_test=0.3,
                epoch_num=10,
                model_variant_label='default'))
        self.results_epoch_15 = AttackResults(
            single_attack_results=[self.some_attack_result],
            privacy_report_metadata=PrivacyReportMetadata(
                accuracy_train=0.5,
                accuracy_test=0.4,
                epoch_num=15,
                model_variant_label='default'))

        # A result set that carries no report metadata at all.
        self.attack_results_no_metadata = AttackResults(
            single_attack_results=[self.some_attack_result])

        # Collections exercised by the tests: one with metadata, one without
        # (the latter reuses the same metadata-free result twice).
        self.collection_with_metadata = AttackResultsCollection(
            [self.results_epoch_10, self.results_epoch_15])
        self.collection_no_metadata = AttackResultsCollection(
            [self.attack_results_no_metadata, self.attack_results_no_metadata])
Example #2
0
def create_seq2seq_attacker_data(
    attack_input_data: Seq2SeqAttackInputData,
    test_fraction: float = 0.25,
    balance: bool = True,
    privacy_report_metadata: PrivacyReportMetadata = None
) -> AttackerData:
    """Prepares Seq2SeqAttackInputData to train ML attackers.

  Uses logits and losses to generate ranks and performs a random train-test
  split.

  Also computes metadata (loss, accuracy) for the model under attack
  and populates respective fields of PrivacyReportMetadata.

  Args:
    attack_input_data: Original Seq2SeqAttackInputData
    test_fraction: Fraction of the dataset to include in the test split.
    balance: Whether the training and test sets for the membership inference
      attacker should have a balanced (roughly equal) number of samples from the
      training and test sets used to develop the model under attack.
    privacy_report_metadata: the metadata of the model under attack; its
      loss/accuracy fields are populated in place. A fresh instance is created
      when None.

  Returns:
    AttackerData.
  """
    # BUG FIX: the original default was `PrivacyReportMetadata()`, a single
    # instance shared across every call (Python evaluates defaults once).
    # Since this function mutates the metadata below, results leaked between
    # calls. Create a fresh instance per call instead.
    if privacy_report_metadata is None:
        privacy_report_metadata = PrivacyReportMetadata()

    # Derive per-example rank features plus model loss/accuracy per split.
    attack_input_train, loss_train, accuracy_train = _get_attack_features_and_metadata(
        attack_input_data.logits_train, attack_input_data.labels_train)
    attack_input_test, loss_test, accuracy_test = _get_attack_features_and_metadata(
        attack_input_data.logits_test, attack_input_data.labels_test)

    if balance:
        # Subsample the larger split so the attacker sees roughly equal
        # numbers of members and non-members.
        min_size = min(len(attack_input_train), len(attack_input_test))
        attack_input_train = _sample_multidimensional_array(
            attack_input_train, min_size)
        attack_input_test = _sample_multidimensional_array(
            attack_input_test, min_size)

    features_all = np.concatenate((attack_input_train, attack_input_test))
    ntrain, ntest = attack_input_train.shape[0], attack_input_test.shape[0]

    # Reshape for classifying one-dimensional features
    features_all = features_all.reshape(-1, 1)

    # Membership labels: 0 for the model's training examples, 1 for test.
    labels_all = np.concatenate(((np.zeros(ntrain)), (np.ones(ntest))))

    # Perform a stratified train-test split for the attacker model.
    features_train, features_test, \
    is_training_labels_train, is_training_labels_test = \
      model_selection.train_test_split(
          features_all, labels_all, test_size=test_fraction, stratify=labels_all)

    # Populate accuracy, loss fields in privacy report metadata
    privacy_report_metadata.loss_train = loss_train
    privacy_report_metadata.loss_test = loss_test
    privacy_report_metadata.accuracy_train = accuracy_train
    privacy_report_metadata.accuracy_test = accuracy_test

    return AttackerData(features_train, is_training_labels_train,
                        features_test, is_training_labels_test,
                        DataSize(ntrain=ntrain, ntest=ntest))
  def __init__(self, *args, **kwargs):
    """Prepares canned attack results used throughout the report tests."""
    super(PrivacyReportTest, self).__init__(*args, **kwargs)

    def _threshold_result(tpr, fpr):
      # Builds a whole-dataset threshold-attack result with the given ROC.
      return SingleAttackResult(
          slice_spec=SingleSliceSpec(None),
          attack_type=AttackType.THRESHOLD_ATTACK,
          roc_curve=RocCurve(
              tpr=np.array(tpr),
              fpr=np.array(fpr),
              thresholds=np.array([0, 1, 2])),
          data_size=DataSize(ntrain=1, ntest=1))

    # Classifier that achieves an AUC of 0.5.
    self.imperfect_classifier_result = _threshold_result(
        [0.0, 0.5, 1.0], [0.0, 0.5, 1.0])

    # Classifier that achieves an AUC of 1.0.
    self.perfect_classifier_result = _threshold_result(
        [0.0, 1.0, 1.0], [1.0, 1.0, 0.0])

    def _epoch_results(result, accuracy_train, accuracy_test, epoch, label):
      # Wraps a single result with per-epoch privacy-report metadata.
      return AttackResults(
          single_attack_results=[result],
          privacy_report_metadata=PrivacyReportMetadata(
              accuracy_train=accuracy_train,
              accuracy_test=accuracy_test,
              epoch_num=epoch,
              model_variant_label=label))

    self.results_epoch_0 = _epoch_results(
        self.imperfect_classifier_result, 0.4, 0.3, 0, 'default')
    self.results_epoch_10 = _epoch_results(
        self.imperfect_classifier_result, 0.4, 0.3, 10, 'default')
    self.results_epoch_15 = _epoch_results(
        self.perfect_classifier_result, 0.5, 0.4, 15, 'default')
    self.results_epoch_15_model_2 = _epoch_results(
        self.perfect_classifier_result, 0.6, 0.7, 15, 'model 2')

    # A result set with no report metadata at all.
    self.attack_results_no_metadata = AttackResults(
        single_attack_results=[self.perfect_classifier_result])
def _compute_missing_privacy_report_metadata(
        metadata: PrivacyReportMetadata,
        attack_input: AttackInputData) -> PrivacyReportMetadata:
    """Fills any unset fields of `metadata` from `attack_input` and returns it."""
    if metadata is None:
        metadata = PrivacyReportMetadata()

    # Each field is computed lazily and only when it is still unset; the
    # tuple preserves the original evaluation order.
    lazy_fields = (
        ('accuracy_train',
         lambda: _get_accuracy(attack_input.logits_train,
                               attack_input.labels_train)),
        ('accuracy_test',
         lambda: _get_accuracy(attack_input.logits_test,
                               attack_input.labels_test)),
        ('loss_train', lambda: np.average(attack_input.get_loss_train())),
        ('loss_test', lambda: np.average(attack_input.get_loss_test())),
    )
    for field_name, compute in lazy_fields:
        if getattr(metadata, field_name) is None:
            setattr(metadata, field_name, compute())
    return metadata
Example #5
0
def run_seq2seq_attack(
        attack_input: Seq2SeqAttackInputData,
        privacy_report_metadata: PrivacyReportMetadata = None,
        balance_attacker_training: bool = True) -> AttackResults:
    """Runs membership inference attacks on a seq2seq model.

  Args:
    attack_input: input data for running an attack
    privacy_report_metadata: the metadata of the model under attack.
    balance_attacker_training: Whether the training and test sets for the
      membership inference attacker should have a balanced (roughly equal)
      number of samples from the training and test sets used to develop the
      model under attack.

  Returns:
    the attack result.
  """
    attack_input.validate()

    # Membership of a seq2seq record is decided from a single averaged-rank
    # feature, so Logistic Regression is the only supported attacker — it is
    # the model that makes the most sense for single-number features.
    attacker = models.LogisticRegressionAttacker()

    # Normalize the metadata argument; create_seq2seq_attacker_data populates
    # the loss/accuracy fields of whatever instance we hand it.
    privacy_report_metadata = privacy_report_metadata or PrivacyReportMetadata(
    )
    attacker_data = create_seq2seq_attacker_data(
        attack_input_data=attack_input,
        balance=balance_attacker_training,
        privacy_report_metadata=privacy_report_metadata)

    attacker.train_model(attacker_data.features_train,
                         attacker_data.is_training_labels_train)

    # Score the (permuted) held-out examples and derive the ROC curve.
    membership_scores = attacker.predict(attacker_data.features_test)
    fpr, tpr, thresholds = metrics.roc_curve(
        attacker_data.is_training_labels_test, membership_scores)

    single_result = SingleAttackResult(
        slice_spec=SingleSliceSpec(),
        attack_type=AttackType.LOGISTIC_REGRESSION,
        roc_curve=RocCurve(tpr=tpr, fpr=fpr, thresholds=thresholds),
        data_size=attacker_data.data_size)

    return AttackResults(single_attack_results=[single_result],
                         privacy_report_metadata=privacy_report_metadata)
Example #6
0
def main(unused_argv):
    """Trains the example models, runs MIA attacks, and prints/plots reports."""
    epoch_results = AttackResultsCollection([])

    num_epochs = 2
    models = {
        "two layer model": two_layer_model,
        "three layer model": three_layer_model,
    }
    for model_name, model in models.items():
        # Incrementally train the model and store privacy metrics every num_epochs.
        for round_idx in range(1, 6):
            model.fit(training_features,
                      to_categorical(training_labels, num_clusters),
                      validation_data=(test_features,
                                       to_categorical(test_labels,
                                                      num_clusters)),
                      batch_size=64,
                      epochs=num_epochs,
                      shuffle=True)

            training_pred = model.predict(training_features)
            test_pred = model.predict(test_features)

            # Add metadata to generate a privacy report.
            privacy_report_metadata = PrivacyReportMetadata(
                accuracy_train=metrics.accuracy_score(
                    training_labels, np.argmax(training_pred, axis=1)),
                accuracy_test=metrics.accuracy_score(
                    test_labels, np.argmax(test_pred, axis=1)),
                epoch_num=num_epochs * round_idx,
                model_variant_label=model_name)

            attack_results = mia.run_attacks(
                AttackInputData(labels_train=training_labels,
                                labels_test=test_labels,
                                probs_train=training_pred,
                                probs_test=test_pred,
                                loss_train=crossentropy(training_labels,
                                                        training_pred),
                                loss_test=crossentropy(test_labels,
                                                       test_pred)),
                SlicingSpec(entire_dataset=True, by_class=True),
                attack_types=(AttackType.THRESHOLD_ATTACK,
                              AttackType.LOGISTIC_REGRESSION),
                privacy_report_metadata=privacy_report_metadata)
            epoch_results.append(attack_results)

    # Generate privacy reports
    epoch_figure = privacy_report.plot_by_epochs(
        epoch_results, [PrivacyMetric.ATTACKER_ADVANTAGE, PrivacyMetric.AUC])
    epoch_figure.show()
    privacy_utility_figure = privacy_report.plot_privacy_vs_accuracy_single_model(
        epoch_results, [PrivacyMetric.ATTACKER_ADVANTAGE, PrivacyMetric.AUC])
    privacy_utility_figure.show()

    # Example of saving the results to the file and loading them back.
    # NOTE: `attack_results` here is whatever the final loop iteration produced.
    with tempfile.TemporaryDirectory() as tmpdirname:
        filepath = os.path.join(tmpdirname, "results.pickle")
        attack_results.save(filepath)
        loaded_results = AttackResults.load(filepath)
        print(loaded_results.summary(by_slices=False))

    # Print attack metrics
    for single_result in attack_results.single_attack_results:
        print("Slice: %s" % single_result.slice_spec)
        print("Attack type: %s" % single_result.attack_type)
        print("AUC: %.2f" % single_result.roc_curve.get_auc())
        print("Attacker advantage: %.2f\n" %
              single_result.roc_curve.get_attacker_advantage())

    max_auc_attacker = attack_results.get_result_with_max_auc()
    print("Attack type with max AUC: %s, AUC of %.2f" %
          (max_auc_attacker.attack_type, max_auc_attacker.roc_curve.get_auc()))

    max_advantage_attacker = attack_results.get_result_with_max_attacker_advantage(
    )
    print("Attack type with max advantage: %s, Attacker advantage of %.2f" %
          (max_advantage_attacker.attack_type,
           max_advantage_attacker.roc_curve.get_attacker_advantage()))

    # Print summary
    print("Summary without slices: \n")
    print(attack_results.summary(by_slices=False))
    print("Summary by slices: \n")
    print(attack_results.summary(by_slices=True))

    # Print pandas data frame
    print("Pandas frame: \n")
    pd.set_option("display.max_rows", None, "display.max_columns", None)
    print(attack_results.calculate_pd_dataframe())

    # Example of ROC curve plotting.
    figure = plotting.plot_roc_curve(
        attack_results.single_attack_results[0].roc_curve)
    figure.show()
    plt.show()
Example #7
0
# Score both splits with the trained model; the predictions feed the attack
# input and the accuracy metadata below. NOTE(review): `model`,
# `training_features`, and `test_features` are defined earlier in the
# original script and are not visible in this excerpt.
training_pred = model.predict(training_features)
test_pred = model.predict(test_features)


def crossentropy(true_labels, predictions):
    """Per-example binary cross-entropy between one-hot labels and predictions.

    Wraps both operands in Keras backend variables and evaluates the symbolic
    loss eagerly so a concrete numpy value is returned.
    """
    one_hot_labels = keras.backend.variable(
        to_categorical(true_labels, num_clusters))
    prediction_var = keras.backend.variable(predictions)
    return keras.backend.eval(
        keras.losses.binary_crossentropy(one_hot_labels, prediction_var))


# Add metadata to generate a privacy report.
privacy_report_metadata = PrivacyReportMetadata(
    accuracy_train=metrics.accuracy_score(training_labels,
                                          np.argmax(training_pred, axis=1)),
    accuracy_test=metrics.accuracy_score(test_labels,
                                         np.argmax(test_pred, axis=1)))

attack_results = mia.run_attacks(
    AttackInputData(labels_train=training_labels,
                    labels_test=test_labels,
                    logits_train=training_pred,
                    logits_test=test_pred,
                    loss_train=crossentropy(training_labels, training_pred),
                    loss_test=crossentropy(test_labels, test_pred)),
    SlicingSpec(entire_dataset=True, by_class=True),
    attack_types=(AttackType.THRESHOLD_ATTACK, AttackType.LOGISTIC_REGRESSION),
    # BUG FIX: the metadata built above was never used — the original passed
    # `privacy_report_metadata=None`, silently dropping the accuracy numbers
    # from the generated privacy report.
    privacy_report_metadata=privacy_report_metadata)

# Example of saving the results to the file and loading them back.
Example #8
0
                               validation_data=(test_features,
                                                to_categorical(
                                                    test_labels,
                                                    num_clusters)),
                               batch_size=64,
                               epochs=num_epochs,
                               shuffle=True)

        training_pred = models[model_name].predict(training_features)
        test_pred = models[model_name].predict(test_features)

        # Add metadata to generate a privacy report.
        privacy_report_metadata = PrivacyReportMetadata(
            accuracy_train=metrics.accuracy_score(
                training_labels, np.argmax(training_pred, axis=1)),
            accuracy_test=metrics.accuracy_score(test_labels,
                                                 np.argmax(test_pred, axis=1)),
            epoch_num=num_epochs * i,
            model_variant_label=model_name)

        attack_results = mia.run_attacks(
            AttackInputData(labels_train=training_labels,
                            labels_test=test_labels,
                            probs_train=training_pred,
                            probs_test=test_pred,
                            loss_train=crossentropy(training_labels,
                                                    training_pred),
                            loss_test=crossentropy(test_labels, test_pred)),
            SlicingSpec(entire_dataset=True, by_class=True),
            attack_types=(AttackType.THRESHOLD_ATTACK,
                          AttackType.LOGISTIC_REGRESSION),