    def test_dp_sum_structure_complex(self):
        query = tensorflow_privacy.GaussianSumQuery(5.0, 0.0)

        def datapoint(a, b, c):
            return collections.OrderedDict(a=(a,), bc=([b], (c,)))

        data = [
            datapoint(1.0, 2.0, 1.0),
            datapoint(2.0, 3.0, 1.0),
            datapoint(6.0, 8.0, 0.0),  # Clipped to 3.0, 4.0, 0.0
        ]

        value_type = type_conversions.type_from_tensors(data[0])
        dp_aggregate_process = differential_privacy.build_dp_aggregate_process(
            value_type, query)

        global_state = dp_aggregate_process.initialize()

        output = dp_aggregate_process.next(global_state, data, [1.0, 1.0, 1.0])

        self.assertEqual(output.state.l2_norm_clip, 5.0)
        self.assertEqual(output.state.stddev, 0.0)

        self.assertEqual(output.result['a'][0], 6.0)
        self.assertEqual(output.result['bc'][0][0], 9.0)
        self.assertEqual(output.result['bc'][1][0], 2.0)
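The clipped values noted in the comment follow from per-record L2 clipping: the record (6.0, 8.0, 0.0) has global L2 norm 10.0, above the clip of 5.0, so every component is scaled by 5.0 / 10.0. A minimal NumPy sketch of that arithmetic (an illustration, not part of the test):

import numpy as np

# GaussianSumQuery clips each record by its global L2 norm before summing.
record = np.array([6.0, 8.0, 0.0])
l2_norm_clip = 5.0
norm = np.linalg.norm(record)          # 10.0
scale = min(1.0, l2_norm_clip / norm)  # 0.5
print(record * scale)                  # [3. 4. 0.]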
Example #2
    def gaussian_fixed(cls, noise_multiplier: float, clients_per_round: float,
                       clip: float) -> factory.UnweightedAggregationFactory:
        """`DifferentiallyPrivateFactory` with fixed clipping and Gaussian noise.

    Performs fixed clipping and addition of Gaussian noise for differentially
    private learning. For details of the DP algorithm see McMahan et. al (2017)
    https://arxiv.org/abs/1710.06963.

    Args:
      noise_multiplier: A float specifying the noise multiplier for the Gaussian
        mechanism for model updates. A value of 1.0 or higher may be needed for
        strong privacy. See above mentioned paper to compute (epsilon, delta)
        privacy guarantee.
      clients_per_round: A float specifying the expected number of clients per
        round. Must be positive.
      clip: The value of the clipping norm.

    Returns:
      A `DifferentiallyPrivateFactory` with fixed clipping and Gaussian noise.
    """

        if isinstance(clients_per_round, int):
            clients_per_round = float(clients_per_round)

        _check_float_nonnegative(noise_multiplier, 'noise_multiplier')
        _check_float_positive(clients_per_round, 'clients_per_round')
        _check_float_positive(clip, 'clip')

        query = tfp.NormalizedQuery(
            tfp.GaussianSumQuery(
                l2_norm_clip=clip, stddev=clip * noise_multiplier),
            denominator=clients_per_round)

        return cls(query)
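A minimal usage sketch, assuming this classmethod is exposed as tff.aggregators.DifferentiallyPrivateFactory.gaussian_fixed as in current TFF (the values here are illustrative):

import tensorflow_federated as tff

# Clip each client update to L2 norm 0.3, add Gaussian noise with
# stddev = clip * noise_multiplier = 0.3 to the sum, then divide by the
# expected 100 clients per round.
dp_factory = tff.aggregators.DifferentiallyPrivateFactory.gaussian_fixed(
    noise_multiplier=1.0, clients_per_round=100.0, clip=0.3)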
Example #3
  def test_process_type_signature(self, value_template):
    query = tensorflow_privacy.GaussianSumQuery(4.0, 0.0)
    value_type = type_conversions.type_from_tensors(value_template)
    dp_aggregate_process = differential_privacy.build_dp_aggregate_process(
        value_type, query)

    server_state_type = computation_types.FederatedType(
        computation_types.NamedTupleType([('l2_norm_clip', tf.float32),
                                          ('stddev', tf.float32)]),
        placements.SERVER)
    self.assertEqual(
        dp_aggregate_process.initialize.type_signature,
        computation_types.FunctionType(
            parameter=None, result=server_state_type))

    client_value_type = computation_types.FederatedType(value_type,
                                                        placements.CLIENTS)
    client_value_weight_type = computation_types.FederatedType(
        tf.float32, placements.CLIENTS)
    server_result_type = computation_types.FederatedType(
        value_type, placements.SERVER)
    server_metrics_type = computation_types.FederatedType((), placements.SERVER)
    self.assertEqual(
        dp_aggregate_process.next.type_signature,
        computation_types.FunctionType(
            parameter=computation_types.NamedTupleType([
                (None, server_state_type), (None, client_value_type),
                (None, client_value_weight_type)
            ]),
            result=computation_types.NamedTupleType([
                ('state', server_state_type), ('result', server_result_type),
                ('measurements', server_metrics_type)
            ])))
Example #4
  def test_process_type_signature(self, value_template):
    query = tensorflow_privacy.GaussianSumQuery(4.0, 0.0)
    value_type = type_conversions.type_from_tensors(value_template)
    dp_aggregate_process = differential_privacy.build_dp_aggregate_process(
        value_type, query)

    global_state = query.initial_global_state()
    server_state_type = computation_types.FederatedType(
        type_conversions.type_from_tensors(global_state), placements.SERVER)
    self.assertEqual(
        dp_aggregate_process.initialize.type_signature,
        computation_types.FunctionType(
            parameter=None, result=server_state_type))

    metrics_type = type_conversions.type_from_tensors(
        query.derive_metrics(global_state))

    client_value_type = computation_types.FederatedType(value_type,
                                                        placements.CLIENTS)
    client_value_weight_type = computation_types.FederatedType(
        tf.float32, placements.CLIENTS)
    server_result_type = computation_types.FederatedType(
        value_type, placements.SERVER)
    server_metrics_type = computation_types.FederatedType(
        metrics_type, placements.SERVER)
    self.assertEqual(
        dp_aggregate_process.next.type_signature,
        computation_types.FunctionType(
            parameter=(server_state_type, client_value_type,
                       client_value_weight_type),
            result=collections.OrderedDict(
                state=server_state_type,
                result=server_result_type,
                measurements=server_metrics_type)))
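A quick way to inspect these signatures interactively, given the same dp_aggregate_process (a sketch, not part of the test):

# Both attributes are TFF computations with printable type signatures.
print(dp_aggregate_process.initialize.type_signature)
print(dp_aggregate_process.next.type_signature)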
Example #5
  def test_execution_with_custom_dp_query(self):
    client_data = create_emnist_client_data()
    train_data = [client_data(), client_data()]

    def loss_fn():
      return tf.keras.losses.SparseCategoricalCrossentropy()

    def metrics_fn():
      return [
          NumExamplesCounter(),
          NumBatchesCounter(),
          tf.keras.metrics.SparseCategoricalAccuracy()
      ]

    # No values should be changed, but working with inf directly zeroes out all
    # updates. Prefer a very large value that can still be handled in
    # multiplication/division.
    gaussian_sum_query = tfp.GaussianSumQuery(l2_norm_clip=1e10, stddev=0)
    dp_sum_factory = tff.aggregators.DifferentiallyPrivateFactory(
        query=gaussian_sum_query,
        record_aggregation_factory=tff.aggregators.SumFactory())
    dp_mean_factory = _DPMean(dp_sum_factory)

    # Disable reconstruction via a 0 learning rate to ensure the post-recon
    # loss matches exact expectations in round 0 and decreases by the next
    # round.
    trainer = training_process.build_federated_reconstruction_process(
        MnistModel,
        loss_fn=loss_fn,
        metrics_fn=metrics_fn,
        server_optimizer_fn=functools.partial(tf.keras.optimizers.SGD, 0.01),
        client_optimizer_fn=functools.partial(tf.keras.optimizers.SGD, 0.001),
        reconstruction_optimizer_fn=functools.partial(tf.keras.optimizers.SGD,
                                                      0.0),
        aggregation_factory=dp_mean_factory,
    )
    state = trainer.initialize()

    outputs = []
    states = []
    for _ in range(2):
      state, output = trainer.next(state, train_data)
      outputs.append(output)
      states.append(state)

    # All weights and biases are initialized to 0, so initial logits are all 0
    # and softmax probabilities are uniform over 10 classes, giving a negative
    # log likelihood of -ln(1/10) = ln(10). This holds in expectation, so use
    # an increased tolerance.
    self.assertAllClose(outputs[0]['loss'], tf.math.log(10.0), rtol=1e-4)
    self.assertLess(outputs[1]['loss'], outputs[0]['loss'])
    self.assertNotAllClose(states[0].model.trainable, states[1].model.trainable)

    # Expect 6 reconstruction examples, 6 training examples. Only training
    # included in metrics.
    self.assertEqual(outputs[0]['num_examples_total'], 6.0)
    self.assertEqual(outputs[1]['num_examples_total'], 6.0)

    # Expect 4 reconstruction batches and 4 training batches. Only training
    # included in metrics.
    self.assertEqual(outputs[0]['num_batches_total'], 4.0)
    self.assertEqual(outputs[1]['num_batches_total'], 4.0)
Example #6
  def test_dp_sum_structure_list(self):
    query = tensorflow_privacy.GaussianSumQuery(5.0, 0.0)

    def _value_type_fn(value):
      del value
      return [
          computation_types.TensorType(tf.float32),
          computation_types.TensorType(tf.float32),
      ]

    dp_aggregate_fn, _ = differential_privacy.build_dp_aggregate(
        query, value_type_fn=_value_type_fn)

    def datapoint(a, b):
      return [tf.Variable(a, name='a'), tf.Variable(b, name='b')]

    data = [
        datapoint(1.0, 2.0),
        datapoint(2.0, 3.0),
        datapoint(6.0, 8.0),  # Clipped to 3.0, 4.0
    ]

    initialize, aggregate = wrap_aggregate_fn(dp_aggregate_fn, data[0])
    global_state = initialize()

    global_state, result = aggregate(global_state, data)

    self.assertEqual(global_state.l2_norm_clip, 5.0)
    self.assertEqual(global_state.stddev, 0.0)

    result = list(result)
    self.assertEqual(result[0], 6.0)
    self.assertEqual(result[1], 9.0)
Example #7
  def test_bad_query(self):
    non_quantile_estimator_query = tfp.GaussianSumQuery(
        l2_norm_clip=1.0, stddev=1.0)

    with self.assertRaises(TypeError):
      quantile_estimation.PrivateQuantileEstimationProcess(
          non_quantile_estimator_query)
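For contrast, a sketch of a query the process does accept, assuming the tfp.QuantileEstimatorQuery constructor arguments shown here (names may differ across tensorflow_privacy versions):

quantile_query = tfp.QuantileEstimatorQuery(
    initial_estimate=1.0,       # starting guess for the quantile
    target_quantile=0.5,        # estimate the median
    learning_rate=0.2,
    below_estimate_stddev=0.5,  # noise added to the below-estimate counts
    expected_num_records=100,
    geometric_update=True)
quantile_estimation.PrivateQuantileEstimationProcess(quantile_query)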
Example #8
  def test_dp_global_state_type(self):
    query = tensorflow_privacy.GaussianSumQuery(5.0, 0.0)

    _, dp_global_state_type = differential_privacy.build_dp_aggregate(query)

    self.assertEqual(dp_global_state_type.__class__.__name__,
                     'NamedTupleTypeWithPyContainerType')
Example #9
  def test_iterative_process_fails_with_dp_agg_and_none_client_weighting(self):

    def loss_fn():
      return tf.keras.losses.SparseCategoricalCrossentropy()

    def metrics_fn():
      return [
          NumExamplesCounter(),
          NumBatchesCounter(),
          tf.keras.metrics.SparseCategoricalAccuracy()
      ]

    # No values should be changed, but working with inf directly zeroes out all
    # updates. Prefer a very large value that can still be handled in
    # multiplication/division.
    gaussian_sum_query = tfp.GaussianSumQuery(l2_norm_clip=1e10, stddev=0)
    dp_sum_factory = differential_privacy.DifferentiallyPrivateFactory(
        query=gaussian_sum_query,
        record_aggregation_factory=sum_factory.SumFactory())
    dp_mean_factory = _DPMean(dp_sum_factory)

    with self.assertRaisesRegex(ValueError, 'unweighted aggregator'):
      training_process.build_training_process(
          MnistModel,
          loss_fn=loss_fn,
          metrics_fn=metrics_fn,
          server_optimizer_fn=_get_keras_optimizer_fn(0.01),
          client_optimizer_fn=_get_keras_optimizer_fn(0.001),
          reconstruction_optimizer_fn=_get_keras_optimizer_fn(0.0),
          aggregation_factory=dp_mean_factory,
          client_weighting=None,
          dataset_split_fn=reconstruction_utils.simple_dataset_split_fn)
Example #10
  def test_dp_sum_structure_nested_odict(self):
    query = tensorflow_privacy.GaussianSumQuery(5.0, 0.0)

    def datapoint(a, b, c):
      return collections.OrderedDict([('a', (a,)),
                                      ('bc',
                                       collections.OrderedDict([('b', [b]),
                                                                ('c', (c,))]))])

    data = [
        datapoint(1.0, 2.0, 1.0),
        datapoint(2.0, 3.0, 1.0),
        datapoint(6.0, 8.0, 0.0),  # Clipped to 3.0, 4.0, 0.0
    ]

    value_type = type_conversions.type_from_tensors(data[0])
    dp_aggregate_process = differential_privacy.build_dp_aggregate_process(
        value_type, query)

    global_state = dp_aggregate_process.initialize()

    output = dp_aggregate_process.next(global_state, data, [1.0, 1.0, 1.0])

    self.assertEqual(output['state']['l2_norm_clip'], 5.0)
    self.assertEqual(output['state']['stddev'], 0.0)

    self.assertEqual(output['result']['a'][0], 6.0)
    self.assertEqual(output['result']['bc']['b'][0], 9.0)
    self.assertEqual(output['result']['bc']['c'][0], 2.0)
Example #11
    def test_dp_sum_structure_list(self):
        query = tensorflow_privacy.GaussianSumQuery(5.0, 0.0)

        def datapoint(a, b):
            return [tf.Variable(a, name='a'), tf.Variable(b, name='b')]

        data = [
            datapoint(1.0, 2.0),
            datapoint(2.0, 3.0),
            datapoint(6.0, 8.0),  # Clipped to 3.0, 4.0
        ]

        value_type = type_conversions.type_from_tensors(data[0])

        dp_aggregate_process = differential_privacy.build_dp_aggregate_process(
            value_type, query)

        global_state = dp_aggregate_process.initialize()

        output = dp_aggregate_process.next(global_state, data, [1.0, 1.0, 1.0])

        self.assertEqual(output.state.l2_norm_clip, 5.0)
        self.assertEqual(output.state.stddev, 0.0)

        result = list(output.result)
        self.assertEqual(result[0], 6.0)
        self.assertEqual(result[1], 9.0)
Example #12
  def test_dp_global_state_type(self):
    query = tensorflow_privacy.GaussianSumQuery(5.0, 0.0)

    _, dp_global_state_type = differential_privacy.build_dp_aggregate(query)

    self.assertIsInstance(dp_global_state_type,
                          computation_types.StructWithPythonType)
Example #13
  def test_dp_sum(self):
    query = tensorflow_privacy.GaussianSumQuery(4.0, 0.0)

    dp_aggregate_fn, _ = differential_privacy.build_dp_aggregate(query)

    initialize, aggregate = wrap_aggregate_fn(dp_aggregate_fn, 0.0)
    global_state = initialize()

    global_state, result = aggregate(global_state, [1.0, 3.0, 5.0])

    self.assertEqual(global_state.l2_norm_clip, 4.0)
    self.assertEqual(global_state.stddev, 0.0)
    self.assertEqual(result, 8.0)
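The expected result of 8.0 follows directly from the clip: 1.0 and 3.0 are within the L2 norm bound of 4.0 and pass through unchanged, while 5.0 is clipped down to 4.0, giving 1.0 + 3.0 + 4.0 = 8.0; no noise is added because stddev is 0.0.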
Example #14
    def test_dp_sum(self):
        query = tensorflow_privacy.GaussianSumQuery(4.0, 0.0)

        value_type = type_conversions.type_from_tensors(0.0)
        dp_aggregate_process = differential_privacy.build_dp_aggregate_process(
            value_type, query)

        global_state = dp_aggregate_process.initialize()

        output = dp_aggregate_process.next(global_state, [1.0, 3.0, 5.0],
                                           [1.0, 1.0, 1.0])

        self.assertEqual(output.state.l2_norm_clip, 4.0)
        self.assertEqual(output.state.stddev, 0.0)
        self.assertEqual(output.result, 8.0)
Example #15
    def test_iterative_process_builds_with_dp_agg_and_client_weight_fn(self):
        def loss_fn():
            return tf.keras.losses.SparseCategoricalCrossentropy()

        def metrics_fn():
            return [
                NumExamplesCounter(),
                NumBatchesCounter(),
                tf.keras.metrics.SparseCategoricalAccuracy()
            ]

        # No values should be changed, but working with inf directly zeroes out
        # all updates. Prefer a very large value that can still be handled in
        # multiplication/division.
        gaussian_sum_query = tfp.GaussianSumQuery(l2_norm_clip=1e10, stddev=0)
        dp_sum_factory = tff.aggregators.DifferentiallyPrivateFactory(
            query=gaussian_sum_query,
            record_aggregation_factory=tff.aggregators.SumFactory())
        dp_mean_factory = _DPMean(dp_sum_factory)

        def client_weight_fn(local_outputs):
            del local_outputs  # Unused
            return 1.0

        # Ensure this builds, as some builders raise if an unweighted aggregation is
        # specified with a client_weight_fn.
        trainer = training_process.build_federated_reconstruction_process(
            MnistModel,
            loss_fn=loss_fn,
            metrics_fn=metrics_fn,
            server_optimizer_fn=functools.partial(tf.keras.optimizers.SGD,
                                                  0.01),
            client_optimizer_fn=functools.partial(tf.keras.optimizers.SGD,
                                                  0.001),
            reconstruction_optimizer_fn=functools.partial(
                tf.keras.optimizers.SGD, 0.0),
            aggregation_factory=dp_mean_factory,
            client_weight_fn=client_weight_fn,
        )
        self.assertIsInstance(trainer, tff.templates.IterativeProcess)
Example #16
def get_dp_query(mechanism, l2_norm_bound, noise_scale):
  """Factory for DPQuery instances.

  Args:
    mechanism: The mechanism name string.
    l2_norm_bound: The L2 norm bound to be checked by the DPQueries. Note that
      for discrete queries, these are the bounds after scaling.
    noise_scale: The noise scale (stddev) for the mechanism. Note that for
      central queries, this is the central stddev; for distributed queries, this
      is the local stddev for each client.

  Returns:
    A DPQuery object.
  """
  mechanism = mechanism.lower()
  if mechanism == 'gauss':
    return tfp.GaussianSumQuery(l2_norm_clip=l2_norm_bound, stddev=noise_scale)
  elif mechanism == 'distributed_dgauss':
    return distributed_discrete_gaussian_query.DistributedDiscreteGaussianSumQuery(
        l2_norm_bound=l2_norm_bound, local_scale=noise_scale)
  else:
    raise ValueError(f'Not yet implemented: {mechanism}')
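A usage sketch for this factory (illustrative values; assumes the imports used by the module above are in scope):

# Central Gaussian: clip records to L2 norm 1.0, add noise with stddev 0.5.
central_query = get_dp_query('gauss', l2_norm_bound=1.0, noise_scale=0.5)

# Distributed discrete Gaussian: per-client local noise at the scaled bound.
distributed_query = get_dp_query(
    'distributed_dgauss', l2_norm_bound=2.0**10, noise_scale=0.5)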
Example #17
  def test_dp_sum_structure_odict(self):
    query = tensorflow_privacy.GaussianSumQuery(5.0, 0.0)

    dp_aggregate_fn, _ = differential_privacy.build_dp_aggregate(query)

    def datapoint(a, b):
      return collections.OrderedDict([('a', (a,)), ('b', [b])])

    data = [
        datapoint(1.0, 2.0),
        datapoint(2.0, 3.0),
        datapoint(6.0, 8.0),  # Clipped to 3.0, 4.0
    ]

    initialize, aggregate = wrap_aggregate_fn(dp_aggregate_fn, data[0])
    global_state = initialize()

    global_state, result = aggregate(global_state, data)

    self.assertEqual(global_state.l2_norm_clip, 5.0)
    self.assertEqual(global_state.stddev, 0.0)

    self.assertEqual(result['a'][0], 6.0)
    self.assertEqual(result['b'][0], 9.0)
Example #18
from absl.testing import parameterized
import numpy as np
import tensorflow as tf
import tensorflow_privacy as tfp

from tensorflow_federated.python.aggregators import differential_privacy
from tensorflow_federated.python.aggregators import factory
from tensorflow_federated.python.aggregators import test_utils
from tensorflow_federated.python.core.api import computation_types
from tensorflow_federated.python.core.api import placements
from tensorflow_federated.python.core.api import test_case
from tensorflow_federated.python.core.backends.native import execution_contexts
from tensorflow_federated.python.core.impl.types import type_conversions
from tensorflow_federated.python.core.templates import aggregation_process
from tensorflow_federated.python.core.templates import measured_process

_test_dp_query = tfp.GaussianSumQuery(l2_norm_clip=1.0, stddev=0.0)

_test_struct_type = [(tf.float32, (2,)), tf.float32]
_test_inner_agg_factory = test_utils.SumPlusOneFactory()


class DPFactoryComputationTest(test_case.TestCase, parameterized.TestCase):

  @parameterized.named_parameters(
      ('float_simple', tf.float32, None),
      ('struct_simple', _test_struct_type, None),
      ('float_inner', tf.float32, _test_inner_agg_factory),
      ('struct_inner', _test_struct_type, _test_inner_agg_factory))
  def test_type_properties(self, value_type, inner_agg_factory):
    factory_ = differential_privacy.DifferentiallyPrivateFactory(
        _test_dp_query, inner_agg_factory)