# Example #1
# 0
def build_dp_query(clip,
                   noise_multiplier,
                   expected_total_weight,
                   adaptive_clip_learning_rate=0,
                   target_unclipped_quantile=None,
                   clipped_count_budget_allocation=None,
                   expected_num_clients=None,
                   per_vector_clipping=False,
                   geometric_clip_update=True,
                   model=None):
    """Makes a `DPQuery` to estimate vector averages with differential privacy.

  Supports many of the types of query available in tensorflow_privacy, including
  nested ("per-vector") queries as described in
  https://arxiv.org/pdf/1812.06210.pdf, and quantile-based adaptive clipping as
  described in https://arxiv.org/abs/1905.03871.

  Args:
    clip: The query's L2 norm bound.
    noise_multiplier: The ratio of the (effective) noise stddev to the clip.
    expected_total_weight: The expected total weight of all clients, used as the
      denominator for the average computation.
    adaptive_clip_learning_rate: Learning rate for quantile-based adaptive
      clipping. If 0, fixed clipping is used. If `per_vector_clipping=True` and
      `geometric_clip_update=False`, the learning rate of each vector is
      proportional to that vector's initial clip.
    target_unclipped_quantile: Target unclipped quantile for adaptive clipping.
    clipped_count_budget_allocation: The fraction of privacy budget to use for
      estimating clipped counts.
    expected_num_clients: The expected number of clients for estimating clipped
      fractions.
    per_vector_clipping: If True, clip each weight tensor independently.
      Otherwise, global clipping is used. The clipping norm for each vector (or
      the initial clipping norm, in the case of adaptive clipping) is
      proportional to the sqrt of the vector dimensionality such that the root
      sum squared of the individual clips equals `clip`.
    geometric_clip_update: If True, use geometric updating of the clip.
    model: A `tff.learning.Model` to determine the structure of model weights.
      Required only if per_vector_clipping is True.

  Returns:
    A `DPQuery` suitable for use in a call to `build_dp_aggregate` to perform
      Federated Averaging with differential privacy.

  Raises:
    ValueError: If `per_vector_clipping` is True but `model` is None.
  """
    py_typecheck.check_type(clip, numbers.Number, 'clip')
    py_typecheck.check_type(noise_multiplier, numbers.Number,
                            'noise_multiplier')
    py_typecheck.check_type(expected_total_weight, numbers.Number,
                            'expected_total_weight')

    if per_vector_clipping:
        # Fail fast with a clear message rather than an opaque AttributeError
        # on `model.weights` below.
        if model is None:
            raise ValueError(
                'model must be specified if per_vector_clipping is True.')
        # Note we need to keep the structure of vectors (not just the num_vectors)
        # to create the subqueries below, when per_vector_clipping is True.
        vectors = model.weights.trainable
        num_vectors = len(tf.nest.flatten(vectors))
    else:
        num_vectors = 1

    if adaptive_clip_learning_rate:
        py_typecheck.check_type(adaptive_clip_learning_rate, numbers.Number,
                                'adaptive_clip_learning_rate')
        py_typecheck.check_type(target_unclipped_quantile, numbers.Number,
                                'target_unclipped_quantile')
        py_typecheck.check_type(clipped_count_budget_allocation,
                                numbers.Number,
                                'clipped_count_budget_allocation')
        py_typecheck.check_type(expected_num_clients, numbers.Number,
                                'expected_num_clients')
        # Split the noise budget between the vector sums (fraction 1 - p) and
        # the clipped-count estimates (fraction p), spread across the
        # `num_vectors` subqueries. Use a distinct name for the vector noise
        # multiplier instead of rebinding the `noise_multiplier` parameter, so
        # the pre-split and post-split quantities cannot be conflated.
        p = clipped_count_budget_allocation
        vectors_noise_multiplier = (noise_multiplier *
                                    ((1 - p) / num_vectors)**(-0.5))
        # Clipped count sensitivity is 0.5.
        clipped_count_stddev = (0.5 * noise_multiplier *
                                (p / num_vectors)**(-0.5))

    def make_single_vector_query(vector_clip):
        """Makes a `DPQuery` for a single vector."""
        if not adaptive_clip_learning_rate:
            return tensorflow_privacy.GaussianAverageQuery(
                l2_norm_clip=vector_clip,
                sum_stddev=vector_clip * noise_multiplier * num_vectors**0.5,
                denominator=expected_total_weight)
        else:
            # Without geometric updating, the update is c = c - lr * loss, so for
            # multiple vectors we set the learning rate to be on the same scale as the
            # initial clip. That way big vectors get big updates, small vectors
            # small updates. With geometric updating, the update is
            # c = c * exp(-lr * loss) so the learning rate should be independent of
            # the initial clip.
            if geometric_clip_update:
                learning_rate = adaptive_clip_learning_rate
            else:
                learning_rate = adaptive_clip_learning_rate * vector_clip / clip
            return tensorflow_privacy.QuantileAdaptiveClipAverageQuery(
                initial_l2_norm_clip=vector_clip,
                noise_multiplier=vectors_noise_multiplier,
                target_unclipped_quantile=target_unclipped_quantile,
                learning_rate=learning_rate,
                clipped_count_stddev=clipped_count_stddev,
                expected_num_records=expected_num_clients,
                geometric_update=geometric_clip_update,
                denominator=expected_total_weight)

    if per_vector_clipping:

        def dim(v):
            """Returns the number of elements of `v` as a float.

            Computed as exp(sum(log(d))) to stay in floating point. Assumes
            all dims are statically known and nonzero (math.log would raise
            otherwise).
            """
            # `shape.dims` yields TF1 `Dimension` objects (with a `.value`
            # attribute) or plain ints under TF2 behavior; handle both, in
            # the spirit of `tf.compat.dimension_value`, instead of assuming
            # the TF1-only `.value`.
            return math.exp(
                sum(math.log(getattr(d, 'value', d)) for d in v.shape.dims))

        dims = tf.nest.map_structure(dim, vectors)
        total_dim = sum(tf.nest.flatten(dims))
        # Each vector's clip is proportional to sqrt(dim) so that the root sum
        # of squares of the per-vector clips equals `clip`.
        clips = tf.nest.map_structure(
            lambda dim: clip * np.sqrt(dim / total_dim), dims)
        subqueries = tf.nest.map_structure(make_single_vector_query, clips)
        return tensorflow_privacy.NestedQuery(subqueries)
    else:
        return make_single_vector_query(clip)
# Example #2
# 0
def build_dp_query(clip,
                   noise_multiplier,
                   expected_total_weight,
                   adaptive_clip_learning_rate=0,
                   target_unclipped_quantile=None,
                   clipped_count_budget_allocation=None,
                   expected_clients_per_round=None,
                   per_vector_clipping=False,
                   geometric_clip_update=True,
                   model=None):
    """Makes a `DPQuery` to estimate vector averages with differential privacy.

  Supports many of the types of query available in tensorflow_privacy, including
  nested ("per-vector") queries as described in
  https://arxiv.org/pdf/1812.06210.pdf, and quantile-based adaptive clipping as
  described in https://arxiv.org/abs/1905.03871.

  Args:
    clip: The query's L2 norm bound.
    noise_multiplier: The ratio of the (effective) noise stddev to the clip.
    expected_total_weight: The expected total weight of all clients, used as the
      denominator for the average computation.
    adaptive_clip_learning_rate: Learning rate for quantile-based adaptive
      clipping. If 0, fixed clipping is used. If per-vector clipping is enabled,
      (but not geometric_clip_update) the learning rate of each vector is
      proportional to that vector's initial clip.
    target_unclipped_quantile: Target unclipped quantile for adaptive clipping.
    clipped_count_budget_allocation: The fraction of privacy budget to use for
      estimating clipped counts.
    expected_clients_per_round: The expected number of clients for estimating
      clipped fractions.
    per_vector_clipping: Note that this option is not recommended because it
      has been shown experimentally and theoretically to be inferior from a
      privacy/utility standpoint. It will be removed in a future release. If
      True, clip each weight tensor independently. Otherwise, global clipping is
      used. The clipping norm for each vector (or the initial clipping norm, in
      the case of adaptive clipping) is proportional to the sqrt of the vector
      dimensionality such that the root sum squared of the individual clips
      equals `clip`.
    geometric_clip_update: If True, use geometric updating of the clip.
    model: A `tff.learning.Model` to determine the structure of model weights.
      Required only if per_vector_clipping is True.

  Returns:
    A `DPQuery` suitable for use in a call to `build_dp_aggregate` and
    `build_dp_aggregate_process` to perform Federated Averaging with
    differential privacy.

  Raises:
    ValueError: If `per_vector_clipping` is True but `model` is None.
  """
    py_typecheck.check_type(clip, numbers.Number, 'clip')
    py_typecheck.check_type(noise_multiplier, numbers.Number,
                            'noise_multiplier')
    py_typecheck.check_type(expected_total_weight, numbers.Number,
                            'expected_total_weight')

    if per_vector_clipping:
        warnings.warn(
            'Per-vector clipping is not recommended because it has been shown '
            'experimentally and theoretically to be inferior from a '
            'privacy/utility standpoint. It will be removed in a future release.'
        )

        # Fail fast with a clear message rather than an opaque AttributeError
        # on `model.weights` below.
        if model is None:
            raise ValueError(
                'model must be specified if per_vector_clipping is True.')

        # Note we need to keep the structure of vectors (not just the num_vectors)
        # to create the subqueries below, when per_vector_clipping is True.
        vectors = model.weights.trainable
        num_vectors = len(tf.nest.flatten(vectors))
    else:
        num_vectors = 1

    if adaptive_clip_learning_rate:
        py_typecheck.check_type(adaptive_clip_learning_rate, numbers.Number,
                                'adaptive_clip_learning_rate')
        py_typecheck.check_type(target_unclipped_quantile, numbers.Number,
                                'target_unclipped_quantile')
        py_typecheck.check_type(clipped_count_budget_allocation,
                                numbers.Number,
                                'clipped_count_budget_allocation')
        py_typecheck.check_type(expected_clients_per_round, numbers.Number,
                                'expected_clients_per_round')
        # Split the noise budget between the vector sums (fraction 1 - p) and
        # the clipped-count estimates (fraction p), spread across the
        # `num_vectors` subqueries.
        p = clipped_count_budget_allocation
        nm = noise_multiplier
        vectors_noise_multiplier = nm * ((1 - p) / num_vectors)**(-0.5)
        clipped_count_noise_multiplier = nm * (p / num_vectors)**(-0.5)

        # Clipped count sensitivity is 0.5.
        clipped_count_stddev = 0.5 * clipped_count_noise_multiplier
    else:
        # Warn about every adaptive-clipping argument that will be silently
        # ignored. (A plain if/elif here would report only the first one when
        # both are specified.)
        for arg_name, arg_value in (
            ('target_unclipped_quantile', target_unclipped_quantile),
            ('clipped_count_budget_allocation',
             clipped_count_budget_allocation)):
            if arg_value is not None:
                warnings.warn(
                    '{} is specified but adaptive_clip_learning_rate is '
                    'zero. No adaptive clipping will be performed. Use '
                    'adaptive_clip_learning_rate > 0 if you want adaptive '
                    'clipping.'.format(arg_name))

    def make_single_vector_query(vector_clip):
        """Makes a `DPQuery` for a single vector."""
        if not adaptive_clip_learning_rate:
            return tensorflow_privacy.GaussianAverageQuery(
                l2_norm_clip=vector_clip,
                sum_stddev=vector_clip * noise_multiplier * num_vectors**0.5,
                denominator=expected_total_weight)
        else:
            # Without geometric updating, the update is c = c - lr * loss, so for
            # multiple vectors we set the learning rate to be on the same scale as the
            # initial clip. That way big vectors get big updates, small vectors
            # small updates. With geometric updating, the update is
            # c = c * exp(-lr * loss) so the learning rate should be independent of
            # the initial clip.
            if geometric_clip_update:
                learning_rate = adaptive_clip_learning_rate
            else:
                learning_rate = adaptive_clip_learning_rate * vector_clip / clip
            return tensorflow_privacy.QuantileAdaptiveClipAverageQuery(
                initial_l2_norm_clip=vector_clip,
                noise_multiplier=vectors_noise_multiplier,
                target_unclipped_quantile=target_unclipped_quantile,
                learning_rate=learning_rate,
                clipped_count_stddev=clipped_count_stddev,
                expected_num_records=expected_clients_per_round,
                geometric_update=geometric_clip_update,
                denominator=expected_total_weight)

    if per_vector_clipping:
        # `_distribute_clip` apportions `clip` across the vectors so the root
        # sum of squares of the per-vector clips equals `clip`.
        clips = _distribute_clip(clip, vectors)
        subqueries = tf.nest.map_structure(make_single_vector_query, clips)
        return tensorflow_privacy.NestedQuery(subqueries)
    else:
        return make_single_vector_query(clip)