Code example #1
0
    def gaussian_fixed(cls, noise_multiplier: float, clients_per_round: float,
                       clip: float) -> factory.UnweightedAggregationFactory:
        """`DifferentiallyPrivateFactory` with fixed clipping and Gaussian noise.

        Performs fixed clipping and addition of Gaussian noise for
        differentially private learning. For details of the DP algorithm see
        McMahan et. al (2017) https://arxiv.org/abs/1710.06963.

        Args:
          noise_multiplier: A float specifying the noise multiplier for the
            Gaussian mechanism for model updates. A value of 1.0 or higher may
            be needed for strong privacy. See above mentioned paper to compute
            (epsilon, delta) privacy guarantee.
          clients_per_round: A float specifying the expected number of clients
            per round. Must be positive.
          clip: The value of the clipping norm.

        Returns:
          A `DifferentiallyPrivateFactory` with fixed clipping and Gaussian
          noise.
        """
        # Allow an int for convenience; the validator below expects a float.
        if isinstance(clients_per_round, int):
            clients_per_round = float(clients_per_round)

        for value, arg_name in ((noise_multiplier, 'noise_multiplier'),
                                (clients_per_round, 'clients_per_round'),
                                (clip, 'clip')):
            _check_float_positive(value, arg_name)

        # Noise stddev on the sum scales with the clip; dividing by the
        # expected client count turns the noisy sum into an average.
        return cls(
            tfp.GaussianAverageQuery(l2_norm_clip=clip,
                                     sum_stddev=clip * noise_multiplier,
                                     denominator=clients_per_round))
Code example #2
0
File: aggregate_fn.py  Project: sls33/federated
def build_dp_aggregate(l2_norm, mul_factor, num_clients):
    """Build a 'tff.utils.StatefulAggregateFn' that aggregates the model deltas differentially privately."""
    # Gaussian average query: clip each update to `l2_norm`, add noise with
    # stddev `mul_factor`, and average over `num_clients`.
    gaussian_query = tensorflow_privacy.GaussianAverageQuery(
        l2_norm, mul_factor, num_clients)
    aggregate_fn, _ = tff.utils.build_dp_aggregate(gaussian_query)
    return aggregate_fn
Code example #3
0
 def make_single_vector_query(vector_clip):
     """Makes a `DPQuery` for a single vector.

     Closes over the enclosing function's arguments
     (`adaptive_clip_learning_rate`, `noise_multiplier`, `num_vectors`,
     `expected_total_weight`, etc.); `vector_clip` is the clip for just
     this vector.
     """
     if not adaptive_clip_learning_rate:
         # Fixed clipping. The stddev is scaled by sqrt(num_vectors) so
         # that the total noise across all per-vector queries matches the
         # effective noise_multiplier for the whole model update.
         return tensorflow_privacy.GaussianAverageQuery(
             l2_norm_clip=vector_clip,
             sum_stddev=vector_clip * noise_multiplier * num_vectors**0.5,
             denominator=expected_total_weight)
     else:
         # Without geometric updating, the update is c = c - lr * loss, so for
         # multiple vectors we set the learning rate to be on the same scale as the
         # initial clip. That way big vectors get big updates, small vectors
         # small updates. With geometric updating, the update is
         # c = c * exp(-lr * loss) so the learning rate should be independent of
         # the initial clip.
         if geometric_clip_update:
             learning_rate = adaptive_clip_learning_rate
         else:
             learning_rate = adaptive_clip_learning_rate * vector_clip / clip
         return tensorflow_privacy.QuantileAdaptiveClipAverageQuery(
             initial_l2_norm_clip=vector_clip,
             noise_multiplier=noise_multiplier,
             target_unclipped_quantile=target_unclipped_quantile,
             learning_rate=learning_rate,
             clipped_count_stddev=clipped_count_stddev,
             expected_num_records=expected_num_clients,
             geometric_update=geometric_clip_update,
             denominator=expected_total_weight)
Code example #4
0
def _get_gan(gen_model_fn, disc_model_fn, gan_loss_fns, gen_optimizer,
             disc_optimizer, server_gen_inputs_dataset,
             client_real_images_tff_data, use_dp, dp_l2_norm_clip,
             dp_noise_multiplier, clients_per_round):
    """Construct instance of tff_gans.GanFnsAndTypes class."""
    # Grab one example of each input so TFF can infer type signatures.
    dummy_gen_input = next(iter(server_gen_inputs_dataset))
    first_client_id = client_real_images_tff_data.client_ids[0]
    dummy_real_data = next(
        iter(
            client_real_images_tff_data.create_tf_dataset_for_client(
                first_client_id)))

    train_generator_fn = gan_training_tf_fns.create_train_generator_fn(
        gan_loss_fns, gen_optimizer)
    train_discriminator_fn = gan_training_tf_fns.create_train_discriminator_fn(
        gan_loss_fns, disc_optimizer)

    # Only build a DP query when DP aggregation was requested; otherwise the
    # discriminator updates are aggregated without noise.
    if use_dp:
        dp_average_query = tensorflow_privacy.GaussianAverageQuery(
            l2_norm_clip=dp_l2_norm_clip,
            sum_stddev=dp_l2_norm_clip * dp_noise_multiplier,
            denominator=clients_per_round)
    else:
        dp_average_query = None

    return tff_gans.GanFnsAndTypes(
        generator_model_fn=gen_model_fn,
        discriminator_model_fn=disc_model_fn,
        dummy_gen_input=dummy_gen_input,
        dummy_real_data=dummy_real_data,
        train_generator_fn=train_generator_fn,
        train_discriminator_fn=train_discriminator_fn,
        server_disc_update_optimizer_fn=lambda: tf.keras.optimizers.SGD(lr=1.0),
        train_discriminator_dp_average_query=dp_average_query)
Code example #5
0
def _dp_factory(
    config: DifferentialPrivacyConfig
) -> dp_factory.DifferentiallyPrivateFactory:
    """Creates DifferentiallyPrivateFactory based on config settings."""
    clipping = config.clipping

    if isinstance(clipping, FixedClippingConfig):
        # Fixed clip: Gaussian noise with stddev proportional to the clip.
        query = tfp.GaussianAverageQuery(
            l2_norm_clip=clipping.clip,
            sum_stddev=clipping.clip * config.noise_multiplier,
            denominator=config.clients_per_round)
        return dp_factory.DifferentiallyPrivateFactory(query)

    if isinstance(clipping, AdaptiveClippingConfig):
        # Adaptive clip: quantile-based estimation with geometric updates.
        query = tfp.QuantileAdaptiveClipAverageQuery(
            initial_l2_norm_clip=clipping.initial_clip,
            noise_multiplier=config.noise_multiplier,
            denominator=config.clients_per_round,
            target_unclipped_quantile=clipping.target_quantile,
            learning_rate=clipping.learning_rate,
            clipped_count_stddev=config.clipped_count_stddev,
            expected_num_records=config.clients_per_round,
            geometric_update=True)
        return dp_factory.DifferentiallyPrivateFactory(query)

    raise TypeError(
        f'config.clipping is not a supported type of ClippingConfig. Found '
        f'type {type(config.clipping)}.')
Code example #6
0
 def make_single_vector_query(vector_clip):
   """Makes a `DPQuery` for a single vector.

   Closes over the enclosing function's arguments
   (`adaptive_clip_learning_rate`, `noise_multiplier`, `num_vectors`,
   `expected_total_weight`, etc.); `vector_clip` is the clip for just this
   vector.
   """
   if not adaptive_clip_learning_rate:
     # Fixed clipping. The stddev is scaled by sqrt(num_vectors) so the total
     # noise across all per-vector queries matches the effective
     # noise_multiplier for the whole model update.
     return tensorflow_privacy.GaussianAverageQuery(
         l2_norm_clip=vector_clip,
         sum_stddev=vector_clip * noise_multiplier * num_vectors**0.5,
         denominator=expected_total_weight)
   else:
     # Adaptive clipping: the per-vector learning rate is scaled by
     # vector_clip / clip so big vectors get proportionally big clip updates.
     return tensorflow_privacy.QuantileAdaptiveClipAverageQuery(
         initial_l2_norm_clip=vector_clip,
         noise_multiplier=noise_multiplier,
         target_unclipped_quantile=target_unclipped_quantile,
         learning_rate=adaptive_clip_learning_rate * vector_clip / clip,
         clipped_count_stddev=clipped_count_stddev,
         expected_num_records=expected_num_clients,
         denominator=expected_total_weight)
Code example #7
0
  def to_factory(self) -> dp_factory.DifferentiallyPrivateFactory:
    """Creates factory based on config settings."""
    if self._clipping.is_fixed:
      # Fixed clip: Gaussian noise with stddev proportional to the clip.
      query = tfp.GaussianAverageQuery(
          l2_norm_clip=self._clipping.clip,
          sum_stddev=self._clipping.clip * self._noise_multiplier,
          denominator=self._clients_per_round)
    else:
      # Adaptive clip: quantile-based estimation with geometric updates.
      clip_estimator = self._clipping.clip
      query = tfp.QuantileAdaptiveClipAverageQuery(
          initial_l2_norm_clip=clip_estimator.initial_estimate,
          noise_multiplier=self._noise_multiplier,
          denominator=self._clients_per_round,
          target_unclipped_quantile=clip_estimator.target_quantile,
          learning_rate=clip_estimator.learning_rate,
          clipped_count_stddev=self._clipped_count_stddev,
          expected_num_records=self._clients_per_round,
          geometric_update=True)
    return dp_factory.DifferentiallyPrivateFactory(query)
Code example #8
0
 def test_dp_fed_mean(self):
   """Test whether the norm clipping is done successfully."""
   # Single benign client that trains on one batch.
   client_data = create_client_data()
   batch = client_data()
   train_data = [batch]
   malicious_data = [batch]
   client_type_list = [tf.constant(False)]
   # DP query with a tiny clip and zero noise: the aggregated delta norm
   # should be bounded by the clip alone.
   l2_norm = 0.01
   query = tensorflow_privacy.GaussianAverageQuery(l2_norm, 0.0, 1.0)
   dp_agg_factory = tff.aggregators.DifferentiallyPrivateFactory(query)
   aggregation_process = dp_agg_factory.create(
       tff.learning.framework.weights_type_from_model(_model_fn).trainable)
   trainer = build_federated_averaging_process_attacked(
       _model_fn, aggregation_process=aggregation_process)
   state = trainer.initialize()
   initial_weights = state.model.trainable
   state, _ = trainer.next(state, train_data, malicious_data, client_type_list)
   # Delta of the global model after one round.
   weights_delta = tf.nest.map_structure(tf.subtract, state.model.trainable,
                                         initial_weights)
   # 1.1 factor allows a small numerical tolerance above the clip.
   self.assertLess(attacked_fedavg._get_norm(weights_delta), l2_norm * 1.1)
Code example #9
0
def _get_gan(gen_model_fn, disc_model_fn, gan_loss_fns, gen_optimizer,
             disc_optimizer, server_gen_inputs_dataset,
             client_real_images_tff_data, use_dp, dp_l2_norm_clip,
             dp_noise_multiplier, clients_per_round, gen_status='def',
             disc_status='def', learning_rate=0.0002, optimizer='adam',
             client_disc_train_steps=1, lr_factor=1.):
  """Construct instance of tff_gans.GanFnsAndTypes class."""
  # Grab one example of each input so TFF can infer type signatures.
  dummy_gen_input = next(iter(server_gen_inputs_dataset))
  first_client_id = client_real_images_tff_data.client_ids[0]
  dummy_real_data = next(
      iter(
          client_real_images_tff_data.create_tf_dataset_for_client(
              first_client_id)))

  # Only build a DP query when DP aggregation was requested.
  dp_average_query = None
  if use_dp:
    dp_average_query = tensorflow_privacy.GaussianAverageQuery(
        l2_norm_clip=dp_l2_norm_clip,
        sum_stddev=dp_l2_norm_clip * dp_noise_multiplier,
        denominator=clients_per_round)

  # Server discriminator optimizer: learning rate is scaled down by the
  # number of client discriminator steps. NOTE(review): the `lr` argument
  # is ignored by these factories in the original code; preserved as-is.
  if optimizer == 'adam':
    def server_disc_opt_fn(lr):
      return tf.keras.optimizers.Adam(
          lr_factor * learning_rate / client_disc_train_steps, 0.5)
  else:
    def server_disc_opt_fn(lr):
      return tf.keras.optimizers.SGD(
          lr_factor * learning_rate / client_disc_train_steps)

  def server_gen_opt_fn(lr):
    return tf.keras.optimizers.Adam(lr_factor * lr, 0.5)

  return tff_gans.GanFnsAndTypes(
      generator_model_fn=gen_model_fn,
      discriminator_model_fn=disc_model_fn,
      dummy_gen_input=dummy_gen_input,
      dummy_real_data=dummy_real_data,
      state_disc_optimizer_fn=server_disc_opt_fn,
      state_gen_optimizer_fn=server_gen_opt_fn,
      gen_status=gen_status,
      disc_status=disc_status,
      train_discriminator_dp_average_query=dp_average_query)
Code example #10
0
def main(argv):
  """Runs attacked federated averaging on EMNIST with DP aggregation.

  Builds the model, DP aggregation query and attacked FedAvg process from
  FLAGS, then trains for FLAGS.num_rounds rounds, periodically injecting a
  malicious client and evaluating on test and attack-target data.
  """
  if len(argv) > 1:
    raise app.UsageError('Too many command-line arguments.')
  config = tf.compat.v1.ConfigProto()
  config.graph_options.rewrite_options.layout_optimizer = 2
  tf.compat.v1.enable_eager_execution(config)

  np.random.seed(FLAGS.random_seed)

  # Encode the flag configuration into the log file name so runs are
  # distinguishable. `keys` is a module-level list of flag names.
  flag_dict = FLAGS.flag_values_dict()
  configs = '-'.join(
      ['{}={}'.format(k, flag_dict[k]) for k in keys if k != 'root_output_dir'])
  file_name = 'log' + configs
  create_if_not_exists(FLAGS.root_output_dir)
  file_handle = open(os.path.join(FLAGS.root_output_dir, file_name), 'w')

  # TensorBoard summary writer plus a plain-text log via write_print.
  global_step = tf.Variable(1, name='global_step', dtype=tf.int64)
  file_writer = tf.summary.create_file_writer(
      os.path.join(FLAGS.root_output_dir))
  file_writer.set_as_default()
  write_print(file_handle, '=======configurations========')
  write_print(file_handle, configs)
  write_print(file_handle, '=======configurations========')
  # Prepare the training dataset.
  write_print(file_handle, 'Loading Dataset!')
  emnist_train, _ = tff.simulation.datasets.emnist.load_data(
      only_digits=FLAGS.only_digits)

  # Prepare the test set.
  write_print(file_handle, 'Loading Test Set!')
  test_image, test_label = load_test_data()

  # Load the malicious dataset and the attack target examples.
  write_print(file_handle, 'Loading malicious dataset!')
  dataset_malicious, target_x, target_y = load_malicious_dataset(FLAGS.task_num)

  # Derive the input spec for model_fn from one preprocessed client dataset.
  example_dataset = preprocess(
      emnist_train.create_tf_dataset_for_client(emnist_train.client_ids[0]))
  input_spec = example_dataset.element_spec

  def model_fn():
    """Builds a fresh TFF model wrapping the Keras classifier."""
    keras_model = create_keras_model()
    return tff.learning.from_keras_model(
        keras_model,
        input_spec=input_spec,
        loss=tf.keras.losses.SparseCategoricalCrossentropy(),
        metrics=[tf.keras.metrics.SparseCategoricalAccuracy()])

  # Define the server optimizer; Nesterov only when momentum is nonzero.
  nesterov = True if FLAGS.server_momentum != 0 else False

  def server_optimizer_fn():
    """Builds the server-side SGD optimizer from FLAGS."""
    return tf.keras.optimizers.SGD(
        learning_rate=FLAGS.server_learning_rate,
        momentum=FLAGS.server_momentum,
        nesterov=nesterov)

  # Build the iterative process: boosted (model-replacement) client updates
  # aggregated with a DP Gaussian average query.
  write_print(file_handle, 'Building Iterative Process!')
  client_update_function = attacked_fedavg.ClientProjectBoost(
      boost_factor=float(FLAGS.num_clients_per_round),
      norm_bound=FLAGS.norm_bound,
      round_num=FLAGS.client_round_num)
  query = tensorflow_privacy.GaussianAverageQuery(FLAGS.l2_norm_clip,
                                                  FLAGS.mul_factor,
                                                  FLAGS.num_clients_per_round)
  dp_aggregate_fn = tff.utils.build_dp_aggregate_process(
      tff.learning.framework.weights_type_from_model(model_fn), query)
  iterative_process = attacked_fedavg.build_federated_averaging_process_attacked(
      model_fn=model_fn,
      aggregation_process=dp_aggregate_fn,
      client_update_tf=client_update_function,
      server_optimizer_fn=server_optimizer_fn)
  state = iterative_process.initialize()

  # Training loop.
  for cur_round in range(FLAGS.num_rounds):
    # The attacker participates once every attack_freq rounds (mid-cycle).
    if cur_round % FLAGS.attack_freq == FLAGS.attack_freq // 2:
      with_attack = 1
      write_print(file_handle, 'Attacker appears!')
    else:
      with_attack = 0

    # Sample clients and make the federated datasets for this round.
    federated_train_data, federated_malicious_data, client_type_list = \
        sample_clients_with_malicious(
            emnist_train, client_ids=emnist_train.client_ids,
            dataset_malicious=dataset_malicious,
            num_clients=FLAGS.num_clients_per_round, with_attack=with_attack)

    # One round of attacked federated averaging.
    write_print(file_handle, 'Round starts!')
    state, train_metrics = iterative_process.next(state, federated_train_data,
                                                  federated_malicious_data,
                                                  client_type_list)

    write_print(
        file_handle,
        'Training round {:2d}, train_metrics={}'.format(cur_round,
                                                        train_metrics))

    log_tfboard('train_acc', train_metrics['sparse_categorical_accuracy'],
                global_step)
    log_tfboard('train_loss', train_metrics['loss'], global_step)

    # Evaluate the current model on test data and on the attack targets.
    if cur_round % FLAGS.evaluate_per_rounds == 0:
      test_metrics, test_metrics_target = evaluate(state, test_image,
                                                   test_label, target_x,
                                                   target_y)
      write_print(
          file_handle,
          'Evaluation round {:2d}, <sparse_categorical_accuracy={},loss={}>'
          .format(cur_round, test_metrics[1], test_metrics[0]))
      write_print(
          file_handle,
          'Evaluation round {:2d}, <sparse_categorical_accuracy={},loss={}>'
          .format(cur_round, test_metrics_target[1], test_metrics_target[0]))
      log_tfboard('test_acc', test_metrics[1], global_step)
      log_tfboard('test_loss', test_metrics[0], global_step)
      log_tfboard('test_acc_target', test_metrics_target[1], global_step)
      log_tfboard('test_loss_target', test_metrics_target[0], global_step)

    global_step.assign_add(1)
Code example #11
0
def build_dp_query(clip,
                   noise_multiplier,
                   expected_total_weight,
                   adaptive_clip_learning_rate=0,
                   target_unclipped_quantile=None,
                   clipped_count_budget_allocation=None,
                   expected_clients_per_round=None,
                   geometric_clip_update=True):
    """Makes a `DPQuery` to estimate vector averages with differential privacy.

    Supports many of the types of query available in tensorflow_privacy,
    including nested ("per-vector") queries as described in
    https://arxiv.org/pdf/1812.06210.pdf, and quantile-based adaptive clipping
    as described in https://arxiv.org/abs/1905.03871.

    Args:
      clip: The query's L2 norm bound, or the initial clip if adaptive
        clipping is used.
      noise_multiplier: The ratio of the (effective) noise stddev to the clip.
      expected_total_weight: The expected total weight of all clients, used as
        the denominator for the average computation.
      adaptive_clip_learning_rate: Learning rate for quantile-based adaptive
        clipping. If 0, fixed clipping is used.
      target_unclipped_quantile: Target unclipped quantile for adaptive
        clipping.
      clipped_count_budget_allocation: The fraction of privacy budget to use
        for estimating clipped counts.
      expected_clients_per_round: The expected number of clients for
        estimating clipped fractions.
      geometric_clip_update: If True, use geometric updating of the clip.

    Returns:
      A `DPQuery` suitable for use in a call to `build_dp_aggregate` and
      `build_dp_aggregate_process` to perform Federated Averaging with
      differential privacy.
    """
    py_typecheck.check_type(clip, numbers.Number, 'clip')
    py_typecheck.check_type(noise_multiplier, numbers.Number,
                            'noise_multiplier')
    py_typecheck.check_type(expected_total_weight, numbers.Number,
                            'expected_total_weight')

    if not adaptive_clip_learning_rate:
        # Fixed clipping: warn about adaptive-only arguments being ignored.
        if target_unclipped_quantile is not None:
            warnings.warn(
                'target_unclipped_quantile is specified but '
                'adaptive_clip_learning_rate is zero. No adaptive clipping will be '
                'performed. Use adaptive_clip_learning_rate > 0 if you want '
                'adaptive clipping.')
        if clipped_count_budget_allocation is not None:
            warnings.warn(
                'clipped_count_budget_allocation is specified but '
                'adaptive_clip_learning_rate is zero. No adaptive clipping will be '
                'performed. Use adaptive_clip_learning_rate > 0 if you want '
                'adaptive clipping.')
        return tensorflow_privacy.GaussianAverageQuery(
            l2_norm_clip=clip,
            sum_stddev=clip * noise_multiplier,
            denominator=expected_total_weight)

    # Adaptive clipping: validate the arguments it requires.
    py_typecheck.check_type(adaptive_clip_learning_rate, numbers.Number,
                            'adaptive_clip_learning_rate')
    py_typecheck.check_type(target_unclipped_quantile, numbers.Number,
                            'target_unclipped_quantile')
    py_typecheck.check_type(clipped_count_budget_allocation,
                            numbers.Number,
                            'clipped_count_budget_allocation')
    py_typecheck.check_type(expected_clients_per_round, numbers.Number,
                            'expected_clients_per_round')

    # Split the overall noise multiplier between the vector sums and the
    # clipped-count estimate according to the budget allocation.
    budget_fraction = clipped_count_budget_allocation
    vectors_noise_multiplier = (
        noise_multiplier * (1 - budget_fraction)**(-0.5))
    clipped_count_noise_multiplier = (
        noise_multiplier * budget_fraction**(-0.5))

    # Clipped count sensitivity is 0.5.
    clipped_count_stddev = 0.5 * clipped_count_noise_multiplier

    return tensorflow_privacy.QuantileAdaptiveClipAverageQuery(
        initial_l2_norm_clip=clip,
        noise_multiplier=vectors_noise_multiplier,
        target_unclipped_quantile=target_unclipped_quantile,
        learning_rate=adaptive_clip_learning_rate,
        clipped_count_stddev=clipped_count_stddev,
        expected_num_records=expected_clients_per_round,
        geometric_update=geometric_clip_update,
        denominator=expected_total_weight)