def gaussian_fixed(cls, noise_multiplier: float, clients_per_round: float,
                   clip: float) -> factory.UnweightedAggregationFactory:
  """Builds a `DifferentiallyPrivateFactory` using fixed clipping.

  Model updates are clipped to a fixed L2 norm and Gaussian noise is added,
  following McMahan et al. (2017), https://arxiv.org/abs/1710.06963.

  Args:
    noise_multiplier: A float specifying the noise multiplier for the
      Gaussian mechanism applied to model updates. A value of 1.0 or higher
      may be needed for strong privacy; see the paper above to compute an
      (epsilon, delta) guarantee.
    clients_per_round: A float specifying the expected number of clients per
      round. Must be positive.
    clip: The value of the clipping norm.

  Returns:
    A `DifferentiallyPrivateFactory` with fixed clipping and Gaussian noise.
  """
  # Accept an int count for convenience; the validator below expects a float.
  if isinstance(clients_per_round, int):
    clients_per_round = float(clients_per_round)

  _check_float_positive(noise_multiplier, 'noise_multiplier')
  _check_float_positive(clients_per_round, 'clients_per_round')
  _check_float_positive(clip, 'clip')

  # Noise stddev on the sum scales with the clip; dividing by the expected
  # client count turns the noised sum into an average.
  sum_stddev = clip * noise_multiplier
  query = tfp.GaussianAverageQuery(
      l2_norm_clip=clip,
      sum_stddev=sum_stddev,
      denominator=clients_per_round)
  return cls(query)
def build_dp_aggregate(l2_norm, mul_factor, num_clients):
  """Returns a `tff.utils.StatefulAggregateFn` aggregating model deltas with DP."""
  # Gaussian average query: clip to l2_norm, noise stddev mul_factor, divide
  # by num_clients.
  gaussian_query = tensorflow_privacy.GaussianAverageQuery(
      l2_norm, mul_factor, num_clients)
  aggregate_fn, _ = tff.utils.build_dp_aggregate(gaussian_query)
  return aggregate_fn
def make_single_vector_query(vector_clip):
  """Builds the `DPQuery` for a single vector with clip `vector_clip`."""
  if adaptive_clip_learning_rate:
    # Without geometric updating, the clip update is c = c - lr * loss, so
    # for multiple vectors the learning rate is scaled to each vector's share
    # of the initial clip: big vectors get big updates, small vectors small
    # ones. With geometric updating, c = c * exp(-lr * loss), so the rate is
    # independent of the initial clip.
    lr = (adaptive_clip_learning_rate if geometric_clip_update else
          adaptive_clip_learning_rate * vector_clip / clip)
    return tensorflow_privacy.QuantileAdaptiveClipAverageQuery(
        initial_l2_norm_clip=vector_clip,
        noise_multiplier=noise_multiplier,
        target_unclipped_quantile=target_unclipped_quantile,
        learning_rate=lr,
        clipped_count_stddev=clipped_count_stddev,
        expected_num_records=expected_num_clients,
        geometric_update=geometric_clip_update,
        denominator=expected_total_weight)
  # Fixed clipping: per-vector noise is scaled by sqrt(num_vectors) so the
  # total noise across vectors matches the overall multiplier.
  return tensorflow_privacy.GaussianAverageQuery(
      l2_norm_clip=vector_clip,
      sum_stddev=vector_clip * noise_multiplier * num_vectors**0.5,
      denominator=expected_total_weight)
def _get_gan(gen_model_fn, disc_model_fn, gan_loss_fns, gen_optimizer,
             disc_optimizer, server_gen_inputs_dataset,
             client_real_images_tff_data, use_dp, dp_l2_norm_clip,
             dp_noise_multiplier, clients_per_round):
  """Constructs an instance of the `tff_gans.GanFnsAndTypes` class.

  Args:
    gen_model_fn: No-arg callable returning the generator model.
    disc_model_fn: No-arg callable returning the discriminator model.
    gan_loss_fns: Loss functions passed to the train-fn constructors.
    gen_optimizer: Optimizer for the client generator update.
    disc_optimizer: Optimizer for the client discriminator update.
    server_gen_inputs_dataset: Dataset yielding generator inputs; one element
      is used as a dummy input for type inference.
    client_real_images_tff_data: TFF client data; one batch from the first
      client is used as dummy real data for type inference.
    use_dp: Whether to build a DP average query for discriminator updates.
    dp_l2_norm_clip: L2 clip for the DP query (used only if `use_dp`).
    dp_noise_multiplier: Noise multiplier for the DP query.
    clients_per_round: Denominator for the DP average query.

  Returns:
    A populated `tff_gans.GanFnsAndTypes`.
  """
  dummy_gen_input = next(iter(server_gen_inputs_dataset))
  dummy_real_data = next(
      iter(
          client_real_images_tff_data.create_tf_dataset_for_client(
              client_real_images_tff_data.client_ids[0])))

  train_generator_fn = gan_training_tf_fns.create_train_generator_fn(
      gan_loss_fns, gen_optimizer)
  train_discriminator_fn = gan_training_tf_fns.create_train_discriminator_fn(
      gan_loss_fns, disc_optimizer)

  dp_average_query = None
  if use_dp:
    dp_average_query = tensorflow_privacy.GaussianAverageQuery(
        l2_norm_clip=dp_l2_norm_clip,
        sum_stddev=dp_l2_norm_clip * dp_noise_multiplier,
        denominator=clients_per_round)

  return tff_gans.GanFnsAndTypes(
      generator_model_fn=gen_model_fn,
      discriminator_model_fn=disc_model_fn,
      dummy_gen_input=dummy_gen_input,
      dummy_real_data=dummy_real_data,
      train_generator_fn=train_generator_fn,
      train_discriminator_fn=train_discriminator_fn,
      # FIX: `lr` is the deprecated alias for `learning_rate` in tf.keras
      # optimizers; use the supported keyword.
      server_disc_update_optimizer_fn=lambda: tf.keras.optimizers.SGD(
          learning_rate=1.0),
      train_discriminator_dp_average_query=dp_average_query)
def _dp_factory(
    config: DifferentialPrivacyConfig
) -> dp_factory.DifferentiallyPrivateFactory:
  """Builds a `DifferentiallyPrivateFactory` from a `DifferentialPrivacyConfig`."""
  clipping = config.clipping
  if isinstance(clipping, FixedClippingConfig):
    # Fixed clipping: noise stddev is proportional to the constant clip.
    query = tfp.GaussianAverageQuery(
        l2_norm_clip=clipping.clip,
        sum_stddev=clipping.clip * config.noise_multiplier,
        denominator=config.clients_per_round)
  elif isinstance(clipping, AdaptiveClippingConfig):
    # Adaptive clipping per https://arxiv.org/abs/1905.03871, with geometric
    # updating of the clip estimate.
    query = tfp.QuantileAdaptiveClipAverageQuery(
        initial_l2_norm_clip=clipping.initial_clip,
        noise_multiplier=config.noise_multiplier,
        denominator=config.clients_per_round,
        target_unclipped_quantile=clipping.target_quantile,
        learning_rate=clipping.learning_rate,
        clipped_count_stddev=config.clipped_count_stddev,
        expected_num_records=config.clients_per_round,
        geometric_update=True)
  else:
    raise TypeError(
        f'config.clipping is not a supported type of ClippingConfig. Found '
        f'type {type(config.clipping)}.')
  return dp_factory.DifferentiallyPrivateFactory(query)
def make_single_vector_query(vector_clip):
  """Builds the `DPQuery` used for one vector with clip `vector_clip`."""
  if adaptive_clip_learning_rate:
    # Scale the learning rate by the vector's share of the overall clip so
    # larger clips take proportionally larger update steps.
    per_vector_lr = adaptive_clip_learning_rate * vector_clip / clip
    return tensorflow_privacy.QuantileAdaptiveClipAverageQuery(
        initial_l2_norm_clip=vector_clip,
        noise_multiplier=noise_multiplier,
        target_unclipped_quantile=target_unclipped_quantile,
        learning_rate=per_vector_lr,
        clipped_count_stddev=clipped_count_stddev,
        expected_num_records=expected_num_clients,
        denominator=expected_total_weight)
  # Fixed clipping: per-vector noise carries a sqrt(num_vectors) factor so
  # the combined noise matches the overall multiplier.
  return tensorflow_privacy.GaussianAverageQuery(
      l2_norm_clip=vector_clip,
      sum_stddev=vector_clip * noise_multiplier * num_vectors**0.5,
      denominator=expected_total_weight)
def to_factory(self) -> dp_factory.DifferentiallyPrivateFactory:
  """Builds the `DifferentiallyPrivateFactory` described by this config."""
  if not self._clipping.is_fixed:
    # Adaptive clipping: quantile-based estimate with geometric updating.
    return dp_factory.DifferentiallyPrivateFactory(
        tfp.QuantileAdaptiveClipAverageQuery(
            initial_l2_norm_clip=self._clipping.clip.initial_estimate,
            noise_multiplier=self._noise_multiplier,
            denominator=self._clients_per_round,
            target_unclipped_quantile=self._clipping.clip.target_quantile,
            learning_rate=self._clipping.clip.learning_rate,
            clipped_count_stddev=self._clipped_count_stddev,
            expected_num_records=self._clients_per_round,
            geometric_update=True))
  # Fixed clipping: noise stddev scales with the constant clip norm.
  fixed_clip = self._clipping.clip
  return dp_factory.DifferentiallyPrivateFactory(
      tfp.GaussianAverageQuery(
          l2_norm_clip=fixed_clip,
          sum_stddev=fixed_clip * self._noise_multiplier,
          denominator=self._clients_per_round))
def test_dp_fed_mean(self):
  """Checks that the aggregated model delta is clipped to the L2 norm bound."""
  batch = create_client_data()()
  benign_data = [batch]
  poisoned_data = [batch]
  client_types = [tf.constant(False)]

  clip_norm = 0.01
  # Zero noise stddev and denominator 1.0 so only clipping affects the result.
  dp_query = tensorflow_privacy.GaussianAverageQuery(clip_norm, 0.0, 1.0)
  dp_agg_factory = tff.aggregators.DifferentiallyPrivateFactory(dp_query)
  aggregation_process = dp_agg_factory.create(
      tff.learning.framework.weights_type_from_model(_model_fn).trainable)
  trainer = build_federated_averaging_process_attacked(
      _model_fn, aggregation_process=aggregation_process)

  state = trainer.initialize()
  initial_weights = state.model.trainable
  state, _ = trainer.next(state, benign_data, poisoned_data, client_types)

  weights_delta = tf.nest.map_structure(
      tf.subtract, state.model.trainable, initial_weights)
  # Allow 10% slack for numerical effects around the clip boundary.
  self.assertLess(attacked_fedavg._get_norm(weights_delta), clip_norm * 1.1)
def _get_gan(gen_model_fn, disc_model_fn, gan_loss_fns, gen_optimizer,
             disc_optimizer, server_gen_inputs_dataset,
             client_real_images_tff_data, use_dp, dp_l2_norm_clip,
             dp_noise_multiplier, clients_per_round, gen_status='def',
             disc_status='def', learning_rate=0.0002, optimizer='adam',
             client_disc_train_steps=1, lr_factor=1.):
  """Constructs an instance of the `tff_gans.GanFnsAndTypes` class."""
  dummy_gen_input = next(iter(server_gen_inputs_dataset))
  dummy_real_data = next(
      iter(
          client_real_images_tff_data.create_tf_dataset_for_client(
              client_real_images_tff_data.client_ids[0])))

  dp_average_query = None
  if use_dp:
    dp_average_query = tensorflow_privacy.GaussianAverageQuery(
        l2_norm_clip=dp_l2_norm_clip,
        sum_stddev=dp_l2_norm_clip * dp_noise_multiplier,
        denominator=clients_per_round)

  # Server-side discriminator rate; divided by the number of client steps so
  # the combined update magnitude stays comparable.
  disc_lr = lr_factor * learning_rate / client_disc_train_steps

  # NOTE(review): the `lr` argument of the discriminator optimizer factories
  # is ignored (the closed-over rate is used instead) — presumably kept for
  # interface parity with the generator factory; confirm with callers.
  if optimizer == 'adam':
    def server_disc_opt_fn(lr):
      del lr  # unused
      return tf.keras.optimizers.Adam(disc_lr, 0.5)
  else:
    def server_disc_opt_fn(lr):
      del lr  # unused
      return tf.keras.optimizers.SGD(disc_lr)

  def server_gen_opt_fn(lr):
    return tf.keras.optimizers.Adam(lr_factor * lr, 0.5)

  return tff_gans.GanFnsAndTypes(
      generator_model_fn=gen_model_fn,
      discriminator_model_fn=disc_model_fn,
      dummy_gen_input=dummy_gen_input,
      dummy_real_data=dummy_real_data,
      state_disc_optimizer_fn=server_disc_opt_fn,
      state_gen_optimizer_fn=server_gen_opt_fn,
      gen_status=gen_status,
      disc_status=disc_status,
      train_discriminator_dp_average_query=dp_average_query)
def main(argv):
  """Trains attacked federated EMNIST with DP aggregation and logs metrics.

  Sets up eager TF execution and file/TensorBoard logging, loads the EMNIST
  training data plus test and malicious datasets, builds an attacked FedAvg
  iterative process with a DP Gaussian average query, then runs the
  training/evaluation loop.

  Args:
    argv: Command-line arguments; only the program name is allowed.

  Raises:
    app.UsageError: If extra command-line arguments are supplied.
  """
  if len(argv) > 1:
    raise app.UsageError('Too many command-line arguments.')

  # Enable eager execution with the graph rewriter's layout optimizer set to
  # a non-default value (2).
  config = tf.compat.v1.ConfigProto()
  config.graph_options.rewrite_options.layout_optimizer = 2
  tf.compat.v1.enable_eager_execution(config)
  np.random.seed(FLAGS.random_seed)

  # Build a log-file name that encodes the flag configuration.
  # NOTE(review): `keys` is not defined in this function — presumably a
  # module-level list of flag names to include in the log name; confirm.
  flag_dict = FLAGS.flag_values_dict()
  configs = '-'.join(
      ['{}={}'.format(k, flag_dict[k]) for k in keys if k != 'root_output_dir'])
  file_name = 'log' + configs
  create_if_not_exists(FLAGS.root_output_dir)
  # NOTE(review): this handle is never closed; consider a `with` block.
  file_handle = open(os.path.join(FLAGS.root_output_dir, file_name), 'w')

  # TensorBoard summary writer plus a step counter shared by log_tfboard.
  global_step = tf.Variable(1, name='global_step', dtype=tf.int64)
  file_writer = tf.summary.create_file_writer(
      os.path.join(FLAGS.root_output_dir))
  file_writer.set_as_default()
  write_print(file_handle, '=======configurations========')
  write_print(file_handle, configs)
  write_print(file_handle, '=======configurations========')

  # Prepare the training dataset.
  write_print(file_handle, 'Loading Dataset!')
  emnist_train, _ = tff.simulation.datasets.emnist.load_data(
      only_digits=FLAGS.only_digits)

  # Prepare the test set.
  write_print(file_handle, 'Loading Test Set!')
  test_image, test_label = load_test_data()

  # Load the malicious dataset and its attack-target examples.
  write_print(file_handle, 'Loading malicious dataset!')
  dataset_malicious, target_x, target_y = load_malicious_dataset(FLAGS.task_num)

  # Prepare model_fn; the input spec comes from one client's preprocessed data.
  example_dataset = preprocess(
      emnist_train.create_tf_dataset_for_client(emnist_train.client_ids[0]))
  input_spec = example_dataset.element_spec

  def model_fn():
    # A fresh Keras model must be created on every call, as TFF requires.
    keras_model = create_keras_model()
    return tff.learning.from_keras_model(
        keras_model,
        input_spec=input_spec,
        loss=tf.keras.losses.SparseCategoricalCrossentropy(),
        metrics=[tf.keras.metrics.SparseCategoricalAccuracy()])

  # Define the server optimizer; Nesterov only when momentum is nonzero.
  nesterov = True if FLAGS.server_momentum != 0 else False

  def server_optimizer_fn():
    return tf.keras.optimizers.SGD(
        learning_rate=FLAGS.server_learning_rate,
        momentum=FLAGS.server_momentum,
        nesterov=nesterov)

  # Build the iterative process: boosted (attacked) client updates aggregated
  # through a DP Gaussian average query.
  write_print(file_handle, 'Building Iterative Process!')
  client_update_function = attacked_fedavg.ClientProjectBoost(
      boost_factor=float(FLAGS.num_clients_per_round),
      norm_bound=FLAGS.norm_bound,
      round_num=FLAGS.client_round_num)
  query = tensorflow_privacy.GaussianAverageQuery(FLAGS.l2_norm_clip,
                                                  FLAGS.mul_factor,
                                                  FLAGS.num_clients_per_round)
  dp_aggregate_fn = tff.utils.build_dp_aggregate_process(
      tff.learning.framework.weights_type_from_model(model_fn), query)
  iterative_process = attacked_fedavg.build_federated_averaging_process_attacked(
      model_fn=model_fn,
      aggregation_process=dp_aggregate_fn,
      client_update_tf=client_update_function,
      server_optimizer_fn=server_optimizer_fn)
  state = iterative_process.initialize()

  # Training loop.
  for cur_round in range(FLAGS.num_rounds):
    # The attacker participates once per attack_freq rounds (mid-period).
    if cur_round % FLAGS.attack_freq == FLAGS.attack_freq // 2:
      with_attack = 1
      write_print(file_handle, 'Attacker appears!')
    else:
      with_attack = 0

    # Sample clients and build this round's federated datasets.
    federated_train_data, federated_malicious_data, client_type_list = \
        sample_clients_with_malicious(
            emnist_train, client_ids=emnist_train.client_ids,
            dataset_malicious=dataset_malicious,
            num_clients=FLAGS.num_clients_per_round,
            with_attack=with_attack)

    # One round of attacked federated averaging.
    write_print(file_handle, 'Round starts!')
    state, train_metrics = iterative_process.next(state, federated_train_data,
                                                  federated_malicious_data,
                                                  client_type_list)
    write_print(
        file_handle, 'Training round {:2d}, train_metrics={}'.format(
            cur_round, train_metrics))
    log_tfboard('train_acc', train_metrics['sparse_categorical_accuracy'],
                global_step)
    log_tfboard('train_loss', train_metrics['loss'], global_step)

    # Periodically evaluate the model on clean test data and attack targets.
    if cur_round % FLAGS.evaluate_per_rounds == 0:
      test_metrics, test_metrics_target = evaluate(state, test_image,
                                                   test_label, target_x,
                                                   target_y)
      write_print(
          file_handle,
          'Evaluation round {:2d}, <sparse_categorical_accuracy={},loss={}>'
          .format(cur_round, test_metrics[1], test_metrics[0]))
      write_print(
          file_handle,
          'Evaluation round {:2d}, <sparse_categorical_accuracy={},loss={}>'
          .format(cur_round, test_metrics_target[1], test_metrics_target[0]))
      log_tfboard('test_acc', test_metrics[1], global_step)
      log_tfboard('test_loss', test_metrics[0], global_step)
      log_tfboard('test_acc_target', test_metrics_target[1], global_step)
      log_tfboard('test_loss_target', test_metrics_target[0], global_step)

    global_step.assign_add(1)
def build_dp_query(clip,
                   noise_multiplier,
                   expected_total_weight,
                   adaptive_clip_learning_rate=0,
                   target_unclipped_quantile=None,
                   clipped_count_budget_allocation=None,
                   expected_clients_per_round=None,
                   geometric_clip_update=True):
  """Builds a `DPQuery` that estimates vector averages with differential privacy.

  Supports many of the query types available in tensorflow_privacy, including
  nested ("per-vector") queries as described in
  https://arxiv.org/pdf/1812.06210.pdf and quantile-based adaptive clipping as
  described in https://arxiv.org/abs/1905.03871.

  Args:
    clip: The query's L2 norm bound, or the initial clip when adaptive
      clipping is used.
    noise_multiplier: The ratio of the (effective) noise stddev to the clip.
    expected_total_weight: The expected total weight of all clients, used as
      the denominator of the average computation.
    adaptive_clip_learning_rate: Learning rate for quantile-based adaptive
      clipping. If 0 (the default), fixed clipping is used.
    target_unclipped_quantile: Target unclipped quantile for adaptive
      clipping.
    clipped_count_budget_allocation: The fraction of the privacy budget spent
      estimating clipped counts.
    expected_clients_per_round: The expected number of clients, used to
      estimate clipped fractions.
    geometric_clip_update: If True, the clip is updated geometrically.

  Returns:
    A `DPQuery` suitable for use in a call to `build_dp_aggregate` and
    `build_dp_aggregate_process` to perform Federated Averaging with
    differential privacy.
  """
  py_typecheck.check_type(clip, numbers.Number, 'clip')
  py_typecheck.check_type(noise_multiplier, numbers.Number, 'noise_multiplier')
  py_typecheck.check_type(expected_total_weight, numbers.Number,
                          'expected_total_weight')

  if not adaptive_clip_learning_rate:
    # Fixed clipping: adaptive-only arguments would be ignored, so warn when
    # they were supplied anyway.
    if target_unclipped_quantile is not None:
      warnings.warn(
          'target_unclipped_quantile is specified but '
          'adaptive_clip_learning_rate is zero. No adaptive clipping will be '
          'performed. Use adaptive_clip_learning_rate > 0 if you want '
          'adaptive clipping.')
    if clipped_count_budget_allocation is not None:
      warnings.warn(
          'clipped_count_budget_allocation is specified but '
          'adaptive_clip_learning_rate is zero. No adaptive clipping will be '
          'performed. Use adaptive_clip_learning_rate > 0 if you want '
          'adaptive clipping.')
    return tensorflow_privacy.GaussianAverageQuery(
        l2_norm_clip=clip,
        sum_stddev=clip * noise_multiplier,
        denominator=expected_total_weight)

  py_typecheck.check_type(adaptive_clip_learning_rate, numbers.Number,
                          'adaptive_clip_learning_rate')
  py_typecheck.check_type(target_unclipped_quantile, numbers.Number,
                          'target_unclipped_quantile')
  py_typecheck.check_type(clipped_count_budget_allocation, numbers.Number,
                          'clipped_count_budget_allocation')
  py_typecheck.check_type(expected_clients_per_round, numbers.Number,
                          'expected_clients_per_round')

  # Split the noise budget: a fraction goes to the clipped-count estimate,
  # the remainder to the vector sum.
  fraction = clipped_count_budget_allocation
  vectors_noise_multiplier = noise_multiplier * (1 - fraction)**(-0.5)
  clipped_count_noise_multiplier = noise_multiplier * fraction**(-0.5)
  # The sensitivity of the clipped count is 0.5.
  clipped_count_stddev = 0.5 * clipped_count_noise_multiplier

  return tensorflow_privacy.QuantileAdaptiveClipAverageQuery(
      initial_l2_norm_clip=clip,
      noise_multiplier=vectors_noise_multiplier,
      target_unclipped_quantile=target_unclipped_quantile,
      learning_rate=adaptive_clip_learning_rate,
      clipped_count_stddev=clipped_count_stddev,
      expected_num_records=expected_clients_per_round,
      geometric_update=geometric_clip_update,
      denominator=expected_total_weight)