def create_optimizer(learning_rate_var):
  """Builds the training optimizer selected by FLAGS.optimizer."""
  # FLAGS, np (numpy) and tfv1 (tensorflow.compat.v1) are assumed to be
  # defined at module level.
  if FLAGS.optimizer == 'adam':
    optimizer = tfv1.train.AdamOptimizer(learning_rate=learning_rate_var,
                                         beta1=FLAGS.beta1,
                                         beta2=FLAGS.beta2,
                                         epsilon=FLAGS.epsilon)
  elif FLAGS.optimizer == 'sgd':
    # Note: plain SGD uses a fixed learning rate of 1 here.
    optimizer = tfv1.train.GradientDescentOptimizer(learning_rate=1)
  elif FLAGS.optimizer == 'dp-sgd':
    from tensorflow_privacy.privacy.analysis import compute_dp_sgd_privacy_lib
    from tensorflow_privacy.privacy.optimizers import dp_optimizer
    optimizer = dp_optimizer.DPGradientDescentGaussianOptimizer(
        l2_norm_clip=FLAGS.gradient_clip_value,
        noise_multiplier=(FLAGS.gradient_noise / FLAGS.gradient_clip_value *
                          np.sqrt(FLAGS.train_batch_size)),
        num_microbatches=FLAGS.train_batch_size // FLAGS.microbatch_size,
        learning_rate=learning_rate_var)
  elif FLAGS.optimizer == 'fast-dp-sgd':
    from tensorflow_privacy.privacy.optimizers import dp_optimizer_vectorized
    optimizer = dp_optimizer_vectorized.VectorizedDPSGD(
        l2_norm_clip=FLAGS.gradient_clip_value,
        noise_multiplier=FLAGS.gradient_noise / FLAGS.gradient_clip_value,
        num_microbatches=FLAGS.train_batch_size // FLAGS.microbatch_size,
        learning_rate=learning_rate_var)
  return optimizer
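If the DP optimizer above is run for a known number of epochs, the resulting (epsilon, delta) guarantee can be reported with the compute_dp_sgd_privacy helper that is already imported in the 'dp-sgd' branch. The snippet below is a minimal sketch; the dataset size, epoch count and delta are illustrative placeholders (not values from the code above), and the helper's location may differ across tensorflow_privacy versions.

from tensorflow_privacy.privacy.analysis import compute_dp_sgd_privacy_lib

# Hypothetical training configuration (placeholders, not taken from FLAGS above).
n_train = 60000          # number of training examples
batch_size = 256         # would correspond to FLAGS.train_batch_size
noise_multiplier = 1.1   # effective noise multiplier passed to the DP optimizer
epochs = 15
delta = 1e-5

# compute_dp_sgd_privacy returns the epsilon spent and the optimal RDP order.
eps, opt_order = compute_dp_sgd_privacy_lib.compute_dp_sgd_privacy(
    n=n_train,
    batch_size=batch_size,
    noise_multiplier=noise_multiplier,
    epochs=epochs,
    delta=delta)
print('DP-SGD with these settings satisfies ({:.2f}, {})-DP'.format(eps, delta))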
def cnn_model_fn(features, labels, mode):
  """Model function for a CNN."""
  # Define CNN architecture using tf.keras.layers.
  input_layer = tf.reshape(features['x'], [-1, 28, 28, 1])
  y = tf.keras.layers.Conv2D(16, 8, strides=2, padding='same',
                             activation='relu').apply(input_layer)
  y = tf.keras.layers.MaxPool2D(2, 1).apply(y)
  y = tf.keras.layers.Conv2D(32, 4, strides=2, padding='valid',
                             activation='relu').apply(y)
  y = tf.keras.layers.MaxPool2D(2, 1).apply(y)
  y = tf.keras.layers.Flatten().apply(y)
  y = tf.keras.layers.Dense(32, activation='relu').apply(y)
  logits = tf.keras.layers.Dense(10).apply(y)

  # Calculate loss as a vector and as its average across the minibatch.
  vector_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels,
                                                               logits=logits)
  scalar_loss = tf.reduce_mean(vector_loss)

  # Configure the training op (for TRAIN mode).
  if mode == tf.estimator.ModeKeys.TRAIN:
    # optimizer = tf.train.GradientDescentOptimizer(FLAGS.learning_rate)
    optimizer = dp_optimizer.DPGradientDescentGaussianOptimizer(
        l2_norm_clip=FLAGS.l2_norm_clip,
        noise_multiplier=FLAGS.noise_multiplier,
        num_microbatches=FLAGS.num_microbatches,
        learning_rate=FLAGS.learning_rate)
    global_step = tf.train.get_global_step()
    train_op = optimizer.minimize(loss=vector_loss, global_step=global_step)
    # opt_loss = scalar_loss
    # train_op = optimizer.minimize(loss=opt_loss, global_step=global_step)
    return tf.estimator.EstimatorSpec(mode=mode,
                                      loss=scalar_loss,
                                      train_op=train_op)

  # Add evaluation metrics (for EVAL mode).
  elif mode == tf.estimator.ModeKeys.EVAL:
    eval_metric_ops = {
        'accuracy':
            tf.metrics.accuracy(labels=labels,
                                predictions=tf.argmax(input=logits, axis=1))
    }
    return tf.estimator.EstimatorSpec(mode=mode,
                                      loss=scalar_loss,
                                      eval_metric_ops=eval_metric_ops)
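For context, the sketch below shows one way this model_fn might be wired into a TF1 Estimator with numpy_input_fn. It is an assumption-laden example: train_data, train_labels, test_data and test_labels are hypothetical preloaded NumPy arrays (float32 images, int32 labels), the model_dir and batch size are placeholders, and the batch size should match whatever the DP optimizer's microbatch flag expects.

import tensorflow.compat.v1 as tf

# Hypothetical wiring of cnn_model_fn into a TF1 Estimator.
mnist_classifier = tf.estimator.Estimator(model_fn=cnn_model_fn,
                                          model_dir='/tmp/mnist_dpsgd')

train_input_fn = tf.estimator.inputs.numpy_input_fn(
    x={'x': train_data},
    y=train_labels,
    batch_size=256,   # must be divisible by FLAGS.num_microbatches
    num_epochs=None,
    shuffle=True)

eval_input_fn = tf.estimator.inputs.numpy_input_fn(
    x={'x': test_data}, y=test_labels, num_epochs=1, shuffle=False)

mnist_classifier.train(input_fn=train_input_fn, steps=1000)
print(mnist_classifier.evaluate(input_fn=eval_input_fn))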
def nn_model_fn(features, labels, mode):
  """Model function for a fully connected NN."""
  # Define NN architecture using tf.keras.layers.
  input_layer = tf.reshape(features['x'], [-1, 123])
  y = tf.keras.layers.Dense(16, activation='relu').apply(input_layer)
  logits = tf.keras.layers.Dense(2).apply(y)

  # Calculate loss as a vector (to support microbatches in DP-SGD).
  vector_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels,
                                                               logits=logits)
  # Define mean of loss across minibatch (for reporting through tf.Estimator).
  scalar_loss = tf.reduce_mean(vector_loss)

  # Configure the training op (for TRAIN mode).
  if mode == tf_estimator.ModeKeys.TRAIN:
    if FLAGS.dpsgd:
      # Use DP version of GradientDescentOptimizer. Other optimizers are
      # available in dp_optimizer. Most optimizers inheriting from
      # tf.train.Optimizer should be wrappable in differentially private
      # counterparts by calling dp_optimizer.optimizer_from_args().
      # `microbatches` is assumed to be defined at module level.
      optimizer = dp_optimizer.DPGradientDescentGaussianOptimizer(
          l2_norm_clip=FLAGS.l2_norm_clip,
          noise_multiplier=FLAGS.noise_multiplier,
          num_microbatches=microbatches,
          learning_rate=FLAGS.learning_rate)
      opt_loss = vector_loss
    else:
      optimizer = tf.compat.v1.train.GradientDescentOptimizer(
          learning_rate=FLAGS.learning_rate)
      opt_loss = scalar_loss

    global_step = tf.compat.v1.train.get_global_step()
    train_op = optimizer.minimize(loss=opt_loss, global_step=global_step)
    # In the following, we pass the mean of the loss (scalar_loss) rather than
    # the vector_loss because tf.estimator requires a scalar loss. This is only
    # used for evaluation and debugging by tf.estimator. The actual loss being
    # minimized is opt_loss defined above and passed to optimizer.minimize().
    return tf_estimator.EstimatorSpec(mode=mode,
                                      loss=scalar_loss,
                                      train_op=train_op)

  # Add evaluation metrics (for EVAL mode).
  if mode == tf_estimator.ModeKeys.EVAL:
    eval_metric_ops = {
        'accuracy':
            tf.compat.v1.metrics.accuracy(labels=labels,
                                          predictions=tf.argmax(input=logits,
                                                                axis=1))
    }
    return tf_estimator.EstimatorSpec(mode=mode,
                                      loss=scalar_loss,
                                      eval_metric_ops=eval_metric_ops)
  return None
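The reason these model functions hand the DP optimizer the per-example vector_loss is that DP-SGD clips and noises gradients at the microbatch level rather than the minibatch level. The NumPy sketch below illustrates that clip-and-noise step conceptually; it is not the tensorflow_privacy implementation, and the function name is purely illustrative.

import numpy as np

def dp_sgd_gradient_sketch(per_microbatch_grads, l2_norm_clip, noise_multiplier):
  """Illustrative sketch of the clip-and-noise step performed by DP-SGD.

  per_microbatch_grads: array of shape [num_microbatches, num_params] holding
  the gradient of each microbatch's mean loss (hence the vector loss above).
  """
  clipped = []
  for g in per_microbatch_grads:
    norm = np.linalg.norm(g)
    # Scale each microbatch gradient so its L2 norm is at most l2_norm_clip.
    clipped.append(g * min(1.0, l2_norm_clip / (norm + 1e-12)))
  summed = np.sum(clipped, axis=0)
  # Add Gaussian noise calibrated to the clipping norm, then average.
  noise = np.random.normal(scale=noise_multiplier * l2_norm_clip,
                           size=summed.shape)
  return (summed + noise) / len(per_microbatch_grads)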
def lr_model_fn(features, labels, mode, nclasses, dim):
  """Model function for logistic regression."""
  input_layer = tf.reshape(features['x'], tuple([-1]) + dim)
  logits = tf.keras.layers.Dense(
      units=nclasses,
      kernel_regularizer=tf.keras.regularizers.L2(l2=FLAGS.regularizer),
      bias_regularizer=tf.keras.regularizers.L2(
          l2=FLAGS.regularizer)).apply(input_layer)

  # Calculate loss as a vector (to support microbatches in DP-SGD).
  vector_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
      labels=labels, logits=logits) + tf.losses.get_regularization_loss()
  # Define mean of loss across minibatch (for reporting through tf.Estimator).
  scalar_loss = tf.reduce_mean(vector_loss)

  # Configure the training op (for TRAIN mode).
  if mode == tf.estimator.ModeKeys.TRAIN:
    if FLAGS.dpsgd:
      # The loss function is L-Lipschitz with L = sqrt(2*(||x||^2 + 1)) where
      # ||x|| is the norm of the data.
      # We don't use microbatches (thus speeding up computation), since no
      # clipping is necessary due to data normalization.
      optimizer = dp_optimizer.DPGradientDescentGaussianOptimizer(
          l2_norm_clip=math.sqrt(2 * (FLAGS.data_l2_norm**2 + 1)),
          noise_multiplier=FLAGS.noise_multiplier,
          num_microbatches=1,
          learning_rate=FLAGS.learning_rate)
      opt_loss = vector_loss
    else:
      optimizer = GradientDescentOptimizer(learning_rate=FLAGS.learning_rate)
      opt_loss = scalar_loss

    global_step = tf.train.get_global_step()
    train_op = optimizer.minimize(loss=opt_loss, global_step=global_step)
    # In the following, we pass the mean of the loss (scalar_loss) rather than
    # the vector_loss because tf.estimator requires a scalar loss. This is only
    # used for evaluation and debugging by tf.estimator. The actual loss being
    # minimized is opt_loss defined above and passed to optimizer.minimize().
    return tf.estimator.EstimatorSpec(mode=mode,
                                      loss=scalar_loss,
                                      train_op=train_op)

  # Add evaluation metrics (for EVAL mode).
  elif mode == tf.estimator.ModeKeys.EVAL:
    eval_metric_ops = {
        'accuracy':
            tf.metrics.accuracy(labels=labels,
                                predictions=tf.argmax(input=logits, axis=1))
    }
    return tf.estimator.EstimatorSpec(mode=mode,
                                      loss=scalar_loss,
                                      eval_metric_ops=eval_metric_ops)
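The Lipschitz-based clipping bound above only holds if every input has a fixed L2 norm (FLAGS.data_l2_norm). The sketch below shows one way such normalization might be done; the helper name is hypothetical and not part of the original code. As a worked example, with data_l2_norm = 1 the bound is sqrt(2 * (1^2 + 1)) = 2, so per-example gradients never exceed the clip and clipping is effectively a no-op.

import numpy as np

def normalize_to_l2_norm(x, data_l2_norm):
  """Hypothetical helper: rescale each row of x to have L2 norm data_l2_norm."""
  norms = np.linalg.norm(x, axis=1, keepdims=True)
  return x * (data_l2_norm / np.maximum(norms, 1e-12))

# Clipping bound used by lr_model_fn when data_l2_norm = 1:
print(np.sqrt(2 * (1.0**2 + 1)))  # 2.0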
def cifar10_model(FLAGS, eps_list=None, noise_multiplier=None):
  # Train a Keras CIFAR-10 classifier; cnn_cifar10 and the target_* arrays,
  # as well as data_placeholder, vector_loss, scalar_loss, learning_rate and
  # global_step, are assumed to be defined elsewhere (as in the mnist_model
  # variant below).
  classifier = cnn_cifar10(data_placeholder)
  classifier.fit(target_X_train,
                 target_y_train,
                 batch_size=batch_size,
                 epochs=epochs,
                 validation_data=[target_X_valid, target_y_valid],
                 verbose=1)

  # Create the gradient descent optimizer with the given learning rate.
  if FLAGS.dpsgd:
    #gradient_op_list = []
    train_op_list = []
    for i in range(FLAGS.N):
      optimizer = dp_optimizer.DPGradientDescentGaussianOptimizer(
          l2_norm_clip=FLAGS.l2_norm_clip,
          noise_multiplier=noise_multiplier[i],
          num_microbatches=FLAGS.num_microbatches,
          learning_rate=learning_rate)
      opt_loss = vector_loss
      #var_list = tf.trainable_variables()
      #gradient_op = optimizer.compute_gradients(loss=opt_loss, var_list=None)
      #gradient_op_list.append(gradient_op)
      train_op = optimizer.minimize(loss=opt_loss, global_step=global_step)
      train_op_list.append(train_op)
  else:
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
    opt_loss = scalar_loss
    #gradient_op = optimizer.compute_gradients(loss=opt_loss)
    #gradient_op_list = [gradient_op] * FLAGS.N
    train_op = optimizer.minimize(loss=opt_loss, global_step=global_step)
    train_op_list = [train_op] * FLAGS.N
def cnn_model_fn(features, labels, mode):
  """Model function for a CNN."""
  # Define CNN architecture using tf.keras.layers.
  input_layer = tf.reshape(features['x'], [-1, 28, 28, 1])
  y = tf.keras.layers.Conv2D(16, 8, strides=2, padding='same',
                             activation='relu').apply(input_layer)
  y = tf.keras.layers.MaxPool2D(2, 1).apply(y)
  y = tf.keras.layers.Conv2D(32, 4, strides=2, padding='valid',
                             activation='relu').apply(y)
  y = tf.keras.layers.MaxPool2D(2, 1).apply(y)
  y = tf.keras.layers.Flatten().apply(y)
  y = tf.keras.layers.Dense(32, activation='relu').apply(y)
  logits = tf.keras.layers.Dense(10).apply(y)

  # Calculate loss as a vector (to support microbatches in DP-SGD).
  vector_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels,
                                                               logits=logits)
  # Define mean of loss across minibatch (for reporting through tf.Estimator).
  scalar_loss = tf.reduce_mean(vector_loss)

  # Configure the training op (for TRAIN mode).
  if mode == tf.estimator.ModeKeys.TRAIN:
    if FLAGS.dpsgd:
      ledger = privacy_ledger.PrivacyLedger(
          population_size=60000,
          selection_probability=(FLAGS.batch_size / 60000))
      # Use DP version of GradientDescentOptimizer. Other optimizers are
      # available in dp_optimizer. Most optimizers inheriting from
      # tf.train.Optimizer should be wrappable in differentially private
      # counterparts by calling dp_optimizer.optimizer_from_args().
      optimizer = dp_optimizer.DPGradientDescentGaussianOptimizer(
          l2_norm_clip=FLAGS.l2_norm_clip,
          noise_multiplier=FLAGS.noise_multiplier,
          num_microbatches=FLAGS.microbatches,
          ledger=ledger,
          learning_rate=FLAGS.learning_rate)
      training_hooks = [EpsilonPrintingTrainingHook(ledger)]
      opt_loss = vector_loss
    else:
      optimizer = GradientDescentOptimizer(learning_rate=FLAGS.learning_rate)
      training_hooks = []
      opt_loss = scalar_loss

    global_step = tf.train.get_global_step()
    train_op = optimizer.minimize(loss=opt_loss, global_step=global_step)
    # In the following, we pass the mean of the loss (scalar_loss) rather than
    # the vector_loss because tf.estimator requires a scalar loss. This is only
    # used for evaluation and debugging by tf.estimator. The actual loss being
    # minimized is opt_loss defined above and passed to optimizer.minimize().
    return tf.estimator.EstimatorSpec(mode=mode,
                                      loss=scalar_loss,
                                      train_op=train_op,
                                      training_hooks=training_hooks)

  # Add evaluation metrics (for EVAL mode).
  elif mode == tf.estimator.ModeKeys.EVAL:
    eval_metric_ops = {
        'accuracy':
            tf.metrics.accuracy(labels=labels,
                                predictions=tf.argmax(input=logits, axis=1))
    }
    return tf.estimator.EstimatorSpec(mode=mode,
                                      loss=scalar_loss,
                                      eval_metric_ops=eval_metric_ops)
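EpsilonPrintingTrainingHook is not defined in this snippet. The sketch below shows how such a hook is typically implemented in older tensorflow_privacy MNIST tutorials, where the ledger and RDP-accountant APIs (get_unformatted_ledger, format_ledger, compute_rdp_from_ledger) were available; in newer releases these APIs have changed or been removed, so treat this as an assumption-laden reconstruction rather than the definitive implementation.

import tensorflow as tf
from tensorflow_privacy.privacy.analysis import privacy_ledger
from tensorflow_privacy.privacy.analysis.rdp_accountant import compute_rdp_from_ledger
from tensorflow_privacy.privacy.analysis.rdp_accountant import get_privacy_spent


class EpsilonPrintingTrainingHook(tf.estimator.SessionRunHook):
  """Prints the epsilon spent (at delta=1e-5) when training ends."""

  def __init__(self, ledger):
    self._samples, self._queries = ledger.get_unformatted_ledger()

  def end(self, session):
    orders = [1 + x / 10.0 for x in range(1, 100)] + list(range(12, 64))
    samples = session.run(self._samples)
    queries = session.run(self._queries)
    formatted_ledger = privacy_ledger.format_ledger(samples, queries)
    rdp = compute_rdp_from_ledger(formatted_ledger, orders)
    eps = get_privacy_spent(orders, rdp, target_delta=1e-5)[0]
    print('For delta=1e-5, the current epsilon is: %.2f' % eps)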
def cnn_model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
  """Model function for a CNN."""
  # Define CNN architecture using tf.keras.layers.
  logits = common.get_cnn_model(features)

  # Calculate loss as a vector (to support microbatches in DP-SGD).
  vector_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels,
                                                               logits=logits)
  # Define mean of loss across minibatch (for reporting through tf.Estimator).
  scalar_loss = tf.reduce_mean(input_tensor=vector_loss)

  # Configure the training op (for TRAIN mode).
  if mode == tf.estimator.ModeKeys.TRAIN:
    if FLAGS.dpsgd:
      # Use DP version of GradientDescentOptimizer. Other optimizers are
      # available in dp_optimizer. Most optimizers inheriting from
      # tf.train.Optimizer should be wrappable in differentially private
      # counterparts by calling dp_optimizer.optimizer_from_args().
      optimizer = dp_optimizer.DPGradientDescentGaussianOptimizer(
          l2_norm_clip=FLAGS.l2_norm_clip,
          noise_multiplier=FLAGS.noise_multiplier,
          num_microbatches=FLAGS.microbatches,
          learning_rate=FLAGS.learning_rate)
      opt_loss = vector_loss
    else:
      optimizer = tf.train.GradientDescentOptimizer(
          learning_rate=FLAGS.learning_rate)
      opt_loss = scalar_loss

    # Training with TPUs requires wrapping the optimizer in a
    # CrossShardOptimizer.
    optimizer = tf.tpu.CrossShardOptimizer(optimizer)
    global_step = tf.train.get_global_step()
    train_op = optimizer.minimize(loss=opt_loss, global_step=global_step)
    # In the following, we pass the mean of the loss (scalar_loss) rather than
    # the vector_loss because tf.estimator requires a scalar loss. This is only
    # used for evaluation and debugging by tf.estimator. The actual loss being
    # minimized is opt_loss defined above and passed to optimizer.minimize().
    return tf.estimator.tpu.TPUEstimatorSpec(mode=mode,
                                             loss=scalar_loss,
                                             train_op=train_op)

  # Add evaluation metrics (for EVAL mode).
  elif mode == tf.estimator.ModeKeys.EVAL:

    def metric_fn(labels, logits):
      predictions = tf.argmax(logits, 1)
      return {
          'accuracy':
              tf.metrics.accuracy(labels=labels, predictions=predictions),
      }

    return tf.estimator.tpu.TPUEstimatorSpec(mode=mode,
                                             loss=scalar_loss,
                                             eval_metrics=(metric_fn, {
                                                 'labels': labels,
                                                 'logits': logits,
                                             }))
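Because this model_fn returns TPUEstimatorSpec, it is meant to be driven by a TPUEstimator rather than a plain Estimator. The sketch below is one plausible wiring under the TF1 TPU API; the TPU name, model_dir, batch sizes, step count and train_input_fn are placeholders, not values from the original code.

import tensorflow.compat.v1 as tf

# Hypothetical TPUEstimator wiring for the model_fn above.
resolver = tf.distribute.cluster_resolver.TPUClusterResolver(tpu='my-tpu')
run_config = tf.estimator.tpu.RunConfig(
    cluster=resolver,
    model_dir='/tmp/mnist_tpu_dpsgd',
    tpu_config=tf.estimator.tpu.TPUConfig(iterations_per_loop=100))

estimator = tf.estimator.tpu.TPUEstimator(
    model_fn=cnn_model_fn,
    use_tpu=True,
    train_batch_size=256,   # must be divisible by FLAGS.microbatches
    eval_batch_size=256,
    config=run_config,
    params={})

# train_input_fn is a placeholder for an input_fn that reads params['batch_size'].
estimator.train(input_fn=train_input_fn, steps=1000)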
def mnist_model(FLAGS, eps_list=None, noise_multiplier=None):
  # - placeholders for the input data (in our case MNIST); their shape depends
  #   on the client batch size specified in FLAGS.
  img_size = IMAGE_SIZE[FLAGS.dataset]
  img_pixels = img_size[0] * img_size[1] * img_size[2]
  data_placeholder, labels_placeholder = placeholder_inputs(
      FLAGS.client_batch_size, img_pixels)

  # Define the model architecture.
  # - logits : output of the network when fed with images.
  if FLAGS.model == 'lr' and (FLAGS.dataset == 'mnist' or
                              FLAGS.dataset == 'fmnist'):
    logits = lr_mnist(data_placeholder)
  elif FLAGS.model == 'cnn' and (FLAGS.dataset == 'mnist' or
                                 FLAGS.dataset == 'fmnist'):
    logits = cnn_mnist(data_placeholder)
  else:
    raise ValueError('No model matches the required model and dataset.')

  # - loss : comparing logits to the true labels.
  # Calculate loss as a vector (to support microbatches in DP-SGD).
  labels_placeholder = tf.cast(labels_placeholder, dtype=tf.int64)
  vector_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
      labels=labels_placeholder, logits=logits)
  # Define mean of loss across minibatch (for reporting through tf.Estimator).
  scalar_loss = tf.reduce_mean(input_tensor=vector_loss)

  # - eval_op : when run, returns the number of labels that were predicted
  #   correctly.
  eval_op = evaluation(logits, labels_placeholder)

  # Add a scalar summary for the snapshot loss.
  tf.summary.scalar('loss', scalar_loss)

  # - global_step : a Variable that tracks the number of steps taken by the
  #   clients.
  global_step = tf.Variable(0, dtype=tf.float32, trainable=False,
                            name='global_step')

  # - learning_rate : a TensorFlow learning rate, dependent on global_step.
  if FLAGS.lr_mode == 'decay':
    learning_rate = tf.train.exponential_decay(learning_rate=FLAGS.lr,
                                               global_step=global_step,
                                               decay_steps=27000,
                                               decay_rate=0.1,
                                               staircase=True,
                                               name='learning_rate')
    print('decay lr: {}'.format(FLAGS.lr))
  elif FLAGS.lr_mode == 'const':
    learning_rate = FLAGS.lr
    print('constant lr: {}'.format(learning_rate))

  '''
  ledger = privacy_ledger.PrivacyLedger(
      population_size=6000,
      selection_probability=(FLAGS.client_batch_size / 6000))
  '''

  # Create the gradient descent optimizer with the given learning rate.
  if FLAGS.dpsgd:
    #gradient_op_list = []
    train_op_list = []
    for i in range(FLAGS.N):
      optimizer = dp_optimizer.DPGradientDescentGaussianOptimizer(
          l2_norm_clip=FLAGS.l2_norm_clip,
          noise_multiplier=noise_multiplier[i],
          num_microbatches=FLAGS.num_microbatches,
          learning_rate=learning_rate)
      opt_loss = vector_loss
      #var_list = tf.trainable_variables()
      #gradient_op = optimizer.compute_gradients(loss=opt_loss, var_list=None)
      #gradient_op_list.append(gradient_op)
      train_op = optimizer.minimize(loss=opt_loss, global_step=global_step)
      train_op_list.append(train_op)
  else:
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
    opt_loss = scalar_loss
    #gradient_op = optimizer.compute_gradients(loss=opt_loss)
    #gradient_op_list = [gradient_op] * FLAGS.N
    train_op = optimizer.minimize(loss=opt_loss, global_step=global_step)
    train_op_list = [train_op] * FLAGS.N

  # - train_op_list : one tf.train operation per client/noise level. Each
  #   backpropagates the loss and applies the gradients that minimize it
  #   (also incrementing the global step counter) as a single training step.
  return train_op_list, eval_op, scalar_loss, data_placeholder, labels_placeholder
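A short sketch of how the returned ops might be driven in a TF1 session follows. It is illustrative only: FLAGS is assumed to be defined at module level, the noise multipliers are placeholders, and next_batch() stands in for whatever batching utility the surrounding project provides.

import tensorflow.compat.v1 as tf

# Hypothetical training loop: client i trains with its own noise multiplier
# via train_op_list[i].
train_op_list, eval_op, scalar_loss, data_ph, labels_ph = mnist_model(
    FLAGS, noise_multiplier=[1.1] * FLAGS.N)

with tf.Session() as sess:
  sess.run(tf.global_variables_initializer())
  for step in range(100):
    for i in range(FLAGS.N):
      images, labels = next_batch(i, FLAGS.client_batch_size)  # hypothetical helper
      _, loss_val = sess.run([train_op_list[i], scalar_loss],
                             feed_dict={data_ph: images, labels_ph: labels})
    if step % 10 == 0:
      print('step {}: loss {:.4f}'.format(step, loss_val))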