import warnings

import numpy as np
from tensorflow_privacy.privacy.analysis.rdp_accountant import compute_rdp, get_privacy_spent

# FLAGS is assumed to be configured elsewhere via absl.flags.


def compute_epsilon(steps, num_examples=60000, target_delta=1e-5):
    if num_examples * target_delta > 1.:
        warnings.warn('Your delta might be too high.')
    q = FLAGS.batch_size / float(num_examples)
    # list(...) around range is required under Python 3, where range() is
    # not a list and cannot be concatenated with one.
    orders = list(np.linspace(1.1, 10.9, 99)) + list(range(11, 64))
    rdp_const = compute_rdp(q, FLAGS.noise_multiplier, steps, orders)
    eps, _, _ = get_privacy_spent(orders, rdp_const, target_delta=target_delta)
    return eps
def end(self, session):
    orders = [1 + x / 10.0 for x in range(1, 100)] + list(range(12, 64))
    samples = session.run(self._samples)
    queries = session.run(self._queries)
    formatted_ledger = privacy_ledger.format_ledger(samples, queries)
    rdp = compute_rdp_from_ledger(formatted_ledger, orders)
    eps = get_privacy_spent(orders, rdp, target_delta=1e-5)[0]
    print('For delta=1e-5, the current epsilon is: %.2f' % eps)
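# For context: an end(self, session) method like the one above usually lives
# on a tf.train.SessionRunHook attached to Estimator training. A minimal
# sketch, assuming TF1-style `import tensorflow as tf` and a PrivacyLedger
# instance; the class name and constructor are illustrative, not taken from
# the source.
class EpsilonPrintingTrainingHook(tf.train.SessionRunHook):
    """Prints the privacy budget spent when training ends."""

    def __init__(self, ledger):
        self._samples, self._queries = ledger.get_unformatted_ledger()

    # end(self, session) as defined above goes here.

# Usage sketch:
# estimator.train(input_fn=train_input_fn, steps=steps_per_epoch,
#                 hooks=[EpsilonPrintingTrainingHook(ledger)])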
def compute_epsilon(steps, sample_prob):
    """Computes epsilon value for given hyperparameters."""
    if FLAGS.noise_multiplier == 0.0:
        return float('inf')
    orders = [1 + x / 10. for x in range(1, 100)] + list(range(12, 64))
    rdp = compute_rdp(q=sample_prob,
                      noise_multiplier=FLAGS.noise_multiplier,
                      steps=steps,
                      orders=orders)
    # Delta is set to 1e-5 because MNIST has 60000 training points.
    return get_privacy_spent(orders, rdp, target_delta=1e-5)[0]
def compute_epsilon(steps):
    """Computes epsilon value for given hyperparameters."""
    if FLAGS.noise_multiplier == 0.0:
        return float('inf')
    orders = [1 + x / 10. for x in range(1, 100)] + list(range(12, 64))
    sampling_probability = FLAGS.batch_size / 60000
    rdp = compute_rdp(q=sampling_probability,
                      noise_multiplier=FLAGS.noise_multiplier,
                      steps=steps,
                      orders=orders)
    return get_privacy_spent(orders, rdp, target_delta=FLAGS.delta)[0]
def compute_epsilon(steps):
    """Computes epsilon value for given hyperparameters."""
    if FLAGS.noise_multiplier == 0.0:
        return float('inf')
    orders = [1 + x / 10. for x in range(1, 100)] + list(range(12, 64))
    sampling_probability = FLAGS.batch_size / NB_TRAIN
    rdp = compute_rdp(q=sampling_probability,
                      noise_multiplier=FLAGS.noise_multiplier,
                      steps=steps,
                      orders=orders)
    # Delta is set to 1e-5; it should be on the order of 1 / NB_TRAIN.
    return get_privacy_spent(orders, rdp, target_delta=1e-5)[0]
def compute_epsilon(steps):
    """Computes epsilon value for given hyperparameters."""
    if FLAGS.noise_multiplier == 0.0:
        return float('inf')
    orders = [1 + x / 10. for x in range(1, 100)] + list(range(12, 64))
    sampling_probability = FLAGS.batch_size / 60000
    # compute_rdp's noise parameter is the ratio of the noise stddev to the
    # l2-sensitivity, i.e. the noise multiplier (named
    # stddev_to_sensitivity_ratio in older releases).
    rdp = compute_rdp(q=sampling_probability,
                      noise_multiplier=FLAGS.noise_multiplier,
                      steps=steps,
                      orders=orders)
    # Delta is set to 1e-5 because MNIST has 60000 training points.
    return get_privacy_spent(orders, rdp, target_delta=1e-5)[0]
def compute_epsilon(steps, n):
    """Computes epsilon value for given hyperparameters."""
    if FLAGS.noise_multiplier == 0.0:
        return float('inf')
    orders = [1 + x / 10. for x in range(1, 100)] + list(range(12, 64))
    sampling_probability = FLAGS.batch_size / n
    rdp = compute_rdp(q=sampling_probability,
                      noise_multiplier=FLAGS.noise_multiplier,
                      steps=steps,
                      orders=orders)
    # Delta is set to approximate 1 / (number of training points).
    return get_privacy_spent(orders, rdp, target_delta=1 / n)[0]
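# The compute_epsilon variants above all follow the same recipe: pick a grid
# of RDP orders, compute the RDP of the sampled Gaussian mechanism over all
# steps, then convert to (epsilon, delta)-DP. A minimal self-contained sketch
# without FLAGS, assuming the tensorflow_privacy package is installed (import
# path per the releases these snippets target); hyperparameter values below
# are illustrative only.
from tensorflow_privacy.privacy.analysis.rdp_accountant import (
    compute_rdp, get_privacy_spent)


def compute_epsilon_standalone(steps, batch_size, num_examples,
                               noise_multiplier, target_delta):
    if noise_multiplier == 0.0:
        return float('inf')  # No noise means no formal guarantee.
    orders = [1 + x / 10. for x in range(1, 100)] + list(range(12, 64))
    q = batch_size / num_examples  # Poisson sampling probability.
    rdp = compute_rdp(q=q, noise_multiplier=noise_multiplier,
                      steps=steps, orders=orders)
    return get_privacy_spent(orders, rdp, target_delta=target_delta)[0]


# Example: 60 epochs of MNIST-sized training with batch size 256.
print(compute_epsilon_standalone(steps=60 * (60000 // 256), batch_size=256,
                                 num_examples=60000, noise_multiplier=1.1,
                                 target_delta=1e-5))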
def end(self, session):
    orders = [1 + x / 10.0 for x in range(1, 100)] + list(range(12, 64))
    samples = session.run(self._samples)
    queries = session.run(self._queries)
    formatted_ledger = privacy_ledger.format_ledger(samples, queries)
    rdp = compute_rdp_from_ledger(formatted_ledger, orders)
    eps = get_privacy_spent(orders, rdp, target_delta=FLAGS.delta)[0]
    print('***************************************************')
    with open(FLAGS.record_dir + '/' + FLAGS.record_file, 'a+') as f:
        f.write('For delta=' + str(FLAGS.delta) +
                ', the current epsilon is: %.2f \n' % eps)
    print('For delta=' + str(FLAGS.delta) +
          ', the current epsilon is: %.2f' % eps)
    print('***************************************************')
def test_check_composition(self):
    orders = (1.25, 1.5, 1.75, 2., 2.5, 3., 4., 5., 6., 7., 8., 10., 12., 14.,
              16., 20., 24., 28., 32., 64., 256.)
    rdp = rdp_accountant.compute_rdp(q=1e-4,
                                     noise_multiplier=.4,
                                     steps=40000,
                                     orders=orders)
    eps, _, opt_order = rdp_accountant.get_privacy_spent(orders, rdp,
                                                         target_delta=1e-6)
    # RDP composes by addition: accumulate a second mechanism's RDP on top.
    rdp += rdp_accountant.compute_rdp(q=0.1,
                                      noise_multiplier=2,
                                      steps=100,
                                      orders=orders)
    eps, _, opt_order = rdp_accountant.get_privacy_spent(orders, rdp,
                                                         target_delta=1e-5)
    self.assertAlmostEqual(eps, 8.509656, places=5)
    self.assertEqual(opt_order, 2.5)
def calc_epsilon(delta, ledger, sess):
    orders = [1 + x / 10. for x in range(1, 100)] + list(range(12, 64))
    _samples, _queries = ledger.get_unformatted_ledger()
    print('samples: ' + str(_samples.shape))
    print('queries: ' + str(_queries.shape))
    samples = sess.run(_samples)
    queries = sess.run(_queries)
    print('samples: ' + str(len(samples)))
    print('queries: ' + str(len(queries)))
    formatted_ledger = privacy_ledger.format_ledger(samples, queries)
    rdp = compute_rdp_from_ledger(formatted_ledger, orders)
    epsilon = get_privacy_spent(orders, rdp, target_delta=delta)[0]
    return epsilon
def print_privacy_guarantees(epochs, batch_size, samples, noise_multiplier):
    """Tabulating position-dependent privacy guarantees."""
    if noise_multiplier == 0:
        print('No differential privacy (additive noise is 0).')
        return
    print('In the conditions of Theorem 34 (https://arxiv.org/abs/1808.06651) '
          'the training procedure results in the following privacy guarantees.')
    print('Out of the total of {} samples:'.format(samples))
    steps_per_epoch = samples // batch_size
    orders = np.concatenate(
        [np.linspace(2, 20, num=181), np.linspace(20, 100, num=81)])
    delta = 1e-5
    for p in (.5, .9, .99):
        steps = math.ceil(steps_per_epoch * p)  # Steps in the last epoch.
        coef = 2 * (noise_multiplier * batch_size)**-2 * (  # Accounting for privacy loss
            (epochs - 1) / steps_per_epoch +  # ... from all-but-last epochs
            1 / (steps_per_epoch - steps + 1))  # ... due to the last epoch
        # Using RDP accountant to compute eps. Doing the computation
        # analytically is an option.
        rdp = [order * coef for order in orders]
        eps, _, _ = get_privacy_spent(orders, rdp, target_delta=delta)
        print('\t{:g}% enjoy at least ({:.2f}, {})-DP'.format(
            p * 100, eps, delta))
    # Compute privacy guarantees for the Sampled Gaussian Mechanism.
    rdp_sgm = compute_rdp(batch_size / samples, noise_multiplier,
                          epochs * steps_per_epoch, orders)
    eps_sgm, _, _ = get_privacy_spent(orders, rdp_sgm, target_delta=delta)
    print('By comparison, DP-SGD analysis for training done with the same '
          'parameters and random shuffling in each epoch guarantees '
          '({:.2f}, {})-DP for all samples.'.format(eps_sgm, delta))
def apply_dp_sgd_analysis(q, sigma, steps, orders, delta):
    """Compute and print results of DP-SGD analysis."""
    rdp = compute_rdp(q, sigma, steps, orders)
    eps, _, opt_order = get_privacy_spent(orders, rdp, target_delta=delta)
    print('DP-SGD with sampling rate = {:.3g}% and noise_multiplier = {} '
          'iterated over {} steps satisfies'.format(100 * q, sigma, steps),
          end=' ')
    print('differential privacy with eps = {:.3g} and delta = {}.'.format(
        eps, delta))
    print('The optimal RDP order is {}.'.format(opt_order))
    if opt_order == max(orders) or opt_order == min(orders):
        print('The privacy estimate is likely to be improved by expanding '
              'the set of orders.')
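# A hedged usage sketch for apply_dp_sgd_analysis, reusing the order grid
# that appears throughout this section; the hyperparameters are illustrative,
# not taken from the source.
orders = [1 + x / 10. for x in range(1, 100)] + list(range(12, 64))
apply_dp_sgd_analysis(q=256 / 60000, sigma=1.1, steps=60 * (60000 // 256),
                      orders=orders, delta=1e-5)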
def apply_dp_sgd_analysis_biscotti(q, sigma, steps, orders, delta, prev_rdp):
    """Composes one more step of RDP onto prev_rdp and returns the new spend.

    Note: `steps` is unused; each call accounts for a single step.
    """
    rdp = compute_rdp(q, sigma, 1, orders)  # RDP of a single step.
    new_rdp = prev_rdp + rdp  # RDP composes by addition.
    eps, _, opt_order = get_privacy_spent(orders, new_rdp, target_delta=delta)
    if opt_order == max(orders) or opt_order == min(orders):
        print('The privacy estimate is likely to be improved by expanding '
              'the set of orders.')
    return eps, new_rdp
def apply_dp_sgd_analysis(q, sigma, steps, orders, delta):
    """Compute and print results of DP-SGD analysis."""
    # compute_rdp requires that sigma be the ratio of the standard deviation
    # of the Gaussian noise to the l2-sensitivity of the function to which it
    # is added. Hence, sigma here corresponds to the `noise_multiplier`
    # parameter in the DP-SGD implementation found in
    # privacy.optimizers.dp_optimizer
    rdp = compute_rdp(q, sigma, steps, orders)
    eps, _, opt_order = get_privacy_spent(orders, rdp, target_delta=delta)
    print('DP-SGD with sampling rate = {:.3g}% and noise_multiplier = {} '
          'iterated over {} steps satisfies'.format(100 * q, sigma, steps),
          end=' ')
    print('differential privacy with eps = {:.3g} and delta = {}.'.format(
        eps, delta))
    print('The optimal RDP order is {}.'.format(opt_order))
    if opt_order == max(orders) or opt_order == min(orders):
        print('The privacy estimate is likely to be improved by expanding '
              'the set of orders.')
def obj(sigma):
    # Root-finding objective: (epsilon spent) - (epsilon target), composing
    # one full-batch query per round at noise levels sigma / round1 and
    # sigma / round2. round1, round2, orders, delta and eps come from the
    # enclosing scope; the 1e-8 keeps the solved sigma slightly conservative.
    rdp1 = compute_rdp(1.0, sigma / round1, 1, orders)
    rdp2 = compute_rdp(1.0, sigma / round2, 1, orders)
    rdp = rdp1 + rdp2
    privacy = get_privacy_spent(orders, rdp, target_delta=delta)
    return privacy[0] - eps + 1e-8
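# One plausible way obj is used (not confirmed by the source): solve
# obj(sigma) = 0 for the noise scale that exactly meets the epsilon target.
# Since epsilon decreases as sigma grows, obj is monotone and a bracketing
# root finder applies; the bracket [0.1, 100.0] is an assumption.
from scipy.optimize import brentq

sigma_star = brentq(obj, 0.1, 100.0)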
def main(unused_argv):
    tf.logging.set_verbosity(tf.logging.INFO)
    if FLAGS.batch_size % FLAGS.microbatches != 0:
        raise ValueError('Number of microbatches should evenly divide batch_size')

    # Load training and test data.
    train_data, train_labels, test_data, test_labels = load_mnist()

    # Define a sequential Keras model.
    model = tf.keras.Sequential([
        tf.keras.layers.Conv2D(16, 8, strides=2, padding='same',
                               activation='relu', input_shape=(28, 28, 1)),
        tf.keras.layers.MaxPool2D(2, 1),
        tf.keras.layers.Conv2D(32, 4, strides=2, padding='valid',
                               activation='relu'),
        tf.keras.layers.MaxPool2D(2, 1),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(32, activation='relu'),
        tf.keras.layers.Dense(10)
    ])

    if FLAGS.dpsgd:
        dp_average_query = GaussianAverageQuery(
            FLAGS.l2_norm_clip,
            FLAGS.l2_norm_clip * FLAGS.noise_multiplier,
            FLAGS.microbatches)
        optimizer = DPGradientDescentOptimizer(
            dp_average_query,
            FLAGS.microbatches,
            learning_rate=FLAGS.learning_rate,
            unroll_microbatches=True)
        # Compute vector of per-example loss rather than its mean over a
        # minibatch.
        loss = tf.keras.losses.CategoricalCrossentropy(
            from_logits=True, reduction=tf.losses.Reduction.NONE)
    else:
        optimizer = GradientDescentOptimizer(learning_rate=FLAGS.learning_rate)
        loss = tf.keras.losses.CategoricalCrossentropy(from_logits=True)

    # Compile and train the model with Keras.
    model.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])
    model.fit(train_data, train_labels,
              epochs=FLAGS.epochs,
              validation_data=(test_data, test_labels),
              batch_size=FLAGS.batch_size)

    # Compute the privacy budget expended. Without noise there is no
    # meaningful epsilon to report, so return early in that case.
    if FLAGS.noise_multiplier == 0.0:
        print('Trained with vanilla non-private SGD optimizer')
        return
    orders = [1 + x / 10. for x in range(1, 100)] + list(range(12, 64))
    sampling_probability = FLAGS.batch_size / 60000
    rdp = compute_rdp(q=sampling_probability,
                      noise_multiplier=FLAGS.noise_multiplier,
                      steps=FLAGS.epochs * 60000 // FLAGS.batch_size,
                      orders=orders)
    # Delta is set to 1e-5 because MNIST has 60000 training points.
    eps = get_privacy_spent(orders, rdp, target_delta=1e-5)[0]
    print('For delta=1e-5, the current epsilon is: %.2f' % eps)
def find_eps(multiplier):
    rdp = compute_rdp(q=sampling_probability,
                      noise_multiplier=multiplier,
                      steps=steps,
                      orders=orders)
    return get_privacy_spent(orders, rdp, target_delta=delta)[0] - float(target_eps)


# Bisect on the noise multiplier until the target epsilon is met, then
# recompute the actual epsilon spent at the solved noise level.
noise_multiplier = bisect(find_eps, 0.5, 3.0)
rdp = compute_rdp(q=sampling_probability,
                  noise_multiplier=noise_multiplier,
                  steps=steps,
                  orders=orders)
epsilon = get_privacy_spent(orders, rdp, target_delta=delta)[0]

dev = 0.5


def lrelu(x, th=0.2):
    return tf.maximum(th * x, x)


# G(z)
def generator(x, isTrain=True, reuse=False):
    with tf.variable_scope('generator', reuse=reuse):
        ...  # Network definition truncated in the source.
def find_eps(multiplier):
    rdp = compute_rdp(q=sampling_probability,
                      noise_multiplier=multiplier,
                      steps=steps,
                      orders=orders)
    return get_privacy_spent(orders, rdp, target_delta=delta)[0] - epsGAN
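# find_eps mirrors the bracketing objective in the previous snippet: it is
# positive while the epsilon spent exceeds epsGAN and negative once it drops
# below, so it can presumably be fed to a bisection routine such as
# scipy.optimize.bisect. The bracket follows the previous snippet; treating
# it as an assumption here.
from scipy.optimize import bisect

noise_multiplier = bisect(find_eps, 0.5, 3.0)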
def train(dataset, n_hidden=50, batch_size=100, epochs=100, learning_rate=0.01,
          model='nn', l2_ratio=1e-7, silent=True, non_linearity='relu',
          privacy='no_privacy', dp='dp', epsilon=0.5, delta=1e-5):
    train_x, train_y, test_x, test_y = dataset
    n_in = train_x.shape[1]
    n_out = len(np.unique(train_y))
    if batch_size > len(train_y):
        batch_size = len(train_y)
    classifier = tf.estimator.Estimator(
        model_fn=get_model,
        params=[train_x.shape[0], n_in, n_hidden, n_out, non_linearity, model,
                privacy, dp, epsilon, delta, batch_size, learning_rate,
                l2_ratio, epochs])
    train_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={'x': train_x}, y=train_y, batch_size=batch_size,
        num_epochs=epochs, shuffle=True)
    test_eval_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={'x': test_x}, y=test_y, num_epochs=1, shuffle=False)
    train_eval_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={'x': train_x}, y=train_y, num_epochs=1, shuffle=False)
    pred_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={'x': test_x}, num_epochs=1, shuffle=False)

    steps_per_epoch = train_x.shape[0] // batch_size
    orders = [1 + x / 100.0 for x in range(1, 1000)] + list(range(12, 1200))
    rdp = compute_rdp(batch_size / train_x.shape[0], noise_multiplier[epsilon],
                      epochs * steps_per_epoch, orders)
    eps, _, opt_order = get_privacy_spent(orders, rdp, target_delta=delta)
    print('\nFor delta= %.5f' % delta, ', the epsilon is: %.2f\n' % eps)

    if not os.path.exists(LOG_DIR):
        os.makedirs(LOG_DIR)
    for epoch in range(1, epochs + 1):
        hooks = []
        if LOGGING:
            # This hook will save traces of what tensorflow is doing during
            # the training of each model. View the combined trace by running
            # `combine_traces.py`.
            hooks.append(tf.train.ProfilerHook(output_dir=LOG_DIR,
                                               save_steps=30))
        classifier.train(input_fn=train_input_fn, steps=steps_per_epoch,
                         hooks=hooks)
        if not silent:
            eval_results = classifier.evaluate(input_fn=train_eval_input_fn)
            print('Train loss after %d epochs is: %.3f' %
                  (epoch, eval_results['loss']))

    eval_results = classifier.evaluate(input_fn=train_eval_input_fn)
    train_acc = eval_results['accuracy']
    train_loss = eval_results['loss']
    if not silent:
        print('Train accuracy is: %.3f' % train_acc)
    eval_results = classifier.evaluate(input_fn=test_eval_input_fn)
    test_acc = eval_results['accuracy']
    if not silent:
        print('Test accuracy is: %.3f' % test_acc)
    predictions = classifier.predict(input_fn=pred_input_fn)
    pred_y, pred_scores = get_predictions(predictions)
    return classifier, pred_y, pred_scores, train_loss, train_acc, test_acc
def train_private(dataset, hold_out_train_data=None, n_hidden=50,
                  batch_size=100, epochs=100, learning_rate=0.01, model='nn',
                  l2_ratio=1e-7, silent=True, non_linearity='relu',
                  privacy='no_privacy', dp='dp', epsilon=0.5, delta=1e-5):
    train_x, train_y, test_x, test_y = dataset
    if hold_out_train_data is not None:
        hold_out_x, hold_out_y, _, _ = hold_out_train_data
    n_in = train_x.shape[1]
    n_out = len(np.unique(train_y))
    if batch_size > len(train_y):
        batch_size = len(train_y)
    classifier = tf.estimator.Estimator(
        model_fn=get_model,
        params=[train_x.shape[0], n_in, n_hidden, n_out, non_linearity, model,
                privacy, dp, epsilon, delta, batch_size, learning_rate,
                l2_ratio, epochs])
    train_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={'x': train_x}, y=train_y, batch_size=batch_size,
        num_epochs=epochs, shuffle=True)
    test_eval_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={'x': test_x}, y=test_y, num_epochs=1, shuffle=False)
    train_eval_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={'x': train_x}, y=train_y, num_epochs=1, shuffle=False)
    pred_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={'x': test_x}, num_epochs=1, shuffle=False)

    steps_per_epoch = train_x.shape[0] // batch_size
    orders = [1 + x / 100.0 for x in range(1, 1000)] + list(range(12, 1200))
    rdp = compute_rdp(batch_size / train_x.shape[0], noise_multiplier[epsilon],
                      epochs * steps_per_epoch, orders)
    eps, _, opt_order = get_privacy_spent(orders, rdp, target_delta=delta)
    print('\nFor delta= %.5f' % delta, ', the epsilon is: %.2f\n' % eps)

    for epoch in range(1, epochs + 1):
        classifier.train(input_fn=train_input_fn, steps=steps_per_epoch)
        if not silent:
            eval_results = classifier.evaluate(input_fn=train_eval_input_fn)
            print('Train loss after %d epochs is: %.3f' %
                  (epoch, eval_results['loss']))

    eval_results = classifier.evaluate(input_fn=train_eval_input_fn)
    train_acc = eval_results['accuracy']
    train_loss = eval_results['loss']
    if not silent:
        print('Train accuracy is: %.3f' % train_acc)
    eval_results = classifier.evaluate(input_fn=test_eval_input_fn)
    test_acc = eval_results['accuracy']
    if not silent:
        print('Test accuracy is: %.3f' % test_acc)
    predictions = classifier.predict(input_fn=pred_input_fn)
    pred_y, pred_scores = get_predictions(predictions)
    return classifier, pred_y, pred_scores, train_loss, train_acc, test_acc