Code Example #1
def main(argv):
    del argv  # argv is not used.

    assert FLAGS.N is not None, 'Flag N is missing.'
    assert FLAGS.batch_size is not None, 'Flag batch_size is missing.'
    assert FLAGS.noise_multiplier is not None, 'Flag noise_multiplier is missing.'
    assert FLAGS.epochs is not None, 'Flag epochs is missing.'
    compute_dp_sgd_privacy(FLAGS.N, FLAGS.batch_size, FLAGS.noise_multiplier,
                           FLAGS.epochs, FLAGS.delta)
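
This example assumes the absl flags and the compute_dp_sgd_privacy import are defined elsewhere in the script. As a minimal standalone sketch of the same computation, using the module path the other examples import, with illustrative hyperparameter values (the concrete numbers are assumptions, not taken from the source):

from tensorflow_privacy.privacy.analysis import compute_dp_sgd_privacy_lib

# Illustrative values: 60000 training examples, batch size 256,
# noise multiplier 1.1, 15 epochs, delta = 1e-5.
eps, opt_order = compute_dp_sgd_privacy_lib.compute_dp_sgd_privacy(
    60000, 256, 1.1, 15, 1e-5)
print('epsilon = %.2f at RDP order %.1f' % (eps, opt_order))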
Code Example #2
 def test_compute_dpsgd_noise_multiplier(self, num_train, epsilon, delta,
                                         epochs, batch_size, tolerance):
     noise_multiplier = multinomial_logistic.compute_dpsgd_noise_multiplier(
         num_train, epsilon, delta, epochs, batch_size, tolerance)
     epsilon_lower_bound = compute_dp_sgd_privacy_lib.compute_dp_sgd_privacy(
         num_train, batch_size, noise_multiplier + tolerance, epochs,
         delta)[0]
     epsilon_upper_bound = compute_dp_sgd_privacy_lib.compute_dp_sgd_privacy(
         num_train, batch_size, noise_multiplier - tolerance, epochs,
         delta)[0]
     self.assertLess(epsilon_lower_bound, epsilon)
     self.assertLess(epsilon, epsilon_upper_bound)
Code Example #3
    def test_compute_dp_sgd_privacy(self, n, batch_size, noise_multiplier,
                                    epochs, delta, expected_eps,
                                    expected_order):
        eps, order = compute_dp_sgd_privacy_lib.compute_dp_sgd_privacy(
            n, batch_size, noise_multiplier, epochs, delta)
        self.assertAlmostEqual(eps, expected_eps)
        self.assertEqual(order, expected_order)

        # We perform an additional sanity check on the hard-coded test values.
        # We do a back-of-the-envelope calculation to obtain a lower bound.
        # Specifically, we make the approximation that subsampling a q-fraction is
        # equivalent to multiplying noise scale by 1/q.
        # This is only an approximation, but can be justified by the central limit
        # theorem in the Gaussian Differential Privacy framework; see
        # https://arxiv.org/abs/1911.11607
        # The approximation error is one-sided and yields a lower bound, which
        # is the basis of this sanity check; the paper above confirms this.
        q = batch_size / n
        steps = epochs * n / batch_size
        sigma = noise_multiplier * math.sqrt(steps) / q
        # We compute the optimal guarantee for the Gaussian mechanism using
        # Theorem 8 of https://arxiv.org/abs/1805.06530 (in v2).
        low_delta = .5 * math.erfc((eps * sigma - .5 / sigma) / math.sqrt(2))
        if eps < 100:  # Skip this if it causes overflow; error is minor.
            low_delta -= math.exp(eps) * .5 * math.erfc(
                (eps * sigma + .5 / sigma) / math.sqrt(2))
        self.assertLessEqual(low_delta, delta)
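
As context for the erfc expressions in the sanity check above: the cited Theorem 8 of https://arxiv.org/abs/1805.06530 gives the exact privacy curve of the Gaussian mechanism with sensitivity 1 and noise scale $\sigma$,

$$\delta(\varepsilon) = \Phi\Big(\frac{1}{2\sigma} - \varepsilon\sigma\Big) - e^{\varepsilon}\,\Phi\Big(-\frac{1}{2\sigma} - \varepsilon\sigma\Big),$$

where $\Phi$ is the standard normal CDF; the code evaluates it through the identity $\Phi(-x) = \frac{1}{2}\operatorname{erfc}(x/\sqrt{2})$.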
Code Example #4
 def test_compute_dp_sgd_privacy(self, n, batch_size, noise_multiplier,
                                 epochs, delta, expected_eps,
                                 expected_order):
     eps, order = compute_dp_sgd_privacy_lib.compute_dp_sgd_privacy(
         n, batch_size, noise_multiplier, epochs, delta)
     self.assertAlmostEqual(eps, expected_eps)
     self.assertAlmostEqual(order, expected_order)
Code Example #5
def main(unused_argv):
    logging.set_verbosity(logging.INFO)
    if FLAGS.dpsgd and FLAGS.batch_size % FLAGS.microbatches != 0:
        raise ValueError(
            'Number of microbatches should evenly divide batch_size')

    # Instantiate the tf.Estimator.
    run_config = tf.estimator.tpu.RunConfig(master=FLAGS.master)
    mnist_classifier = tf.estimator.tpu.TPUEstimator(
        train_batch_size=FLAGS.batch_size,
        eval_batch_size=FLAGS.batch_size,
        model_fn=cnn_model_fn,
        model_dir=FLAGS.model_dir,
        config=run_config)

    # Training loop.
    steps_per_epoch = 60000 // FLAGS.batch_size
    eval_steps_per_epoch = 10000 // FLAGS.batch_size
    for epoch in range(1, FLAGS.epochs + 1):
        start_time = time.time()
        # Train the model for one epoch.
        mnist_classifier.train(
            input_fn=common.make_input_fn(
                'train', FLAGS.batch_size // FLAGS.cores, tpu=True),
            steps=steps_per_epoch)
        end_time = time.time()
        logging.info('Epoch %d time in seconds: %.2f', epoch,
                     end_time - start_time)

        # Evaluate the model and print results.
        eval_results = mnist_classifier.evaluate(
            input_fn=common.make_input_fn(
                'test', FLAGS.batch_size // FLAGS.cores, 1, tpu=True),
            steps=eval_steps_per_epoch)
        test_accuracy = eval_results['accuracy']
        print('Test accuracy after %d epochs is: %.3f' %
              (epoch, test_accuracy))

        # Compute the privacy budget expended.
        if FLAGS.dpsgd:
            if FLAGS.noise_multiplier > 0.0:
                # Because each core adds independent Gaussian noise, the
                # effective noise applied to the summed gradients equals
                # FLAGS.noise_multiplier * sqrt(number of cores).
                eps, _ = compute_dp_sgd_privacy_lib.compute_dp_sgd_privacy(
                    60000, FLAGS.batch_size,
                    FLAGS.noise_multiplier * math.sqrt(FLAGS.cores), epoch,
                    1e-5)
                print('For delta=1e-5, the current epsilon is: %.2f' % eps)
            else:
                print('Trained with DP-SGD but with zero noise.')
        else:
            print('Trained with vanilla non-private SGD optimizer')
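
To make the sqrt(cores) correction above concrete, a small illustrative calculation (all values below are made up):

import math

noise_multiplier = 1.1  # per-core noise multiplier (illustrative)
cores = 8               # TPU core count (illustrative)

# The sum of `cores` independent Gaussian noises has a standard deviation
# sqrt(cores) times larger, so the accountant uses the effective value.
effective_noise_multiplier = noise_multiplier * math.sqrt(cores)
print('effective noise multiplier: %.2f' % effective_noise_multiplier)  # ~3.11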
Code Example #6
def main(unused_argv):
    tf.logging.set_verbosity(tf.logging.INFO)

    # Load training and test data.
    train_data, train_labels, test_data, test_labels = load_mnist()

    # Instantiate the tf.Estimator.
    mnist_classifier = tf.estimator.Estimator(model_fn=cnn_model_fn)

    # Create tf.Estimator input functions for the training and test data.
    train_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={'x': train_data},
        y=train_labels,
        batch_size=FLAGS.batch_size,
        num_epochs=FLAGS.epochs,
        shuffle=True)
    eval_input_fn = tf.estimator.inputs.numpy_input_fn(x={'x': test_data},
                                                       y=test_labels,
                                                       num_epochs=1,
                                                       shuffle=False)

    # Training loop.
    steps_per_epoch = 60000 // FLAGS.batch_size
    for epoch in range(1, FLAGS.epochs + 1):
        start_time = time.time()
        # Train the model for one epoch.
        mnist_classifier.train(input_fn=train_input_fn, steps=steps_per_epoch)
        end_time = time.time()
        print('Epoch %d time in seconds: %.2f' %
              (epoch, end_time - start_time))

        # Evaluate the model and print results
        eval_results = mnist_classifier.evaluate(input_fn=eval_input_fn)
        test_accuracy = eval_results['accuracy']
        print('Test accuracy after %d epochs is: %.3f' %
              (epoch, test_accuracy))

        # Compute the privacy budget expended.
        if FLAGS.dpsgd:
            if FLAGS.noise_multiplier > 0.0:
                eps, _ = compute_dp_sgd_privacy_lib.compute_dp_sgd_privacy(
                    60000, FLAGS.batch_size, FLAGS.noise_multiplier, epoch,
                    1e-5)
                print('For delta=1e-5, the current epsilon is: %.2f' % eps)
            else:
                print('Trained with DP-SGD but with zero noise.')
        else:
            print('Trained with vanilla non-private SGD optimizer')
Code Example #7
def main(unused_argv):
    logging.set_verbosity(logging.INFO)
    if FLAGS.dpsgd and FLAGS.batch_size % FLAGS.microbatches != 0:
        raise ValueError(
            'Number of microbatches should evenly divide batch_size')

    # Instantiate the tf.Estimator.
    mnist_classifier = tf.estimator.Estimator(model_fn=cnn_model_fn,
                                              model_dir=FLAGS.model_dir)

    # Training loop.
    steps_per_epoch = 60000 // FLAGS.batch_size
    for epoch in range(1, FLAGS.epochs + 1):
        start_time = time.time()
        # Train the model for one epoch.
        mnist_classifier.train(
            input_fn=common.make_input_fn('train', FLAGS.batch_size),
            steps=steps_per_epoch)
        end_time = time.time()
        logging.info('Epoch %d time in seconds: %.2f', epoch,
                     end_time - start_time)

        # Evaluate the model and print results
        eval_results = mnist_classifier.evaluate(
            input_fn=common.make_input_fn('test', FLAGS.batch_size, 1))
        test_accuracy = eval_results['accuracy']
        print('Test accuracy after %d epochs is: %.3f' %
              (epoch, test_accuracy))

        # Compute the privacy budget expended.
        if FLAGS.dpsgd:
            if FLAGS.noise_multiplier > 0.0:
                eps, _ = compute_dp_sgd_privacy_lib.compute_dp_sgd_privacy(
                    60000, FLAGS.batch_size, FLAGS.noise_multiplier, epoch,
                    1e-5)
                print('For delta=1e-5, the current epsilon is: %.2f' % eps)
            else:
                print('Trained with DP-SGD but with zero noise.')
        else:
            print('Trained with vanilla non-private SGD optimizer')
Code Example #8
 def _func(x):
     # Returns the epsilon achieved by DP-SGD with noise multiplier `x`,
     # holding num_train, batch_size, epochs, and delta fixed.
     result = compute_dp_sgd_privacy_lib.compute_dp_sgd_privacy(
         num_train, batch_size, x, epochs, delta)
     return result[0]
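
A minimal sketch of how such a wrapper is typically used: a bisection search for the noise multiplier that achieves a target epsilon. It assumes, as holds for DP-SGD accounting, that epsilon decreases as the noise multiplier grows; the bracket and tolerance below are illustrative:

def find_noise_multiplier(target_eps, low=0.2, high=10.0, tol=1e-3):
    # Assumed invariant: _func(low) > target_eps >= _func(high).
    while high - low > tol:
        mid = (low + high) / 2
        if _func(mid) > target_eps:
            low = mid   # too little noise: epsilon still above the target
        else:
            high = mid  # enough noise: epsilon at or below the target
    return high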
Code Example #9
    def train(self, feature_network_checkpoint_path=None, restore=False):
        tf.global_variables_initializer().run()
        if restore:
            restore_global_id = self.load()
            print("Loaded from global_id {}".format(restore_global_id))
        else:
            restore_global_id = -1

        if feature_network_checkpoint_path is not None:
            # feature
            variables = tf.get_collection(
                tf.GraphKeys.GLOBAL_VARIABLES,
                scope=self.generator.scope_name + "/feature")
            print(variables)
            saver = tf.train.Saver(variables)
            saver.restore(self.sess, feature_network_checkpoint_path)

            # min max
            variables = tf.get_collection(
                tf.GraphKeys.GLOBAL_VARIABLES,
                scope=self.generator.scope_name + "/attribute_addi")
            print(variables)
            if len(variables) > 0:
                saver = tf.train.Saver(variables)
                saver.restore(self.sess, feature_network_checkpoint_path)

        self.summary_writer = tf.summary.FileWriter(
            self.checkpoint_dir, self.sess.graph)

        batch_num = self.data_feature.shape[0] // self.batch_size

        global_id = 0

        if (self.dp_noise_multiplier is not None and
                self.dp_l2_norm_clip is not None):
            if self.attr_discriminator is not None:
                # The effective noise multiplier for DP guarantee is
                # 0.5*noise_multiplier because each batch of data is utilized
                # twice for the two discriminators.
                noise_multiplier = self.dp_noise_multiplier * 0.5
            else:
                noise_multiplier = self.dp_noise_multiplier
            print("Using DP training")
            print("The final DP parameters will be:")
            compute_dp_sgd_privacy(
                self.data_feature.shape[0],
                self.batch_size,
                noise_multiplier,
                self.epoch,
                self.dp_delta)

        for epoch_id in tqdm(range(self.epoch)):
            data_id = np.random.choice(
                self.data_feature.shape[0],
                size=(self.data_feature.shape[0], self.num_packing))

            if global_id > restore_global_id:
                if ((epoch_id + 1) % self.epoch_checkpoint_freq == 0 or
                        epoch_id == self.epoch - 1):
                    with open(self.time_path, "a") as f:
                        time = datetime.datetime.now().strftime(
                            '%Y-%m-%d %H:%M:%S.%f')
                        f.write("epoch {} starts: {}\n".format(epoch_id, time))

            for batch_id in range(batch_num):
                feed_dict = {}
                for i in range(self.num_packing):
                    batch_data_id = data_id[batch_id * self.batch_size:
                                            (batch_id + 1) * self.batch_size,
                                            i]
                    batch_data_feature = self.data_feature[batch_data_id]
                    batch_data_attribute = self.data_attribute[batch_data_id]

                    batch_real_attribute_input_noise = \
                        self.gen_attribute_input_noise(self.batch_size)
                    batch_addi_attribute_input_noise = \
                        self.gen_attribute_input_noise(self.batch_size)
                    batch_feature_input_noise = \
                        self.gen_feature_input_noise(
                            self.batch_size, self.sample_time)
                    batch_feature_input_data = \
                        self.gen_feature_input_data_free(self.batch_size)

                    feed_dict[self.real_feature_pl_l[i]] = \
                        batch_data_feature
                    feed_dict[self.real_attribute_pl_l[i]] = \
                        batch_data_attribute
                    feed_dict[
                        self.g_real_attribute_input_noise_train_pl_l[i]] = \
                        batch_real_attribute_input_noise
                    feed_dict[
                        self.g_addi_attribute_input_noise_train_pl_l[i]] = \
                        batch_addi_attribute_input_noise
                    feed_dict[self.g_feature_input_noise_train_pl_l[i]] = \
                        batch_feature_input_noise
                    feed_dict[self.g_feature_input_data_train_pl_l[i]] = \
                        batch_feature_input_data

                if global_id > restore_global_id:
                    for _ in range(self.d_rounds - 1):
                        self.sess.run(self.d_op, feed_dict=feed_dict)
                        if self.attr_discriminator is not None:
                            self.sess.run(self.attr_d_op, feed_dict=feed_dict)
                    summary_result, _ = self.sess.run(
                        [self.d_summary, self.d_op],
                        feed_dict=feed_dict)
                    self.summary_writer.add_summary(summary_result, global_id)
                    if self.attr_discriminator is not None:
                        summary_result, _ = self.sess.run(
                            [self.attr_d_summary, self.attr_d_op],
                            feed_dict=feed_dict)
                        self.summary_writer.add_summary(
                            summary_result, global_id)

                    for _ in range(self.g_rounds - 1):
                        self.sess.run(self.g_op, feed_dict=feed_dict)
                    summary_result, _ = self.sess.run(
                        [self.g_summary, self.g_op],
                        feed_dict=feed_dict)
                    self.summary_writer.add_summary(summary_result, global_id)

                    if (batch_id + 1) % self.vis_freq == 0:
                        self.visualize(epoch_id, batch_id, global_id)

                global_id += 1

            if global_id - 1 > restore_global_id:
                if ((epoch_id + 1) % self.epoch_checkpoint_freq == 0 or
                        epoch_id == self.epoch - 1):
                    self.visualize(epoch_id, -1, global_id - 1)
                    self.save(global_id - 1)
                    with open(self.time_path, "a") as f:
                        time = datetime.datetime.now().strftime(
                            '%Y-%m-%d %H:%M:%S.%f')
                        f.write("epoch {} ends: {}\n".format(epoch_id, time))

                if (epoch_id + 1) % self.extra_checkpoint_freq == 0:
                    saver = tf.train.Saver()
                    checkpoint_dir = os.path.join(
                        self.checkpoint_dir,
                        "epoch_id-{}".format(epoch_id))
                    self.save(global_id - 1, saver, checkpoint_dir)
Code Example #10
dpsgd_subopt_params = {}
dpsgd_subopt_params["num_epochs"] = 20
dpsgd_subopt_params["clip_norm"] = 1e-3
dpsgd_subopt_params["learning_rate"] = 0.1
dpsgd_subopt_params["batch_size"] = 64
dpsgd_subopt_params["noise_multiplier"] = 1.150390625

# set remaining parameters
eps = np.log(3)
delta = 1e-5
m_range = np.linspace(1000, 2000, 5)
num_trials = 3

# verify dpsgd hyperparameters
compute_dp_sgd_privacy(len(synthetic_x), dpsgd_opt_params["batch_size"],
                       dpsgd_opt_params["noise_multiplier"],
                       dpsgd_opt_params["num_epochs"], delta)
compute_dp_sgd_privacy(len(synthetic_x), dpsgd_subopt_params["batch_size"],
                       dpsgd_subopt_params["noise_multiplier"],
                       dpsgd_subopt_params["num_epochs"], delta)

# run r2 experiments and store r2 quantiles and times
results = experiment.run_trials(synthetic_x, synthetic_y, eps, delta, m_range,
                                dpsgd_opt_params, dpsgd_subopt_params,
                                num_trials)

# plot r2 experiments results
experiment.plot_r2(results[0], results[1], results[2], m_range, num_trials,
                   "synthetic_r2")
experiment.plot_time(results[3], m_range, num_trials, "synthetic_time", True)