def single_release_comp(sigma_1, sigma_2=None, delta=1e-5):
    """Print the epsilon of one or two composed Gaussian releases.

    The release with ``sigma_1`` is always composed; the release with
    ``sigma_2`` is composed only when ``sigma_2`` is not None. Both are
    registered with ``prob=1.``. The value of ``get_eps(delta)`` is
    printed; nothing is returned.
    """

    def gaussian_rdp(noise_scale):
        # One curve per release, each bound to its own noise scale.
        return lambda order: rdp_bank.RDP_gaussian({'sigma': noise_scale}, order)

    accountant = rdp_acct.anaRDPacct()
    noise_scales = [sigma_1] if sigma_2 is None else [sigma_1, sigma_2]
    for noise_scale in noise_scales:
        accountant.compose_subsampled_mechanism(gaussian_rdp(noise_scale), prob=1.)

    print("Privacy loss is", accountant.get_eps(delta))
def CGF_func(sigma1, sigma2, sigma3, sigma4, num_Clust, num_iter_EM):
    """Build the combined RDP function of four Gaussian releases.

    ``sigma1`` and ``sigma2`` belong to the discriminator update (the two
    terms for applying DP-SGD). ``sigma3`` and ``sigma4`` belong to the EM
    updates for the MoG; their summed cost is weighted by
    ``num_Clust * num_iter_EM``.

    Returns a one-argument callable evaluating the total at a given order.
    """

    def gaussian_rdp(noise_scale):
        return lambda order: rdp_bank.RDP_gaussian({'sigma': noise_scale}, order)

    # discriminator update terms
    disc_first = gaussian_rdp(sigma1)
    disc_second = gaussian_rdp(sigma2)
    # EM update terms
    em_first = gaussian_rdp(sigma3)
    em_second = gaussian_rdp(sigma4)

    def total_rdp(order):
        discriminator_cost = disc_first(order) + disc_second(order)
        em_cost = em_first(order) + em_second(order)
        return discriminator_cost + num_Clust * num_iter_EM * em_cost

    return total_rdp
def __init__(self, sigma, name='Gaussian',
             RDP_off=False, approxDP_off=False, fdp_off=True,
             use_basic_RDP_to_approxDP_conversion=False,
             use_fDP_based_RDP_to_approxDP_conversion=False):
    """Describe a Gaussian mechanism through RDP, approx-DP and/or fDP.

    sigma: the std of the noise divided by the l2 sensitivity.
    name: label stored on the mechanism.
    RDP_off: if False, the Gaussian RDP function is registered.
    approxDP_off: if False, the direct approx-DP function is registered.
    fdp_off: if False, the direct fDP functions are registered and
        ``self.fdp`` is overwritten with the direct computation.
    use_basic_RDP_to_approxDP_conversion: registers the RDP function with
        ``BBGHS_conversion=False``.
    use_fDP_based_RDP_to_approxDP_conversion: registers the RDP function
        with ``fDP_based_conversion=True``; checked before the basic flag.
    """
    Mechanism.__init__(self)

    self.name = name  # When composing
    self.params = {'sigma': sigma}  # This will be useful for the Calibrator
    # TODO: should a generic unspecified mechanism have a name and a param dictionary?
    self.delta0 = 0

    if not RDP_off:
        def rdp_curve(x):
            return rdp_bank.RDP_gaussian({'sigma': sigma}, x)

        if use_fDP_based_RDP_to_approxDP_conversion:
            # Slightly more complex setting: convert RDP to fDP, then to
            # eps-delta-DP via the duality.
            conversion_options = {'fDP_based_conversion': True}
        elif use_basic_RDP_to_approxDP_conversion:
            conversion_options = {'BBGHS_conversion': False}
        else:
            # Default setting with fast computation of RDP to approx-DP.
            conversion_options = {}
        self.propagate_updates(rdp_curve, 'RDP', **conversion_options)

    if not approxDP_off:
        # Direct implementation of approxDP
        def approxdp_curve(x):
            return dp_bank.get_eps_ana_gaussian(sigma, x)

        self.propagate_updates(approxdp_curve, 'approxDP_func')

    if not fdp_off:
        # Direct implementation of fDP
        def log_one_minus_fdp(x):
            return fdp_bank.log_one_minus_fdp_gaussian({'sigma': sigma}, x)

        def log_neg_fdp_grad(x):
            return fdp_bank.log_neg_fdp_grad_gaussian({'sigma': sigma}, x)

        self.propagate_updates([log_one_minus_fdp, log_neg_fdp_grad],
                               'fDP_and_grad_log')

        # overwrite the fdp computation with the direct computation
        def direct_fdp(x):
            return fdp_bank.fDP_gaussian({'sigma': sigma}, x)

        self.fdp = direct_fdp
def conservative_analysis():
    """Compose the subsampled Gaussian steps of every data subset and print eps.

    All subsets share one accountant. For each subset, every step of every
    epoch is composed with the regular sampling rate, except the last step of
    an epoch, which uses the rate of the leftover batch. Per-epoch timings are
    printed, and after each subset the running ``get_eps(delta)`` is printed.
    """
    # (1) privacy parameter of the Gaussian mechanism
    sigma = 10.
    # (2) desired delta level
    delta = 1e-5

    n_epochs = 10  # 5 for DP-MERF and 17 for DP-MERF+AE
    batch_size = 64  # the same across experiments

    acct = rdp_acct.anaRDPacct()
    n_data_by_class = [
        5923, 6742, 5958, 6131, 5842, 5421, 5918, 6265, 5851, 5949
    ]

    start_time = time.time()
    for subset_count, n_data in enumerate(n_data_by_class, start=1):
        steps_per_epoch = int(np.ceil(n_data / batch_size))
        n_steps = steps_per_epoch * n_epochs
        regular_rate = batch_size / n_data
        last_step_rate = (n_data % batch_size) / n_data

        epoch_start = time.time()
        for epochs_done in range(1, n_epochs + 1):
            for step in range(1, steps_per_epoch + 1):
                if step == steps_per_epoch:
                    step_rate = last_step_rate
                else:
                    step_rate = regular_rate
                acct.compose_subsampled_mechanism(
                    lambda x: rdp_bank.RDP_gaussian({'sigma': sigma}, x),
                    step_rate)

            epoch_end = time.time()
            t_used = epoch_end - epoch_start
            t_total = epoch_end - start_time
            t_total_min = t_total / 60
            print(
                f'Epoch {epochs_done} done - Time used: {t_used:.2f}, Total: {t_total:.2f} ({t_total_min:.2f} minutes)'
            )
            epoch_start = epoch_end

        # All steps of this subset are composed: report the running epsilon.
        pre_eps_time = time.time()
        print("[", n_steps, "]Privacy loss is", acct.get_eps(delta))
        post_eps_time = time.time()
        print('time to get_eps: ', post_eps_time - pre_eps_time)
        print(f'data subset {subset_count} done')
def main(config):
    """Print the epsilon of the subsampled Gaussian training run in ``config``.

    Reads ``config['batchsize']``, ``config['num_discriminators']`` and
    ``config['iterations']``. The noise scale is fixed at 0.4859 rather than
    read from the config.
    """
    delta = 1e-5
    batch_size = config['batchsize']
    sampling_prob = 1. / config['num_discriminators']  # subsampling rate
    n_steps = config['iterations']  # training iterations
    sigma = 0.4859  # config['noise_multiplier']  # noise scale

    def gaussian_rdp(order):
        return rdp_bank.RDP_gaussian({'sigma': sigma}, order)

    accountant = rdp_acct.anaRDPacct()
    n_compositions = n_steps * batch_size
    accountant.compose_subsampled_mechanism(
        gaussian_rdp, sampling_prob, coeff=n_compositions)

    epsilon = accountant.get_eps(delta)
    print("Privacy cost is: epsilon={}, delta={}".format(epsilon, delta))
def main():
    """Print the cumulative privacy loss at the end of every training epoch."""
    # ---- input arguments ----
    # (1) privacy parameter of the Gaussian mechanism
    sigma = 1.2
    # (2) desired delta level
    delta = 1e-5
    # (5) number of training steps
    n_epochs = 10  # 5 for DP-MERF and 17 for DP-MERF+AE
    batch_size = 64  # the same across experiments

    # number of data points per supported dataset
    dataset_sizes = {
        "epileptic": 8049,
        "isolet": 4366,
        "adult": 11077,
        "census": 199523,
        "cervical": 753,
        "credit": 2668,
        "intrusion": 394021,
        "covtype": 9217,
    }
    dataset = "intrusion"
    n_data = dataset_sizes[dataset]

    steps_per_epoch = n_data // batch_size
    n_steps = steps_per_epoch * n_epochs
    # n_steps = 1

    # (6) sampling rate
    prob = batch_size / n_data
    # prob = 1
    # ---- end of input arguments ----

    # now use autodp to calculate the cumulative privacy loss
    # declare the moment accountant
    acct = rdp_acct.anaRDPacct()

    eps_seq = []
    for step in range(1, n_steps + 1):
        acct.compose_subsampled_mechanism(
            lambda x: rdp_bank.RDP_gaussian({'sigma': sigma}, x), prob)

        at_epoch_end = step % steps_per_epoch == 0
        if not (at_epoch_end or step == n_steps):
            continue
        latest_eps = acct.get_eps(delta)
        eps_seq.append(latest_eps)
        print("[", step, "]Privacy loss is", latest_eps)
def conservative_analysis_syn2d(sigma, delta, n_epochs, batch_size, n_data_per_class, n_classes,
                                print_intermediate_results):
    """Compose the subsampled Gaussian steps of every class and print eps.

    All classes share one accountant. Each class contributes
    ``ceil(n_data_per_class / batch_size) * n_epochs`` steps. Every step uses
    the sampling rate ``batch_size / n_data``, except the last step of an
    epoch, which uses the rate of the (possibly smaller) final batch.

    sigma: noise scale passed to ``rdp_bank.RDP_gaussian`` as ``'sigma'``.
    delta: delta passed to ``get_eps``.
    n_epochs: number of epochs per class.
    batch_size: number of samples per regular batch.
    n_data_per_class: number of samples in each class.
    n_classes: number of classes (each with ``n_data_per_class`` samples).
    print_intermediate_results: if true, eps is printed after every class;
        otherwise only after the last one.

    Nothing is returned; timings and eps are printed.
    """
    acct = rdp_acct.anaRDPacct()
    n_data_by_class = [n_data_per_class] * n_classes

    start_time = time.time()
    subset_count = 0
    for model_idx, n_data in enumerate(n_data_by_class):
        steps_per_epoch = int(np.ceil(n_data / batch_size))
        n_steps = steps_per_epoch * n_epochs
        sampling_rate = batch_size / n_data

        # When batch_size divides n_data there is no leftover batch: the last
        # step of an epoch is a full batch. Using the raw remainder (0) here
        # would account that step with sampling rate 0 and under-count the
        # privacy loss.
        epoch_last_batch_size = n_data % batch_size or batch_size
        epoch_last_sampling_rate = epoch_last_batch_size / n_data

        old_time = time.time()
        for i in range(1, n_steps + 1):
            sampling_rate_i = epoch_last_sampling_rate if i % steps_per_epoch == 0 else sampling_rate
            acct.compose_subsampled_mechanism(
                lambda x: rdp_bank.RDP_gaussian({'sigma': sigma}, x), sampling_rate_i)

            if i % steps_per_epoch == 0:
                # End of an epoch: report how long it took.
                new_time = time.time()
                epochs_done = i // steps_per_epoch
                t_used = new_time - old_time
                t_total = new_time - start_time
                t_total_min = t_total / 60
                print(
                    f'Epoch {epochs_done} done - Time used: {t_used:.2f}, Total: {t_total:.2f} ({t_total_min:.2f} minutes)'
                )
                old_time = new_time

            if i == n_steps and (print_intermediate_results or model_idx + 1 == len(n_data_by_class)):
                # Last step of this class: report the running epsilon.
                pre_eps_time = time.time()
                subset_count += 1
                print("[", i, "]Privacy loss is", (acct.get_eps(delta)))
                post_eps_time = time.time()
                print(f'time to get_eps: {post_eps_time - pre_eps_time:.2f}')
                old_time = post_eps_time
                print(f'data subset {subset_count} done')
def __init__(self, sigma=None, name='Gaussian'):
    """Set up a Gaussian mechanism; descriptions are added only if sigma is given.

    sigma: the std of the noise divided by the l2 sensitivity. When None,
        only ``name``, ``params`` and ``delta0`` are set.
    name: label stored on the mechanism.
    """
    Mechanism.__init__(self)

    self.name = name  # When composing
    self.params = {'sigma': sigma}  # This will be useful for the Calibrator
    self.delta0 = 0

    if sigma is None:
        return

    def rdp_curve(x):
        return rdp_bank.RDP_gaussian({'sigma': sigma}, x)

    self.propagate_updates(rdp_curve, 'RDP')

    # Overwrite the approxDP and fDP with their direct computation
    def direct_approxdp(x):
        return dp_bank.get_eps_ana_gaussian(sigma, x)

    def direct_fdp(x):
        return fdp_bank.fDP_gaussian({'sigma': sigma}, x)

    self.approxDP = direct_approxdp
    self.fDP = direct_fdp
def direct_readout(ar):
    """Print the epsilon of the subsampled Gaussian training run described by ``ar``.

    Reads ``ar.batchsize``, ``ar.num_discriminators``, ``ar.iterations`` and
    ``ar.noise_multiplier``. The step count, batch size and sampling
    probability are printed before the accounting.
    """
    delta = 1e-5
    batch_size = ar.batchsize
    sampling_prob = 1. / ar.num_discriminators  # subsampling rate
    n_steps = ar.iterations  # training iterations
    print(n_steps, batch_size, sampling_prob)
    sigma = ar.noise_multiplier  # noise scale

    def gaussian_rdp(order):
        return rdp_bank.RDP_gaussian({'sigma': sigma}, order)

    accountant = rdp_acct.anaRDPacct()
    n_compositions = n_steps * batch_size
    accountant.compose_subsampled_mechanism(
        gaussian_rdp, sampling_prob, coeff=n_compositions)

    epsilon = accountant.get_eps(delta)
    print("Privacy cost is: epsilon={}, delta={}".format(epsilon, delta))
def __init__(self, hyperparams, net, params, loss_func, model_ctx, accountant):
    """Store the training objects and the privacy accountant, and reset the step count.

    The Gaussian RDP function kept in ``self._cgf_func`` uses
    ``hyperparams['z']`` as its sigma.
    """
    self._hyperparams = hyperparams

    # Network and parameter info
    self._net, self._params = net, params
    self._loss_func, self._model_ctx = loss_func, model_ctx

    # Privacy info
    self._accountant = accountant

    # Previously: cgfbank.CGF_gaussian({'sigma': self._hyperparams['z']}, x)
    def gaussian_rdp(x):
        # 'z' is looked up on every call, not captured once here.
        return rdp_bank.RDP_gaussian({'sigma': self._hyperparams['z']}, x)

    self._cgf_func = gaussian_rdp

    # Number of steps so far (i.e., # of updates to the params vector)
    self._step = 0

    # Use a batch_size that fits in GPU memory
    self._batch_size = self._compute_good_batch_size()
import network #from utils import Hamming_Score as hamming_accuracy import os from dataset_loader import ImageDataset import aggregation import autodp from autodp import rdp_bank, dp_acct, rdp_acct, privacy_calibrator #from utils import Hamming_Score as hamming_accuracy from utils import hamming_precision as hamming_accuracy from knn_attribute import tau_limit import sys sys.path.append('../dataset/duke') from datafolder.folder import Test_Dataset nb_teachers = config.nb_teachers acct = rdp_acct.anaRDPacct() gaussian = lambda x: rdp_bank.RDP_gaussian( {'sigma': int(config.gau_scale / config.tau)}, x) #acct.compose_mechanism(gaussian,coeff=config.tau*config.stdnt_share) #print('privacy loss', acct.get_eps(config.delta)) dataset_dict = { 'market': 'Market-1501', 'duke': 'DukeMTMC-reID', } def ensemble_preds(nb_teachers, stdnt_data): """ Given a dataset, a number of teachers, and some input data, this helper function queries each teacher for predictions on the data and returns all predictions in a single array. (That can then be aggregated into one single prediction per input using aggregation.py (cf. function prepare_student_data() below)
tf.flags.DEFINE_integer('stdnt_share', 1000, 'Student share (last index) of the test data') tf.flags.DEFINE_integer('extra', 0, 'remove extra samples from training to test') tf.flags.DEFINE_bool('pca', True, 'if true then apply pca as preprocessing') tf.flags.DEFINE_bool('knn', 1, 'if 1 then replace dnn with knn') tf.flags.DEFINE_bool('vat', False, 'whether use vat to lable query, only use after vat') tf.flags.DEFINE_boolean('deeper', False, 'Activate deeper CNN model') FLAGS = tf.flags.FLAGS prob = 0.2 # subsample probability for i acct = rdp_acct.anaRDPacct() delta = 1e-8 sigma = FLAGS.gau_scale #gaussian parameter gaussian = lambda x: rdp_bank.RDP_gaussian({'sigma': sigma}, x) def convert_vat(test_data, test_labels, noisy_labels): log = {} log['labeled_train_images'] = test_data[:FLAGS.stdnt_share] log['labeled_train_labels'] = noisy_labels log['train_images'] = test_data[FLAGS.stdnt_share:-1000] log['train_labels'] = test_labels[FLAGS.stdnt_share:-1000] #use the remaining 1000 point for test log['test_images'] = test_data[:-1000] print('test_images.size', log['test_images'].shape) log['test_labels'] = test_labels[:-1000] file_vat = "../vat_tf/log/" + FLAGS.dataset + '_query=' + str( FLAGS.stdnt_share) + '.pkl'
def get_eps_gaussian(sigma, delta):
    """Return the eps of the Gaussian mechanism for the given sigma and delta.

    The Gaussian RDP function for ``sigma`` is handed to ``get_eps_rdp``
    together with ``delta``. A negative ``delta`` fails the assertion.
    """
    assert delta >= 0

    def gaussian_rdp(order):
        return rdp_bank.RDP_gaussian({'sigma': sigma}, order)

    return get_eps_rdp(gaussian_rdp, delta)
def __init__(self, sigma, name='Gaussian',
             RDP_off=False, approxDP_off=False, fdp_off=True,
             use_basic_RDP_to_approxDP_conversion=False,
             use_fDP_based_RDP_to_approxDP_conversion=False, phi_off=True):
    """Describe a Gaussian mechanism through phi-function, RDP, approx-DP and/or fDP.

    sigma: the std of the noise divided by the l2 sensitivity.
    name: label stored on the mechanism.
    RDP_off: if False, then we characterize the mechanism using RDP.
    approxDP_off: if False, the direct approx-DP function is registered.
    fdp_off: if False, then we characterize the mechanism using fdp.
    use_basic_RDP_to_approxDP_conversion: registers the RDP function with
        ``BBGHS_conversion=False``.
    use_fDP_based_RDP_to_approxDP_conversion: registers the RDP function
        with ``fDP_based_conversion=True``; checked before the basic flag.
    phi_off: if False, then we characterize the mechanism using phi-function.
    """
    Mechanism.__init__(self)

    self.name = name  # When composing
    self.params = {'sigma': sigma}  # This will be useful for the Calibrator
    # TODO: should a generic unspecified mechanism have a name and a param dictionary?

    self.delta0 = 0

    if not phi_off:
        # Apply phi function to analyze Gaussian mechanism.
        # The CDF of the privacy loss R.V. is computed using an integration
        # (see details in cdf_bank) through Levy Theorem.
        # If self.exactPhi = True, the algorithm provides an exact
        # characterization.
        self.exactPhi = True
        log_phi = lambda x: phi_bank.phi_gaussian({'sigma': sigma}, x)
        self.log_phi_p = self.log_phi_q = log_phi

        # self.cdf tracks the cdf of log(p/q) and the cdf of log(q/p).
        self.propagate_updates((log_phi, log_phi), 'log_phi')

        # Moreover, we know the closed-form expression of the CDF of the
        # privacy loss RV: l = log(p/q) ~ N(1/(2 sigma^2), 1/sigma^2).
        # We can also use the following closed-form cdf directly:
        # sigma = sigma*1.0/np.sqrt(coeff)
        # mean = 1.0 / (2.0 * sigma ** 2)
        # std = 1.0 / (sigma)
        # cdf = lambda x: norm.cdf((x - mean) / std)
        # self.propagate_updates(cdf, 'cdf', take_log=True)

    if not RDP_off:
        new_rdp = lambda x: rdp_bank.RDP_gaussian({'sigma': sigma}, x)
        if use_fDP_based_RDP_to_approxDP_conversion:
            # This setting is slightly more complex, which involves converting RDP to fDP,
            # then to eps-delta-DP via the duality
            self.propagate_updates(new_rdp, 'RDP', fDP_based_conversion=True)
        elif use_basic_RDP_to_approxDP_conversion:
            self.propagate_updates(new_rdp, 'RDP', BBGHS_conversion=False)
        else:
            # This is the default setting with fast computation of RDP to approx-DP
            self.propagate_updates(new_rdp, 'RDP')

    if not approxDP_off:  # Direct implementation of approxDP
        new_approxdp = lambda x: dp_bank.get_eps_ana_gaussian(sigma, x)
        self.propagate_updates(new_approxdp, 'approxDP_func')

    if not fdp_off:  # Direct implementation of fDP
        fun1 = lambda x: fdp_bank.log_one_minus_fdp_gaussian({'sigma': sigma}, x)
        fun2 = lambda x: fdp_bank.log_neg_fdp_grad_gaussian({'sigma': sigma}, x)
        self.propagate_updates([fun1, fun2], 'fDP_and_grad_log')
        # overwrite the fdp computation with the direct computation
        self.fdp = lambda x: fdp_bank.fDP_gaussian({'sigma': sigma}, x)
def calibrate_epsilon(params, delta):
    """Compose the releases of one configuration and return its epsilon (lemma 8).

    We use approximate-CDP for the composition, and then calculate the
    epsilon parameter as a function of delta.

    params: mapping with the following fields
        params['config']   which configuration is used ('config4',
                           'config3', 'config2'); a falsy value means
                           nothing is released.
        params['eps_dist'] mapping holding the epsilon assigned to the
                           pure-DP releases, read under 'e9' (config4),
                           'e7' (config3) or 'e6' (config2).
        params['gaussian'] iterable of (sensitivity, variance) tuples, one
                           per Gaussian release; each config often
                           releases more than one quantity.
    delta: total failure probability. Part of it (delta0) is set aside for
        the pure-DP releases of the chosen configuration.

    Returns 0 when params['config'] is falsy, np.inf when delta0 >= delta,
    and otherwise the accountant's epsilon at (delta - delta0) / (1 - delta0).
    """

    def gaussian_rdp(noise_ratio):
        # Bind the ratio per release. A lambda written directly in the loop
        # would see only the last iteration's values if it is evaluated
        # after the loop has moved on.
        return lambda x: rdp_bank.RDP_gaussian({'sigma': noise_ratio}, x)

    config = params['config']
    eps_edge_dist = params['eps_dist']

    if not config:
        return 0

    acct = rdp_acct.anaRDPacct()
    delta0 = 0

    if config == 'config4':
        eps_e9 = eps_edge_dist['e9']
        eps_sigma = eps_e9 / 4
        eps_gamma = eps_e9 / 4
        delta_sigma = delta / 4
        delta_gamma = delta / 4
        delta0 = delta_sigma + delta_gamma
        acct.compose_mechanism(
            lambda x: rdp_bank.RDP_pureDP({'eps': eps_sigma}, x))
        acct.compose_mechanism(
            lambda x: rdp_bank.RDP_pureDP({'eps': eps_gamma}, x))
    elif config == 'config3':
        eps_e7 = eps_edge_dist['e7']
        eps_sigma = eps_e7 / 3
        eps_gamma = eps_e7 / 3
        delta_sigma = delta / 3
        delta_gamma = delta / 3
        delta0 = delta_sigma + delta_gamma
        acct.compose_mechanism(
            lambda x: rdp_bank.RDP_pureDP({'eps': eps_sigma}, x))
        acct.compose_mechanism(
            lambda x: rdp_bank.RDP_pureDP({'eps': eps_gamma}, x))
    elif config == 'config2':
        eps_e6 = eps_edge_dist['e6']
        eps_sigma = eps_e6 / 2
        delta_sigma = delta / 2
        delta0 = delta_sigma
        acct.compose_mechanism(
            lambda x: rdp_bank.RDP_pureDP({'eps': eps_sigma}, x))

    print('delta0:', delta0)
    if delta0 >= delta:
        return np.inf

    for sensitivity, variance in params['gaussian']:
        # Often we pre-emptively calculate sensitivities, so they might not
        # be zero in places where we aren't adding noise.
        # Variance provides a better check for this.
        if sensitivity == 0 or variance == 0:
            continue
        std = np.sqrt(variance)
        # CDP of gaussian mechanism conditioning on the event is the same as its RDP.
        noise_ratio = std / max(sensitivity, np.finfo(np.float32).eps)
        acct.compose_mechanism(gaussian_rdp(noise_ratio))

    # This privacy calculation follows from Lemma 8.8 of Bun et al. (2016)
    # https://arxiv.org/pdf/1605.02065.pdf
    return acct.get_eps((delta - delta0) / (1 - delta0))
loss_fun = loss_fun * i / (i + 1) + nd.mean(loss).asscalar() / (i + 1) return acc.get()[1], loss_fun # ## Now let's try attaching a privacy accountant to this data set # declare a moment accountant from pydiffpriv DPobject = rdp_acct.anaRDPacct() # Specify privacy specific inputs thresh = 4.0 # limit the norm of individual gradient sigma = thresh delta = 1e-5 func = lambda x: rdp_bank.RDP_gaussian({'sigma': sigma / thresh}, x) # ## We now specify the parameters needed for learning # epochs = 10 learning_rate = .1 n = train_data.num_data batchsz = 100 # count = 0 niter = 0 moving_loss = 0 grads = dpdl_utils.initialize_grad(params, ctx=ctx)