def call_sanitize_grouped(self, px_grads, var_list, iteration):
  """Sanitize per-example gradients with a cyclic, per-layer sigma schedule.

  Args:
    px_grads: per-example gradients, one tensor per variable.
    var_list: the variables matching px_grads.
    iteration: float32 scalar tensor holding the current iteration.

  Returns:
    A pair of (list of sanitized gradients, list of clipped gradients).
  """
  sanitized_grads = []
  clipped_grads = []
  index = 0
  for px_grad, v in zip(px_grads, var_list):
    mul_l2norm_bound = 1
    tensor_name = utils.GetTensorOpName(v)
    # Cyclic schedule: privacy_multiplier = 1 + 0.05 * ((iteration + index) % 12),
    # so the per-layer sigma oscillates between 1.0x and 1.55x the base sigma.
    privacy_multiplier = tf.add(
        tf.multiply(
            tf.mod(tf.add(iteration, tf.constant(index, tf.float32)),
                   tf.constant(12.0, tf.float32)),
            tf.constant(0.05, tf.float32)),
        tf.constant(1.0))
    curr_sigma = self._sigma * privacy_multiplier
    # Shrink the clipping bound as sigma grows, keeping each layer's
    # noise-to-signal ratio in check.
    mul_l2norm_bound /= tf.multiply(privacy_multiplier,
                                    tf.constant(2.0, tf.float32))
    index += 1
    sanitized_grad, clipped_grad = self._sanitizer.sanitize_grouped(
        px_grad, self._eps_delta, sigma=curr_sigma,
        tensor_name=tensor_name, add_noise=True,
        num_examples=self._batches_per_lot * tf.slice(
            tf.shape(px_grad), [0], [1]),
        mul_l2norm_bound=mul_l2norm_bound)
    # Remove l2norm_inv to come back to clipping on each layer.
    sanitized_grads.append(sanitized_grad)
    clipped_grads.append(clipped_grad)
  return sanitized_grads, clipped_grads
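# A minimal worked sketch (not part of the training graph) of the cyclic
# schedule above, assuming a base sigma of 4.0 and layer index 0:
#
#   multiplier(t) = 1 + 0.05 * (t % 12)
#   t = 0  -> 1.00 -> sigma 4.0, bound scale 1 / (1.00 * 2) = 0.500
#   t = 6  -> 1.30 -> sigma 5.2, bound scale 1 / (1.30 * 2) ≈ 0.385
#   t = 11 -> 1.55 -> sigma 6.2, bound scale 1 / (1.55 * 2) ≈ 0.323
#   t = 12 -> 1.00 again (the schedule wraps every 12 iterations).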
def compute_sanitized_gradients(self, loss, var_list=None, add_noise=True):
  """Compute the sanitized gradients.

  Args:
    loss: the loss tensor.
    var_list: the optional variables.
    add_noise: if true, then add noise. Always clip.
  Returns:
    a list of sanitized gradients, one per variable in var_list.
  Raises:
    TypeError: if var_list contains non-variable.
  """
  self._assert_valid_dtypes([loss])

  xs = [tf.convert_to_tensor(x) for x in var_list]
  px_grads = per_example_gradients.PerExampleGradients(loss, xs)
  sanitized_grads = []
  for px_grad, v in zip(px_grads, var_list):
    tensor_name = utils.GetTensorOpName(v)
    sanitized_grad = self._sanitizer.sanitize(
        px_grad, self._eps_delta, sigma=self._sigma,
        tensor_name=tensor_name, add_noise=add_noise,
        num_examples=self._batches_per_lot * tf.slice(
            tf.shape(px_grad), [0], [1]))
    sanitized_grads.append(sanitized_grad)

  return sanitized_grads
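# A minimal usage sketch, assuming `dp_opt` is an instance of this optimizer
# and `loss` is the loss tensor; the variable names here are illustrative,
# not part of this module:
#
#   var_list = tf.trainable_variables()
#   sanitized = dp_opt.compute_sanitized_gradients(loss, var_list=var_list)
#   train_op = dp_opt.apply_gradients(list(zip(sanitized, var_list)))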
def __init__(self, learning_rate, eps_delta, sanitizer, sigma=None,
             accountant_sigma=None, use_locking=False,
             name="DPGradientDescent", batches_per_lot=1,
             is_sigma_layerwised=False, is_sigma_data_dependent=False):
  """Construct a differentially private gradient descent optimizer.

  The optimizer uses a fixed privacy budget for each batch of training.

  Args:
    learning_rate: for GradientDescentOptimizer.
    eps_delta: EpsDelta pair for each epoch.
    sanitizer: for sanitizing the gradient.
    sigma: noise sigma. If None, use eps_delta pair to compute sigma;
      otherwise use supplied sigma directly.
    accountant_sigma: optional sigma used for the privacy-accounting
      sanitize pass; if None, only `sigma` is used.
    use_locking: use locking.
    name: name for the object.
    batches_per_lot: Number of batches in a lot.
    is_sigma_layerwised: if true, `sigma` (and `accountant_sigma`) is a
      per-layer list indexed by variable position rather than a scalar.
    is_sigma_data_dependent: if true, `sigma` is per-example rather than
      a scalar.
  """
  super(DPGradientDescentOptimizer, self).__init__(learning_rate,
                                                   use_locking, name)

  # Also, if needed, define the gradient accumulators
  self._batches_per_lot = batches_per_lot
  self._is_sigma_layerwised = is_sigma_layerwised
  self._is_sigma_data_dependent = is_sigma_data_dependent
  self._grad_accum_dict = {}
  if batches_per_lot > 1:
    self._batch_count = tf.Variable(1, dtype=tf.int32, trainable=False,
                                    name="batch_count")
    var_list = tf.trainable_variables()
    with tf.variable_scope("grad_acc_for"):
      for var in var_list:
        v_grad_accum = tf.Variable(tf.zeros_like(var), trainable=False,
                                   name=utils.GetTensorOpName(var))
        self._grad_accum_dict[var.name] = v_grad_accum

  self._eps_delta = eps_delta
  self._sanitizer = sanitizer
  self._sigma = sigma
  self._act_sigma = accountant_sigma
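# A minimal construction sketch; the accountant and sanitizer classes and all
# hyperparameter values below are illustrative assumptions, not prescribed by
# this module:
#
#   acct = accountant_lib.AmortizedAccountant(total_examples=60000)
#   san = sanitizer_lib.AmortizedGaussianSanitizer(
#       acct, [4.0 / 600, True])  # assumed default (l2norm_bound, clip) pair
#   dp_opt = DPGradientDescentOptimizer(
#       learning_rate=0.05, eps_delta=[1.0, 1e-5], sanitizer=san,
#       sigma=4.0, batches_per_lot=1)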
def compute_sanitized_gradients(self, loss, var_list=None, add_noise=True):
  """Compute the sanitized gradients.

  Args:
    loss: the loss tensor, with one entry per example.
    var_list: the optional variables.
    add_noise: if true, then add noise. Always clip.
  Returns:
    a list of sanitized gradients, one per variable in var_list.
  Raises:
    TypeError: if var_list contains non-variable.
  """
  self._assert_valid_dtypes([loss])

  xs = [tf.convert_to_tensor(x) for x in var_list]
  # TODO check this change: compute per-example gradients by unstacking the
  # loss along the batch dimension and differentiating each entry
  # separately, instead of per_example_gradients.PerExampleGradients(loss, xs).
  loss_list = tf.unstack(loss, axis=0)
  px_grads_byexample = [tf.gradients(l, xs) for l in loss_list]
  # Regroup from per-example lists into per-variable lists.
  px_grads = [[x[v] for x in px_grads_byexample] for v in range(len(xs))]

  sanitized_grads = []
  for px_grad, v in zip(px_grads, var_list):
    tensor_name = utils.GetTensorOpName(v)
    sanitized_grad = self._sanitizer.sanitize(
        px_grad, self._eps_delta, sigma=self._sigma,
        tensor_name=tensor_name, add_noise=add_noise,
        num_examples=self._batches_per_lot * tf.slice(
            tf.shape(px_grad), [0], [1]))
    sanitized_grads.append(sanitized_grad)

  return sanitized_grads
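# A minimal shape sketch of the regrouping above, assuming a batch of 3
# examples and 2 variables (names are illustrative):
#
#   px_grads_byexample = [[g0_v0, g0_v1],   # gradients of example 0
#                         [g1_v0, g1_v1],   # gradients of example 1
#                         [g2_v0, g2_v1]]   # gradients of example 2
#   px_grads           = [[g0_v0, g1_v0, g2_v0],   # all gradients for var 0
#                         [g0_v1, g1_v1, g2_v1]]   # all gradients for var 1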
def call_sanitize_basic(self, px_grads, var_list):
  """Basic sanitizer with different parameters for bias weights."""
  sanitized_grads = []
  clipped_grads = []
  for px_grad, v in zip(px_grads, var_list):
    tensor_name = utils.GetTensorOpName(v)
    # Bias variables are identified by a trailing 'b' in the op name.
    isBias = tensor_name[-1] == 'b'
    sanitized_grad, clipped_grad, num_ex = self._sanitizer.sanitize(
        px_grad, self._eps_delta, sigma=self._sigma,
        tensor_name=tensor_name, add_noise=True,
        num_examples=self._batches_per_lot * tf.slice(
            tf.shape(px_grad), [0], [1]),
        isBias=isBias)
    # Remove l2norm_inv to come back to clipping on each layer.
    sanitized_grads.append(sanitized_grad)
    clipped_grads.append(clipped_grad)
  # num_ex from the last variable is returned; all variables share the same
  # batch size.
  return sanitized_grads, clipped_grads, num_ex
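# A naming sketch for the bias test above (op names are illustrative
# assumptions about the model's variable naming):
#
#   utils.GetTensorOpName(v) -> "hidden1/b"  => isBias = True
#   utils.GetTensorOpName(v) -> "hidden1/W"  => isBias = False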
def compute_sanitized_gradients(self, loss_list, var_list=None,
                                add_noise=True):
  """Compute the sanitized gradients for a list of loss terms.

  Args:
    loss_list: the list of loss tensors.
    var_list: the optional variables.
    add_noise: if true, then add noise. Always clip.
  Returns:
    a list of sanitized gradients, one per variable in var_list.
  Raises:
    TypeError: if var_list contains non-variable.
  """
  for loss in loss_list:
    self._assert_valid_dtypes([loss])

  xs = [tf.convert_to_tensor(x) for x in var_list]
  px_grads_per_loss = []
  for loss in loss_list:
    px_grads_per_loss.append(
        per_example_gradients.PerExampleGradients(loss, xs))
  # Regroup by variable and sum the per-example gradients over all losses.
  per_loss_px_grads = list(zip(*px_grads_per_loss))
  px_grads = [tf.add_n(list(grads)) for grads in per_loss_px_grads]

  sanitized_grads = []
  idx = 0
  for px_grad, v in zip(px_grads, var_list):
    tensor_name = utils.GetTensorOpName(v)
    act_sig = None
    if self._is_sigma_layerwised:
      sig = self._sigma[idx]
      if self._act_sigma is not None:
        act_sig = self._act_sigma[idx]
    else:
      sig = self._sigma
      if self._act_sigma is not None:
        act_sig = self._act_sigma

    num_examples = self._batches_per_lot * tf.slice(
        tf.shape(px_grad), [0], [1])

    def no_noise():
      # Run the accounting pass at act_sig, then emit a clipped but
      # un-noised gradient.
      act_op = self._sanitizer.sanitize(
          px_grad, self._eps_delta, sigma=act_sig,
          tensor_name=tensor_name,
          is_sigma_scalar=not self._is_sigma_data_dependent,
          add_noise=add_noise, num_examples=num_examples)
      with tf.control_dependencies([act_op]):
        return self._sanitizer.sanitize(
            px_grad, self._eps_delta, sigma=sig,
            tensor_name=tensor_name,
            is_sigma_scalar=not self._is_sigma_data_dependent,
            add_noise=False, num_examples=num_examples)

    def noise():
      # Run the accounting pass at act_sig, then add noise at sig without
      # accounting a second time.
      act_op = self._sanitizer.sanitize(
          px_grad, self._eps_delta, sigma=act_sig,
          tensor_name=tensor_name,
          is_sigma_scalar=not self._is_sigma_data_dependent,
          add_noise=add_noise, num_examples=num_examples)
      with tf.control_dependencies([act_op]):
        return self._sanitizer.sanitize(
            px_grad, self._eps_delta, sigma=sig,
            tensor_name=tensor_name,
            is_sigma_scalar=not self._is_sigma_data_dependent,
            add_noise=add_noise, no_account=True,
            num_examples=num_examples)

    if self._act_sigma is not None:
      sanitized_grad = tf.cond(tf.equal(sig, tf.constant(0.0)),
                               no_noise, noise)
    else:
      sanitized_grad = self._sanitizer.sanitize(
          px_grad, self._eps_delta, sigma=sig, tensor_name=tensor_name,
          is_sigma_scalar=not self._is_sigma_data_dependent,
          add_noise=add_noise, num_examples=num_examples)
    sanitized_grads.append(sanitized_grad)
    idx += 1

  return sanitized_grads
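# A minimal sketch of the per-loss summation above, assuming two loss terms
# (e.g. a task loss and a regularizer) over the same variables:
#
#   px_grads_per_loss = [PerExampleGradients(loss_a, xs),
#                        PerExampleGradients(loss_b, xs)]
#   # zip(*...) regroups by variable; tf.add_n then sums the per-example
#   # gradients of both losses for each variable:
#   px_grads = [tf.add_n(list(g)) for g in zip(*px_grads_per_loss)]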
def compute_sanitized_gradients_from_input_perturbation(
    self, loss, ex, input_sigma, var_list, add_noise=True):
  """Compute the sanitized gradients from input perturbation.

  Args:
    loss: the loss tensor.
    ex: the input examples the loss depends on.
    input_sigma: sigma of the noise applied to the input.
    var_list: the variables.
    add_noise: if true, then add noise. Always clip.
  Returns:
    a tuple of (sanitized gradients, masked sigmas, unmasked sigmas,
    (px_pp_A_norm, px_pp_I_norm)).
  Raises:
    TypeError: if var_list contains non-variable.
  """
  self._assert_valid_dtypes([loss])

  xs = [tf.convert_to_tensor(x) for x in var_list]
  # Each element in px_grads is the px_grad for a param matrix, with shape
  # [batch_size, shape of param matrix].
  px_grads = per_example_gradients.PerExampleGradients(loss, xs)
  # Calculate sigma; sigma has the shape [batch_size].
  px_pp_grads = []
  unmasked_sigmas = []
  sigmas = []
  sanitized_grads = []
  num = 0
  for px_grad, v in zip(px_grads, var_list):
    num += 1
    if num > FLAGS.ACCOUNT_NUM:
      break
    px_grad_vec = tf.reshape(
        px_grad, [tf.shape(px_grad)[0], -1])  # [batch_size, vec_param]
    # Jacobian of each per-example gradient w.r.t. the input example.
    px_pp_grad = batch_jacobian(
        px_grad_vec, ex, use_pfor=False,
        parallel_iterations=px_grad_vec.get_shape().as_list()[0] *
        px_grad_vec.get_shape().as_list()[1])  # [b, vec_param, ex_shape]
    px_pp_grad = tf.identity(
        tf.reshape(px_pp_grad, [
            px_pp_grad.get_shape().as_list()[0],
            px_pp_grad.get_shape().as_list()[1], -1
        ]))  # [b, vec_param, ex_size]
    px_pp_grads.append(px_pp_grad)

    # Alternative (disabled): derive sigma from the Cholesky factor L of the
    # Jacobian covariance, sigma = input_sigma / ||L^{-1}||_F per example.

    # Scale input_sigma by the ratio of the Jacobian's Frobenius norm to
    # that of the identity.
    px_pp_A_norm = tf.norm(px_pp_grad, ord="fro", axis=[1, 2],
                           name="fro_{}".format(num))
    px_pp_I_norm = tf.norm(tf.eye(px_pp_grad.get_shape().as_list()[1]),
                           ord="fro", axis=[0, 1],
                           name="fro_{}".format(num))
    sigma = input_sigma * px_pp_A_norm / px_pp_I_norm

    # Alternative (disabled): heterogeneous per-parameter scale,
    #   scale = tf.reduce_mean(tf.reduce_sum(tf.square(px_pp_grad), 2), 1)
    #   sigma = tf.sqrt(scale) * input_sigma

    unmasked_sigmas.append(sigma)
    # Zero out sigmas below the threshold.
    mask = tf.cast(
        tf.greater_equal(sigma,
                         tf.constant(FLAGS.INPUT_DP_SIGMA_THRESHOLD)),
        tf.float32)
    sigma = tf.identity(sigma * mask)
    sigmas.append(sigma)

    tensor_name = utils.GetTensorOpName(v)
    px_grad = tf.identity(px_grad)
    sanitized_grad = self._sanitizer.sanitize(
        px_grad, self._eps_delta, sigma=sigma, tensor_name=tensor_name,
        add_noise=add_noise,
        num_examples=tf.slice(tf.shape(px_grad), [0], [1]),
        no_clipping=False)
    sanitized_grads.append(sanitized_grad)

  # Pad sigmas with zeros for the variables skipped by ACCOUNT_NUM.
  while num <= len(var_list):
    sigmas.append(tf.zeros([ex.get_shape().as_list()[0]]))
    num += 1

  return sanitized_grads, sigmas, unmasked_sigmas, (px_pp_A_norm,
                                                    px_pp_I_norm)
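# A minimal numeric sketch of the Frobenius-norm scaling above (illustrative
# values): for one example, if the Jacobian J has shape [vec_param=2,
# ex_size=2] with entries [[1, 0], [0, 2]], then
#
#   ||J||_F = sqrt(1 + 4) ≈ 2.236,  ||I_2||_F = sqrt(2) ≈ 1.414
#   sigma = input_sigma * 2.236 / 1.414 ≈ 1.581 * input_sigma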
def sanitize_overall(self, px_grads, var_list, eps_delta,
                     option=ClipOption(None, None), num_examples=None,
                     sigma=None, bound_multiplier=1, add_noise=True,
                     batches_per_lot=None):
  """Clip each gradient per layer, then noise all layers jointly.

  Each per-example gradient is clipped layer by layer; the clipped gradients
  are then flattened, concatenated, noised as a single vector, and split
  back into per-variable tensors.
  """
  num_tot_examples = tf.zeros([1], dtype=tf.int32)
  sanitized_gradient = []
  clipped_gradients = []
  t_list = []
  weights_shapes = []
  weights_sizes = []
  linear_clipped_weights = []
  for px_grad, v in zip(px_grads, var_list):
    # Flatten each per-example gradient to [batch_size, -1].
    t_list.append(tf.reshape(px_grad, tf.concat(
        axis=0, values=[tf.slice(tf.shape(px_grad), [0], [1]), [-1]])))
  # Add a small number to avoid divide by 0. l2norm_inv and t_overall are
  # only needed by the disabled BasicClipperOverall alternative below.
  l2norm_inv = tf.rsqrt(tf.reduce_sum(t_list[0] * t_list[0], [1]) + 0.000001)
  t_overall = tf.concat(t_list, axis=0)

  for px_grad, v in zip(px_grads, var_list):
    tensor_name = utils.GetTensorOpName(v)
    if sigma is None:
      # pylint: disable=unpacking-non-sequence
      eps, delta = eps_delta
      with tf.control_dependencies(
          [tf.Assert(tf.greater(eps, 0),
                     ["eps needs to be greater than 0"]),
           tf.Assert(tf.greater(delta, 0),
                     ["delta needs to be greater than 0"])]):
        # The following formula is taken from
        # Dwork and Roth, The Algorithmic Foundations of Differential
        # Privacy, Appendix A.
        # http://www.cis.upenn.edu/~aaroth/Papers/privacybook.pdf
        sigma = tf.sqrt(2.0 * tf.log(1.25 / delta)) / eps
    l2norm_bound, clip = option
    if l2norm_bound is None:
      l2norm_bound, clip = self._default_option
      l2norm_bound *= bound_multiplier
      if ((v.name is not None) and (v.name in self._options)):
        l2norm_bound, clip = self._options[v.name]
    # Alternatives (disabled): GroupedClipper(self.disc_parames) or
    # BasicClipperOverall(l2norm_bound, l2norm_inv).
    clipper = BasicClipper(l2norm_bound)
    if clip:
      x, boundNew = clipper.clip_grads(px_grad)
      clipped_gradients.append(x)
      linear_clipped_weights.append(self.compute_overall_bound(x))
      num_examples_cur = tf.slice(tf.shape(x), [0], [1])
      if x.shape.ndims > 1:
        weights_sizes.append((x.shape[0] * x.shape[1]).value)
      else:
        weights_sizes.append(x.shape[0].value)
      weights_shapes.append(x.shape)
      num_tot_examples = tf.add(num_tot_examples, num_examples_cur)

  # Concatenate all clipped gradients, account and add noise once over the
  # joint vector, then split back into per-variable tensors.
  all_clipped_weights = tf.concat(linear_clipped_weights, axis=-1)
  # NOTE: the number of examples passed to the accountant is hard-coded.
  privacy_accum_op = self._accountant.accumulate_privacy_spending(
      eps_delta, sigma, 200)
  with tf.control_dependencies([privacy_accum_op]):
    saned_x = clipper.add_noise(all_clipped_weights, sigma * l2norm_bound)
  splits = tf.split(saned_x, weights_sizes, 1)
  for i, split in enumerate(splits):
    sanitized_gradient.append(tf.reshape(split, weights_shapes[i]))
  # boundNew holds the bound from the last clipped variable.
  return sanitized_gradient, clipped_gradients, boundNew
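# A worked instance of the Dwork-Roth Gaussian-mechanism formula used above,
# sigma = sqrt(2 * ln(1.25 / delta)) / eps, with illustrative values:
#
#   eps = 1.0, delta = 1e-5
#   sigma = sqrt(2 * ln(1.25e5)) / 1.0 = sqrt(2 * 11.736) ≈ 4.84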
def compute_sanitized_gradients_from_input_perturbation(
    self, loss, ex, input_sigma, var_list, add_noise=True):
  """Compute the sanitized gradients from input perturbation.

  Args:
    loss: the loss tensor.
    ex: the input examples the loss depends on.
    input_sigma: sigma of the noise applied to the input.
    var_list: the variables.
    add_noise: if true, then add noise. Always clip.
  Returns:
    a tuple of (sanitized gradients, masked sigmas, unmasked sigmas).
  Raises:
    TypeError: if var_list contains non-variable.
  """
  self._assert_valid_dtypes([loss])

  xs = [tf.convert_to_tensor(x) for x in var_list]
  # Each element in px_grads is the px_grad for a param matrix, with shape
  # [batch_size, shape of param matrix].
  px_grads = per_example_gradients.PerExampleGradients(loss, xs)
  # Calculate sigma; sigma has the shape [batch_size].
  unmasked_sigmas = []
  sigmas = []
  sanitized_grads = []
  num = 0
  for px_grad, v in zip(px_grads, var_list):
    num += 1
    if num > FLAGS.ACCOUNT_NUM:
      break
    px_grad_vec = tf.reshape(
        px_grad, [tf.shape(px_grad)[0], -1])  # [batch_size, vec_param]
    # Method 1: full-batch Jacobian of the per-example gradients w.r.t. the
    # input.
    px_pp_grad = batch_jacobian(
        px_grad_vec, ex, use_pfor=False,
        parallel_iterations=px_grad_vec.get_shape().as_list()[0] *
        px_grad_vec.get_shape().as_list()[1])  # [b, vec_param, ex_shape]
    px_pp_grad = tf.reshape(px_pp_grad, [
        px_pp_grad.get_shape().as_list()[0],
        px_pp_grad.get_shape().as_list()[1], -1
    ])  # [b, vec_param, ex_size]
    px_scale = tf.reduce_sum(tf.square(px_pp_grad), 2)  # [batch_size, vec_param]
    # Method 2 (disabled): compute each example's Jacobian separately with a
    # map_fn over the split batch instead of batch_jacobian.

    # Heterogeneous: each param has a different scale; average over params.
    scale = tf.reduce_mean(px_scale, 1)  # [batch_size]
    # Alternative (disabled): take the minimum over params,
    #   scale = tf.reduce_min(px_scale, 1)
    sigma = tf.sqrt(scale) * input_sigma  # [batch_size]
    unmasked_sigmas.append(sigma)
    # Zero out sigmas below the threshold.
    mask = tf.cast(
        tf.greater_equal(sigma,
                         tf.constant(FLAGS.INPUT_DP_SIGMA_THRESHOLD)),
        tf.float32)
    sigma = sigma * mask
    sigmas.append(sigma)

    tensor_name = utils.GetTensorOpName(v)
    sanitized_grad = self._sanitizer.sanitize(
        px_grad, self._eps_delta, sigma=sigma, tensor_name=tensor_name,
        add_noise=add_noise,
        num_examples=tf.slice(tf.shape(px_grad), [0], [1]),
        no_clipping=False)
    sanitized_grads.append(sanitized_grad)

  # Pad sigmas with zeros for the variables skipped by ACCOUNT_NUM.
  while num <= len(var_list):
    sigmas.append(tf.zeros([ex.get_shape().as_list()[0]]))
    num += 1

  return sanitized_grads, sigmas, unmasked_sigmas
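# A minimal numeric sketch of the scale computation above (illustrative): for
# one example with Jacobian rows J = [[0.6, 0.8], [0.0, 1.0]] (vec_param=2,
# ex_size=2),
#
#   px_scale = [0.6^2 + 0.8^2, 0.0^2 + 1.0^2] = [1.0, 1.0]
#   scale    = mean(px_scale) = 1.0
#   sigma    = sqrt(1.0) * input_sigma = input_sigma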