def get_eps(self, delta):
    """Convert the composed RDP curve into an epsilon for a given delta.

    Minimises ``log(1/delta) / (alpha - 1) + RDP(alpha)`` over ``alpha > 1``.
    The search first scans the cached integer orders ``self.alphas``, then
    refines the best integer with a bounded scalar minimisation of the
    analytical curve ``self.evalRDP``.

    :param delta: failure probability, expected in ``[0, 1]``.  A value
        outside that range only triggers a printed message; the
        computation still proceeds.
    :return:
        - ``self.RDP_inf`` when ``delta == 0``;
        - otherwise the minimised objective value (the epsilon);
        - ``None`` if the bounded search past the integer grid fails;
        - ``-1`` if the final bounded search around the best integer
          order fails (the requested delta is not feasible).
    """
    if not self.flag:
        self.build_zeroth_oracle()
        self.flag = True

    if delta < 0 or delta > 1:
        print("Error! delta is a probability and must be between 0 and 1")
    if delta == 0:
        return self.RDP_inf

    def fun(x):
        # Objective at a (possibly fractional) RDP order x.
        if x <= 1:
            return np.inf
        return np.log(1 / delta) / (x - 1) + self.evalRDP(x)

    def fun_int(i):
        # Objective at an integer RDP order i, read from the cached grid.
        # The guard must be a logical `or`: the former `i <= 1 | i >= len(...)`
        # parsed as the chained comparison `i <= (1 | i) >= len(...)`, so the
        # `i <= 1` case was never caught and the upper bound was compared
        # against `1 | i` instead of `i`.
        if i <= 1 or i >= len(self.RDPs_int):
            return np.inf
        return np.log(1 / delta) / (i - 1) + self.RDPs_int[i - 1]

    # Computational constraints only matter when subsampled mechanisms are
    # present.  Check whether the forward difference of the objective is
    # still negative at self.m (and the objective is finite there); if so,
    # double m and extend the bookkeeping.
    while ((self.m < self.m_max)
           and (not np.isposinf(fun(self.m)))
           and (fun_int(self.m - 1) - fun_int(self.m - 2) < 0)):
        if self.flag_subsample:
            # The following line takes O(m^2) time.
            self.logBinomC = utils.get_binom_coeffs(self.m * 2 + 1)

            # Refresh the cached forward differences for the larger range.
            for key in list(self.deltas_cache):
                func_tmp = key[0] if isinstance(key, tuple) else key
                # Bind func_tmp now so the closure cannot observe a later
                # loop iteration's value.
                cgf = lambda x, f=func_tmp: x * f(x + 1)
                deltas, signs_deltas = utils.get_forward_diffs(cgf, self.m * 2)
                self.deltas_cache[key] = [deltas, signs_deltas]

        new_alphas = range(self.m + 1, self.m * 2 + 1, 1)
        self.alphas = np.concatenate((self.alphas, np.array(new_alphas)))  # array of integers
        self.m = self.m * 2

    # NOTE(review): the layout of this method was reconstructed from
    # flattened text; the rebuild below is placed after the loop -- confirm
    # against the upstream file that it does not belong inside it.
    # Re-evaluate every cached mechanism on the current integer grid.
    mm = np.max(self.alphas)
    rdp_int_new = np.zeros_like(self.alphas, float)
    for key in list(self.cache):
        idx = self.idxhash[key]
        rdp = self.RDPs[idx]
        newarray = np.zeros_like(self.alphas, float)
        for j in range(2, mm + 1, 1):
            newarray[j - 1] = rdp(1.0 * j)
        newarray[0] = newarray[1]  # alpha = 1 reuses the alpha = 2 value
        rdp_int_new += newarray * self.coeffs[idx]
        self.cache[key] = newarray
    self.RDPs_int = rdp_int_new

    # Best integer order, skipping alpha = 1 (hence the +1 offset).
    bestint = np.argmin(np.log(1 / delta) / (self.alphas[1:] - 1) + self.RDPs_int[1:]) + 1

    if bestint == self.m - 1:
        if self.verbose:
            print('Warning: Reach quadratic upper bound: m_max.')
        # The minimiser sits at the edge of the integer grid.  Keep doubling
        # the candidate order while the analytical objective still decreases,
        # then run a bounded search over the bracket that was found.
        cur = fun(bestint)
        while (not np.isposinf(cur)) and fun(bestint - 1) - fun(bestint - 2) < -1e-8:
            bestint = bestint * 2
            cur = fun(bestint)

        results = minimize_scalar(fun, method='Bounded', bounds=[self.m - 1, bestint + 2],
                                  options={'disp': False})
        if results.success:
            return results.fun
        return None

    if bestint == 0:
        if self.verbose:
            print('Warning: Smallest alpha = 1.')

    # Refine around the best integer alpha.
    bestalpha = self.alphas[bestint]
    results = minimize_scalar(fun, method='Bounded', bounds=[bestalpha - 1, bestalpha + 1],
                              options={'disp': False})
    if results.success:
        return results.fun
    # Some delta are not feasible.  For example, if p and q are uniform the
    # privacy random variable is either 0 or infinity, and unless delta
    # covers all the infinite events, epsilon cannot be finite.
    return -1
def compose_subsampled_mechanism(self, func, prob, coeff=1.0):
    """Compose a subsampled mechanism into the accountant.

    Per the original note, this is for subsampling without replacement.

    NOTE(review): a second definition with the same name (taking an extra
    ``improved_bound_flag`` argument) follows this one in the visible
    source; if both live in the same scope the later one replaces this
    one -- confirm which is intended.

    :param func: RDP function of the base mechanism; it is called as
        ``func(alpha)``, including ``func(np.inf)``.
    :param prob: sampling probability; ``prob == 1.0`` makes the amplified
        RDP fall back to ``func`` itself.
    :param coeff: weight added for this mechanism (default 1.0).
    :return: nothing; updates ``idxhash``, ``coeffs``, ``RDPs``, ``cache``,
        ``deltas_cache``, ``RDPs_int`` and ``RDP_inf`` on ``self``.
    """
    # This function is for subsample without replacements.
    # Mark the accountant as modified and as containing subsampled items.
    self.flag = False
    self.flag_subsample = True
    if (func, prob) in self.idxhash:
        # This (func, prob) pair was composed before: only bump its weight.
        idx = self.idxhash[(func, prob)]
        # update the coefficients of each function
        self.coeffs[idx] += coeff
        # also update the integer CGFs
        self.RDPs_int += self.cache[(func, prob)] * coeff

    else:

        def cgf(x):
            # CGF at lambda = x expressed through the RDP at alpha = x + 1.
            return x * func(x+1)
        # we need forward differences of the exp(cgf)
        # The following line is the numerically stable way of implementing it.
        # The output is in polar form with logarithmic magnitude
        deltas, signs_deltas = utils.get_forward_diffs(cgf,self.m)

        #deltas1, signs_deltas1 = get_forward_diffs_direct(func, self.m)

        #tmp = deltas-deltas1

        self.deltas_cache[(func,prob)] = [deltas,signs_deltas]

        def subsample_func_int(x):
            # This function evaluates the CGF at alpha = x, i.e., lamb = x - 1
            # The forward differences are re-read from the cache on every call,
            # so a later refresh of self.deltas_cache is picked up here.
            deltas_local, signs_deltas_local = self.deltas_cache[(func,prob)]
            if np.isinf(func(x)):
                return np.inf

            mm = int(x)

            fastupperbound = fast_subsampled_cgf_upperbound(func, mm, prob, deltas_local)

            if mm <= self.alphas[-1]: # compute the bound exactly. Requires book keeping of O(x^2)

                # One log-moment term per j = 2..mm: the smallest of three
                # candidate bounds, plus j*log(prob) and the log binomial
                # coefficient looked up in self.logBinomC.
                moments = [ np.minimum(np.minimum((j)*np.log(np.exp(func(np.inf))-1) + np.minimum(cgf(j-1),np.log(4)),
                                                  np.log(2) + cgf(j-1)),
                                       np.log(4) + 0.5*deltas_local[int(2*np.floor(j/2.0))-1]
                                       + 0.5*deltas_local[int(2*np.ceil(j/2.0))-1]) + j*np.log(prob)
                            +self.logBinomC[int(mm), j] for j in range(2,int(mm+1),1)]

                return np.minimum(fastupperbound, utils.stable_logsumexp([0]+moments))
            elif mm <= self.m_lin_max:  # compute the bound with stirling approximation. Everything is O(x) now.
                # Same terms without the forward-difference candidate; the
                # binomial coefficient comes from utils.logcomb instead of the table.
                moment_bound = lambda j: np.minimum(j * np.log(np.exp(func(np.inf)) - 1)
                                                    + np.minimum(cgf(j - 1), np.log(4)), np.log(2)
                                                    + cgf(j - 1)) + j * np.log(prob) + utils.logcomb(mm, j)
                moments = [moment_bound(j) for j in range(2,mm+1,1)]
                return np.minimum(fastupperbound, utils.stable_logsumexp([0]+ moments))
            else: # Compute the O(1) upper bound
                return fastupperbound

        def subsample_func(x):
            # This function returns the RDP at alpha = x
            # RDP with the linear interpolation upper bound of the CGF

            epsinf, tmp = subsample_epsdelta(func(np.inf),0,prob)

            if np.isinf(x):
                return epsinf
            if prob == 1.0:
                # With prob == 1.0 the base mechanism's RDP is returned unchanged.
                return func(x)

            if (x >= 1.0) and (x <= 2.0):
                # Orders in [1, 2] reuse the value at alpha = 2.
                return np.minimum(epsinf, subsample_func_int(2.0) / (2.0-1))
            if np.equal(np.mod(x, 1), 0):
                # Integer order: evaluate the CGF bound directly.
                return np.minimum(epsinf, subsample_func_int(x) / (x-1) )
            xc = math.ceil(x)
            xf = math.floor(x)
            # Fractional order: linearly interpolate the CGF bound between the
            # two neighbouring integers, then divide by (x - 1).
            return np.minimum(
                epsinf,
                ((x-xf)*subsample_func_int(xc) + (1-(x-xf))*subsample_func_int(xf)) / (x-1)
            )

        # book keeping
        self.idxhash[(func, prob)] = self.n # save the index
        self.n += 1 # increment the number of unique mechanisms
        self.coeffs.append(coeff) # Update the coefficient
        self.RDPs.append(subsample_func) # update the analytical functions

        # also update the integer results up to m_max.
        if (func,prob) in self.cache:
            results = self.cache[(func,prob)]
        else:
            results = np.zeros_like(self.RDPs_int, float)
            # m = np.max(self.lambs)
            mm = np.max(self.alphas)
            for alpha in range(2, mm+1):
                results[alpha-1] = subsample_func(alpha)
            results[0] = results[1]  # Provide the trivial upper bound of RDP at alpha = 1 --- the KL privacy.
            self.cache[(func,prob)] = results  # save in cache
        self.RDPs_int += results * coeff
    # update the pure DP
    # (runs for both branches above, so every call adds eps * coeff)
    eps, delta = subsample_epsdelta(func(np.inf), 0, prob)
    self.RDP_inf += eps * coeff
def compose_subsampled_mechanism(self, func, prob, coeff=1.0, improved_bound_flag=False):
    """
    Compose a subsampled mechanism into the accountant.

    This function is for subsampling without replacement
    (find a random subset of proportion prob).

    :param func: RDP function of the mechanism before amplification by sampling
    :param prob: proportion of the data to sample
    :param coeff: number of times the subsampled mechanism is being composed.
    :param improved_bound_flag:
        - If True, then it uses Theorem 27 of https://arxiv.org/pdf/1808.00087.pdf
        - If False (default value), it uses Theorem 9 of https://arxiv.org/pdf/1808.00087.pdf
        To qualify for the improved bound, the mechanism needs to have a pair of
        neighboring datasets that is worst for all Renyi-divergence and
        Pearson-Vajda divergence; also, the RDP bound needs to be tight
        (see Definition 26 from the same paper). Gaussian mechanism, Laplace
        mechanism and many others satisfy this condition.
        Note that the flag is only consulted the first time a given
        (func, prob) pair is composed; later calls with the same pair reuse
        the cached results.
    :return: nothing (updates to the RDP accountant's attribute)
    """
    # (find a random subset of proportion prob)
    # Mark the accountant as modified and as containing subsampled items.
    self.flag = False
    self.flag_subsample = True
    if (func, prob) in self.idxhash:
        # This (func, prob) pair was composed before: only bump its weight.
        idx = self.idxhash[(func, prob)]
        # update the coefficients of each function
        self.coeffs[idx] += coeff
        # also update the integer CGFs
        self.RDPs_int += self.cache[(func, prob)] * coeff

    else:

        def cgf(x):
            # CGF at lambda = x expressed through the RDP at alpha = x + 1.
            return x * func(x + 1)

        if not improved_bound_flag:

            def subsample_func_int(x):
                # output the cgf of the subsampled mechanism
                mm = int(x)
                eps_inf = func(np.inf)

                # The j = 2 term is handled separately from the j >= 3 terms.
                moments_two = 2 * np.log(prob) + utils.logcomb(mm,2) \
                              + np.minimum(np.log(4) + func(2.0) + np.log(1-np.exp(-func(2.0))),
                                           func(2.0) + np.minimum(np.log(2),
                                                                  2 * (eps_inf+np.log(1-np.exp(-eps_inf)))))
                moment_bound = lambda j: np.minimum(j * (eps_inf + np.log(1-np.exp(-eps_inf))),
                                                    np.log(2)) + cgf(j - 1) \
                                         + j * np.log(prob) + utils.logcomb(mm, j)
                moments = [moment_bound(j) for j in range(3, mm + 1, 1)]
                # Never report more than the un-amplified CGF (x - 1) * func(x).
                return np.minimum(
                    (x - 1) * func(x),
                    utils.stable_logsumexp([0, moments_two] + moments))
        else:
            # we need forward differences of exp(cgf)
            # The following line is the numerically stable way of implementing it.
            # The output is in polar form with logarithmic magnitude
            deltas, signs_deltas = utils.get_forward_diffs(cgf, self.m)

            #deltas1, signs_deltas1 = get_forward_diffs_direct(func, self.m)

            #tmp = deltas-deltas1

            self.deltas_cache[(func, prob)] = [deltas, signs_deltas]

            def subsample_func_int(x):
                # This function evaluates the CGF at alpha = x, i.e., lamb = x - 1
                # The forward differences are re-read from the cache on every
                # call, so a later refresh of self.deltas_cache is picked up here.
                deltas_local, signs_deltas_local = self.deltas_cache[(
                    func, prob)]
                if np.isinf(func(x)):
                    return np.inf

                mm = int(x)
                eps_inf = func(np.inf)

                # The j = 2 term is handled separately from the j >= 3 terms.
                moments_two = 2 * np.log(prob) + utils.logcomb(mm, 2) \
                              + np.minimum(
                                  np.log(4) + func(2.0) + np.log(1 - np.exp(-func(2.0))),
                                  func(2.0) + np.minimum(np.log(2),
                                                         2 * (eps_inf + np.log(1 - np.exp(-eps_inf)))))

                # j >= 3 term including the forward-difference candidate.
                moment_bound = lambda j: np.minimum(np.log(4) + 0.5*deltas_local[int(2*np.floor(j/2.0))-1]
                                                    + 0.5*deltas_local[int(2*np.ceil(j/2.0))-1],
                                                    np.minimum(j * (eps_inf + np.log(1 - np.exp(-eps_inf))),
                                                               np.log(2)) + cgf(j - 1)) \
                                         + j * np.log(prob) + utils.logcomb(mm, j)
                # j >= 3 term without the forward differences (used beyond the
                # range checked against self.alphas[-1] below).
                moment_bound_linear = lambda j: np.minimum(j * (eps_inf + np.log(1-np.exp(-eps_inf))),
                                                           np.log(2)) + cgf(j - 1) \
                                                + j * np.log(prob) + utils.logcomb(mm, j)

                fastupperbound = fast_subsampled_cgf_upperbound(
                    func, mm, prob, deltas_local)

                if mm <= self.alphas[
                        -1]:  # compute the bound exactly. Requires book keeping of O(x^2)
                    #
                    # moments = [ np.minimum(np.minimum((j)*np.log(np.exp(func(np.inf))-1) + np.minimum(cgf(j-1),np.log(4)),
                    #                                   np.log(2) + cgf(j-1)),
                    #                        np.log(4) + 0.5*deltas_local[int(2*np.floor(j/2.0))-1]
                    #                        + 0.5*deltas_local[int(2*np.ceil(j/2.0))-1]) + j*np.log(prob)
                    #             +self.logBinomC[int(mm), j] for j in range(2,int(mm+1),1)]
                    moments = [
                        moment_bound(j) for j in range(3, mm + 1, 1)
                    ]
                    return np.minimum(
                        fastupperbound,
                        utils.stable_logsumexp([0, moments_two] + moments))
                elif mm <= self.m_lin_max:  # compute the bound with stirling approximation. Everything is O(x) now.
                    # moment_bound = lambda j: np.minimum(j * np.log(np.exp(func(np.inf)) - 1)
                    #                                     + np.minimum(cgf(j - 1), np.log(4)), np.log(2)
                    #                                     + cgf(j - 1)) + j * np.log(prob) + utils.logcomb(mm, j)
                    # moments = [moment_bound(j) for j in range(2,mm+1,1)]
                    moments = [
                        moment_bound_linear(j)
                        for j in range(3, mm + 1, 1)
                    ]
                    return np.minimum(
                        fastupperbound,
                        utils.stable_logsumexp([0, moments_two] + moments))
                else:  # Compute the O(1) upper bound
                    return fastupperbound

        def subsample_func(x):
            # This function returns the RDP at alpha = x
            # RDP with the linear interpolation upper bound of the CGF

            epsinf, tmp = subsample_epsdelta(func(np.inf), 0, prob)

            if np.isinf(x):
                return epsinf
            if prob == 1.0:
                # With prob == 1.0 the base mechanism's RDP is returned unchanged.
                return func(x)

            if (x >= 1.0) and (x <= 2.0):
                # Orders in [1, 2] reuse the value at alpha = 2.
                return np.minimum(epsinf,
                                  subsample_func_int(2.0) / (2.0 - 1))
            if np.equal(np.mod(x, 1), 0):
                # Integer order: evaluate the CGF bound directly.
                return np.minimum(epsinf,
                                  subsample_func_int(x) / (x - 1))
            xc = math.ceil(x)
            xf = math.floor(x)
            # Fractional order: take the smallest of the pure-DP bound, the
            # un-amplified RDP func(x), and the CGF bound linearly interpolated
            # between the neighbouring integers and divided by (x - 1).
            return np.min([
                epsinf,
                func(x),
                ((x - xf) * subsample_func_int(xc) +
                 (1 - (x - xf)) * subsample_func_int(xf)) / (x - 1)
            ])

        # book keeping
        self.idxhash[(func, prob)] = self.n  # save the index
        self.n += 1  # increment the number of unique mechanisms
        self.coeffs.append(coeff)  # Update the coefficient
        self.RDPs.append(subsample_func)  # update the analytical functions

        # also update the integer results up to m_max.
        if (func, prob) in self.cache:
            results = self.cache[(func, prob)]
        else:
            results = np.zeros_like(self.RDPs_int, float)
            # m = np.max(self.lambs)
            mm = np.max(self.alphas)
            for alpha in range(2, mm + 1):
                results[alpha - 1] = subsample_func(alpha)
            results[0] = results[
                1]  # Provide the trivial upper bound of RDP at alpha = 1 --- the KL privacy.
            self.cache[(func, prob)] = results  # save in cache
        self.RDPs_int += results * coeff
    # update the pure DP
    # (runs for both branches above, so every call adds eps * coeff)
    eps, delta = subsample_epsdelta(func(np.inf), 0, prob)
    self.RDP_inf += eps * coeff