def subsample_func(x):
    """Return the RDP of the subsampled mechanism at order ``alpha = x``.

    Integer orders are read from ``subsample_func_int`` (a CGF, hence the
    division by ``x - 1``); non-integer orders use a linear interpolation of
    the two neighbouring integer CGF values. Every result is capped by the
    epsilon returned by ``subsample_epsdelta``.

    NOTE(review): ``func``, ``prob``, ``subsample_func_int`` and
    ``subsample_epsdelta`` are free names here -- presumably this is a closure
    defined inside one of the ``compose_*`` methods; confirm against the
    enclosing scope.
    """
    # Degenerate cases: unbounded base RDP, no subsampling, nothing sampled.
    if np.isinf(func(x)):
        return np.inf
    if prob == 1.0:
        return func(x)
    if prob == 0:
        return 0

    eps_cap, _unused = subsample_epsdelta(func(np.inf), 0, prob)
    if np.isinf(x):
        return eps_cap

    if 1.0 <= x <= 2.0:
        # Orders in [1, 2] all reuse the value computed at alpha = 2.
        return np.minimum(eps_cap, subsample_func_int(2.0) / (2.0 - 1))

    if np.equal(np.mod(x, 1), 0):
        # Integer order: convert the CGF into an RDP value.
        return np.minimum(eps_cap, subsample_func_int(x) / (x - 1))

    # Non-integer order: interpolate the CGF between ceil(x) and floor(x).
    upper = math.ceil(x)
    lower = math.floor(x)
    frac = x - lower
    interpolated = (frac * subsample_func_int(upper)
                    + (1 - frac) * subsample_func_int(lower))
    return np.minimum(eps_cap, interpolated / (x - 1))
def compose_poisson_subsampled_mechanisms1(self, func, prob, coeff=1.0):
    """Compose a Poisson-subsampled mechanism using the general amplification bound.

    Per the original author's notes this bound needs no additional assumptions
    on the mechanism. Mixing Poisson subsampling and standard subsampling in
    one accountant is not supported at the moment.

    :param func: RDP function of the mechanism before subsampling;
        ``func(alpha)`` is evaluated at integer orders and at ``np.inf``.
    :param prob: sampling probability.
    :param coeff: number of times the subsampled mechanism is composed.
    :return: nothing; updates ``coeffs``, ``RDPs``, ``RDPs_int``, ``cache``,
        ``idxhash``, ``n`` and ``RDP_inf`` on the accountant.
    """
    self.flag = False
    self.flag_subsample = True
    if (func, prob) in self.idxhash:
        idx = self.idxhash[(func, prob)]
        # update the coefficients of each function
        self.coeffs[idx] += coeff
        # also update the cached integer-order values
        self.RDPs_int += self.cache[(func, prob)] * coeff
    else:
        # compute an easy to compute upper bound of it.
        def cgf(x):
            return x * func(x + 1)

        def subsample_func_int(x):
            # CGF of the subsampled mechanism at alpha = x, i.e. lamb = x - 1.
            if np.isinf(func(x)):
                return np.inf
            if prob == 1.0:
                # No amplification. Return the CGF, (x - 1) * RDP, so that the
                # division by (x - 1) in subsample_func recovers func(x).
                return (x - 1) * func(x)

            mm = int(x)
            if x <= self.alphas[-1]:
                # compute the bound exactly, using the precomputed log-binomials.
                moments = [cgf(1) + 2 * np.log(prob) + (mm - 2) * np.log(1 - prob)
                           + self.logBinomC[mm, 2]]
                moments = moments + [cgf(j - 1 + 1) + j * np.log(prob)
                                     + (mm - j) * np.log(1 - prob)
                                     + self.logBinomC[mm, j]
                                     for j in range(3, mm + 1, 1)]
                return utils.stable_logsumexp(
                    [(mm - 1) * np.log(1 - prob) + np.log(1 + (mm - 1) * prob)]
                    + moments)
            elif mm <= self.m_lin_max:
                # same sum, with log-binomials computed on the fly.
                moments = [cgf(1) + 2 * np.log(prob) + (mm - 2) * np.log(1 - prob)
                           + utils.logcomb(mm, 2)]
                moments = moments + [cgf(j - 1 + 1) + j * np.log(prob)
                                     + (mm - j) * np.log(1 - prob)
                                     + utils.logcomb(mm, j)
                                     for j in range(3, mm + 1, 1)]
                return utils.stable_logsumexp(
                    [(mm - 1) * np.log(1 - prob) + np.log(1 + (mm - 1) * prob)]
                    + moments)
            else:
                # Only this branch needs the fast bound, so compute it lazily.
                return fast_poission_subsampled_cgf_upperbound(func, mm, prob)

        def subsample_func(x):
            # RDP at alpha = x via the linear interpolation upper bound of the CGF.
            epsinf, _ = subsample_epsdelta(func(np.inf), 0, prob)

            if np.isinf(x):
                return epsinf
            if prob == 1.0:
                # Sampling everything leaves the mechanism unchanged.
                return func(x)
            if (x >= 1.0) and (x <= 2.0):
                return np.minimum(epsinf, subsample_func_int(2.0) / (2.0 - 1))
            if np.equal(np.mod(x, 1), 0):
                return np.minimum(epsinf, subsample_func_int(x) / (x - 1))
            xc = math.ceil(x)
            xf = math.floor(x)
            return np.minimum(
                epsinf,
                ((x - xf) * subsample_func_int(xc)
                 + (1 - (x - xf)) * subsample_func_int(xf)) / (x - 1)
            )

        # book keeping
        self.idxhash[(func, prob)] = self.n  # save the index
        self.n += 1  # increment the number of unique mechanisms
        self.coeffs.append(coeff)  # Update the coefficient
        self.RDPs.append(subsample_func)  # update the analytical functions

        # also update the integer results
        if (func, prob) in self.cache:
            results = self.cache[(func, prob)]
        else:
            results = np.zeros_like(self.RDPs_int, float)
            mm = np.max(self.alphas)  # evaluate the RDP up to order mm

            for alpha in range(2, mm + 1):
                # Store the RDP (not the raw CGF) so the cached values agree
                # with subsample_func and with the other compose_* methods.
                results[alpha - 1] = subsample_func(alpha)
            # Provide the trivial upper bound of RDP at alpha = 1 --- the KL privacy.
            results[0] = results[1]
            self.cache[(func, prob)] = results  # save in cache

        self.RDPs_int += results * coeff

    # update the pure DP tracker
    eps, _ = subsample_epsdelta(func(np.inf), 0, prob)
    self.RDP_inf += eps * coeff

    # TODO: 1. Modularize the several Poission sampling versions.
    #       2. Support both sampling schemes together.
def compose_poisson_subsampled_mechanisms(self, func, prob, coeff=1.0):
    """Compose a Poisson-subsampled mechanism using the lower-bound formula.

    Per the original author's notes, this implements the lower bound for
    subsampled RDP, which is also the exact Poisson-subsampled RDP for many
    mechanisms including the Gaussian mechanism. Mixing Poisson subsampling
    and standard subsampling in one accountant is not supported at the moment.

    :param func: RDP function of the mechanism before subsampling;
        ``func(alpha)`` is evaluated at integer orders and at ``np.inf``.
    :param prob: sampling probability.
    :param coeff: number of times the subsampled mechanism is composed.
    :return: nothing; updates ``coeffs``, ``RDPs``, ``RDPs_int``, ``cache``,
        ``idxhash``, ``n`` and ``RDP_inf`` on the accountant.
    """
    # TODO: modify the caching identifies so that we can distinguish different
    # types of subsampling
    self.flag = False
    self.flag_subsample = True
    if (func, prob) in self.idxhash:
        idx = self.idxhash[(func, prob)]  # TODO: this is really where it needs to be changed.
        # update the coefficients of each function
        self.coeffs[idx] += coeff
        # also update the cached integer-order values
        self.RDPs_int += self.cache[(func, prob)] * coeff
    else:
        # compute an easy to compute upper bound of it.
        def cgf(x):
            return x * func(x + 1)

        def subsample_func_int(x):
            # CGF of the subsampled mechanism at alpha = x, i.e. lamb = x - 1.
            if np.isinf(func(x)):
                return np.inf

            mm = int(x)
            if x <= self.alphas[-1]:
                # compute the bound exactly, using the precomputed log-binomials.
                moments = [cgf(j - 1) + j * np.log(prob) + (mm - j) * np.log(1 - prob)
                           + self.logBinomC[mm, j] for j in range(2, mm + 1, 1)]
                return utils.stable_logsumexp(
                    [(mm - 1) * np.log(1 - prob) + np.log(1 + (mm - 1) * prob)]
                    + moments)
            elif mm <= self.m_lin_max:
                # same sum, with log-binomials computed on the fly.
                moments = [cgf(j - 1) + j * np.log(prob) + (mm - j) * np.log(1 - prob)
                           + utils.logcomb(mm, j) for j in range(2, mm + 1, 1)]
                return utils.stable_logsumexp(
                    [(mm - 1) * np.log(1 - prob) + np.log(1 + (mm - 1) * prob)]
                    + moments)
            else:
                # Bind the fast bound here, in the only branch that returns it.
                return fast_poission_subsampled_cgf_upperbound(func, mm, prob)

        def subsample_func(x):
            # RDP at alpha = x via the linear interpolation upper bound of the CGF.
            if np.isinf(func(x)):
                return np.inf
            if prob == 1.0:
                return func(x)

            epsinf, _ = subsample_epsdelta(func(np.inf), 0, prob)

            if np.isinf(x):
                return epsinf
            if (x >= 1.0) and (x <= 2.0):
                return np.minimum(epsinf, subsample_func_int(2.0) / (2.0 - 1))
            if np.equal(np.mod(x, 1), 0):
                return np.minimum(epsinf, subsample_func_int(x) / (x - 1))
            xc = math.ceil(x)
            xf = math.floor(x)
            return np.minimum(
                epsinf,
                ((x - xf) * subsample_func_int(xc)
                 + (1 - (x - xf)) * subsample_func_int(xf)) / (x - 1)
            )

        # book keeping
        self.idxhash[(func, prob)] = self.n  # save the index
        self.n += 1  # increment the number of unique mechanisms
        self.coeffs.append(coeff)  # Update the coefficient
        self.RDPs.append(subsample_func)  # update the analytical functions

        # also update the integer results, with a vectorized computation.
        # TODO: pre-computing subsampled RDP for integers is error-prone
        #       (implement the same thing twice)
        # TODO: and its benefits are not clear. We should consider removing it
        #       and simply call the lambda function.
        if (func, prob) in self.cache:
            results = self.cache[(func, prob)]
        else:
            results = np.zeros_like(self.RDPs_int, float)
            mm = np.max(self.alphas)  # evaluate the RDP up to order mm

            if prob == 1.0:
                # No amplification: mirror subsample_func and avoid
                # log(1 - prob) = -inf poisoning the sums with NaN.
                for alpha in range(2, mm + 1):
                    results[alpha - 1] = func(alpha)
            else:
                log_p = np.log(prob)
                log_q = np.log(1 - prob)
                jvec = np.arange(2, mm + 1)
                # logterm3plus[j - 2] holds the alpha-independent part of term j.
                logterm3plus = np.zeros_like(results)
                for j in jvec:
                    logterm3plus[j - 2] = cgf(j - 1) + j * log_p

                for alpha in range(2, mm + 1):
                    # Index alpha - 2 is the j = alpha term, the highest-order
                    # term that enters the sum for this alpha.
                    if np.isinf(logterm3plus[alpha - 2]):
                        results[alpha - 1] = np.inf
                    else:
                        # Term j carries (1 - prob)^(alpha - j), the same
                        # exponent subsample_func_int uses.
                        tmp = utils.stable_logsumexp(
                            logterm3plus[0:alpha - 1]
                            + self.logBinomC[alpha, 2:(alpha + 1)]
                            + (alpha - jvec[0:alpha - 1]) * log_q)
                        results[alpha - 1] = utils.stable_logsumexp_two(
                            (alpha - 1) * log_q + np.log(1 + (alpha - 1) * prob),
                            tmp) / (1.0 * alpha - 1)

            # Provide the trivial upper bound of RDP at alpha = 1 --- the KL privacy.
            results[0] = results[1]
            self.cache[(func, prob)] = results  # save in cache

        self.RDPs_int += results * coeff

    # update the pure DP tracker
    eps, _ = subsample_epsdelta(func(np.inf), 0, prob)
    self.RDP_inf += eps * coeff
def compose_subsampled_mechanism(self, func, prob, coeff=1.0):
    """Compose a mechanism amplified by subsampling without replacement.

    :param func: RDP function of the mechanism before subsampling.
    :param prob: sampling proportion.
    :param coeff: number of times the subsampled mechanism is composed.
    :return: nothing; the accountant's bookkeeping attributes are updated.
    """
    self.flag = False
    self.flag_subsample = True
    key = (func, prob)

    if key in self.idxhash:
        # Known mechanism: bump its coefficient and the integer-order totals.
        position = self.idxhash[key]
        self.coeffs[position] += coeff
        self.RDPs_int += self.cache[key] * coeff
    else:
        def cgf(lamb):
            return lamb * func(lamb + 1)

        # Forward differences of exp(cgf), computed in a numerically stable
        # way; the result is in polar form with logarithmic magnitude.
        log_diffs, diff_signs = utils.get_forward_diffs(cgf, self.m)
        self.deltas_cache[key] = [log_diffs, diff_signs]

        def linear_moment(j):
            # Part of the j-th moment bound that does not need the forward
            # differences; shared by the exact and the Stirling branches.
            return np.minimum(
                j * np.log(np.exp(func(np.inf)) - 1)
                + np.minimum(cgf(j - 1), np.log(4)),
                np.log(2) + cgf(j - 1))

        def subsample_func_int(x):
            # CGF of the subsampled mechanism at alpha = x, i.e. lamb = x - 1.
            deltas_local, _signs = self.deltas_cache[key]
            if np.isinf(func(x)):
                return np.inf

            order = int(x)
            quick_bound = fast_subsampled_cgf_upperbound(
                func, order, prob, deltas_local)

            if order <= self.alphas[-1]:
                # Exact bound; requires book keeping of O(x^2).
                terms = []
                for j in range(2, int(order + 1), 1):
                    base = linear_moment(j)
                    via_diffs = (np.log(4)
                                 + 0.5 * deltas_local[int(2 * np.floor(j / 2.0)) - 1]
                                 + 0.5 * deltas_local[int(2 * np.ceil(j / 2.0)) - 1])
                    terms.append(np.minimum(base, via_diffs)
                                 + j * np.log(prob)
                                 + self.logBinomC[int(order), j])
                return np.minimum(quick_bound,
                                  utils.stable_logsumexp([0] + terms))

            if order <= self.m_lin_max:
                # Bound with stirling approximation. Everything is O(x) now.
                terms = [linear_moment(j) + j * np.log(prob) + utils.logcomb(order, j)
                         for j in range(2, order + 1, 1)]
                return np.minimum(quick_bound,
                                  utils.stable_logsumexp([0] + terms))

            # Beyond m_lin_max only the O(1) upper bound is used.
            return quick_bound

        def subsample_func(x):
            # RDP at alpha = x, via linear interpolation of the CGF.
            eps_cap, _ = subsample_epsdelta(func(np.inf), 0, prob)

            if np.isinf(x):
                return eps_cap
            if prob == 1.0:
                return func(x)
            if 1.0 <= x <= 2.0:
                return np.minimum(eps_cap, subsample_func_int(2.0) / (2.0 - 1))
            if np.equal(np.mod(x, 1), 0):
                return np.minimum(eps_cap, subsample_func_int(x) / (x - 1))

            upper = math.ceil(x)
            lower = math.floor(x)
            frac = x - lower
            blended = (frac * subsample_func_int(upper)
                       + (1 - frac) * subsample_func_int(lower))
            return np.minimum(eps_cap, blended / (x - 1))

        # Register the new mechanism.
        self.idxhash[key] = self.n
        self.n += 1
        self.coeffs.append(coeff)
        self.RDPs.append(subsample_func)

        # Integer-order results up to the largest tracked alpha.
        if key in self.cache:
            results = self.cache[key]
        else:
            results = np.zeros_like(self.RDPs_int, float)
            top = np.max(self.alphas)
            for alpha in range(2, top + 1):
                results[alpha - 1] = subsample_func(alpha)
            # Trivial upper bound of RDP at alpha = 1 --- the KL privacy.
            results[0] = results[1]
            self.cache[key] = results

        self.RDPs_int += results * coeff

    # Pure-DP tracker.
    eps, _ = subsample_epsdelta(func(np.inf), 0, prob)
    self.RDP_inf += eps * coeff
def compose_subsampled_mechanism(self, func, prob, coeff=1.0, improved_bound_flag=False):
    """
    Compose a mechanism amplified by subsampling without replacement.

    :param func: RDP function of the mechanism before amplification by sampling
    :param prob: proportion of the data to sample
    :param coeff: number of times the subsampled mechanism is being composed.
    :param improved_bound_flag:
        - If True, then it uses Theorem 27 of https://arxiv.org/pdf/1808.00087.pdf
        - If False (default value), it uses Theorem 9 of https://arxiv.org/pdf/1808.00087.pdf
        To qualify for the improved bound, the mechanism needs to have a pair of
        neighboring datasets that is worst for all Renyi-divergence and
        Pearson-Vajda divergence; also, the RDP bound needs to be tight
        (see Definition 26 from the same paper). Gaussian mechanism, Laplace
        mechanism and many others satisfy this condition.
    :return: nothing (updates to the RDP accountant's attribute)

    NOTE(review): the lookup key is ``(func, prob)`` only, so a pair that is
    already registered is reused as-is and ``improved_bound_flag`` has no
    effect on that call.
    """
    # (find a random subset of proportion prob)
    self.flag = False
    self.flag_subsample = True
    if (func, prob) in self.idxhash:
        idx = self.idxhash[(func, prob)]
        # update the coefficients of each function
        self.coeffs[idx] += coeff
        # also update the cached integer-order values
        self.RDPs_int += self.cache[(func, prob)] * coeff
    else:
        def cgf(x):
            return x * func(x + 1)

        if not improved_bound_flag:
            def subsample_func_int(x):
                # output the cgf of the subsampled mechanism
                mm = int(x)
                eps_inf = func(np.inf)

                # j = 2 term of the sum, handled separately from j >= 3.
                moments_two = 2 * np.log(prob) + utils.logcomb(mm,2) \
                              + np.minimum(np.log(4) + func(2.0) + np.log(1-np.exp(-func(2.0))),
                                           func(2.0) + np.minimum(np.log(2),
                                                                  2 * (eps_inf+np.log(1-np.exp(-eps_inf)))))
                # j >= 3 terms.
                moment_bound = lambda j: np.minimum(j * (eps_inf + np.log(1-np.exp(-eps_inf))),
                                                    np.log(2)) + cgf(j - 1) \
                                         + j * np.log(prob) + utils.logcomb(mm, j)
                moments = [moment_bound(j) for j in range(3, mm + 1, 1)]
                # Never report more than the un-amplified CGF (x - 1) * func(x).
                return np.minimum((x - 1) * func(x), utils.stable_logsumexp([0, moments_two] + moments))
        else:
            # we need forward differences of exp(cgf)
            # The following line is the numerically stable way of implementing it.
            # The output is in polar form with logarithmic magnitude
            deltas, signs_deltas = utils.get_forward_diffs(cgf, self.m)

            self.deltas_cache[(func, prob)] = [deltas, signs_deltas]

            def subsample_func_int(x):
                # This function evaluates the CGF at alpha = x, i.e., lamb = x - 1
                deltas_local, signs_deltas_local = self.deltas_cache[(func, prob)]
                if np.isinf(func(x)):
                    return np.inf

                mm = int(x)
                eps_inf = func(np.inf)

                # j = 2 term of the sum, handled separately from j >= 3.
                moments_two = 2 * np.log(prob) + utils.logcomb(mm, 2) \
                              + np.minimum(np.log(4) + func(2.0) + np.log(1 - np.exp(-func(2.0))),
                                           func(2.0) + np.minimum(np.log(2),
                                                                  2 * (eps_inf + np.log(1 - np.exp(-eps_inf)))))

                # j >= 3 terms: the smaller of the forward-difference bound
                # and the bound that only needs eps_inf and the cgf.
                moment_bound = lambda j: np.minimum(np.log(4) + 0.5*deltas_local[int(2*np.floor(j/2.0))-1]
                                                    + 0.5*deltas_local[int(2*np.ceil(j/2.0))-1],
                                                    np.minimum(j * (eps_inf + np.log(1 - np.exp(-eps_inf))),
                                                               np.log(2)) + cgf(j - 1)) \
                                         + j * np.log(prob) + utils.logcomb(mm, j)
                # Same as above but without the forward-difference bound.
                moment_bound_linear = lambda j: np.minimum(j * (eps_inf + np.log(1-np.exp(-eps_inf))),
                                                           np.log(2)) + cgf(j - 1) \
                                                + j * np.log(prob) + utils.logcomb(mm, j)

                fastupperbound = fast_subsampled_cgf_upperbound(func, mm, prob, deltas_local)

                if mm <= self.alphas[-1]:
                    # compute the bound exactly. Requires book keeping of O(x^2)
                    moments = [moment_bound(j) for j in range(3, mm + 1, 1)]
                    return np.minimum(fastupperbound, utils.stable_logsumexp([0, moments_two] + moments))
                elif mm <= self.m_lin_max:
                    # compute the bound with stirling approximation. Everything is O(x) now.
                    moments = [moment_bound_linear(j) for j in range(3, mm + 1, 1)]
                    return np.minimum(fastupperbound, utils.stable_logsumexp([0, moments_two] + moments))
                else:
                    # Compute the O(1) upper bound
                    return fastupperbound

        def subsample_func(x):
            # This function returns the RDP at alpha = x
            # RDP with the linear interpolation upper bound of the CGF
            epsinf, tmp = subsample_epsdelta(func(np.inf), 0, prob)

            if np.isinf(x):
                return epsinf
            if prob == 1.0:
                return func(x)

            # Orders in [1, 2] reuse the value at alpha = 2.
            if (x >= 1.0) and (x <= 2.0):
                return np.minimum(epsinf, subsample_func_int(2.0) / (2.0 - 1))
            # Integer order: convert the CGF to an RDP value.
            if np.equal(np.mod(x, 1), 0):
                return np.minimum(epsinf, subsample_func_int(x) / (x - 1))
            xc = math.ceil(x)
            xf = math.floor(x)
            # Non-integer order: take the smallest of the pure-DP cap, the
            # un-amplified RDP and the interpolated CGF divided by (x - 1).
            return np.min([
                epsinf, func(x),
                ((x - xf) * subsample_func_int(xc) +
                 (1 - (x - xf)) * subsample_func_int(xf)) / (x - 1)
            ])

        # book keeping
        self.idxhash[(func, prob)] = self.n  # save the index
        self.n += 1  # increment the number of unique mechanisms
        self.coeffs.append(coeff)  # Update the coefficient
        self.RDPs.append(subsample_func)  # update the analytical functions

        # also update the integer results up to the largest tracked alpha.
        if (func, prob) in self.cache:
            results = self.cache[(func, prob)]
        else:
            results = np.zeros_like(self.RDPs_int, float)
            mm = np.max(self.alphas)
            for alpha in range(2, mm + 1):
                results[alpha - 1] = subsample_func(alpha)
            results[0] = results[1]  # Provide the trivial upper bound of RDP at alpha = 1 --- the KL privacy.
            self.cache[(func, prob)] = results  # save in cache

        self.RDPs_int += results * coeff

    # update the pure DP
    eps, delta = subsample_epsdelta(func(np.inf), 0, prob)
    self.RDP_inf += eps * coeff