def subsample_func_int(x):
    """Evaluate the CGF upper bound of the subsampled mechanism at alpha = x.

    alpha = x corresponds to lambda = x - 1.  This is a closure: ``self``,
    ``func``, ``prob`` and ``cgf`` come from the enclosing scope.

    Returns ``np.inf`` when ``func(x)`` is infinite.  Otherwise returns the
    smaller of the fast bound and a term-by-term bound, or the fast bound
    alone once ``int(x)`` exceeds ``self.m_lin_max``.
    """
    # Only the first element of the cached pair is used below.
    cached_deltas, _ = self.deltas_cache[(func, prob)]
    if np.isinf(func(x)):
        return np.inf
    mm = int(x)

    fastupperbound = fast_subsampled_cgf_upperbound(func, mm, prob,
                                                    cached_deltas)

    if mm <= self.alphas[-1]:
        # Term-by-term bound using the log-binomial table self.logBinomC.
        moments = []
        for j in range(2, int(mm + 1), 1):
            eps_based = np.minimum(
                j * np.log(np.exp(func(np.inf)) - 1)
                + np.minimum(cgf(j - 1), np.log(4)),
                np.log(2) + cgf(j - 1))
            # Indices derived from the even integers bracketing j.
            lower_even = int(2 * np.floor(j / 2.0)) - 1
            upper_even = int(2 * np.ceil(j / 2.0)) - 1
            delta_based = (np.log(4) + 0.5 * cached_deltas[lower_even]
                           + 0.5 * cached_deltas[upper_even])
            moments.append(np.minimum(eps_based, delta_based)
                           + j * np.log(prob)
                           + self.logBinomC[int(mm), j])
        return np.minimum(fastupperbound,
                          utils.stable_logsumexp([0] + moments))

    if mm <= self.m_lin_max:
        # Same sum, but the binomial coefficient comes from utils.logcomb
        # and the delta-based cap is not applied.
        def moment_bound(j):
            eps_based = np.minimum(
                j * np.log(np.exp(func(np.inf)) - 1)
                + np.minimum(cgf(j - 1), np.log(4)),
                np.log(2) + cgf(j - 1))
            return eps_based + j * np.log(prob) + utils.logcomb(mm, j)

        moments = [moment_bound(j) for j in range(2, mm + 1, 1)]
        return np.minimum(fastupperbound,
                          utils.stable_logsumexp([0] + moments))

    # Beyond m_lin_max only the O(1) bound is evaluated.
    return fastupperbound
def fast_k_subsample_upperbound(func, mm, prob, k):
    """
    k-term approximate upper bound (theorem 11 in ICML-19, per the original
    author's note).

    :param func: callable evaluating the RDP of the base mechanism at an order
    :param mm: the order alpha at which the bound is evaluated
    :param prob: sample probability
    :param k: number of terms used by the approximation (capped at mm - 1)
    :return: ``np.inf`` if ``func(mm)`` is infinite, ``0`` if ``mm == 1``,
        otherwise the log-domain bound
    """
    if np.isinf(func(mm)):
        return np.inf
    if mm == 1:
        return 0

    def cgf(order):
        return (order - 1) * func(order)

    log_keep = np.log(1 - prob)
    log_take = np.log(prob)
    cur_k = np.minimum(k, mm - 1)

    if (2 * cur_k) >= mm:
        # The head and tail sums would overlap, so sum every term directly.
        exact_terms = []
        for l in range(2, mm + 1):
            exact_terms.append(np.log(scipy.special.comb(mm, l))
                               + (mm - l) * log_keep + l * log_take + cgf(l))
        exact_terms.append((mm - 1) * log_keep
                           + np.log(mm * prob - prob + 1))
        return utils.stable_logsumexp(exact_terms)

    _, mag1 = utils.stable_log_diff_exp(0, -func(mm - cur_k))
    new_log_term_1 = log_keep * mm + mag1
    new_log_term_2 = -func(mm - cur_k) + mm * utils.stable_logsumexp_two(
        log_keep, log_take + func(mm - cur_k))

    # Terms l = 2..cur_k, subtracted at the end.
    head_terms = []
    for l in range(2, cur_k + 1):
        head_terms.append(
            np.log(scipy.special.comb(mm, l)) + (mm - l) * log_keep
            + l * log_take
            + utils.stable_log_diff_exp((l - 1) * func(mm - cur_k),
                                        cgf(l))[1])
    if len(head_terms) == 0:
        return utils.stable_logsumexp_two(new_log_term_1, new_log_term_2)
    head_total = utils.stable_logsumexp(head_terms)

    # Terms l = mm-cur_k+1..mm, added together with the two closed-form terms.
    tail_terms = []
    for l in range(mm - cur_k + 1, mm + 1):
        tail_terms.append(
            np.log(scipy.special.comb(mm, mm - l)) + (mm - l) * log_keep
            + l * log_take
            + utils.stable_log_diff_exp(cgf(l),
                                        (l - 1) * func(mm - cur_k))[1])
    tail_terms.append(new_log_term_1)
    tail_terms.append(new_log_term_2)
    tail_total = utils.stable_logsumexp(tail_terms)

    _, new_bound = utils.stable_log_diff_exp(tail_total, head_total)
    return new_bound
def fast_subsampled_cgf_upperbound(func, mm, prob, deltas_local):
    """Fast CGF bound for the subsampled mechanism (currently disabled).

    :param func: evaluates the RDP of the base mechanism
    :param mm: alpha, NOT lambda
    :param prob: sampling probability
    :param deltas_local: accepted for interface compatibility; not read by
        the code below
    :return: always ``np.inf`` because of the unconditional return below
    """
    # NOTE: this unconditional return disables the bound, so every caller gets
    # the trivial value np.inf.  It is kept so results do not change.
    return np.inf

    # ------------------------------------------------------------------
    # Unreachable while the return above is in place.  The misspelled
    # np.minimum calls, the undefined eps_inf and a stray unary plus have been
    # corrected so the code does not crash if it is re-enabled.
    # ------------------------------------------------------------------
    if np.isinf(func(mm)):
        return np.inf
    if mm == 1:
        return 0

    eps_inf = func(np.inf)
    secondterm = 2 * np.log(prob) + np.log(mm) + np.log(mm - 1) - np.log(2) \
        + np.minimum(np.log(4) + func(2.0) + np.log(1 - np.exp(-func(2.0))),
                     func(2.0) + np.minimum(
                         np.log(2),
                         2 * (eps_inf + np.log(1 - np.exp(-eps_inf)))))

    if mm == 2:
        return utils.stable_logsumexp([0, secondterm])

    # approximate the remaining terms using a geometric series or binomial series
    # log(e^{eps_inf} - 1), written so that exp() cannot overflow
    log_exp_eps_minus_one = func(np.inf) + np.log(1 - np.exp(-func(np.inf)))

    if mm == 3:
        return utils.stable_logsumexp([
            0, secondterm,
            (3 * (np.log(prob) + np.log(mm)) + 2 * func(mm)
             + np.minimum(np.log(2), 3 * log_exp_eps_minus_one))
        ])

    logratio1 = np.log(prob) + np.log(mm) + func(mm)
    logratio2 = logratio1 + log_exp_eps_minus_one

    # NOTE(review): the first argument is the literal 1.  If the helper takes
    # log-domain inputs, log(1) = 0 may have been intended -- confirm.
    s, mag = utils.stable_log_diff_exp(1, logratio1)
    s, mag2 = utils.stable_log_diff_exp(1, (mm - 3) * logratio1)
    remaining_terms1 = (np.log(2) + 3 * (np.log(prob) + np.log(mm))
                        + 2 * func(mm) + mag2 - mag)

    s, mag = utils.stable_log_diff_exp(1, logratio2)
    s, mag2 = utils.stable_log_diff_exp(1, (mm - 3) * logratio2)
    remaining_terms2 = (3 * (np.log(prob) + np.log(mm) + log_exp_eps_minus_one)
                        + 2 * func(mm) + mag2 - mag)

    return utils.stable_logsumexp(
        [0, secondterm, np.minimum(remaining_terms1, remaining_terms2)])
def fast_poission_subsampled_cgf_upperbound(func, mm, prob):
    """Fast CGF upper bound for the Poisson-subsampled mechanism.

    :param func: evaluates the RDP of the base mechanism
    :param mm: alpha, NOT lambda
    :param prob: sampling probability (gamma)
    :return: ``np.inf`` if ``func(mm)`` is infinite, ``0`` if ``mm == 1``,
        otherwise the smaller of the two log-domain bounds below
    """
    if np.isinf(func(mm)):
        return np.inf
    if mm == 1:
        return 0

    log_keep = np.log(1 - prob)
    log_take = np.log(prob)

    # Bound #1: log[(1 - gamma + gamma * e^{func(mm)})^mm]
    bound1 = mm * utils.stable_logsumexp_two(log_keep, log_take + func(mm))

    if mm < 3:
        # Bound #2 needs at least three terms; fall back to bound #1.
        return np.minimum(bound1, bound1)

    # Bound #2: expand (1-gamma)^mm * E[(1 + gamma/(1-gamma) * p/q)^mm],
    # keep the first three terms and bound the rest in closed form.
    zeroth = mm * log_keep
    first = (mm - 1) * log_keep + np.log(mm) + log_take
    # NOTE(review): the original comment shows alpha(alpha-1)/2 for this term,
    # but the code has no "- log(2)".  The larger value is kept unchanged.
    second = ((mm - 2) * log_keep + 2 * log_take + np.log(mm)
              + np.log(mm - 1) + func(2))
    remainder = (np.log(mm) + np.log(mm - 1) + np.log(mm - 2)
                 - np.log(3 * 2) + 3 * log_take + (mm - 3) * log_keep
                 + 2 * func(mm)
                 + (mm - 3) * utils.stable_logsumexp_two(
                     0, log_take - log_keep + func(mm)))
    bound2 = utils.stable_logsumexp([zeroth, first, second, remainder])

    return np.minimum(bound1, bound2)
def phi_rr_q(params, t):
    """
    Closed-form log phi-function for Randomized Response, where the privacy
    loss R.V. is drawn from the distribution Q (see Definition 12 in
    https://arxiv.org/pdf/2106.08567.pdf).

    The generalized randomized response can represent any pure-DP mechanism.

    Args:
        params: mapping with keys 'p' and 'q'.
            p: the probability to output one for dataset X.
            q: the probability to output one for dataset X'.
            Both keys are read unconditionally; a default such as q = 1 - p
            has to be filled in by the caller.
        t: the order of the characteristic function.

    Return:
        The log phi-function of randomized response.
    """
    # The roles of p and q are swapped relative to phi_rr_p: here the
    # expectation is taken under Q.
    prob_one = params['q']
    other_one = params['p']

    log_ratio_one = np.log(prob_one / other_one)
    log_ratio_zero = np.log((1 - prob_one) / (1 - other_one))

    branches = [
        np.log(prob_one) + t * 1.0j * log_ratio_one,
        np.log(1 - prob_one) + 1.0j * t * log_ratio_zero,
    ]
    return stable_logsumexp(branches)
def phi_rr_p(params, t):
    """
    Closed-form log phi-function for Randomized Response, where the privacy
    loss R.V. is drawn from the distribution P.

    Args:
        params: mapping with keys 'p' and 'q' (generalized randomized
            response).
            p: the probability to output one for dataset X.
            q: the probability to output one for dataset X'.
            Both keys are read unconditionally; a default such as q = 1 - p
            has to be filled in by the caller.
        t: the order of the characteristic function.

    Return:
        The log phi-function of randomized response.
    """
    prob_one = params['p']
    other_one = params['q']

    log_ratio_one = np.log(prob_one / other_one)
    log_ratio_zero = np.log((1 - prob_one) / (1 - other_one))

    branches = [
        np.log(prob_one) + t * 1.0j * log_ratio_one,
        np.log(1 - prob_one) + 1.0j * t * log_ratio_zero,
    ]
    return stable_logsumexp(branches)
def general_upperbound(func, mm, prob):
    """
    Upper bound of theorem 1 in the 2019 ICML paper (per the original author's
    note); applicable to the general case, including Poisson sampling.

    :param func: callable evaluating the RDP of the base mechanism at an order
    :param mm: alpha in RDP
    :param prob: sample probability
    :return: ``np.inf`` if ``func(mm)`` is infinite, ``0`` if ``mm`` is 0 or 1,
        otherwise the log-domain bound.  A k-term approximation with
        k = min(50, mm - 1) is applied.
    """
    if np.isinf(func(mm)):
        return np.inf
    if mm == 0 or mm == 1:
        return 0

    # A small k is used for the general upper bound because of the accuracy
    # of scipy's binomial coefficient.
    cur_k = np.minimum(50, mm - 1)
    log_keep = np.log(1 - prob)
    log_take = np.log(prob)
    log_three = np.log(3)

    # Positive part.
    log_term_1 = mm * log_keep
    log_term_2 = log_three - func(mm) + mm * utils.stable_logsumexp_two(
        log_keep, log_take + func(mm))

    # Negative part: per-order corrections for l = 3..cur_k ...
    negatives = []
    for l in range(3, cur_k + 1):
        negatives.append(
            np.log(scipy.special.comb(mm, l)) + log_three
            + (mm - l) * log_keep + l * log_take
            + utils.stable_log_diff_exp((l - 1) * func(mm),
                                        (l - 1) * func(l))[1])
    # ... followed by the second-order, first-order and constant corrections.
    second_order = (np.log(mm * (mm - 1) / 2) + 2 * log_take
                    + (mm - 2) * log_keep
                    + utils.stable_log_diff_exp(log_three + func(mm),
                                                func(2))[1])
    first_order = np.log(2) + log_take + np.log(mm) + (mm - 1) * log_keep
    constant = mm * log_keep + log_three - func(mm)

    pos_term = utils.stable_logsumexp([log_term_1, log_term_2])
    negatives.extend([second_order, first_order, constant])
    neg_term = utils.stable_logsumexp(negatives)

    return utils.stable_log_diff_exp(pos_term, neg_term)[1]
def subsample_func_int(x):
    """Evaluate the CGF at alpha = x, i.e. lamb = x - 1.

    This is a closure: ``self``, ``func``, ``prob`` and ``cgf`` come from the
    enclosing scope.  Returns ``np.inf`` when ``func(x)`` is infinite and
    ``func(x)`` itself when ``prob == 1.0``.
    """
    if np.isinf(func(x)):
        return np.inf
    if prob == 1.0:
        return func(x)
    mm = int(x)
    fastbound = fast_poission_subsampled_cgf_upperbound(func, mm, prob)

    def exact_log_sum(log_binom):
        # j = 2 uses cgf(1); every j >= 3 uses cgf(j), matching the original
        # "j - 1 + 1" argument.
        moments = [cgf(1) + 2 * np.log(prob) + (mm - 2) * np.log(1 - prob)
                   + log_binom(2)]
        for j in range(3, mm + 1, 1):
            moments.append(cgf(j) + j * np.log(prob)
                           + (mm - j) * np.log(1 - prob) + log_binom(j))
        leading = (mm - 1) * np.log(1 - prob) + np.log(1 + (mm - 1) * prob)
        return utils.stable_logsumexp([leading] + moments)

    if x <= self.alphas[-1]:
        # Exact sum with binomial coefficients from the table self.logBinomC.
        return exact_log_sum(lambda j: self.logBinomC[mm, j])
    if mm <= self.m_lin_max:
        # Same sum with binomial coefficients from utils.logcomb.
        return exact_log_sum(lambda j: utils.logcomb(mm, j))
    return fastbound
def subsample_func_int(x):
    """Evaluate the CGF at alpha = x, i.e. lamb = x - 1.

    This is a closure: ``self``, ``func``, ``prob`` and ``cgf`` come from the
    enclosing scope.  When ``self.approx == True`` the k-term approximate
    bound is returned, with k = ``self.alphas[-1]``.
    """
    if np.isinf(func(x)):
        return np.inf
    mm = int(x)

    fastbound = fast_poission_subsampled_cgf_upperbound(func, mm, prob)
    fastbound_k = fast_k_subsample_upperbound(func, mm, prob,
                                              self.alphas[-1])
    if self.approx == True:
        return fastbound_k

    def exact_log_sum(log_binom):
        moments = []
        for j in range(2, mm + 1, 1):
            moments.append(cgf(j - 1) + j * np.log(prob)
                           + (mm - j) * np.log(1 - prob) + log_binom(j))
        leading = (mm - 1) * np.log(1 - prob) + np.log(1 + (mm - 1) * prob)
        return utils.stable_logsumexp([leading] + moments)

    if x <= self.alphas[-1]:
        # Exact sum with binomial coefficients from the table self.logBinomC.
        return exact_log_sum(lambda j: self.logBinomC[mm, j])
    if mm <= self.m_lin_max:
        # Same sum with binomial coefficients from utils.logcomb.
        return exact_log_sum(lambda j: utils.logcomb(mm, j))
    return fastbound
def subsample_func_int(x):
    """Return the CGF bound of the subsampled mechanism at alpha = x.

    This is a closure: ``func``, ``prob`` and ``cgf`` come from the enclosing
    scope.  The result is the smaller of the unsubsampled value
    ``(x - 1) * func(x)`` and the log-sum of the per-order moment bounds.
    """
    mm = int(x)
    eps_inf = func(np.inf)
    # log(e^{eps_inf} - 1) in a form that cannot overflow in exp().
    log_exp_eps_minus_one = eps_inf + np.log(1 - np.exp(-eps_inf))

    # Second-order term, bounded separately.
    moments_two = (2 * np.log(prob) + utils.logcomb(mm, 2)
                   + np.minimum(
                       np.log(4) + func(2.0)
                       + np.log(1 - np.exp(-func(2.0))),
                       func(2.0) + np.minimum(np.log(2),
                                              2 * log_exp_eps_minus_one)))

    def moment_bound(j):
        return (np.minimum(j * log_exp_eps_minus_one, np.log(2))
                + cgf(j - 1) + j * np.log(prob) + utils.logcomb(mm, j))

    higher_moments = []
    for j in range(3, mm + 1, 1):
        higher_moments.append(moment_bound(j))

    return np.minimum(
        (x - 1) * func(x),
        utils.stable_logsumexp([0, moments_two] + higher_moments))
def fast_k_subsample_upperbound(func, mm, prob, k):
    """Fast k-term approximate upper bound for the subsampled mechanism.

    Per the original author's note this is proposition 8:

        log( (1 - gamma + alpha*gamma) * (1 - gamma)^(alpha - 1)
             + sum_{l=2}^k C(alpha, l) (1 - gamma)^(alpha - l) gamma^l
               e^{(l-1) eps(l)}
             + eta(eps(alpha), alpha, gamma) )

        eta = C(alpha, k+1) * gamma^(k+1) * e^{k * eps(alpha)}
              * (1 - gamma + gamma * e^{eps(alpha)})^(alpha - k - 1)

    :param func: evaluates the RDP of the base mechanism
    :param mm: alpha
    :param prob: gamma, the sampling probability
    :param k: number of exact terms in the approximation
    :return: ``np.inf`` if ``func(mm)`` is infinite, ``0`` if ``mm == 1``,
        otherwise the log-sum above divided by ``mm - 1``
    """
    if np.isinf(func(mm)):
        return np.inf
    if mm == 1:
        return 0

    log_keep = np.log(1 - prob)
    log_take = np.log(prob)

    head = (mm - 1) * log_keep + np.log(1 - prob + mm * prob)
    logBinomC = utils.get_binom_coeffs(mm)

    # NOTE(review): k is not clamped here, so the k + 1 column lookup below
    # assumes k + 1 <= mm -- confirm against callers.
    terms = []
    for j in range(2, k + 1):
        terms.append(logBinomC[int(mm), j] + j * log_take
                     + (mm - j) * log_keep + (j - 1) * func(j))
    remainder = (logBinomC[int(mm), k + 1] + (k + 1) * log_take
                 + k * func(mm)
                 + (mm - k - 1) * np.log(1 - prob
                                         + prob * np.exp(func(mm))))
    terms.append(head)
    terms.append(remainder)

    return utils.stable_logsumexp(terms) / (mm - 1)
def compose_poisson_subsampled_mechanisms(self, func, prob, coeff=1.0):
    """Compose a Poisson-subsampled mechanism into the accountant.

    Implements the lower bound for subsampled RDP, which per the original
    author's note is also the exact formula of Poisson-subsampled RDP for many
    mechanisms, including the Gaussian mechanism.

    At the moment, mixing Poisson subsampling and standard subsampling is not
    supported.
    TODO: modify the caching identifiers so that different types of
    subsampling can be distinguished.

    :param func: evaluates the RDP of the base mechanism at a given order
    :param prob: sampling probability
    :param coeff: number of times (weight) the mechanism is composed
    """
    self.flag = False
    self.flag_subsample = True
    if (func, prob) in self.idxhash:
        idx = self.idxhash[(func, prob)]
        # TODO: this is really where it needs to be changed.
        # update the coefficients of each function
        self.coeffs[idx] += coeff
        # also update the integer CGFs
        self.RDPs_int += self.cache[(func, prob)] * coeff
    else:
        def cgf(x):
            # CGF indexed by lambda = alpha - 1.
            return x * func(x + 1)

        def subsample_func_int(x):
            # Evaluates the CGF at alpha = x, i.e. lamb = x - 1.
            if np.isinf(func(x)):
                return np.inf
            mm = int(x)
            fastbound = fast_poission_subsampled_cgf_upperbound(func, mm, prob)

            if x <= self.alphas[-1]:
                # Exact sum using the log-binomial table.
                moments = [cgf(j - 1) + j * np.log(prob)
                           + (mm - j) * np.log(1 - prob)
                           + self.logBinomC[mm, j]
                           for j in range(2, mm + 1, 1)]
                return utils.stable_logsumexp(
                    [(mm - 1) * np.log(1 - prob)
                     + np.log(1 + (mm - 1) * prob)] + moments)
            elif mm <= self.m_lin_max:
                # Same sum with binomial coefficients from utils.logcomb.
                moments = [cgf(j - 1) + j * np.log(prob)
                           + (mm - j) * np.log(1 - prob)
                           + utils.logcomb(mm, j)
                           for j in range(2, mm + 1, 1)]
                return utils.stable_logsumexp(
                    [(mm - 1) * np.log(1 - prob)
                     + np.log(1 + (mm - 1) * prob)] + moments)
            else:
                return fastbound

        def subsample_func(x):
            # Linear-interpolation upper bound: the RDP at alpha = x.
            if np.isinf(func(x)):
                return np.inf
            if prob == 1.0:
                return func(x)

            epsinf, tmp = subsample_epsdelta(func(np.inf), 0, prob)

            if np.isinf(x):
                return epsinf
            if (x >= 1.0) and (x <= 2.0):
                return np.minimum(epsinf,
                                  subsample_func_int(2.0) / (2.0 - 1))
            if np.equal(np.mod(x, 1), 0):
                return np.minimum(epsinf, subsample_func_int(x) / (x - 1))
            xc = math.ceil(x)
            xf = math.floor(x)
            return np.minimum(
                epsinf,
                ((x - xf) * subsample_func_int(xc)
                 + (1 - (x - xf)) * subsample_func_int(xf)) / (x - 1)
            )

        # book keeping
        self.idxhash[(func, prob)] = self.n  # save the index
        self.n += 1  # increment the number of unique mechanisms
        self.coeffs.append(coeff)  # update the coefficient
        self.RDPs.append(subsample_func)  # update the analytical functions

        # Also update the integer results, with a vectorized computation.
        # TODO: pre-computing subsampled RDP for integers is error-prone
        #       (it implements the same thing twice) and its benefits are not
        #       clear.  Consider removing it and calling subsample_func.
        if (func, prob) in self.cache:
            results = self.cache[(func, prob)]
        else:
            results = np.zeros_like(self.RDPs_int, float)
            mm = np.max(self.alphas)  # evaluate the RDP up to order mm
            jvec = np.arange(2, mm + 1)
            # logterm3plus[j - 2] holds the order-independent part of term j.
            logterm3plus = np.zeros_like(results)
            for j in jvec:
                logterm3plus[j - 2] = cgf(j - 1) + j * np.log(prob)

            for alpha in range(2, mm + 1):
                # NOTE(review): this looks one slot past the last term used
                # for this alpha (j = alpha + 1); left as-is because it can
                # only make the result more conservative -- confirm intent.
                if np.isinf(logterm3plus[alpha - 1]):
                    results[alpha - 1] = np.inf
                else:
                    # Term j is weighted by (1 - prob)^(alpha - j), exactly as
                    # in subsample_func_int above.  The previous exponent
                    # (alpha + 1 - j) carried one extra factor of (1 - prob)
                    # and so under-estimated every term.
                    # NOTE: prob == 1.0 is not special-cased on this path.
                    tmp = utils.stable_logsumexp(
                        logterm3plus[0:alpha - 1]
                        + self.logBinomC[alpha, 2:(alpha + 1)]
                        + (alpha - jvec[0:alpha - 1]) * np.log(1 - prob))
                    results[alpha - 1] = utils.stable_logsumexp_two(
                        (alpha - 1) * np.log(1 - prob)
                        + np.log(1 + (alpha - 1) * prob),
                        tmp) / (1.0 * alpha - 1)

            # Trivial upper bound of RDP at alpha = 1 --- the KL privacy.
            results[0] = results[1]
            self.cache[(func, prob)] = results  # save in cache
        self.RDPs_int += results * coeff

    # update the pure DP tracker
    eps, _ = subsample_epsdelta(func(np.inf), 0, prob)
    self.RDP_inf += eps * coeff
def fast_subsampled_cgf_upperbound(func, mm, prob, deltas_local):
    """Fast CGF bound for the subsampled mechanism (currently disabled).

    :param func: evaluates the RDP of the base mechanism
    :param mm: alpha, NOT lambda
    :param prob: sampling probability
    :param deltas_local: indexable sequence read only by the unreachable code
    :return: always ``np.inf`` because of the unconditional return below
    """
    # The unconditional return disables the bound; everything after it is
    # unreachable and kept for reference only.
    return np.inf

    if np.isinf(func(mm)):
        return np.inf
    if mm == 1:
        return 0

    # Second-order term: the smaller of the eps-based and delta-based caps.
    eps_cap = np.minimum(
        (2) * np.log(np.exp(func(np.inf)) - 1)
        + np.minimum(func(2), np.log(4)),
        np.log(2) + func(2))
    delta_cap = (np.log(4)
                 + 0.5 * deltas_local[int(2 * np.floor(2 / 2.0)) - 1]
                 + 0.5 * deltas_local[int(2 * np.ceil(2 / 2.0)) - 1])
    secondterm = (np.minimum(eps_cap, delta_cap) + 2 * np.log(prob)
                  + np.log(mm) + np.log(mm - 1) - np.log(2))

    if mm == 2:
        return utils.stable_logsumexp([0, secondterm])

    # approximate the remaining terms using a geometric series
    logratio1 = np.log(prob) + np.log(mm) + func(mm)
    logratio2 = logratio1 + np.log(np.exp(func(np.inf)) - 1)
    logratio = np.minimum(logratio1, logratio2)
    coeff = 1 if logratio1 > logratio2 else 2

    if mm == 3:
        return utils.stable_logsumexp(
            [0, secondterm, np.log(coeff) + 3 * logratio])

    # Sum of the geometric series starting from the third term; this is a
    # total of mm - 2 terms.
    if logratio < 0:
        geometric_series_bound = (np.log(coeff) + 3 * logratio
                                  - np.log(1 - np.exp(logratio))
                                  + np.log(1 - np.exp((mm - 2) * logratio)))
    elif logratio > 0:
        geometric_series_bound = (np.log(coeff) + 3 * logratio
                                  + (mm - 2) * logratio
                                  - np.log(np.exp(logratio) - 1))
    else:
        geometric_series_bound = np.log(coeff) + np.log(mm - 2)

    # Alternative: approximate using (1 + h)^mm.
    logh1 = np.log(prob) + func(mm - 1)
    logh2 = logh1 + np.log(np.exp(func(np.inf)) - 1)

    binomial_series_bound1 = (np.log(2)
                              + mm * utils.stable_logsumexp_two(0, logh1))
    binomial_series_bound2 = mm * utils.stable_logsumexp_two(0, logh2)

    # Subtract the first three terms of each binomial expansion.
    leading1 = np.log(2) + utils.stable_logsumexp(
        [0, logh1 + np.log(mm),
         2 * logh1 + np.log(mm) + np.log(mm - 1) - np.log(2)])
    _, binomial_series_bound1 = utils.stable_sum_signed(
        True, binomial_series_bound1, False, leading1)

    leading2 = utils.stable_logsumexp(
        [0, logh2 + np.log(mm),
         2 * logh2 + np.log(mm) + np.log(mm - 1) - np.log(2)])
    _, binomial_series_bound2 = utils.stable_sum_signed(
        True, binomial_series_bound2, False, leading2)

    remainder = np.min([geometric_series_bound,
                        binomial_series_bound1,
                        binomial_series_bound2])

    return utils.stable_logsumexp([0, secondterm, remainder])
def subsample_func_int(x):
    """Evaluate the CGF bound at alpha = x, i.e. lamb = x - 1.

    This is a closure: ``self``, ``func``, ``prob`` and ``cgf`` come from the
    enclosing scope.

    Returns ``np.inf`` when ``func(x)`` is infinite.  Otherwise returns the
    smaller of the fast bound and a term-by-term bound, or the fast bound
    alone once ``int(x)`` exceeds ``self.m_lin_max``.
    """
    # Only the first element of the cached pair is used below.
    deltas_local, _ = self.deltas_cache[(func, prob)]
    if np.isinf(func(x)):
        return np.inf
    mm = int(x)
    eps_inf = func(np.inf)

    # Second-order term, bounded separately.
    moments_two = 2 * np.log(prob) + utils.logcomb(mm, 2) \
        + np.minimum(
            np.log(4) + func(2.0) + np.log(1 - np.exp(-func(2.0))),
            func(2.0) + np.minimum(
                np.log(2), 2 * (eps_inf + np.log(1 - np.exp(-eps_inf)))))

    def moment_bound(j):
        # Smaller of the delta-based cap and the eps-based cap for order j.
        delta_cap = (np.log(4)
                     + 0.5 * deltas_local[int(2 * np.floor(j / 2.0)) - 1]
                     + 0.5 * deltas_local[int(2 * np.ceil(j / 2.0)) - 1])
        eps_cap = np.minimum(
            j * (eps_inf + np.log(1 - np.exp(-eps_inf))),
            np.log(2)) + cgf(j - 1)
        return (np.minimum(delta_cap, eps_cap)
                + j * np.log(prob) + utils.logcomb(mm, j))

    def moment_bound_linear(j):
        # Same as moment_bound without the delta-based cap.
        return (np.minimum(j * (eps_inf + np.log(1 - np.exp(-eps_inf))),
                           np.log(2)) + cgf(j - 1)
                + j * np.log(prob) + utils.logcomb(mm, j))

    fastupperbound = fast_subsampled_cgf_upperbound(
        func, mm, prob, deltas_local)

    if mm <= self.alphas[-1]:
        # The leftover `if mm == 50: print(...)` debugging output that used to
        # sit here was removed; the returned value is unchanged.
        moments = [moment_bound(j) for j in range(3, mm + 1, 1)]
        return np.minimum(
            fastupperbound,
            utils.stable_logsumexp([0, moments_two] + moments))
    elif mm <= self.m_lin_max:
        moments = [moment_bound_linear(j) for j in range(3, mm + 1, 1)]
        return np.minimum(
            fastupperbound,
            utils.stable_logsumexp([0, moments_two] + moments))
    else:
        # Compute the O(1) upper bound
        return fastupperbound
def phi_subsample_gaussian_q(params, t, phi_min=False, phi_max=False, L=20, N=1e4):
    """
    Log phi-function of the Poisson-subsampled Gaussian mechanism, with the
    privacy loss R.V. log(q/p) taken under Q: phi(t) := E_q e^{i t log(q/p)}.

    Two approaches are provided.  In the first, valid upper and lower bounds
    are obtained by setting phi_max or phi_min to True.  In the second (both
    phi_min = False and phi_max = False), the phi-function is computed
    directly with Gaussian quadrature.  The second approach is recommended as
    it is more efficient.

    Args:
        params: mapping with keys
            'gamma': the sampling ratio.
            'sigma': the std of the noise divided by the l2 sensitivity.
        t: the order at which the phi-function is evaluated.
        phi_min: if True, provide the lower bound approximation of
            delta(epsilon).
        phi_max: if True, provide the upper bound approximation of
            delta(epsilon).
        L, N: used by the first approach only: the 1-dim output space is
            truncated to [-L, L] and discretized into N points.  N may be
            given as a float such as the default 1e4; it is cast to int.

    Returns:
        The log phi-function evaluated at the t-th order.
    """
    sigma = params['sigma']
    gamma = params['gamma']

    # qua (used for the double quadrature method) evaluates the integrand.
    # Gaussian quadrature integrates over [-1, 1]; to integrate over
    # [-inf, inf], y is first mapped to new_y = y / (1 - y^2).
    # The privacy loss used here is
    #   -log(gamma * e^{(2 * new_y - 1) / (2 * sigma^2)} + 1 - gamma).
    def qua(y):
        new_y = y * 1.0 / (1 - y**2)
        phi_result = -1.0 * utils.stable_logsumexp_two(
            np.log(gamma) + (2 * new_y - 1) / (2 * sigma**2),
            np.log(1 - gamma))
        phi_result = np.exp(phi_result * 1.0j * t)
        inte_function = phi_result * np.exp(-new_y**2 / (2 * sigma**2))
        return inte_function

    # inte_f implements the integration over an infinite interval:
    # int_-inf^inf f(x) dx = int_-1^1 f(y/(1-y^2)) * (1 + y^2) / (1 - y^2)^2 dy
    inte_f = lambda y: qua(y) * (1 + y**2) / ((1 - y**2)**2)

    if not phi_max and not phi_min:
        # Double quadrature: res computes the phi-function directly.
        res = integrate.quadrature(inte_f, -1.0, 1.0, tol=1e-15, rtol=1e-15,
                                   maxiter=100)
        result = np.log(res[0]) - np.log(np.sqrt(2 * np.pi) * sigma)
        return result

    # np.linspace needs an integer sample count, but the default N is the
    # float 1e4; cast it as phi_subsample_gaussian_p does.
    N = int(N)
    dx = 2.0 * L / N  # discretisation interval \Delta x
    y = np.linspace(-L, L - dx, N, dtype=np.complex128)
    stable = -1.0 * utils.stable_logsumexp_two(
        np.log(gamma) + (2 * y - 1) / (2 * sigma**2), np.log(1 - gamma))

    # Each entry is replaced by the min (or max) of itself and its left
    # neighbour.  The result is converted back to an ndarray because a plain
    # list cannot be multiplied by a Python complex number below.
    if phi_min:
        stable = np.array([
            min(stable[max(i - 1, 0)], stable[i]) for i in range(len(stable))
        ])
    elif phi_max:
        stable = np.array([
            max(stable[max(i - 1, 0)], stable[i]) for i in range(len(stable))
        ])

    exp_term = 1.0j * t * stable
    result = utils.stable_logsumexp(exp_term - y**2 / (2 * sigma**2)) - np.log(
        np.sqrt(2 * np.pi) * sigma)
    new_result = result + np.log(dx)
    return new_result
def phi_subsample_gaussian_p(params, t, phi_min=False, phi_max=False, L=20, N=1e4):
    """
    Log phi-function of the Poisson-subsampled Gaussian mechanism.

    The phi-function of the subsampled Gaussian is not symmetric.  This
    function implements the log phi-function with the privacy loss R.V.
    log(p/q) taken under P: phi(t) := E_p e^{i t log(p/q)}.

    Two approaches are provided.
    Approach 1: valid upper and lower bounds are obtained by setting phi_max
    or phi_min to True.  This discretization-based approach truncates and
    discretizes the output space.
    Approach 2 (both phi_min = False and phi_max = False): the phi-function
    is computed directly with Gaussian quadrature.

    Args:
        params: mapping with keys
            'gamma': the sampling ratio.
            'sigma': the std of the noise divided by the l2 sensitivity.
        t: the order at which the phi-function is evaluated.
        phi_min: if True, provide the lower bound approximation of
            delta(epsilon).
        phi_max: if True, provide the upper bound approximation of
            delta(epsilon).
        L, N: used in Approach 1: the output space is truncated to [-L, L]
            and discretized into N points.

    Returns:
        The log phi-function evaluated at the t-th order.
    """
    sigma = params['sigma']
    gamma = params['gamma']

    # qua (used for the double quadrature method) evaluates the integrand.
    # Gaussian quadrature integrates over [-1, 1]; to integrate over
    # [-inf, inf], y is first mapped to new_y = y / (1 - y^2).
    # The privacy loss used here is
    #   log(gamma * e^{(2 * new_y - 1) / (2 * sigma^2)} + 1 - gamma).
    def qua(y):
        new_y = y * 1.0 / (1 - y**2)
        stable = utils.stable_logsumexp_two(
            np.log(gamma) + (2 * new_y - 1) / (2 * sigma**2),
            np.log(1 - gamma))
        exp_term = np.exp(1.0j * stable * t)
        # Log of the (unnormalised) mixture density under P.
        density_term = utils.stable_logsumexp_two(
            -new_y**2 / (2 * sigma**2) + np.log(1 - gamma),
            -(new_y - 1)**2 / (2 * sigma**2) + np.log(gamma))
        inte_function = np.exp(density_term) * exp_term
        return inte_function

    # inte_f implements the integration over an infinite interval:
    # int_-inf^inf f(x) dx = int_-1^1 f(y/(1-y^2)) * (1 + y^2) / (1 - y^2)^2 dy
    inte_f = lambda y: qua(y) * (1 + y**2) / ((1 - y**2)**2)

    if not phi_min and not phi_max:
        # Double quadrature: res computes the phi-function directly.
        res = integrate.quadrature(inte_f, -1.0, 1.0, tol=1e-15, rtol=1e-15,
                                   maxiter=100)
        result = np.log(res[0]) - np.log(np.sqrt(2 * np.pi) * sigma)
        return result

    # Lower / upper bound approximation on a discretised grid.
    N = int(N)
    dx = 2.0 * L / N
    y = np.linspace(-L, L - dx, N, dtype=np.complex128)  # represent y
    stable = utils.stable_logsumexp_two(
        np.log(gamma) + (2 * y - 1) / (2 * sigma**2), np.log(1 - gamma))

    # Each entry is replaced by the min (or max) of itself and its left
    # neighbour.  The result must be an ndarray: `1.0j * stable` with a plain
    # list raises TypeError, which previously broke both bound modes.
    if phi_min:
        # left Riemann value (lower bound of the privacy guarantee)
        stable = np.array([
            min(stable[max(i - 1, 0)], stable[i]) for i in range(len(stable))
        ])
    elif phi_max:
        stable = np.array([
            max(stable[max(i - 1, 0)], stable[i]) for i in range(len(stable))
        ])

    # The two mixture components of P, weighted by gamma and 1 - gamma.
    stable_1 = utils.stable_logsumexp(
        1.0j * stable * t - (y - 1)**2 / (2 * sigma**2)
    ) + np.log(gamma) - np.log(np.sqrt(2 * np.pi) * sigma)
    stable_2 = utils.stable_logsumexp(
        1.0j * stable * t - y**2 / (2 * sigma**2)
    ) + np.log(1 - gamma) - np.log(np.sqrt(2 * np.pi) * sigma)
    p_y = utils.stable_logsumexp_two(stable_1, stable_2)
    result = p_y + np.log(dx)
    return result