def fast_poission_subsampled_cgf_upperbound(func, mm, prob):
    # Evaluate the fast CGF bound for the Poisson-subsampled mechanism.
    # func evaluates the RDP of the base mechanism.
    # mm is alpha.  NOT lambda.
    if np.isinf(func(mm)):
        return np.inf
    if mm == 1:
        return 0

    # Bound #1: log[ (1 - gamma + gamma e^{func(mm)})^mm ]
    bound1 = mm * utils.stable_logsumexp_two(np.log(1 - prob), np.log(prob) + func(mm))

    # Bound #2: log[ (1-gamma)^alpha E[ (1 + gamma/(1-gamma) * p/q)^mm ] ]
    #  = log[ (1-gamma)^alpha { 1 + alpha*gamma/(1-gamma)
    #         + gamma^2/(1-gamma)^2 * alpha(alpha-1)/2 * e^{eps(2)}
    #         + (alpha choose 3) * gamma^3/(1-gamma)^3 * e^{2 eps(alpha)}
    #           * (1 + gamma/(1-gamma) e^{eps(alpha)})^{alpha-3} } ]
    if mm >= 3:
        bound2 = utils.stable_logsumexp([
            mm * np.log(1 - prob),
            (mm - 1) * np.log(1 - prob) + np.log(mm) + np.log(prob),
            (mm - 2) * np.log(1 - prob) + 2 * np.log(prob)
            + np.log(mm) + np.log(mm - 1) + func(2),
            np.log(mm) + np.log(mm - 1) + np.log(mm - 2) - np.log(3 * 2)
            + 3 * np.log(prob) + (mm - 3) * np.log(1 - prob) + 2 * func(mm)
            + (mm - 3) * utils.stable_logsumexp_two(0, np.log(prob) - np.log(1 - prob) + func(mm))
        ])
    else:
        bound2 = bound1
    return np.minimum(bound1, bound2)
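# Usage sketch (hedged): bound the Poisson-subsampled Gaussian CGF at an integer
# order. Assumes numpy (np) and autodp's `utils` are imported at module level;
# the parameter values and the Gaussian RDP lambda below are illustrative.
#   >>> sigma = 2.0
#   >>> gaussian_rdp = lambda a: a / (2 * sigma ** 2)  # RDP of the Gaussian mechanism
#   >>> cgf16 = fast_poission_subsampled_cgf_upperbound(gaussian_rdp, mm=16, prob=0.01)
#   >>> rdp16 = cgf16 / (16 - 1)  # convert the CGF at alpha back to an RDP bound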
def fun(y):
    # Closure: x, alpha, and rho come from the enclosing scope.
    if y == 0:
        if x == 1:
            return 0
        else:
            return np.inf
    elif y == 1:
        if x == 0:
            return 0
        else:
            return np.inf
    diff1 = (utils.stable_logsumexp_two(alpha * np.log(x) + (1 - alpha) * np.log(1 - y),
                                        alpha * np.log(1 - x) + (1 - alpha) * np.log(y))
             - rho * (alpha - 1))
    diff2 = (utils.stable_logsumexp_two(alpha * np.log(y) + (1 - alpha) * np.log(1 - x),
                                        alpha * np.log(1 - y) + (1 - alpha) * np.log(x))
             - rho * (alpha - 1))
    if alpha > 1:
        return np.maximum(diff1, diff2)
    else:  # alpha < 1; notice that the sign of the inequality is toggled
        return np.minimum(diff1, diff2)
def fast_k_subsample_upperbound(func, mm, prob, k):
    """
    :param func: the RDP function of the base mechanism
    :param mm: the RDP order alpha
    :param prob: sampling probability
    :param k: the number of approximation terms
    :return: the k-term approximate upper bound from Theorem 11 of the ICML-19 paper
    """

    def cgf(x):
        return (x - 1) * func(x)

    if np.isinf(func(mm)):
        return np.inf
    if mm == 1:
        return 0

    cur_k = np.minimum(k, mm - 1)
    if (2 * cur_k) >= mm:
        # With this many terms the bound is evaluated exactly.
        exact_term_1 = (mm - 1) * np.log(1 - prob) + np.log(mm * prob - prob + 1)
        exact_term_2 = [np.log(scipy.special.comb(mm, l)) + (mm - l) * np.log(1 - prob)
                        + l * np.log(prob) + cgf(l) for l in range(2, mm + 1)]
        exact_term_2.append(exact_term_1)
        bound = utils.stable_logsumexp(exact_term_2)
        return bound

    s, mag1 = utils.stable_log_diff_exp(0, -func(mm - cur_k))
    new_log_term_1 = np.log(1 - prob) * mm + mag1
    new_log_term_2 = -func(mm - cur_k) + mm * utils.stable_logsumexp_two(
        np.log(1 - prob), np.log(prob) + func(mm - cur_k))
    new_log_term_3 = [np.log(scipy.special.comb(mm, l)) + (mm - l) * np.log(1 - prob)
                      + l * np.log(prob)
                      + utils.stable_log_diff_exp((l - 1) * func(mm - cur_k), cgf(l))[1]
                      for l in range(2, cur_k + 1)]
    if len(new_log_term_3) > 0:
        new_log_term_3 = utils.stable_logsumexp(new_log_term_3)
    else:
        return utils.stable_logsumexp_two(new_log_term_1, new_log_term_2)
    new_log_term_4 = [np.log(scipy.special.comb(mm, mm - l)) + (mm - l) * np.log(1 - prob)
                      + l * np.log(prob)
                      + utils.stable_log_diff_exp(cgf(l), (l - 1) * func(mm - cur_k))[1]
                      for l in range(mm - cur_k + 1, mm + 1)]
    new_log_term_4.append(new_log_term_1)
    new_log_term_4.append(new_log_term_2)
    new_log_term_4 = utils.stable_logsumexp(new_log_term_4)
    s, new_log_term_5 = utils.stable_log_diff_exp(new_log_term_4, new_log_term_3)
    new_bound = new_log_term_5
    return new_bound
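# Usage sketch (hedged): the k-term approximate bound for a subsampled Gaussian.
# A larger k tightens the bound at the cost of more terms; values are illustrative.
#   >>> gaussian_rdp = lambda a: a / (2 * 2.0 ** 2)
#   >>> log_moment = fast_k_subsample_upperbound(gaussian_rdp, mm=64, prob=0.02, k=10)
#   >>> rdp64 = log_moment / (64 - 1)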
def fun(logx):
    # Closure: delta, fun1, and fun2 come from the enclosing scope.
    if np.isneginf(logx):
        output = np.log(delta) - fun1(logx)
        return output, output
    else:
        log_neg_grad_l, log_neg_grad_h = fun2(logx)
        log_one_minus_f = fun1(logx)
        low = utils.stable_logsumexp_two(log_neg_grad_l + logx, np.log(delta)) - log_one_minus_f
        high = utils.stable_logsumexp_two(log_neg_grad_h + logx, np.log(delta)) - log_one_minus_f
        return low, high
def RDP_gaussian_svt_c1(params, alpha):
    """
    The RDP of the Gaussian-based SVT with the cut-off parameter c=1.

    The detailed algorithm is described in Theorem 8 of
    https://papers.nips.cc/paper/2020/file/e9bf14a419d77534105016f5ec122d62-Paper.pdf

    Args:
        k: the maximum length before SVT stops.
        sigma: noise added to the threshold.
        c: the cut-off parameter in SVT.
    """
    sigma = params['sigma']
    k = params['k']
    margin = params['margin']
    c = 1

    rdp_rho = 0.5 / (sigma ** 2) * alpha
    ret_rdp = np.log(k) / (alpha - 1) + rdp_rho * 2
    if alpha == 1:
        return ret_rdp * c

    # Implement Corollary 15 in NeurIPS-20.
    inside_part = np.log(2 * np.sqrt(3) * math.pi * (1 + 9 * margin ** 2 / (sigma ** 2)))
    moment_term = utils.stable_logsumexp_two(0, inside_part + margin ** 2 * 1.0 / (sigma ** 2))
    moment_term = moment_term / (2.0 * (alpha - 1))
    moment_based = moment_term + rdp_rho * 2
    return min(moment_based, ret_rdp) * c
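# Usage sketch (hedged): evaluate the c=1 Gaussian-SVT RDP at a few orders.
# `margin` is the gap parameter from the NeurIPS-20 analysis; values are illustrative.
#   >>> params = {'sigma': 5.0, 'k': 100, 'margin': 10}
#   >>> [RDP_gaussian_svt_c1(params, a) for a in [2, 4, 8, 16]]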
def phi_laplace(params, t):
    """
    The closed-form log phi-function of the Laplace mechanism.

    Args:
        t: the order of the characteristic function.
        b: the scale parameter of the Laplace mechanism.

    Return:
        The log phi-function of the Laplace mechanism.
    """
    b = params['b']

    term_1 = 1.j * t / b
    term_2 = -(1.j * t + 1) / b
    result = utils.stable_logsumexp_two(
        utils.stable_logsumexp_two(0, np.log(1. / (2 * t * 1.j + 1))) + term_1,
        np.log(2 * t * 1.j) - np.log(2 * t * 1.j + 1) + term_2)
    return result + np.log(0.5)
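# Usage sketch (hedged): the log phi-function is complex-valued; it would typically
# be fed to a Fourier/CDF-based accountant rather than used directly.
#   >>> log_phi = phi_laplace({'b': 2.0}, t=1.5)
#   >>> np.exp(log_phi)  # the characteristic-function value at order t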
def rdp_int(x):
    # Closure: eps, eps2, and prob come from the enclosing scope.
    if x == np.inf:
        return eps
    s, mag = utils.stable_log_diff_exp(eps, 0)
    s, mag2 = utils.stable_log_diff_exp(eps2, 0)
    s, mag3 = utils.stable_log_diff_exp(
        x * utils.stable_logsumexp_two(np.log(1 - prob), np.log(prob) + eps),
        np.log(x) + np.log(prob) + mag)
    s, mag4 = utils.stable_log_diff_exp(
        mag3, np.log(1.0 * x / 2) + np.log(x - 1) + 2 * np.log(prob)
        + np.log(np.exp(2 * mag) - np.exp(np.min([mag, 2 * mag, mag2]))))
    return 1 / (x - 1) * mag4
def general_upperbound(func, mm, prob):
    """
    :param func: the RDP function of the base mechanism
    :param mm: alpha in RDP
    :param prob: sampling probability
    :return: the upper bound from Theorem 1 of the 2019 ICML paper; it can be
        applied in the general case (including Poisson subsampling).

    A k-term approximation with k = 50 is applied here (see below).
    """

    def cgf(x):
        return (x - 1) * func(x)

    if np.isinf(func(mm)):
        return np.inf
    if mm == 1 or mm == 0:
        return 0

    # Choose a small k-approximation for the general upper bound (here 50)
    # to stay within scipy's accuracy.
    cur_k = np.minimum(50, mm - 1)
    log_term_1 = mm * np.log(1 - prob)
    log_term_2 = np.log(3) - func(mm) + mm * utils.stable_logsumexp_two(
        np.log(1 - prob), np.log(prob) + func(mm))
    neg_term_3 = [np.log(scipy.special.comb(mm, l)) + np.log(3)
                  + (mm - l) * np.log(1 - prob) + l * np.log(prob)
                  + utils.stable_log_diff_exp((l - 1) * func(mm), cgf(l))[1]
                  for l in range(3, cur_k + 1)]
    neg_term_4 = np.log(mm * (mm - 1) / 2) + 2 * np.log(prob) \
                 + (mm - 2) * np.log(1 - prob) \
                 + utils.stable_log_diff_exp(np.log(3) + func(mm), func(2))[1]
    neg_term_5 = np.log(2) + np.log(prob) + np.log(mm) + (mm - 1) * np.log(1 - prob)
    neg_term_6 = mm * np.log(1 - prob) + np.log(3) - func(mm)
    pos_term = utils.stable_logsumexp([log_term_1, log_term_2])
    neg_term_3.append(neg_term_4)
    neg_term_3.append(neg_term_5)
    neg_term_3.append(neg_term_6)
    neg_term = utils.stable_logsumexp(neg_term_3)
    bound = utils.stable_log_diff_exp(pos_term, neg_term)[1]
    return bound
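# Usage sketch (hedged): the general (Theorem 1) bound for a subsampled Gaussian;
# the sigma and order below are illustrative.
#   >>> gaussian_rdp = lambda a: a / (2 * 2.0 ** 2)
#   >>> log_moment = general_upperbound(gaussian_rdp, mm=32, prob=0.05)
#   >>> rdp32 = log_moment / (32 - 1)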
def RDP_laplace(params, alpha):
    """
    :param params: 'b' --- the ratio of the scale parameter and the L1 sensitivity
    :param alpha: The order of the Renyi Divergence
    :return: Evaluation of the RDP's epsilon
    """
    b = params['b']
    # assert(b > 0)
    # assert(alpha >= 0)
    alpha = 1.0 * alpha
    if alpha <= 1:
        # KL divergence of Lap(b) and its shift by the sensitivity.
        return 1 / b + np.exp(-1 / b) - 1
    elif np.isinf(alpha):
        return 1 / b
    else:  # alpha > 1
        return utils.stable_logsumexp_two(
            (alpha - 1.0) / b + np.log(alpha / (2.0 * alpha - 1)),
            -1.0 * alpha / b + np.log((alpha - 1.0) / (2.0 * alpha - 1))) / (alpha - 1)
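# Usage sketch (hedged): the RDP curve of the Laplace mechanism with
# b = scale / L1-sensitivity. As alpha -> inf this approaches the pure-DP eps = 1/b.
#   >>> [RDP_laplace({'b': 2.0}, a) for a in [1, 2, 10, np.inf]]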
def fun(x):  # the input x is the RDP order alpha
    if x <= 1:
        return np.inf
    else:
        if naive:
            return np.log(1 / delta) / (x - 1) + rdp(x)
        bbghs = np.maximum(rdp(x) + np.log((x - 1) / x)
                           - (np.log(delta) + np.log(x)) / (x - 1), 0)
        # The following is the optimal conversion:
        #   1/(alpha-1) * log( (e^{(alpha-1)*rdp} - 1) / (alpha*delta) + 1 )
        sign, term_1 = utils.stable_log_diff_exp((x - 1) * rdp(x), 0)
        result = utils.stable_logsumexp_two(term_1 - np.log(x) - np.log(delta), 0)
        return min(result * 1.0 / (x - 1), bbghs)
def RDP_randresponse(params, alpha):
    """
    :param params: 'p' --- the Bernoulli probability p of outputting the truth
    :param alpha: The order of the Renyi Divergence
    :return: Evaluation of the RDP's epsilon
    """
    p = params['p']
    assert (p >= 0) and (p <= 1)
    # assert(alpha >= 0)
    if p == 1 or p == 0:
        return np.inf
    if alpha <= 1:
        # KL divergence between Bernoulli(p) and Bernoulli(1-p).
        return (2 * p - 1) * np.log(p / (1 - p))
    elif np.isinf(alpha):
        return np.abs(np.log(1.0 * p / (1 - p)))
    else:  # alpha > 1
        return utils.stable_logsumexp_two(
            alpha * np.log(p) + (1 - alpha) * np.log(1 - p),
            alpha * np.log(1 - p) + (1 - alpha) * np.log(p)) / (alpha - 1)
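# Usage sketch (hedged): randomized response that tells the truth with probability p.
#   >>> RDP_randresponse({'p': 0.75}, np.inf)  # recovers eps = log(p/(1-p)) ~= 1.0986
#   >>> RDP_randresponse({'p': 0.75}, 2)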
def RDP_noisy_screen(params, alpha):
    """
    Return the data-dependent RDP of ``Noisy Screening" (Theorem 7 in Private-kNN).

    Args:
        logp, logq: the log of p and q, where p (resp. q) is the probability
            P(max_vote + noise > threshold) under the dataset (resp. its neighbor).
    """
    logp = params['logp']
    logq = params['logq']
    log1q = _log1mexp(logq)  # log(1 - q)
    log1p = _log1mexp(logp)  # log(1 - p)
    if alpha == 1:
        # KL divergence between Bernoulli(p) and Bernoulli(q).
        return np.exp(logp) * (logp - logq) + (1 - np.exp(logp)) * (log1p - log1q)
    elif np.isinf(alpha):
        return np.abs(np.exp(logp - logq) * 1.0)
    term1 = alpha * logp - (alpha - 1) * logq
    term2 = alpha * log1p - (alpha - 1) * log1q
    log_term = utils.stable_logsumexp_two(term1, term2)
    return 1.0 * log_term / (alpha - 1)
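# Usage sketch (hedged): data-dependent RDP given the (log) screening probabilities
# under a pair of neighboring datasets; the values below are illustrative.
#   >>> params = {'logp': np.log(0.9), 'logq': np.log(0.8)}
#   >>> RDP_noisy_screen(params, alpha=4)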
def RDP_independent_noisy_screen(params, alpha):
    """
    The data-independent RDP of ``Noisy Screening" (Theorem 7 in Private-kNN).

    The method is described in
    https://openaccess.thecvf.com/content_CVPR_2020/html/Zhu_Private-kNN_Practical_Differential_Privacy_for_Computer_Vision_CVPR_2020_paper.html
    The exact data-independent bound requires searching a max_count over
    [k/c, k] to maximize RDP_noisy_screen for any alpha.

    Args:
        params: contains three parameters. params['thresh'] is the threshold
            for noisy screening, k is the number of neighbors in Private-kNN,
            and sigma is the noise scale.

    Returns:
        The RDP of data-independent noisy screening.
    """
    threshold = params['thresh']
    k = params['k']
    sigma = params['sigma']
    import scipy.stats

    rdp = []
    for adjacent in [+1, -1]:
        for max_count in [int(k / 10), threshold]:
            logp = scipy.stats.norm.logsf(threshold - max_count, scale=sigma)
            logq = scipy.stats.norm.logsf(threshold - max_count + adjacent, scale=sigma)
            log1q = _log1mexp(logq)
            log1p = _log1mexp(logp)
            if alpha == 1:
                return np.exp(logp) * (logp - logq) + (1 - np.exp(logp)) * (log1p - log1q)
            elif np.isinf(alpha):
                return np.abs(np.exp(logp - logq) * 1.0)
            term1 = alpha * logp - (alpha - 1) * logq
            term2 = alpha * log1p - (alpha - 1) * log1q
            log_term = utils.stable_logsumexp_two(term1, term2)
            rdp.append(log_term)
    # Take the worst case over the candidate (adjacent, max_count) pairs.
    log_term = np.max(rdp)
    return 1.0 * log_term / (alpha - 1)
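# Usage sketch (hedged): the data-independent variant searches candidate max_count
# values internally; the thresh/k/sigma values below are illustrative.
#   >>> params = {'thresh': 50, 'k': 100, 'sigma': 20.0}
#   >>> RDP_independent_noisy_screen(params, alpha=4)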
def RDP_svt_laplace(params, alpha):
    """
    The RDP of Laplace-based SVT.

    Args:
        params['b']: the Laplace noise scale divided by the sensitivity.
        params['k']: the SVT algorithm stops when either k queries are
            answered or the cut-off c is reached.
    """
    b = params['b']
    k = params['k']  # the algorithm stops when either k or c is reached
    c = max(params['c'], 1)

    alpha = 1.0 * alpha
    if alpha <= 1:
        eps_1 = 1 / b + np.exp(-1 / b) - 1
    elif np.isinf(alpha):
        eps_1 = 1 / b
    else:  # alpha > 1
        eps_1 = utils.stable_logsumexp_two(
            (alpha - 1.0) / b + np.log(alpha / (2.0 * alpha - 1)),
            -1.0 * alpha / b + np.log((alpha - 1.0) / (2.0 * alpha - 1))) / (alpha - 1)

    eps_2 = 1 / b  # infinity-RDP on nu
    c_log_n_c = c * np.log(k / c)
    tilde_eps = eps_2 * (c + 1)  # eps_infinity
    ret_rdp = min(c * eps_2 + eps_1, c_log_n_c * 1.0 / (alpha - 1) + eps_1 * (c + 1))
    ret_rdp = min(ret_rdp, 0.5 * alpha * tilde_eps ** 2)
    if np.isinf(alpha) or alpha == 1:
        return ret_rdp

    # The following is the sinh-based method.
    cdp_bound = np.sinh(alpha * tilde_eps) - np.sinh((alpha - 1) * tilde_eps)
    cdp_bound = cdp_bound / np.sinh(tilde_eps)
    cdp_bound = 1.0 / (alpha - 1) * np.log(cdp_bound)
    return min(ret_rdp, cdp_bound)
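# Usage sketch (hedged): Laplace-SVT with noise scale b, at most k queries,
# and cut-off c; values are illustrative.
#   >>> params = {'b': 2.0, 'k': 1000, 'c': 10}
#   >>> [RDP_svt_laplace(params, a) for a in [2, 8, 32]]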
def diff2_general(logx, u):
    # Closure: alpha and rho come from the enclosing scope.
    return (utils.stable_logsumexp_two(
        alpha * np.log(1 - np.exp(u)) + (1 - alpha) * np.log(1 - np.exp(logx)),
        alpha * u + (1 - alpha) * logx) - rho * (alpha - 1))
def phi_subsample_gaussian_p(params, t, phi_min=False, phi_max=False, L=20, N=1e4):
    """
    The log phi-function of the Poisson-subsampled Gaussian mechanism.

    The phi-function of the subsampled Gaussian is not symmetric. This function
    implements the log phi-function phi(t) := E_p e^{i t log(p/q)}.

    We provide two approaches to approximate the phi-function.
    Approach 1: we provide valid upper and lower bounds by setting phi_max or
    phi_min to be True. This discretization-based approach truncates and
    discretizes the output space.
    Approach 2: (both phi_min = False and phi_max = False) we compute the
    phi-function using Gaussian quadrature directly. We recommend this approach
    as it is more efficient.

    Args:
        phi_min: if True, the function provides the lower bound approximation of delta(epsilon).
        phi_max: if True, the function provides the upper bound approximation of delta(epsilon).
            If neither is set, the function provides the Gaussian quadrature approximation.
        gamma: the sampling ratio.
        sigma: the std of the noise divided by the l2 sensitivity.
        L, N: used in Approach 1; truncate the output space to [-L, L] and
            discretize it into N bins.
    """
    sigma = params['sigma']
    gamma = params['gamma']

    # The qua function (used for the double-quadrature method) computes
    # e^{it log(p/q)} weighted by the density of p. Gaussian quadrature requires
    # the integration interval to be [-1, 1]; to integrate over (-inf, inf), we
    # substitute y (the quadrature variable) with new_y = y / (1 - y^2).
    # The privacy loss R.V. is log(p/q) = log(gamma * e^{(2*new_y - 1)/(2*sigma^2)} + 1 - gamma).
    def qua(y):
        new_y = y * 1.0 / (1 - y ** 2)
        stable = utils.stable_logsumexp_two(
            np.log(gamma) + (2 * new_y - 1) / (2 * sigma ** 2), np.log(1 - gamma))
        exp_term = np.exp(1.0j * stable * t)
        density_term = utils.stable_logsumexp_two(
            -new_y ** 2 / (2 * sigma ** 2) + np.log(1 - gamma),
            -(new_y - 1) ** 2 / (2 * sigma ** 2) + np.log(gamma))
        inte_function = np.exp(density_term) * exp_term
        return inte_function

    # inte_f implements the integration over an infinite interval:
    # int_{-inf}^{inf} f(x) dx = int_{-1}^{1} f(y/(1-y^2)) * (1 + y^2) / (1 - y^2)^2 dy.
    inte_f = lambda y: qua(y) * (1 + y ** 2) / ((1 - y ** 2) ** 2)

    if not phi_min and not phi_max:
        # Double quadrature: compute the phi-function using Gaussian quadrature.
        res = integrate.quadrature(inte_f, -1.0, 1.0, tol=1e-15, rtol=1e-15, maxiter=100)
        result = np.log(res[0]) - np.log(np.sqrt(2 * np.pi) * sigma)
        return result

    # Return the lower or upper bound approximation.
    N = int(N)
    dx = 2.0 * L / N
    y = np.linspace(-L, L - dx, N, dtype=np.complex128)
    stable = utils.stable_logsumexp_two(
        np.log(gamma) + (2 * y - 1) / (2 * sigma ** 2), np.log(1 - gamma))
    if phi_min:
        # Take the left-Riemann value (lower bound of the privacy guarantee).
        stable = np.array([min(stable[max(i - 1, 0)], stable[i]) for i in range(len(stable))])
    elif phi_max:
        stable = np.array([max(stable[max(i - 1, 0)], stable[i]) for i in range(len(stable))])
    stable_1 = utils.stable_logsumexp(1.0j * stable * t - (y - 1) ** 2 / (2 * sigma ** 2)) \
               + np.log(gamma) - np.log(np.sqrt(2 * np.pi) * sigma)
    stable_2 = utils.stable_logsumexp(1.0j * stable * t - y ** 2 / (2 * sigma ** 2)) \
               + np.log(1 - gamma) - np.log(np.sqrt(2 * np.pi) * sigma)
    p_y = utils.stable_logsumexp_two(stable_1, stable_2)
    result = p_y + np.log(dx)
    return result
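# Usage sketch (hedged): evaluate the log phi-function at order t via quadrature,
# and via the discretized upper bound for comparison; values are illustrative.
#   >>> params = {'sigma': 2.0, 'gamma': 0.01}
#   >>> phi_quad = phi_subsample_gaussian_p(params, t=3.0)
#   >>> phi_upper = phi_subsample_gaussian_p(params, t=3.0, phi_max=True)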
def phi_subsample_gaussian_q(params, t, phi_min=False, phi_max=False, L=20, N=1e4):
    """
    The log phi-function of the privacy loss R.V. log(q/p), i.e.,
    phi(t) := E_q e^{i t log(q/p)}.

    We provide two approaches to approximate the phi-function.
    In the first approach, we provide valid upper and lower bounds by setting
    phi_max or phi_min to be True.
    In the second approach (both phi_min = False and phi_max = False), we
    compute the phi-function using Gaussian quadrature directly. We recommend
    the second approach as it is more efficient.

    Args:
        phi_min: if True, the function provides the lower bound approximation of delta(epsilon).
        phi_max: if True, the function provides the upper bound approximation of delta(epsilon).
            If neither is set, the function provides the Gaussian quadrature approximation.
        gamma: the sampling ratio.
        sigma: the std of the noise divided by the l2 sensitivity.
        L, N: used in the first approach; truncate the output space to [-L, L]
            and discretize it into N bins.

    Returns:
        The phi-function evaluated at the t-th order.
    """
    sigma = params['sigma']
    gamma = params['gamma']

    # The qua function (used for the double-quadrature method) computes
    # e^{it log(q/p)} weighted by the density of q. Gaussian quadrature requires
    # the integration interval to be [-1, 1]; to integrate over (-inf, inf), we
    # substitute y (the quadrature variable) with new_y = y / (1 - y^2).
    # The privacy loss R.V. is log(q/p) = -log(gamma * e^{(2*new_y - 1)/(2*sigma^2)} + 1 - gamma).
    def qua(y):
        new_y = y * 1.0 / (1 - y ** 2)
        phi_result = -1.0 * utils.stable_logsumexp_two(
            np.log(gamma) + (2 * new_y - 1) / (2 * sigma ** 2), np.log(1 - gamma))
        phi_result = np.exp(phi_result * 1.0j * t)
        inte_function = phi_result * np.exp(-new_y ** 2 / (2 * sigma ** 2))
        return inte_function

    # inte_f implements the integration over an infinite interval:
    # int_{-inf}^{inf} f(x) dx = int_{-1}^{1} f(y/(1-y^2)) * (1 + y^2) / (1 - y^2)^2 dy.
    inte_f = lambda y: qua(y) * (1 + y ** 2) / ((1 - y ** 2) ** 2)

    if not phi_max and not phi_min:
        # Double quadrature: compute the phi-function using Gaussian quadrature.
        res = integrate.quadrature(inte_f, -1.0, 1.0, tol=1e-15, rtol=1e-15, maxiter=100)
        result = np.log(res[0]) - np.log(np.sqrt(2 * np.pi) * sigma)
        return result

    N = int(N)  # np.linspace requires an integer number of points
    dx = 2.0 * L / N  # discretization interval \Delta x
    y = np.linspace(-L, L - dx, N, dtype=np.complex128)
    stable = -1.0 * utils.stable_logsumexp_two(
        np.log(gamma) + (2 * y - 1) / (2 * sigma ** 2), np.log(1 - gamma))
    if phi_min:
        # Take the left-Riemann value (lower bound).
        stable = np.array([min(stable[max(i - 1, 0)], stable[i]) for i in range(len(stable))])
    elif phi_max:
        stable = np.array([max(stable[max(i - 1, 0)], stable[i]) for i in range(len(stable))])
    exp_term = 1.0j * t * stable
    result = utils.stable_logsumexp(exp_term - y ** 2 / (2 * sigma ** 2)) \
             - np.log(np.sqrt(2 * np.pi) * sigma)
    new_result = result + np.log(dx)
    return new_result
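# Usage sketch (hedged): the pair (phi_p, phi_q) characterizes the dominating pair
# (P, Q); a characteristic-function accountant would combine both to compute
# delta(eps). Parameter values are illustrative.
#   >>> params = {'sigma': 2.0, 'gamma': 0.01}
#   >>> log_phi_p = phi_subsample_gaussian_p(params, t=3.0)
#   >>> log_phi_q = phi_subsample_gaussian_q(params, t=3.0)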
def compose_poisson_subsampled_mechanisms(self, func, prob, coeff=1.0):
    # This function implements the lower bound for subsampled RDP.
    # It is also the exact formula of Poisson-subsampled RDP for many
    # mechanisms, including the Gaussian mechanism.
    #
    # At the moment, we do not support mixing Poisson subsampling and standard subsampling.
    # TODO: modify the caching identifiers so that we can distinguish different types of subsampling.
    self.flag = False
    self.flag_subsample = True
    if (func, prob) in self.idxhash:
        idx = self.idxhash[(func, prob)]  # TODO: this is really where it needs to be changed.
        # update the coefficients of each function
        self.coeffs[idx] += coeff
        # also update the integer CGFs
        self.RDPs_int += self.cache[(func, prob)] * coeff
    else:
        # compute an easy-to-compute upper bound of it

        def cgf(x):
            return x * func(x + 1)

        def subsample_func_int(x):
            # This function evaluates the CGF at alpha = x, i.e., lamb = x - 1.
            if np.isinf(func(x)):
                return np.inf
            mm = int(x)
            if x <= self.alphas[-1]:  # compute the bound exactly
                moments = [cgf(j - 1) + j * np.log(prob) + (mm - j) * np.log(1 - prob)
                           + self.logBinomC[mm, j] for j in range(2, mm + 1, 1)]
                return utils.stable_logsumexp(
                    [(mm - 1) * np.log(1 - prob) + np.log(1 + (mm - 1) * prob)] + moments)
            elif mm <= self.m_lin_max:
                moments = [cgf(j - 1) + j * np.log(prob) + (mm - j) * np.log(1 - prob)
                           + utils.logcomb(mm, j) for j in range(2, mm + 1, 1)]
                return utils.stable_logsumexp(
                    [(mm - 1) * np.log(1 - prob) + np.log(1 + (mm - 1) * prob)] + moments)
            else:
                # Fall back to the fast CGF upper bound for large orders.
                return fast_poission_subsampled_cgf_upperbound(func, mm, prob)

        def subsample_func(x):
            # Linear-interpolation upper bound; this function implements the RDP at alpha = x.
            if np.isinf(func(x)):
                return np.inf
            if prob == 1.0:
                return func(x)
            epsinf, tmp = subsample_epsdelta(func(np.inf), 0, prob)
            if np.isinf(x):
                return epsinf
            if (x >= 1.0) and (x <= 2.0):
                return np.minimum(epsinf, subsample_func_int(2.0) / (2.0 - 1))
            if np.equal(np.mod(x, 1), 0):
                return np.minimum(epsinf, subsample_func_int(x) / (x - 1))
            xc = math.ceil(x)
            xf = math.floor(x)
            return np.minimum(
                epsinf,
                ((x - xf) * subsample_func_int(xc) + (1 - (x - xf)) * subsample_func_int(xf)) / (x - 1))

        # book keeping
        self.idxhash[(func, prob)] = self.n  # save the index
        self.n += 1  # increment the number of unique mechanisms
        self.coeffs.append(coeff)  # update the coefficient
        self.RDPs.append(subsample_func)  # update the analytical functions

        # also update the integer results, with a vectorized computation.
        # TODO: pre-computing subsampled RDP for integers is error-prone (it implements the same thing twice)
        # TODO: and its benefits are not clear. We should consider removing it and simply calling the lambda function.
        if (func, prob) in self.cache:
            results = self.cache[(func, prob)]
        else:
            results = np.zeros_like(self.RDPs_int, float)
            mm = np.max(self.alphas)  # evaluate the RDP up to order mm
            jvec = np.arange(2, mm + 1)
            logterm3plus = np.zeros_like(results)  # saves everything from j = 2 to j = m + 1
            for j in jvec:
                logterm3plus[j - 2] = cgf(j - 1) + j * np.log(prob)
            for alpha in range(2, mm + 1):
                if np.isinf(logterm3plus[alpha - 1]):
                    results[alpha - 1] = np.inf
                else:
                    tmp = utils.stable_logsumexp(
                        logterm3plus[0:alpha - 1] + self.logBinomC[alpha, 2:(alpha + 1)]
                        + (alpha + 1 - jvec[0:alpha - 1]) * np.log(1 - prob))
                    results[alpha - 1] = utils.stable_logsumexp_two(
                        (alpha - 1) * np.log(1 - prob) + np.log(1 + (alpha - 1) * prob),
                        tmp) / (1.0 * alpha - 1)
            # Provide the trivial upper bound of RDP at alpha = 1 --- the KL privacy.
            results[0] = results[1]
            self.cache[(func, prob)] = results  # save in cache
        self.RDPs_int += results * coeff

    # update the pure DP tracker
    eps, delta = subsample_epsdelta(func(np.inf), 0, prob)
    self.RDP_inf += eps * coeff
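# Usage sketch (hedged): composing a Poisson-subsampled Gaussian through the
# accountant this method belongs to. `anaRDPacct`, `rdp_bank.RDP_gaussian`, and
# `get_eps` follow autodp's public API; adjust names to your setup.
#   >>> from autodp import rdp_acct, rdp_bank
#   >>> acct = rdp_acct.anaRDPacct()
#   >>> gaussian = lambda x: rdp_bank.RDP_gaussian({'sigma': 5.0}, x)
#   >>> acct.compose_poisson_subsampled_mechanisms(gaussian, prob=0.01, coeff=1000)
#   >>> acct.get_eps(delta=1e-5)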
def fast_subsampled_cgf_upperbound(func, mm, prob, deltas_local):
    # Evaluate the fast CGF bound for the subsampled mechanism.
    # func evaluates the RDP of the base mechanism.
    # mm is alpha.  NOT lambda.
    # NOTE: the unconditional return below disables this fast bound;
    # the remainder of the function is kept for reference.
    return np.inf

    if np.isinf(func(mm)):
        return np.inf
    if mm == 1:
        return 0
    secondterm = np.minimum(
        np.minimum(2 * np.log(np.exp(func(np.inf)) - 1) + np.minimum(func(2), np.log(4)),
                   np.log(2) + func(2)),
        np.log(4) + 0.5 * deltas_local[int(2 * np.floor(2 / 2.0)) - 1]
        + 0.5 * deltas_local[int(2 * np.ceil(2 / 2.0)) - 1]
    ) + 2 * np.log(prob) + np.log(mm) + np.log(mm - 1) - np.log(2)
    if mm == 2:
        return utils.stable_logsumexp([0, secondterm])

    # approximate the remaining terms using a geometric series
    logratio1 = np.log(prob) + np.log(mm) + func(mm)
    logratio2 = logratio1 + np.log(np.exp(func(np.inf)) - 1)
    logratio = np.minimum(logratio1, logratio2)
    if logratio1 > logratio2:
        coeff = 1
    else:
        coeff = 2

    if mm == 3:
        return utils.stable_logsumexp([0, secondterm, np.log(coeff) + 3 * logratio])

    # Calculate the sum of the geometric series starting from the third term.
    # This is a total of mm - 2 terms.
    if logratio < 0:
        geometric_series_bound = np.log(coeff) + 3 * logratio - np.log(1 - np.exp(logratio)) \
                                 + np.log(1 - np.exp((mm - 2) * logratio))
    elif logratio > 0:
        geometric_series_bound = np.log(coeff) + 3 * logratio + (mm - 2) * logratio \
                                 - np.log(np.exp(logratio) - 1)
    else:
        geometric_series_bound = np.log(coeff) + np.log(mm - 2)

    # we will approximate using (1 + h)^mm
    logh1 = np.log(prob) + func(mm - 1)
    logh2 = logh1 + np.log(np.exp(func(np.inf)) - 1)
    binomial_series_bound1 = np.log(2) + mm * utils.stable_logsumexp_two(0, logh1)
    binomial_series_bound2 = mm * utils.stable_logsumexp_two(0, logh2)
    tmpsign, binomial_series_bound1 = utils.stable_sum_signed(
        True, binomial_series_bound1, False,
        np.log(2) + utils.stable_logsumexp(
            [0, logh1 + np.log(mm), 2 * logh1 + np.log(mm) + np.log(mm - 1) - np.log(2)]))
    tmpsign, binomial_series_bound2 = utils.stable_sum_signed(
        True, binomial_series_bound2, False,
        utils.stable_logsumexp(
            [0, logh2 + np.log(mm), 2 * logh2 + np.log(mm) + np.log(mm - 1) - np.log(2)]))
    remainder = np.min([geometric_series_bound, binomial_series_bound1, binomial_series_bound2])
    return utils.stable_logsumexp([0, secondterm, remainder])
def RDP_depend_pate_gaussian(params, alpha):
    """
    Return the data-dependent RDP of GNMax (proposed in PATE2).

    Bounds the RDP from above for GNMax given an upper bound on q (Theorem 6).

    Args:
        logq: Natural logarithm of the probability of a non-argmax outcome.
        sigma: Standard deviation of the Gaussian noise.
        orders: An array_like list of Renyi orders.

    Returns:
        Upper bound on RDP for all orders. A scalar if orders is a scalar.

    Raises:
        ValueError: If the input is malformed.
    """
    logq = params['logq']
    sigma = params['sigma']

    if alpha == 1:
        p = np.exp(logq)
        w = (2 * p - 1) * (logq - _log1mexp(logq))
        return w

    if logq > 0 or sigma < 0 or np.any(alpha < 1):  # not defined for alpha < 1
        raise ValueError("Inputs are malformed.")

    if np.isneginf(logq):  # If the mechanism's output is fixed, it has 0-DP.
        print('isneginf', logq)
        if np.isscalar(alpha):
            return 0.
        else:
            return np.full_like(alpha, 0., dtype=float)  # np.float is deprecated

    variance = sigma ** 2

    # Use two different higher orders: mu_hi1 and mu_hi2 computed according to
    # Proposition 10.
    mu_hi2 = math.sqrt(variance * -logq)
    mu_hi1 = mu_hi2 + 1
    orders_vec = np.atleast_1d(alpha)
    ret = orders_vec / variance  # baseline: data-independent bound

    # Filter out entries where the data-dependent bound does not apply.
    mask = np.logical_and(mu_hi1 > orders_vec, mu_hi2 > 1)
    rdp_hi1 = mu_hi1 / variance
    rdp_hi2 = mu_hi2 / variance
    log_a2 = (mu_hi2 - 1) * rdp_hi2

    # Make sure q is in the range where the bound is increasing in q and A is positive.
    if (np.any(mask) and
            logq <= log_a2 - mu_hi2 * (math.log(1 + 1 / (mu_hi1 - 1))
                                       + math.log(1 + 1 / (mu_hi2 - 1))) and
            -logq > rdp_hi2):
        # Use log1p(x) = log(1 + x) to avoid catastrophic cancellations when x ~ 0.
        log1q = _log1mexp(logq)  # log1q = log(1-q)
        log_a = (alpha - 1) * (log1q - _log1mexp((logq + rdp_hi2) * (1 - 1 / mu_hi2)))
        log_b = (alpha - 1) * (rdp_hi1 - logq / (mu_hi1 - 1))
        # Use logaddexp(x, y) = log(e^x + e^y) to avoid overflow for large x, y.
        log_s1 = utils.stable_logsumexp_two(log1q + log_a, logq + log_b)
        log_s = np.logaddexp(log1q + log_a, logq + log_b)
        ret[mask] = np.minimum(ret, log_s / (alpha - 1))[mask]
        if ret[mask] < 0:
            print('negative ret', ret)
            print('log_s1 ={} log_s = {}'.format(log_s1, log_s))
            print('alpha = {} mu_hi1 ={}'.format(alpha, mu_hi1))
            print('log1q = {} log_a = {} log_b={} log_s = {}'.format(log1q, log_a, log_b, log_s))
            # Fall back to the data-independent bound.
            ret[mask] = 1. / (sigma ** 2) * alpha

    assert np.all(ret >= 0)
    if np.isscalar(alpha):
        return ret.item()  # np.asscalar was removed in newer numpy
    else:
        return ret
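# Usage sketch (hedged): the data-dependent GNMax bound given logq (log-probability
# of a non-argmax outcome) and the noise scale sigma; values are illustrative.
#   >>> params = {'logq': np.log(1e-3), 'sigma': 40.0}
#   >>> RDP_depend_pate_gaussian(params, alpha=8.0)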