Example #1
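These three snippets appear to be methods of the analytical RDP accountant in the autodp library (rdp_acct.anaRDPacct); the class and module names are an inference from the code, not stated in the listing. They are not standalone: at module level they rely on roughly the following imports, plus the helpers fast_subsampled_cgf_upperbound and subsample_epsdelta defined alongside the class.

import math
import numpy as np
from scipy.optimize import minimize_scalar
from autodp import utils  # get_binom_coeffs, get_forward_diffs, logcomb, stable_logsumexp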
    def get_eps(self, delta): # minimize over \lambda
        if not self.flag:
            self.build_zeroth_oracle()
            self.flag = True

        if delta < 0 or delta > 1:
            raise ValueError("delta is a probability and must be between 0 and 1")
        if delta == 0:
            return self.RDP_inf
        else:
            def fun(x): # the input is the RDP order \alpha
                if x <= 1:
                    return np.inf
                else:
                    return np.log(1 / delta)/(x-1) + self.evalRDP(x)

            def fun_int(i): # the input is the RDP order \alpha, restricted to integers
                if i <= 1 or i >= len(self.RDPs_int):
                    return np.inf
                else:
                    return np.log(1 / delta) / (i-1) + self.RDPs_int[i - 1]


            # When do we have computational constraints?
            # Only when we have subsampled items.

            # Expand the integer grid while the objective is still decreasing
            # (negative forward difference) at the boundary and remains finite.
            while (self.m < self.m_max) and (not np.isposinf(fun(self.m))) and (fun_int(self.m-1) - fun_int(self.m-2) < 0):
                # If so, double m and expand logBinomC until the forward difference is positive.


                if self.flag_subsample:

                    # The following line is m^2 time.
                    self.logBinomC = utils.get_binom_coeffs(self.m*2+1)

                    # Update each entry of deltas_cache.
                    for key, val in self.deltas_cache.items():
                        if type(key) is tuple:
                            func_tmp = key[0]
                        else:
                            func_tmp = key
                        cgf = lambda x:  x*func_tmp(x+1)
                        deltas,signs_deltas = utils.get_forward_diffs(cgf,self.m*2)

                        self.deltas_cache[key] = [deltas, signs_deltas]

                new_alphas = range(self.m + 1, self.m * 2 + 1, 1)
                self.alphas = np.concatenate((self.alphas, np.array(new_alphas)))  # array of integers
                self.m = self.m * 2

                mm = np.max(self.alphas)

                rdp_int_new = np.zeros_like(self.alphas, float)

                for key,val in self.cache.items():
                    idx = self.idxhash[key]
                    rdp = self.RDPs[idx]
                    newarray = np.zeros_like(self.alphas, float)
                    for j in range(2,mm+1,1):
                        newarray[j-1] = rdp(1.0*j)
                    newarray[0]=newarray[1]
                    coeff = self.coeffs[idx]
                    rdp_int_new += newarray * coeff
                    self.cache[key] = newarray

                self.RDPs_int = rdp_int_new

            bestint = np.argmin(np.log(1 / delta)/(self.alphas[1:]-1) + self.RDPs_int[1:]) + 1

            if bestint == self.m-1:
                if self.verbose:
                    print('Warning: Reached the quadratic upper bound: m_max.')
                # In this case we have hit the maximum quadratic upper bound.
                # Fix it by calling the O(1) upper bound and running a doubling (logarithmic) search.
                cur = fun(bestint)
                while (not np.isposinf(cur)) and fun(bestint-1)-fun(bestint-2) < -1e-8:
                    bestint = bestint*2
                    cur = fun(bestint)

                results = minimize_scalar(fun, method='Bounded', bounds=[self.m-1, bestint + 2],
                                          options={'disp': False})
                if results.success:
                    return results.fun
                else:
                    return None

            if bestint == 0:
                if self.verbose:
                    print('Warning: Smallest alpha = 1.')

            # find the best integer alpha.
            bestalpha = self.alphas[bestint]

            results = minimize_scalar(fun,  method='Bounded',bounds=[bestalpha-1, bestalpha+1],
                                      options={'disp':False})
            # the while loop above ensures that bestint+2 is at most m, and also bestint is at least 0.
            if results.success:
                return results.fun
            else:
                # There are cases where a given \delta is not feasible.
                # For example, if p and q are uniform distributions on different supports,
                # the privacy loss R.V. is either 0 or \infty; unless all the \infty events
                # are taken care of by \delta, \epsilon cannot be finite.
                return -1
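At its core, get_eps performs the standard RDP-to-(epsilon, delta) conversion eps(delta) = min over alpha > 1 of RDP(alpha) + log(1/delta)/(alpha - 1); the rest of the method is machinery for growing the grid of integer alphas and its caches. Below is a minimal standalone sketch of just that conversion, assuming a Gaussian mechanism with sensitivity 1 and noise sigma, whose RDP is the standard alpha / (2 sigma^2); rdp_gaussian and rdp_to_eps are illustrative names, not part of the accountant.

import numpy as np
from scipy.optimize import minimize_scalar

def rdp_gaussian(alpha, sigma=5.0):
    # Standard RDP curve of the Gaussian mechanism with sensitivity 1.
    return alpha / (2.0 * sigma ** 2)

def rdp_to_eps(rdp, delta):
    # eps(delta) = min_{alpha > 1} rdp(alpha) + log(1/delta) / (alpha - 1)
    def objective(alpha):
        return rdp(alpha) + np.log(1.0 / delta) / (alpha - 1.0)
    result = minimize_scalar(objective, method='Bounded', bounds=(1.0 + 1e-6, 1e6))
    return result.fun if result.success else None

print(rdp_to_eps(rdp_gaussian, delta=1e-5))  # eps for a single Gaussian release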
Example #2
    def compose_subsampled_mechanism(self, func, prob, coeff=1.0):
        # This function handles subsampling without replacement.
        self.flag = False
        self.flag_subsample = True
        if (func, prob) in self.idxhash:
            idx = self.idxhash[(func, prob)]
            # update the coefficients of each function
            self.coeffs[idx] += coeff
            # also update the integer CGFs
            self.RDPs_int += self.cache[(func, prob)] * coeff
        else:

            def cgf(x):
                return x * func(x+1)
            # We need forward differences of exp(cgf).
            # The following line is the numerically stable way of implementing it.
            # The output is in polar form with logarithmic magnitude
            deltas, signs_deltas = utils.get_forward_diffs(cgf,self.m)

            self.deltas_cache[(func,prob)] = [deltas,signs_deltas]

            def subsample_func_int(x):
                # This function evaluates the CGF at alpha = x, i.e., lamb = x - 1.
                deltas_local, signs_deltas_local = self.deltas_cache[(func,prob)]
                if np.isinf(func(x)):
                    return np.inf

                mm = int(x)

                fastupperbound = fast_subsampled_cgf_upperbound(func, mm, prob, deltas_local)

                if mm <= self.alphas[-1]: # Compute the bound exactly; requires O(x^2) bookkeeping.

                    moments = [ np.minimum(np.minimum((j)*np.log(np.exp(func(np.inf))-1) + np.minimum(cgf(j-1),np.log(4)),
                                                      np.log(2) + cgf(j-1)),
                                           np.log(4) + 0.5*deltas_local[int(2*np.floor(j/2.0))-1]
                                           + 0.5*deltas_local[int(2*np.ceil(j/2.0))-1]) + j*np.log(prob)
                                +self.logBinomC[int(mm), j] for j in range(2,int(mm+1),1)]

                    return np.minimum(fastupperbound, utils.stable_logsumexp([0]+moments))
                elif mm <= self.m_lin_max:  # Compute the bound with the Stirling approximation; everything is O(x) now.
                    moment_bound = lambda j: np.minimum(j * np.log(np.exp(func(np.inf)) - 1)
                                                        + np.minimum(cgf(j - 1), np.log(4)), np.log(2)
                                                        + cgf(j - 1)) + j * np.log(prob) + utils.logcomb(mm, j)
                    moments = [moment_bound(j) for j in range(2,mm+1,1)]
                    return np.minimum(fastupperbound, utils.stable_logsumexp([0]+ moments))
                else: # Compute the O(1) upper bound
                    return fastupperbound



            def subsample_func(x):
                # This function returns the RDP at alpha = x
                # RDP with the linear interpolation upper bound of the CGF

                epsinf, tmp = subsample_epsdelta(func(np.inf),0,prob)

                if np.isinf(x):
                    return epsinf
                if prob == 1.0:
                    return func(x)

                if (x >= 1.0) and (x <= 2.0):
                    return np.minimum(epsinf, subsample_func_int(2.0) / (2.0-1))
                if np.equal(np.mod(x, 1), 0):
                    return np.minimum(epsinf, subsample_func_int(x) / (x-1) )
                xc = math.ceil(x)
                xf = math.floor(x)
                return np.minimum(
                    epsinf,
                    ((x-xf)*subsample_func_int(xc) + (1-(x-xf))*subsample_func_int(xf)) / (x-1)
                )


            # bookkeeping
            self.idxhash[(func, prob)] = self.n # save the index
            self.n += 1 # increment the number of unique mechanisms
            self.coeffs.append(coeff) # Update the coefficient
            self.RDPs.append(subsample_func) # update the analytical functions

            # also update the integer results up to m_max.
            if (func,prob) in self.cache:
                results = self.cache[(func,prob)]
            else:
                results = np.zeros_like(self.RDPs_int, float)
                mm = np.max(self.alphas)
                for alpha in range(2, mm+1):
                    results[alpha-1] = subsample_func(alpha)
                results[0] = results[1] # Provide the trivial upper bound of RDP at alpha = 1 --- the KL privacy.
                self.cache[(func,prob)] = results # save in cache

            self.RDPs_int += results * coeff
        # update the pure DP
        eps, delta = subsample_epsdelta(func(np.inf), 0, prob)
        self.RDP_inf += eps * coeff
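A hypothetical usage sketch for the method above, assuming it is the compose_subsampled_mechanism of autodp's rdp_acct.anaRDPacct and that rdp_bank.RDP_gaussian supplies the RDP curve of the Gaussian mechanism; treat the module and function names as assumptions of this sketch.

from autodp import rdp_acct, rdp_bank

acct = rdp_acct.anaRDPacct()
gaussian_rdp = lambda alpha: rdp_bank.RDP_gaussian({'sigma': 5.0}, alpha)

# Compose 1000 rounds of the Gaussian mechanism, each run on a 1% subsample.
acct.compose_subsampled_mechanism(gaussian_rdp, prob=0.01, coeff=1000)
print(acct.get_eps(delta=1e-5))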
Example #3
    def compose_subsampled_mechanism(self,
                                     func,
                                     prob,
                                     coeff=1.0,
                                     improved_bound_flag=False):
        """
        Compose a subsampled mechanism (subsampling without replacement) into the accountant.

        :param func:  RDP function of the mechanism before amplification by sampling
        :param prob:  proportion of the data to sample
        :param coeff: number of times the subsampled mechanism is composed
        :param improved_bound_flag:
            - If True, uses Theorem 27 of https://arxiv.org/pdf/1808.00087.pdf
            - If False (the default), uses Theorem 9 of https://arxiv.org/pdf/1808.00087.pdf
            To qualify for the improved bound, the mechanism needs a pair of neighboring
            datasets that is simultaneously worst-case for all Renyi divergences and
            Pearson-Vajda divergences, and its RDP bound needs to be tight (see
            Definition 26 of the same paper). The Gaussian mechanism, the Laplace
            mechanism and many others satisfy this condition.

        :return: nothing (updates the RDP accountant's internal state)
        """

        # (find a random subset of proportion prob)
        self.flag = False
        self.flag_subsample = True
        if (func, prob) in self.idxhash:
            idx = self.idxhash[(func, prob)]
            # update the coefficients of each function
            self.coeffs[idx] += coeff
            # also update the integer CGFs
            self.RDPs_int += self.cache[(func, prob)] * coeff
        else:

            def cgf(x):
                return x * func(x + 1)

            if not improved_bound_flag:

                def subsample_func_int(x):
                    # output the cgf of the subsampled mechanism
                    mm = int(x)
                    eps_inf = func(np.inf)

                    moments_two = 2 * np.log(prob) + utils.logcomb(mm,2) \
                                  + np.minimum(np.log(4) + func(2.0) + np.log(1-np.exp(-func(2.0))),
                                               func(2.0) + np.minimum(np.log(2),
                                                            2 * (eps_inf+np.log(1-np.exp(-eps_inf)))))
                    moment_bound = lambda j: np.minimum(j * (eps_inf + np.log(1-np.exp(-eps_inf))),
                                                        np.log(2)) + cgf(j - 1) \
                                             + j * np.log(prob) + utils.logcomb(mm, j)
                    moments = [moment_bound(j) for j in range(3, mm + 1, 1)]
                    return np.minimum(
                        (x - 1) * func(x),
                        utils.stable_logsumexp([0, moments_two] + moments))
            else:
                # we need forward differences of exp(cgf)
                # The following line is the numerically stable way of implementing it.
                # The output is in polar form with logarithmic magnitude
                deltas, signs_deltas = utils.get_forward_diffs(cgf, self.m)

                self.deltas_cache[(func, prob)] = [deltas, signs_deltas]

                def subsample_func_int(x):
                    # This function evaluates the CGF at alpha = x, i.e., lamb = x - 1.
                    deltas_local, signs_deltas_local = self.deltas_cache[(func, prob)]
                    if np.isinf(func(x)):
                        return np.inf

                    mm = int(x)
                    eps_inf = func(np.inf)

                    moments_two = 2 * np.log(prob) + utils.logcomb(mm, 2) \
                                  + np.minimum(
                        np.log(4) + func(2.0) + np.log(1 - np.exp(-func(2.0))),
                        func(2.0) + np.minimum(np.log(2),
                                               2 * (eps_inf + np.log(1 - np.exp(-eps_inf)))))

                    moment_bound = lambda j: np.minimum(np.log(4) + 0.5*deltas_local[int(2*np.floor(j/2.0))-1]
                                                        + 0.5*deltas_local[int(2*np.ceil(j/2.0))-1],
                                                        np.minimum(j * (eps_inf + np.log(1 - np.exp(-eps_inf))),
                                                                   np.log(2))
                                                        + cgf(j - 1)) \
                                             + j * np.log(prob) + utils.logcomb(mm, j)

                    moment_bound_linear = lambda j: np.minimum(j * (eps_inf + np.log(1-np.exp(-eps_inf))),
                                                        np.log(2)) + cgf(j - 1) \
                                             + j * np.log(prob) + utils.logcomb(mm, j)

                    fastupperbound = fast_subsampled_cgf_upperbound(
                        func, mm, prob, deltas_local)

                    if mm <= self.alphas[-1]:  # Compute the bound exactly; requires O(x^2) bookkeeping.
                        moments = [
                            moment_bound(j) for j in range(3, mm + 1, 1)
                        ]

                        return np.minimum(
                            fastupperbound,
                            utils.stable_logsumexp([0, moments_two] + moments))
                    elif mm <= self.m_lin_max:  # Compute the bound with the Stirling approximation; everything is O(x) now.

                        moments = [
                            moment_bound_linear(j)
                            for j in range(3, mm + 1, 1)
                        ]

                        return np.minimum(
                            fastupperbound,
                            utils.stable_logsumexp([0, moments_two] + moments))
                    else:  # Compute the O(1) upper bound
                        return fastupperbound

            def subsample_func(x):
                # This function returns the RDP at alpha = x
                # RDP with the linear interpolation upper bound of the CGF

                epsinf, tmp = subsample_epsdelta(func(np.inf), 0, prob)

                if np.isinf(x):
                    return epsinf
                if prob == 1.0:
                    return func(x)

                if (x >= 1.0) and (x <= 2.0):
                    return np.minimum(epsinf,
                                      subsample_func_int(2.0) / (2.0 - 1))
                if np.equal(np.mod(x, 1), 0):
                    return np.minimum(epsinf, subsample_func_int(x) / (x - 1))
                xc = math.ceil(x)
                xf = math.floor(x)
                return np.min([
                    epsinf,
                    func(x),
                    ((x - xf) * subsample_func_int(xc) +
                     (1 - (x - xf)) * subsample_func_int(xf)) / (x - 1)
                ])

            # book keeping
            self.idxhash[(func, prob)] = self.n  # save the index
            self.n += 1  # increment the number of unique mechanisms
            self.coeffs.append(coeff)  # Update the coefficient
            self.RDPs.append(subsample_func)  # update the analytical functions

            # also update the integer results up to m_max.
            if (func, prob) in self.cache:
                results = self.cache[(func, prob)]
            else:
                results = np.zeros_like(self.RDPs_int, float)
                mm = np.max(self.alphas)
                for alpha in range(2, mm + 1):
                    results[alpha - 1] = subsample_func(alpha)
                results[0] = results[1]  # Provide the trivial upper bound of RDP at alpha = 1 --- the KL privacy.
                self.cache[(func, prob)] = results  # save in cache

            self.RDPs_int += results * coeff
        # update the pure DP
        eps, delta = subsample_epsdelta(func(np.inf), 0, prob)
        self.RDP_inf += eps * coeff
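To see what improved_bound_flag changes in practice, here is a sketch under the same autodp assumptions as the previous one. The Gaussian mechanism satisfies the tightness condition of Definition 26, so Theorem 27 applies and the reported epsilon should be no larger than the Theorem 9 value.

from autodp import rdp_acct, rdp_bank

gaussian_rdp = lambda alpha: rdp_bank.RDP_gaussian({'sigma': 5.0}, alpha)

acct_general = rdp_acct.anaRDPacct()
acct_general.compose_subsampled_mechanism(gaussian_rdp, prob=0.01, coeff=1000)  # Theorem 9

acct_improved = rdp_acct.anaRDPacct()
acct_improved.compose_subsampled_mechanism(gaussian_rdp, prob=0.01, coeff=1000,
                                           improved_bound_flag=True)  # Theorem 27

print(acct_general.get_eps(1e-5), acct_improved.get_eps(1e-5))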