Example #1
def numeric_sparse(data, query_list, T, c, epsilon, delta):
    if delta == 0:
        epsilon_1 = 8 * epsilon / 9
        epsilon_2 = 2 * epsilon / 9

        def sigma(eps):
            return 2 * c / eps
    else:
        epsilon_1 = np.sqrt(512) * epsilon / (np.sqrt(512) + 1)
        epsilon_2 = 2 * epsilon / (np.sqrt(512) + 1)

        def sigma(eps):
            return np.sqrt(32 * c * np.log(2 / delta)) / eps

    T_hat_count = T + random.laplace(scale=sigma(epsilon_1))
    count = 0
    answer_list = []
    for query in query_list:
        v_query_1 = random.laplace(scale=2 * sigma(epsilon_1))
        query_response = data.evaluate(query)
        if query_response + v_query_1 >= T_hat_count:
            v_query_2 = random.laplace(scale=sigma(epsilon_2))
            answer_list.append(query_response + v_query_2)
            count = count + 1
            T_hat_count = T + random.laplace(scale=sigma(epsilon_1))
        else:
            answer_list.append(STOP_SIGN)
        if count >= c:
            return answer_list
    return answer_list
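A minimal driver for the snippet above (a sketch: the enclosing module's imports and the STOP_SIGN sentinel are not shown, so they are assumed here):

import numpy as np
from numpy import random

STOP_SIGN = None  # assumed sentinel for below-threshold answers

class ToyData:
    # counting-query data set: evaluate(query) returns a count
    def __init__(self, counts):
        self.counts = counts

    def evaluate(self, query):
        return self.counts[query]

data = ToyData([5, 80, 12, 95, 40])
# report noisy answers for up to c=2 queries whose counts exceed T=50
print(numeric_sparse(data, range(5), T=50, c=2, epsilon=1.0, delta=0))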
Example #2
def find(data, domain, goal_number, failure, eps, sparse=True):
    """
    Based on "Locating a Small Cluster Privately" by Kobbi Nissim, Uri Stemmer, and Salil Vadhan. PODS 2016.
    Given a data set, finds the radius of an approximately minimal cluster of points with
    approximately the desired amount of points
    :param data: list of points in R^dimension
    :param domain: tuple(absolute value of domain's end as int, minimum intervals in domain as float)
    :param goal_number: the number of desired points in the resulting cluster
    :param failure: 0 < float < 1. chances that the procedure will fail to return an answer
    :param eps: float > 0. privacy parameter
    :param sparse: bool. whether to use the sparse domain construction
    :return: the radius of the resulting cluster
    """
    # max(abs(np.min(data)), np.max(data))
    all_distances = distances(data)
    # TODO change variable name
    # 'a' needs to be greater than log(domain[0] / failure) / eps
    a = 2 * log(domain[0] / failure) / eps
    thresh = goal_number - a - log(1 / failure) / eps
    # TODO verify that the noise addition is correct
    if __max_average_ball__(0, all_distances, goal_number) + laplace(0, 1 / eps, 1) > thresh:
        return 0

    dimension = data.shape[1]
    # TODO maybe a little less sparse?
    if sparse:
        new_domain = __sparse_domain__(domain, dimension)
    else:
        new_domain = __create_regular_domain__(domain, dimension)

    def quality(d, r):
        return min(goal_number - __max_average_ball__(r / 2, all_distances, goal_number),
                   __max_average_ball__(r, all_distances, goal_number) - goal_number + 2*a) / 2

    return exponential_mechanism_big(data, new_domain, quality, eps / 2)
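The helper exponential_mechanism_big is not shown. For orientation, a standard exponential mechanism over a finite candidate set looks roughly like this (a sketch assuming a sensitivity-1 quality function, not the source's implementation):

import numpy as np

def exponential_mechanism_sketch(data, domain, quality, eps):
    domain = list(domain)
    scores = np.array([quality(data, r) for r in domain], dtype=float)
    # shifting by the max score is for numerical stability only
    weights = np.exp(eps * (scores - scores.max()) / 2.0)
    return domain[np.random.choice(len(domain), p=weights / weights.sum())]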
Example #3
def sanitize(samples, alpha, beta, eps, delta):
    """

    :param samples:
    :param alpha:
    :param beta:
    :param eps:
    :param delta:
    :return:
    """
    max_sample = max(samples)
    dim = ceil(log2(max_sample + 1))
    end_domain = 2**int(dim)
    remaining_samples = set(range(end_domain))
    est = dict.fromkeys(remaining_samples, 0)
    new_beta = alpha * beta / 4
    new_eps = eps / sqrt(32 * log(5/delta) / alpha)
    new_delta = alpha * delta / 5
    for i in range(int(2/alpha)):
        q = __point_choice_quality__(remaining_samples)
        b = choosing_mechanism_big(samples, remaining_samples, q, 1, alpha/2, new_beta, new_eps, new_delta)
        if b != 'bottom':
            remaining_samples.remove(b)
            # remaining_samples[b] -= 1
            # remaining_samples += Counter()
            est[b] = concept_query(samples, point_concept(b)) + laplace(0, 1 / eps / len(samples), 1)[0]
    return est
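The helpers point_concept and concept_query are not shown; judging from how est[b] is computed above, plausible minimal versions would be (an assumption, not the source's code):

def point_concept(b):
    # hypothetical: indicator concept that fires only on the point b
    return lambda x: 1 if x == b else 0

def concept_query(samples, concept):
    # hypothetical: fraction of the sample on which the concept fires
    return sum(concept(x) for x in samples) / float(len(samples))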
Example #4
def sanitize(samples, alpha, beta, eps, delta):
    """

    :param samples:
    :param alpha:
    :param beta:
    :param eps:
    :param delta:
    :return:
    """
    max_sample = max(samples)
    dim = ceil(log2(max_sample + 1))
    end_domain = 2 ** int(dim)
    remaining_samples = set(range(end_domain))
    est = dict.fromkeys(remaining_samples, 0)
    new_beta = alpha * beta / 4
    new_eps = eps / sqrt(32 * log(5 / delta) / alpha)
    new_delta = alpha * delta / 5
    for i in range(int(2 / alpha)):
        q = __point_choice_quality__(remaining_samples)
        b = choosing_mechanism_big(samples, remaining_samples, q, 1, alpha / 2, new_beta, new_eps, new_delta)
        if b != "bottom":
            remaining_samples.remove(b)
            # remaining_samples[b] -= 1
            # remaining_samples += Counter()
            est[b] = concept_query(samples, point_concept(b)) + laplace(0, 1 / eps / len(samples), 1)[0]
    return est
Example #5
def get_noisy_distribution_of_attributes(attributes,
                                         encoded_dataset,
                                         epsilon=0.1):
    data = encoded_dataset.copy().loc[:, attributes]
    data['count'] = 1
    stats = data.groupby(attributes).sum()

    iterables = [
        range(int(encoded_dataset[attr].max()) + 1) for attr in attributes
    ]
    full_space = DataFrame(columns=attributes, data=list(product(*iterables)))
    stats.reset_index(inplace=True)
    stats = merge(full_space, stats, how='left')
    stats.fillna(0, inplace=True)

    if epsilon:
        k = len(attributes) - 1
        num_tuples, num_attributes = encoded_dataset.shape
        noise_para = laplace_noise_parameter(k, num_attributes, num_tuples,
                                             epsilon)
        laplace_noises = laplace(0, scale=noise_para, size=stats.index.size)
        stats['count'] += laplace_noises
        stats.loc[stats['count'] < 0, 'count'] = 0

    return stats
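laplace_noise_parameter is not shown. In PrivBayes-style noisy-marginal code the scale typically grows with the number of marginals and shrinks with the dataset size and budget; a hypothetical version consistent with that convention (an assumption, not the source):

def laplace_noise_parameter(k, num_attributes, num_tuples, epsilon):
    # hypothetical PrivBayes-style scale: (d - k) noisy marginals share the
    # budget, and each normalized count has sensitivity O(1/n)
    return 2 * (num_attributes - k) / (num_tuples * epsilon)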
Example #6
    def _get_attribute_frequency_counts(self, attributes, encoded_dataset):
        """ Differentially private mechanism to get attribute frequency counts"""
        # Get attribute counts for category combinations present in data
        counts = encoded_dataset.groupby(attributes).size()
        counts.name = 'count'
        counts = counts.reset_index()

        # Get all possible attribute combinations
        attr_combs = [
            range(self.DataDescriber.attr_dict[attr].domain_size)
            for attr in attributes
        ]
        full_space = DataFrame(columns=attributes,
                               data=list(product(*attr_combs)))
        full_counts = merge(full_space, counts, how='left')
        full_counts.fillna(0, inplace=True)

        # Get Laplace noise sample
        noise_sample = laplace(0,
                               scale=self.laplace_noise_scale,
                               size=full_counts.index.size)
        full_counts['count'] += noise_sample
        full_counts.loc[full_counts['count'] < 0, 'count'] = 0

        return full_counts
Example #7
def noise_iid(size, noise_scale, noise_tail="normal", **kwargs):
    if noise_tail == "normal":
        return npr.normal(0, 1, size=size) * noise_scale
    elif noise_tail == "laplace":
        return npr.laplace(0, 1, size=size) * noise_scale
    elif noise_tail == "cauchy":
        return npr.standard_cauchy(size=size) * noise_scale
    else:
        raise ValueError("unknown noise_tail: %r" % (noise_tail,))
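A quick check of the three tails (assumes numpy.random is imported as npr, matching the snippet):

import numpy.random as npr

for tail in ("normal", "laplace", "cauchy"):
    print(tail, noise_iid(size=3, noise_scale=0.1, noise_tail=tail))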
Example #8
def randomize(self, oh_data, private=True):
    """ Add Laplace noise. """
    for row in oh_data:
        for i in range(len(row)):
            b = laplace(scale=1.0 / self.epsilon) if private else 0
            row[i] += b
    return oh_data
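The loop above draws one Laplace sample per coordinate with scale 1/epsilon; a vectorized equivalent for NumPy-array input (a sketch, not the source's API):

import numpy as np

def randomize_vectorized(oh_data, epsilon, private=True):
    if not private:
        return oh_data
    return oh_data + np.random.laplace(scale=1.0 / epsilon, size=oh_data.shape)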
Example #9
def histograms(data, dimension, shift, side, eps, delta):
    """
    Based on Theorem 2.5 from "Locating a Small Cluster Privately"
    by Kobbi Nissim, Uri Stemmer, and Salil Vadhan. PODS 2016.
    finds parts of R^d that contain many data points when R^d is partitioned
    into boxes of the same size; the partition is given by the shift and the
    side length of the 'boxes'
    :param data: list of points in R^dimension
    :param dimension: the dimension of the space which the points are taken from
    :param shift: the partition's shift, the i-th value represents the shift in the i-th axis
    :param side: the side-length of each 'box' in the partition
    :param eps: privacy parameter
    :param delta: privacy parameter
    :return: parts of the partition that contain a lot of data-points
    """
    my_box = partial(__box_containing_point__, partition=shift, dimension=dimension, side_length=side)
    # these are the parts of the partition that have at least one point,
    # where each box appears as many times as the number of points in it
    boxes = [my_box(point) for point in data]
    boxes_quality = Counter(boxes)
    non_zero = False
    for b in boxes_quality:
        boxes_quality[b] += laplace(0, 2/eps, 1)[0]
        if boxes_quality[b] < 2*np.log(2/delta)/eps:
            boxes_quality[b] = 0
        # the current boxes_quality won't be '0' so the process can return an answer
        elif not non_zero:
            non_zero = True
    if not non_zero:
        raise ValueError('No high quality box')
    # return the box with the largest noisy count
    return max(boxes_quality, key=boxes_quality.get)
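__box_containing_point__ is not shown; a plausible version that maps a point to the integer grid coordinates of its box in the shifted partition (an assumption, not the source):

def box_containing_point(point, partition, dimension, side_length):
    # hypothetical: identify a box by its integer grid coordinates
    return tuple(int((point[i] - partition[i]) // side_length)
                 for i in range(dimension))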
Example #10
def variance_operation(elist, elist_sq, auths, dp):
    G = EcGroup(nid=conf.EC_GROUP)

    #E(x^2) = (S(ri^2)/N)
    plain_sum_sqs = list_sum_decryption(elist_sq, auths)
    #print "plain_sum_sqs: " + str(plain_sum_sqs)
    first = plain_sum_sqs/len(elist)
    
    #E(x)^2 = (S(ri)/N)^2
    esum = Classes.Ct.sum(elist)
    plain_sum = collective_decryption(esum, auths)
    tmp = float(plain_sum) / float(len(elist))
    second = tmp * tmp
    #print "second: " + str(second)

    variance = first - second
    
    if (conf.DP and dp):
        # deltaf = 1/relays * 80 (80 is an arbitrary value for our data.)
        d = float(200) / float(len(elist))

        from numpy.random import laplace
        scale = float(d) / conf.EPSILON
        noise = laplace(0, scale)
        print("noise " + str(noise))
        noisy_var = variance + noise
    else:
        noisy_var = variance

    return str(noisy_var)
Example #11
def median_operation(sk_sum, auths, dp):
    
    from numpy.random import laplace   
    
    proto = Classes.get_median(sk_sum, min_b = 0, max_b = 1000, steps = 20) #Compute Median
    plain = None
    total_noise = 0
    while True:
        v = proto.send(plain)
        if isinstance(v, int):
            break
        
        plain = collective_decryption(v, auths)
        
        if (conf.DP and dp):
            #print "sksum:" + str(sk_sum.epsilon)
            if sk_sum.epsilon != 0 and sk_sum.delta != 0:
                noise = 0
                #print conf.DP and dp
                scale = float(sk_sum.d) / float(sk_sum.epsilon)
                #print sk_sum.d
                #print sk_sum.epsilon
                noise = int(round(laplace(0, scale)))
                #print "noise: " + str(noise)
                plain += noise
                total_noise += noise
            
        #print "*: " + str(plain)

    #print "Estimated median: " + str(v)
    #print "Total Noise Added: " + str(total_noise)
    return str(v)
Example #12
def slaplace(nSources, nSamples):
    '''
    Generates an nSources x nSamples array of sources which are Laplace distributed.

        p(x) = (1/sqrt(2))*exp(-sqrt(2)*x)
    '''
    s = laplace(size=(nSources, nSamples))
    return s / s.std(axis=1)[:, newaxis]
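Scale-1 Laplace draws have variance 2, so dividing each row by its sample standard deviation yields unit-variance sources, matching the density in the docstring. A quick check (assumes the snippet's own imports, numpy.random's laplace and numpy's newaxis):

sources = slaplace(nSources=4, nSamples=10000)
print(sources.std(axis=1))  # each entry is 1.0 by construction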
Example #13
def slaplace(nSources, nSamples):
    '''
    Generates an nSources x nSamples array of sources which are Laplace distributed.

        p(x) = (1/sqrt(2))*exp(-sqrt(2)*x)
    '''
    s = laplace(size=(nSources, nSamples))
    return s / s.std(axis=1)[:, newaxis]
Example #14
def get_noise(yDim, xDim, white_noise_func_val):
    if white_noise_func_val == "Uniform":
        return rnd.random(size=(yDim, xDim))
    elif white_noise_func_val == "Beta":
        return rnd.beta(2, 2, size=(yDim, xDim))
    elif white_noise_func_val == "Normal":
        return rnd.normal(size=(yDim, xDim))
    elif white_noise_func_val == "Exponential":
        return rnd.exponential(size=(yDim, xDim))
    else:
        return rnd.laplace(size=(yDim, xDim))
Example #15
def get_cropping_params(bb_loc_laplace_b_param=0.2,
                        bb_size_laplace_b_param=0.06,
                        bbox_shrinkage_limit=0.6,
                        bbox_expansion_limit=1.4):
    '''Get random draws for parameters involved in random crops. Output those params

    Args:
    ----
        bb_loc_laplace_b_param: Scale parameter in the laplace distribution that
            determines where to center the hypothetical bounding box used to
            create crop. Applied for draws of both x and y shifts.
        bb_size_laplace_b_param" Scale parameter in the laplace distribution that
            determines the size of the hypothetical bounding box used to create
            crop. Applied for draws of both x and y scaling.
        bbox_shrinkage_limit: The minimum size in each dimension of hypothetical
            bounding box for cropping, as a fraction of previous bounding box
            size in that dimension
        bbox_expansion_limit: Maximum size in each dimension of hypothetical bounding
            box for cropping, as a fraction of previous bounding box size in that
            dimension.

    Output:
    ------
        x_center_shift: fraction of box width to shift center of bbox for next crop
        y_center_shift: fraction of box height to shift center of bbox for next crop
        x_size_shift: bbox width used to make crop, as frac of previous bbox width
        y_size_shift: bbox height used to make crop, as frac of previous bbox height
    '''

    x_center_shift = laplace(scale=bb_loc_laplace_b_param)  #delta_x in paper
    y_center_shift = laplace(scale=bb_loc_laplace_b_param)  #delta_y in paper
    x_size_shift = (1 - laplace(scale=bb_size_laplace_b_param))
    x_size_shift = np.clip(x_size_shift, bbox_shrinkage_limit,
                           bbox_expansion_limit)
    y_size_shift = (1 - laplace(scale=bb_size_laplace_b_param))
    y_size_shift = np.clip(y_size_shift, bbox_shrinkage_limit,
                           bbox_expansion_limit)
    return x_center_shift, y_center_shift, x_size_shift, y_size_shift
Example #16
def compute_average_BF(n, m, nsim, Xmean, Xstd, Ymean, Ystd, Ydist='Normal',
                       thetaval = np.linspace(0.001, 60, 100), rdseed = 12231):
    if thetaval is None:
        ntheta = 1
    else:
        ntheta = np.shape(thetaval)[0]
    BFmat = np.zeros((nsim, ntheta))
    ProbM1mat = np.zeros((nsim, ntheta))

    for ll in range(nsim):
        np.random.seed(rdseed)
        X = np.reshape(normal(Xmean, Xstd, n), (n, 1))
        Zx = normal(Xmean, Xstd, int(m / 2))
        if Ydist == 'Normal':
            Y = np.reshape(normal(Ymean, Ystd, n), (n, 1))
            Zy = normal(Ymean, Ystd, int(m / 2))
        elif Ydist == 'Laplace':
            Y = np.reshape(laplace(Ymean, Ystd, n), (n, 1))
            Zy = laplace(Ymean, Ystd, int(m / 2))
        else:
            raise NotImplementedError

        Z = np.reshape(np.concatenate((Zx, Zy)), (m, 1))
        if thetaval is None:
            K = GaussianKernel()
            XY = np.reshape(np.concatenate((X, Y)), (2 * n, 1))
            median_heuristic_theta = K.get_sigma_median_heuristic(XY)
            BF_val, prob_M1_val = compute_ProbM1(X, Y, Z, np.array([median_heuristic_theta]), Independent=True)
        else:
            BF_val, prob_M1_val = compute_ProbM1(X, Y, Z, thetaval, Independent=True)
            median_heuristic_theta = None

        BFmat[ll, :] = BF_val.reshape(-1)
        ProbM1mat[ll,:] = prob_M1_val.reshape(-1)
        rdseed += 1
    return BFmat, ProbM1mat, median_heuristic_theta
Example #17
def varRange(n):
    for i in range(1000):
        fillTable(normal(0, 1, n))
    addRow("normal", n)
    for i in range(1000):
        fillTable(standard_cauchy(n))
    addRow("cauchy", n)
    for i in range(1000):
        # scale b = 2**-0.5 gives a unit-variance Laplace (Var = 2*b**2)
        fillTable(laplace(0, 2**(-0.5), n))
    addRow("laplace", n)
    for i in range(1000):
        fillTable(poisson(10, n))
    addRow("poisson", n)
    for i in range(1000):
        # endpoints +/- sqrt(3) give a unit-variance uniform
        fillTable(uniform(-1 * (3**0.5), 3**0.5, n))
    addRow("uniform", n)
Example #18
def find(data, goal_number, failure, eps, delta, promise=-1):
    """
    Finds the radius of an approximately minimal cluster of points containing
    approximately the desired number of points.
    :param data: list of points in R^dimension
    :param goal_number: the number of desired points in the resulting cluster
    :param failure: 0 < float < 1. chance that the procedure fails to return an answer
    :param eps: float > 0. privacy parameter
    :param delta: float > 0. privacy parameter
    :param promise: accuracy 'promise' parameter; computed privately when left as -1
    :return: the radius of the resulting cluster
    """
    domain = abs(max(np.max(data, axis=0)) - min(np.min(data, axis=0)))
    if promise == -1:
        promise = __promise__(data, domain, eps, delta, failure)
    all_distances = distances(data)
    if __max_average_ball__(0, all_distances, goal_number) + laplace(0, 4/eps, 1) >\
                            goal_number - 2*promise - 4/eps*log(2/failure):
        return 0

    extended_domain = 2**int(ceil(log2(domain)))
    max_averages_by_radius = [
        __max_average_ball__(r, all_distances, goal_number)
        for r in arange(0, extended_domain, 0.5)
    ]

    def quality(d, r):
        try:
            return min(
                goal_number - max_averages_by_radius[r],
                max_averages_by_radius[2 * r] - goal_number + 4 * promise) / 2
        except IndexError:
            raise IndexError('error while trying to qualify %f' % r)

    # TODO must complete those two
    def radius_interval_bounding(data_set, domain_end, j):
        return max(
            min(quality(data_set, i) for i in range(a, a + 2**j))
            for a in range(domain_end - 2**j))

    def max_radius_in_interval(data_set, i):
        return max(quality(data_set, r) for r in i)

    return evaluate(data, domain, quality, promise, 0.5, eps, delta,
                    radius_interval_bounding, max_radius_in_interval)
Example #19
def sub_sampling(point, num_of_samples, l_b=-1, u_b=1, scale=0.3):
    '''
    :param point: one point
    :param num_of_samples: # of samples generated
    :param l_b: lower bound of data
    :param u_b: upper bound of data
    :param scale: Laplace scale lambda in the density 1/(2*lambda) * exp(-|x - mu|/lambda)
    :return: num_of_samples points drawn around `point`, kept only when inside the bounds
    '''
    point_dimension = len(point)
    data = np.zeros((num_of_samples, point_dimension))
    for i in range(num_of_samples):
        within_range = False
        while not within_range:
            cur_sample = laplace(point, scale * np.ones((point_dimension,)))
            if all(l_b <= e <= u_b for e in cur_sample):
                within_range = True
        data[i, :] = cur_sample
    return data
Example #20
def add_laplacian_noise(image, loc=0.0, scale=1.0):
    """Add Laplacian noise to image.
    It has probability density function
    f(x; mu, lambda) = 1 / (2 * lambda) * exp(-|x - mu| / lambda)

    Parameters
    ----------
    image : numpy 2D array
        Input image
    loc : float
        The position, mu, of the distribution peak
    scale: float
        lambda, the exponential decay

    Returns
    -------
    numpy 2D array
        Image with Laplacian noise
    """
    noise = laplace(loc, scale, image.shape)
    return array(clip(image + noise, 0, 255), dtype=int)
Example #21
def test_laplaces():
    # laplace seems to work
    dist_list = []
    param_list = []
    myvar1 = 1
    H = RA.uniform(-10, 10, (m, n))  # H is observed
    G = H + RA.laplace(0, myvar1, (m, n))
    for j in range(n):
        dist_list.append('laplace')
        param_list.append([myvar1])
    y = G @ x + RA.randn(m)
    myvar2 = 1
    dist_list.append('laplace')
    param_list.append([myvar2])
    lf = loglikelihood(H, y, dist_list, param_list)
    f, g = lf.func_and_grad(x)
    f1, _ = lf.func_and_grad(x + eps * e1)
    f2, _ = lf.func_and_grad(x + eps * e2)
    approx_grad = (1 / eps) * np.array([f1 - f, f2 - f])
    print(g[0])
    print(approx_grad)
Example #22
def sample_and_encode(X, scale=1.):
    #X = diabetes.data

    #truth = numpy.zeros_like(X[0])
    #y = diabetes.target
    truth = rng.laplace(0, scale, size=X.shape[1])
    y = numpy.dot(X, truth)

    yc = y - y.mean()

    print()
    print(' T |', '%6.3f |' % abs(truth).sum(),
          ' '.join('%6.3f' % b for b in truth))

    guess = numpy.zeros_like(yc)
    for i, (guess, beta) in enumerate(lmj.lars.lars(yc, X)):
        print('%2d |' % i, '%6.3f |' % abs(beta).sum(),
              ' '.join('%6.3f' % b for b in beta),
              '|  yerr %.5f' % numpy.linalg.norm(yc - guess),
              '  xerr %.5f' % numpy.linalg.norm(truth - beta))
    return yc, guess
Example #23
def mean_operation(elist, auths, dp):
    
    G = EcGroup(nid=conf.EC_GROUP)
    esum = Classes.Ct.sum(elist)
    plain_sum = collective_decryption(esum, auths)
    
    mean = float(plain_sum)/float(len(elist))

    if (conf.DP and dp):
        #deltaf = 1/relays * 80 (80 is an arbitrary value for our data.)
        d = float(100)/float(len(elist))
        
        from numpy.random import laplace
        scale = float(d) / conf.EPSILON
        noise = laplace(0, scale)
        noisy_mean = mean + noise
        #print "noise " + str(noise)
    else:
        noisy_mean=mean
    
    return str(noisy_mean)
Example #24
def laplace_mechanism(x: Union[int, float, ndarray], sensitivity: float,
                      privacy_budget: PrivacyBudget) -> Union[float, ndarray]:
    """Differentially private Laplace mechanism. Add Laplacian noise to the value:

    .. math::
            x + Laplace\left(\mu=0, \sigma=\\frac{\Delta f}{\epsilon}\\right)

    The result guarantees :math:`(\epsilon,0)`-differential privacy.

    :param x: Sensitive input data
    :param sensitivity: The global L1-sensitivity :math:`\Delta f` of `x`
    :param privacy_budget: The privacy budget :math:`(\epsilon,0)` used for the outputs
    :return: Input data protected by noise
    """
    check_positive(privacy_budget.epsilon)
    check_positive(sensitivity)

    shape = None if isinstance(x, (int, float)) else x.shape
    noise = laplace(loc=0.,
                    scale=sensitivity / privacy_budget.epsilon,
                    size=shape)
    return x + noise
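Example use, privatizing a count of sensitivity 1 (PrivacyBudget is the snippet's own type; a simple stand-in with an epsilon field is assumed here):

from collections import namedtuple

PrivacyBudget = namedtuple('PrivacyBudget', ['epsilon', 'delta'])
noisy = laplace_mechanism(42, sensitivity=1.0,
                          privacy_budget=PrivacyBudget(epsilon=0.5, delta=0.0))
print(noisy)  # 42 plus Laplace noise of scale 1/0.5 = 2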
Example #25
def generate_data(D_x, D_y, N_int, N_ext, beta=[], sigma=0.1, DP_eps=1.0):
    if len(beta) == 0:
        beta = npr.randn(D_x, D_y)
    x_int = npr.randn(N_int, D_x)
    x_ext = npr.randn(N_ext, D_x)
    y_int = x_int.dot(beta) + sigma * npr.randn(N_int, D_y)
    y_ext = x_ext.dot(beta) + sigma * npr.randn(N_ext, D_y)
    xx_int = x_int.T.dot(x_int) / N_int
    xx_ext = x_ext.T.dot(x_ext) / N_ext
    xy_int = x_int.T.dot(y_int) / N_int
    xy_ext = x_ext.T.dot(y_ext) / N_ext
    DP_xx_scale = 1. / (DP_eps / 2.)
    DP_xy_scale = 2. * D_x**2 / (DP_eps / 2.)
    xx_ext_DP = xx_ext + sps.wishart.rvs(
        df=D_x + 1, scale=np.eye(D_x) * DP_xx_scale / N_ext)
    xy_ext_DP = xy_ext + npr.laplace(scale=DP_xy_scale / N_ext,
                                     size=(D_x, D_y))
    return ({
        'N_ext': N_ext,
        'N_int': N_int,
        'D_x': D_x,
        'D_y': D_y,
        'DP_xx_scale': DP_xx_scale,
        'DP_xy_scale': DP_xy_scale,
        'xx_int': xx_int,
        'xx_ext_DP': xx_ext_DP,
        'xy_int': xy_int,
        'xy_ext_DP': xy_ext_DP,
    }, {
        'x_int': x_int,
        'x_ext': x_ext,
        'y_int': y_int,
        'y_ext': y_ext,
        'xx_ext': xx_ext,
        'xy_ext': xy_ext,
        'beta': beta
    })
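Example call (assumes the module's imports: numpy as np, numpy.random as npr, scipy.stats as sps):

dp_stats, raw = generate_data(D_x=5, D_y=1, N_int=100, N_ext=1000)
print(dp_stats['xx_ext_DP'].shape, dp_stats['xy_ext_DP'].shape)  # (5, 5) (5, 1)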
Example #26
    def sample(self, size=None):
        """Samples from the distribution

        Keyword Arguments:
            size {int or tuple of ints} -- Output shape. If the given shape is, e.g., (m, n, k), then m * n * k samples are drawn.
            If size is None (default), a single value is returned if loc and scale are both scalars. Otherwise,
            np.broadcast(loc, scale).size samples are drawn. (default: {None})

        Returns:
            ndarray or scalar -- Drawn samples from the parameterized distribution.
        """

        if self.distribution_type == DistributionType.UNIFORM:
            return random.uniform(self.low, self.high, size)
        elif self.distribution_type == DistributionType.GAUSSIAN:
            return random.normal(self.average, self.standard_deviation, size)
        elif self.distribution_type == DistributionType.LAPLACIAN:
            # note: numpy's second argument is the Laplace scale b (std = b * sqrt(2))
            return random.laplace(self.average, self.standard_deviation, size)
        elif self.distribution_type == DistributionType.EXPONENTIAL:
            return int(math.ceil(random.exponential(self.beta)))
        elif self.distribution_type == DistributionType.CONSTANT:
            return self.value
        else:
            raise NotImplementedError("Distribution type not yet implemented")
Example #27
def thresholdout(train, holdout, threshold, tolerance):
    if np.abs(train - holdout) < threshold + laplace(scale=tolerance):
        return train
    else:
        return holdout + laplace(scale=tolerance)
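Thresholdout (Dwork et al.) reveals the training statistic while the two estimates agree and a noised holdout statistic otherwise. A toy call with made-up numbers (assumes numpy and numpy.random.laplace are imported as in the snippet):

train_acc, holdout_acc = 0.81, 0.74
print(thresholdout(train_acc, holdout_acc, threshold=0.04, tolerance=0.01))
# returns train_acc when the gap is below the noisy threshold,
# a noised holdout_acc otherwise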
Example #28
def __rec_sanitize__(samples, domain_range, alpha, beta, eps, delta, dimension):
    # print domain_range
    # print calls
    global calls
    global san_data
    # step 1
    if calls == 0:
        return
    calls -= 1

    # step 2
    # the use of partial is redundant
    samples_domain_points = partial(points_in_subset, samples)
    noisy_points_in_range = samples_domain_points(subset=domain_range) + laplace(0, 1/eps, 1)
    sample_size = len(samples)

    # step 3
    if noisy_points_in_range < alpha*sample_size/8:
        base_range = domain_range
        san_data.extend(base_range[1] * noisy_points_in_range)
        return san_data

    # step 4
    domain_size = domain_range[1] - domain_range[0] + 1
    log_size = int(ceil(log(domain_size, 2)))
    # not needed
    # size_tag = 2**log_size

    # step 6

    def quality(data, j):
        return min(point_count_intervals_bounding(data, domain_range, j) - alpha * sample_size / 32,
                   3 * alpha * sample_size / 32 - point_count_intervals_bounding(data, domain_range, j - 1))

    # not needed if using exponential_mechanism
    # step 7
    # promise = alpha * sample_size / 32

    # step 8
    new_eps = eps/3/log_star(dimension)
    # new_delta = delta/3/log_star(dimension)
    # note the use of exponential_mechanism instead of rec_concave
    z_tag = exponential_mechanism(samples, range(log_size+1), quality, new_eps)
    z = 2 ** z_tag

    # step 9
    if z_tag == 0:
        point_counter = Counter(samples)

        def special_quality(data, b):
            return point_counter[b]

        b = choosing_mechanism(samples, range(domain_range[0], domain_range[1] + 1), special_quality,
                               1, alpha/64., beta, eps, delta)
        a = b
    # step 10
    else:
        first_intervals = __build_intervals_set__(samples, 2*z, domain_range[0], domain_range[1] + 1)
        second_intervals = __build_intervals_set__(samples, 2*z_tag, domain_range[0], domain_range[1] + 1, True)
        intervals = [(i, i+2*z-1) for i in first_intervals+second_intervals]
        a, b = choosing_mechanism(samples, intervals, points_in_subset, 2, alpha/64., beta, eps, delta)

    if type(a) == str:
        raise ValueError("stability problem - choosing_mechanism returned 'bottom'")

    # step 11
    # although not mentioned I assume the noisy value should be rounded
    noisy_count_ab = int(samples_domain_points((a, b)) + laplace(0, 1/eps, 1))
    san_data.extend([b] * noisy_count_ab)

    # step 12
    if a > domain_range[0]:
        rec_range = (domain_range[0], a - 1)
        __rec_sanitize__(samples, rec_range, alpha, beta, eps, delta, dimension)
    if b < domain_range[1]:
        rec_range = (b + 1, domain_range[1])
        __rec_sanitize__(samples, rec_range, alpha, beta, eps, delta, dimension)
    return san_data
Example #29
def laplace(self, mean, scale):
    '''Parameters:
    mean: float. scale: float, >= 0.
    '''
    return r.laplace(mean, scale, self.size)
Example #30
if __name__ == "__main__":
    import matplotlib.pyplot as plt
    from mpl_toolkits.mplot3d import Axes3D


    # Generate random samples for testing the algorithm
    Ndim = 3
    Nsamples = 1000


    # Generate random points . . . some Gaussian, some Laplacian
    mean = np.array(Ndim * [5.0, ])
    cov = sp.eye(Ndim) + 5.0

    samples = random.multivariate_normal(mean, cov, Nsamples)
    samples[:50] = random.laplace(5, scale=.0000001, size=(50, mean.size))


    # Make a 3D figure
    fig = plt.figure(0)
    fig.clear()
    ax = fig.add_subplot(1,1,1, projection='3d')
    ax.scatter(samples[:,0], samples[:,1], samples[:,2], marker='.')
Example #31
def no_test_DP_median():
    d, w = 25, 7
    print("Sketch: d=%s w=%s (Cmp. size: %s%%)" % (d, w, (float(100*d*w)/1000)))

    # Setup the crypto
    G = EcGroup()
    sec = G.order().random()
    y = sec * G.generator()

    # Get some test data
    narrow_vals = 1000
    wide_vals = 200

    from numpy.random import laplace    
    from collections import defaultdict

    
    datapoints = defaultdict(list)

    eps = ["Inf", 1, 0.5, 0.1, 0.05, 0.01, 0.005, 0.001]
    for epsilon in eps:
        print(epsilon)

        for _ in range(40):
            vals = [gauss(300, 25) for _ in range (narrow_vals)]
            vals += [gauss(500, 200) for _ in range (wide_vals)]
            vals = sorted([int(v) for v in vals])

            median = vals[int(len(vals) / 2)]

            # Add each sample to the sketch
            cs = CountSketchCt.epsilondelta(0.05, 0.05, y) # CountSketchCt(d, w, y)
            for x in vals:
                cs.insert("%s" % x)

            # Now use test the median function
            proto = get_median(cs, min_b = 0, max_b = 1000, steps = 20)

            plain = None
            no_decryptions = 0
            while True:
                v = proto.send(plain)
                if isinstance(v, int):
                    break
                no_decryptions += 1

                noise = 0
                if isinstance(epsilon, float):
                    scale = float(d) / epsilon
                    noise = int(round(laplace(0, scale)))

                plain = v.dec(sec) + noise

            print("Estimated median: %s\t\tAbs. Err: %s" % (v, abs(v - median)))
            datapoints[epsilon] += [ abs(v - median) ] 

    import matplotlib.pyplot as plt
    from numpy import mean, std

    upper_err = []
    core_err = []
    lower_err = []

    for e in eps:
        samples = sorted(datapoints[e])
        core_err.append(mean(samples))
        upper_err.append(mean(samples) + std(samples) / (len(samples)**0.5))
        lower_err.append(mean(samples) - std(samples) / (len(samples)**0.5))


    eps_lab = range(len(eps))

    eps = ["Inf"] + [e * 10 for e in eps][1:]

    plt.plot(eps_lab, core_err, label="Absolute Error")
    plt.yscale('log')
    # v = v_issue / (len(cnt_issue)**0.5)
    plt.fill_between(x=eps_lab, y1=lower_err, y2=upper_err, alpha=0.2, color="b")
    plt.xticks(eps_lab, eps)

    plt.xlabel(r'Differential Privacy parameter (epsilon)')
    plt.ylabel('Absolute Error (mean & std. of mean)')
    plt.title(r'Median Estimation - Quality vs. Protection')
    # plt.axis([1, 10, 0, 1700])
    # plt.grid(True)

    plt.savefig("Quality.pdf")

    # plt.show()
    plt.close()
Example #32
# imports used below
from collections import deque
import numpy.random as nmr

total = 0  # this is a global variable
# function definition
def sum2(arg1, arg2):
    # return the sum of the 2 arguments
    total = arg1 + arg2  # total is a local variable here
    print("inside the function total is a local variable: ", total)
    return total

# call the sum2 function
total = sum2(10, 20)
print("outside the function total is a global variable: ", total)

# the global and nonlocal keywords can change scope

print(nmr.laplace(1.1))

## python3 data structures
# a list can be modified; tuples and strings cannot
A = [12, 3, 45.6]
print(A.count('str'))
A.append(17.9)
print(A)
A.sort(reverse=False)
print(A)
A.sort(reverse=True)
print(A)
queue_me = deque(['1', 'aeee', 'dugu'])
print(queue_me.popleft())
queue_me.append('ssss')
queue_me.append('sad')
Example #33
def Laplace_noise(privacy_budget, L1_sensitivity, dim):
    return rn.laplace(0, L1_sensitivity / float(privacy_budget), dim)
Example #34
def gen_gaussian_linear_data(n=10,
                             d=100,
                             norm_beta=1,
                             beta=None,
                             var_eps=0.1,
                             s=None,
                             seed=1,
                             shift_type='None',
                             shift_val=0.1,
                             logistic=False):
    '''Generate data
    n : int
        number of samples
    d : int
        dimension
    norm_beta: float
        norm of beta
    var_eps: float
        variance of epsilon
        snr = norm_beta^2 / var_eps
    '''
    npr.seed(seed=seed)

    # x
    x = npr.randn(n, d)
    if 'shift' in shift_type:
        x += shift_val
    elif 'scale' in shift_type:
        S2 = np.cumsum(np.ones(d))
        S2 /= np.sum(S2)
        S2 = np.diag(np.sqrt(S2 * d))
        x = x @ S2
    elif 'spike' in shift_type:
        v = np.ones(d)
        v /= npl.norm(v)
        S = np.eye(d) + (np.sqrt(shift_val) - 1) * np.outer(v, v)
        x = x @ S
    elif shift_type == 'lap':
        x = npr.laplace(size=(n, d))

    # beta
    if s is None:
        s = d
    if beta is None:
        beta = np.zeros(d)
        beta[:s] = npr.randn(s)
        beta[:s] /= npl.norm(beta[:s])
        if norm_beta == 'd':
            norm_beta = d
        beta[:s] *= norm_beta

    var_mult = 0 if var_eps == 0 else np.sqrt(var_eps)
    eps = var_mult * npr.randn(n)
    y = x @ beta + eps

    if logistic:
        # pass through an inv-logit function
        pr = 1 / (1 + np.exp(-y))

        # binomial distr (bernoulli response var)
        # n trials, probability p
        z = np.random.uniform(size=n)  # random number 0-1
        y = (z <= pr).astype(np.int32)

    return x, y, beta
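Example draw with Laplace-distributed features (assumes the module's imports: numpy as np, numpy.random as npr, numpy.linalg as npl):

x, y, beta = gen_gaussian_linear_data(n=50, d=10, shift_type='lap', logistic=True)
print(x.shape, y.shape, beta.shape)  # (50, 10) (50,) (10,)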
Example #35
def thresholdout(train, holdout, threshold, tolerance):
    if np.abs(train - holdout) < threshold + laplace(scale=tolerance):
        return train
    else:
        return holdout + laplace(scale=tolerance)
Example #36
def Lap(mu, lamb):
    return rand.laplace(mu, lamb)
Example #37
def slaplace(nSources, nSamples):
    """
    Generates an nSources x nSamples array of sources which are Laplace distributed.
    """
    s = laplace(size=(nSources, nSamples))
    return s / s.std(axis=1)[:, newaxis]