Esempio n. 1
0
def check(samples_size, alpha, beta, eps, delta, parameter):
    """Sanitize a freshly drawn integer sample and compare it with the original.

    ``samples_size`` values are obtained from
    ``normal(0, parameter, samples_size)``, truncated with ``int`` and shifted
    so that the smallest value becomes 0.  The sanitizer is then run over the
    domain ``(0, 2**k)``, where ``2**k`` is the smallest power of two that is
    at least ``max(sample) + 1``.

    NOTE(review): ``normal`` is presumably ``numpy.random.normal`` (so
    ``parameter`` would be the standard deviation) -- confirm against the
    file's imports.

    Returns the value of ``cdf_comp(sanitized, original)``, or -1 if a
    ``ValueError`` is raised while sanitizing, comparing or plotting.
    When the comparison value equals 0 the two samples are also plotted.
    """
    drawn = [int(value) for value in normal(0, parameter, samples_size)]
    offset = min(drawn)
    shifted = [value - offset for value in drawn]

    # Number of bits needed to index every shifted value.
    bits = ceil(log2(max(shifted) + 1))
    domain = (0, 2 ** int(bits))

    try:
        sanitized = sanitize(shifted, domain, alpha, beta, eps, delta)
        outcome = cdf_comp(sanitized, shifted)
        # Plotting stays inside the try: a ValueError raised here also
        # turns the result into -1.
        if outcome == 0:
            plot_san_and_original(shifted, sanitized)
    except ValueError:
        return -1
    return outcome
def check(samples_size, alpha, beta, eps, delta, parameter):
    """Run one sanitize-and-compare trial on a random integer sample.

    Steps, in order:
      1. draw ``normal(0, parameter, samples_size)`` and truncate each value
         with ``int``;
      2. subtract the minimum so the sample starts at 0;
      3. call ``sanitize`` on the sample with the domain ``(0, 2**k)``,
         ``k = ceil(log2(max + 1))``;
      4. score the sanitized output against the sample with ``cdf_comp``;
      5. if the score equals 0, call ``plot_san_and_original``.

    Returns the ``cdf_comp`` score, or -1 when steps 3-5 raise ``ValueError``.
    """
    samples = [int(s) for s in normal(0, parameter, samples_size)]
    lowest = min(samples)
    samples = [s - lowest for s in samples]
    exponent = int(ceil(log2(max(samples) + 1)))
    upper_bound = 2 ** exponent
    try:
        private = sanitize(samples, (0, upper_bound), alpha, beta, eps, delta)
        result = cdf_comp(private, samples)
        if result == 0:
            plot_san_and_original(samples, private)
    except ValueError:
        # Any ValueError from sanitizing, scoring or plotting is reported
        # to the caller as the sentinel -1.
        result = -1
    return result
Esempio n. 3
0
    f2 = ECDF(data2)
    return 1-sum(1 for c in xrange(m) if abs(f1(c) - f2(c)) <= alpha)/m


# Single experiment run: draw an integer sample, sanitize it, time the
# sanitizer, and compare/plot the sanitized output against the original.
#
# The print calls use the single-argument parenthesised form so the script
# parses on Python 3 while printing exactly the same text on Python 2.

# Arguments later passed positionally to sanitize() after the data and domain.
# NOTE(review): by analogy with check(alpha, beta, eps, delta) these look
# like alpha, beta, eps and delta -- confirm against sanitize's signature.
a, b, e, d = 0.1, 0.1, 0.5, 2**-20
# NOTE(review): the divisor 231 is an unexplained constant -- confirm origin.
b *= a / 231
samples_no = 5000
parameter = 5

# Draw the sample, truncate to integers and shift so the minimum is 0.
data = [int(i) for i in normal(0, parameter, samples_no)]
m = min(data)
data = [i-m for i in data]
print(len(data))
max_sample = max(data)
print(max_sample)

# Smallest exponent whose power of two is at least max_sample + 1.
dim = ceil(log2(max_sample + 1))
print(dim)
end_domain = 2**int(dim)

# Time only the sanitize() call.
start_time = time.time()
san = sanitize(data, (0, end_domain), a, b, e, d)
run_time = time.time() - start_time
print(max(san))
print(cdf_comp(san, data, a))

# Keep only the sorted sanitized values located before the insertion point
# of max_sample, so both plots cover a comparable range.
# NOTE(review): assuming numpy.searchsorted with its default side='left',
# values equal to max_sample are excluded -- confirm this is intended.
sorted_san = sorted(san)
i_max_san = searchsorted(sorted_san, max_sample)
limited_san = sorted_san[:i_max_san]
plot_cdf(data)
plot_cdf(limited_san)
plt.show()

print("run-time: %.2f seconds" % run_time)