def check(samples_size, alpha, beta, eps, delta, parameter):
    """Sanitize one random integer sample and score it against the original.

    Draws ``samples_size`` values with ``normal(0, parameter, samples_size)``,
    truncates each to an int and shifts the sample so its minimum is 0.  The
    shifted sample is handed to ``sanitize`` over the domain
    ``(0, end_domain)``, where ``end_domain`` is the smallest power of two
    that is at least ``max(sample) + 1``.  The sanitized output is then
    compared with the sample through ``cdf_comp``; when that comparison
    returns exactly 0, both data sets are plotted.

    Args:
        samples_size: Number of values to draw.
        alpha, beta, eps, delta: Forwarded unchanged to ``sanitize``.
        parameter: Second argument of ``normal`` (presumably the standard
            deviation -- confirm against the imported ``normal``).

    Returns:
        The value returned by ``cdf_comp``, or -1 when the sample is empty
        or a ``ValueError`` is raised while sanitizing, comparing or
        plotting.
    """
    data = [int(value) for value in normal(0, parameter, samples_size)]
    if not data:
        # min()/max() raise ValueError on an empty sequence; report an empty
        # sample with the same failure code used for a failed sanitization
        # instead of letting that exception escape.
        return -1

    lowest = min(data)
    data = [value - lowest for value in data]
    max_sample = max(data)

    # Round the domain size up to a power of two that covers 0..max_sample.
    dim = ceil(log2(max_sample + 1))
    end_domain = 2 ** int(dim)

    try:
        san = sanitize(data, (0, end_domain), alpha, beta, eps, delta)
        result = cdf_comp(san, data)
        if result == 0:
            plot_san_and_original(data, san)
    except ValueError:
        result = -1
    return result
def check(samples_size, alpha, beta, eps, delta, parameter):
    """Run one sanitization trial on a random sample and return its score.

    The sample is ``samples_size`` values from
    ``normal(0, parameter, samples_size)``, each truncated to an int and
    then shifted so the smallest value becomes 0.  ``sanitize`` is called on
    it with the domain ``(0, end_domain)``; ``end_domain`` is the smallest
    power of two not below ``max(sample) + 1``.  ``cdf_comp`` scores the
    sanitized data against the sample, and a score of exactly 0 triggers
    ``plot_san_and_original``.

    Args:
        samples_size: How many values to draw.
        alpha, beta, eps, delta: Passed through to ``sanitize`` as given.
        parameter: Second argument of ``normal`` (presumably the spread of
            the distribution -- confirm against the imported ``normal``).

    Returns:
        The ``cdf_comp`` score, or -1 if the sample is empty or a
        ``ValueError`` is raised by the sanitize / compare / plot step.
    """
    data = [int(x) for x in normal(0, parameter, samples_size)]
    if len(data) == 0:
        # An empty sample would make min() raise ValueError outside the
        # try block below; map it to the function's existing -1 failure
        # code so callers see one consistent error signal.
        return -1

    m = min(data)
    data = [x - m for x in data]
    max_sample = max(data)

    # Smallest exponent whose power of two covers every shifted value.
    dim = ceil(log2(max_sample + 1))
    end_domain = 2 ** int(dim)

    try:
        san = sanitize(data, (0, end_domain), alpha, beta, eps, delta)
        result = cdf_comp(san, data)
        if result == 0:
            plot_san_and_original(data, san)
    except ValueError:
        result = -1
    return result
# NOTE(review): the next two statements are the end of a function whose
# header lies above this excerpt; they are kept unchanged and indented as a
# function body.  The function returns 1 minus the share of c in xrange(m)
# for which |f1(c) - f2(c)| <= alpha.  Under Python 2 integer division
# (assuming m is an int and true division is not enabled) that share floors
# to 0 or 1, so the result is 0 only when every c passes -- confirm intent.
    f2 = ECDF(data2)
    return 1-sum(1 for c in xrange(m) if abs(f1(c) - f2(c)) <= alpha)/m


# --- Driver script: sanitize one random sample, print stats, plot CDFs ---

# Arguments forwarded to sanitize() below; `a` is also passed to cdf_comp().
a, b, e, d = 0.1, 0.1, 0.5, 2**-20
# Scale b by a / 231.  NOTE(review): the origin of 231 is not visible here.
b *= a / 231
samples_no = 5000
parameter = 5

# Draw samples_no values, truncate each to an int, then shift the sample so
# that its minimum is 0.
data = [int(i) for i in normal(0, parameter, samples_no)]
m = min(data)
data = [i-m for i in data]
print len(data)
max_sample = max(data)
print max_sample

# Domain upper bound: the smallest power of two that is >= max_sample + 1.
dim = ceil(log2(max_sample + 1))
print dim
end_domain = 2**int(dim)

# Only the sanitize() call is timed.
start_time = time.time()
san = sanitize(data, (0, end_domain), a, b, e, d)
run_time = time.time() - start_time
print max(san)
print cdf_comp(san, data, a)

# Keep the sorted sanitized values that precede the insertion index of
# max_sample (presumably numpy.searchsorted with its default side, i.e.
# values strictly below max_sample -- TODO confirm), then plot the CDF of
# the original data and of that truncated sanitized data.
sorted_san = sorted(san)
i_max_san = searchsorted(sorted_san, max_sample)
limited_san = sorted_san[:i_max_san]
plot_cdf(data)
plot_cdf(limited_san)
plt.show()
print "run-time: %.2f seconds" % run_time