def setUp(self):
    """Build shared fixtures: 1D/2D sampled datasets, workloads, and engines."""
    super(TestExperiment, self).setUp()
    # Shared experiment parameters.
    dom_1d = 1024
    n_sample = 1E4
    dom_2d = (32, 32)  # numpy shape tuple
    self.expr_seed = 12345
    self.expr_eps = 0.1
    # 1D data and workload.
    self.X1 = dataset.DatasetSampledFromFile(nickname='HEPTH',
                                             sample_to_scale=n_sample,
                                             reduce_to_dom_shape=dom_1d,
                                             seed=111)
    self.W1 = workload.Prefix1D(domain_shape_int=dom_1d)
    # 2D data and workload.
    self.X2 = dataset.DatasetSampledFromFile(nickname='SF-CABS-S',
                                             sample_to_scale=n_sample,
                                             reduce_to_dom_shape=dom_2d,
                                             seed=111)
    self.W2 = workload.RandomRange(shape_list=[(5, 5), (10, 10)],
                                   domain_shape=dom_2d,
                                   size=1000,
                                   seed=9001)
    self.A = uniform.uniform_noisy_engine()  # this algorithm works for 1D and 2D
    self.A2 = AG.AG_engine(c=10, c2=5, alpha=.4)  # this algorithm works for 2D only
def setUp(self):
    """Build shared fixtures: 1D/2D sampled datasets and matching workloads."""
    super(TestAlgorithm, self).setUp()
    # Shared experiment parameters.
    dom_1d = 1024
    n_sample = 1E4
    dom_2d = (32, 32)  # numpy shape tuple
    self.expr_seed = 12345
    self.expr_eps = 0.1
    # 1D data and workload.
    self.X1 = dataset.DatasetSampledFromFile(nickname='BIDS-ALL',
                                             sample_to_scale=n_sample,
                                             reduce_to_dom_shape=dom_1d,
                                             seed=111)
    self.W1 = workload.Prefix1D(domain_shape_int=dom_1d)
    # 2D data and workload.
    self.X2 = dataset.DatasetSampledFromFile(nickname='SF-CABS-E',
                                             sample_to_scale=n_sample,
                                             reduce_to_dom_shape=dom_2d,
                                             seed=111)
    self.W2 = workload.RandomRange(shape_list=[(5, 5), (10, 10)],
                                   domain_shape=dom_2d,
                                   size=1000,
                                   seed=9001)
def run_experiment(datasets, alg_engine, epsilons, seed, num_bins):
    """Run ``alg_engine`` over every (dataset, epsilon) pair.

    For each usable dataset, builds a Prefix1D workload, produces a private
    estimate with ``alg_engine.Run``, and records the scaled histogram error
    together with summary statistics of the dataset.

    :param datasets:   sequence of 1D count vectors
    :param alg_engine: engine exposing ``Run(workload, data, epsilon, seed)``
    :param epsilons:   iterable of privacy budgets to sweep
    :param seed:       seed forwarded to the engine
    :param num_bins:   number of bins for the error histogram
    :return: list of tuples
             (scale, domain_size, error, data_range, std_dev,
              uniform_distance, epsilon, dataset_index)
    """
    total_runs = len(epsilons) * len(datasets)
    print("total runs: ", total_runs)
    num_done = 0
    experiment_results = []
    # BUG FIX: original read `for i in in range(len(datasets))` — a syntax
    # error. Also renamed the loop variable: the original local `dataset`
    # shadowed the `dataset` module used elsewhere in this file.
    for i, data in enumerate(datasets):
        # For some reason there are '0' data vectors; also, for branching,
        # length should be at least 3.
        if sum(data) == 0 or len(data) <= 2:
            print("bad dataset")
            continue
        data = np.array(data)
        scale = sum(data)
        domain_size = len(data)
        data_range = max(data) - min(data)
        std_dev = math.sqrt(np.var(data))
        uniform_distance = algs.uniform_distance(data)
        for epsilon in epsilons:
            w = workload.Prefix1D(domain_shape_int=len(data))
            data_hat = alg_engine.Run(w, data, epsilon, seed)
            histogram, bin_size = algs.get_histogram(data, num_bins)
            private_hist, bin_size = algs.get_histogram(data_hat, num_bins)
            error = algs.get_scaled_error(histogram, private_hist)
            # BUG FIX: original appended an undefined name `data_set_index`
            # alongside `i`; the dataset index `i` is the intended final
            # field (consumers read the index from position 7).
            experiment_results.append((scale, domain_size, error, data_range,
                                       std_dev, uniform_distance, epsilon, i))
            num_done += 1
            if num_done % 50 == 0:
                print("num done: ", num_done)
    return experiment_results
def setUp(self):
    """Fixture: ramp histogram dataset, two 1D workloads, and an AHP engine."""
    size = 1024
    self.hist = numpy.array(list(range(size)))
    self.d = dataset.Dataset(self.hist, None)
    self.epsilon = 0.1
    self.w1 = workload.Identity.oneD(1024, weight=1.0)
    self.w2 = workload.Prefix1D(1024)
    self.eng = ahp.ahp_engine(ratio=0.5, eta=0.4)
def __init__(self, data, dom_size, scale, wktype, eps):
    """Bundle one experiment configuration.

    :param data:     input data vector
    :param dom_size: 1D domain size
    :param scale:    data scale
    :param wktype:   truthy selects a Prefix1D workload, falsy an Identity one
    :param eps:      privacy budget
    """
    self.x = data
    self.dom_size = dom_size
    self.scale = scale
    # Prefix workload when wktype is truthy, identity workload otherwise.
    self.Q = (workload.Prefix1D(domain_shape_int=dom_size)
              if wktype else workload.Identity((dom_size,)))
    self.wkload_type = wktype
    self.epsilon = eps
def setUp(self):
    """Fixture: two single-run experiments identical except for the seed,
    wrapped in SampleError metrics."""
    engine = uniform.uniform_noisy_engine()
    data = dataset.DatasetSampledFromFile(nickname='BIDS-ALL',
                                          sample_to_scale=1E4,
                                          reduce_to_dom_shape=1024,
                                          seed=0)
    wkld = workload.Prefix1D(domain_shape_int=1024)
    expt_a = experiment.Single(data, wkld, engine, 0.1, 0)
    expt_b = experiment.Single(data, wkld, engine, 0.1, 1)
    self.metric_group = [metric.SampleError(expt_a),
                         metric.SampleError(expt_b)]
def setUp(self):
    """Fixture: ramp histogram dataset, a normalized random distribution,
    two 1D workloads, and an identity engine."""
    size = 1024
    self.hist = numpy.array(list(range(size)))
    self.d = dataset.Dataset(self.hist, None)
    # Random exponential draw, normalized to sum to 1.
    raw = numpy.random.exponential(1, size)
    self.dist = util.old_div(raw, float(raw.sum()))
    self.epsilon = 0.1
    self.w1 = workload.Identity.oneD(1024, weight=1.0)
    self.w2 = workload.Prefix1D(1024)
    self.eng = identity.identity_engine()
def W(dim, domain, size, workclass, seed):
    """Instantiate a workload of the requested class over *domain*.

    Supports Prefix1D and the RandomRange family; anything else raises
    TypeError.
    """
    # Prefix workloads ignore dim/size/seed.
    if workclass == workload.Prefix1D:
        return workload.Prefix1D(domain_shape_int=domain)
    if not util.contains_superclass(workclass, 'RandomRange'):
        raise TypeError('unsupported workload class %s' % repr(workclass))
    # RandomRange family expects a sequence-shaped domain, even in 1D.
    if dim == 1:
        domain = [domain]
    if util.contains_superclass(workclass, 'SimpleRandomRange'):
        return workclass(domain_shape=domain, size=size, seed=seed)
    return workload.RandomRange(shape_list=None, domain_shape=domain,
                                size=size, seed=seed)
def setUp(self):
    """Build fixtures: datasets, workloads, engines, experiments, metrics."""
    super(TestExecution, self).setUp()
    dom_1d = 1024
    n_sample = 1E4
    dom_2d = (32, 32)
    self.expr_seed = 12345
    self.expr_eps = 0.1
    # 1D data and workload.
    self.X1 = dataset.DatasetSampledFromFile(nickname='HEPTH',
                                             sample_to_scale=n_sample,
                                             reduce_to_dom_shape=dom_1d,
                                             seed=111)
    self.W1 = workload.Prefix1D(domain_shape_int=dom_1d)
    # 2D data and workload.
    self.X2 = dataset.DatasetSampledFromFile(nickname='SF-CABS-S',
                                             sample_to_scale=n_sample,
                                             reduce_to_dom_shape=dom_2d,
                                             seed=111)
    self.W2 = workload.RandomRange(shape_list=[(5, 5), (10, 10)],
                                   domain_shape=dom_2d,
                                   size=1000,
                                   seed=9001)
    self.A1 = uniform.uniform_noisy_engine()
    self.A2 = AG.AG_engine(c=10, c2=5, alpha=.4)
    # One experiment per (dataset, workload, engine) pairing.
    self.E1 = experiment.Single(self.X1, self.W1, self.A1,
                                epsilon=self.expr_eps, seed=self.expr_seed)
    self.E2 = experiment.Single(self.X2, self.W2, self.A2,
                                epsilon=self.expr_eps, seed=self.expr_seed)
    self.M1 = metric.SampleError(self.E1)
    self.M2 = metric.PopulationError(self.E2)
predictions = pickle.load( open("/home/famien/Code/MENG/regression/model_predictions.p", "rb")) # run four algs on each files seed = 2 num_bins = 50 error_errors = [] all_results = [] num_correct = 0 for i in range(len(data_files)): data_file = data_files[i] dataset = np.load(data_file) epsilon = .01 w = workload.Prefix1D(domain_shape_int=len(dataset)) results = {} predicted_error = predictions[i]['dataset_stat'][2] for alg_engine in alg_engines: predicted_epsilon = predictions[i][alg_engine.short_name][0] dataset_hat = alg_engine.Run(w, dataset, predicted_epsilon, seed) histogram, bin_size = algs.get_histogram(dataset, num_bins) private_hist, bin_size = algs.get_histogram(dataset_hat, num_bins) error = algs.get_scaled_error(histogram, private_hist) error_errors.append(abs(predicted_error - error)) results[alg_engine.short_name] = error actual_best = min(results, key=results.get) predictions_algs = {} for key in predictions[i].keys():
# For each engine/model, re-run on the test datasets and collect
# (predicted, actual) error pairs for later pickling.
# NOTE(review): this fragment is truncated at the end of the chunk and
# references names defined outside it (models, test_data, datasets, seed,
# num_bins, predicted_epsilon, error, alg_epsilon_info, error_pairs).
# The indentation below is a reconstruction — verify against the original.
all_results = []
num_done = 0
for alg_engine in alg_engines:
    model = models[alg_engine.short_name]
    for dataset_stat in test_data:
        # dataset_stat layout (matches run_experiment's output tuple):
        # (scale, domain_size, error, range, std_dev, uniform_distance,
        #  epsilon, dataset_index)
        scale = dataset_stat[0]
        domain_size = dataset_stat[1]
        actual_error = dataset_stat[2]
        data_range = dataset_stat[3]
        std_dev = dataset_stat[4]
        uniform_distance = dataset_stat[5]
        actual_epsilon = dataset_stat[6]
        # NOTE(review): typo — `datset` is assigned but `dataset` is used
        # below, so this lookup is dead and `dataset` comes from outer scope.
        datset = datasets[dataset_stat[7]]
        # NOTE(review): len(domain_size) on an int raises TypeError;
        # probably meant domain_shape_int=domain_size. Confirm and fix.
        w = workload.Prefix1D(domain_shape_int=len(domain_size))
        # NOTE(review): predicted_epsilon is never assigned in this fragment.
        dataset_hat = alg_engine.Run(w, dataset, predicted_epsilon, seed)
        histogram, bin_size = algs.get_histogram(dataset, num_bins)
        private_hist, bin_size = algs.get_histogram(dataset_hat, num_bins)
        actual_error = algs.get_scaled_error(histogram, private_hist)
        alg_epsilon_info[alg_engine.short_name] = (predicted_epsilon,
                                                   actual_error)
        # NOTE(review): `error` is never assigned in this fragment.
        error_pairs.append((error, actual_error))
        #results["runs"].append((alg_engine.short_name, epsilon, error))
        num_done += 1
        if num_done % 50 == 0:
            print("num done: ", num_done)
# NOTE(review): call truncated in source — remainder of the chunk is missing.
pickle.dump(error_pairs,