Example #1
0
    def setUp(self):
        """Build 1D and 2D fixtures (data, workloads, algorithms) for the tests."""
        super(TestExperiment, self).setUp()

        # Experiment-wide seed and privacy budget.
        self.expr_seed = 12345
        self.expr_eps = 0.1

        sample_size = 1E4
        dom_1d = 1024
        dom_2d = (32, 32)  # numpy-style shape tuple

        # 1D dataset with a prefix-sum workload.
        self.X1 = dataset.DatasetSampledFromFile(nickname='HEPTH',
                                                 sample_to_scale=sample_size,
                                                 reduce_to_dom_shape=dom_1d,
                                                 seed=111)
        self.W1 = workload.Prefix1D(domain_shape_int=dom_1d)

        # 2D dataset with random range queries of two shapes.
        self.X2 = dataset.DatasetSampledFromFile(nickname='SF-CABS-S',
                                                 sample_to_scale=sample_size,
                                                 reduce_to_dom_shape=dom_2d,
                                                 seed=111)
        self.W2 = workload.RandomRange(shape_list=[(5, 5), (10, 10)],
                                       domain_shape=dom_2d,
                                       size=1000,
                                       seed=9001)

        # Algorithms under test: uniform works for 1D and 2D, AG is 2D-only.
        self.A = uniform.uniform_noisy_engine()
        self.A2 = AG.AG_engine(c=10, c2=5, alpha=.4)
    def setUp(self):
        """Prepare 1D and 2D data/workload pairs used by the algorithm tests."""
        super(TestAlgorithm, self).setUp()

        # Shared experiment parameters.
        self.expr_seed = 12345
        self.expr_eps = 0.1

        sample_size = 1E4
        dom_1d = 1024
        dom_2d = (32, 32)  # numpy-style shape tuple

        # 1D dataset with a prefix-sum workload.
        self.X1 = dataset.DatasetSampledFromFile(nickname='BIDS-ALL',
                                                 sample_to_scale=sample_size,
                                                 reduce_to_dom_shape=dom_1d,
                                                 seed=111)
        self.W1 = workload.Prefix1D(domain_shape_int=dom_1d)

        # 2D dataset with random range queries of two shapes.
        self.X2 = dataset.DatasetSampledFromFile(nickname='SF-CABS-E',
                                                 sample_to_scale=sample_size,
                                                 reduce_to_dom_shape=dom_2d,
                                                 seed=111)
        self.W2 = workload.RandomRange(shape_list=[(5, 5), (10, 10)],
                                       domain_shape=dom_2d,
                                       size=1000,
                                       seed=9001)
def run_experiment(datasets, alg_engine, epsilons, seed, num_bins):
    """Run alg_engine over every (dataset, epsilon) pair and collect error stats.

    Args:
        datasets: sequence of 1D count vectors (anything ``np.array`` accepts).
        alg_engine: privacy algorithm exposing ``Run(workload, data, eps, seed)``.
        epsilons: iterable of privacy budgets to sweep.
        seed: RNG seed forwarded to the algorithm.
        num_bins: histogram bin count used for the error metric.

    Returns:
        List of tuples ``(scale, domain_size, error, data_range, std_dev,
        uniform_distance, epsilon, dataset_index)``.  Index 7 is the position
        of the dataset in ``datasets`` (the original appended an undefined
        ``data_set_index`` name here, which raised NameError).
    """
    total_runs = len(epsilons) * len(datasets)
    print("total runs: ", total_runs)
    num_done = 0
    experiment_results = []
    # BUG FIX: the original read `for i in  in range(...)`, a syntax error.
    for i in range(len(datasets)):
        data = np.array(datasets[i])
        # Skip degenerate inputs: the corpus contains all-zero vectors, and
        # branching algorithms need a domain of at least 3 cells.
        if data.sum() == 0 or len(data) <= 2:
            print("bad dataset")
            continue

        # Summary statistics of the true data, reported with every run.
        scale = data.sum()
        domain_size = len(data)
        data_range = data.max() - data.min()
        std_dev = math.sqrt(np.var(data))
        uniform_distance = algs.uniform_distance(data)

        for epsilon in epsilons:
            w = workload.Prefix1D(domain_shape_int=domain_size)
            data_hat = alg_engine.Run(w, data, epsilon, seed)

            # Scaled error between the true and private histograms.
            histogram, bin_size = algs.get_histogram(data, num_bins)
            private_hist, bin_size = algs.get_histogram(data_hat, num_bins)
            error = algs.get_scaled_error(histogram, private_hist)

            experiment_results.append((scale, domain_size, error, data_range,
                                       std_dev, uniform_distance, epsilon, i))
            num_done += 1
            if num_done % 50 == 0:
                print("num done: ", num_done)
    return experiment_results
Example #4
0
 def setUp(self):
     """Fixture: a 1024-cell ramp dataset, two workloads, and the AHP engine."""
     domain_size = 1024
     self.hist = numpy.array(list(range(domain_size)))
     self.d = dataset.Dataset(self.hist, None)

     # Privacy budget plus the workloads the engine is evaluated against.
     self.epsilon = 0.1
     self.w1 = workload.Identity.oneD(domain_size, weight=1.0)
     self.w2 = workload.Prefix1D(domain_size)
     self.eng = ahp.ahp_engine(ratio=0.5, eta=0.4)
Example #5
0
 def __init__(self, data, dom_size, scale, wktype, eps):
     """Bundle a data vector with its domain size, scale, workload and budget.

     A truthy ``wktype`` selects a Prefix1D workload, a falsy one selects
     Identity over the same domain.
     """
     self.x = data
     self.dom_size = dom_size
     self.scale = scale
     self.wkload_type = wktype
     self.epsilon = eps
     # Pick the query workload from the flag.
     if wktype:
         self.Q = workload.Prefix1D(domain_shape_int=dom_size)
     else:
         self.Q = workload.Identity((dom_size,))
 def setUp(self):
     """Build two single-run experiments that differ only in seed, wrapped in
     SampleError metrics."""
     engine = uniform.uniform_noisy_engine()
     data = dataset.DatasetSampledFromFile(nickname='BIDS-ALL',
                                           sample_to_scale=1E4,
                                           reduce_to_dom_shape=1024,
                                           seed=0)
     queries = workload.Prefix1D(domain_shape_int=1024)
     run_a = experiment.Single(data, queries, engine, 0.1, 0)
     run_b = experiment.Single(data, queries, engine, 0.1, 1)
     self.metric_group = [metric.SampleError(run_a), metric.SampleError(run_b)]
Example #7
0
    def setUp(self):
        """Fixture: ramp dataset, a random distribution, two workloads, and the
        identity engine."""
        size = 1024
        self.hist = numpy.array(list(range(size)))
        self.d = dataset.Dataset(self.hist, None)

        # Random probability vector over the domain, normalized to sum to 1.
        raw = numpy.random.exponential(1, size)
        self.dist = util.old_div(raw, float(raw.sum()))

        self.epsilon = 0.1
        self.w1 = workload.Identity.oneD(size, weight=1.0)
        self.w2 = workload.Prefix1D(size)
        self.eng = identity.identity_engine()
Example #8
0
def W(dim, domain, size, workclass, seed):
    """Instantiate a workload of class ``workclass`` over ``domain``.

    Supports Prefix1D and the RandomRange family (including
    SimpleRandomRange subclasses); any other class raises TypeError.
    """
    if workclass == workload.Prefix1D:
        return workload.Prefix1D(domain_shape_int=domain)

    if not util.contains_superclass(workclass, 'RandomRange'):
        raise TypeError('unsupported workload class %s' % repr(workclass))

    # RandomRange family: a 1D domain is wrapped into a one-element list.
    shape = [domain] if dim == 1 else domain

    if util.contains_superclass(workclass, 'SimpleRandomRange'):
        return workclass(domain_shape=shape, size=size, seed=seed)

    return workload.RandomRange(shape_list=None,
                                domain_shape=shape,
                                size=size,
                                seed=seed)
    def setUp(self):
        """Assemble two complete experiment pipelines (data, workload,
        algorithm, metric) — one 1D, one 2D."""
        super(TestExecution, self).setUp()

        self.expr_seed = 12345
        self.expr_eps = 0.1

        sample_size = 1E4
        shape_1d = 1024
        shape_2d = (32, 32)

        # 1D problem: HEPTH data with a prefix-sum workload.
        self.X1 = dataset.DatasetSampledFromFile(nickname='HEPTH',
                                                 sample_to_scale=sample_size,
                                                 reduce_to_dom_shape=shape_1d,
                                                 seed=111)
        self.W1 = workload.Prefix1D(domain_shape_int=shape_1d)

        # 2D problem: SF-CABS data with random range queries.
        self.X2 = dataset.DatasetSampledFromFile(nickname='SF-CABS-S',
                                                 sample_to_scale=sample_size,
                                                 reduce_to_dom_shape=shape_2d,
                                                 seed=111)
        self.W2 = workload.RandomRange(shape_list=[(5, 5), (10, 10)],
                                       domain_shape=shape_2d,
                                       size=1000,
                                       seed=9001)

        # Engines: uniform for 1D, AG for 2D.
        self.A1 = uniform.uniform_noisy_engine()
        self.A2 = AG.AG_engine(c=10, c2=5, alpha=.4)

        # Single-run experiments sharing the seed and budget above.
        self.E1 = experiment.Single(self.X1, self.W1, self.A1,
                                    epsilon=self.expr_eps,
                                    seed=self.expr_seed)
        self.E2 = experiment.Single(self.X2, self.W2, self.A2,
                                    epsilon=self.expr_eps,
                                    seed=self.expr_seed)

        self.M1 = metric.SampleError(self.E1)
        self.M2 = metric.PopulationError(self.E2)
# Load the regression model's per-dataset predictions (per-algorithm
# epsilon/error estimates).
# NOTE(review): absolute, machine-specific path, and the file handle is
# never closed — TODO parameterize and use a `with` block.
predictions = pickle.load(
    open("/home/famien/Code/MENG/regression/model_predictions.p", "rb"))

# Run each algorithm engine on every data file and compare measured error
# against the model's predicted error.
seed = 2
num_bins = 50

error_errors = []   # |predicted_error - measured error| for every run
all_results = []
num_correct = 0

for i in range(len(data_files)):
    data_file = data_files[i]
    # NOTE(review): this rebinds `dataset`, shadowing the `dataset` module
    # name used by other snippets in this file — confirm intended.
    dataset = np.load(data_file)
    # NOTE(review): `epsilon` is assigned but `predicted_epsilon` is what is
    # actually passed to Run below — confirm this is deliberate.
    epsilon = .01
    w = workload.Prefix1D(domain_shape_int=len(dataset))
    results = {}  # alg short_name -> measured scaled error
    predicted_error = predictions[i]['dataset_stat'][2]

    for alg_engine in alg_engines:
        # Run each engine at the epsilon the model predicted for it.
        predicted_epsilon = predictions[i][alg_engine.short_name][0]
        dataset_hat = alg_engine.Run(w, dataset, predicted_epsilon, seed)
        histogram, bin_size = algs.get_histogram(dataset, num_bins)
        private_hist, bin_size = algs.get_histogram(dataset_hat, num_bins)
        error = algs.get_scaled_error(histogram, private_hist)
        error_errors.append(abs(predicted_error - error))
        results[alg_engine.short_name] = error

    # Algorithm with the smallest measured error for this dataset.
    actual_best = min(results, key=results.get)
    predictions_algs = {}
    # NOTE(review): the body of this loop is truncated in this chunk.
    for key in predictions[i].keys():
Example #11
0
# Replay each algorithm engine over precomputed dataset statistics and
# collect (predicted, actual) error pairs.
all_results = []
num_done = 0
# BUG FIX: the original used these accumulators without initializing them.
error_pairs = []        # (predicted error, measured error) tuples
alg_epsilon_info = {}   # alg short_name -> (predicted_epsilon, actual_error)

for alg_engine in alg_engines:
    model = models[alg_engine.short_name]
    for dataset_stat in test_data:
        # Unpack the precomputed statistics row (same layout run_experiment
        # produces: scale, domain size, error, range, std dev, uniform
        # distance, epsilon, dataset index).
        scale = dataset_stat[0]
        domain_size = dataset_stat[1]
        actual_error = dataset_stat[2]
        data_range = dataset_stat[3]
        std_dev = dataset_stat[4]
        uniform_distance = dataset_stat[5]
        actual_epsilon = dataset_stat[6]
        # BUG FIX: was `datset = ...`, which left `dataset` unbound below.
        dataset = datasets[dataset_stat[7]]

        # BUG FIX: was `len(domain_size)` on an int; domain_size already IS
        # the domain size.
        w = workload.Prefix1D(domain_shape_int=domain_size)

        # NOTE(review): `predicted_epsilon` is never computed in this chunk —
        # presumably it should come from `model`; confirm upstream.
        dataset_hat = alg_engine.Run(w, dataset, predicted_epsilon, seed)
        histogram, bin_size = algs.get_histogram(dataset, num_bins)
        private_hist, bin_size = algs.get_histogram(dataset_hat, num_bins)
        actual_error = algs.get_scaled_error(histogram, private_hist)
        alg_epsilon_info[alg_engine.short_name] = (predicted_epsilon,
                                                   actual_error)
        # NOTE(review): `error` is undefined here — likely the model's
        # predicted error for this row; confirm.
        error_pairs.append((error, actual_error))

        #results["runs"].append((alg_engine.short_name, epsilon, error))
        num_done += 1
        if num_done % 50 == 0:
            print("num done: ", num_done)

pickle.dump(error_pairs,