# Code example #1
def run_experiment(datasets, alg_engine, epsilons, seed, num_bins):
	"""Run `alg_engine` on every (dataset, epsilon) pair and collect stats.

	Args:
		datasets: sequence of 1-D data vectors (counts per domain element).
		alg_engine: object exposing Run(workload, dataset, epsilon, seed),
			returning a privatized estimate of the dataset.
		epsilons: iterable of privacy budgets to sweep.
		seed: RNG seed forwarded to the engine.
		num_bins: number of histogram bins used when scoring error.

	Returns:
		List of tuples (scale, domain_size, error, data_range, std_dev,
		uniform_distance, epsilon, dataset_index).
	"""
	total_runs = len(epsilons) * len(datasets)
	print("total runs: ", total_runs)
	num_done = 0
	experiment_results = []
	# BUG FIX: original line was "for i in  in range(len(datasets)):",
	# a syntax error; enumerate gives index and element directly.
	for i, dataset in enumerate(datasets):
		# Skip degenerate inputs: all-zero vectors appear in the data for
		# some reason, and branching needs a domain of at least 3.
		if sum(dataset) == 0 or len(dataset) <= 2:
			print("bad dataset")
			continue

		dataset = np.array(dataset)
		scale = sum(dataset)
		domain_size = len(dataset)
		data_range = max(dataset) - min(dataset)
		std_dev = math.sqrt(np.var(dataset))
		uniform_distance = algs.uniform_distance(dataset)

		for epsilon in epsilons:
			w = workload.Prefix1D(domain_shape_int=len(dataset))
			dataset_hat = alg_engine.Run(w, dataset, epsilon, seed)

			histogram, bin_size = algs.get_histogram(dataset, num_bins)
			private_hist, bin_size = algs.get_histogram(dataset_hat, num_bins)
			error = algs.get_scaled_error(histogram, private_hist)

			# BUG FIX: original appended an undefined name `data_set_index`
			# (NameError) alongside `i`; `i` already is the dataset index.
			experiment_results.append((scale, domain_size, error, data_range,
			                           std_dev, uniform_distance, epsilon, i))
			num_done += 1
			if num_done % 50 == 0:
				print("num done: ", num_done)
	return experiment_results
# Code example #2
def get_error_data():
    for i in range(reps):
        for data_vector in dataset_vectors_ext:
            for epsilon in epsilons:
                #num_iterations = 5*math.log(sum(data_vector))
                num_iterations = 2
                queries = algs.get_queries(num_bins, len(data_vector))
                private_dataset = algs.mwem(data_vector, queries,
                                            num_iterations, epsilon)
                histogram, bin_size = algs.get_histogram(data_vector, num_bins)
                private_hist, bin_size = algs.get_histogram(
                    private_dataset, num_bins)

                # collect statistics
                scale = sum(data_vector)
                domain_size = len(data_vector)
                std_dev = math.sqrt(np.var(data_vector))
                uniform_distance = algs.uniform_distance(data_vector)
                private_histograms_data.append(
                    (private_hist, histogram, sum(histogram), domain_size,
                     data_range, std_dev, uniform_distance, epsilon))

                done += 1
                if done % 500 == 0:
                    print "num done = ", done
# Code example #3
# Compare predicted vs. actual per-algorithm error for each data file and
# count how often the predicted best algorithm matches the actual best one.
error_errors = []
all_results = []
num_correct = 0

for idx, data_file in enumerate(data_files):
    dataset = np.load(data_file)
    epsilon = .01
    w = workload.Prefix1D(domain_shape_int=len(dataset))
    predicted_error = predictions[idx]['dataset_stat'][2]

    # Measure each engine's actual scaled histogram error at its
    # predicted epsilon.
    results = {}
    for alg_engine in alg_engines:
        predicted_epsilon = predictions[idx][alg_engine.short_name][0]
        dataset_hat = alg_engine.Run(w, dataset, predicted_epsilon, seed)
        true_hist, bin_size = algs.get_histogram(dataset, num_bins)
        noisy_hist, bin_size = algs.get_histogram(dataset_hat, num_bins)
        err = algs.get_scaled_error(true_hist, noisy_hist)
        error_errors.append(abs(predicted_error - err))
        results[alg_engine.short_name] = err

    actual_best = min(results, key=results.get)

    # isolate the predicted epsilons per algorithm (skip the stats entry)
    predictions_algs = {
        key: entry[0]
        for key, entry in predictions[idx].items()
        if key != 'dataset_stat'
    }

    predicted_best = min(predictions_algs, key=predictions_algs.get)
    if actual_best == predicted_best:
        num_correct += 1
# Code example #4
# Total number of MWEM runs in the sweep below.
# NOTE(review): Python 2 print statements — this snippet is Python 2.
num_total = reps * len(dataset_vectors_ext) * len(epsilons)
print "num total: ", num_total

# for data_vector in dataset_vectors_ext:
# 	print "my scale: ", sum(data_vector)
# 	print "my domain size: ", len(data_vector)

# Sweep: repeat `reps` times over every (data_vector, epsilon) pair,
# run MWEM, and record histogram statistics for later analysis.
for i in range(reps):
    for data_vector in dataset_vectors_ext:
        for epsilon in epsilons:
            #num_iterations = 5*math.log(sum(data_vector))
            num_iterations = 2  # fixed count; log-based rule commented out above
            queries = algs.get_queries(num_bins, len(data_vector))
            private_dataset = algs.mwem(data_vector, queries, num_iterations,
                                        epsilon)
            # Bin the true and the privatized vectors with the same bin count.
            histogram, bin_size = algs.get_histogram(data_vector, num_bins)
            private_hist, bin_size = algs.get_histogram(
                private_dataset, num_bins)

            # collect statistics
            # NOTE(review): `scale` is computed but never stored — the tuple
            # below uses sum(histogram) instead; confirm which was intended.
            scale = sum(data_vector)
            domain_size = len(data_vector)
            data_range = max(data_vector) - min(data_vector)
            std_dev = math.sqrt(np.var(data_vector))
            uniform_distance = algs.uniform_distance(data_vector)
            private_histograms_data.append(
                (private_hist, histogram, sum(histogram), domain_size,
                 data_range, std_dev, uniform_distance, epsilon))

            # Progress counter — presumably `done` is initialized earlier in
            # the file; not visible in this chunk (verify).
            done += 1
            if done % 500 == 0:  # body continues past this chunk