if param_combination in searched_params: experiment_counter += 1 # skip, but dont resave... print('Skipping: ' + str(param_combination)) continue print('Running for: ' + str(param_combination)) max_eps = param_combination[0] dp_noise_scale = param_combination[1] clipping_bound = param_combination[2] damping_val = param_combination[3] eps_i = np.zeros(experiment_setup['N_dp_seeds']) kl_i = np.zeros(experiment_setup['N_dp_seeds']) results_objects = [] log_moments = generate_log_moments(no_workers, 32, dp_noise_scale, no_workers) # start everything running... for ind, seed in enumerate(dp_seeds): results = run_dp_analytical_pvi_sync.remote( experiment_setup, seed, max_eps, workers_data, dp_noise_scale, damping_val, clipping_bound, log_moments) results_objects.append((results, ind)) # fetch one by one for results_tup in results_objects: [results_obj, ind] = results_tup results = ray.get(results_obj) eps = results[0] kl = results[1] eps_i[ind] = eps
# --- Fragment of an experiment loop (the loop header and the `except`
# matching the `try:` below are outside this chunk): run one global-DP
# analytical PVI experiment synchronously and log its results.
print(experiment_code)
print("Sampled Parameters: {}".format(sampled_params))
print("Exact Inference Params: {}".format(exact_params))
if experiment_code in alreadyRunExperiments and not should_overwrite:
    # This configuration already has saved results and overwriting is off.
    print("Skipping Experiment")
    pprint.pprint(full_setup, width=1)
    print("Experiment Skipped \n\n")
    continue
try:
    pprint.pprint(full_setup, width=1)
    print("Calc Log Moments")
    log_moments = generate_log_moments(
        no_workers, 32, full_experiment_setup['dp_noise_scale'], no_workers)
    print("Calc Log Moments - Done \n\n")
    # NOTE(review): the log_moments computed just above are NOT passed to the
    # run (log_moments=None), so the callee regenerates its own increments
    # with different arguments (N=1, lot size 1). Confirm whether this is
    # intentional or a bug.
    results = run_global_dp_analytical_pvi_sync.remote(
        full_setup, seed, dataset, log_moments=None)
    # Blocks until the remote run finishes.
    [eps, kl, tracker] = ray.get(results)
    # NOTE(review): one placeholder but two .format arguments — `ind` is
    # silently ignored by str.format.
    fname = path + 'e{}.csv'.format(experiment_counter, ind)
    np.savetxt(fname, tracker, delimiter=',')
    # NOTE(review): opened without `with`; presumably closed later in this
    # loop body (past the visible chunk) — verify it is closed on all paths.
    text_file = open(log_file_path, "a")
    # One summary row for the experiment log (list continues past this chunk).
    results_array = [
        experiment_counter,
        eps,
        kl,
        sampled_params[0],
        sampled_params[1],
        exact_params[0],
        exact_params[1],
        full_setup['clipping_bound'],
        full_setup['learning_rate']['start_value'],
        full_setup["num_workers"],
# --- Fragment of an experiment loop (header and matching `except` not
# visible): queue one global-DP analytical PVI run asynchronously; eps and KL
# hold -1 placeholders until the pending ray object is fetched later.
print("Sampled Parameters: {}".format(sampled_params))
print("Exact Inference Params: {}".format(exact_params))
if experiment_code in alreadyRunExperiments and not should_overwrite:
    print("Skipping Experiment")
    pprint.pprint(full_setup, width=1)
    experiment_counter += 1
    print("Experiment Skipped \n\n")
    continue
try:
    pprint.pprint(full_setup, width=1)
    # print("Calc Log Moments")
    # only thing that matters is q
    log_moments = generate_log_moments(
        1, 32, full_experiment_setup['dp_noise_scale'], 1)
    # print("Calc Log Moments - Done \n\n")
    # set things running so we can get the results later - this ought to boost performance.
    # NOTE(review): log_moments computed above is not passed (log_moments=None).
    # Here that is likely equivalent, since the callee's default also calls
    # generate_log_moments(1, 32, ..., 1) — but confirm.
    results = run_global_dp_analytical_pvi_sync.remote(
        full_setup, seed, dataset, log_moments=None)
    # Metadata row for this run; positions 1 and 2 (eps, kl) are -1
    # placeholders to be replaced once the remote result is collected.
    results_array = [
        experiment_counter,
        -1,
        -1,
        sampled_params[0],
        sampled_params[1],
        exact_params[0],
        exact_params[1],
        full_setup['clipping_bound'],
        full_setup['learning_rate']['start_value'],
        full_setup["num_workers"],
        full_setup["dataset"]["points_per_worker"],
        experiment_code
    ]
    # Pair the metadata with the pending ray object for later fetching.
    results_obj = [results_array, results]
    experiment_counter += 1
def run_global_dp_analytical_pvi_sync(experiment_setup, seed, all_workers_data, log_moments=None):
    """Run one synchronous, globally differentially-private analytical PVI
    experiment (1-D linear regression) and record per-interval diagnostics.

    Args:
        experiment_setup: dict of configuration; keys used include
            'output_base_dir', 'num_workers', 'dataset' (points_per_worker,
            model_noise_std), 'prior_std', 'max_eps', 'dp_noise_scale',
            'clipping_bound', clipping/noise placement configs, learning-rate
            schedule, convergence settings, and the exact-posterior reference
            values ('exact_mean_pres', 'exact_pres').
        seed: RNG seed applied to both numpy and tensorflow.
        all_workers_data: per-worker data shards, indexed by worker id.
        log_moments: precomputed moments-accountant increments; when None,
            they are regenerated here with N=1 and lot size 1.

    Returns:
        (eps, average_KL_loss, tracker_array): final privacy spend, mean KL
        over the last intervals, and the full per-interval tracker array.
    """
    # logging
    # One timestamped output directory per run, with the seed in its name.
    path = experiment_setup['output_base_dir'] + 'logs/global_dp_ana_sync_pvi/' + time.strftime(
        "%m-%d;%H:%M:%S") + "-s-" + str(seed) + '/'
    if not os.path.exists(path):
        os.makedirs(path)
    # Persist the exact configuration alongside the results for reproducibility.
    setup_file = path + 'run-params.json'
    with open(setup_file, 'w') as outfile:
        json.dump(experiment_setup, outfile)

    np.random.seed(seed)
    tf.set_random_seed(seed)

    # Total number of training points across all workers.
    n_train_master = experiment_setup['num_workers'] * experiment_setup['dataset']['points_per_worker']
    in_dim = 1
    # Moments accountant: fixed delta 1e-5, tracks eps and signals when
    # max_eps is exceeded; 32 moment orders.
    accountant = MomentsAccountant(MomentsAccountantPolicy.FIXED_DELTA_MAX_EPS, 1e-5,
                                   experiment_setup['max_eps'], 32)
    net = linreg_models.LinReg_MFVI_analytic(in_dim, n_train_master,
                                             noise_var=experiment_setup['dataset']['model_noise_std'] ** 2,
                                             prior_var=experiment_setup['prior_std'] ** 2)
    if log_moments is None:
        # Default accounting increment: N=1, lot size 1.
        accountant.log_moments_increment = generate_log_moments(1, 32, experiment_setup['dp_noise_scale'], 1)
    else:
        accountant.log_moments_increment = log_moments

    all_keys, all_values = net.get_params()

    if experiment_setup["convergence_threshold"] == "automatic":
        # Threshold scales with the expected noise magnitude of an update:
        # clipping_bound * noise_scale / sqrt(num_workers).
        convergence_threshold = experiment_setup["clipping_bound"] * experiment_setup["dp_noise_scale"] / (
            experiment_setup["num_workers"] ** 0.5)
        print("convergence threshold calculated automatically: {}".format(convergence_threshold))
    elif experiment_setup["convergence_threshold"] == "disabled":
        # would never be reached...
        convergence_threshold = 0
    else:
        # Explicit numeric threshold supplied in the config.
        convergence_threshold = experiment_setup["convergence_threshold"]

    ps = ParameterServer.remote(all_keys, all_values, convergence_threshold,
                                experiment_setup["convergence_length"], experiment_setup["num_intervals"])

    # Decide where clipping happens (worker vs server) from the config string.
    worker_clipping_config = "not_clipped" if (experiment_setup["clipping_config"] == "not_clipped" or experiment_setup[
        "clipping_config"] == "clipped_server") else "clipped"
    worker_noise_config = "noisy_worker" if experiment_setup["noise_config"] == "noisy_worker" else "not_noisy"
    # Server-side clipping is disabled (infinite bound) unless explicitly
    # configured as "clipped_server".
    global_clipping_bound = experiment_setup['clipping_bound'] if experiment_setup[
        "clipping_config"] == "clipped_server" else np.inf
    # Server-side noise is zero unless the global "noisy" mode is selected.
    global_dp_noise_scale = experiment_setup['dp_noise_scale'] if experiment_setup[
        "noise_config"] == "noisy" else 0

    worker_config = {
        "clipping": worker_clipping_config,
        "noise": worker_noise_config
    }

    learning_rate_schedule = generate_learning_rate_schedule(
        experiment_setup['num_intervals'], experiment_setup['learning_rate'])

    # One ray Worker actor per data shard.
    # NOTE(review): the 1 - start_value argument presumably acts as damping —
    # confirm against Worker's constructor signature.
    workers = [
        Worker.remote(experiment_setup['num_workers'], in_dim, all_workers_data[i],
                      experiment_setup['prior_std'] ** 2, worker_config,
                      experiment_setup['clipping_bound'],
                      1 - experiment_setup['learning_rate']['start_value'],
                      experiment_setup['dataset']['model_noise_std'] ** 2)
        for i in range(experiment_setup['num_workers'])]

    i = 0
    current_params = ray.get(ps.pull.remote(all_keys))
    tracker_vals = []
    # Main synchronous loop: one interval = collect worker deltas ->
    # aggregate (clip/noise) -> push to the parameter server -> re-pull.
    while i < experiment_setup['num_intervals']:
        current_learning_rate = learning_rate_schedule[i]
        # Request every worker's delta in parallel.
        deltas = [
            worker.get_delta.remote(
                current_params, current_learning_rate, damping=experiment_setup['local_damping'])
            for worker in workers]
        # Aggregate the deltas, applying server-side clipping and noise.
        sum_delta = compute_update(all_keys, ray.get(deltas), global_clipping_bound,
                                   experiment_setup['clipping_bound'], 1 - current_learning_rate,
                                   global_dp_noise_scale)
        # Account privacy spend for this interval; True once budget exceeded.
        should_stop_priv = accountant.update_privacy_budget()
        current_eps = accountant.current_tracked_val
        ps.push.remote(all_keys, sum_delta)
        current_params = ray.get(ps.pull.remote(all_keys))
        conv_val = ray.get(ps.get_conv_val.remote())
        # KL between the current posterior and the precomputed exact posterior.
        KL_loss = KL_Gaussians(current_params[0], current_params[1], experiment_setup['exact_mean_pres'],
                               experiment_setup['exact_pres'])
        # Tracker row layout: [delta0, delta1, param0, param1, KL, eps,
        # conv_val, learning_rate] — column 4 (KL) is averaged at the end.
        tracker_i = [sum_delta[0], sum_delta[1], current_params[0], current_params[1], KL_loss, current_eps,
                     conv_val, current_learning_rate]
        tracker_vals.append(tracker_i)
        print("Interval {} done: {}\n Conv Val: {}\n eps:{} \n\n".format(i, current_params, conv_val, current_eps))
        i += 1
        if ray.get(ps.get_should_stop.remote()):
            # break from the while loop if we should stop, convergence wise.
            print("Converged - stop training")
            break
        if should_stop_priv:
            print("Exceeded Privacy Budget - stop training")
            break

    eps = accountant.current_tracked_val
    tracker_array = np.array(tracker_vals)
    np.savetxt(path + 'tracker.csv', tracker_array, delimiter=',')
    n_row, _ = tracker_array.shape
    # Average the KL (column 4) over the 10 intervals preceding the last one.
    # NOTE(review): with fewer than ~11 intervals this slice can be empty and
    # np.mean returns nan — confirm runs are always long enough.
    average_KL_loss = np.mean(tracker_array[n_row - 1 - 10:n_row - 1, 4])
    print("Average KL: {}".format(average_KL_loss))
    # NOTE(review): np.asscalar is deprecated in modern numpy; .item() is the
    # current equivalent.
    return eps, np.asscalar(average_KL_loss), tracker_array
# Script fragment: build the per-worker shards for the toy 1-D regression
# problem, fit the exact posterior as a reference, precompute the
# moments-accountant increments, then launch a single DP-SGD PVI run through
# ray and block until it completes.
num_workers = experiment_setup['num_workers']

if dataset_setup['dataset'] == 'toy_1d':
    def data_func(idx, N):
        # Loader for one shard of the toy 1-D dataset.
        return data.get_toy_1d_shard(
            idx, N, dataset_setup['data_type'], dataset_setup['mean'],
            dataset_setup['model_noise_std'],
            num_workers * dataset_setup['points_per_worker'])

workers_data = [data_func(w_i, num_workers) for w_i in range(num_workers)]

# Flatten every shard into one training set for the exact-inference baseline.
x_train = np.array([[]])
y_train = np.array([])
for shard in workers_data:
    x_train = np.append(x_train, shard[0])
    y_train = np.append(y_train, shard[1])

_, _, exact_mean_pres, exact_pres = exact_inference(
    x_train, y_train, experiment_setup['prior_std'],
    dataset_setup['model_noise_std'] ** 2)

log_moments = generate_log_moments(
    experiment_setup["dataset"]["points_per_worker"], 32,
    experiment_setup["dp_noise_scale"], experiment_setup["lot_size"])

print("Exact Params: {}, {}".format(exact_mean_pres, exact_pres))

ray.init()
# Launch the run remotely and wait for it to finish.
pending = run_dpsgd_pvi_sync.remote(experiment_setup, 1, 1e50, workers_data,
                                    [exact_mean_pres, exact_pres], log_moments)
ray.get(pending)