if param_combination in searched_params:
                experiment_counter += 1
                # skip, but dont resave...
                print('Skipping: ' + str(param_combination))
                continue
            print('Running for: ' + str(param_combination))
            max_eps = param_combination[0]
            dp_noise_scale = param_combination[1]
            clipping_bound = param_combination[2]
            damping_val = param_combination[3]

            eps_i = np.zeros(experiment_setup['N_dp_seeds'])
            kl_i = np.zeros(experiment_setup['N_dp_seeds'])

            results_objects = []
            log_moments = generate_log_moments(no_workers, 32, dp_noise_scale,
                                               no_workers)

            # start everything running...
            for ind, seed in enumerate(dp_seeds):
                results = run_dp_analytical_pvi_sync.remote(
                    experiment_setup, seed, max_eps, workers_data,
                    dp_noise_scale, damping_val, clipping_bound, log_moments)
                results_objects.append((results, ind))

            # fetch one by one
            for results_tup in results_objects:
                [results_obj, ind] = results_tup
                results = ray.get(results_obj)
                eps = results[0]
                kl = results[1]
                eps_i[ind] = eps
# ---- Example no. 2 (snippet boundary left over from extraction; code above and below belongs to different examples) ----
            print(experiment_code)

            print("Sampled Parameters: {}".format(sampled_params))
            print("Exact Inference Params: {}".format(exact_params))

            if experiment_code in alreadyRunExperiments and not should_overwrite:
                print("Skipping Experiment")
                pprint.pprint(full_setup, width=1)
                print("Experiment Skipped \n\n")
                continue

            try:
                pprint.pprint(full_setup, width=1)
                print("Calc Log Moments")
                log_moments = generate_log_moments(
                    no_workers, 32, full_experiment_setup['dp_noise_scale'],
                    no_workers)
                print("Calc Log Moments - Done \n\n")
                results = run_global_dp_analytical_pvi_sync.remote(
                    full_setup, seed, dataset, log_moments=None)
                [eps, kl, tracker] = ray.get(results)
                fname = path + 'e{}.csv'.format(experiment_counter, ind)
                np.savetxt(fname, tracker, delimiter=',')

                text_file = open(log_file_path, "a")
                results_array = [
                    experiment_counter, eps, kl, sampled_params[0],
                    sampled_params[1], exact_params[0], exact_params[1],
                    full_setup['clipping_bound'],
                    full_setup['learning_rate']['start_value'],
                    full_setup["num_workers"],
# ---- Example no. 3 (snippet boundary left over from extraction; code above and below belongs to different examples) ----
            print("Sampled Parameters: {}".format(sampled_params))
            print("Exact Inference Params: {}".format(exact_params))

            if experiment_code in alreadyRunExperiments and not should_overwrite:
                print("Skipping Experiment")
                pprint.pprint(full_setup, width=1)
                experiment_counter += 1
                print("Experiment Skipped \n\n")
                continue

            try:
                pprint.pprint(full_setup, width=1)
                # print("Calc Log Moments")
                # only thing that matters is q
                log_moments = generate_log_moments(
                    1, 32, full_experiment_setup['dp_noise_scale'], 1)
                # print("Calc Log Moments - Done \n\n")

                # set things running so we can get the results later - this ought to boost performance.
                results = run_global_dp_analytical_pvi_sync.remote(
                    full_setup, seed, dataset, log_moments=None)
                results_array = [
                    experiment_counter, -1, -1, sampled_params[0],
                    sampled_params[1], exact_params[0], exact_params[1],
                    full_setup['clipping_bound'],
                    full_setup['learning_rate']['start_value'],
                    full_setup["num_workers"],
                    full_setup["dataset"]["points_per_worker"], experiment_code
                ]
                results_obj = [results_array, results]
                experiment_counter += 1
def run_global_dp_analytical_pvi_sync(experiment_setup, seed, all_workers_data, log_moments=None):
    """Run one synchronous, (globally) differentially-private analytical PVI experiment.

    Workers compute per-interval parameter deltas in parallel (via ray), the
    deltas are aggregated with optional server-side clipping/noise, and a
    moments accountant tracks the cumulative privacy spend. Training stops at
    ``num_intervals``, on convergence, or when the privacy budget is exceeded.

    Args:
        experiment_setup: dict of experiment hyperparameters (num_workers,
            dataset config, prior_std, clipping/noise configs, learning_rate
            schedule, max_eps, convergence settings, output_base_dir, ...).
        seed: integer seed applied to both numpy and tensorflow RNGs.
        all_workers_data: per-worker training data, indexed by worker id.
        log_moments: precomputed log moments for the accountant; computed
            here from the noise scale when None.

    Returns:
        (eps, average_KL_loss, tracker_array) where eps is the final tracked
        privacy cost, average_KL_loss is a Python float averaging the KL over
        a trailing window of intervals, and tracker_array is the full
        per-interval log that is also written to ``tracker.csv``.
    """
    # Logging directory is timestamped per run. NOTE(review): the "%H:%M:%S"
    # colons make this path invalid on Windows — confirm POSIX-only is intended.
    path = experiment_setup['output_base_dir'] + 'logs/global_dp_ana_sync_pvi/' + time.strftime(
        "%m-%d;%H:%M:%S") + "-s-" + str(seed) + '/'
    if not os.path.exists(path):
        os.makedirs(path)

    # Persist the full setup so the run is reproducible from its log dir.
    setup_file = path + 'run-params.json'
    with open(setup_file, 'w') as outfile:
        json.dump(experiment_setup, outfile)

    np.random.seed(seed)
    tf.set_random_seed(seed)

    n_train_master = experiment_setup['num_workers'] * experiment_setup['dataset']['points_per_worker']
    in_dim = 1

    # Fixed delta of 1e-5; the accountant reports eps and flags when max_eps is exceeded.
    accountant = MomentsAccountant(MomentsAccountantPolicy.FIXED_DELTA_MAX_EPS, 1e-5, experiment_setup['max_eps'], 32)
    net = linreg_models.LinReg_MFVI_analytic(in_dim, n_train_master,
                                             noise_var=experiment_setup['dataset']['model_noise_std'] ** 2,
                                             prior_var=experiment_setup['prior_std'] ** 2)

    if log_moments is None:
        # Global DP: accounting depends only on the noise scale, hence (1, ..., 1).
        accountant.log_moments_increment = generate_log_moments(1, 32, experiment_setup['dp_noise_scale'], 1)
    else:
        accountant.log_moments_increment = log_moments

    all_keys, all_values = net.get_params()

    if experiment_setup["convergence_threshold"] == "automatic":
        # Scale the threshold to the expected noise magnitude of the summed update.
        convergence_threshold = experiment_setup["clipping_bound"] * experiment_setup["dp_noise_scale"] / (
            experiment_setup["num_workers"] ** 0.5)
        print("convergence threshold calculated automatically: {}".format(convergence_threshold))
    elif experiment_setup["convergence_threshold"] == "disabled":
        # would never be reached...
        convergence_threshold = 0
    else:
        convergence_threshold = experiment_setup["convergence_threshold"]

    ps = ParameterServer.remote(all_keys, all_values, convergence_threshold,
                                experiment_setup["convergence_length"],
                                experiment_setup["num_intervals"])

    # Workers clip locally unless clipping is off or deferred to the server.
    worker_clipping_config = "not_clipped" if (experiment_setup["clipping_config"] == "not_clipped" or experiment_setup[
        "clipping_config"] == "clipped_server") else "clipped"

    worker_noise_config = "noisy_worker" if experiment_setup["noise_config"] == "noisy_worker" else "not_noisy"

    # Server-side clipping/noise apply only under the matching configs;
    # otherwise they are disabled (inf bound / zero noise).
    global_clipping_bound = experiment_setup['clipping_bound'] if experiment_setup[
                                                                      "clipping_config"] == "clipped_server" else np.inf
    global_dp_noise_scale = experiment_setup['dp_noise_scale'] if experiment_setup[
                                                                      "noise_config"] == "noisy" else 0
    worker_config = {
        "clipping": worker_clipping_config,
        "noise": worker_noise_config
    }

    learning_rate_schedule = generate_learning_rate_schedule(
        experiment_setup['num_intervals'], experiment_setup['learning_rate'])

    workers = [
        Worker.remote(experiment_setup['num_workers'], in_dim, all_workers_data[i],
                      experiment_setup['prior_std'] ** 2, worker_config, experiment_setup['clipping_bound'],
                      1 - experiment_setup['learning_rate']['start_value'], experiment_setup['dataset']['model_noise_std'] ** 2)
        for i in range(experiment_setup['num_workers'])]
    i = 0
    current_params = ray.get(ps.pull.remote(all_keys))

    tracker_vals = []

    while i < experiment_setup['num_intervals']:
        current_learning_rate = learning_rate_schedule[i]
        # Fan out delta computation to all workers, then block on the results.
        deltas = [
            worker.get_delta.remote(
                current_params, current_learning_rate, damping=experiment_setup['local_damping'])
            for worker in workers]

        sum_delta = compute_update(all_keys, ray.get(deltas), global_clipping_bound, experiment_setup['clipping_bound'],
                                   1 - current_learning_rate, global_dp_noise_scale)
        # Spend privacy budget for this interval BEFORE applying the update,
        # so the eps logged alongside the new params reflects it.
        should_stop_priv = accountant.update_privacy_budget()
        current_eps = accountant.current_tracked_val
        ps.push.remote(all_keys, sum_delta)
        current_params = ray.get(ps.pull.remote(all_keys))
        conv_val = ray.get(ps.get_conv_val.remote())
        KL_loss = KL_Gaussians(current_params[0], current_params[1], experiment_setup['exact_mean_pres'],
                               experiment_setup['exact_pres'])
        tracker_i = [sum_delta[0], sum_delta[1], current_params[0], current_params[1], KL_loss, current_eps, conv_val,
                     current_learning_rate]
        tracker_vals.append(tracker_i)
        print("Interval {} done: {}\n Conv Val: {}\n eps:{} \n\n".format(i, current_params, conv_val, current_eps))
        i += 1

        if ray.get(ps.get_should_stop.remote()):
            # break from the while loop if we should stop, convergence wise.
            print("Converged - stop training")
            break

        if should_stop_priv:
            print("Exceeded Privacy Budget - stop training")
            break

    eps = accountant.current_tracked_val

    tracker_array = np.array(tracker_vals)
    np.savetxt(path + 'tracker.csv', tracker_array, delimiter=',')

    # Average KL over (up to) the 10 intervals preceding the final one; the
    # final row itself is excluded. NOTE(review): confirm the off-by-one
    # exclusion of the last interval is intentional.
    n_row, _ = tracker_array.shape
    average_KL_loss = np.mean(tracker_array[n_row - 1 - 10:n_row - 1, 4])
    print("Average KL: {}".format(average_KL_loss))

    # FIX: np.asscalar() was removed in NumPy 1.23; ndarray.item() is the
    # documented drop-in replacement and returns the same Python float.
    return eps, average_KL_loss.item(), tracker_array
# ---- Example no. 5 (snippet boundary left over from extraction; code above and below belongs to different examples) ----
    if dataset_setup['dataset'] == 'toy_1d':
        data_func = lambda idx, N: data.get_toy_1d_shard(
            idx, N, dataset_setup['data_type'], dataset_setup['mean'],
            dataset_setup['model_noise_std'], experiment_setup[
                'num_workers'] * dataset_setup['points_per_worker'])

    workers_data = [
        data_func(w_i, experiment_setup['num_workers'])
        for w_i in range(experiment_setup['num_workers'])
    ]
    x_train = np.array([[]])
    y_train = np.array([])
    for worker_data in workers_data:
        x_train = np.append(x_train, worker_data[0])
        y_train = np.append(y_train, worker_data[1])

    _, _, exact_mean_pres, exact_pres = exact_inference(
        x_train, y_train, experiment_setup['prior_std'],
        dataset_setup['model_noise_std']**2)

    log_moments = generate_log_moments(
        experiment_setup["dataset"]["points_per_worker"], 32,
        experiment_setup["dp_noise_scale"], experiment_setup["lot_size"])
    print("Exact Params: {}, {}".format(exact_mean_pres, exact_pres))
    ray.init()
    results = run_dpsgd_pvi_sync.remote(experiment_setup, 1, 1e50,
                                        workers_data,
                                        [exact_mean_pres, exact_pres],
                                        log_moments)
    ray.get(results)