Example #1
    ])
    list_best.append(truth_at_best_sampled)
    total_cost += noise_and_cost_func(sample_IS, point_to_sample)[1]
    list_cost.append(total_cost)
    list_sampled_IS.append(sample_IS)
    list_sampled_points.append(point_to_sample)
    list_sampled_vals.append(val)
    list_noise_var.append(noise_and_cost_func(sample_IS, point_to_sample)[0])
    list_raw_voi.append(-min_neg_ei)

    result_to_pickle = {
        "best": np.array(list_best),
        "cost": np.array(list_cost),
        "sampled_is": np.array(list_sampled_IS),
        "sampled_points": np.array(list_sampled_points),
        "sampled_vals": np.array(list_sampled_vals),
        "sampled_noise_var": np.array(list_noise_var),
        "raw_voi": np.array(list_raw_voi),
        "init_best_truth": truth_at_init_best_sampled,
    }

    # write data to pickle.
    send_data_to_s3(bucket, problem.result_path, result_to_pickle)
    if problem.data_path is not None:
        data_to_pickle = {
            "points": np.array(list_sampled_points),
            "vals": np.array(list_sampled_vals),
            "noise": np.array(list_noise_var),
        }
        send_data_to_s3(bucket, problem.data_path, data_to_pickle)
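The snippets above and below call send_data_to_s3 (and later get_data_from_s3) without showing them. Below is a minimal sketch of what such helpers could look like on top of boto 2 and pickle; only the helper names and the 'frazier-research' bucket appear in this listing, the bodies are assumptions.

import pickle

from boto.s3.key import Key


def send_data_to_s3(bucket, key_name, data):
    # Assumed convention: pickle the object and upload it under key_name.
    k = Key(bucket)
    k.key = key_name
    k.set_contents_from_string(pickle.dumps(data))


def get_data_from_s3(bucket, key_name):
    # Assumed convention: download the object stored under key_name and unpickle it.
    k = Key(bucket)
    k.key = key_name
    return pickle.loads(k.get_contents_as_string())

# The bucket itself would come from a boto connection, e.g.
# import boto
# conn = boto.connect_s3()
# bucket = conn.get_bucket('frazier-research', validate=True)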
import numpy as np
from joblib import Parallel, delayed
# The following import paths assume Yelp's MOE package layout.
from moe.optimal_learning.python.geometry_utils import ClosedInterval
from moe.optimal_learning.python.python_version.domain import TensorProductDomain as pythonTensorProductDomain

func = func_dict[func_name]
num_replications = 100

search_domain = pythonTensorProductDomain([ClosedInterval(bound[0], bound[1]) for bound in func._search_domain])
num_parallel_jobs = num_pts    # Jialei's original choice
if 'ato' in func_name and num_parallel_jobs > 16:  # do not start too many MATLAB instances
    num_parallel_jobs = 16
if 'lrMU' in func_name and num_parallel_jobs > 8:  # do not start too many theano instances
    num_parallel_jobs = 8
if not allows_parallelization:
    num_parallel_jobs = 1
def parallel_func(IS, pt):
    return func.evaluate(IS, pt)

for repl_no in range(num_replications):
    with Parallel(n_jobs=num_parallel_jobs) as parallel:
        for index_IS in func.getList_IS_to_query():

            points = search_domain.generate_uniform_random_points_in_domain(num_pts)
            # def parallel_func(IS, pt):
            #     return func.evaluate(IS, pt)
            # vals = [func.evaluate(0, pt) for pt in points]
            vals = parallel(delayed(parallel_func)(index_IS, pt) for pt in points)

            noise = [func.noise_and_cost_func(index_IS, pt)[0] for pt in points] #func.noise_and_cost_func(index_IS, None)[0] * np.ones(num_pts)
            data = {"points": np.array(points), "vals": np.array(vals), "noise": np.array(noise)}

            # write to S3
            key = directory+'/{2}_IS_{0}_{3}_points_repl_{1}'.format(index_IS, repl_no, func_name, num_pts)
            send_data_to_s3(bucket, key, data)
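The per-replication pickles written above feed the hyperparameter fitting in Example #3, which expects a list hist_data with one entry per information source. A hypothetical loader sketch follows; the use of MOE's HistoricalData and the loader name are assumptions, only the key naming scheme is taken from the loop above.

from moe.optimal_learning.python.data_containers import HistoricalData


def load_init_data(bucket, directory, func_name, func, num_pts, repl_no):
    # Hypothetical helper: build one HistoricalData object per information source
    # from the pickles written by the loop above.
    hist_data = []
    for index_IS in func.getList_IS_to_query():
        key = directory + '/{2}_IS_{0}_{3}_points_repl_{1}'.format(
            index_IS, repl_no, func_name, num_pts)
        data = get_data_from_s3(bucket, key)
        hd = HistoricalData(func.getDim())
        hd.append_historical_data(data['points'], data['vals'], data['noise'])
        hist_data.append(hd)
    return hist_data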
Example #3
import pickle

import numpy as np

result = {
    "prior_mean": [],
    "prior_sig": [],
    "hyper_bounds": [],
    "hyperparam": np.array([]),
    "loglikelihood": [],
}
for i in range(len(hist_data)):
    result["data_{0}_points_sampled".format(i)] = hist_data[i].points_sampled
    result["data_{0}_points_sampled_value".format(i)] = hist_data[i].points_sampled_value
for key in range(len(hist_data)):
    # Setup prior for MAP
    if key == 0:
        prior_mean = np.concatenate((
            [max(0.01, np.var(hist_data[key].points_sampled_value)
                 - np.mean(hist_data[key].points_sampled_noise_variance))],
            [(d[1] - d[0]) for d in problem.obj_func_min._search_domain]))
    else:
        prior_mean = np.concatenate((
            [max(0.01, np.var(hist_data[key].points_sampled_value)
                 - np.mean(hist_data[key].points_sampled_noise_variance)
                 - np.mean(hist_data[0].points_sampled_noise_variance))],
            [(d[1] - d[0]) for d in problem.obj_func_min._search_domain]))
    prior_sig = np.diag(np.power(prior_mean/2., 2.0))
    prior = NormalPrior(prior_mean, prior_sig)
    hyper_bounds = [(0.001, prior_mean[i]+2.*np.sqrt(prior_sig[i,i])) for i in range(problem.obj_func_min.getDim()+1)]
    hyperparam_search_domain = pythonTensorProductDomain([ClosedInterval(bound[0], bound[1]) for bound in hyper_bounds])
    multistart_pts = hyperparam_search_domain.generate_uniform_random_points_in_domain(num_hyper_multistart)
    best_f = np.inf
    cov = SquareExponential(prior_mean)
    for i in range(num_hyper_multistart):
        hyper, f, output = hyper_opt(cov, data=hist_data[key], init_hyper=multistart_pts[i, :],
                                     hyper_bounds=hyper_bounds, approx_grad=False, hyper_prior=prior)
        # print output
        if f < best_f:
            best_hyper = hyper
            best_f = f
    result['prior_mean'].append(prior_mean)
    result['prior_sig'].append(np.diag(prior_sig))
    result['hyper_bounds'].append(hyper_bounds)
    result['hyperparam'] = np.concatenate((result['hyperparam'], best_hyper))
    result['loglikelihood'].append(-best_f)
send_data_to_s3(bucket, problem.hyper_path, result)
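Because result['hyperparam'] is built by concatenating the dim + 1 MAP hyperparameters of every information source (signal variance first, then one length scale per dimension) into a single flat array, a consumer of the pickle would typically reshape it per IS. A hypothetical sketch:

# Hypothetical consumer of the pickled hyperparameters: one row per IS.
dim = problem.obj_func_min.getDim()
hyper_result = get_data_from_s3(bucket, problem.hyper_path)
hyper_per_IS = hyper_result['hyperparam'].reshape((len(hist_data), dim + 1))
for idx in range(len(hist_data)):
    print 'IS {0}: signal variance {1}, length scales {2}'.format(
        idx, hyper_per_IS[idx, 0], hyper_per_IS[idx, 1:])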
### copy back from /backup the files where IS 1 is centered
directory_backup = '/miso/data/backup'
new_func_name = 'lrMU2'
#
# num_pts = 1000
# for index_IS in func.getList_IS_to_query():
#     key = directory_backup+'/hyper_{1}_IS_{0}_{2}_points'.format(index_IS, func_name, num_pts)
#     data = get_data_from_s3(bucket, key)
#
#     # write to lrMU2 file
#     key_new = directory + '/hyper_{1}_IS_{0}_{2}_points'.format(index_IS, new_func_name, num_pts)
#     # print key_new
#     send_data_to_s3(bucket, key_new, data)

# init data
num_replications = 100
num_pts = 10
for repl_no in xrange(num_replications):
    print '\nrepl ' + str(repl_no)
    for index_IS in func.getList_IS_to_query():
        key = directory_backup + '/{2}_IS_{0}_{3}_points_repl_{1}'.format(
            index_IS, repl_no, func_name, num_pts)
        data = get_data_from_s3(bucket, key)

        # write to lrMU2 file
        key_new = directory + '/{2}_IS_{0}_{3}_points_repl_{1}'.format(
            index_IS, repl_no, new_func_name, num_pts)
        # print key_new
        send_data_to_s3(bucket, key_new, data)
# bucket = conn.get_bucket('frazier-research', validate=True)
# cluster_dir = '/fs/europa/g_pf/pickles/miso/data/'
# s3_dir = 'miso/data/'
# for filename in os.listdir(cluster_dir):
#     if '.pickle' in filename:
#         print filename.split('.')[0]
#         with open(cluster_dir+filename, 'rb') as f:
#             d=pickle.load(f)
#             send_data_to_s3(bucket, s3_dir+filename.split('.')[0], d)

cluster_dir = '/fs/europa/g_pf/pickles/miso/data/'
s3_dir = 'miso/data/'
for i in range(100):
    for IS in range(3):
        name = "atoext_IS_{0}_20_points_repl_{1}".format(IS, i)
        print name
        with open(cluster_dir + name, 'rb') as f:
            data = pickle.load(f)
        send_data_to_s3(bucket, s3_dir + name, data)

# cluster_dir = '/fs/europa/g_pf/pickles/miso/hyper/'
# s3_dir = 'miso/hyper/'
# # for i in range(100):
# #     for IS in range(3):
# # name = "atoext_IS_{0}_20_points_repl_{1}".format(IS, i)
# name = "mkg_atoext"
# print name
# data = get_data_from_s3(bucket, s3_dir+name)
# with open(cluster_dir+name, 'wb') as f:
#     pickle.dump(data, f)