def generate_Hams(v_fun):
    # build float- and double-precision Hamiltonians from the same V constructor
    v_obj_float = v_fun(precision_type="torch.FloatTensor")
    v_obj_double = v_fun(precision_type="torch.DoubleTensor")
    metric_float = metric("unit_e", v_obj_float)
    metric_double = metric("unit_e", v_obj_double)
    Ham_float = Hamiltonian(v_obj_float, metric_float)
    Ham_double = Hamiltonian(v_obj_double, metric_double)
    out = {"float": Ham_float, "double": Ham_double}
    return out
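# A hedged usage sketch of generate_Hams. V_pima_inidan_logit is borrowed from
# the precision-comparison scripts in this repo; the assumption is that every V
# constructor accepts a precision_type keyword, as the tuning code expects.
Hams = generate_Hams(V_pima_inidan_logit)
Ham_float, Ham_double = Hams["float"], Hams["double"]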
def generate_H_V_T(epsilon, L, vo, q_point):
    # integrate L leapfrog steps and record H, V, T after every step
    metrico = metric("unit_e", vo)
    Ho = Hamiltonian(vo, metrico)
    q = q_point.point_clone()
    p = Ho.T.generate_momentum(q)
    H_0_out = Ho.evaluate(q, p)
    H_list = [H_0_out["H"]]
    V_list = [H_0_out["V"]]
    T_list = [H_0_out["T"]]
    for i in range(L):
        print("iter {}".format(i))
        q, p, stat = abstract_leapfrog_ult(q, p, epsilon, Ho)
        if stat["explode_grad"]:
            break
        H_out = Ho.evaluate(q, p)
        current_H = H_out["H"]
        current_T = H_out["T"]
        current_V = H_out["V"]
        # stop if the trajectory has diverged
        if abs(current_H - H_list[0]) > 1000:
            break
        print("current V is {}".format(current_V))
        print("current H {}".format(current_H))
        print("current T {}".format(current_T))
        H_list.append(current_H)
        V_list.append(current_V)
        T_list.append(current_T)
    out = {"H_list": H_list, "V_list": V_list, "T_list": T_list}
    return out
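# Illustrative call, not a fixed API: vo and q_point would come from scripts
# like the ones below (e.g. vo = V_logistic_regression(); q_point = Ho.V.q_point).
traces = generate_H_V_T(epsilon=0.1, L=100, vo=vo, q_point=q_point)
print("energy drift over trajectory: {}".format(traces["H_list"][-1] - traces["H_list"][0]))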
def __init__(self, tune_dict, tune_param_objs_dict, init_point):
    self.tune_param_objs_dict = tune_param_objs_dict
    self.v_fun = tune_dict["v_fun"]
    self.windowed = tune_dict["windowed"]
    self.dynamic = tune_dict["dynamic"]
    self.second_order = tune_dict["second_order"]
    self.metric_name = tune_dict["metric_name"]
    self.criterion = tune_dict["criterion"]
    self.v_obj = self.v_fun()
    self.v_obj.q_point = init_point
    # the softabs family of metrics needs the extra alpha parameter
    if "alpha" in tune_param_objs_dict:
        alpha_val = tune_param_objs_dict["alpha"].get_val()
        self.metric = metric(self.metric_name, self.v_obj, alpha_val)
    else:
        self.metric = metric(self.metric_name, self.v_obj)
    self.Ham = Hamiltonian(self.v_obj, self.metric)
    # static trajectories can be specified by integration time (evolve_t)
    # or by number of leapfrog steps (evolve_L)
    if "evolve_t" in tune_param_objs_dict:
        self.input_time = True
    elif "evolve_L" in tune_param_objs_dict:
        self.input_time = False
    else:
        self.input_time = None
    self.ave_second_per_leapfrog = 0
    # here self.one_step_function is the raw sampler one-step;
    # self.tuneable_param names the tuneable parameters it needs
    self.one_step_function, self.tuneable_param = self.generate_sampler_one_step()
    self.one_step_function = wrap(self.one_step_function)
def T(p):
    # kinetic energy for the unit metric
    return torch.dot(p, p) * 0.5

def H(q, p, return_float):
    if return_float:
        return (V(q) + T(p)).data[0]
    else:
        return V(q) + T(p)

# first verify that the explicit and abstract implementations give the same Hamiltonian
print("exact H {}".format(H(q, p, True)))
v_obj = V_pima_inidan_logit()
metric_obj = metric("unit_e", v_obj)
Ham = Hamiltonian(v_obj, metric_obj)
q_point = Ham.V.q_point.point_clone()
p_point = Ham.T.p_point.point_clone()
q_point.flattened_tensor.copy_(inputq)
p_point.flattened_tensor.copy_(inputp)
print("abstract H {}".format(Ham.evaluate(q_point, p_point)))
print("input q diff {}".format((q.data - q_point.flattened_tensor).sum()))
print("input p diff {}".format((p.data - p_point.flattened_tensor).sum()))
L = 10
for i in range(L):
    # reassign so the trajectory actually advances (the original discarded the output)
    q, p = leapfrog_ult(q, p, 0.1, H)
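# For reference, a minimal sketch of the leapfrog update this script assumes.
# The repo's leapfrog_ult also handles autograd bookkeeping; grad_V here is a
# hypothetical function returning the gradient of the potential energy.
def leapfrog_sketch(q, p, epsilon, grad_V):
    # half step in momentum, full step in position, half step in momentum
    p = p - 0.5 * epsilon * grad_V(q)
    q = q + epsilon * p
    p = p - 0.5 * epsilon * grad_V(q)
    return q, p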
#seed = 1
#torch.manual_seed(seed)
#numpy.random.seed(seed)
vo = V_logistic_regression()
#vo = V_funnel()
# alternative metrics:
#metrico = metric("softabs", vo, alpha=1e6)
#metrico = metric("softabs_diag", vo, alpha=1e6)
#metrico = metric("softabs_outer_product", vo, alpha=1e6)
#metrico = metric("diag_e", vo)
#metrico = metric("dense_e", vo)
metrico = metric("unit_e", vo)
Ho = Hamiltonian(vo, metrico)
epsilon = 0.1
qpoint_obj = Ho.V.q_point
q = qpoint_obj
# alternative transition kernels:
#out = abstract_NUTS(q, epsilon, Ho, abstract_leapfrog_ult, 5)
#out = abstract_GNUTS(q, epsilon, Ho, abstract_leapfrog_ult, 5)
#out = abstract_NUTS_xhmc(q, epsilon, Ho, abstract_leapfrog_ult, 5, 0.1)
#out = abstract_NUTS_xhmc(q, epsilon, Ho, generalized_leapfrog, 5, 0.1)
out = abstract_HMC_alt_ult(epsilon=0.01, L=10, init_q=qpoint_obj, Ham=Ho)
#out = rmhmc_step(qpoint_obj, 0.01, 10, Ho)
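# For orientation, a sketch of the Metropolis correction a static HMC step like
# abstract_HMC_alt_ult performs after integrating L leapfrog steps; names are
# illustrative, not the repo's API.
import math, random

def hmc_accept_sketch(H_current, H_proposed):
    # accept with probability min(1, exp(H_current - H_proposed))
    return math.log(random.random()) < (H_current - H_proposed)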
def setup_gibbs_v_joint_experiment(num_units_list, train_set, test_set,
                                   num_samples, save_name, seed=1):
    output_names = ["train_error", "test_error", "train_error_sd", "test_error_sd",
                    "sigma_2_ess", "mean_sigma2", "median_sigma2", "min_ess",
                    "median_ess"]
    output_store = numpy.zeros((len(num_units_list), 3, len(output_names)))
    diagnostics_store = numpy.zeros(shape=[len(num_units_list), 3] + [4, 13])
    time_store = numpy.zeros(shape=[len(num_units_list), 3])
    for i in range(len(num_units_list)):
        for j in range(3):
            start_time = time.time()
            v_fun = V_fc_model_4
            model_dict = {"num_units": num_units_list[i]}
            mcmc_meta = mcmc_sampler_settings_dict(mcmc_id=0,
                                                   samples_per_chain=1000 + num_samples,
                                                   num_chains=4, num_cpu=4, thin=1,
                                                   tune_l_per_chain=900,
                                                   warmup_per_chain=1000,
                                                   is_float=False,
                                                   isstore_to_disk=False,
                                                   allow_restart=True,
                                                   seed=seed + i + 1)
            if j == 2:
                # Gibbs: alternate HMC updates of the weights with updates of the
                # prior variance hyperparameter
                v_generator = wrap_V_class_with_input_data(class_constructor=V_fc_gibbs_model_1,
                                                           input_data=train_set,
                                                           model_dict=model_dict)
                v_obj = v_generator(precision_type="torch.DoubleTensor", gibbs=True)
                metric_obj = metric(name="unit_e", V_instance=v_obj)
                Ham = Hamiltonian(v_obj, metric_obj)
                init_q_point = point(V=v_obj)
                init_hyperparam = torch.abs(torch.randn(1)) + 3
                log_obj = log_class()
                dim = len(init_q_point.flattened_tensor)
                mcmc_samples_weight = torch.zeros(1, num_samples + 1000, dim)
                mcmc_samples_hyper = torch.zeros(1, num_samples + 1000, 1)
                for iter in range(num_samples + 1000):
                    print("iter {}".format(iter))
                    outq, out_hyperparam = update_param_and_hyperparam_dynamic_one_step(
                        init_q_point, init_hyperparam, Ham, 0.01, log_obj)
                    init_q_point.flattened_tensor.copy_(outq.flattened_tensor)
                    init_q_point.load_flatten()
                    init_hyperparam = out_hyperparam
                    mcmc_samples_weight[0, iter, :] = outq.flattened_tensor.clone()
                    mcmc_samples_hyper[0, iter, 0] = out_hyperparam
                # discard burn-in
                mcmc_samples_weight = mcmc_samples_weight[:, 1000:, :].numpy()
                mcmc_samples_hyper = mcmc_samples_hyper[:, 1000:, :].numpy()
                te, predicted, te_sd = test_error(test_set,
                                                  v_obj=v_generator(precision_type="torch.DoubleTensor"),
                                                  mcmc_samples=mcmc_samples_weight[0, :, :],
                                                  type="classification",
                                                  memory_efficient=False)
                train_error, _, train_error_sd = test_error(train_set,
                                                            v_obj=v_generator(precision_type="torch.DoubleTensor"),
                                                            mcmc_samples=mcmc_samples_weight[0, :, :],
                                                            type="classification",
                                                            memory_efficient=False)
                sigma2_diagnostics = diagnostics_stan(mcmc_samples_hyper)
                sigma2_ess = sigma2_diagnostics["ess"]
                posterior_mean_hidden_in_sigma2 = numpy.mean(mcmc_samples_hyper)
                posterior_median_hidden_in_sigma2 = numpy.median(mcmc_samples_hyper)
                weight_ess = diagnostics_stan(mcmc_samples_weight)["ess"]
                min_ess = min(sigma2_ess, min(weight_ess))
                median_ess = numpy.median([sigma2_ess] + list(weight_ess))
                output_store[i, j, 0] = train_error
                output_store[i, j, 1] = te
                output_store[i, j, 2] = train_error_sd  # was train_error: slot 2 is the sd
                output_store[i, j, 3] = te_sd
                output_store[i, j, 4] = sigma2_ess
                output_store[i, j, 5] = posterior_mean_hidden_in_sigma2
                output_store[i, j, 6] = posterior_median_hidden_in_sigma2
                output_store[i, j, 7] = min_ess
                output_store[i, j, 8] = median_ess
            elif j == 0:
                prior_dict = {"name": "gaussian_inv_gamma_1"}
                v_generator = wrap_V_class_with_input_data(class_constructor=v_fun,
                                                           input_data=train_set,
                                                           prior_dict=prior_dict,
                                                           model_dict=model_dict)
            elif j == 1:
                prior_dict = {"name": "gaussian_inv_gamma_2"}
                v_generator = wrap_V_class_with_input_data(class_constructor=v_fun,
                                                           input_data=train_set,
                                                           prior_dict=prior_dict,
                                                           model_dict=model_dict)
            if j == 0 or j == 1:
                input_dict = {"v_fun": [v_generator], "epsilon": ["dual"],
                              "second_order": [False], "max_tree_depth": [8],
                              "metric_name": ["unit_e"], "dynamic": [True],
                              "windowed": [False], "criterion": ["xhmc"],
                              "xhmc_delta": [0.1]}
                ep_dual_metadata_argument = {"name": "epsilon", "target": 0.9,
                                             "gamma": 0.05, "t_0": 10,
                                             "kappa": 0.75,
                                             "obj_fun": "accept_rate",
                                             "par_type": "fast"}
                dual_args_list = [ep_dual_metadata_argument]
                other_arguments = other_default_arguments()
                tune_settings_dict = tuning_settings(dual_args_list, [], [],
                                                     other_arguments)
                tune_dict = tuneinput_class(input_dict).singleton_tune_dict()
                sampler1 = mcmc_sampler(tune_dict=tune_dict,
                                        mcmc_settings_dict=mcmc_meta,
                                        tune_settings_dict=tune_settings_dict)
                sampler1.start_sampling()
                np_diagnostics, feature_names = sampler1.np_diagnostics()
                mcmc_samples_hidden_in = sampler1.get_samples_alt(prior_obj_name="hidden_in",
                                                                  permuted=False)
                samples = mcmc_samples_hidden_in["samples"]
                hidden_in_sigma2_indices = mcmc_samples_hidden_in["indices_dict"]["sigma2"]
                sigma2_diagnostics = diagnostics_stan(samples[:, :, hidden_in_sigma2_indices])
                sigma2_ess = sigma2_diagnostics["ess"]
                posterior_mean_hidden_in_sigma2 = numpy.mean(
                    samples[:, :, hidden_in_sigma2_indices].reshape(-1, len(hidden_in_sigma2_indices)),
                    axis=0)
                posterior_median_hidden_in_sigma2 = numpy.median(
                    samples[:, :, hidden_in_sigma2_indices].reshape(-1, len(hidden_in_sigma2_indices)),
                    axis=0)
                mcmc_samples_mixed = sampler1.get_samples(permuted=True)
                te, predicted, te_sd = test_error(test_set,
                                                  v_obj=v_generator(precision_type="torch.DoubleTensor"),
                                                  mcmc_samples=mcmc_samples_mixed,
                                                  type="classification",
                                                  memory_efficient=False)
                train_error, _, train_error_sd = test_error(train_set,
                                                            v_obj=v_generator(precision_type="torch.DoubleTensor"),
                                                            mcmc_samples=mcmc_samples_mixed,
                                                            type="classification",
                                                            memory_efficient=False)
                output_store[i, j, 0] = train_error
                output_store[i, j, 1] = te
                output_store[i, j, 2] = train_error_sd  # was train_error: slot 2 is the sd
                output_store[i, j, 3] = te_sd
                output_store[i, j, 4] = sigma2_ess
                output_store[i, j, 5] = posterior_mean_hidden_in_sigma2
                output_store[i, j, 6] = posterior_median_hidden_in_sigma2
                diagnostics_store[i, j, :, :] = np_diagnostics
                output_store[i, j, 7] = np_diagnostics[0, 10]
                output_store[i, j, 8] = np_diagnostics[0, 11]
            total_time = time.time() - start_time  # was start_time(): start_time is a float
            time_store[i, j] = total_time
    to_store = {"diagnostics": diagnostics_store, "output": output_store,
                "diagnostics_names": feature_names, "output_names": output_names,
                "seed": seed, "num_units_list": num_units_list,
                "time_store": time_store}
    numpy.savez(save_name, **to_store)
    return ()
from distributions.eightschool_cp import V_eightschool_cp
import numpy
import torch
from abstract.metric import metric
from abstract.abstract_class_Ham import Hamiltonian
from abstract.abstract_genleapfrog_ult_util import generalized_leapfrog
from post_processing.plot_energy_oscillation import plot_V_T

# goal: plot V and T on the same graph, then save the figure
#vo = V_banana()
#vo = V_logistic_regression()
#vo = V_funnel()
vo = V_eightschool_cp()
#vo = V_eightschool_ncp()
metrico = metric("softabs", vo, alpha=1)
#metrico = metric("unit_e", vo)
seed = 34
torch.manual_seed(seed)
numpy.random.seed(seed)
Ho = Hamiltonian(vo, metrico)
initq = Ho.V.q_point
initq.flattened_tensor.copy_(torch.randn(len(initq.flattened_tensor)))
q = initq.point_clone()
q.load_flatten()
p = Ho.T.generate_momentum(q)
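# The two TODO comments above ask for a combined, saved plot. A minimal
# matplotlib sketch (an assumption: plot_V_T's signature isn't shown here, and
# generate_H_V_T builds its own unit_e metric internally):
import matplotlib.pyplot as plt

traces = generate_H_V_T(epsilon=0.1, L=200, vo=vo, q_point=q)
plt.plot(traces["V_list"], label="V")
plt.plot(traces["T_list"], label="T")
plt.xlabel("leapfrog step")
plt.legend()
plt.savefig("eightschool_cp_energy.png")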
def setup_sghmc_experiment(ep_list, L_list, eta_list, train_set, test_set, save_name, seed=1):
    # grid search over SGHMC step size, trajectory length and learning rate
    output_names = ["train_error", "test_error", "train_error_sd", "test_error_sd"]
    output_store = numpy.zeros((len(ep_list), len(L_list), len(eta_list), len(output_names)))
    diagnostics_store = numpy.zeros(shape=[len(ep_list), len(L_list), len(eta_list)] + [4, 13])
    model_dict = {"num_units": 35}
    prior_dict = {"name": "normal"}
    time_store = numpy.zeros(shape=[len(ep_list), len(L_list), len(eta_list)])
    for i in range(len(ep_list)):
        for j in range(len(L_list)):
            for k in range(len(eta_list)):
                start_time = time.time()
                v_generator = wrap_V_class_with_input_data(class_constructor=V_fc_model_1,
                                                           input_data=train_set,
                                                           prior_dict=prior_dict,
                                                           model_dict=model_dict)
                v_obj = v_generator(precision_type="torch.DoubleTensor")
                metric_obj = metric(name="unit_e", V_instance=v_obj)
                Ham = Hamiltonian(V=v_obj, metric=metric_obj)
                full_data = train_set
                init_q_point = point(V=v_obj)
                store, explode_grad = sghmc_sampler(init_q_point=init_q_point,
                                                    epsilon=ep_list[i], L=L_list[j],
                                                    Ham=Ham, alpha=0.01,
                                                    eta=eta_list[k], betahat=0,
                                                    full_data=full_data,
                                                    num_samples=2000, thin=0,
                                                    burn_in=1000, batch_size=25)
                total_time = time.time() - start_time
                if not explode_grad:
                    test_mcmc_samples = store.numpy()
                    te1, predicted1, te_sd = test_error(test_set,
                                                        v_obj=v_generator(precision_type="torch.DoubleTensor"),
                                                        mcmc_samples=test_mcmc_samples,
                                                        type="classification",
                                                        memory_efficient=False)
                    train_error, predicted1, train_error_sd = test_error(train_set,
                                                                         v_obj=v_generator(precision_type="torch.DoubleTensor"),
                                                                         mcmc_samples=test_mcmc_samples,
                                                                         type="classification",
                                                                         memory_efficient=False)
                else:
                    # sentinel values marking a diverged run
                    train_error = 2
                    te1 = 2
                    train_error_sd = 2
                    te_sd = 2
                output_store[i, j, k, 0] = train_error
                output_store[i, j, k, 1] = te1
                output_store[i, j, k, 2] = train_error_sd
                output_store[i, j, k, 3] = te_sd
                time_store[i, j, k] = total_time
    to_store = {"diagnostics": diagnostics_store, "output": output_store,
                "output_names": output_names, "seed": seed, "ep_list": ep_list,
                "L_list": L_list, "eta_list": eta_list,
                "num_units": model_dict["num_units"], "prior": prior_dict["name"],
                "total_store": time_store}
    numpy.savez(save_name, **to_store)
    return ()
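# For context, a sketch of the SGHMC momentum update (Chen et al., 2014) in the
# (alpha, eta, betahat) parameterization that sghmc_sampler's signature
# suggests; grad_U_minibatch is a hypothetical stochastic-gradient function.
def sghmc_step_sketch(theta, v, eta, alpha, betahat, grad_U_minibatch):
    # v <- (1 - alpha) v - eta * grad + noise, noise variance 2 (alpha - betahat) eta
    noise = torch.randn(v.shape) * (2.0 * (alpha - betahat) * eta) ** 0.5
    v = (1.0 - alpha) * v - eta * grad_U_minibatch(theta) + noise
    theta = theta + v
    return theta, v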
# goal: show that Gibbs sampling over weights and prior variance can succeed,
# i.e. reasonably large ess and no divergences
# gaussian - inverse-gamma prior; compare ess for the hyperparameter
input_data = get_data_dict("8x8mnist")
input_data = {"input": input_data["input"][:500, ], "target": input_data["target"][:500]}
model_dict = {"num_units": 25}
V_fun = wrap_V_class_with_input_data(class_constructor=V_fc_gibbs_model_1,
                                     input_data=input_data, model_dict=model_dict)
v_obj = V_fun(precision_type="torch.DoubleTensor", gibbs=True)
metric_obj = metric(name="unit_e", V_instance=v_obj)
Ham = Hamiltonian(v_obj, metric_obj)
init_q_point = point(V=v_obj)
init_hyperparam = torch.abs(torch.randn(1))
log_obj = log_class()
num_samples = 1000
dim = len(init_q_point.flattened_tensor)
mcmc_samples_weight = torch.zeros(1, num_samples, dim)
mcmc_samples_hyper = torch.zeros(1, num_samples, 1)
for i in range(num_samples):
    print("loop {}".format(i))
    # loop body completed following the pattern in setup_gibbs_v_joint_experiment;
    # the step size 0.1 is illustrative, and the original static one-step call
    # is kept below for reference
    #outq, out_hyperparam = update_param_and_hyperparam_one_step(init_q_point, init_hyperparam, Ham, 0.1, 60, log_obj)
    outq, out_hyperparam = update_param_and_hyperparam_dynamic_one_step(
        init_q_point, init_hyperparam, Ham, 0.1, log_obj)
    init_q_point.flattened_tensor.copy_(outq.flattened_tensor)
    init_q_point.load_flatten()
    init_hyperparam = out_hyperparam
    mcmc_samples_weight[0, i, :] = outq.flattened_tensor.clone()
    mcmc_samples_hyper[0, i, 0] = out_hyperparam
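# To "compare ess for hyperparameter" as the header comment asks, a sketch
# using diagnostics_stan the way the experiment scripts above use it:
sigma2_diagnostics = diagnostics_stan(mcmc_samples_hyper.numpy())
print("hyperparameter ess: {}".format(sigma2_diagnostics["ess"]))
weight_ess = diagnostics_stan(mcmc_samples_weight.numpy())["ess"]
print("min weight ess: {}".format(min(weight_ess)))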
import numpy
import torch
# import paths follow the layout used elsewhere in this repo; adjust the
# V_pima_inidan_logit import to wherever that distribution lives
from abstract.metric import metric
from abstract.abstract_class_Ham import Hamiltonian
from abstract.abstract_class_point import point
from abstract.abstract_static_sampler import abstract_static_one_step

seedid = 3
numpy.random.seed(seedid)
torch.manual_seed(seedid)
alpha = 1e-4

# first verify they have the same Hamiltonian function
inputq = torch.randn(7)
v_obj = V_pima_inidan_logit()
metric_obj = metric("softabs_diag", v_obj, alpha)
Ham = Hamiltonian(v_obj, metric_obj)
q_point = point(V=Ham.V)
q_point.flattened_tensor.copy_(inputq)
q_point.load_flatten()
p_point = Ham.T.generate_momentum(q_point)
print("abstract H {}".format(Ham.evaluate(q_point, p_point)))
print("abstract V {}".format(Ham.V.evaluate_scalar(q_point)))
print("abstract T {}".format(Ham.T.evaluate_scalar(q_point, p_point)))
L = 5000
mcmc_samples = torch.zeros(L, 7)
for i in range(L):
    out = abstract_static_one_step(epsilon=0.1,
        return o + temp2

    return T_givenq

def H(q, p, alpha):
    # returns a float
    return V(q).data[0] + T(q, alpha)(p)

alpha = 1e6
# first verify they have the same Hamiltonian function
print("exact H {}".format(H(q, p, alpha)))
v_obj = V_pima_inidan_logit()
metric_obj = metric("softabs", v_obj, alpha)
Ham = Hamiltonian(v_obj, metric_obj)
q_point = Ham.V.q_point.point_clone()
p_point = Ham.T.p_point.point_clone()
q_point.flattened_tensor.copy_(inputq)
p_point.flattened_tensor.copy_(inputp)
print("abstract H {}".format(Ham.evaluate(q_point, p_point)))
print("input q diff {}".format((q.data - q_point.flattened_tensor).sum()))
print("input p diff {}".format((p.data - p_point.flattened_tensor).sum()))
debug_dict = {"abstract": None, "explicit": None}
for i in range(10):
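# For reference, a sketch of the SoftAbs eigenvalue map (Betancourt, 2013) that
# the "softabs" metric above is built on; lam is an eigenvalue of the Hessian of V.
import math

def softabs_map_sketch(lam, alpha):
    # smooth surrogate for |lam|: lam * coth(alpha * lam); take the limit
    # 1 / alpha at lam == 0, and recover |lam| as alpha -> infinity
    return lam / math.tanh(alpha * lam)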
def __init__(self, tune_dict, tune_param_objs_dict, init_point):
    self.tune_param_objs_dict = tune_param_objs_dict
    self.other_params = {}
    self.v_fun = tune_dict["v_fun"]
    self.dynamic = tune_dict["dynamic"]
    if self.dynamic:
        self.windowed = None
        if "max_tree_depth" in tune_dict:
            assert tune_dict["max_tree_depth"] > 0
            self.max_tree_depth = tune_dict["max_tree_depth"]
        else:
            self.max_tree_depth = 10
        self.other_params.update({"max_tree_depth": self.max_tree_depth})
    else:
        if "max_L" in tune_dict:
            self.max_L = tune_dict["max_L"]
        else:
            self.max_L = 1024
        if "stepsize_jitter" in tune_dict:
            assert not tune_dict["windowed"]
            self.stepsize_jitter = tune_dict["stepsize_jitter"]
        else:
            self.stepsize_jitter = False
        self.other_params.update({"max_L": self.max_L,
                                  "stepsize_jitter": self.stepsize_jitter})
        self.windowed = tune_dict["windowed"]
        assert self.windowed == True or self.windowed == False
    self.second_order = tune_dict["second_order"]
    self.metric_name = tune_dict["metric_name"]
    self.criterion = tune_dict["criterion"]
    precision_type = init_point.flattened_tensor.type()
    self.v_obj = self.v_fun(precision_type=precision_type)
    self.v_obj.load_point(init_point)
    # the softabs family of metrics needs the extra alpha parameter
    if "alpha" in tune_param_objs_dict:
        alpha_val = tune_param_objs_dict["alpha"].get_val()
        self.metric = metric(self.metric_name, self.v_obj, alpha_val)
    else:
        self.metric = metric(self.metric_name, self.v_obj)
    self.Ham = Hamiltonian(self.v_obj, self.metric)
    # static trajectories can be specified by integration time (evolve_t)
    # or by number of leapfrog steps (evolve_L)
    if "evolve_t" in tune_param_objs_dict:
        self.input_time = True
    elif "evolve_L" in tune_param_objs_dict:
        self.input_time = False
    else:
        self.input_time = None
    self.ave_second_per_leapfrog = 0
    # here self.one_step_function is the raw sampler one-step;
    # self.tuneable_param names the tuneable parameters it needs
    self.one_step_function, self.tuneable_param = self.generate_sampler_one_step()
    self.one_step_function = wrap(raw_sampler_one_step=self.one_step_function,
                                  other_parameters=self.other_params)
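# A hedged sketch of a tune_dict this constructor accepts. The keys mirror the
# input_dict used by the experiment scripts above (singleton-ized through
# tuneinput_class); v_generator is a wrapped V constructor as in those scripts.
input_dict = {"v_fun": [v_generator], "epsilon": ["dual"], "second_order": [False],
              "max_tree_depth": [8], "metric_name": ["unit_e"], "dynamic": [True],
              "windowed": [False], "criterion": ["xhmc"], "xhmc_delta": [0.1]}
tune_dict = tuneinput_class(input_dict).singleton_tune_dict()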