def sample(draws=500, model=None, warmup_steps=None, num_chains=1, kernel='nuts'):
    """Markov-chain Monte Carlo sampling.

    Sampling should be run within the context of a model, or the model should be
    passed explicitly via the `model` argument. The number of samples is given by
    `draws`, which defaults to `500`. If not given, warm-up steps default to 30% of
    the sample count. The MCMC kernel is selected via `kernel`; `'hmc'` and `'nuts'`
    are available.

    `pmpyro.inference.sample` returns a trace of samples.
    """
    # get model from context
    if model is None:
        model = Context.get_context()
    stfn = model.stfn   # get stochastic function from model
    data = model.args   # get data
    # make sampling kernels
    kernels = {'nuts': NUTS(stfn, adapt_step_size=True),
               'hmc': HMC(stfn)}
    # if not num_chains:  # figure out number of chains
    #     num_chains = max(os.cpu_count() - 1, 2)
    if not warmup_steps:  # figure out warm-up steps
        warmup_steps = int(0.3 * draws)
    # run MCMC
    mcmc = MCMC(kernels[kernel],
                num_samples=draws,
                warmup_steps=warmup_steps,
                num_chains=num_chains)
    mcmc.run(*data)
    # get num samples
    num_samples = num_chains * draws
    return mcmc.get_samples()
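# For orientation, a minimal stand-alone Pyro example of what `sample` reduces to once
# the stochastic function and data have been pulled out of the model context. The toy
# model below is illustrative only and is not part of pmpyro.
import torch
import pyro
import pyro.distributions as dist
from pyro.infer import MCMC, NUTS

def toy_model(data):
    # hypothetical model: unknown mean, unit-variance observations
    mu = pyro.sample("mu", dist.Normal(0., 10.))
    with pyro.plate("data", len(data)):
        pyro.sample("x", dist.Normal(mu, 1.), obs=data)

data = torch.randn(100) + 3.
mcmc = MCMC(NUTS(toy_model, adapt_step_size=True),
            num_samples=500, warmup_steps=150)  # warm-up = 30% of draws, as above
mcmc.run(data)
trace = mcmc.get_samples()  # e.g. {"mu": tensor of shape [500]}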
def main():
    train_x = torch.linspace(0, 1, 1000).double()
    train_y = torch.sin(train_x * (2 * math.pi)).double() \
        + torch.randn(train_x.size()).double() * 0.1

    # Use a Positive constraint instead of the usual GreaterThan(1e-4) so that a
    # LogNormal prior has support over the full range.
    likelihood = gpytorch.likelihoods.GaussianLikelihood(
        noise_constraint=gpytorch.constraints.Positive())
    model = ExactGPModel(train_x, train_y, likelihood)

    # model.mean_module.register_prior("mean_prior", UniformPrior(-1, 1), "constant")
    # model.covar_module.base_kernel.register_prior("lengthscale_prior", UniformPrior(0.0, 9.0), "lengthscale")
    # # model.covar_module.base_kernel.register_prior("period_length_prior", UniformPrior(0.0, 4.0), "period_length")
    # model.covar_module.register_prior("outputscale_prior", UniformPrior(0, 4), "outputscale")
    # likelihood.register_prior("noise_prior", UniformPrior(0.0, 0.25), "noise")

    mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model)

    def pyro_model(x, y):
        priors = {
            'covar_module.base_kernel.raw_lengthscale': Normal(0, 2).expand([1, 1]),
            'covar_module.raw_outputscale': Normal(0, 2),
            'likelihood.noise_covar.raw_noise': Normal(0, 2).expand([1]),
            'mean_module.constant': Normal(0, 2),
        }
        fn = pyro.random_module("model", model, prior=priors)
        sampled_model = fn()
        output = sampled_model.likelihood(sampled_model(x))
        pyro.sample("obs", output, obs=y)

    # model.mean_module.constant.data.fill_(0.0)
    # model.covar_module.outputscale = 0.5**2
    # model.covar_module.base_kernel.lengthscale = 1
    # model.likelihood.noise = 0.05**2

    model.double()
    likelihood.double()

    nuts_kernel = NUTS(pyro_model, adapt_step_size=True, jit_compile=False)
    hmc_kernel = HMC(pyro_model, step_size=0.1, num_steps=10, adapt_step_size=True,
                     init_strategy=pyro.infer.autoguide.initialization.init_to_median(num_samples=20))
    # num_samples and warmup_steps are assumed to be defined at module level
    mcmc_run = MCMC(nuts_kernel, num_samples=num_samples, warmup_steps=warmup_steps)  # , initial_params=initial_params)
    return model, likelihood, mll, mcmc_run, train_x, train_y

    # NOTE: the code below is unreachable (it follows the return above) and expects a
    # `chain` object holding posterior samples.
    labels = ["c", "ls", "os", "noise"]
    fig, axes = plt.subplots(nrows=2, ncols=2)
    for i in range(4):
        if i == 0:
            samples = getattr(chain, labels[i]).reshape(-1)
        else:
            samples = 1 / (1 + np.exp(-1 * getattr(chain, labels[i]).reshape(-1)))
        sns.distplot(samples, ax=axes[int(i / 2), int(i % 2)])
        axes[int(i / 2)][int(i % 2)].legend([labels[i]])
    plt.show()

    pickle.dump(chain, open("results/test_mcmc.pkl", "wb"))
    return
def test_model_with_potential_fn():
    init_params = {"z": torch.tensor(0.)}

    def potential_fn(params):
        return params["z"]

    mcmc = MCMC(kernel=HMC(potential_fn=potential_fn),
                num_samples=10,
                warmup_steps=10,
                initial_params=init_params)
    mcmc.run()
def sample_posterior_mcmc(self, num_samples, thin=10):
    """
    Samples from the posterior q(theta | x0) for the true observation using MCMC.

    The MCMC method is read from `self._mcmc_method` and must be one of
    ['slice-np', 'slice', 'hmc', 'nuts'].

    :param num_samples: Number of samples to generate.
    :param thin: Generate (num_samples * thin) samples in total, then keep every
        'thin'-th sample.
    :return: torch.Tensor of shape [num_samples, parameter_dim]
    """
    # Always sample in eval mode.
    self._neural_posterior.eval()

    if self._mcmc_method == "slice-np":
        self.posterior_sampler.gen(20)  # burn-in
        samples = torch.Tensor(self.posterior_sampler.gen(num_samples))
    else:
        if self._mcmc_method == "slice":
            kernel = Slice(potential_function=self._potential_function)
        elif self._mcmc_method == "hmc":
            kernel = HMC(potential_fn=self._potential_function)
        elif self._mcmc_method == "nuts":
            kernel = NUTS(potential_fn=self._potential_function)
        else:
            raise ValueError(
                "'mcmc_method' must be one of ['slice', 'hmc', 'nuts']."
            )

        num_chains = mp.cpu_count() - 1
        initial_params = self._prior.sample((num_chains,))
        sampler = MCMC(
            kernel=kernel,
            num_samples=(thin * num_samples) // num_chains + num_chains,
            warmup_steps=200,
            initial_params={"": initial_params},
            num_chains=num_chains,
            mp_context="spawn",
        )
        sampler.run()
        samples = next(iter(sampler.get_samples().values())).reshape(
            -1, self._simulator.parameter_dim
        )

        samples = samples[::thin][:num_samples]
        assert samples.shape[0] == num_samples

    # Back to training mode.
    self._neural_posterior.train()

    return samples
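# As a reference for the potential-function branch above, a self-contained Pyro sketch of
# multi-chain HMC on a user-supplied potential with post-hoc thinning. The quadratic
# potential and the site name "theta" are illustrative assumptions, not part of the
# surrounding class.
import torch
from pyro.infer.mcmc import HMC, MCMC

def potential_fn(params):
    # unnormalized negative log density: standard normal over "theta"
    return 0.5 * (params["theta"] ** 2).sum()

if __name__ == "__main__":
    num_chains, thin, num_samples = 2, 10, 100
    # one row of initial parameters per chain
    initial_params = {"theta": torch.zeros(num_chains, 1)}
    sampler = MCMC(
        HMC(potential_fn=potential_fn),
        num_samples=(thin * num_samples) // num_chains,
        warmup_steps=200,
        initial_params=initial_params,
        num_chains=num_chains,
        mp_context="spawn",
    )
    sampler.run()
    theta = sampler.get_samples()["theta"].reshape(-1, 1)[::thin][:num_samples]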
def sample_posterior(self, num_samples, thin=1):
    """
    Samples from the posterior for the true observation, q(theta | x0) ~ q(x0 | theta) p(theta),
    using the most recent likelihood estimate q(x0 | theta) with MCMC.

    :param num_samples: Number of samples to generate.
    :param thin: Generate (num_samples * thin) samples in total, then select every
        'thin' sample.
    :return: torch.Tensor of shape [num_samples, parameter_dim]
    """
    # Always sample in eval mode.
    self._neural_likelihood.eval()

    if self._mcmc_method == "slice-np":
        self.posterior_sampler.gen(20)
        samples = torch.Tensor(self.posterior_sampler.gen(num_samples))
    else:
        if self._mcmc_method == "slice":
            kernel = Slice(potential_function=self._potential_function)
        elif self._mcmc_method == "hmc":
            kernel = HMC(potential_fn=self._potential_function)
        elif self._mcmc_method == "nuts":
            kernel = NUTS(potential_fn=self._potential_function)
        else:
            raise ValueError(
                "'mcmc_method' must be one of ['slice', 'hmc', 'nuts']."
            )

        num_chains = mp.cpu_count() - 1

        # TODO: decide on way to initialize chain
        initial_params = self._prior.sample((num_chains,))
        sampler = MCMC(
            kernel=kernel,
            num_samples=num_samples // num_chains + num_chains,
            warmup_steps=200,
            initial_params={"": initial_params},
            num_chains=num_chains,
        )
        sampler.run()
        samples = next(iter(sampler.get_samples().values())).reshape(
            -1, self._simulator.parameter_dim
        )

        samples = samples[:num_samples].to(device)
        assert samples.shape[0] == num_samples

    # Back to training mode.
    self._neural_likelihood.train()

    return samples
def test_model_with_potential_fn(run_mcmc_cls):
    init_params = {"z": torch.tensor(0.0)}

    def potential_fn(params):
        return params["z"]

    run_mcmc_cls(
        data=None,
        kernel=HMC(potential_fn=potential_fn),
        num_samples=10,
        warmup_steps=10,
        initial_params=init_params,
    )
def _train_hmc(self, train_loader, n_samples, warmup, step_size, num_steps, device):
    print("\n == HMC training ==")
    pyro.clear_param_store()

    num_batches = int(len(train_loader.dataset) / train_loader.batch_size)
    batch_samples = int(n_samples / num_batches) + 1
    print("\nn_batches =", num_batches, "\tbatch_samples =", batch_samples)

    kernel = HMC(self.model, step_size=step_size, num_steps=num_steps)
    mcmc = MCMC(kernel=kernel, num_samples=batch_samples, warmup_steps=warmup,
                num_chains=1)

    start = time.time()
    for x_batch, y_batch in train_loader:
        x_batch = x_batch.to(device)
        labels = y_batch.to(device).argmax(-1)
        mcmc.run(x_batch, labels)

    execution_time(start=start, end=time.time())

    self.posterior_predictive = {}
    posterior_samples = mcmc.get_samples(n_samples)
    state_dict_keys = list(self.basenet.state_dict().keys())

    if DEBUG:
        print("\n", list(posterior_samples.values())[-1])

    for model_idx in range(n_samples):
        net_copy = copy.deepcopy(self.basenet)

        model_dict = OrderedDict({})
        for weight_idx, weights in enumerate(posterior_samples.values()):
            model_dict.update({state_dict_keys[weight_idx]: weights[model_idx]})

        net_copy.load_state_dict(model_dict)
        self.posterior_predictive.update({str(model_idx): net_copy})

    if DEBUG:
        print("\n", weights[model_idx])

    self.save()
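# A hedged sketch of how the `posterior_predictive` ensemble built above might be used at
# prediction time. The attribute layout mirrors the snippet; averaging softmax outputs over
# the sampled networks is an assumption about the intended use, not taken from the source.
import torch
import torch.nn.functional as F

def ensemble_predict(posterior_predictive, x):
    """Mean class probabilities over the sampled networks (assumed to be classifiers)."""
    with torch.no_grad():
        probs = [F.softmax(net(x), dim=-1) for net in posterior_predictive.values()]
    return torch.stack(probs).mean(dim=0)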
def monte_carlo(y):
    pyro.clear_param_store()

    # create a simple Hamiltonian Monte Carlo kernel with step_size of 0.1
    hmc_kernel = HMC(conditioned_model, step_size=.1)
    # create a Markov chain Monte Carlo sampler with the HMC kernel,
    # 500 samples, and 100 warmup iterations
    mcmc = MCMC(hmc_kernel, num_samples=500, warmup_steps=100)
    mcmc.run(model, y)

    sample_dict = mcmc.get_samples(num_samples=5000)

    plt.figure(figsize=(8, 6))
    sns.distplot(sample_dict["p"].numpy())
    plt.xlabel("Observed probability value")
    plt.ylabel("Observed frequency")
    plt.show()

    mcmc.summary(prob=0.95)

    return sample_dict
def mcmc(model, obs, num_samples, kernel='HMC', kernel_params={}, mcmc_params={},
         sites=['theta']):
    # NOTE: requires a differentiable model
    model_conditioned = partial(model, obs=obs)

    if kernel.upper() == 'HMC':
        mcmc_kernel = HMC(model_conditioned, **kernel_params)
    elif kernel.upper() == 'NUTS':
        mcmc_kernel = NUTS(model_conditioned, **kernel_params)
    else:
        raise NotImplementedError

    mcmc = MCMC(mcmc_kernel, num_samples, **mcmc_params)
    mcmc_run = mcmc.run()

    posterior = pyro.infer.EmpiricalMarginal(mcmc_run, sites=sites)
    return posterior
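# A hedged usage sketch for the wrapper above. Note that it targets the older Pyro API, in
# which `MCMC.run()` returned a posterior object that `EmpiricalMarginal` could consume; in
# Pyro >= 1.0, `run()` returns None and samples are read with `mcmc.get_samples()`.
# The toy model below is illustrative only.
import torch
import pyro
import pyro.distributions as dist

def toy_model(obs=None):
    # single latent site named 'theta', matching the default `sites` argument
    theta = pyro.sample('theta', dist.Normal(0., 1.))
    pyro.sample('x', dist.Normal(theta, 0.5), obs=obs)

posterior = mcmc(toy_model, obs=torch.tensor(0.8), num_samples=300, kernel='NUTS')
theta_samples = torch.stack([posterior.sample() for _ in range(300)])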
def run(
    task: Task,
    num_samples: int,
    num_observation: Optional[int] = None,
    observation: Optional[torch.Tensor] = None,
    num_chains: int = 10,
    num_warmup: int = 10000,
    kernel: str = "slice",
    kernel_parameters: Optional[Dict[str, Any]] = None,
    thinning: int = 1,
    diagnostics: bool = True,
    available_cpu: int = 1,
    mp_context: str = "fork",
    jit_compile: bool = False,
    automatic_transforms_enabled: bool = True,
    initial_params: Optional[torch.Tensor] = None,
    **kwargs: Any,
) -> torch.Tensor:
    """Runs MCMC using Pyro on the potential function.

    Produces `num_samples` while accounting for warmup (burn-in) and thinning.

    Note that the actual number of simulations is not controlled for with MCMC,
    since these algorithms are only used as a reference method in the benchmark.

    MCMC is run on the potential function, which returns the unnormalized negative
    log posterior probability. Note that this requires a tractable likelihood.
    Pyro is used to automatically construct the potential function.

    Args:
        task: Task instance
        num_samples: Number of samples to generate from posterior
        num_observation: Observation number to load, alternative to `observation`
        observation: Observation, alternative to `num_observation`
        num_chains: Number of chains
        num_warmup: Warmup steps, during which parameters of the sampler are adapted.
            Warmup samples are not returned by the algorithm.
        kernel: HMC, NUTS, or Slice
        kernel_parameters: Parameters passed to kernel
        thinning: Amount of thinning to apply, in order to avoid drawing
            correlated samples from the chain
        diagnostics: Flag for diagnostics
        available_cpu: Number of CPUs used to parallelize chains
        mp_context: Multiprocessing context, only "fork" might work
        jit_compile: Just-in-time (JIT) compilation, can yield significant speed-ups
        automatic_transforms_enabled: Whether or not to use automatic transforms
        initial_params: Parameters to initialize at

    Returns:
        Samples from posterior
    """
    assert not (num_observation is None and observation is None)
    assert not (num_observation is not None and observation is not None)

    tic = time.time()
    log = sbibm.get_logger(__name__)

    hook_fn = None
    if diagnostics:
        log.info(f"MCMC sampling for observation {num_observation}")
        tb_writer, tb_close = tb_make_writer(
            logger=log,
            basepath=f"tensorboard/pyro_{kernel.lower()}/observation_{num_observation}",
        )
        hook_fn = tb_make_hook_fn(tb_writer)

    if "num_simulations" in kwargs:
        warnings.warn(
            "`num_simulations` was passed as a keyword but will be ignored, "
            "see docstring for more info."
        )

    # Prepare model and transforms
    conditioned_model = task._get_pyro_model(
        num_observation=num_observation, observation=observation
    )
    transforms = task._get_transforms(
        num_observation=num_observation,
        observation=observation,
        automatic_transforms_enabled=automatic_transforms_enabled,
    )

    kernel_parameters = kernel_parameters if kernel_parameters is not None else {}
    kernel_parameters["jit_compile"] = jit_compile
    kernel_parameters["transforms"] = transforms
    log.info(
        "Using kernel: {name}({parameters})".format(
            name=kernel,
            parameters=",".join([f"{k}={v}" for k, v in kernel_parameters.items()]),
        )
    )
    if kernel.lower() == "nuts":
        mcmc_kernel = NUTS(model=conditioned_model, **kernel_parameters)
    elif kernel.lower() == "hmc":
        mcmc_kernel = HMC(model=conditioned_model, **kernel_parameters)
    elif kernel.lower() == "slice":
        mcmc_kernel = Slice(model=conditioned_model, **kernel_parameters)
    else:
        raise NotImplementedError

    if initial_params is not None:
        site_name = "parameters"
        initial_params = {site_name: transforms[site_name](initial_params)}
    else:
        initial_params = None

    mcmc_parameters = {
        "num_chains": num_chains,
        "num_samples": thinning * num_samples,
        "warmup_steps": num_warmup,
        "available_cpu": available_cpu,
        "initial_params": initial_params,
    }
    log.info(
        "Calling MCMC with: MCMC({name}_kernel, {parameters})".format(
            name=kernel,
            parameters=",".join([f"{k}={v}" for k, v in mcmc_parameters.items()]),
        )
    )

    mcmc = MCMC(mcmc_kernel, hook_fn=hook_fn, **mcmc_parameters)
    mcmc.run()

    toc = time.time()
    log.info(f"Finished MCMC after {toc - tic:.3f} seconds")
    log.info(f"Automatic transforms {mcmc.transforms}")

    log.info(f"Apply thinning of {thinning}")
    mcmc._samples = {
        "parameters": mcmc._samples["parameters"][:, ::thinning, :]
    }

    num_samples_available = (mcmc._samples["parameters"].shape[0] *
                             mcmc._samples["parameters"].shape[1])
    if num_samples_available < num_samples:
        warnings.warn("Some samples will be included multiple times")
        samples = mcmc.get_samples(
            num_samples=num_samples, group_by_chain=False)["parameters"].squeeze()
    else:
        samples = mcmc.get_samples(group_by_chain=False)["parameters"].squeeze()
        idx = torch.randperm(samples.shape[0])[:num_samples]
        samples = samples[idx, :]

    assert samples.shape[0] == num_samples

    if diagnostics:
        mcmc.summary()
        tb_ess(tb_writer, mcmc)
        tb_r_hat(tb_writer, mcmc)
        tb_marginals(tb_writer, mcmc)
        tb_acf(tb_writer, mcmc)
        tb_posteriors(tb_writer, mcmc)
        tb_plot_posterior(tb_writer, samples, tag="posterior/final")
        tb_close()

    return samples
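# A hedged usage sketch for `run` above, assuming it is importable alongside sbibm as in
# the benchmark code; the task name is just an example of a task with a tractable
# likelihood.
import sbibm

task = sbibm.get_task("gaussian_linear")
samples = run(
    task,
    num_samples=1000,
    num_observation=1,
    kernel="nuts",
    num_chains=4,
    num_warmup=1000,
    thinning=2,
    diagnostics=False,
)
print(samples.shape)  # torch.Size([1000, dim_parameters])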
plt.subplot(2, 2, 4)
plt.hist(alpha.numpy(), bins=30, density=True)
plt.xlabel('alpha')
plt.ylabel('density')


# In[9]:


pyro.clear_param_store()

# Set random seed for reproducibility.
pyro.set_rng_seed(1)

# Set up HMC sampler.
kernel = HMC(dp_sb_gmm, step_size=0.01, trajectory_length=1,
             adapt_step_size=False, adapt_mass_matrix=False, jit_compile=True)
hmc = MCMC(kernel, num_samples=500, warmup_steps=500)
hmc.run(y, 10)  # 06:13 for marginalized version.

# Get posterior samples.
hmc_posterior_samples = hmc.get_samples()
hmc_posterior_samples['eta'] = stickbreak(hmc_posterior_samples['v'])


# In[10]:


pyro.clear_param_store()
def localnews(INFERENCE):
    device = torch.device('cpu')
    torch.set_default_tensor_type(torch.DoubleTensor)
    if torch.cuda.is_available():
        device = torch.device('cuda')
        torch.set_default_tensor_type(torch.cuda.DoubleTensor)

    # preprocess data
    data = pd.read_csv("data/localnews.csv", index_col=[0])
    N = data.station_id.unique().shape[0]
    data.date = data.date.apply(
        lambda x: datetime.datetime.strptime(x, '%m/%d/%Y').date())
    # data = data[(data.date<=datetime.date(2017, 9, 5)) & (data.date>=datetime.date(2017, 8, 25))]
    # data = data[data.station_id.isin([1345,3930])]

    ds = data.t.to_numpy().reshape((-1, 1))
    ohe = OneHotEncoder()
    ohe = LabelEncoder()
    X = data.drop(columns=[
        "station_id", "date", "national_politics", "sinclair2017",
        "post", "affiliation", "callsign", "t"
    ]).to_numpy().reshape(-1, )  # , "weekday", "affiliation", "callsign"
    Group = data.sinclair2017.to_numpy().reshape(-1, 1)
    ohe.fit(X)
    X = ohe.transform(X)

    station_le = LabelEncoder()
    ids = data.station_id.to_numpy().reshape(-1, )
    station_le.fit(ids)
    ids = station_le.transform(ids)

    # weekday/day/unit effects and time trend
    X = np.concatenate((X.reshape(-1, 1), ds, ids.reshape(-1, 1), Group, ds), axis=1)
    # numbers of dummies for each effect
    X_max_v = [np.max(X[:, i]).astype(int) for i in range(X.shape[1] - 2)]
    Y = data.national_politics.to_numpy()
    T0 = data[data.date == datetime.date(2017, 9, 1)].t.to_numpy()[0]

    train_condition = (data.post != 1) | (data.sinclair2017 != 1)
    train_x = torch.Tensor(X[train_condition], device=device).double()
    train_y = torch.Tensor(Y[train_condition], device=device).double()
    idx = data.sinclair2017.to_numpy()
    train_g = torch.from_numpy(idx[train_condition]).to(device)

    test_x = torch.Tensor(X).double()
    test_y = torch.Tensor(Y).double()
    test_g = torch.from_numpy(idx)

    # define likelihood
    noise_prior = gpytorch.priors.GammaPrior(concentration=1, rate=10)
    likelihood = gpytorch.likelihoods.GaussianLikelihood(
        noise_prior=noise_prior if "MAP" in INFERENCE else None,
        noise_constraint=gpytorch.constraints.Positive())
    # likelihood2 = gpytorch.likelihoods.GaussianLikelihood(noise_prior=noise_prior if "MAP" in INFERENCE else None,
    #     noise_constraint=gpytorch.constraints.Positive())

    model = MultitaskGPModel(test_x, test_y, X_max_v, likelihood, MAP="MAP" in INFERENCE)
    model.drift_t_module.T0 = T0
    model2 = MultitaskGPModel(train_x, train_y, X_max_v, likelihood, MAP="MAP" in INFERENCE)
    # model2 = MultitaskGPModel(test_x, test_y, X_max_v, likelihood2, MAP="MAP" in INFERENCE)
    # model2.drift_t_module.T0 = T0
    model2.double()

    # group effects
    # model.x_covar_module[0].c2 = torch.var(train_y)
    # model.x_covar_module[0].raw_c2.requires_grad = False

    # weekday/day/unit effects initialized to 0.05**2
    for i in range(len(X_max_v)):
        model.x_covar_module[i].c2 = torch.tensor(0.05**2)

    # fix unit mean/variance by not requiring grad
    model.x_covar_module[-1].raw_c2.requires_grad = False

    # model.unit_mean_module.constant.data.fill_(0.12)
    # model.unit_mean_module.constant.requires_grad = False
    model.group_mean_module.constantvector.data[0].fill_(0.11)
    model.group_mean_module.constantvector.data[1].fill_(0.12)

    # set precision to double tensors
    torch.set_default_tensor_type(torch.DoubleTensor)
    train_x, train_y = train_x.to(device), train_y.to(device)
    test_x, test_y = test_x.to(device), test_y.to(device)
    model.to(device)
    likelihood.to(device)

    # define loss for GPs - the marginal log likelihood
    mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model)

    if torch.cuda.is_available():
        train_x = train_x.cuda()
        train_y = train_y.cuda()
        model = model.cuda()
        likelihood = likelihood.cuda()

    if not os.path.isdir("results"):
        os.mkdir("results")

    transforms = {
        'group_index_module.raw_rho':
            model.group_index_module.raw_rho_constraint.transform,
        'group_t_covar_module.base_kernel.raw_lengthscale':
            model.group_t_covar_module.base_kernel.raw_lengthscale_constraint.transform,
        'group_t_covar_module.raw_outputscale':
            model.group_t_covar_module.raw_outputscale_constraint.transform,
        'unit_t_covar_module.base_kernel.raw_lengthscale':
            model.unit_t_covar_module.base_kernel.raw_lengthscale_constraint.transform,
        'unit_t_covar_module.raw_outputscale':
            model.unit_t_covar_module.raw_outputscale_constraint.transform,
        'likelihood.noise_covar.raw_noise':
            model.likelihood.noise_covar.raw_noise_constraint.transform,
        'x_covar_module.0.raw_c2':
            model.x_covar_module[0].raw_c2_constraint.transform,
        'x_covar_module.1.raw_c2':
            model.x_covar_module[1].raw_c2_constraint.transform,
        # 'x_covar_module.2.raw_c2': model.x_covar_module[2].raw_c2_constraint.transform
    }

    priors = {
        'group_index_module.raw_rho':
            pyro.distributions.Normal(0, 1.5),
        'group_t_covar_module.base_kernel.raw_lengthscale':
            pyro.distributions.Normal(30, 10).expand([1, 1]),
        'group_t_covar_module.raw_outputscale':
            pyro.distributions.Normal(-7, 1),
        'unit_t_covar_module.base_kernel.raw_lengthscale':
            pyro.distributions.Normal(30, 10).expand([1, 1]),
        'unit_t_covar_module.raw_outputscale':
            pyro.distributions.Normal(-7, 1),
        'likelihood.noise_covar.raw_noise':
            pyro.distributions.Normal(-7, 1).expand([1]),
        'x_covar_module.0.raw_c2':
            pyro.distributions.Normal(-7, 1).expand([1]),
        'x_covar_module.1.raw_c2':
            pyro.distributions.Normal(-7, 1).expand([1]),
        # 'model.x_covar_module.2.raw_c2': pyro.distributions.Normal(-6, 1).expand([1])
    }
    # plot_pyro_prior(priors, transforms)

    def pyro_model(x, y):
        fn = pyro.random_module("model", model, prior=priors)
        sampled_model = fn()
        output = sampled_model.likelihood(sampled_model(x))
        pyro.sample("obs", output, obs=y)

    if INFERENCE == 'MCMCLOAD':
        with open('results/localnews_MCMC.pkl', 'rb') as f:
            mcmc_run = pickle.load(f)
        mcmc_samples = mcmc_run.get_samples()
        print(mcmc_run.summary())
        plot_pyro_posterior(mcmc_samples, transforms)
        # plot_posterior(mcmc_samples)
        return
        for k, d in mcmc_samples.items():
            mcmc_samples[k] = d[idx]
        model.pyro_load_from_samples(mcmc_samples)
        visualize_localnews_MCMC(data, train_x, train_y, train_i, test_x, test_y,
                                 test_i, model, likelihood, T0, station_le, 10)
        return
    elif INFERENCE == 'MAP':
        model.group_index_module._set_rho(0.0)
        model.group_t_covar_module.outputscale = 0.05**2
        model.group_t_covar_module.base_kernel.lengthscale = 15
        model.likelihood.noise_covar.noise = 0.05**2
        model.unit_t_covar_module.outputscale = 0.05**2
        model.unit_t_covar_module.base_kernel.lengthscale = 30

        # weekday/day/unit effects initialize to 0.01**2
        for i in range(len(X_max_v)):
            model.x_covar_module[i].c2 = torch.tensor(0.05**2)

        for name, param in model.drift_t_module.named_parameters():
            param.requires_grad = True
        model.drift_t_module._set_T1(0.0)
        model.drift_t_module._set_T2(5.0)
        model.drift_t_module.base_kernel.lengthscale = 30.0
        model.drift_t_module.outputscale = 0.05**2
        # model.drift_t_module.raw_T1.requires_grad = False
        # model.drift_t_module.raw_T2.requires_grad = False

        optimizer = torch.optim.LBFGS(model.parameters(), lr=0.1, history_size=10,
                                      max_iter=4)
        model, likelihood = train(test_x, test_y, model, likelihood, mll, optimizer,
                                  training_iterations)
        torch.save(model.state_dict(),
                   'results/localnews_' + INFERENCE + '_model_state.pth')
        return
    elif INFERENCE == 'MCMC':
        model.group_index_module._set_rho(0.9)
        model.group_t_covar_module.outputscale = 0.02**2
        model.group_t_covar_module.base_kernel._set_lengthscale(3)
        model.likelihood.noise_covar.noise = 0.03**2
        model.unit_t_covar_module.outputscale = 0.02**2
        model.unit_t_covar_module.base_kernel._set_lengthscale(30)

        # weekday/day/unit effects initialized to 0.01**2
        for i in range(len(X_max_v) - 1):
            model.x_covar_module[i].c2 = torch.tensor(0.01**2)
            # model.x_covar_module[i].raw_c2.requires_grad = False

        initial_params = {
            'group_index_module.rho_prior':
                model.group_index_module.raw_rho.detach(),
            'group_t_covar_module.base_kernel.lengthscale_prior':
                model.group_t_covar_module.base_kernel.raw_lengthscale.detach(),
            'group_t_covar_module.outputscale_prior':
                model.group_t_covar_module.raw_outputscale.detach(),
            'unit_t_covar_module.base_kernel.lengthscale_prior':
                model.unit_t_covar_module.base_kernel.raw_lengthscale.detach(),
            'unit_t_covar_module.outputscale_prior':
                model.unit_t_covar_module.raw_outputscale.detach(),
            'likelihood.noise_covar.noise_prior':
                model.likelihood.raw_noise.detach(),
            'x_covar_module.0.c2_prior':
                model.x_covar_module[0].raw_c2.detach(),
            'x_covar_module.1.c2_prior':
                model.x_covar_module[1].raw_c2.detach()
        }

        with gpytorch.settings.fast_computations(covar_root_decomposition=False,
                                                 log_prob=False, solves=False):
            nuts_kernel = NUTS(pyro_model, adapt_step_size=True, adapt_mass_matrix=True,
                               jit_compile=False,
                               init_strategy=pyro.infer.autoguide.initialization.init_to_value(
                                   values=initial_params))
            hmc_kernel = HMC(pyro_model, step_size=0.1, num_steps=10, adapt_step_size=True,
                             init_strategy=pyro.infer.autoguide.initialization.init_to_mean())
            mcmc_run = MCMC(nuts_kernel, num_samples=num_samples, warmup_steps=warmup_steps)
            mcmc_run.run(train_x, train_y)
            pickle.dump(mcmc_run, open("results/localnews_MCMC.pkl", "wb"))
            # plot_pyro_posterior(mcmc_run.get_samples(), transforms)
            return
            visualize_localnews_MCMC(data, train_x, train_y, train_g, test_x, test_y,
                                     test_i, model, likelihood, T0, station_le,
                                     num_samples)
    else:
        model.load_strict_shapes(False)
        state_dict = torch.load('results/localnews_MAP_model_state.pth')
        model.load_state_dict(state_dict)
        model2.load_state_dict(state_dict)
        print(
            f'Parameter name: rho value = {model.group_index_module.rho.detach().numpy()}'
        )
        # print(f'Parameter name: unit mean value = {model.unit_mean_module.constant.detach().numpy()}')
        print(
            f'Parameter name: group ls value = {model.group_t_covar_module.base_kernel.lengthscale.detach().numpy()}'
        )
        print(
            f'Parameter name: group os value = {np.sqrt(model.group_t_covar_module.outputscale.detach().numpy())}'
        )
        print(
            f'Parameter name: unit ls value = {model.unit_t_covar_module.base_kernel.lengthscale.detach().numpy()}'
        )
        print(
            f'Parameter name: unit os value = {np.sqrt(model.unit_t_covar_module.outputscale.detach().numpy())}'
        )
        print(
            f'Parameter name: noise value = {np.sqrt(model.likelihood.noise.detach().numpy())}'
        )
        print(
            f'Parameter name: weekday std value = {np.sqrt(model.x_covar_module[0].c2.detach().numpy())}'
        )
        print(
            f'Parameter name: day std value = {np.sqrt(model.x_covar_module[1].c2.detach().numpy())}'
        )
        print(
            f'Parameter name: unit std value = {np.sqrt(model.x_covar_module[2].c2.detach().numpy())}'
        )
        print(
            f'Parameter name: drift ls value = {model.drift_t_module.base_kernel.lengthscale.detach().numpy()}'
        )
        print(
            f'Parameter name: drift cov os value = {np.sqrt(model.drift_t_module.outputscale.detach().numpy())}'
        )
        print(
            f'Parameter name: drift cov T1 value = {model.drift_t_module.T1.detach().numpy()}'
        )
        print(
            f'Parameter name: drift cov T2 value = {model.drift_t_module.T2.detach().numpy()}'
        )
        visualize_localnews(data, test_x, test_y, test_g, model, model2, likelihood,
                            T0, station_le, train_condition)
def main(inference, model, width, n_samples, warmup, init_method, burnin, skip,
         metrics_skip, cycles, temperature, momentum, precond_update, lr, batch_size,
         load_samples, save_samples, reject_samples, run_id, log_dir, sampling_decay,
         progressbar, skip_first, _run, _log):
    assert inference in ["SGLD", "HMC", "VerletSGLD", "OurHMC", "HMCReject",
                         "VerletSGLDReject", "SGLDReject"]
    assert width > 0
    assert n_samples > 0
    assert cycles > 0
    assert temperature >= 0

    data = get_data()

    x_train = data.norm.train_X
    y_train = data.norm.train_y

    x_test = data.norm.test_X
    y_test = data.norm.test_y

    model = get_model(x_train=x_train, y_train=y_train)

    if load_samples is None:
        if init_method == "he":
            exp_utils.he_initialize(model)
        elif init_method == "he_uniform":
            exp_utils.he_uniform_initialize(model)
        elif init_method == "he_zerobias":
            exp_utils.he_zerobias_initialize(model)
        elif init_method == "prior":
            pass
        else:
            raise ValueError(f"unknown init_method={init_method}")
    else:
        state_dict = exp_utils.load_samples(load_samples, idx=-1, keep_steps=False)
        model_sd = model.state_dict()
        # iterate over a copy of the keys because entries may be deleted below
        for k in list(state_dict.keys()):
            if k not in model_sd:
                _log.warning(f"key {k} not in model, ignoring")
                del state_dict[k]
            elif model_sd[k].size() != state_dict[k].size():
                _log.warning(f"key {k} size mismatch, model={model_sd[k].size()}, "
                             f"loaded={state_dict[k].size()}")
                state_dict[k] = model_sd[k]

        missing_keys = set(model_sd.keys()) - set(state_dict.keys())
        _log.warning(f"The following keys were not found in loaded state dict: {missing_keys}")
        model_sd.update(state_dict)
        model.load_state_dict(model_sd)
        del state_dict
        del model_sd

    if save_samples:
        model_saver_fn = (lambda: exp_utils.HDF5ModelSaver(
            exp_utils.sneaky_artifact(_run, "samples.pt"), "w"))
    else:
        @contextlib.contextmanager
        def model_saver_fn():
            yield None

    with exp_utils.HDF5Metrics(
            exp_utils.sneaky_artifact(_run, "metrics.h5"), "w") as metrics_saver,\
            model_saver_fn() as model_saver:
        if inference == "HMC":
            _potential_fn = model.get_potential(x_train, y_train,
                                                eff_num_data=len(x_train))
            kernel = HMC(potential_fn=_potential_fn,
                         adapt_step_size=False, adapt_mass_matrix=False,
                         step_size=1e-3, num_steps=32)
            mcmc = MCMC(kernel, num_samples=n_samples, warmup_steps=warmup,
                        initial_params=model.params_dict())
        else:
            if inference == "SGLD":
                runner_class = bnn_priors.inference.SGLDRunner
            elif inference == "VerletSGLD":
                runner_class = bnn_priors.inference.VerletSGLDRunner
            elif inference == "OurHMC":
                runner_class = bnn_priors.inference.HMCRunner
            elif inference == "VerletSGLDReject":
                runner_class = bnn_priors.inference_reject.VerletSGLDRunnerReject
            elif inference == "HMCReject":
                runner_class = bnn_priors.inference_reject.HMCRunnerReject
            elif inference == "SGLDReject":
                runner_class = bnn_priors.inference_reject.SGLDRunnerReject

            assert (n_samples * skip) % cycles == 0
            sample_epochs = n_samples * skip // cycles
            epochs_per_cycle = warmup + burnin + sample_epochs
            if batch_size is None:
                batch_size = len(data.norm.train)
            # Disable parallel loading for `TensorDataset`s.
            num_workers = (0 if isinstance(data.norm.train, t.utils.data.TensorDataset)
                           else 2)
            dataloader = t.utils.data.DataLoader(data.norm.train,
                                                 batch_size=batch_size, shuffle=True,
                                                 drop_last=False,
                                                 num_workers=num_workers)
            dataloader_test = t.utils.data.DataLoader(data.norm.test,
                                                      batch_size=batch_size,
                                                      shuffle=False, drop_last=False,
                                                      num_workers=num_workers)
            mcmc = runner_class(model=model, dataloader=dataloader,
                                dataloader_test=dataloader_test,
                                epochs_per_cycle=epochs_per_cycle,
                                warmup_epochs=warmup, sample_epochs=sample_epochs,
                                learning_rate=lr, skip=skip,
                                metrics_skip=metrics_skip,
                                sampling_decay=sampling_decay, cycles=cycles,
                                temperature=temperature, momentum=momentum,
                                precond_update=precond_update,
                                metrics_saver=metrics_saver, model_saver=model_saver,
                                reject_samples=reject_samples)

        mcmc.run(progressbar=progressbar)

    samples = mcmc.get_samples()
    samples = {k: v[skip_first:] for k, v in samples.items()}

    model.eval()

    batch_size = min(batch_size, len(data.norm.test))
    dataloader_test = t.utils.data.DataLoader(data.norm.test, batch_size=batch_size)

    return evaluate_model(model, dataloader_test, samples)
    with pyro.plate('latent_response', N):
        eta = pyro.sample('eta', dist.Normal(0, 1))

    # Latent function.
    f = compute_f(alpha, rho, beta, eta, X)

    with pyro.plate('response', N):
        pyro.sample('obs', dist.Bernoulli(logits=f), obs=y)


# HMC
pyro.clear_param_store()  # clear global parameter cache.
pyro.set_rng_seed(2)      # set random number generator seed.
hmc = MCMC(HMC(gpc, step_size=0.05, trajectory_length=1,
               adapt_step_size=False, adapt_mass_matrix=False, jit_compile=True),
           num_samples=500, warmup_steps=500)  # sampler setup.
hmc.run(X, y.double())  # run mcmc
hmc_posterior_samples = hmc.get_samples()  # get posterior samples.

# NUTS
pyro.clear_param_store()
pyro.set_rng_seed(2)
nuts = MCMC(NUTS(gpc, target_accept_prob=0.8, max_tree_depth=10, jit_compile=True),
            num_samples=500,
        y_hidden_dist = dist.Exponential(1 / link[i])

        if truncation_label[i] == 0:
            y_real = pyro.sample("obs_{}".format(i),
                                 y_hidden_dist,
                                 obs=y[i])
        else:
            truncation_prob = 1 - y_hidden_dist.cdf(y[i])
            pyro.sample("truncation_label_{}".format(i),
                        dist.Bernoulli(truncation_prob),
                        obs=truncation_label[i])


pyro.clear_param_store()
hmc_kernel = HMC(model, step_size=0.1, num_steps=4)
mcmc_run = MCMC(hmc_kernel, num_samples=5, warmup_steps=1).run(x, y, truncation_label)

marginal_a = EmpiricalMarginal(mcmc_run, sites="a_model")
posterior_a = [marginal_a.sample() for i in range(50)]
sns.distplot(posterior_a)

"""# Modeling using HMC with Vectorized Data