def initialize(data):
    pyro.clear_param_store()
    optim = pyro.optim.Adam({'lr': 0.1, 'betas': [0.8, 0.99]})
    elbo = TraceEnum_ELBO(max_plate_nesting=2)
    # Expose the guide at module level so later code can query it.
    global global_guide
    global_guide = AutoDelta(
        poutine.block(model, expose=['weights', 'mus', 'lambdas']))
    svi = SVI(model, global_guide, optim, loss=elbo)
    svi.loss(model, global_guide, data)  # one loss evaluation initializes the guide's parameters
    return svi
def initialize(seed, model, scheduler, model_args):
    pyro.set_rng_seed(seed)
    pyro.clear_param_store()
    guide = autoguide.AutoDiagonalNormal(model)
    svi = SVI(model, guide, scheduler, loss=Trace_ELBO())
    loss = svi.loss(model, guide, **model_args)
    return loss, guide, svi
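# Usage sketch for the seed-taking initializer above (assumes `model`,
# `scheduler`, and a `model_args` dict are already defined in scope):
# evaluate many seeds and re-apply the best one, the same pattern used by
# `best_start` and `train` further below.
loss, seed = min((initialize(s, model, scheduler, model_args)[0], s)
                 for s in range(100))
loss, guide, svi = initialize(seed, model, scheduler, model_args)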
def initialize(data):
    pyro.clear_param_store()
    optim = pyro.optim.Adam({'lr': 0.1, 'betas': [0.8, 0.99]})
    # max_iarange_nesting is the old iarange-era spelling; current Pyro
    # uses max_plate_nesting.
    elbo = TraceEnum_ELBO(max_plate_nesting=1)
    svi = SVI(model, full_guide, optim, loss=elbo)
    # Initialize weights to uniform.
    pyro.param('auto_weights', 0.5 * torch.ones(K),
               constraint=constraints.simplex)
    # Assume half of the data variance is due to intra-component noise.
    var = (data.var() / 2).sqrt()
    pyro.param('auto_scale', torch.tensor([var] * K),  # one scale per component (was a hard-coded 4)
               constraint=constraints.positive)
    # Initialize means from a subsample of data.
    pyro.param('auto_locs',
               data[torch.multinomial(torch.ones(len(data)) / len(data), K)])
    loss = svi.loss(model, full_guide, data)
    return loss, svi
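# Usage sketch (assumes `model`, `full_guide`, `K`, and `data` exist in the
# surrounding scope): initialize once, then take SVI steps.
loss, svi = initialize(data)
for step in range(200):
    loss = svi.step(data)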
def initialize_multi_obs_dim(seed, model, guide, data):
    global svi
    pyro.set_rng_seed(seed)
    # Drop only this guide parameter (if present) rather than clearing the
    # whole store.
    if 'new_participant_topic_q' in pyro.get_param_store().keys():
        del pyro.get_param_store()['new_participant_topic_q']
    svi = SVI(model, guide, tm.optim, loss=tm.elbo)
    return svi.loss(model, guide, data)
def initialize(seed):
    global global_guide, svi
    pyro.set_rng_seed(seed)
    pyro.clear_param_store()
    global_guide = AutoDelta(
        poutine.block(model, expose=['weights', 'locs', 'scale']),
        init_loc_fn=init_loc_fn)
    svi = SVI(model, global_guide, optim, loss=elbo)
    return svi.loss(model, global_guide, data)
def initialize(seed, model, data):
    global global_guide, svi
    pyro.set_rng_seed(seed)
    pyro.clear_param_store()
    # Choose which sites the guide infers, based on the model and data types.
    exposed_params = []
    if 'gr' in mtype:
        if dtype == 'norm':
            exposed_params = ['weights', 'concentration']
        elif dtype == 'raw':
            exposed_params = ['weights', 'alpha', 'beta']
    elif 'dim' in mtype:
        if dtype == 'norm':
            exposed_params = ['topic_weights', 'topic_concentration',
                              'participant_topics']
        elif dtype == 'raw':
            exposed_params = ['topic_weights', 'topic_a', 'topic_b',
                              'participant_topics']
    global_guide = AutoDelta(poutine.block(model, expose=exposed_params))
    svi = SVI(model, global_guide, optim, loss=elbo)
    return svi.loss(model, global_guide, data)
# Imports assumed by the class below (`preprocess` is a local data helper).
import time
from collections import defaultdict

import matplotlib.pyplot as plt
import torch
from torch.distributions import constraints

import pyro
import pyro.distributions as dist
from pyro import poutine
from pyro.infer import (SVI, TraceEnum_ELBO, EmpiricalMarginal, MCMC, NUTS,
                        config_enumerate)
from pyro.infer.autoguide import (AutoDelta, AutoDiagonalNormal,
                                  AutoMultivariateNormal)
from pyro.optim import Adam


class GMM(object):
    # Set device to CPU
    device = torch.device('cpu')

    def __init__(self, n_comp=3, infr='svi', n_itr=100, subsample=False):
        assert infr == 'svi' or infr == 'mcmc', 'Only svi and mcmc supported'
        # Load data
        # df = read_data(data_type='nyse')
        data_train, _, data_test, _ = preprocess(ret_type='tensor')
        self.tensor_train = data_train.type(torch.FloatTensor)
        self.tensor_test = data_test.type(torch.FloatTensor)
        self.n_comp = n_comp
        self.infr = infr
        self.shape = self.tensor_train.shape
        self.params = None
        self.weights = None
        self.locs = None
        self.scale = None
        self.mcmc_time = None
        self.svi_time = None
        print(f'Initializing object for inference method {self.infr}')
        if self.infr == 'svi':
            self.guide = None
            self.optim = Adam({'lr': 0.1, 'betas': [0.8, 0.99]})
            self.svi = None
            self.svi_itr = n_itr
            self.elbo_loss = TraceEnum_ELBO(max_plate_nesting=1)
            self.posterior_est = None
            self.resp = None
        else:
            self.num_samples = 250
            self.mcmc = None
            self.warmup_steps = 50
        if subsample:
            self.mcmc_subsample = 0.1
            self.n_obs = int(self.shape[0] * self.mcmc_subsample)
        else:
            self.mcmc_subsample = 1.0  # keeps generate_stats() well-defined
            self.n_obs = self.shape[0]
        # Need to subsample in numpy array because sampling using multinomial
        # takes ages:
        # self.tensor = torch.from_numpy(
        #     np.random.choice(data, self.n_obs)).type(torch.FloatTensor)
        # Initialize model
        self.model()

    ##################
    # Model definition
    ##################
    @config_enumerate
    def model(self):
        # Global variables
        weights = pyro.sample(
            'weights', dist.Dirichlet(0.5 * torch.ones(self.n_comp)))
        with pyro.plate('components', self.n_comp):
            locs = pyro.sample('locs', dist.MultivariateNormal(
                torch.zeros(self.shape[1]), torch.eye(self.shape[1])))
            scale = pyro.sample('scale', dist.LogNormal(0., 2.))
        # One isotropic covariance matrix per component.
        lis = []
        for i in range(self.n_comp):
            t = torch.eye(self.shape[1]) * scale[i]
            lis.append(t)
        f = torch.stack(lis)
        with pyro.plate('data', self.shape[0]):
            # Local variables.
            assignment = pyro.sample('assignment', dist.Categorical(weights))
            pyro.sample('obs',
                        dist.MultivariateNormal(locs[assignment],
                                                f[assignment]),
                        obs=self.tensor_train)

    ##################
    # SVI
    ##################
    def guide_autodelta(self):
        self.guide = AutoDelta(
            poutine.block(self.model, expose=['weights', 'locs', 'scale']))

    def guide_autodiagnorm(self):
        self.guide = AutoDiagonalNormal(
            poutine.block(self.model, expose=['weights', 'locs', 'scale']))

    def guide_multivariatenormal(self):
        self.guide = AutoMultivariateNormal(
            poutine.block(self.model, expose=['weights', 'locs', 'scale']))

    def guide_manual(self):
        # Define priors
        weights_alpha = pyro.param(
            'weights_alpha', (1. / self.n_comp) * torch.ones(self.n_comp),
            constraint=constraints.simplex)
        scale_loc = pyro.param('scale_loc',
                               torch.rand(1).expand([self.n_comp]),
                               constraint=constraints.positive)
        scale_scale = pyro.param('scale_scale', torch.rand(1),
                                 constraint=constraints.positive)
        # Locations are real-valued; a positivity constraint on a zero init
        # would map to -inf, so loc_loc is left unconstrained.
        loc_loc = pyro.param('loc_loc', torch.zeros(self.shape[1]))
        loc_scale = pyro.param('loc_scale', torch.ones(1),
                               constraint=constraints.positive)
        # Global variables
        weights = pyro.sample('weights', dist.Dirichlet(weights_alpha))
        with pyro.plate('components', self.n_comp):
            locs = pyro.sample('locs', dist.MultivariateNormal(
                loc_loc, torch.eye(self.shape[1]) * loc_scale))
            scale = pyro.sample('scale',
                                dist.LogNormal(scale_loc, scale_scale))
        with pyro.plate('data', self.shape[0]):
            # Local variables.
            assignment = pyro.sample('assignment', dist.Categorical(weights))
        return locs, scale, assignment

    def optimizer(self):
        self.optim = pyro.optim.Adam({'lr': 0.1, 'betas': [0.8, 0.99]})

    def initialize(self, seed):
        self.set_seed(seed)
        self.clear_params()
        return self.run_svi()

    def init_svi(self):
        self.svi = SVI(self.model, self.guide, self.optim,
                       loss=self.elbo_loss)

    def run_svi(self):
        if self.guide is None:
            self.guide = self.guide_manual
        self.init_svi()
        loss = self.svi.loss(self.model, self.guide)
        return loss

    def best_start(self):
        # Choose the best among 100 random initializations.
        print("Determining best seed for initialization")
        loss, seed = min((self.initialize(seed), seed) for seed in range(100))
        self.initialize(seed)
        print("Best seed determined after 100 random initializations:")
        print('seed = {}, initial_loss = {}'.format(seed, loss))

    def get_params(self):
        # Renamed from `params` to avoid clashing with the attribute of the
        # same name set in __init__.
        self.params = pyro.get_param_store()
        return self.params

    def register_params(self):
        gradient_norms = defaultdict(list)
        for nam, value in self.get_params().named_parameters():
            value.register_hook(lambda g, name=nam: gradient_norms[name]
                                .append(g.norm().item()))
        return gradient_norms

    def get_svi_estimates_auto_guide(self):
        estimates = self.guide()
        self.weights = estimates['weights']
        self.locs = estimates['locs']
        self.scale = estimates['scale']
        return self.weights, self.locs, self.scale

    def get_mean_svi_est_manual_guide(self):
        # Relies on the older SVI.run()/EmpiricalMarginal interface.
        svi_posterior = self.svi.run()
        sites = ["weights", "scale", "locs"]
        svi_samples = {
            site: EmpiricalMarginal(svi_posterior, sites=site)
                  .enumerate_support().detach().cpu().numpy()
            for site in sites
        }
        self.posterior_est = dict()
        for item in svi_samples.keys():
            self.posterior_est[item] = torch.tensor(
                svi_samples[item].mean(axis=0))
        return self.posterior_est

    ##################
    # MCMC
    ##################
    def init_mcmc(self, seed=42):
        self.set_seed(seed)
        kernel = NUTS(self.model)
        self.mcmc = MCMC(kernel, num_samples=self.num_samples,
                         warmup_steps=self.warmup_steps)
        print("Initialized MCMC with NUTS kernel")

    def run_mcmc(self):
        self.clear_params()
        print("Initializing MCMC")
        self.init_mcmc()
        print(f'Running MCMC using NUTS with num_obs = {self.n_obs}')
        self.mcmc.run()

    def get_mcmc_samples(self):
        return self.mcmc.get_samples()

    ##################
    # Inference
    ##################
    def inference(self):
        if self.infr == 'svi':
            start = time.time()
            # Initialize with best seed
            self.best_start()
            # Run SVI iterations
            print("Running SVI iterations")
            losses = []
            for i in range(self.svi_itr):
                loss = self.svi.step()
                losses.append(loss)
                # print('.' if i % 100 else '\n', end='')
            end = time.time()
            self.svi_time = end - start
            return losses
        else:
            start = time.time()
            self.run_mcmc()
            end = time.time()
            self.mcmc_time = end - start
            return self.get_mcmc_samples()

    # Get posterior responsibilities
    def get_posterior_resp(self):
        '''
        Formula (k indexes clusters):
        p(c=k|x) = w_k * N(x|mu_k, sigma_k) / sum_j w_j * N(x|mu_j, sigma_j)
        '''
        w = self.posterior_est['weights']
        lo = self.posterior_est['locs']
        s = self.posterior_est['scale']
        prob_list = []
        lis = []
        for i in range(self.n_comp):
            t = torch.eye(self.shape[1]) * s[i]
            lis.append(t)
        f = torch.stack(lis)
        distri = dist.MultivariateNormal(lo, f)
        for d in self.tensor_test:
            numerator = w * torch.exp(distri.log_prob(d))
            denom = numerator.sum()
            probs = numerator / denom
            prob_list.append(probs)
        self.resp = torch.stack(prob_list)
        return self.resp

    ##################
    # Generate stats
    ##################
    def generate_stats(self):
        if self.svi is not None:
            svi_stats = dict({'num_samples': self.shape[0],
                              'num_iterations': self.svi_itr,
                              'exec_time': self.svi_time})
        else:
            svi_stats = None
        if self.mcmc is not None:
            mcmc_stats = dict(
                {'num_sampl': self.shape[0] * self.mcmc_subsample,
                 'exec_time': self.mcmc_time,
                 'num_samples_generated': self.num_samples,
                 'warmup_steps': self.warmup_steps})
        else:
            mcmc_stats = None
        return [svi_stats, mcmc_stats]

    ##################
    # Static Methods
    ##################
    @staticmethod
    def set_seed(seed):
        pyro.set_rng_seed(seed)

    @staticmethod
    def clear_params():
        pyro.clear_param_store()

    @staticmethod
    def plot_svi_convergence(losses):
        plt.figure(figsize=(10, 3), dpi=100).set_facecolor('white')
        plt.plot(losses)
        plt.xlabel('iters')
        plt.ylabel('loss')
        plt.title('Convergence of SVI')
        plt.show()
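# Usage sketch for the GMM class above (assumes the local `preprocess` helper
# is importable; the manual-guide posterior path additionally needs the older
# Pyro SVI.run() interface):
gmm = GMM(n_comp=3, infr='svi', n_itr=100)
gmm.guide_autodelta()                       # pick one of the guide constructors
losses = gmm.inference()                    # best-seed search + SVI steps
weights, locs, scale = gmm.get_svi_estimates_auto_guide()
GMM.plot_svi_convergence(losses)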
        # Tail of the VAE guide: sample the discrete latents inside the data
        # plate (pyro.iarange is the pre-0.3 name for pyro.plate).
        with pyro.plate('data.loop', N, dim=-1) as i:
            p_z_t, p_z_d = self.AE.encode(enc_in[i])
            pyro.sample('z_t', dist.Categorical(p_z_t))
            pyro.sample('z_d', dist.Categorical(p_z_d))


vae = VAE()
optimizer = Adam({"lr": .1})
svi = SVI(vae.model, vae.guide, optimizer,
          loss=TraceEnum_ELBO(max_plate_nesting=1,
                              strict_enumeration_warning=True))

losses = defaultdict(list)
n_steps = 10000
for c in range(3):
    pyro.clear_param_store()
    for step in range(n_steps):
        start_time = time.time()
        print(step, end=' ')
        loss = svi.step(enc_in, dec_in, dec_out, T, N)
        print(loss, time.time() - start_time)
        losses[c].append(loss)

gradient_norms = defaultdict(list)
# The second argument must be the guide (the original passed vae.model twice).
svi.loss(vae.model, vae.guide, enc_in, dec_in, dec_out, T, N)  # Initializes param store.
for name, value in pyro.get_param_store().named_parameters():
    value.register_hook(
        lambda g, name=name: gradient_norms[name].append(g.norm().item()))
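# Once the hooks are registered, further svi.step calls populate
# `gradient_norms`; plotting them mirrors `show_gradients_norm` in the class
# below (matplotlib import assumed):
import matplotlib.pyplot as pyplot
for step in range(100):
    svi.step(enc_in, dec_in, dec_out, T, N)
for name, grad_norms in gradient_norms.items():
    pyplot.plot(grad_norms, label=name)
pyplot.xlabel('iters')
pyplot.ylabel('gradient norm')
pyplot.yscale('log')
pyplot.legend(loc='best')
pyplot.show()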
class GaussianMixtureModel(object):
    def __init__(self, data, number_of_hidden_states=3):
        self.number_of_hidden_states = number_of_hidden_states
        self.data = data
        self.global_guide = AutoDelta(
            poutine.block(self.model, expose=['weights', 'locs', 'scale']))
        self.svi = None
        self.losses = None
        self.gradient_norms = None

    @config_enumerate
    def model(self, data):
        # Global variables.
        weights = pyro.sample(
            'weights',
            dist.Dirichlet(0.5 * torch.ones(self.number_of_hidden_states)))
        with pyro.plate('components', self.number_of_hidden_states):
            locs = pyro.sample('locs', dist.Normal(0., 10.))
            scale = pyro.sample('scale', dist.LogNormal(0., 2.))
        with pyro.plate('data', len(data)):
            # Local variables.
            assignment = pyro.sample('assignment', dist.Categorical(weights))
            pyro.sample('obs',
                        dist.Normal(locs[assignment], scale[assignment]),
                        obs=data)

    def initialize(self, seed):
        pyro.set_rng_seed(seed)
        pyro.clear_param_store()
        pyro.param('auto_weights',
                   0.5 * torch.ones(self.number_of_hidden_states),
                   constraint=constraints.simplex)
        pyro.param('auto_scale', (self.data.var() / 2).sqrt(),
                   constraint=constraints.positive)
        pyro.param('auto_locs',
                   self.data[torch.multinomial(
                       torch.ones(len(self.data)) / len(self.data),
                       self.number_of_hidden_states)])
        optim = pyro.optim.Adam({'lr': 0.1, 'betas': [0.8, 0.99]})
        elbo = TraceEnum_ELBO(max_plate_nesting=1)
        self.svi = SVI(self.model, self.global_guide, optim, loss=elbo)
        loss = self.svi.loss(self.model, self.global_guide, self.data)
        return loss

    def train(self):
        # Choose the best of 100 random initializations, then re-apply it.
        loss, seed = min((self.initialize(seed), seed) for seed in range(100))
        self.initialize(seed)
        gradient_norms = defaultdict(list)
        for name, value in pyro.get_param_store().named_parameters():
            value.register_hook(lambda g, name=name: gradient_norms[name]
                                .append(g.norm().item()))
        losses = []
        for i in range(200 if not smoke_test else 2):  # `smoke_test` is a module-level flag
            loss = self.svi.step(self.data)
            losses.append(loss)
        self.losses = losses
        self.gradient_norms = gradient_norms

    def show_losses(self):
        assert self.losses is not None, \
            "must train the model before showing losses"
        pyplot.figure(figsize=(10, 3), dpi=100).set_facecolor('white')
        pyplot.plot(self.losses)
        pyplot.xlabel('iters', size=18)
        pyplot.ylabel('loss', size=18)
        # pyplot.yscale('log')
        pyplot.grid()
        pyplot.title('Convergence of stochastic variational inference',
                     size=20)
        pyplot.show()

    def show_gradients_norm(self):
        pyplot.figure(figsize=(10, 4), dpi=100).set_facecolor('white')
        for name, grad_norms in self.gradient_norms.items():
            pyplot.plot(grad_norms, label=name)
        pyplot.xlabel('iters')
        pyplot.ylabel('gradient norm')
        pyplot.yscale('log')
        pyplot.legend(loc='best')
        pyplot.title('Gradient norms during SVI')
        pyplot.show()

    def return_map_estimate(self):
        map_estimates = self.global_guide(self.data)
        weights = map_estimates['weights'].data.numpy()
        locs = map_estimates['locs'].data.numpy()
        scale = map_estimates['scale'].data.numpy()
        return weights, locs, scale
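# Usage sketch for GaussianMixtureModel (synthetic 1-D data; the module-level
# `smoke_test` flag referenced by train() is defined here for illustration):
smoke_test = False
data = torch.cat([torch.randn(100) - 4.,
                  torch.randn(100),
                  torch.randn(100) + 4.])
gmm = GaussianMixtureModel(data, number_of_hidden_states=3)
gmm.train()
gmm.show_losses()
weights, locs, scale = gmm.return_map_estimate()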