def init(
    self,
    lr: float = 0.005,
    nbatch_size: int = 5,
    fbatch_size: int = 512,
    jit: bool = False,
) -> None:
    """
    Initialize SVI object.

    :param lr: Learning rate.
    :param nbatch_size: AOI batch size.
    :param fbatch_size: Frame batch size.
    :param jit: Use JIT compiler.
    """
    self.lr = lr
    self.optim_fn = optim.Adam
    self.optim_args = {"lr": lr, "betas": [0.9, 0.999]}
    self.optim = self.optim_fn(self.optim_args)

    try:
        self.load_checkpoint()
    except (FileNotFoundError, TypeError):
        pyro.clear_param_store()
        self.iter = 0
        self.converged = False
        self._rolling = {p: deque([], maxlen=100) for p in self.conv_params}
        self.init_parameters()

    self.elbo = self.TraceELBO(jit)
    self.svi = infer.SVI(self.model, self.guide, self.optim, loss=self.elbo)

    self.nbatch_size = min(nbatch_size, self.data.Nt)
    self.fbatch_size = min(fbatch_size, self.data.F)
def assert_ok(model, guide, elbo, *args, **kwargs):
    """
    Assert that inference works without warnings or errors.
    """
    pyro.get_param_store().clear()
    adam = optim.Adam({"lr": 1e-6})
    inference = infer.SVI(model, guide, adam, elbo)
    for i in range(2):
        inference.step(*args, **kwargs)
def assert_error(model, guide, elbo, match=None):
    """
    Assert that inference fails with an error.
    """
    pyro.get_param_store().clear()
    adam = optim.Adam({"lr": 1e-6})
    inference = infer.SVI(model, guide, adam, elbo)
    with pytest.raises(
        (NotImplementedError, UserWarning, KeyError, ValueError, RuntimeError),
        match=match,
    ):
        inference.step()
def assert_warning(model, guide, elbo):
    """
    Assert that inference works but with a warning.
    """
    pyro.get_param_store().clear()
    adam = optim.Adam({"lr": 1e-6})
    inference = infer.SVI(model, guide, adam, elbo)
    with warnings.catch_warnings(record=True) as w:
        warnings.simplefilter("always")
        inference.step()
        assert len(w), "No warnings were raised"
        for warning in w:
            print(warning)
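# A minimal, hypothetical usage sketch for the helpers above: a trivial Normal
# model/guide pair is checked with assert_ok. The test name and the model/guide
# bodies are illustrative assumptions, not part of the original test suite.
def test_trivial_normal_ok():
    def model():
        pyro.sample("x", dist.Normal(0.0, 1.0))

    def guide():
        loc = pyro.param("loc", torch.tensor(0.0))
        pyro.sample("x", dist.Normal(loc, 1.0))

    # Two optimization steps should complete without errors or warnings.
    assert_ok(model, guide, infer.Trace_ELBO())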
def main(args):
    funsor.set_backend("torch")

    # Define a basic model with a single Normal latent random variable `loc`
    # and a batch of Normally distributed observations.
    def model(data):
        loc = pyro.sample("loc", dist.Normal(0.0, 1.0))
        with pyro.plate("data", len(data), dim=-1):
            pyro.sample("obs", dist.Normal(loc, 1.0), obs=data)

    # Define a guide (i.e. variational distribution) with a Normal
    # distribution over the latent random variable `loc`.
    def guide(data):
        guide_loc = pyro.param("guide_loc", torch.tensor(0.0))
        guide_scale = pyro.param(
            "guide_scale", torch.tensor(1.0), constraint=constraints.positive
        )
        pyro.sample("loc", dist.Normal(guide_loc, guide_scale))

    # Generate some data.
    torch.manual_seed(0)
    data = torch.randn(100) + 3.0

    # Because the API in minipyro matches that of Pyro proper,
    # training code works with generic Pyro implementations.
    with pyro_backend(args.backend), interpretation(MonteCarlo()):
        # Construct an SVI object so we can do variational inference on our
        # model/guide pair.
        Elbo = infer.JitTrace_ELBO if args.jit else infer.Trace_ELBO
        elbo = Elbo()
        adam = optim.Adam({"lr": args.learning_rate})
        svi = infer.SVI(model, guide, adam, elbo)

        # Basic training loop.
        pyro.get_param_store().clear()
        for step in range(args.num_steps):
            loss = svi.step(data)
            if args.verbose and step % 100 == 0:
                print("step {} loss = {}".format(step, loss))

        # Report the final values of the variational parameters
        # in the guide after training.
        if args.verbose:
            for name in pyro.get_param_store():
                value = pyro.param(name).data
                print("{} = {}".format(name, value.detach().cpu().numpy()))

        # For this simple (conjugate) model we know the exact posterior. In
        # particular we know that the variational distribution should be
        # centered near 3.0. So let's check this explicitly.
        assert (pyro.param("guide_loc") - 3.0).abs() < 0.1
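# main() expects an argparse-style namespace with the attributes referenced
# above (backend, jit, learning_rate, num_steps, verbose). A minimal sketch of
# a matching command-line entry point; the defaults and the description string
# are assumptions, not the example's original values.
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(description="Minipyro SVI example")
    parser.add_argument("--backend", default="minipyro")
    parser.add_argument("--jit", action="store_true")
    parser.add_argument("--learning-rate", type=float, default=0.02)
    parser.add_argument("--num-steps", type=int, default=1001)
    parser.add_argument("--verbose", action="store_true")
    main(parser.parse_args())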
def test_gaussian_probit_hmm_smoke(exact, jit):
    def model(data):
        T, N, D = data.shape  # time steps, individuals, features

        # Gaussian initial distribution.
        init_loc = pyro.param("init_loc", torch.zeros(D))
        init_scale = pyro.param(
            "init_scale", 1e-2 * torch.eye(D), constraint=constraints.lower_cholesky
        )

        # Linear dynamics with Gaussian noise.
        trans_const = pyro.param("trans_const", torch.zeros(D))
        trans_coeff = pyro.param("trans_coeff", torch.eye(D))
        noise = pyro.param(
            "noise", 1e-2 * torch.eye(D), constraint=constraints.lower_cholesky
        )

        obs_plate = pyro.plate("channel", D, dim=-1)
        with pyro.plate("data", N, dim=-2):
            state = None
            for t in range(T):
                # Transition.
                if t == 0:
                    loc = init_loc
                    scale_tril = init_scale
                else:
                    loc = trans_const + funsor.torch.torch_tensordot(
                        trans_coeff, state, 1
                    )
                    scale_tril = noise
                state = pyro.sample(
                    "state_{}".format(t),
                    dist.MultivariateNormal(loc, scale_tril),
                    infer={"exact": exact},
                )

                # Factorial probit likelihood model.
                with obs_plate:
                    pyro.sample(
                        "obs_{}".format(t),
                        dist.Bernoulli(logits=state["channel"]),
                        obs=data[t],
                    )

    def guide(data):
        pass

    data = torch.distributions.Bernoulli(0.5).sample((3, 4, 2))

    with pyro_backend("funsor"):
        Elbo = infer.JitTraceEnum_ELBO if jit else infer.TraceEnum_ELBO
        elbo = Elbo()
        adam = optim.Adam({"lr": 1e-3})
        svi = infer.SVI(model, guide, adam, elbo)
        svi.step(data)
def train(model, guide, lr=1e-3, n_steps=1000, jit=True, verbose=False, **kwargs):
    pyro.clear_param_store()
    optimizer = optim.Adam({"lr": lr})
    elbo = (
        infer.JitTraceEnum_ELBO(max_plate_nesting=2)
        if jit
        else infer.TraceEnum_ELBO(max_plate_nesting=2)
    )
    svi = infer.SVI(model, guide, optimizer, elbo)
    for step in range(n_steps):
        svi.step(**kwargs)
        if step % 100 == 99 and verbose:
            values = tuple(f"{k}: {v}" for k, v in pyro.get_param_store().items())
            print(values)
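# A hypothetical example of calling train() above; the model/guide pair below
# is an illustrative assumption. Extra keyword arguments (here `data`) are
# forwarded unchanged to svi.step() and hence to the model and guide.
def example_train_usage():
    data = torch.randn(20) + 2.0

    def model(data):
        loc = pyro.sample("loc", dist.Normal(0.0, 10.0))
        with pyro.plate("data", len(data)):
            pyro.sample("obs", dist.Normal(loc, 1.0), obs=data)

    def guide(data):
        loc = pyro.param("guide_loc", torch.tensor(0.0))
        pyro.sample("loc", dist.Normal(loc, 1.0))

    # jit=False keeps the sketch simple; TraceEnum_ELBO handles models with no
    # enumerated sites just like an ordinary Trace_ELBO.
    train(model, guide, lr=1e-2, n_steps=200, jit=False, data=data)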
def test_optimizer(backend, optim_name, jit):
    def model(data):
        p = pyro.param("p", torch.tensor(0.5))
        pyro.sample("x", dist.Bernoulli(p), obs=data)

    def guide(data):
        pass

    data = torch.tensor(0.0)
    with pyro_backend(backend):
        pyro.get_param_store().clear()
        Elbo = infer.JitTrace_ELBO if jit else infer.Trace_ELBO
        elbo = Elbo(ignore_jit_warnings=True)
        optimizer = getattr(optim, optim_name)({"lr": 1e-6})
        inference = infer.SVI(model, guide, optimizer, elbo)
        for i in range(2):
            inference.step(data)
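# test_optimizer above is meant to be parametrized over backends, optimizer
# names, and jit settings (e.g. with pytest.mark.parametrize). A hypothetical
# smoke run over an assumed grid of values; the concrete backends and optimizer
# names are illustrative, not necessarily the project's actual test matrix.
def run_optimizer_smoke_grid():
    for backend in ["pyro", "minipyro"]:
        for optim_name in ["Adam"]:
            for jit in [False, True]:
                test_optimizer(backend, optim_name, jit)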
def main(args):
    if args.cuda:
        torch.set_default_tensor_type('torch.cuda.FloatTensor')

    logging.info('Loading data')
    data = poly.load_data(poly.JSB_CHORALES)

    logging.info('-' * 40)
    model = models[args.model]
    logging.info('Training {} on {} sequences'.format(
        model.__name__, len(data['train']['sequences'])))
    sequences = data['train']['sequences']
    lengths = data['train']['sequence_lengths']

    # Find all the notes that are present at least once in the training set.
    present_notes = ((sequences == 1).sum(0).sum(0) > 0)
    # Remove notes that are never played (we remove 37/88 notes).
    sequences = sequences[..., present_notes]

    if args.truncate:
        lengths = lengths.clamp(max=args.truncate)
        sequences = sequences[:, :args.truncate]
    num_observations = float(lengths.sum())

    pyro.set_rng_seed(args.seed)
    pyro.clear_param_store()
    pyro.enable_validation(__debug__)

    # We'll train using MAP Baum-Welch, i.e. MAP estimation while marginalizing
    # out the hidden state x. This is accomplished via an automatic guide that
    # learns point estimates of all of our conditional probability tables,
    # named probs_*.
    guide = AutoDelta(
        handlers.block(model, expose_fn=lambda msg: msg["name"].startswith("probs_")))

    # To help debug our tensor shapes, let's print the shape of each site's
    # distribution, value, and log_prob tensor. Note this information is
    # automatically printed on most errors inside SVI.
    if args.print_shapes:
        first_available_dim = -2 if model is model_0 else -3
        guide_trace = handlers.trace(guide).get_trace(
            sequences, lengths, args=args, batch_size=args.batch_size)
        model_trace = handlers.trace(
            handlers.replay(handlers.enum(model, first_available_dim), guide_trace)
        ).get_trace(sequences, lengths, args=args, batch_size=args.batch_size)
        logging.info(model_trace.format_shapes())

    # Bind non-PyTorch parameters to make these functions jittable.
    model = functools.partial(model, args=args)
    guide = functools.partial(guide, args=args)

    # Enumeration requires a TraceEnum elbo and declaring the max_plate_nesting.
    # All of our models have two plates: "data" and "tones".
    optimizer = optim.Adam({'lr': args.learning_rate})
    if args.tmc:
        if args.jit and not args.funsor:
            raise NotImplementedError(
                "jit support not yet added for TraceTMC_ELBO")
        Elbo = infer.JitTraceTMC_ELBO if args.jit else infer.TraceTMC_ELBO
        elbo = Elbo(max_plate_nesting=1 if model is model_0 else 2)
        tmc_model = handlers.infer_config(
            model,
            lambda msg: {"num_samples": args.tmc_num_samples, "expand": False}
            if msg["infer"].get("enumerate", None) == "parallel" else {})  # noqa: E501
        svi = infer.SVI(tmc_model, guide, optimizer, elbo)
    else:
        Elbo = infer.JitTraceEnum_ELBO if args.jit else infer.TraceEnum_ELBO
        elbo = Elbo(max_plate_nesting=1 if model is model_0 else 2,
                    strict_enumeration_warning=True,
                    jit_options={"time_compilation": args.time_compilation})
        svi = infer.SVI(model, guide, optimizer, elbo)

    # We'll train on small minibatches.
    logging.info('Step\tLoss')
    for step in range(args.num_steps):
        loss = svi.step(sequences, lengths, batch_size=args.batch_size)
        logging.info('{: >5d}\t{}'.format(step, loss / num_observations))

    if args.jit and args.time_compilation:
        logging.debug('time to compile: {} s.'.format(
            elbo._differentiable_loss.compile_time))

    # We evaluate on the entire training dataset,
    # excluding the prior term so our results are comparable across models.
    train_loss = elbo.loss(model, guide, sequences, lengths,
                           batch_size=sequences.shape[0],
                           include_prior=False)
    logging.info('training loss = {}'.format(train_loss / num_observations))

    # Finally we evaluate on the test dataset.
    logging.info('-' * 40)
    logging.info('Evaluating on {} test sequences'.format(
        len(data['test']['sequences'])))
    sequences = data['test']['sequences'][..., present_notes]
    lengths = data['test']['sequence_lengths']
    if args.truncate:
        lengths = lengths.clamp(max=args.truncate)
    num_observations = float(lengths.sum())

    # Note that since we removed unseen notes above (to make the problem a bit
    # easier and for numerical stability), this test loss may not be directly
    # comparable to numbers reported on this dataset elsewhere.
    test_loss = elbo.loss(model, guide, sequences, lengths,
                          batch_size=sequences.shape[0],
                          include_prior=False)
    logging.info('test loss = {}'.format(test_loss / num_observations))

    # We expect models with higher capacity to perform better,
    # but eventually overfit to the training set.
    capacity = sum(
        value.reshape(-1).size(0) for value in pyro.get_param_store().values())
    logging.info('model_{} capacity = {} parameters'.format(
        args.model, capacity))