def test_subsample_gradient(Elbo, reparameterized, has_rsample, subsample, local_samples, scale):
    pyro.clear_param_store()
    data = torch.tensor([-0.5, 2.0])
    subsample_size = 1 if subsample else len(data)
    precision = 0.06 * scale
    Normal = dist.Normal if reparameterized else fakes.NonreparameterizedNormal

    def model(subsample):
        with pyro.plate("data", len(data), subsample_size, subsample) as ind:
            x = data[ind]
            z = pyro.sample("z", Normal(0, 1))
            pyro.sample("x", Normal(z, 1), obs=x)

    def guide(subsample):
        scale = pyro.param("scale", lambda: torch.tensor([1.0]))
        with pyro.plate("data", len(data), subsample_size, subsample):
            loc = pyro.param("loc", lambda: torch.zeros(len(data)), event_dim=0)
            z_dist = Normal(loc, scale)
            if has_rsample is not None:
                z_dist.has_rsample_(has_rsample)
            pyro.sample("z", z_dist)

    if scale != 1.0:
        model = poutine.scale(model, scale=scale)
        guide = poutine.scale(guide, scale=scale)

    num_particles = 50000
    if local_samples:
        guide = config_enumerate(guide, num_samples=num_particles)
        num_particles = 1

    optim = Adam({"lr": 0.1})
    elbo = Elbo(
        max_plate_nesting=1,  # set this to ensure rng agrees across runs
        num_particles=num_particles,
        vectorize_particles=True,
        strict_enumeration_warning=False,
    )
    inference = SVI(model, guide, optim, loss=elbo)
    with xfail_if_not_implemented():
        if subsample_size == 1:
            inference.loss_and_grads(
                model, guide, subsample=torch.tensor([0], dtype=torch.long)
            )
            inference.loss_and_grads(
                model, guide, subsample=torch.tensor([1], dtype=torch.long)
            )
        else:
            inference.loss_and_grads(
                model, guide, subsample=torch.tensor([0, 1], dtype=torch.long)
            )
    params = dict(pyro.get_param_store().named_parameters())
    normalizer = 2 if subsample else 1
    actual_grads = {
        name: param.grad.detach().cpu().numpy() / normalizer
        for name, param in params.items()
    }

    expected_grads = {
        "loc": scale * np.array([0.5, -2.0]),
        "scale": scale * np.array([2.0]),
    }
    for name in sorted(params):
        logger.info("expected {} = {}".format(name, expected_grads[name]))
        logger.info("actual {} = {}".format(name, actual_grads[name]))
    assert_equal(actual_grads, expected_grads, prec=precision)

def test_plate_elbo_vectorized_particles(Elbo, reparameterized):
    pyro.clear_param_store()
    data = torch.tensor([-0.5, 2.0])
    num_particles = 200000
    precision = 0.06
    Normal = dist.Normal if reparameterized else fakes.NonreparameterizedNormal

    def model():
        data_plate = pyro.plate("data", len(data))

        pyro.sample("nuisance_a", Normal(0, 1))
        with data_plate:
            z = pyro.sample("z", Normal(0, 1))
        pyro.sample("nuisance_b", Normal(2, 3))
        with data_plate:
            pyro.sample("x", Normal(z, 1), obs=data)
        pyro.sample("nuisance_c", Normal(4, 5))

    def guide():
        loc = pyro.param("loc", torch.zeros(len(data)))
        scale = pyro.param("scale", torch.tensor([1.0]))

        pyro.sample("nuisance_c", Normal(4, 5))
        with pyro.plate("data", len(data)):
            pyro.sample("z", Normal(loc, scale))
        pyro.sample("nuisance_b", Normal(2, 3))
        pyro.sample("nuisance_a", Normal(0, 1))

    optim = Adam({"lr": 0.1})
    loss = Elbo(
        num_particles=num_particles,
        vectorize_particles=True,
        strict_enumeration_warning=False,
    )
    inference = SVI(model, guide, optim, loss=loss)
    inference.loss_and_grads(model, guide)
    params = dict(pyro.get_param_store().named_parameters())
    actual_grads = {
        name: param.grad.detach().cpu().numpy() for name, param in params.items()
    }

    expected_grads = {"loc": np.array([0.5, -2.0]), "scale": np.array([2.0])}
    for name in sorted(params):
        logger.info("expected {} = {}".format(name, expected_grads[name]))
        logger.info("actual {} = {}".format(name, actual_grads[name]))
    assert_equal(actual_grads, expected_grads, prec=precision)

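# NOTE (illustrative sketch, not part of the original test suite): the shared
# analytic values expected_grads = {"loc": [0.5, -2.0], "scale": [2.0]} used
# throughout these tests can be derived in closed form. With q(z) = Normal(loc,
# scale), p(z) = Normal(0, 1), and p(x | z) = Normal(z, 1), the per-datum loss
# is KL(q || p) + E_q[-log p(x | z)], giving d/dloc = 2 * loc - x per datum and
# d/dscale = 2 * scale - 1 / scale summed over data; at loc = 0, scale = 1 this
# is loc_grad = -data = [0.5, -2.0] and scale_grad = 2.0. The helper name
# `analytic_elbo_grads` is hypothetical and relies on the module-level `torch`
# import.
def analytic_elbo_grads(data, loc, scale):
    # KL(Normal(loc, scale) || Normal(0, 1)), elementwise per datum.
    kl = 0.5 * (scale ** 2 + loc ** 2 - 1.0) - scale.log()
    # E_q[-log Normal(x | z, 1)] under z ~ Normal(loc, scale), up to an
    # additive constant that does not affect gradients.
    recon = 0.5 * ((loc - data) ** 2 + scale ** 2)
    loss = (kl + recon).sum()
    return torch.autograd.grad(loss, [loc, scale])

# Example:
#   analytic_elbo_grads(torch.tensor([-0.5, 2.0]),
#                       torch.zeros(2, requires_grad=True),
#                       torch.ones(1, requires_grad=True))
# returns (tensor([ 0.5000, -2.0000]), tensor([2.])).
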
def test_iarange(Elbo, reparameterized):
    pyro.clear_param_store()
    data = torch.tensor([-0.5, 2.0])
    num_particles = 20000
    precision = 0.06
    Normal = dist.Normal if reparameterized else fakes.NonreparameterizedNormal

    @poutine.broadcast
    def model():
        particles_iarange = pyro.iarange("particles", num_particles, dim=-2)
        data_iarange = pyro.iarange("data", len(data), dim=-1)

        pyro.sample("nuisance_a", Normal(0, 1))
        with particles_iarange, data_iarange:
            z = pyro.sample("z", Normal(0, 1))
        pyro.sample("nuisance_b", Normal(2, 3))
        with data_iarange, particles_iarange:
            pyro.sample("x", Normal(z, 1), obs=data)
        pyro.sample("nuisance_c", Normal(4, 5))

    @poutine.broadcast
    def guide():
        loc = pyro.param("loc", torch.zeros(len(data)))
        scale = pyro.param("scale", torch.tensor([1.0]))

        pyro.sample("nuisance_c", Normal(4, 5))
        with pyro.iarange("particles", num_particles, dim=-2):
            with pyro.iarange("data", len(data), dim=-1):
                pyro.sample("z", Normal(loc, scale))
        pyro.sample("nuisance_b", Normal(2, 3))
        pyro.sample("nuisance_a", Normal(0, 1))

    optim = Adam({"lr": 0.1})
    inference = SVI(model, guide, optim, loss=Elbo(strict_enumeration_warning=False))
    inference.loss_and_grads(model, guide)
    params = dict(pyro.get_param_store().named_parameters())
    actual_grads = {
        name: param.grad.detach().cpu().numpy() / num_particles
        for name, param in params.items()
    }

    expected_grads = {"loc": np.array([0.5, -2.0]), "scale": np.array([2.0])}
    for name in sorted(params):
        logger.info("expected {} = {}".format(name, expected_grads[name]))
        logger.info("actual {} = {}".format(name, actual_grads[name]))
    assert_equal(actual_grads, expected_grads, prec=precision)

def test_subsample_gradient_sequential(Elbo, reparameterized, subsample):
    pyro.clear_param_store()
    data = torch.tensor([-0.5, 2.0])
    subsample_size = 1 if subsample else len(data)
    num_particles = 5000
    precision = 0.333
    Normal = dist.Normal if reparameterized else fakes.NonreparameterizedNormal

    def model():
        with pyro.plate("data", len(data), subsample_size) as ind:
            x = data[ind]
            z = pyro.sample("z", Normal(0, 1).expand_by(x.shape))
            pyro.sample("x", Normal(z, 1), obs=x)

    def guide():
        loc = pyro.param("loc", lambda: torch.zeros(len(data), requires_grad=True))
        scale = pyro.param("scale", lambda: torch.tensor([1.0], requires_grad=True))
        with pyro.plate("data", len(data), subsample_size) as ind:
            pyro.sample("z", Normal(loc[ind], scale))

    optim = Adam({"lr": 0.1})
    elbo = Elbo(num_particles=10, strict_enumeration_warning=False)
    inference = SVI(model, guide, optim, elbo)
    # Spread the total particle budget over sequential runs of 10 particles
    # each, accumulating gradients across iterations.
    iters = num_particles // 10
    with xfail_if_not_implemented():
        for _ in range(iters):
            inference.loss_and_grads(model, guide)

    params = dict(pyro.get_param_store().named_parameters())
    actual_grads = {
        name: param.grad.detach().cpu().numpy() / iters
        for name, param in params.items()
    }

    expected_grads = {"loc": np.array([0.5, -2.0]), "scale": np.array([2.0])}
    for name in sorted(params):
        logger.info("expected {} = {}".format(name, expected_grads[name]))
        logger.info("actual {} = {}".format(name, actual_grads[name]))
    assert_equal(actual_grads, expected_grads, prec=precision)

def test_subsample_gradient(trace_graph, reparameterized):
    pyro.clear_param_store()
    data_size = 2
    subsample_size = 1
    num_particles = 1000
    precision = 0.333
    data = dist.normal(ng_zeros(data_size), ng_ones(data_size))

    def model(subsample_size):
        with pyro.iarange("data", len(data), subsample_size) as ind:
            x = data[ind]
            z = pyro.sample("z", dist.Normal(ng_zeros(len(x)), ng_ones(len(x)),
                                             reparameterized=reparameterized))
            pyro.observe("x", dist.Normal(z, ng_ones(len(x)),
                                          reparameterized=reparameterized), x)

    def guide(subsample_size):
        mu = pyro.param("mu", lambda: Variable(torch.zeros(len(data)),
                                               requires_grad=True))
        sigma = pyro.param("sigma", lambda: Variable(torch.ones(1),
                                                     requires_grad=True))
        with pyro.iarange("data", len(data), subsample_size) as ind:
            mu = mu[ind]
            sigma = sigma.expand(subsample_size)
            pyro.sample("z", dist.Normal(mu, sigma, reparameterized=reparameterized))

    optim = Adam({"lr": 0.1})
    inference = SVI(model, guide, optim, loss="ELBO",
                    trace_graph=trace_graph, num_particles=num_particles)

    # Compute gradients without subsampling.
    inference.loss_and_grads(model, guide, subsample_size=data_size)
    params = dict(pyro.get_param_store().named_parameters())
    expected_grads = {name: param.grad.data.clone() for name, param in params.items()}
    zero_grads(params.values())

    # Compute gradients with subsampling.
    inference.loss_and_grads(model, guide, subsample_size=subsample_size)
    actual_grads = {name: param.grad.data.clone() for name, param in params.items()}

    for name in sorted(params):
        print('\nexpected {} = {}'.format(name, expected_grads[name].cpu().numpy()))
        print('actual {} = {}'.format(name, actual_grads[name].cpu().numpy()))
    assert_equal(actual_grads, expected_grads, prec=precision)

def test_subsample_gradient(Elbo, reparameterized, subsample):
    pyro.clear_param_store()
    data = torch.tensor([-0.5, 2.0])
    subsample_size = 1 if subsample else len(data)
    num_particles = 50000
    precision = 0.06
    Normal = dist.Normal if reparameterized else fakes.NonreparameterizedNormal

    def model(subsample):
        with pyro.iarange("particles", num_particles):
            with pyro.iarange("data", len(data), subsample_size, subsample) as ind:
                x = data[ind].unsqueeze(-1).expand(-1, num_particles)
                z = pyro.sample("z", Normal(0, 1).expand_by(x.shape))
                pyro.sample("x", Normal(z, 1), obs=x)

    def guide(subsample):
        loc = pyro.param("loc", lambda: torch.zeros(len(data), requires_grad=True))
        scale = pyro.param("scale", lambda: torch.tensor([1.0], requires_grad=True))
        with pyro.iarange("particles", num_particles):
            with pyro.iarange("data", len(data), subsample_size, subsample) as ind:
                loc_ind = loc[ind].unsqueeze(-1).expand(-1, num_particles)
                pyro.sample("z", Normal(loc_ind, scale))

    optim = Adam({"lr": 0.1})
    elbo = Elbo(strict_enumeration_warning=False)
    inference = SVI(model, guide, optim, loss=elbo)
    if subsample_size == 1:
        inference.loss_and_grads(model, guide, subsample=torch.LongTensor([0]))
        inference.loss_and_grads(model, guide, subsample=torch.LongTensor([1]))
    else:
        inference.loss_and_grads(model, guide, subsample=torch.LongTensor([0, 1]))
    params = dict(pyro.get_param_store().named_parameters())
    normalizer = 2 * num_particles / subsample_size
    actual_grads = {
        name: param.grad.detach().cpu().numpy() / normalizer
        for name, param in params.items()
    }

    expected_grads = {"loc": np.array([0.5, -2.0]), "scale": np.array([2.0])}
    for name in sorted(params):
        logger.info("expected {} = {}".format(name, expected_grads[name]))
        logger.info("actual {} = {}".format(name, actual_grads[name]))
    assert_equal(actual_grads, expected_grads, prec=precision)
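
# NOTE (explanatory comment, an assumption about the intent of the test
# above): the division by `normalizer = 2 * num_particles / subsample_size`
# undoes two scalings. The "particles" iarange sums rather than averages over
# the num_particles copies, and each loss_and_grads call upweights per-datum
# terms by len(data) / subsample_size to keep the subsampled ELBO unbiased.
# With subsample_size == 1 the two calls together accumulate
# 2 * num_particles times the per-datum gradient; with subsample_size == 2 the
# single call accumulates num_particles times it. Both cases reduce to the
# same formula, so the normalized grads are comparable to expected_grads.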