def map_x_to_distribution(self, x: torch.Tensor) -> distributions.Normal:
    distr = self.distribution_class(loc=x[..., 2], scale=x[..., 3])
    scaler = distributions.AffineTransform(loc=x[..., 0], scale=x[..., 1])
    if self._transformation is None:
        return distributions.TransformedDistribution(distr, [scaler])
    else:
        return distributions.TransformedDistribution(
            distr,
            [
                scaler,
                TorchNormalizer.get_transform(self._transformation)["inverse_torch"],
            ],
        )
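# A minimal sketch of how the mapping above can be exercised; the 4-column
# layout along the last dimension (scaler loc, scaler scale, distribution
# loc, distribution scale) is an assumption read off the indexing above.
import torch
from torch import distributions

x = torch.tensor([[0.0, 2.0, 0.5, 1.0]])  # batch of one parameter vector
base = distributions.Normal(loc=x[..., 2], scale=x[..., 3])
scaler = distributions.AffineTransform(loc=x[..., 0], scale=x[..., 1])
rescaled = distributions.TransformedDistribution(base, [scaler])
print(rescaled.sample((5,)))  # samples live on the rescaled (affine) scale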
def __init__(self, dim, blocks, flow_length, final_block=None, density=None,
             amortized='none'):
    """ Initialize normalizing flow """
    super().__init__()
    biject = []
    self.n_params = []
    # Start density (z0)
    if density is None:
        density = MultivariateNormal(torch.zeros(dim), torch.eye(dim))
    self.base_density = density
    for f in range(flow_length - 1):
        for b_flow in blocks:
            cur_block = b_flow(dim, amortized=amortized)
            self.n_params.append(cur_block.n_parameters())
            biject.append(cur_block)
    # Add only the first block as the final one
    cur_block = blocks[0](dim, amortized=amortized)
    self.n_params.append(cur_block.n_parameters())
    biject.append(cur_block)
    if final_block is not None:
        cur_block = final_block
        self.n_params.append(cur_block.n_parameters())
        biject.append(cur_block)
    # Full set of transforms
    self.transforms = transform.ComposeTransform(biject)
    self.bijectors = nn.ModuleList(biject)
    # Final density (zK) defined as a transformed distribution
    self.final_density = distrib.TransformedDistribution(density, self.transforms)
    self.amortized = amortized
    # Handle different amortizations
    if amortized in ('self', 'input'):
        self.amortized_seed = torch.ones(1, dim).detach()
        self.amortized_params = self.parameters_network(dim, self.n_parameters())
    self.log_det = []
    self.dim = dim
def logistic_distribution(loc: Tensor, scale: Tensor):
    # Base must be Uniform(0, 1); `scale.new_zeros(1)` as the upper bound
    # would make a degenerate Uniform(0, 0) that fails validation.
    base_distribution = td.Uniform(loc.new_zeros(1), scale.new_ones(1))
    transforms = [
        td.SigmoidTransform().inv,
        td.AffineTransform(loc=loc, scale=scale)
    ]
    return td.TransformedDistribution(base_distribution, transforms)
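# Hedged usage sketch: sampling from the logistic built above and checking
# its log-density against the closed form
# log f(x) = -z - log(s) - 2 * log(1 + exp(-z)), with z = (x - loc) / s.
import torch
import torch.nn.functional as F

loc, scale = torch.tensor([0.0]), torch.tensor([1.0])
logistic = logistic_distribution(loc, scale)
x = torch.tensor([0.0])
z = (x - loc) / scale
closed_form = -z - torch.log(scale) - 2 * F.softplus(-z)
print(logistic.log_prob(x), closed_form)  # both approx. -1.3863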
def _dist(self, loc, scale, inv_scale, log_sqrt_vals, concentration, base_rate,
          event_shape):
    # Expand the parameters to the event shape so they broadcast correctly.
    return td.TransformedDistribution(
        distributions.DoubleGamma(concentration.expand(event_shape),
                                  rate=base_rate.expand(event_shape)),
        PCATransform(loc, scale, inv_scale, log_sqrt_vals))
def _dist(self, loc, scale, inv_scale, log_sqrt_vals, base_scale, event_shape):
    zeros = torch.zeros((), device=loc.device, dtype=loc.dtype).expand(event_shape)
    return td.TransformedDistribution(
        td.Laplace(loc=zeros, scale=base_scale.expand(event_shape)),
        PCATransform(loc, scale, inv_scale, log_sqrt_vals))
def forward(self, state_features):
    x = self.feedforward_model(state_features)
    if self.dist == 'tanh_normal':
        mean, std = th.chunk(x, 2, -1)
        mean = self.mean_scale * th.tanh(mean / self.mean_scale)
        std = F.softplus(std + self.raw_init_std) + self.min_std
        dist = td.Normal(mean, std)  # TODO: fix nan problem
        dist = td.TransformedDistribution(dist, td.TanhTransform(cache_size=1))
        dist = td.Independent(dist, 1)
        dist = SampleDist(dist)
    elif self.dist == 'trunc_normal':
        mean, std = th.chunk(x, 2, -1)
        std = 2 * th.sigmoid((std + self.raw_init_std) / 2) + self.min_std
        from rls.nn.dists.TruncatedNormal import TruncatedNormal as TruncNormalDist
        dist = TruncNormalDist(th.tanh(mean), std, -1, 1)
        dist = td.Independent(dist, 1)
    elif self.dist == 'one_hot':
        dist = td.OneHotCategoricalStraightThrough(logits=x)
    elif self.dist == 'relaxed_one_hot':
        dist = td.RelaxedOneHotCategorical(th.tensor(0.1), logits=x)
    else:
        raise NotImplementedError(f"{self.dist} is not implemented.")
    return dist
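# Hedged sketch of why TanhTransform(cache_size=1) matters for the TODO above:
# with caching, log_prob of a freshly drawn rsample reuses the cached pre-tanh
# value instead of recomputing atanh(y), which is +/-inf (and NaN in the
# gradients) when y numerically saturates at -1 or 1.
import torch
import torch.distributions as td

base = td.Normal(torch.zeros(3), torch.ones(3))
squashed = td.TransformedDistribution(base, td.TanhTransform(cache_size=1))
y = squashed.rsample()
print(squashed.log_prob(y))  # finite, even if y is numerically close to +/-1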
def __init__(self, dim, blocks, density, flow_name='normalizing_flow',
             flow_length=1, encoder=None, decoder=None):
    super().__init__()
    biject = []
    self.flow_name = flow_name
    self.n_params = []
    if flow_name == 'normalizing_flow':
        for f in range(flow_length):
            for b_flow in blocks:
                cur_block = b_flow(dim)
                biject.append(cur_block)
                self.n_params.append(cur_block.n_parameters())
    else:
        for b_flow in blocks:
            biject.append(b_flow())
    self.transforms = transform.ComposeTransform(biject)
    self.bijectors = nn.ModuleList(biject)
    self.base_density = density
    self.final_density = distrib.TransformedDistribution(density, self.transforms)
    self.log_det = []
    self.dim = dim
    self.encoder = encoder
    self.decoder = decoder
def _get_transformed_normal(means, stds):
    normal_dist = td.Independent(td.Normal(loc=means, scale=stds), 1)
    transforms = [
        dist_utils.StableTanh(),
        dist_utils.AffineTransform(loc=torch.tensor(0.), scale=torch.tensor(5.0))
    ]
    squashed_dist = td.TransformedDistribution(
        base_distribution=normal_dist, transforms=transforms)
    return squashed_dist, transforms
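# Hedged usage sketch: the helper above squashes a diagonal normal into
# (-5, 5) via tanh followed by an affine rescale. The equivalent construction
# with stock torch transforms (TanhTransform standing in for the
# project-specific StableTanh) looks like this:
import torch
import torch.distributions as td

base = td.Independent(td.Normal(torch.zeros(2), torch.ones(2)), 1)
squashed = td.TransformedDistribution(
    base, [td.TanhTransform(), td.AffineTransform(loc=0.0, scale=5.0)])
samples = squashed.sample((1000,))
print(samples.min(), samples.max())  # strictly inside (-5, 5)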
def _dist(self, loc, scale, inv_scale, log_sqrt_vals, beta, base_scale,
          event_shape):
    zeros = torch.zeros((), device=loc.device, dtype=loc.dtype).expand(event_shape)
    return td.TransformedDistribution(
        distributions.GeneralizedNormal(
            loc=zeros,
            scale=base_scale.expand(event_shape),
            beta=beta.expand(event_shape)),
        PCATransform(loc, scale, inv_scale, log_sqrt_vals))
def map_x_to_distribution(self, x: torch.Tensor) -> distributions.Normal:
    x = x.permute(1, 0, 2)
    distr = self.distribution_class(
        loc=x[..., 2],
        cov_factor=x[..., 4:],
        cov_diag=x[..., 3],
    )
    scaler = distributions.AffineTransform(loc=x[0, :, 0], scale=x[0, :, 1],
                                           event_dim=1)
    if self._transformation is None:
        return distributions.TransformedDistribution(distr, [scaler])
    else:
        return distributions.TransformedDistribution(
            distr,
            [
                scaler,
                TorchNormalizer.get_transform(self._transformation)["inverse_torch"],
            ],
        )
def logistic_distribution(loc, log_scale):
    scale = torch.exp(log_scale) + 1e-5
    base_distribution = distributions.Uniform(torch.zeros_like(loc),
                                              torch.ones_like(loc))
    transforms = [
        LogisticTransform(),
        distributions.AffineTransform(loc=loc, scale=scale)
    ]
    logistic = distributions.TransformedDistribution(base_distribution, transforms)
    return logistic
def transformed_dist(self):
    """
    Returns the unconstrained distribution.
    :return: Transformed distribution
    :rtype: dist.TransformedDistribution
    """
    if not self.trainable:
        raise ValueError('Prior is not a `Distribution` instance!')
    return dist.TransformedDistribution(self._prior, [self.bijection.inv])
def __init__(self, dim, blocks, generative_layers, args,
             target_density=distrib.MultivariateNormal, learn_top=False,
             y_condition=False):
    """ Initialize normalizing flow """
    super(GenerativeFlow, self).__init__(dim, blocks, generative_layers,
                                         target_density, 'none')
    biject = []
    self.n_params = []
    self.output_shapes = []
    self.target_density = target_density
    # Get input size
    C, H, W = args.input_size
    # Create the L layers
    for l in range(generative_layers):
        C, H, W = C * 4, H // 2, W // 2
        self.output_shapes.append([-1, C, H, W])
        for b_flow in blocks:
            cur_block = b_flow(C, amortized='none')
            biject.append(cur_block)
            self.n_params.append(cur_block.n_parameters())
        C = C // 2
    C, H, W = C * 4, H // 2, W // 2
    self.output_shapes.append([-1, C, H, W])
    # Add a last layer (avoiding last block)
    for b_flow in blocks[:-1]:
        cur_block = b_flow(C, amortized='none')
        biject.append(cur_block)
        self.n_params.append(cur_block.n_parameters())
    self.transforms = transform.ComposeTransform(biject)
    self.bijectors = nn.ModuleList(biject)
    self.final_density = distrib.TransformedDistribution(target_density,
                                                         self.transforms)
    self.dim = dim
    # self.y_classes = hparams.Glow.y_classes
    self.learn_top = learn_top
    self.y_condition = y_condition
    # for prior
    if self.learn_top:
        # kernel_size was missing in the original call; a 3x3 same-padding
        # convolution is assumed here.
        self.top_layer = nn.Conv2d(C * 2, C * 2, kernel_size=3, padding=1)
    if self.y_condition:
        # `y_classes` must be defined (see the commented hparams line above).
        self.project_ycond = LinearZeros(y_classes, 2 * C)
        self.project_class = LinearZeros(C, y_classes)
    # Register learnable prior
    self.prior_h = nn.Parameter(torch.zeros([args.batch_size, C * 2, H, W]))
def __init__(self, dim, flow_func, flow_length, base_density):
    super().__init__()
    biject = []
    if flow_func == "planar":
        for iflow in range(flow_length):
            biject.append(PlanarFlow(dim))
    else:
        raise ValueError("Unrecognized Flow Function.")
    self.transforms = transform.ComposeTransform(biject)
    self.bijectors = nn.ModuleList(biject)
    self.base_density = base_density
    self.final_density = distrib.TransformedDistribution(base_density,
                                                         self.transforms)
    self.log_det = []
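# Hedged sketch of the base->final density pattern shared by the flow classes
# above, with stock torch transforms standing in for the learned flow blocks
# (PlanarFlow etc. are project-specific and not reproduced here):
import torch
import torch.distributions as distrib
from torch.distributions import transforms as transform

base = distrib.MultivariateNormal(torch.zeros(2), torch.eye(2))
compose = transform.ComposeTransform([
    transform.AffineTransform(loc=torch.zeros(2), scale=torch.full((2,), 2.0)),
    transform.TanhTransform(),
])
final = distrib.TransformedDistribution(base, compose)
z0 = base.sample((5,))
print(final.log_prob(compose(z0)))  # density of z0 pushed through the flow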
def _normal_dist(self, means, stds):
    normal_dist = dist_utils.DiagMultivariateNormal(loc=means, scale=stds)
    if self._scale_distribution:
        # The transformed distribution can also do reparameterized sampling,
        # i.e., `.has_rsample == True`.
        # Note that in some cases kl_divergence might no longer work for this
        # distribution! Assuming the same `transforms`, the following works:
        # ```
        # kl_divergence(Independent, Independent)
        #
        # kl_divergence(TransformedDistribution(Independent, transforms),
        #               TransformedDistribution(Independent, transforms))
        # ```
        squashed_dist = td.TransformedDistribution(
            base_distribution=normal_dist, transforms=self._transforms)
        return squashed_dist
    else:
        return normal_dist
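# Hedged sketch of the kl_divergence caveat noted in the comment above:
# PyTorch computes KL between two TransformedDistributions only when they
# share the same transforms, in which case it falls back to the KL of the
# base distributions.
import torch
import torch.distributions as td
from torch.distributions.kl import kl_divergence

transforms = [td.TanhTransform()]
p = td.TransformedDistribution(td.Normal(0.0, 1.0), transforms)
q = td.TransformedDistribution(td.Normal(0.5, 1.0), transforms)
print(kl_divergence(p, q))  # equals kl_divergence of the two base Normals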
def __init__(self, dim, blocks, flow_length, density=None, amortized='none',
             amortize_dim=None):
    """ Initialize normalizing flow """
    super().__init__()
    biject = []
    self.n_params = []
    # Start density (z0)
    if density is None:
        density = MultivariateNormal(torch.zeros(dim), torch.eye(dim))
    self.base_density = density
    blocks_tmp = []
    for f in range(flow_length):
        current_block = []
        for b_flow in blocks:
            cur_block = b_flow(dim, amortized=amortized)
            self.n_params.append(cur_block.n_parameters())
            current_block.append(cur_block)
        biject.extend(current_block)
        blocks_tmp.append(FlowList(current_block))
    # Full set of transforms
    self.blocks = blocks_tmp
    self.transforms = transform.ComposeTransform(biject)
    self.bijectors = FlowList(biject)
    # Final density (zK) defined as a transformed distribution
    self.final_density = distrib.TransformedDistribution(density, self.transforms)
    self.amortized = amortized
    # Handle different amortizations
    amortize_dim = amortize_dim or dim
    if amortized in ('self', 'input', 'auxiliary'):
        self.amortized_seed = torch.ones(1, amortize_dim).detach()
        self.amortized_params = self.parameters_network(amortize_dim,
                                                        self.n_parameters())
    self.log_det = []
    self.dim = dim
def test_transformed(self):
    normal_dist = dist_utils.DiagMultivariateNormal(
        torch.tensor([[1., 2.], [2., 2.]]),
        torch.tensor([[2., 3.], [1., 1.]]))
    transforms = [dist_utils.SigmoidTransform()]
    dist = td.TransformedDistribution(base_distribution=normal_dist,
                                      transforms=transforms)
    spec = dist_utils.DistributionSpec.from_distribution(dist)
    params1 = {
        'loc': torch.tensor([[0.5, 1.5], [1.0, 1.0]]),
        'scale': torch.tensor([[2., 4.], [2., 1.]])
    }
    dist1 = spec.build_distribution(params1)
    self.assertEqual(type(dist1), td.TransformedDistribution)
    self.assertEqual(dist1.event_shape, dist.event_shape)
    self.assertEqual(dist1.transforms, transforms)
    self.assertEqual(type(dist1.base_dist), dist_utils.DiagMultivariateNormal)
    self.assertEqual(type(dist1.base_dist.base_dist), td.Normal)
    self.assertEqual(dist1.base_dist.base_dist.mean, params1['loc'])
    self.assertEqual(dist1.base_dist.base_dist.stddev, params1['scale'])
def _builder_transformed(base_builder, transforms, **kwargs):
    return td.TransformedDistribution(base_builder(**kwargs), transforms)
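# Hedged usage sketch: partially applying the builder above to get a
# distribution factory, e.g. a tanh-squashed Normal; the use of
# `functools.partial` here is illustrative, not from the source.
import functools
import torch
import torch.distributions as td

make_squashed_normal = functools.partial(
    _builder_transformed, td.Normal, [td.TanhTransform()])
dist = make_squashed_normal(loc=torch.zeros(3), scale=torch.ones(3))
print(dist.sample())  # values in (-1, 1)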
def set_num_terms(self, n_terms):
    for block in self.stack:
        for layer in block.stack:
            layer.numSeriesTerms = n_terms


if __name__ == "__main__":
    scale = 1.
    loc = 0.
    base_distribution = distributions.Uniform(0., 1.)
    transforms_1 = [
        distributions.SigmoidTransform().inv,
        distributions.AffineTransform(loc=loc, scale=scale)
    ]
    logistic_1 = distributions.TransformedDistribution(base_distribution,
                                                       transforms_1)
    transforms_2 = [
        LogisticTransform(),
        distributions.AffineTransform(loc=loc, scale=scale)
    ]
    logistic_2 = distributions.TransformedDistribution(base_distribution,
                                                       transforms_2)
    x = torch.zeros(2)
    print(logistic_1.log_prob(x), logistic_2.log_prob(x))
    1 / 0  # intentional hard stop: only the log_prob comparison above runs
    diff = lambda x, y: (x - y).abs().sum()
    batch_size = 13
    channels = 3
def __init__(self, transforms, flow, base_dist, batchnorms=None):
    super().__init__()  # required before registering submodules on nn.Module
    self.dist = dist.TransformedDistribution(base_dist, flow)
    # Renamed from `self.modules`, which would shadow nn.Module.modules().
    self.module_list = nn.ModuleList(transforms)
    if batchnorms is not None:
        self.module_list.extend(batchnorms)
def sample(args):
    """
    Performs the following:
    1. construct model object & load state dict from saved model;
    2. draw an nrows x ncols grid of samples from a gaussian or logistic
       prior on the latent space;
    3. save to disk as a grid of images.
    """
    # parse settings:
    if args.dataset == 'mnist':
        input_dim = 28 * 28
        img_height = 28
        img_width = 28
        img_depth = 1
    elif args.dataset == 'svhn':
        input_dim = 32 * 32 * 3
        img_height = 32
        img_width = 32
        img_depth = 3
    elif args.dataset == 'cifar10':
        input_dim = 32 * 32 * 3
        img_height = 32
        img_width = 32
        img_depth = 3
    elif args.dataset == 'tfd':
        raise NotImplementedError(
            "[sample] Toronto Faces Dataset unsupported right now. Sorry!")

    # shut off gradients for sampling:
    torch.set_grad_enabled(False)

    # build model & load state dict:
    nice = NICEModel(input_dim, args.nhidden, args.nlayers)
    if args.model_path is not None:
        nice.load_state_dict(torch.load(args.model_path, map_location='cpu'))
        print("[sample] Loaded model from file.")
    nice.eval()

    # sample a batch:
    if args.prior == 'logistic':
        LOGISTIC_LOC = 0.0
        # note: Logistic(0, s) has variance s^2 * pi^2 / 3, so this scale
        # gives variance 3 / pi^2, not 1 (s = sqrt(3) / pi would give 1).
        LOGISTIC_SCALE = 3. / (np.pi ** 2)
        logistic = dist.TransformedDistribution(
            dist.Uniform(0.0, 1.0),
            [dist.SigmoidTransform().inv,
             dist.AffineTransform(loc=LOGISTIC_LOC, scale=LOGISTIC_SCALE)])
        print("[sample] sampling from logistic prior with loc={0:.4f}, scale={1:.4f}."
              .format(LOGISTIC_LOC, LOGISTIC_SCALE))
        ys = logistic.sample(torch.Size([args.nrows * args.ncols, input_dim]))
        xs = nice.inverse(ys)
    if args.prior == 'gaussian':
        print("[sample] sampling from gaussian prior with loc=0.0, scale=1.0.")
        ys = torch.randn(args.nrows * args.ncols, input_dim)
        xs = nice.inverse(ys)

    # format sample into images of correct shape:
    image_batch = unflatten_images(xs, img_depth, img_height, img_width)

    # arrange into a grid and save to file:
    torchvision.utils.save_image(image_batch, args.save_image_path,
                                 nrow=args.nrows)
    print("[sample] Saved {0}-by-{1} sampled images to {2}.".format(
        args.nrows, args.ncols, args.save_image_path))
def _dist(self, loc, scale, inv_scale, log_sqrt_vals, event_shape):
    zeros = torch.zeros((), device=loc.device, dtype=loc.dtype).expand(event_shape)
    return td.TransformedDistribution(
        td.Normal(zeros, zeros + 1),
        PCATransform(loc, scale, inv_scale, log_sqrt_vals))
def _sigmoid_gaussian(self, loc: torch.Tensor, scale: torch.Tensor):
    distribution = D.Normal(loc, scale)
    transform = D.transforms.SigmoidTransform()
    return D.TransformedDistribution(distribution, transform)
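# Hedged usage sketch: the sigmoid-squashed gaussian above has support (0, 1),
# which is convenient for, e.g., probabilities or bounded actions.
import torch
import torch.distributions as D

dist = D.TransformedDistribution(
    D.Normal(torch.zeros(4), torch.ones(4)),
    D.transforms.SigmoidTransform())
s = dist.sample()
print(s, (s > 0).all() and (s < 1).all())  # samples lie strictly in (0, 1)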