def gauss(args, mu, std):
    radius = torch.Tensor([args.radius]).to(args.dev)
    mu = clamp(mu, min=-max_clamp_norm, max=max_clamp_norm)
    mu_h = exp_map_mu0(expand_proj_dims(mu), radius)
    p_z = HyperboloidWrappedNormal(radius, mu_h, std)
    # Map x, y coordinates on tangent space at origin to manifold (Lorentz model).
    x = np.arange(-5, 5, 0.1)
    y = np.arange(-5, 5, 0.1)
    x, y = np.meshgrid(x, y)
    x = torch.Tensor(x).view(-1, 1)
    y = torch.Tensor(y).view(-1, 1)
    twodim = torch.cat([x, y], dim=1)
    threedim = expand_proj_dims(twodim)
    clamped_threedim = clamp(threedim, min=-max_clamp_norm,
                             max=max_clamp_norm).to(args.dev)
    on_mani = exp_map_mu0(clamped_threedim, radius)
    # Calculate densities of the x, y coordinates on the Lorentz model.
    probs = p_z.log_prob(on_mani)
    probs = torch.exp(probs)
    # Convert to Poincare coordinates for plotting.
    xy_poincare = lorentz_to_poincare(on_mani.squeeze(), radius)
    mu_p = lorentz_to_poincare(mu_h, radius)
    plot_density(xy_poincare, probs, args.radius, args.namestr, mu=mu_p)
    if args.flow != 'none':
        plot_flow(args, radius, args.flow, p_z, args.namestr, args.n_blocks)
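# Hedged usage sketch for gauss(): every argument value below is an
# illustrative assumption, not a default taken from this repository.
def _demo_gauss():
    from types import SimpleNamespace
    args = SimpleNamespace(radius=1.0, dev='cpu', namestr='wrapped_gauss',
                           flow='none', n_blocks=2)
    mu = torch.zeros(1, 2)   # mean in the tangent space at the origin
    std = torch.ones(1, 2)   # per-dimension scale of the wrapped normal
    gauss(args, mu, std)     # writes the density plot via plot_density()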
def encode(self, *args, **kwargs):
    """Encode node features to a latent sample on the hyperboloid."""
    # The sample is already reparameterized.
    node_feats, edge_index = args[0], args[1]
    z, self.__mu_h__, self.__std__, self.posterior_parts = self.encoder(*args, **kwargs)
    # TODO: Double-check this masking.
    self.mask = (self.__std__.sum(dim=-1) != 1).int().unsqueeze(1)
    z = self.mask * clamp(z, min=-max_clamp_norm, max=max_clamp_norm)
    if self.encoder.flow_model:
        self.encoder.flow_model.base_dist_mean = self.__mu_h__
        self.encoder.flow_model.base_dist_var = self.__std__
        z_k, sum_log_det_jac = self.encoder.flow_model.inverse(z, edge_index)
        self.sum_log_det_jac = sum_log_det_jac
        z_k = clamp(z_k, min=-max_clamp_norm, max=max_clamp_norm)
        # z_k is what gets used in the KL calculation: the prior is a
        # wrapped Gaussian, so z_k should stay on the manifold, not the
        # tangent space.
        self.z_k = z_k
    else:
        self.z_k = z
        z_k = z
    if self.decoder_name not in ['fermi', 'tanh', 'distance', 'softmax']:
        # Log-map z back to \mathcal{T}_{\textbf{o}}\mathbb{H}.
        z_mu0 = inverse_exp_map_mu0(z_k, self.encoder.radius)
        self.decoder_logdet = logmap_logdet(z_mu0, self.encoder.radius)
        return z, z_mu0
    # if self.deterministic:
    #     mu_h = clamp(self.__mu_h__, min=-max_clamp_norm, max=max_clamp_norm)
    #     return mu_h, mu_h
    return z, z_k
def train_potential_flow(flow_model, n_blocks, radius, target):
    flow_model = kwargs_flows[flow_model](n_blocks, 2, 128, 1,
                                          layer_type='Linear',
                                          radius=torch.tensor(radius)).cuda()
    flow_opt = optim.Adam(flow_model.parameters(), lr=1e-2)
    num_samples = torch.Size([256])
    mu_0_shape = torch.Size([1, 3])
    std_0_shape = torch.Size([1, 2])
    prior = HyperboloidWrappedNormal(radius, torch.zeros(mu_0_shape).cuda(),
                                     torch.ones(std_0_shape).cuda())
    for epoch in range(0, 1000):
        flow_opt.zero_grad()
        z_0 = prior.rsample(num_samples).squeeze()
        z_0 = clamp(z_0, min=-max_clamp_norm, max=max_clamp_norm)
        q_log_prob = prior.log_prob(z_0)
        z_hyper, logdet = flow_model.inverse(z_0)
        z_hyper = clamp(z_hyper, min=-max_clamp_norm, max=max_clamp_norm)
        z_k = inverse_exp_map_mu0(z_hyper, radius)
        z_mu0 = z_k[..., 1:]
        logdet += logmap_logdet(z_k, radius)
        p_log_prob = -1 * target(z_mu0)
        # Reverse-KL objective: E_q[log q(z) - log p(z) - log|det J|].
        loss = (q_log_prob - p_log_prob - logdet).mean()
        loss.backward()
        flow_opt.step()
        print("Epoch:{} Loss:{}".format(epoch, loss.item()))
    return flow_model
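# Hedged usage sketch for train_potential_flow(): the flow key 'WHC' is an
# illustrative assumption for kwargs_flows, and ring_energy is a made-up
# target. `target` maps tangent-space coordinates to an energy; the trainer
# negates it to obtain an unnormalized log-density.
def _demo_potential_flow():
    def ring_energy(z):
        # High energy (low density) away from a circle of radius 2.
        return 0.5 * ((z.norm(dim=-1) - 2.0) / 0.4) ** 2

    radius = torch.Tensor([1.0]).cuda()
    return train_potential_flow('WHC', n_blocks=2, radius=radius,
                                target=ring_energy)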
def sample_projection_mu0(
        x: Tensor, at_point: Tensor,
        radius: Tensor) -> Tuple[Tensor, Tuple[Tensor, Tensor]]:
    # Parallel-transport the tangent sample from the origin to `at_point`,
    # then exponential-map it onto the manifold.
    x_expanded = expand_proj_dims(x)
    pt = parallel_transport_mu0(x_expanded, dst=at_point, radius=radius)
    pt = clamp(pt, min=-max_clamp_norm, max=max_clamp_norm)
    x_proj = exp_map(pt, at_point=at_point, radius=radius)
    x_proj = clamp(x_proj, min=-max_clamp_norm, max=max_clamp_norm)
    return x_proj, (pt, x)
def inverse(self, z_hyper, edge_index=None):
    z = inverse_exp_map_mu0(z_hyper, self.radius)
    z_mu0 = z[..., 1:]
    x = z_mu0
    log_det_J = logmap_logdet(z, self.radius)
    preclamp_norm_list = []
    for i in range(0, self.n_blocks):
        x_ = x * self.mask[i]
        if self.layer_type != 'Linear':
            s = self.s[i](x_, edge_index)
            t_out = self.t[i](x_, edge_index)
        else:
            s = self.s[i](x_)
            t_out = self.t[i](x_)
        t_proj = proj_vec(t_out, self.radius)
        t1, t_rest = t_proj[:, 0].unsqueeze(1), t_proj[:, 1:]
        t = self.create_masked_t((1 - self.mask[i]), t1, t_rest)
        # (1-b) \odot \tilde{x} \odot \exp(s(b \odot \tilde{x}))
        x_pt_arg = expand_proj_dims((1 - self.mask[i]) * x * torch.exp(s))
        # (1-b) \odot \textnormal{PT}_{\textbf{o} \to t}(b \odot \tilde{x})
        pt = parallel_transport_mu0(x_pt_arg, dst=t, radius=self.radius)
        preclamp_norm = pt.max()
        pt = clamp(pt, min=-max_clamp_norm, max=max_clamp_norm)
        if pt.max() == max_clamp_norm:
            preclamp_norm_list.append(preclamp_norm)
        x_t = exp_map(x=pt, at_point=t, radius=self.radius)
        log_det_J += _logdet(pt, self.radius, subdim=(self.mask[i]).sum())
        preclamp_norm = x_t.max()
        x_t = clamp(x_t, min=-max_clamp_norm, max=max_clamp_norm)
        if x_t.max() == max_clamp_norm:
            preclamp_norm_list.append(preclamp_norm)
        # \log_{\textbf{o}}(\exp_{t}(\textnormal{PT}_{\textbf{o} \to t}(\cdot)))
        x_0_full = inverse_exp_map_mu0(x_t, self.radius)
        x_0 = x_0_full[..., 1:]
        log_det_J += logmap_logdet(x_0_full, self.radius,
                                   subdim=(self.mask[i]).sum())
        x = x_ + (1 - self.mask[i]) * x_0
        log_det_J += ((1 - self.mask[i]) * s).sum(dim=1)  # log det dx/du
        preclamp_norm = x.max()
        x = clamp(x, min=-max_clamp_norm, max=max_clamp_norm)
        if x.max() == max_clamp_norm:
            preclamp_norm_list.append(preclamp_norm)
    x_mu0 = expand_proj_dims(x)
    # Project back to the manifold.
    x = exp_map_mu0(x_mu0, self.radius)
    log_det_J += _logdet(x_mu0, self.radius)
    self.preclamp_norm = torch.Tensor([
        sum(preclamp_norm_list) / len(preclamp_norm_list)
    ]) if preclamp_norm_list else self.preclamp_norm
    return x, log_det_J
def inverse_sample_projection_mu0(x: Tensor, at_point: Tensor,
                                  radius: Tensor) -> Tuple[Tensor, Tensor]:
    unmapped = inverse_exp_map(x, at_point=at_point, radius=radius)
    unmapped = clamp(unmapped, min=-max_clamp_norm, max=max_clamp_norm)
    unpt = inverse_parallel_transport_mu0(unmapped, src=at_point, radius=radius)
    unpt = clamp(unpt, min=-max_clamp_norm, max=max_clamp_norm)
    return unmapped, unpt[..., 1:]
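# Hedged sanity-check sketch: sample_projection_mu0() and
# inverse_sample_projection_mu0() should be mutual inverses up to clamping.
# The shapes and radius below are illustrative assumptions.
def _check_projection_round_trip():
    radius = torch.Tensor([1.0])
    x = 0.1 * torch.randn(4, 2)  # tangent vectors at the origin
    at_point = exp_map_mu0(expand_proj_dims(0.1 * torch.randn(4, 2)), radius)
    x_proj, (pt, _) = sample_projection_mu0(x, at_point=at_point, radius=radius)
    _, x_rec = inverse_sample_projection_mu0(x_proj, at_point=at_point,
                                             radius=radius)
    assert torch.allclose(x, x_rec, atol=1e-4)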
def some_density(args):
    radius = torch.Tensor([args.radius]).cuda()
    n_pts = 100
    # Candidate 2D energy functions; f1 is the one plotted below.
    f1 = lambda z: torch.sin(6 * math.pi * z[:, 0] / 4)
    f2 = lambda z: 3 * torch.exp(-0.5 * ((z[:, 0] - 1) / 0.6)**2)
    f3 = lambda z: 3 * torch.sigmoid((z[:, 0] - 1) / 0.3)
    xx, yy, zz = setup_grid(5, n_pts)
    base_prob_dist = -f1(zz)
    # Map x, y coordinates on tangent space at origin to manifold (Lorentz model).
    twodim = zz
    threedim = expand_proj_dims(twodim).cuda()
    clamped_threedim = clamp(threedim, min=-max_clamp_norm,
                             max=max_clamp_norm).cuda()
    on_mani = exp_map_mu0(clamped_threedim, radius)
    # Calculate densities of the x, y coordinates on the Lorentz model.
    log_det = _logdet(clamped_threedim, radius)
    log_probs = base_prob_dist - log_det
    probs = torch.exp(log_probs)
    # Convert to Poincare coordinates for plotting.
    xy_poincare = lorentz_to_poincare(on_mani.squeeze(), radius)
    plot_density(xy_poincare, probs, radius, args.namestr)
    if args.flow != 'none':
        plot_flow(args, radius, args.flow, f1, args.namestr)
def inverse(self, z_hyper, edge_index=None):
    z = inverse_exp_map_mu0(z_hyper, self.radius)
    z_mu0 = z[..., 1:]
    x = z_mu0
    log_det_J = logmap_logdet(z, self.radius)
    for i in range(0, self.n_blocks):
        if i > 0:
            # Project between flow layers.
            x_proj_mu0 = inverse_exp_map_mu0(x, self.radius)
            x = x_proj_mu0[..., 1:]
            log_det_J += logmap_logdet(x_proj_mu0, self.radius)
        x_ = x * self.mask[i]
        if self.layer_type != 'Linear':
            s = self.s[i](x_, edge_index)
            t = self.t[i](x_, edge_index)
        else:
            s = self.s[i](x_)
            t = self.t[i](x_)
        x = x_ + (1 - self.mask[i]) * (x * torch.exp(s) + t)
        self.preclamp_norm = x.max()
        x = clamp(x, min=-max_clamp_norm, max=max_clamp_norm)
        log_det_J += ((1 - self.mask[i]) * s).sum(dim=1)  # log det dx/du
        x_mu0 = expand_proj_dims(x)
        # Project back to the manifold.
        x = exp_map_mu0(x_mu0, self.radius)
        log_det_J += _logdet(x_mu0, self.radius)
    return x, log_det_J
def forward(self, x_hyper, edge_index=None):
    x = inverse_exp_map_mu0(x_hyper, self.radius)
    x_mu0 = x[..., 1:]
    z = x_mu0
    log_det_J = -1 * logmap_logdet(x, self.radius)
    for i in reversed(range(0, self.n_blocks)):
        z_ = self.mask[i] * z
        if self.layer_type != 'Linear':
            s = self.s[i](z_, edge_index)
            t_out = self.t[i](z_, edge_index)
        else:
            s = self.s[i](z_)
            t_out = self.t[i](z_)
        t_proj = proj_vec(t_out, self.radius)
        t1, t_rest = t_proj[:, 0].unsqueeze(1), t_proj[:, 1:]
        t = self.create_masked_t((1 - self.mask[i]), t1, t_rest)
        z_2 = expand_proj_dims((1 - self.mask[i]) * z)
        z_2 = clamp(z_2, min=-max_clamp_norm, max=max_clamp_norm)
        z_exp_2 = exp_map_mu0(z_2, self.radius)
        log_det_J -= _logdet(z_2, self.radius, subdim=(self.mask[i]).sum())
        z_exp_2 = clamp(z_exp_2, min=-max_clamp_norm, max=max_clamp_norm)
        z_inv_pt_arg = inverse_exp_map(x=z_exp_2, at_point=t, radius=self.radius)
        log_det_J -= logmap_logdet(z_inv_pt_arg, self.radius,
                                   subdim=(self.mask[i]).sum())
        z_inv_pt_arg = clamp(z_inv_pt_arg, min=-max_clamp_norm,
                             max=max_clamp_norm)
        pt = inverse_parallel_transport_mu0(z_inv_pt_arg, src=t,
                                            radius=self.radius)
        pt = pt[..., 1:]
        z = (1 - self.mask[i]) * pt * torch.exp(-s) + z_
        log_det_J -= ((1 - self.mask[i]) * s).sum(dim=1)
    z_mu0 = expand_proj_dims(z)
    z = exp_map_mu0(z_mu0, self.radius)
    log_det_J -= _logdet(z_mu0, self.radius)
    return z, log_det_J
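# Hedged invertibility sketch for the coupling flow: forward() should undo
# inverse() and negate its log-det, up to clamping error. The kwargs_flows
# key 'WrappedRealNVP' is an illustrative assumption; substitute whatever
# key this repository registers for the flow defined above.
def _check_coupling_invertibility():
    radius = torch.tensor(1.0)
    flow = kwargs_flows['WrappedRealNVP'](2, 2, 32, 1, layer_type='Linear',
                                          radius=radius)
    z0 = 0.1 * torch.randn(8, 2)
    z_hyper = exp_map_mu0(expand_proj_dims(z0), radius)
    x, logdet_inv = flow.inverse(z_hyper)
    z_rec, logdet_fwd = flow.forward(x)
    assert torch.allclose(z_hyper, z_rec, atol=1e-3)
    assert torch.allclose(logdet_inv, -logdet_fwd, atol=1e-3)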
def forward(self, x):
    sum_log_det_jac = 0
    z, mu_h, std = self.encode(x)
    z = clamp(z, min=-max_clamp_norm, max=max_clamp_norm)
    ### Flow ###
    if self.flow_model:
        self.flow_model.base_dist_mean = mu_h
        self.flow_model.base_dist_var = std
        self.flow_model.radius = self.radius
        z_k, sum_log_det_jac = self.flow_model.inverse(z)
        z_k = clamp(z_k, min=-max_clamp_norm, max=max_clamp_norm)
    else:
        z_k = z
    kld = self.kl_loss(self.q_z, self.p_z, z, z_k, self.data)
    z_mu0 = inverse_exp_map_mu0(z_k, self.radius)
    # With the flow and log-map corrections this is no longer a true KL
    # divergence and can be negative.
    kld = kld - sum_log_det_jac - logmap_logdet(z_mu0, self.radius)
    x_tilde = self.decode(z_mu0)
    return x_tilde, kld
def rsample_log_probs(
        self, sample_shape: torch.Size, q_z: HyperboloidWrappedNormal,
        p_z: HyperboloidWrappedNormal) -> Tuple[Tensor, Tensor, Tensor]:
    sum_log_det_jac = 0
    z, posterior_parts = q_z.rsample_with_parts(sample_shape)
    z = clamp(z, min=-max_clamp_norm, max=max_clamp_norm)
    if self.flow_model:
        z_k = z.view(-1, self.z_dim + 1)
        z_k, sum_log_det_jac = self.flow_model.inverse(z_k)
        z_k = clamp(z_k, min=-max_clamp_norm, max=max_clamp_norm)
        z_k = z_k.view(sample_shape[0], -1, self.z_dim + 1)
        sum_log_det_jac = sum_log_det_jac.view(sample_shape[0], -1)
    else:
        z_k = z
    z_mu0 = inverse_exp_map_mu0(z_k, self.radius)
    log_q_z_x, log_p_z_k = self._log_prob(q_z, p_z, z, z_k, posterior_parts)
    log_q_z_k_x = log_q_z_x - sum_log_det_jac - logmap_logdet(z_mu0, self.radius)
    log_p_z_k = log_p_z_k - logmap_logdet(z_mu0, self.radius)
    return z_mu0, log_q_z_k_x, log_p_z_k
def train_flow(args, flow_model, radius, target, clamped_threedim, on_mani):
    flow_model = kwargs_flows[flow_model](4, 2, 32, 1, layer_type='Linear',
                                          radius=torch.tensor(radius)).cuda()
    flow_opt = optim.Adam(flow_model.parameters())
    sample_shape = torch.Size([10000])
    z, posterior_parts = target.rsample_with_parts(sample_shape)
    z = clamp(z, min=-max_clamp_norm, max=max_clamp_norm)
    train_dataset = FlowDataset(z)
    train_loader = data.DataLoader(train_dataset, batch_size=512)
    train_loss_avg = []
    for epoch in range(0, args.flow_epochs):
        train_loss_avg.append(0)
        for batch_idx, data_batch in enumerate(train_loader):
            data_batch = data_batch.cuda()
            flow_model.base_dist_mean = torch.zeros_like(data_batch).cuda()
            flow_model.base_dist_var = torch.ones(data_batch.shape[0], 2).cuda()
            flow_opt.zero_grad()
            # Maximum likelihood on samples drawn from the target.
            loss = -1 * flow_model.log_prob(data_batch).mean()
            loss.backward()
            flow_opt.step()
            train_loss_avg[-1] += loss.item()
        train_loss_avg[-1] /= len(train_loader.dataset)
        print("Epoch:{} Loss:{}".format(epoch, train_loss_avg[-1]))
    # Calculate densities of the x, y grid coordinates on the Lorentz model.
    flow_model.base_dist_mean = torch.zeros_like(on_mani).cuda()
    flow_model.base_dist_var = torch.ones(on_mani.shape[0], 2).cuda()
    probs = flow_model.log_prob(on_mani)
    probs += logmap_logdet(clamped_threedim.cuda(), radius)
    probs = torch.exp(probs)
    # Convert to Poincare coordinates for plotting.
    xy_poincare = lorentz_to_poincare(on_mani.squeeze(), radius)
    namestr = args.namestr + str(epoch)
    plot_density(xy_poincare, probs, flow_model.radius, namestr)
    return flow_model
def forward(self, x, edge_index):
    x = F.relu(self.conv1(x, edge_index))
    mu = self.conv_mu(x, edge_index)
    logvar = self.conv_logvar(x, edge_index)
    mu = clamp(mu, min=-max_clamp_norm, max=max_clamp_norm)
    assert torch.isfinite(mu).all()
    assert torch.isfinite(logvar).all()
    mu_h = exp_map_mu0(expand_proj_dims(mu), self.radius)
    assert torch.isfinite(mu_h).all()
    # +eps prevents collapse.
    std = F.softplus(logvar) + 1e-5
    assert torch.isfinite(std).all()
    self.q_z, self.p_z = self.reparametrize(mu_h, std)
    z_0, data = self.q_z.rsample_with_parts()
    return z_0, mu_h, std, data
def bottleneck(self, h):
    mu, logvar = self.fc_mean(h), self.fc_logvar(h)
    mu = clamp(mu, min=-max_clamp_norm, max=max_clamp_norm)
    assert torch.isfinite(mu).all()
    assert torch.isfinite(logvar).all()
    mu_h = exp_map_mu0(expand_proj_dims(mu), self.radius)
    assert torch.isfinite(mu_h).all()
    # +eps prevents collapse.
    std = F.softplus(logvar) + 1e-5
    assert torch.isfinite(std).all()
    self.q_z, self.p_z = self.reparametrize(mu_h, std)
    z, self.data = self.q_z.rsample_with_parts()
    return z, mu_h, std
def mixture(args):
    radius = torch.Tensor([args.radius]).to(args.dev)
    samples = sample_2d_data(args.dataset, 100000).to(args.dev)
    samples = clamp(samples, min=-max_clamp_norm, max=max_clamp_norm)
    samples_h = exp_map_mu0(expand_proj_dims(samples), radius)
    # Convert to Poincare coordinates for plotting.
    xy_poincare = lorentz_to_poincare(samples_h.squeeze(), radius)
    fig = plt.figure()
    ax = fig.add_subplot(111)
    x = xy_poincare[:, 0].view(-1, 100).detach().cpu()
    y = xy_poincare[:, 1].view(-1, 100).detach().cpu()
    p_z = None
    # Define points within the circle.
    range_lim = 5
    ax.hist2d(xy_poincare[:, 0].detach().cpu().numpy(),
              xy_poincare[:, 1].detach().cpu().numpy(),
              range=[[-range_lim, range_lim], [-range_lim, range_lim]],
              bins=5000, cmap='magma')
    # ax.contourf(x, y, z, 100, antialiased=False, cmap='magma')
    ax.axis('off')
    # Makes the circle look like a circle.
    ax.axis('equal')
    ax.set_xlim(-args.axis_lim, args.axis_lim)
    ax.set_ylim(-args.axis_lim, args.axis_lim)
    # Save the full figure.
    fig.savefig('install/{}.png'.format(args.namestr))
    print("Saved to install/{}.png".format(args.namestr))
    if args.flow != 'none':
        plot_flow(args, radius, args.flow, p_z, args.namestr,
                  n_blocks=args.n_blocks, samples=samples_h)
def MC_log_likelihood(self, x):
    """
    :param x: Mini-batch of inputs.
    :return: Monte Carlo estimate of the log-likelihood (and a mutual
        information term), using self.K samples per input.
    """
    n = self.K
    sample_shape = torch.Size([n])
    batch_size = x.shape[0]
    prob_shape = torch.Size([n, batch_size])
    x_encoded = self.encoder(x)
    mu, logvar = self.fc_mean(x_encoded), self.fc_logvar(x_encoded)
    mu = clamp(mu, min=-max_clamp_norm, max=max_clamp_norm)
    mu_h = exp_map_mu0(expand_proj_dims(mu), self.radius)
    # +eps prevents collapse.
    std = F.softplus(logvar) + 1e-5
    q_z, p_z = self.reparametrize(mu_h, std)
    # Numerically more stable.
    z, log_q_z_x, log_p_z = self.rsample_log_probs(sample_shape, q_z, p_z)
    z = inverse_exp_map_mu0(z, self.radius)
    log_q_z_x = log_q_z_x - logmap_logdet(z, self.radius)
    x_mb_ = self.decode(z)
    x_orig = x.repeat((n, 1, 1))
    log_p_x_z = -self.recon_loss(x_mb_, x_orig).sum(dim=-1)
    assert log_p_x_z.shape == prob_shape
    assert log_q_z_x.shape == log_p_z.shape == prob_shape
    # Importance-weighted estimate:
    # log p(x) ~= logsumexp_k[log p(x, z_k) - log q(z_k | x)] - log K.
    joint = (log_p_x_z + log_p_z - log_q_z_x)
    log_p_x = joint.logsumexp(dim=0) - np.log(n)
    mi = (log_q_z_x - log_p_z).logsumexp(dim=0) - np.log(n)
    return log_p_x, mi
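# Hedged evaluation sketch: averaging MC_log_likelihood() over a test set.
# `model` and `test_loader` are assumed to exist elsewhere; only the call
# pattern is taken from this file.
def _evaluate_mc_ll(model, test_loader, device='cuda'):
    model.eval()
    total_ll, total_mi, n = 0.0, 0.0, 0
    with torch.no_grad():
        for x in test_loader:
            x = x.to(device)
            log_p_x, mi = model.MC_log_likelihood(x)
            total_ll += log_p_x.sum().item()
            total_mi += mi.sum().item()
            n += x.shape[0]
    return total_ll / n, total_mi / n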
def plot_flow(args, radius, flow, target, namestr, n_blocks=2, samples=None):
    # Map x, y coordinates on tangent space at origin to manifold (Lorentz model).
    x = torch.linspace(-5, 5, 100)
    xx, yy = torch.meshgrid((x, x))
    twodim = torch.stack((xx.flatten(), yy.flatten()), dim=1)
    threedim = expand_proj_dims(twodim)
    clamped_threedim = clamp(threedim, min=-max_clamp_norm,
                             max=max_clamp_norm).to(args.dev)
    on_mani = exp_map_mu0(clamped_threedim, radius).cuda()
    # flow_model = train_potential_flow(flow, radius, target)
    if samples is not None:
        flow_model = train_flow_density(args, flow, n_blocks, radius, samples,
                                        clamped_threedim, on_mani)
    else:
        flow_model = train_flow(args, flow, radius, target, clamped_threedim,
                                on_mani)
    # Calculate densities of the x, y grid coordinates on the Lorentz model.
    flow_model.base_dist_mean = torch.zeros_like(on_mani).cuda()
    flow_model.base_dist_var = torch.ones(on_mani.shape[0], 2).cuda()
    probs = flow_model.log_prob(on_mani)
    probs += logmap_logdet(clamped_threedim.cuda(), radius)
    probs = torch.exp(probs)
    # Convert to Poincare coordinates for plotting.
    xy_poincare = lorentz_to_poincare(on_mani.squeeze(), radius)
    plot_density(xy_poincare, probs, flow_model.radius, namestr, flow=flow)