def _compute_embedding_grad_sample(layer, A, B, batch_dim=0):
    """Accumulate per-sample weight gradients for an embedding layer.

    The index activations ``A`` are expanded into one-hot vectors over the
    vocabulary (rows of ``layer.weight``), combined with the backpropagated
    gradients ``B`` via an outer product, and any extra leading dimensions
    are summed out so each sample contributes a single (vocab, dim) slab.
    """
    indicator = F.one_hot(A, num_classes=layer.weight.shape[0])
    per_sample = torch.einsum(
        "n...ij->nij",
        torch.einsum("n...i,n...j->n...ij", indicator, B),
    )
    _create_or_extend_grad_sample(layer.weight, per_sample, batch_dim)
def sample(self, batch_size: int) -> torch.Tensor:
    """Draw ``batch_size`` skill indices and return them one-hot encoded.

    If ``self.z`` is a fixed tensor it is repeated across the batch;
    otherwise skills are sampled uniformly from ``self.num_skills``.
    The encoding is returned as a float tensor on ``self.device``.
    """
    if isinstance(self.z, torch.Tensor):
        skills = self.z.repeat(batch_size)
    else:
        skills = torch.randint(self.num_skills, (batch_size,))
    encoded = F.one_hot(skills, num_classes=self.num_skills).float()
    return encoded.to(self.device)
def one_hot(space, x):
    """Recursively one-hot encode ``x`` according to a Gym-style ``space``.

    Plain sequences and ``Tuple``/``Dict`` spaces are encoded element-wise,
    ``MultiDiscrete`` is split column-by-column into its component
    ``Discrete`` sub-spaces, ``Discrete`` indices become float one-hot
    rows, and any other space passes ``x`` through unchanged.
    """
    if isinstance(space, (list, tuple)):
        return tuple(OneHot.one_hot(sub, part) for sub, part in zip(space, x))
    if isinstance(space, Tuple):
        return OneHot.one_hot(space.spaces, x)
    if isinstance(space, Dict):
        return {key: OneHot.one_hot(sub, x[key]) for key, sub in space.spaces.items()}
    if isinstance(space, MultiDiscrete):
        columns = x.split(1, dim=1)
        encoded = OneHot.one_hot([Discrete(n) for n in space.nvec], columns)
        return torch.cat(encoded, dim=1)
    if isinstance(space, Discrete):
        return F.one_hot(x.squeeze(1).long(), space.n).float()
    return x
def _compute_embedding_grad_sample(layer: nn.Embedding, A: torch.Tensor, B: torch.Tensor, batch_dim: int = 0) -> None:
    """Compute and store per-sample gradients for an ``nn.Embedding``.

    Args:
        layer: The embedding layer whose ``weight`` receives the grad sample.
        A: Index activations fed into the layer during the forward pass.
        B: Gradients backpropagated into the layer's output.
        batch_dim: Position of the batch dimension.
    """
    vocab_size = layer.weight.shape[0]
    encoded = F.one_hot(A, num_classes=vocab_size)
    # Outer product per position, then sum away intermediate dims so the
    # result is one (vocab, embedding_dim) gradient per sample.
    outer = torch.einsum("n...i,n...j->n...ij", encoded, B)
    reduced = torch.einsum("n...ij->nij", outer)
    _create_or_extend_grad_sample(layer.weight, reduced, batch_dim)
def one_hot(inputs, num_classes, dtype=None):
    """One-hot encode a LongTensor of class indices.

    Args:
        inputs: LongTensor of arbitrary shape holding class indices in
            ``[0, num_classes)``.
        num_classes: Size of the one-hot dimension appended at the end.
        dtype: Optional ``torch.dtype`` for the result. Previously this
            argument was accepted but silently ignored; it is now applied.
            When ``None`` (the default), the usual ``torch.int64`` output
            of ``F.one_hot`` is returned unchanged.

    Returns:
        Tensor of shape ``inputs.shape + (num_classes,)``.
    """
    encoded = F.one_hot(inputs, num_classes)
    if dtype is not None:
        encoded = encoded.to(dtype)
    return encoded
        # Tail of the preceding method (its `def` is outside this chunk).
        return dist_term

    def _regularization_term(self, c_means, n_clusters):
        """Mean norm of the active cluster centers, averaged over the batch.

        Args:
            c_means: (batch, n_features, max_n_clusters) cluster-mean tensor.
            n_clusters: per-sample count of valid clusters; only the first
                ``n_clusters[i]`` columns of sample ``i`` are penalized.
        """
        bs, n_features, max_n_clusters = c_means.size()
        reg_term = 0
        for i in range(bs):
            # n_features, n_clusters
            mean_sample = c_means[i, :, :n_clusters[i]]
            # self.norm is presumably the p of the vector norm taken along
            # the feature axis (dim 0) — TODO confirm against __init__.
            reg_term = reg_term + torch.mean(
                torch.norm(mean_sample, self.norm, 0))
        reg_term = reg_term / bs
        return reg_term


if __name__ == '__main__':
    # Manual smoke test of DiscriminativeLoss on random data (requires CUDA).
    input = torch.rand((4, 5, 512, 256)).cuda()
    target = torch.randint(0, 3, (4, 512, 256))
    # NOTE(review): F.one_hot without num_classes sizes the last dim to
    # target.max() + 1; with randint(0, 3) that is almost surely 3 channels.
    target_one_hot = F.one_hot(target)
    # (B, H, W, C) -> (B, C, H, W)
    target_label = target_one_hot.permute([0, 3, 1, 2])
    print(target_label.shape)
    # Pad with two empty channels so the target matches the 5-channel input.
    padding = torch.zeros((4, 2, 512, 256)).type(torch.LongTensor)
    target = torch.cat((target_label, padding), dim=1).cuda()
    print(target.shape)
    print(target.shape)
    loss = DiscriminativeLoss(device_cuda=True)
    result = loss(input, target, [5, 5, 5, 5])
    print(result)