def forward(
    self,
    theta: torch.Tensor,            # (N, D)
    x: torch.Tensor,                # (N, *)
    mask: torch.BoolTensor = None,  # (D,)
) -> torch.Tensor:
    if mask is None:
        mask = self.default
    elif self.hyper is not None:
        # Encode the mask as {-1, +1} and pass it to the hyper-network along with the main network.
        self.hyper(self.net, mask * 2. - 1.)

    if mask.dim() == 1 and theta.size(-1) < mask.numel():
        # theta only carries the unmasked entries; scatter them into a zero-filled
        # tensor spanning the full parameter dimension.
        blank = theta.new_zeros(theta.shape[:-1] + mask.shape)
        blank[..., mask] = theta
        theta = blank
    elif mask.dim() > 1 and theta.shape != mask.shape:
        # Reshape theta and x so they broadcast against a stack of masks.
        batch_shape = theta.shape[:-1]
        stack_shape = batch_shape + mask.shape[:-1]
        view_shape = batch_shape + (1,) * (mask.dim() - 1)
        theta = theta.view(view_shape + theta.shape[-1:]).expand(stack_shape + theta.shape[-1:])
        x = x.view(view_shape + x.shape[-1:]).expand(stack_shape + x.shape[-1:])

    theta = self.standardize(theta) * mask

    if self.hyper is None:
        # Without a hyper-network, concatenate the {-1, +1}-encoded mask to theta.
        theta = torch.cat(torch.broadcast_tensors(theta, mask * 2. - 1.), dim=-1)

    return self.net(theta, x)
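# A small, self-contained illustration (not part of the class above) of the scatter step
# used when `mask` is 1-D and `theta` only carries the values of the unmasked dimensions;
# the tensor values here are made up for demonstration.
import torch

theta = torch.tensor([[0.5, -1.2]])              # (N, 2): values for the kept dimensions
mask = torch.tensor([True, False, True, False])  # (D,) with 2 dimensions kept
blank = theta.new_zeros(theta.shape[:-1] + mask.shape)
blank[..., mask] = theta                         # -> tensor([[0.5, 0.0, -1.2, 0.0]])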
def replace_masked_values(
    tensor: torch.Tensor, mask: torch.BoolTensor, replace_with: float
) -> torch.Tensor:
    if tensor.dim() != mask.dim():
        raise ConfigurationError(
            "tensor.dim() (%d) != mask.dim() (%d)" % (tensor.dim(), mask.dim())
        )
    return tensor.masked_fill(~mask, replace_with)
def masked_log_softmax(vector: torch.Tensor, mask: torch.BoolTensor, dim: int = -1) -> torch.Tensor:
    """
    `torch.nn.functional.log_softmax(vector)` does not work if some elements of `vector` should be
    masked.  This performs a log_softmax on just the non-masked portions of `vector`.  Passing
    `None` in for the mask is also acceptable; you'll just get a regular log_softmax.

    `vector` can have an arbitrary number of dimensions; the only requirement is that `mask` is
    broadcastable to `vector's` shape.  If `mask` has fewer dimensions than `vector`, we will
    unsqueeze on dimension 1 until they match.  If you need a different unsqueezing of your mask,
    do it yourself before passing the mask into this function.

    In the case that the input vector is completely masked, the return value of this function is
    arbitrary, but not `nan`.  You should be masking the result of whatever computation comes out
    of this in that case, anyway, so the specific values returned shouldn't matter.  Also, the way
    that we deal with this case relies on having single-precision floats; mixing half-precision
    floats with fully-masked vectors will likely give you `nans`.

    If your logits are all extremely negative (i.e., the max value in your logit vector is -50 or
    lower), the way we handle masking here could mess you up.  But if you've got logit values that
    extreme, you've got bigger problems than this.
    """
    if mask is not None:
        while mask.dim() < vector.dim():
            mask = mask.unsqueeze(1)
        # vector + mask.log() is an easy way to zero out masked elements in logspace, but it
        # results in nans when the whole vector is masked.  We need a very small value instead of a
        # zero in the mask for these cases.
        vector = vector + (mask + 1e-30).log()
    return torch.nn.functional.log_softmax(vector, dim=dim)
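# A minimal usage sketch for masked_log_softmax (assumes `torch` is imported and the function
# above is in scope): masked positions end up near log(1e-30), i.e. effectively negligible
# probability, while the unmasked positions match a log_softmax over just those logits.
logits = torch.tensor([[1.0, 2.0, 3.0]])
mask = torch.tensor([[True, True, False]])
log_probs = masked_log_softmax(logits, mask)
probs = log_probs.exp()  # ~[0.27, 0.73, 0.0]; the first two entries match softmax([1.0, 2.0])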
def forward(self, features, hidden, is_start: torch.BoolTensor, additional=None):
    '''
    Forward pass with sanity check.

    Args:
        features: list of tensor. Current feature map.
        hidden: list of tensor or None. The previous hidden state.
        is_start: a batch of bool tensors indicating if the input x is the \
            start of a video.
    '''
    assert is_start.dim() == 1 and is_start.dtype == torch.bool
    if hidden is None:
        hidden = [torch.zeros_like(features[i]) for i in range(self.num_levels)]
    else:
        assert len(features) == len(hidden) == self.num_levels

    # if any image in the batch is a start of a video,
    # reset the corresponding hidden state
    if is_start.any():
        for level_hid in hidden:
            assert level_hid.shape[0] == len(is_start)
            # NOTE: boolean indexing returns a copy, so `level_hid[is_start].zero_()`
            # would not modify `level_hid`; index assignment resets it in place.
            level_hid[is_start] = 0.
    fused = self.fuse(features, hidden, additional)
    return fused
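# A short illustration of the in-place reset above (values are made up): indexing a tensor
# with a boolean mask produces a copy, so calling .zero_() on that copy leaves the hidden
# state untouched, whereas index assignment writes back into the original tensor.
import torch

h = torch.ones(4, 2)
is_start = torch.tensor([True, False, False, True])
h[is_start].zero_()   # no effect on h: zeroes a copy
h[is_start] = 0.      # rows 0 and 3 of h itself are now zero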
def replace_masked_values(tensor: torch.Tensor, mask: torch.BoolTensor, replace_with: float) -> torch.Tensor:
    """
    # Reference : allennlp
    Replaces all masked values in `tensor` with `replace_with`.  `mask` must be broadcastable
    to the same shape as `tensor`. We require that `tensor.dim() == mask.dim()`, as otherwise we
    won't know which dimensions of the mask to unsqueeze.

    This just does `tensor.masked_fill()`, except the pytorch method fills in things with a mask
    value of 1, where we want the opposite.  You can do this in your own code with
    `tensor.masked_fill(~mask, replace_with)`.
    """
    if tensor.dim() != mask.dim():
        raise ConfigurationError(
            "tensor.dim() (%d) != mask.dim() (%d)" % (tensor.dim(), mask.dim())
        )
    return tensor.masked_fill(~mask, replace_with)
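# A minimal usage sketch for replace_masked_values (assumes `torch` is imported and a
# ConfigurationError class is available, e.g. allennlp.common.checks.ConfigurationError):
# push padded positions to a large negative value so they cannot win a subsequent max.
scores = torch.tensor([[0.2, 0.7, 0.1],
                       [0.9, 0.3, 0.4]])
mask = torch.tensor([[True, True, False],
                     [True, False, False]])
masked_scores = replace_masked_values(scores, mask, -1e7)
best = masked_scores.max(dim=-1).values  # tensor([0.7, 0.9])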
def masked_softmax(
    vector: torch.Tensor,
    mask: torch.BoolTensor,
    dim: int = -1,
    memory_efficient: bool = False,
) -> torch.Tensor:
    if mask is None:
        result = torch.nn.functional.softmax(vector, dim=dim)
    else:
        while mask.dim() < vector.dim():
            mask = mask.unsqueeze(1)
        if not memory_efficient:
            result = torch.nn.functional.softmax(vector * mask, dim=dim)
            result = result * mask
            result = result / (
                result.sum(dim=dim, keepdim=True) + tiny_value_of_dtype(result.dtype)
            )
        else:
            masked_vector = vector.masked_fill(~mask, min_value_of_dtype(vector.dtype))
            result = torch.nn.functional.softmax(masked_vector, dim=dim)
    return result
def masked_softmax(
    vector: torch.Tensor,
    mask: torch.BoolTensor,
    dim: int = -1,
    memory_efficient: bool = False,
) -> torch.Tensor:
    """
    `torch.nn.functional.softmax(vector)` does not work if some elements of `vector` should be
    masked.  This performs a softmax on just the non-masked portions of `vector`.  Passing
    `None` in for the mask is also acceptable; you'll just get a regular softmax.

    `vector` can have an arbitrary number of dimensions; the only requirement is that `mask` is
    broadcastable to `vector's` shape.  If `mask` has fewer dimensions than `vector`, we will
    unsqueeze on dimension 1 until they match.  If you need a different unsqueezing of your mask,
    do it yourself before passing the mask into this function.

    If `memory_efficient` is set to true, we will simply use a very large negative number for those
    masked positions so that the probabilities of those positions would be approximately 0.
    This is not accurate in math, but works for most cases and consumes less memory.

    In the case that the input vector is completely masked and `memory_efficient` is false, this
    function returns an array of `0.0`. This behavior may cause `NaN` if this is used as the last
    layer of a model that uses categorical cross-entropy loss. Instead, if `memory_efficient` is
    true, this function will treat every element as equal, and do softmax over equal numbers.
    """
    if mask is None:
        result = torch.nn.functional.softmax(vector, dim=dim)
    else:
        while mask.dim() < vector.dim():
            mask = mask.unsqueeze(1)
        if not memory_efficient:
            # To limit numerical errors from large vector elements outside the mask, we zero these out.
            result = torch.nn.functional.softmax(vector * mask, dim=dim)
            result = result * mask
            result = result / (
                result.sum(dim=dim, keepdim=True) + tiny_value_of_dtype(result.dtype)
            )
        else:
            masked_vector = vector.masked_fill(~mask, min_value_of_dtype(vector.dtype))
            result = torch.nn.functional.softmax(masked_vector, dim=dim)
    return result
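# A minimal usage sketch for masked_softmax: with memory_efficient=False a fully masked row
# comes back as all zeros, while with memory_efficient=True it comes back as a uniform
# distribution (every position is filled with the same large negative value before the softmax).
logits = torch.randn(2, 4)
mask = torch.tensor([[True, True, False, False],
                     [False, False, False, False]])
p = masked_softmax(logits, mask)                            # row 0 sums to 1, row 1 is all zeros
p_me = masked_softmax(logits, mask, memory_efficient=True)  # row 1 is uniform (0.25 each)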
def forward(self, x, is_start: torch.BoolTensor = None, labels: List[ImageObjects] = None):
    '''
    Forward pass

    Args:
        x: a batch of images, e.g. shape(8,3,608,608)
        is_start: a batch of bool tensors indicating if the input x is the \
            start of a video.
        labels: a batch of ground truth
    '''
    assert x.dim() == 4
    if is_start is None:
        is_start = torch.zeros(x.shape[0], dtype=torch.bool, device=x.device)
    assert is_start.dim() == 1 and is_start.shape[0] == x.shape[0]
    self.img_size = x.shape[2:4]
    _hidden = {}

    # backbone
    features = self.backbone(x)
    if 'backbone' in self.hid_names:
        _hidden['backbone'] = [f.detach().clone() for f in features]

    # feature fusion
    features = self.fpn(features)
    if 'fpn' in self.hid_names:
        _hidden['fpn'] = [f.detach().clone() for f in features]

    # feature aggregation
    features = self.agg(features, self.hidden, is_start)

    # raw prediction
    all_branch_preds = self.rpn(features)
    if 'raw_pred' in self.hid_names:
        pred_copy = []
        for level_pred in all_branch_preds:
            _copy = dict([(k, v.detach().clone()) for k, v in level_pred.items()])
            pred_copy.append(_copy)
        _hidden['raw_pred'] = pred_copy

    # final prediction layer
    dts_all = []
    losses_all = []
    for i, raw_preds in enumerate(all_branch_preds):
        dts, loss = self.det_layers[i](raw_preds, self.img_size, labels)
        dts_all.append(dts)
        losses_all.append(loss)

    # merge the predictions from all feature levels
    batch_bbs = torch.cat([d['bbox'] for d in dts_all], dim=1).detach()
    batch_cls_idx = torch.cat([d['class_idx'] for d in dts_all], dim=1).detach()
    batch_scores = torch.cat([d['score'] for d in dts_all], dim=1).detach()

    batch_pred_objects = []
    # iterate over every image in the batch
    for bbs, cls_idx, scores in zip(batch_bbs, batch_cls_idx, batch_scores):
        # initialize the pred objects in current image
        p_objs = ImageObjects(bboxes=bbs, cats=cls_idx, scores=scores,
                              bb_format=self.bb_format, img_hw=self.img_size)
        batch_pred_objects.append(p_objs)
    if 'final_pred' in self.hid_names:
        _hidden['final_pred'] = batch_pred_objects

    self.hidden = _hidden
    if labels is None:
        return batch_pred_objects

    if self.check_gt_assignment:
        total_gt_num = sum([len(t) for t in labels])
        assigned = sum(branch._assigned_num for branch in self.det_layers)
        assert assigned == total_gt_num, f'{assigned} != {total_gt_num}'
    self.loss_str = ''
    for m in self.det_layers:
        self.loss_str += m.loss_str + '\n'
    loss = sum(losses_all)
    return loss
def masked_log_softmax(vector: torch.Tensor, mask: torch.BoolTensor, dim: int = -1) -> torch.Tensor:
    if mask is not None:
        while mask.dim() < vector.dim():
            mask = mask.unsqueeze(1)
        vector = vector + (mask + tiny_value_of_dtype(vector.dtype)).log()
    return torch.nn.functional.log_softmax(vector, dim=dim)
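# The snippets above call tiny_value_of_dtype and min_value_of_dtype without defining them.
# A sketch of these helpers, following the AllenNLP implementations they appear to be taken
# from (treat the exact constants as an assumption about the library version in use):
def info_value_of_dtype(dtype: torch.dtype):
    """Returns the `finfo` or `iinfo` object of a given PyTorch data type."""
    if dtype == torch.bool:
        raise TypeError("Does not support torch.bool")
    elif dtype.is_floating_point:
        return torch.finfo(dtype)
    else:
        return torch.iinfo(dtype)


def min_value_of_dtype(dtype: torch.dtype):
    """Returns the minimum value representable by a given PyTorch data type."""
    return info_value_of_dtype(dtype).min


def tiny_value_of_dtype(dtype: torch.dtype):
    """Returns a small positive value for a floating-point data type, used to avoid division by zero."""
    if not dtype.is_floating_point:
        raise TypeError("Only supports floating point dtypes.")
    if dtype == torch.float or dtype == torch.double:
        return 1e-13
    elif dtype == torch.half:
        return 1e-4
    else:
        raise TypeError("Does not support dtype " + str(dtype))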
def _viterbi_decode(
    self, emissions: torch.FloatTensor, mask: torch.BoolTensor
) -> List[List[int]]:
    # emissions: (seq_length, batch_size, num_tags)
    # mask: (seq_length, batch_size)
    assert emissions.dim() == 3 and mask.dim() == 2
    assert emissions.shape[:2] == mask.shape
    assert emissions.size(2) == self.num_tags
    assert mask[0].all()

    seq_length, batch_size = mask.shape

    # Start transition and first emission
    # shape: (batch_size, num_tags)
    score = self.start_transitions + emissions[0]
    history = []

    # score is a tensor of size (batch_size, num_tags) where for every batch,
    # value at column j stores the score of the best tag sequence so far that ends
    # with tag j
    # history saves where the best tags candidate transitioned from; this is used
    # when we trace back the best tag sequence

    # Viterbi algorithm recursive case: we compute the score of the best tag sequence
    # for every possible next tag
    for i in range(1, seq_length):
        # Broadcast viterbi score for every possible next tag
        # shape: (batch_size, num_tags, 1)
        broadcast_score = score.unsqueeze(2)

        # Broadcast emission score for every possible current tag
        # shape: (batch_size, 1, num_tags)
        broadcast_emission = emissions[i].unsqueeze(1)

        # Compute the score tensor of size (batch_size, num_tags, num_tags) where
        # for each sample, entry at row i and column j stores the score of the best
        # tag sequence so far that ends with transitioning from tag i to tag j and emitting
        # shape: (batch_size, num_tags, num_tags)
        next_score = broadcast_score + self.transitions + broadcast_emission

        # Find the maximum score over all possible current tag
        # shape: (batch_size, num_tags)
        next_score, indices = next_score.max(dim=1)

        # Set score to the next score if this timestep is valid (mask == 1)
        # and save the index that produces the next score
        # shape: (batch_size, num_tags)
        score = torch.where(mask[i].unsqueeze(1), next_score, score)
        history.append(indices)

    # End transition score
    # shape: (batch_size, num_tags)
    score += self.end_transitions

    # Now, compute the best path for each sample
    # shape: (batch_size,)
    seq_ends = mask.long().sum(dim=0) - 1
    best_tags_list = []

    for idx in range(batch_size):
        # Find the tag which maximizes the score at the last timestep; this is our best tag
        # for the last timestep
        _, best_last_tag = score[idx].max(dim=0)
        best_tags = [best_last_tag.item()]

        # We trace back where the best last tag comes from, append that to our best tag
        # sequence, and trace it back again, and so on
        for hist in reversed(history[: seq_ends[idx]]):
            best_last_tag = hist[idx][best_tags[-1]]
            best_tags.append(best_last_tag.item())

        # Reverse the order because we start from the last timestep
        best_tags.reverse()
        best_tags_list.append(best_tags)

    return best_tags_list
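# A minimal usage sketch, assuming the method above belongs to a pytorch-crf style CRF module
# (with start_transitions, end_transitions, transitions, and num_tags attributes). In that
# library, decoding goes through the public decode() API rather than _viterbi_decode directly:
import torch
from torchcrf import CRF

num_tags, seq_length, batch_size = 5, 7, 3
crf = CRF(num_tags)  # expects (seq_length, batch_size, ...) shaped inputs by default
emissions = torch.randn(seq_length, batch_size, num_tags)
mask = torch.ones(seq_length, batch_size, dtype=torch.bool)
mask[5:, 1] = False  # the second sequence is shorter; the first timestep must stay unmasked
best_paths = crf.decode(emissions, mask=mask)  # a list of tag-index lists, one per sequence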