def concatenate_encoder_outputs(self,
                                encoder_outputs: BaseModelOutputWithPastAndCrossAttentions,
                                encoder_attention_mask: torch.LongTensor,
                                passage_mask: Optional[torch.LongTensor] = None
                                ) -> Tuple[BaseModelOutputWithPastAndCrossAttentions, torch.LongTensor]:
    # batch x contexts x sequence_len x hidden_dim
    hidden_states = encoder_outputs['last_hidden_state']
    assert hidden_states.shape[:2] == encoder_attention_mask.shape, \
        f"Shapes of mask and hidden states do not match! {str(hidden_states.shape[:2])} vs {str(encoder_attention_mask.shape)}"

    # concatenate hidden states into the same dimension
    # supported subsequence types: document, question+document
    # batch x contexts x sequence_len x hidden_dim -> batch x contexts * (subsequence_len_without_padding !variable size) x hidden_dim
    if self.config.fusion_strategy == "passages":
        mask = encoder_attention_mask.bool().view(-1) & passage_mask.bool().view(-1)
    elif self.config.fusion_strategy == "allinputs":
        mask = encoder_attention_mask.bool().view(-1)

    hidden_states = hidden_states.view(-1, hidden_states.shape[-1])[mask].unsqueeze(0)

    # there is no padding now
    attention_mask = torch.ones(hidden_states.size()[:2], device=hidden_states.device, dtype=torch.long)
    encoder_outputs['last_hidden_state'] = hidden_states
    return encoder_outputs, attention_mask
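
# A minimal standalone sketch (not part of the original class) of the fusion step
# above, mirroring the "allinputs" branch: per-passage hidden states are flattened
# and only non-padding positions are gathered into one long, padding-free sequence.
# All shapes and dummy tensors below are illustrative assumptions.
import torch

batch_times_contexts, seq_len, hidden_dim = 4, 5, 8
hidden_states = torch.randn(batch_times_contexts, seq_len, hidden_dim)
encoder_attention_mask = torch.tensor([[1, 1, 1, 0, 0],
                                       [1, 1, 0, 0, 0],
                                       [1, 1, 1, 1, 0],
                                       [1, 0, 0, 0, 0]])

mask = encoder_attention_mask.bool().view(-1)
fused = hidden_states.view(-1, hidden_dim)[mask].unsqueeze(0)

# One "batch" containing the concatenated non-padding tokens (3 + 2 + 4 + 1 = 10).
assert fused.shape == (1, mask.sum().item(), hidden_dim)
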
def predict(self, inputs: torch.FloatTensor, mask: torch.LongTensor) -> List:
    bool_mask = mask.bool()

    if self.tag_projection:
        inputs = self.tag_projection(inputs)

    scores = inputs * mask.unsqueeze(-1)
    best_paths = self.base.viterbi_tags(scores, bool_mask, top_k=self.top_k)

    # Just get the top tags and ignore the scores.
    tags = cast(List[List[int]], [x[0][0] for x in best_paths])
    return tags
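
# Hedged usage sketch: it is an assumption that ``self.base`` behaves like
# AllenNLP's ConditionalRandomField, whose ``viterbi_tags`` (when ``top_k`` is set)
# returns, per sequence, a list of (tag_sequence, score) pairs. That is why the
# decoding line above indexes ``x[0][0]`` to get only the best tag sequence.
import torch
from allennlp.modules.conditional_random_field import ConditionalRandomField

crf = ConditionalRandomField(num_tags=3)
logits = torch.randn(2, 4, 3)                        # batch_size=2, seq_len=4, num_tags=3
mask = torch.tensor([[1, 1, 1, 0], [1, 1, 0, 0]]).bool()

best_paths = crf.viterbi_tags(logits, mask, top_k=2)
tags = [x[0][0] for x in best_paths]                 # best tag sequence per example
print(tags)                                          # e.g. [[0, 2, 1], [1, 0]]
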
def forward(self, embedded: torch.Tensor, attention_mask: torch.LongTensor) -> torch.Tensor:
    """
    Args:
        embedded: batch_size, pad_len, embed_dim
        attention_mask: batch_size, pad_len
    Returns:
        batch_size, pad_len
    """
    cosine = self.cosine_similarity_to_u(embedded)  # batch_size, pad_len
    masked = cosine.masked_fill(~attention_mask.bool(), float('-inf'))
    attention = torch.softmax(masked, dim=1)  # batch_size, pad_len
    return attention
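
# Standalone sketch (a hypothetical module, not the original class) of the same
# masked-softmax pattern: cosine similarity to a learned context vector ``u``,
# with padded positions forced to -inf before the softmax so they get zero weight.
import torch
import torch.nn as nn

class CosineAttention(nn.Module):
    def __init__(self, embed_dim: int):
        super().__init__()
        self.u = nn.Parameter(torch.randn(embed_dim))
        self.cos = nn.CosineSimilarity(dim=-1)

    def forward(self, embedded: torch.Tensor, attention_mask: torch.LongTensor) -> torch.Tensor:
        cosine = self.cos(embedded, self.u)                        # batch_size, pad_len
        masked = cosine.masked_fill(~attention_mask.bool(), float('-inf'))
        return torch.softmax(masked, dim=1)                        # batch_size, pad_len

attn = CosineAttention(embed_dim=8)
weights = attn(torch.randn(2, 4, 8), torch.tensor([[1, 1, 1, 0], [1, 1, 0, 0]]))
assert torch.allclose(weights.sum(dim=1), torch.ones(2))
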
def __call__(self,
             prediction_match_labels: torch.Tensor,
             gold_match_labels: torch.Tensor,
             mask: torch.LongTensor) -> Dict:
    """
    Actually compute the F1 metric. Currently only the overall F1 (f1-overall) is
    computed; metrics for individual entity types are not. The original paper does
    not include them, so per-entity-type metrics are left out for now.
    TODO: add per-entity-type metric computation.
    :param prediction_match_labels: predicted match label indices
    :param gold_match_labels: gold label indices
    :param mask: mask
    :return: metric dict
    """
    mask = mask.bool()
    batch_size, seq_length = mask.size()
    match_label_mask = (mask.unsqueeze(-1).expand(-1, -1, seq_length)
                        & mask.unsqueeze(1).expand(-1, seq_length, -1))

    # take the upper triangle: start should be less than or equal to end
    match_label_mask = torch.triu(match_label_mask, 0)

    # count how many predictions and gold labels agree
    prediction_match_labels = prediction_match_labels & match_label_mask
    gold_match_labels = gold_match_labels & match_label_mask

    true_positive_value = (gold_match_labels & prediction_match_labels).long().sum()
    true_positives = {MRCF1Metric.All: true_positive_value}
    false_positive_value = (~gold_match_labels & prediction_match_labels).long().sum()
    false_positives = {MRCF1Metric.All: false_positive_value}
    false_negative_value = (gold_match_labels & ~prediction_match_labels).long().sum()
    false_negatives = {MRCF1Metric.All: false_negative_value}

    self._true_positives[MRCF1Metric.All] += true_positive_value
    self._false_positives[MRCF1Metric.All] += false_positive_value
    self._false_negatives[MRCF1Metric.All] += false_negative_value

    return self._metric(true_positives=true_positives,
                        false_positives=false_positives,
                        false_negatives=false_negatives)
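
# Tiny standalone check (illustrative tensors only) of the span-mask construction
# used in the metric above: a (start, end) pair is valid only if both positions
# are real tokens and start <= end (upper triangle).
import torch

mask = torch.tensor([[1, 1, 1, 0]]).bool()           # one sequence, 3 real tokens
seq_length = mask.size(1)
match_label_mask = (mask.unsqueeze(-1).expand(-1, -1, seq_length)
                    & mask.unsqueeze(1).expand(-1, seq_length, -1))
match_label_mask = torch.triu(match_label_mask, 0)

# 3 + 2 + 1 = 6 valid spans over 3 tokens.
assert match_label_mask.sum().item() == 6
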
def dot_product_self_attention(
        embedded: torch.Tensor, attention_mask: torch.LongTensor) -> torch.Tensor:
    """
    Self attention computed from dot product with other embedded word vectors in
    the same sequence.

    Args:
        embedded: batch_size, pad_len, embed_dim
        attention_mask: batch_size, pad_len
    Returns:
        batch_size, pad_len
    """
    summed_dot_prod = torch.bmm(embedded, embedded.transpose(1, 2)).sum(2)  # batch_size, pad_len
    masked = summed_dot_prod.masked_fill(~attention_mask.bool(), float('-inf'))
    attention = torch.softmax(masked, dim=1)  # batch_size, pad_len
    return attention
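
# Usage sketch for the function above (dummy shapes, purely illustrative):
# padded positions receive exactly zero attention weight and each row sums to one.
import torch

embedded = torch.randn(2, 4, 8)                                    # batch_size, pad_len, embed_dim
attention_mask = torch.tensor([[1, 1, 1, 0], [1, 1, 0, 0]])
weights = dot_product_self_attention(embedded, attention_mask)     # batch_size, pad_len
assert weights.shape == (2, 4)
assert torch.allclose(weights.sum(dim=1), torch.ones(2))
assert torch.all(weights[attention_mask == 0] == 0)
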
def forward(
    self,
    inputs: torch.FloatTensor,
    mask: torch.LongTensor,
    labels: torch.LongTensor = None,
    reduction: str = None,
) -> Dict[str, Any]:
    bool_mask = mask.bool()

    if self.tag_projection:
        inputs = self.tag_projection(inputs)

    scores = inputs * mask.unsqueeze(-1)
    best_paths = self.base.viterbi_tags(scores, bool_mask, top_k=self.top_k)

    # Just get the top tags and ignore the scores.
    tags = cast(List[List[int]], [x[0][0] for x in best_paths])

    if labels is None:
        loss = torch.tensor(0, dtype=torch.float, device=inputs.device)
    else:
        # Add negative log-likelihood as loss
        loss = self.base(scores, labels, bool_mask, reduction)

    return dict(scores=scores, predicted_tags=tags, loss=loss)
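
# Hedged sketch of the loss convention above, using a stand-in CRF: when labels
# are given, the loss is the CRF negative log-likelihood; otherwise a zero loss on
# the right device so callers can always use it. It is an assumption that
# ``self.base`` wraps something like AllenNLP's ConditionalRandomField; note the
# stock AllenNLP module returns a summed log-likelihood and takes no ``reduction``
# argument, so the negation below is made explicit here.
import torch
from allennlp.modules.conditional_random_field import ConditionalRandomField

crf = ConditionalRandomField(num_tags=3)
scores = torch.randn(2, 4, 3)                        # batch_size, seq_len, num_tags
labels = torch.tensor([[0, 1, 2, 0], [2, 1, 0, 0]])
bool_mask = torch.tensor([[1, 1, 1, 0], [1, 1, 0, 0]]).bool()

log_likelihood = crf(scores, labels, bool_mask)      # summed over the batch
loss = -log_likelihood                               # negative log-likelihood as loss
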
def forward(self,  # pylint: disable=arguments-differ
            embeddings: torch.FloatTensor,
            mask: torch.LongTensor,
            num_items_to_keep: Union[int, torch.LongTensor],
            class_scores: torch.FloatTensor = None,
            gold_labels: torch.LongTensor = None
            ) -> Tuple[torch.FloatTensor, torch.LongTensor, torch.LongTensor, torch.FloatTensor, torch.LongTensor]:
    """
    Extracts the top-k scoring items with respect to the scorer. We additionally return
    the indices of the top-k in their original order, not ordered by score, so that downstream
    components can rely on the original ordering (e.g., for knowing what spans are valid
    antecedents in a coreference resolution model). May use the same k for all sentences in
    the minibatch, or a different k for each.

    Parameters
    ----------
    embeddings : ``torch.FloatTensor``, required.
        A tensor of shape (batch_size, num_items, embedding_size), containing an embedding for
        each item in the list that we want to prune.
    mask : ``torch.LongTensor``, required.
        A tensor of shape (batch_size, num_items), denoting unpadded elements of ``embeddings``.
    num_items_to_keep : ``Union[int, torch.LongTensor]``, required.
        If a tensor of shape (batch_size), specifies the number of items to keep for each
        individual sentence in the minibatch. If an int, keep the same number of items for
        all sentences.
    class_scores:
        Class scores to be used with the entity beam.
    gold_labels:
        If in debugging mode, use gold labels to get the beam.

    Returns
    -------
    top_embeddings : ``torch.FloatTensor``
        The representations of the top-k scoring items.
        Has shape (batch_size, max_num_items_to_keep, embedding_size).
    top_mask : ``torch.LongTensor``
        The corresponding mask for ``top_embeddings``.
        Has shape (batch_size, max_num_items_to_keep).
    top_indices : ``torch.IntTensor``
        The indices of the top-k scoring items into the original ``embeddings`` tensor. This is
        returned because it can be useful to retain pointers to the original items, if each item
        is being scored by multiple distinct scorers, for instance.
        Has shape (batch_size, max_num_items_to_keep).
    top_item_scores : ``torch.FloatTensor``
        The values of the top-k scoring items.
        Has shape (batch_size, max_num_items_to_keep, 1).
    num_items_kept : ``torch.LongTensor``
        The number of items kept for each sentence. Has shape (batch_size).
    """
    # If an int was given for number of items to keep, construct tensor by repeating the value.
    if isinstance(num_items_to_keep, int):
        batch_size = mask.size(0)
        # Put the tensor on same device as the mask.
        num_items_to_keep = num_items_to_keep * torch.ones(
            [batch_size], dtype=torch.long, device=mask.device)

    mask = mask.unsqueeze(-1)
    num_items = embeddings.size(1)

    # Shape: (batch_size, num_items, 1)
    # If the entity beam is on, use the class scores; otherwise ignore them and use the scorer.
    if self._entity_beam:
        scores, _ = class_scores.max(dim=-1)
        scores = scores.unsqueeze(-1)
    # If the gold beam is on, give a score of 0 wherever the gold label is non-zero (indicating
    # a non-null label), otherwise give a large negative number.
    elif self._gold_beam:
        scores = torch.where(
            gold_labels > 0,
            torch.zeros_like(gold_labels, dtype=torch.float),
            -1e20 * torch.ones_like(gold_labels, dtype=torch.float))
        scores = scores.unsqueeze(-1)
    else:
        scores = self._scorer(embeddings)

    # If we're only keeping items that score above a given threshold, change the number of kept
    # items here.
    if self._min_score_to_keep is not None:
        num_good_items = torch.sum(scores > self._min_score_to_keep, dim=1).squeeze()
        num_items_to_keep = torch.min(num_items_to_keep, num_good_items)

    # If gold beam is on, keep the gold items.
    if self._gold_beam:
        num_items_to_keep = torch.sum(gold_labels > 0, dim=1)

    # Always keep at least one item to avoid edge case with empty matrix.
    max_items_to_keep = max(num_items_to_keep.max().item(), 1)

    if scores.size(-1) != 1 or scores.dim() != 3:
        raise ValueError(
            f"The scorer passed to Pruner must produce a tensor of shape"
            f"(batch_size, num_items, 1), but found shape {scores.size()}")

    # Make sure that we don't select any masked items by setting their scores to be very
    # negative. These are logits, typically, so -1e20 should be plenty negative.
    # NOTE(`mask` needs to be a byte tensor now.)
    scores = util.replace_masked_values(scores, mask.bool(), -1e20)

    # Shape: (batch_size, max_num_items_to_keep, 1)
    _, top_indices = scores.topk(max_items_to_keep, 1)

    # Mask based on number of items to keep for each sentence.
    # Shape: (batch_size, max_num_items_to_keep)
    top_indices_mask = util.get_mask_from_sequence_lengths(num_items_to_keep, max_items_to_keep)
    top_indices_mask = top_indices_mask.bool()

    # Shape: (batch_size, max_num_items_to_keep)
    top_indices = top_indices.squeeze(-1)

    # Fill all masked indices with largest "top" index for that sentence, so that all masked
    # indices will be sorted to the end.
    # Shape: (batch_size, 1)
    fill_value, _ = top_indices.max(dim=1)
    fill_value = fill_value.unsqueeze(-1)
    # Shape: (batch_size, max_num_items_to_keep)
    top_indices = torch.where(top_indices_mask, top_indices, fill_value)

    # Now we order the selected indices in increasing order with
    # respect to their indices (and hence, with respect to the
    # order they originally appeared in the ``embeddings`` tensor).
    top_indices, _ = torch.sort(top_indices, 1)

    # Shape: (batch_size * max_num_items_to_keep)
    # torch.index_select only accepts 1D indices, but here
    # we need to select items for each element in the batch.
    flat_top_indices = util.flatten_and_batch_shift_indices(top_indices, num_items)

    # Shape: (batch_size, max_num_items_to_keep, embedding_size)
    top_embeddings = util.batched_index_select(embeddings, top_indices, flat_top_indices)

    # Combine the masks on spans that are out-of-bounds, and the mask on spans that are outside
    # the top k for each sentence.
    # Shape: (batch_size, max_num_items_to_keep)
    sequence_mask = util.batched_index_select(mask, top_indices, flat_top_indices)
    sequence_mask = sequence_mask.squeeze(-1).bool()
    top_mask = top_indices_mask & sequence_mask
    top_mask = top_mask.long()

    # Shape: (batch_size, max_num_items_to_keep, 1)
    top_scores = util.batched_index_select(scores, top_indices, flat_top_indices)

    return top_embeddings, top_mask, top_indices, top_scores, num_items_to_keep
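
# Minimal standalone sketch (plain torch, no AllenNLP utils, illustrative scores)
# of the core pruning trick implemented above: take the top-k scores per sentence,
# then re-sort the kept indices so items come back in their original order.
import torch

scores = torch.tensor([[0.1, 0.9, 0.3, 0.7],
                       [0.5, 0.2, 0.8, 0.4]])        # batch_size=2, num_items=4
k = 2

_, top_indices = scores.topk(k, dim=1)               # ordered by score
top_indices, _ = torch.sort(top_indices, dim=1)      # restore original item order
print(top_indices)                                   # tensor([[1, 3], [0, 2]])
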