def concatenate_encoder_outputs(self,
                                    encoder_outputs: BaseModelOutputWithPastAndCrossAttentions,
                                    encoder_attention_mask: torch.LongTensor,
                                    passage_mask: Optional[torch.LongTensor] = None) -> \
            Tuple[BaseModelOutputWithPastAndCrossAttentions, torch.LongTensor]:
        # batch x contexts x sequence_len x hidden_dim
        hidden_states = encoder_outputs['last_hidden_state']

        assert hidden_states.shape[:2] == encoder_attention_mask.shape, \
            f"Shapes of mask and hidden states do not match! {str(hidden_states.shape[:2])} vs {str(encoder_attention_mask.shape)}"
        # Concatenate hidden states from all contexts along the sequence dimension.
        # Supported subsequence types: document, question+document.
        # batch x contexts x sequence_len x hidden_dim
        #   -> batch x (sum of unpadded subsequence lengths; variable size) x hidden_dim
        if self.config.fusion_strategy == "passages":
            assert passage_mask is not None, \
                "passage_mask must be provided for the 'passages' fusion strategy"
            mask = encoder_attention_mask.bool().view(-1) & passage_mask.bool().view(-1)
        elif self.config.fusion_strategy == "allinputs":
            mask = encoder_attention_mask.bool().view(-1)
        else:
            raise ValueError(f"Unsupported fusion strategy: {self.config.fusion_strategy}")
        hidden_states = hidden_states.view(
            -1, hidden_states.shape[-1])[mask].unsqueeze(0)
        # there is no padding now
        attention_mask = torch.ones(hidden_states.size()[:2],
                                    device=hidden_states.device,
                                    dtype=torch.long)
        encoder_outputs['last_hidden_state'] = hidden_states
        return encoder_outputs, attention_mask
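
The method above drops every padded position from the per-passage encoder outputs and fuses what remains into one long sequence for the decoder to cross-attend over (the Fusion-in-Decoder pattern). A minimal, self-contained sketch of that reshaping, with illustrative shapes in place of the snippet's config and model:

import torch

contexts, seq_len, hidden = 3, 5, 8
last_hidden_state = torch.randn(contexts, seq_len, hidden)
encoder_attention_mask = torch.tensor([[1, 1, 1, 0, 0],
                                       [1, 1, 1, 1, 0],
                                       [1, 1, 0, 0, 0]])

# Keep only unpadded token positions, then re-introduce a batch dimension of 1.
flat_mask = encoder_attention_mask.bool().view(-1)
fused = last_hidden_state.view(-1, hidden)[flat_mask].unsqueeze(0)

# Padding has been removed entirely, so the new attention mask is all ones.
new_attention_mask = torch.ones(fused.shape[:2], dtype=torch.long)
print(fused.shape)                # torch.Size([1, 9, 8])
print(new_attention_mask.shape)   # torch.Size([1, 9])
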
    def predict(self, inputs: torch.FloatTensor, mask: torch.LongTensor) -> List:
        bool_mask = mask.bool()
        if self.tag_projection:
            inputs = self.tag_projection(inputs)
        scores = inputs * mask.unsqueeze(-1)

        best_paths = self.base.viterbi_tags(scores, bool_mask, top_k=self.top_k)
        # Just get the top tags and ignore the scores.
        tags = cast(List[List[int]], [x[0][0] for x in best_paths])

        return tags
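
A small check of the masking step above: multiplying by mask.unsqueeze(-1) broadcasts the (batch, seq_len) mask over the tag dimension, zeroing the logits of padded timesteps before Viterbi decoding. The sketch below is self-contained and its values are illustrative:

import torch

inputs = torch.randn(2, 4, 3)          # (batch, seq_len, num_tags)
mask = torch.tensor([[1, 1, 1, 0],
                     [1, 1, 0, 0]])
scores = inputs * mask.unsqueeze(-1)   # (batch, seq_len, num_tags)
print(scores[0, 3], scores[1, 2])      # all zeros at the padded positions
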
Example #3
    def forward(self, embedded: torch.Tensor,
                attention_mask: torch.LongTensor) -> torch.Tensor:
        """

        Args:
            embedded: batch_size, pad_len, embed_dim
            attention_mask: batch_size, pad_len

        Returns:
            batch_size, pad_len
        """
        cosine = self.cosine_similarity_to_u(embedded)  # batch_size, pad_len
        masked = cosine.masked_fill(~attention_mask.bool(), float('-inf'))
        attention = torch.softmax(masked, dim=1)  # batch_size, pad_len
        return attention
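
A typical way to consume the attention weights returned above is a masked weighted sum that pools the sequence into one vector per example. Since masked positions were filled with -inf before the softmax, they receive exactly zero weight. The pooling sketch below is illustrative and stands apart from the module:

import torch

batch_size, pad_len, embed_dim = 2, 4, 6
embedded = torch.randn(batch_size, pad_len, embed_dim)
attention = torch.softmax(torch.randn(batch_size, pad_len), dim=1)

# (batch, 1, pad_len) @ (batch, pad_len, embed_dim) -> (batch, 1, embed_dim)
pooled = torch.bmm(attention.unsqueeze(1), embedded).squeeze(1)
print(pooled.shape)  # torch.Size([2, 6])
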
Example #4
    def __call__(self, prediction_match_labels: torch.Tensor,
                 gold_match_labels: torch.Tensor,
                 mask: torch.LongTensor) -> Dict:
        """
        真正的计算 metric f1。
        目前仅仅计算了全部的 F1, 也就是 f1-overall, 而对于每一个实体的指标并没有计算。
        原始的论文中没有这一部分,所以暂时没有计算每一种类型实体的指标。
        ToDo:// 增加每一种实体类型的指标计算
        :param prediction_match_labels: 预测的 match label indices
        :param gold_match_labels: golden label indices
        :param mask: mask
        :return: metric dict
        """
        mask = mask.bool()
        batch_size, seq_length = mask.size()

        match_label_mask = (mask.unsqueeze(-1).expand(-1, -1, seq_length)
                            & mask.unsqueeze(1).expand(-1, seq_length, -1))

        # Keep only the upper triangle: a span start must be less than or equal to its end
        match_label_mask = torch.triu(match_label_mask, 0)

        # Determine how many predictions agree with the gold labels
        prediction_match_labels = prediction_match_labels & match_label_mask
        gold_match_labels = gold_match_labels & match_label_mask

        true_positive_value = (gold_match_labels
                               & prediction_match_labels).long().sum()
        true_positives = {MRCF1Metric.All: true_positive_value}
        false_positive_value = (~gold_match_labels
                                & prediction_match_labels).long().sum()
        false_positives = {MRCF1Metric.All: false_positive_value}
        false_negative_value = (gold_match_labels
                                & ~prediction_match_labels).long().sum()
        false_negatives = {MRCF1Metric.All: false_negative_value}

        self._true_positives[MRCF1Metric.All] += true_positive_value
        self._false_positives[MRCF1Metric.All] += false_positive_value
        self._false_negatives[MRCF1Metric.All] += false_negative_value

        return self._metric(true_positives=true_positives,
                            false_positives=false_positives,
                            false_negatives=false_negatives)
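
The returned dict is produced by self._metric, which presumably turns the accumulated counts into precision, recall, and F1. A hypothetical stand-alone version of that computation, for reference only:

def span_f1(true_positives: int, false_positives: int, false_negatives: int) -> dict:
    # Standard precision/recall/F1 over matched spans, guarded against division by zero.
    precision = true_positives / max(true_positives + false_positives, 1)
    recall = true_positives / max(true_positives + false_negatives, 1)
    f1 = 2 * precision * recall / max(precision + recall, 1e-13)
    return {"precision": precision, "recall": recall, "f1-overall": f1}

print(span_f1(true_positives=8, false_positives=2, false_negatives=4))
# precision 0.8, recall ~0.67, f1-overall ~0.73
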
Example #5
def dot_product_self_attention(
        embedded: torch.Tensor,
        attention_mask: torch.LongTensor) -> torch.Tensor:
    """
    Self attention computed from dot product with other embedded word
    vectors in the same sequence.

    Args:
        embedded: batch_size, pad_len, embed_dim
        attention_mask: batch_size, pad_len

    Returns:
        batch_size, pad_len
    """
    summed_dot_prod = torch.bmm(embedded, embedded.transpose(1, 2)).sum(
        2)  # batch_size, pad_len
    masked = summed_dot_prod.masked_fill(~attention_mask.bool(), float('-inf'))
    attention = torch.softmax(masked, dim=1)  # batch_size, pad_len
    return attention
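
A quick check of the masking behaviour used in both attention helpers: positions filled with -inf get zero weight after the softmax, and the remaining weights sum to 1. (A row with no valid tokens would produce NaNs, so callers are assumed to pass at least one real token per sequence.)

import torch

scores = torch.tensor([[2.0, 1.0, 3.0, 0.5]])
attention_mask = torch.tensor([[1, 1, 1, 0]])
masked = scores.masked_fill(~attention_mask.bool(), float('-inf'))
attention = torch.softmax(masked, dim=1)
print(attention)             # last position is exactly 0
print(attention.sum(dim=1))  # tensor([1.])
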
    def forward(
        self, inputs: torch.FloatTensor, mask: torch.LongTensor,
        labels: torch.LongTensor = None, reduction: str = None,
    ) -> Dict[str, Any]:
        bool_mask = mask.bool()
        if self.tag_projection:
            inputs = self.tag_projection(inputs)
        scores = inputs * mask.unsqueeze(-1)

        best_paths = self.base.viterbi_tags(scores, bool_mask, top_k=self.top_k)
        # Just get the top tags and ignore the scores.
        tags = cast(List[List[int]], [x[0][0] for x in best_paths])

        if labels is None:
            loss = torch.tensor(0, dtype=torch.float, device=inputs.device)
        else:
            # Add negative log-likelihood as loss
            loss = self.base(scores, labels, bool_mask, reduction)

        return dict(scores=scores, predicted_tags=tags, loss=loss)
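
A sketch of how the [x[0][0] for x in best_paths] unpacking works, assuming viterbi_tags(..., top_k=k) returns one list of (tag_sequence, score) pairs per instance, best path first (the AllenNLP-style CRF convention); the data below is fabricated purely to show the structure:

from typing import List, Tuple, cast

best_paths: List[List[Tuple[List[int], float]]] = [
    [([1, 2, 2, 0], -3.1), ([1, 2, 0, 0], -4.7)],   # instance 0, top-2 paths
    [([0, 3, 3, 3], -2.2), ([0, 3, 3, 0], -2.9)],   # instance 1, top-2 paths
]
tags = cast(List[List[int]], [x[0][0] for x in best_paths])
print(tags)  # [[1, 2, 2, 0], [0, 3, 3, 3]]
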
    def forward(
        self,  # pylint: disable=arguments-differ
        embeddings: torch.FloatTensor,
        mask: torch.LongTensor,
        num_items_to_keep: Union[int, torch.LongTensor],
        class_scores: torch.FloatTensor = None,
        gold_labels: torch.LongTensor = None
    ) -> Tuple[torch.FloatTensor, torch.LongTensor, torch.LongTensor,
               torch.FloatTensor, torch.LongTensor]:
        """
        Extracts the top-k scoring items with respect to the scorer. We additionally return
        the indices of the top-k in their original order, not ordered by score, so that downstream
        components can rely on the original ordering (e.g., for knowing what spans are valid
        antecedents in a coreference resolution model). May use the same k for all sentences in
        minibatch, or different k for each.

        Parameters
        ----------
        embeddings : ``torch.FloatTensor``, required.
            A tensor of shape (batch_size, num_items, embedding_size), containing an embedding for
            each item in the list that we want to prune.
        mask : ``torch.LongTensor``, required.
            A tensor of shape (batch_size, num_items), denoting unpadded elements of
            ``embeddings``.
        num_items_to_keep : ``Union[int, torch.LongTensor]``, required.
            If a tensor of shape (batch_size), specifies the number of items to keep for each
            individual sentence in minibatch.
            If an int, keep the same number of items for all sentences.
        class_scores : ``torch.FloatTensor``, optional.
            Class scores to be used with the entity beam.
        gold_labels : ``torch.LongTensor``, optional.
            If in debugging mode, use the gold labels to get the beam.

        Returns
        -------
        top_embeddings : ``torch.FloatTensor``
            The representations of the top-k scoring items.
            Has shape (batch_size, max_num_items_to_keep, embedding_size).
        top_mask : ``torch.LongTensor``
            The corresponding mask for ``top_embeddings``.
            Has shape (batch_size, max_num_items_to_keep).
        top_indices : ``torch.IntTensor``
            The indices of the top-k scoring items into the original ``embeddings``
            tensor. This is returned because it can be useful to retain pointers to
            the original items, if each item is being scored by multiple distinct
            scorers, for instance. Has shape (batch_size, max_num_items_to_keep).
        top_item_scores : ``torch.FloatTensor``
            The values of the top-k scoring items.
            Has shape (batch_size, max_num_items_to_keep, 1).
        num_items_kept : ``torch.LongTensor``
            The number of items kept for each sentence in the batch.
            Has shape (batch_size).
        """
        # If an int was given for number of items to keep, construct tensor by repeating the value.
        if isinstance(num_items_to_keep, int):
            batch_size = mask.size(0)
            # Put the tensor on same device as the mask.
            num_items_to_keep = num_items_to_keep * torch.ones(
                [batch_size], dtype=torch.long, device=mask.device)

        mask = mask.unsqueeze(-1)
        num_items = embeddings.size(1)

        # Shape: (batch_size, num_items, 1)
        # If entity beam is one, use the class scores. Else ignore them and use the scorer.
        if self._entity_beam:
            scores, _ = class_scores.max(dim=-1)
            scores = scores.unsqueeze(-1)
        # If gold beam is one, give a score of 0 wherever the gold label is non-zero (indicating a
        # non-null label), otherwise give a large negative number.
        elif self._gold_beam:
            scores = torch.where(
                gold_labels > 0,
                torch.zeros_like(gold_labels, dtype=torch.float),
                -1e20 * torch.ones_like(gold_labels, dtype=torch.float))
            scores = scores.unsqueeze(-1)
        else:
            scores = self._scorer(embeddings)

        # If we're only keeping items that score above a given threshold, change the number of kept
        # items here.
        if self._min_score_to_keep is not None:
            num_good_items = torch.sum(scores > self._min_score_to_keep,
                                       dim=1).squeeze()
            num_items_to_keep = torch.min(num_items_to_keep, num_good_items)
        # If gold beam is on, keep the gold items.
        if self._gold_beam:
            num_items_to_keep = torch.sum(gold_labels > 0, dim=1)

        # Always keep at least one item to avoid edge case with empty matrix.
        max_items_to_keep = max(num_items_to_keep.max().item(), 1)

        if scores.size(-1) != 1 or scores.dim() != 3:
            raise ValueError(
                f"The scorer passed to Pruner must produce a tensor of shape"
                f"(batch_size, num_items, 1), but found shape {scores.size()}")
        # Make sure that we don't select any masked items by setting their scores to be very
        # negative.  These are logits, typically, so -1e20 should be plenty negative.
        # NOTE(`mask` needs to be a byte tensor now.)
        scores = util.replace_masked_values(scores, mask.bool(), -1e20)

        # Shape: (batch_size, max_num_items_to_keep, 1)
        _, top_indices = scores.topk(max_items_to_keep, 1)

        # Mask based on number of items to keep for each sentence.
        # Shape: (batch_size, max_num_items_to_keep)
        top_indices_mask = util.get_mask_from_sequence_lengths(
            num_items_to_keep, max_items_to_keep)
        top_indices_mask = top_indices_mask.bool()

        # Shape: (batch_size, max_num_items_to_keep)
        top_indices = top_indices.squeeze(-1)

        # Fill all masked indices with largest "top" index for that sentence, so that all masked
        # indices will be sorted to the end.
        # Shape: (batch_size, 1)
        fill_value, _ = top_indices.max(dim=1)
        fill_value = fill_value.unsqueeze(-1)
        # Shape: (batch_size, max_num_items_to_keep)
        top_indices = torch.where(top_indices_mask, top_indices, fill_value)

        # Now we order the selected indices in increasing order with
        # respect to their indices (and hence, with respect to the
        # order they originally appeared in the ``embeddings`` tensor).
        top_indices, _ = torch.sort(top_indices, 1)

        # Shape: (batch_size * max_num_items_to_keep)
        # torch.index_select only accepts 1D indices, but here
        # we need to select items for each element in the batch.
        flat_top_indices = util.flatten_and_batch_shift_indices(
            top_indices, num_items)

        # Shape: (batch_size, max_num_items_to_keep, embedding_size)
        top_embeddings = util.batched_index_select(embeddings, top_indices,
                                                   flat_top_indices)

        # Combine the masks on spans that are out-of-bounds, and the mask on spans that are outside
        # the top k for each sentence.
        # Shape: (batch_size, max_num_items_to_keep)
        sequence_mask = util.batched_index_select(mask, top_indices,
                                                  flat_top_indices)
        sequence_mask = sequence_mask.squeeze(-1).bool()
        top_mask = top_indices_mask & sequence_mask
        top_mask = top_mask.long()

        # Shape: (batch_size, max_num_items_to_keep, 1)
        top_scores = util.batched_index_select(scores, top_indices,
                                               flat_top_indices)

        return top_embeddings, top_mask, top_indices, top_scores, num_items_to_keep
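
A minimal, self-contained sketch of the pruner's core trick: take the top-k scores, then re-sort the surviving indices so downstream components see them in their original order. Plain torch is used in place of the AllenNLP utilities, and the values are illustrative:

import torch

scores = torch.tensor([[0.1, 0.9, 0.4, 0.8, 0.2]])   # (batch=1, num_items=5)
k = 3

_, top_indices = scores.topk(k, dim=1)                # indices ordered by score: [[1, 3, 2]]
top_indices, _ = torch.sort(top_indices, dim=1)       # restore original order:  [[1, 2, 3]]

top_scores = scores.gather(1, top_indices)
print(top_indices)   # tensor([[1, 2, 3]])
print(top_scores)    # tensor([[0.9000, 0.4000, 0.8000]])
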