Example No. 1
  def forward(self,
              sequence_tensor: torch.FloatTensor,
              span_indices: torch.LongTensor,
              sequence_mask: torch.LongTensor = None,
              span_indices_mask: torch.LongTensor = None):
    span_starts, span_ends = [index.squeeze(-1) for index in span_indices.split(1, dim=-1)]

    if span_indices_mask is not None:
      span_starts = span_starts * span_indices_mask.long()
      span_ends = span_ends * span_indices_mask.long()

    # The span indices are exclusive on the right, so subtract 1 from span_ends to get the inclusive end indices.

    start_embeddings = utils.batched_index_select(sequence_tensor, span_starts)
    inclusive_span_ends = torch.relu((span_ends - 1).float()).long()
    end_embeddings = utils.batched_index_select(sequence_tensor, inclusive_span_ends)

    combined_tensors = torch.cat([start_embeddings, end_embeddings], dim=-1)

    if self._span_width_embedding is not None:
      # Embed the span widths and concatenate to the rest of the representations.
      if self._bucket_widths:
        span_widths = utils.bucket_values(span_ends - span_starts,
                                          num_total_buckets=self._num_width_embeddings)
      else:
        span_widths = span_ends - span_starts

      span_width_embeddings = self._span_width_embedding(span_widths)
      combined_tensors = torch.cat([combined_tensors, span_width_embeddings], dim=-1)

    if span_indices_mask is not None:
      return combined_tensors * span_indices_mask.unsqueeze(-1).float()

    return combined_tensors
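
A minimal, self-contained sketch of the endpoint gathering above, assuming utils.batched_index_select behaves like a batched torch.gather over the sequence dimension (all names and sizes below are illustrative):

import torch

def batched_index_select(target, indices):
    # target: (batch, seq_len, dim); indices: (batch, num_spans)
    expanded = indices.unsqueeze(-1).expand(-1, -1, target.size(-1))
    return target.gather(1, expanded)

batch_size, seq_len, dim = 2, 7, 4
sequence_tensor = torch.randn(batch_size, seq_len, dim)
# (batch, num_spans, 2) with exclusive end indices
span_indices = torch.tensor([[[0, 2], [1, 4], [3, 7]],
                             [[2, 3], [0, 5], [4, 6]]])

span_starts, span_ends = [ix.squeeze(-1) for ix in span_indices.split(1, dim=-1)]
inclusive_span_ends = (span_ends - 1).clamp(min=0)

start_embeddings = batched_index_select(sequence_tensor, span_starts)
end_embeddings = batched_index_select(sequence_tensor, inclusive_span_ends)
combined = torch.cat([start_embeddings, end_embeddings], dim=-1)
print(combined.shape)  # torch.Size([2, 3, 8])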
Example No. 2
    def forward(  # type: ignore
            self,
            embedded_text: TextFieldTensors,
            gold_labels: torch.LongTensor = None) -> Dict[str, torch.Tensor]:
        """
        # Parameters
        embedded_text : `TextFieldTensors`
            From a `TextField`
        gold_labels : `torch.IntTensor`, optional (default = `None`)
            From a `LabelField`
        # Returns
        An output dictionary consisting of:
            - `logits` (`torch.FloatTensor`) :
                A tensor of shape `(batch_size, num_labels)` representing
                unnormalized log probabilities of the label.
            - `class_probabilities` (`torch.FloatTensor`) :
                A tensor of shape `(batch_size, num_labels)` representing
                probabilities of the label.
            - `loss` : (`torch.FloatTensor`, optional) :
                A scalar loss to be optimised.
        """
        logits = self._classification_layer(embedded_text)
        probs = torch.nn.functional.softmax(logits, dim=-1)

        output_dict = {"logits": logits, "class_probabilities": probs}

        if gold_labels is not None:
            output_dict['loss'] = self._loss(
                logits,
                gold_labels.long().view(-1)) * self.loss_weight
            for metric in self.metrics.values():
                metric(logits, gold_labels)

        return output_dict
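
A hedged usage sketch of the logits-to-probabilities-and-loss pattern above, assuming the model's loss is a standard cross-entropy (toy sizes, no real model):

import torch
import torch.nn.functional as F

batch_size, num_labels = 4, 3
logits = torch.randn(batch_size, num_labels)
gold_labels = torch.tensor([0, 2, 1, 2])

probs = F.softmax(logits, dim=-1)                        # each row sums to 1
loss = F.cross_entropy(logits, gold_labels.long().view(-1))
print(probs.sum(dim=-1), loss)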
Example No. 3
    def _prepare_decode_step_input(
        self,
        input_indices: torch.LongTensor,
        decoder_hidden_state: torch.LongTensor = None,
        encoder_outputs: torch.LongTensor = None,
        encoder_outputs_mask: torch.LongTensor = None,
    ) -> torch.LongTensor:
        """
        Given the input indices for the current timestep of the decoder, and all the encoder
        outputs, compute the input at the current timestep.  Note: This method is agnostic to
        whether the indices are gold indices or the predictions made by the decoder at the last
        timestep.

        If we're not using attention, the output of this method is just an embedding of the input
        indices.  If we are, the output will be a concatenation of the embedding and an attended
        average of the encoder inputs.

        Parameters
        ----------
        input_indices : torch.LongTensor
            Indices of either the gold inputs to the decoder or the predicted labels from the
            previous timestep.
        decoder_hidden_state : torch.LongTensor, optional (not needed if no attention)
            Output from the decoder at the last time step. Needed only if using attention.
        encoder_outputs : torch.LongTensor, optional (not needed if no attention)
            Encoder outputs from all time steps. Needed only if using attention.
        encoder_outputs_mask : torch.LongTensor, optional (not needed if no attention)
            Masks on encoder outputs. Needed only if using attention.
        """
        input_indices = input_indices.long()
        # input_indices : (batch_size,)  since we are processing these one timestep at a time.
        # (batch_size, target_embedding_dim)
        embedded_input = self._target_embedder(input_indices)

        if self._decoder_attention is not None:
            # encoder_outputs : (batch_size, input_sequence_length, encoder_output_dim)
            # Ensuring mask is also a FloatTensor. Or else the multiplication within attention will
            # complain.

            # important - need to use zero-masking instead of -inf for attention
            # I've checked that doing this doesn't significantly increase time
            # per batch, but should consider only doing once
            encoder_outputs.data.masked_fill_(
                ~encoder_outputs_mask.bool(), 0.0)

            encoder_outputs = 0.5 * encoder_outputs
            encoder_outputs_mask = encoder_outputs_mask.float()
            encoder_outputs_mask = encoder_outputs_mask[:, :, 0]
            # (batch_size, input_sequence_length)
            attention_input = torch.cat((decoder_hidden_state, embedded_input),
                                        1)
            input_weights = self._decoder_attention(attention_input,
                                                    encoder_outputs,
                                                    encoder_outputs_mask)
            # (batch_size, input_dim)
            attended_input = weighted_sum(encoder_outputs, input_weights)
            # (batch_size, input_dim + target_embedding_dim)
            return torch.cat((attended_input, embedded_input), -1)
        else:
            return embedded_input
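
A simplified sketch of the attend-then-concatenate step. It assumes a plain dot-product attention with the decoder hidden state as the query and -inf masking, whereas the method above zero-masks the encoder outputs and queries with the concatenated hidden state and embedding; sizes are illustrative:

import torch
import torch.nn.functional as F

batch, src_len, enc_dim, emb_dim = 2, 5, 4, 3
encoder_outputs = torch.randn(batch, src_len, enc_dim)
encoder_mask = torch.tensor([[1, 1, 1, 0, 0],
                             [1, 1, 1, 1, 1]])
decoder_hidden = torch.randn(batch, enc_dim)
embedded_input = torch.randn(batch, emb_dim)

scores = torch.bmm(encoder_outputs, decoder_hidden.unsqueeze(-1)).squeeze(-1)
scores = scores.masked_fill(encoder_mask == 0, float('-inf'))
input_weights = F.softmax(scores, dim=-1)               # (batch, src_len)
attended_input = torch.bmm(input_weights.unsqueeze(1), encoder_outputs).squeeze(1)
decode_step_input = torch.cat((attended_input, embedded_input), -1)
print(decode_step_input.shape)                          # torch.Size([2, 7])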
Example No. 4
    def calculate_instance_loss(self,
                                predictions: torch.FloatTensor,
                                targets: torch.LongTensor,
                                mode: str,
                                as_numpy: bool = False) -> dict:
        """Calculate loss per instance in a batch

        :param predictions: Predictions (Predicted)
        :type predictions: torch.FloatTensor
        :param targets: Targets (Ground Truth)
        :type targets: torch.LongTensor
        :param mode: train/val/test
        :type mode: str
        :param as_numpy: flag to decide whether to return losses as np.ndarray
        :type as_numpy: bool

        :return: dict of losses with list of loss values per instance
        """
        loss_config = self.model_config.get('loss')[mode]
        criterion = loss_factory.create(loss_config['name'],
                                        **loss_config['params'])

        # correct data type to handle mismatch between
        # CrossEntropyLoss and BCEWithLogitsLoss
        if loss_config['name'] == 'cross-entropy':
            targets = targets.long()

        loss = criterion(predictions, targets)

        if as_numpy:
            loss = loss.cpu().numpy()

        return {'loss': loss}
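
A small sketch of the dtype mismatch the cross-entropy branch above handles: nn.CrossEntropyLoss expects long class indices while nn.BCEWithLogitsLoss expects float targets (all values illustrative):

import torch
import torch.nn as nn

predictions = torch.randn(4, 3)                  # (batch, num_classes) logits
targets = torch.tensor([0, 2, 1, 1])

ce = nn.CrossEntropyLoss(reduction='none')       # per-instance losses
print(ce(predictions, targets.long()))           # shape (4,)

binary_logits = torch.randn(4)
binary_targets = torch.tensor([1, 0, 1, 0])
bce = nn.BCEWithLogitsLoss(reduction='none')
print(bce(binary_logits, binary_targets.float()))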
Example No. 5
    def forward(self,
                images: torch.Tensor,
                objects: torch.LongTensor,
                segms: torch.Tensor,
                boxes: torch.Tensor,
                box_mask: torch.LongTensor,
                question: Dict[str, torch.Tensor],
                question_tags: torch.LongTensor,
                question_mask: torch.LongTensor,
                answers: Dict[str, torch.Tensor],
                answer_tags: torch.LongTensor,
                answer_mask: torch.LongTensor,
                metadata: List[Dict[str, Any]] = None,
                label: torch.LongTensor = None) -> Dict[str, torch.Tensor]:
        """
        :param images: [batch_size, 3, im_height, im_width]
        :param objects: [batch_size, max_num_objects] Padded objects
        :param boxes:  [batch_size, max_num_objects, 4] Padded boxes
        :param box_mask: [batch_size, max_num_objects] Mask for whether or not each box is OK
        :param question: AllenNLP representation of the question. [batch_size, num_answers, seq_length]
        :param question_tags: A detection label for each item in the Q [batch_size, num_answers, seq_length]
        :param question_mask: Mask for the Q [batch_size, num_answers, seq_length]
        :param answers: AllenNLP representation of the answer. [batch_size, num_answers, seq_length]
        :param answer_tags: A detection label for each item in the A [batch_size, num_answers, seq_length]
        :param answer_mask: Mask for the As [batch_size, num_answers, seq_length]
        :param metadata: Ignore, this is about which dataset item we're on
        :param label: Optional, which item is valid
        """
        features = self.trunk.forward(
            images,
            objects,
            segms,
            boxes,
            box_mask,
            question,
            question_tags,
            question_mask,
            answers,
            answer_tags,
            answer_mask,
        )

        logits = self.final_mlp(features['pooled_rep']).squeeze(2)
        class_probabilities = F.softmax(logits, dim=-1)

        output_dict = {
            'label_logits': logits,
            'label_probs': class_probabilities,
            'cnn_regularization_loss': features['cnn_regularization_loss'],
            # Uncomment to visualize attention, if you want
            # 'qa_attention_weights': features['qa_attention_weights'],
            # 'atoo_attention_weights': features['atoo_attention_weights'],
        }

        if label is not None:
            loss = self._loss(logits, label.long().view(-1))
            self._accuracy(logits, label)
            output_dict["loss"] = loss[None]

        return output_dict
Example No. 6
def sequence_ctc_loss_with_logits(
    logits: torch.FloatTensor,
    logit_mask: Union[torch.FloatTensor, torch.BoolTensor],
    targets: torch.LongTensor,
    target_mask: Union[torch.FloatTensor, torch.BoolTensor],
    blank_index: torch.LongTensor
) -> torch.FloatTensor:

    # lengths : (batch_size, )
    # calculated by counting number of mask
    logit_lengths = (logit_mask.bool()).long().sum(1)
    target_lengths = (target_mask.bool()).long().sum(1)

    # log_logits : (T, batch_size, n_class), this kind of shape is required for ctc_loss
    #log_logits = logits + (logit_mask.unsqueeze(-1) + 1e-45).log()
    log_logits = logits.log_softmax(-1).transpose(0, 1)
    targets = targets.long()

    loss = F.ctc_loss(log_logits, 
                      targets, 
                      logit_lengths, 
                      target_lengths,
                      blank=blank_index,
                      reduction='mean')
    
    if (logit_lengths < target_lengths).sum() > 0:
        raise ValueError("The predicted alignment is shorter than the target length; "
                         "increase the upsample factor.")

    return loss
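
A hedged usage sketch of sequence_ctc_loss_with_logits with toy shapes (2 sequences, 6 frames, 5 classes, blank index 0); all values are illustrative:

import torch

logits = torch.randn(2, 6, 5)                    # (batch, T, n_class)
logit_mask = torch.tensor([[1, 1, 1, 1, 1, 1],
                           [1, 1, 1, 1, 0, 0]])
targets = torch.tensor([[1, 2, 3],
                        [2, 4, 0]])              # trailing 0 is padding
target_mask = torch.tensor([[1, 1, 1],
                            [1, 1, 0]])

loss = sequence_ctc_loss_with_logits(logits, logit_mask, targets, target_mask, blank_index=0)
print(loss)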
Example No. 7
    def forward(
            self,
            tokens: Dict[str, torch.LongTensor],
            input_mask: torch.LongTensor,
            tags: torch.LongTensor = None,
            labels: torch.LongTensor = None,
            metadata: List[Dict[str, Any]] = None,
            # pylint: disable=unused-argument
            **kwargs) -> Dict[str, torch.Tensor]:
        transformed_tokens = self._text_field_embedder(tokens)
        first_token_tensor = transformed_tokens[:, 0, :]
        encoded_text = transformed_tokens[:, 1:, :]
        pooled_output = self.dropout(
            torch.tanh(self._feedforward(first_token_tensor)))
        tag_logits = self._tag_feedforward(encoded_text)
        mask = input_mask[:, 1:].long()
        best_paths = self.crf.viterbi_tags(tag_logits, mask)
        intent_logits = self._intent_feedforward(pooled_output)
        intent_probs = torch.nn.functional.softmax(intent_logits, dim=-1)
        # Just get the tags and ignore the score.
        predicted_tags = [x for x, y in best_paths]
        output = {
            'tag_logits': tag_logits,
            'mask': input_mask,
            'tags': predicted_tags,
            'intent_probs': intent_probs
        }
        if tags is not None:
            # Add negative log-likelihood as loss
            tags = tags[:, 1:]
            log_likelihood = self.crf(tag_logits, tags, mask)
            output["slot_loss"] = -log_likelihood

            # Represent viterbi tags as "class probabilities" that we can
            # feed into the metrics
            class_probabilities = tag_logits * 0.
            for i, instance_tags in enumerate(predicted_tags):
                for j, tag_id in enumerate(instance_tags):
                    class_probabilities[i, j, tag_id] = 1
            mask = mask.float()
            # for metric in self.metrics.values():
            #     metric(class_probabilities, tags.contiguous(), mask)
            if self.calculate_span_f1:
                self._f1_metric(class_probabilities, tags, mask)
        if labels is not None:
            output["intents_loss"] = self._intent_loss(intent_logits,
                                                       labels.long().view(-1))
            self._intent_accuracy(intent_logits, labels)
            self._intent_accuracy_3(intent_logits, labels)
        if metadata is not None:
            output["words"] = [x["words"] for x in metadata]

        if 'slot_loss' in output and 'intents_loss' in output:
            output["loss"] = output["slot_loss"] + output["intents_loss"]
        elif 'slot_loss' in output:
            output["loss"] = output["slot_loss"]
        elif 'intents_loss' in output:
            output["loss"] = output["intents_loss"]
        return output
Example No. 8
def column_gather(y_out: torch.FloatTensor,
                  x_lengths: torch.LongTensor) -> torch.FloatTensor:
    x_lengths = x_lengths.long().detach().cpu().numpy() - 1

    out = []
    for batch_index, column_index in enumerate(x_lengths):
        out.append(y_out[batch_index, column_index])

    return torch.stack(out)
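
A hedged usage sketch: column_gather picks the output at the last valid timestep of each sequence; the gather-based form below is an equivalent vectorized alternative (assuming all lengths are at least 1):

import torch

y_out = torch.randn(3, 5, 4)                     # (batch, max_len, hidden)
x_lengths = torch.tensor([5, 2, 3])

last_states = column_gather(y_out, x_lengths)    # (3, 4)

idx = (x_lengths - 1).view(-1, 1, 1).expand(-1, 1, y_out.size(-1))
assert torch.allclose(last_states, y_out.gather(1, idx).squeeze(1))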
Example No. 9
    def forward(self, attention_mask: torch.LongTensor, past_key_values_length: int = 0):
        """`input_ids_shape` is expected to be [bsz x seqlen]."""
        attention_mask = attention_mask.long()

        # create positions depending on attention_mask
        positions = (torch.cumsum(attention_mask, dim=1).type_as(attention_mask) * attention_mask).long() - 1

        # cut positions if `past_key_values_length` is > 0
        positions = positions[:, past_key_values_length:]

        return super().forward(positions + self.offset)
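
A minimal sketch of the position computation above: padded positions come out as -1 and real tokens are numbered from 0 (values illustrative):

import torch

attention_mask = torch.tensor([[1, 1, 1, 0, 0],
                               [1, 1, 1, 1, 1]])
positions = (torch.cumsum(attention_mask, dim=1).type_as(attention_mask) * attention_mask).long() - 1
print(positions)
# tensor([[ 0,  1,  2, -1, -1],
#         [ 0,  1,  2,  3,  4]])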
Example No. 10
    def forward(self,
                text: Dict[str, torch.LongTensor],
                predicate_indicator: torch.LongTensor,
                labeled_spans: torch.LongTensor,
                **kwargs):
        span_mask = (labeled_spans[:, :, 0] >= 0).long()

        span_slot_labels = []
        for i, n in enumerate(self.slot_labels):
            if 'span_slot_%s'%n in kwargs and kwargs['span_slot_%s'%n] is not None:
                span_slot_labels.append(kwargs['span_slot_%s'%n] * span_mask)
        if len(span_slot_labels) == 0:
            span_slot_labels = None

        embedded_text_input = self.embedding_dropout(self.text_field_embedder(text))
        mask = get_text_field_mask(text)
        embedded_predicate_indicator = self.predicate_feature_embedding(predicate_indicator.long())
 
        embedded_text_with_predicate_indicator = torch.cat([embedded_text_input, embedded_predicate_indicator], -1)
        batch_size, sequence_length, embedding_dim_with_predicate_feature = embedded_text_with_predicate_indicator.size()

        if self.stacked_encoder.get_input_dim() != embedding_dim_with_predicate_feature:
            raise ConfigurationError("The SRL model uses an indicator feature, which makes "
                                     "the embedding dimension one larger than the value "
                                     "specified. Therefore, the 'input_dim' of the stacked_encoder "
                                     "must be equal to total_embedding_dim + 1.")

        encoded_text = self.stacked_encoder(embedded_text_with_predicate_indicator, mask)

        span_reps = self.span_extractor(encoded_text, labeled_spans, sequence_mask=mask, span_indices_mask=span_mask)

        output_dict = {}
        slot_logits = self.question_generator(span_reps, slot_labels=span_slot_labels)
        for i, n in enumerate(self.slot_labels):
            # Replace scores for padding and unk
            slot_logits[i][:,:,0:2] -= 9999999

            output_dict["slot_logits_%s"%n] = slot_logits[i]

        loss = None
        if span_slot_labels is not None:
            for i, n in enumerate(self.slot_labels):
                slot_loss = sequence_cross_entropy_with_logits(slot_logits[i], span_slot_labels[i], span_mask.float())
                if loss is None:
                    loss = slot_loss
                else:
                    loss += slot_loss
            self.question_metric(slot_logits, span_slot_labels, labeled_spans, mask=span_mask, sequence_mask=mask)
            output_dict["loss"] = loss

        output_dict['span_mask'] = span_mask

        return output_dict
Example No. 11
    def forward(
            self,  # type: ignore
            tokens: Dict[str, torch.LongTensor],
            label: torch.LongTensor = None) -> Dict[str, torch.Tensor]:
        # pylint: disable=arguments-differ
        """
        Parameters
        ----------
        tokens : Dict[str, torch.LongTensor], required
            The output of ``TextField.as_array()``, which should typically be passed directly to a
            ``TextFieldEmbedder``. This output is a dictionary mapping keys to ``TokenIndexer``
            tensors.  At its most basic, using a ``SingleIdTokenIndexer`` this is: ``{"tokens":
            Tensor(batch_size, num_tokens)}``. This dictionary will have the same keys as were used
            for the ``TokenIndexers`` when you created the ``TextField`` representing your
            sequence.  The dictionary is designed to be passed directly to a ``TextFieldEmbedder``,
            which knows how to combine different word representations into a single vector per
            token in your input.
        label : torch.LongTensor, optional (default = None)
            A torch tensor representing the sequence of integer gold class labels of shape
            ``(batch_size, num_tokens)``.

        Returns
        -------
        An output dictionary consisting of:
        logits : torch.FloatTensor
            A tensor of shape ``(batch_size, num_tokens, tag_vocab_size)`` representing
            unnormalised log probabilities of the tag classes.
        class_probabilities : torch.FloatTensor
            A tensor of shape ``(batch_size, num_tokens, tag_vocab_size)`` representing
            a distribution of the tag classes per word.
        loss : torch.FloatTensor, optional
            A scalar loss to be optimised.

        """
        embedded_text_input = self.text_field_embedder(tokens)
        batch_size, sequence_length, _ = embedded_text_input.size()
        mask = get_text_field_mask(tokens)
        encoded_text = self.encoder(embedded_text_input, mask)
        logits = self.projection_layer(encoded_text)

        class_probabilities = F.softmax(logits, dim=-1)

        output_dict = {
            "logits": logits,
            "class_probabilities": class_probabilities
        }
        if label is not None:
            loss = self._loss(logits, label.long().view(-1))
            output_dict["loss"] = loss
            self._accuracy(logits, label.squeeze(-1))

        return output_dict
Example No. 12
    def forward(
            self,
            title: Dict[str, torch.LongTensor],
            abstract: Dict[str, torch.LongTensor],
            md: MetadataField,
            label: torch.LongTensor = None,
            label_true: torch.FloatTensor = None) -> Dict[str, torch.Tensor]:

        embedded_abstract = self.text_field_embedder(abstract)
        embedded_title = self.text_field_embedder(title)

        title_mask = util.get_text_field_mask(title)
        abstract_mask = util.get_text_field_mask(abstract)

        encoded_title = self.title_encoder(embedded_title, title_mask)
        encoded_abstract = self.abstract_encoder(embedded_abstract,
                                                 abstract_mask)

        logits = self.classifier_feedforward(
            torch.cat([encoded_title, encoded_abstract], dim=-1))

        if not self.pu_loss:
            if self.positive_class == 1:
                logits = torch.cat(((-logits).view(-1, 1), logits.view(-1, 1)),
                                   dim=1)
            else:
                logits = torch.cat((logits.view(-1, 1), (-logits).view(-1, 1)),
                                   dim=1)
            class_probabilities = F.softmax(logits, dim=1)
        else:
            positive_pred = self.normalize(logits)
            negative_pred = 1 - positive_pred

            if self.positive_class == 1:
                class_probabilities = torch.cat(
                    (negative_pred.view(-1, 1), positive_pred.view(-1, 1)),
                    dim=1)
            else:
                class_probabilities = torch.cat(
                    (positive_pred.view(-1, 1), negative_pred.view(-1, 1)),
                    dim=1)

        output_dict = {"class_probabilities": class_probabilities}

        if label is not None:
            loss = self.loss(logits, label)

            for metric in self.metrics.values():
                metric(class_probabilities, label.long())
            output_dict["loss"] = loss

        return output_dict
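
A small sketch of the single-score-to-two-class-logits trick used in the non-PU branch above; the softmax over the pair (-s, s) equals sigmoid(2s) for the positive class (toy values):

import torch
import torch.nn.functional as F

scores = torch.randn(4, 1)                       # one score per instance
logits = torch.cat(((-scores).view(-1, 1), scores.view(-1, 1)), dim=1)
probs = F.softmax(logits, dim=1)
assert torch.allclose(probs[:, 1], torch.sigmoid(2 * scores.view(-1)))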
Example No. 13
 def _encode(self, source_tokens: Dict[str, torch.Tensor],
             verb_indicator: torch.LongTensor,
             lang_indicator: torch.LongTensor) -> Dict[str, torch.Tensor]:
     """
     Encode source input sentences.
     """
     # shape: (batch_size, max_input_sequence_length, encoder_input_dim)
     embedded_input = self._source_embedder(source_tokens)
     if self._binary_feature_embedding:
         embedded_verb_indicator = self._binary_feature_embedding(
             verb_indicator.long())
         embedded_input = torch.cat(
             [embedded_input, embedded_verb_indicator], -1)
     if self._language_embedding:
         embedded_lang_indicator = self._language_embedding(
             lang_indicator.long())
         # print("ENC", embedded_input.size(), embedded_lang_indicator.size())
         embedded_input = torch.cat(
             [embedded_input, embedded_lang_indicator], -1)
     # shape: (batch_size, max_input_sequence_length)
     source_mask = util.get_text_field_mask(source_tokens)
     # shape: (batch_size, max_input_sequence_length, encoder_output_dim)
     encoder_outputs = self._encoder(embedded_input, source_mask)
     return {"source_mask": source_mask, "encoder_outputs": encoder_outputs}
Example No. 14
 def forward(
     self, tokens: Dict[str,
                        torch.LongTensor], input_mask: torch.LongTensor,
     segment_ids: torch.LongTensor, next_sentence_labels: torch.FloatTensor,
     masked_lm_positions: torch.LongTensor,
     masked_lm_weights: torch.LongTensor,
     masked_lm_labels: Dict[str,
                            torch.LongTensor]) -> Dict[str, torch.Tensor]:
     embedded_tokens = self._text_field_embedder(tokens)
     transformed_tokens = self._transformer(embedded_tokens, input_mask,
                                            segment_ids)
     first_token_tensor = transformed_tokens[:, 0, :]
     pooled_output = torch.tanh(self._feedforward(first_token_tensor))
     output_dict = {
         'encoded_layer': transformed_tokens,
         'pooled_output': pooled_output
     }
     embedding_table = self._text_field_embedder.get_embedding_by_name(
         'tokens')
     masked_lm_loss = None
     next_sentence_loss = None
     if masked_lm_labels is not None:
         (masked_lm_loss, masked_lm_example_loss,
          masked_lm_log_probs) = get_masked_lm_output(
              self._use_fp16, transformed_tokens, self._norm_layer,
              self._vocab_bias,
              self._masked_lm_feedforward, embedding_table,
              masked_lm_positions.long(), masked_lm_labels['tokens'],
              masked_lm_weights)
         output_dict['masked_lm_loss'] = masked_lm_loss
         output_dict['masked_lm_example_loss'] = masked_lm_example_loss
         output_dict['masked_lm_log_probs'] = masked_lm_log_probs
         self._masked_lm_accuracy(masked_lm_log_probs.float(),
                                  masked_lm_labels["tokens"].view(-1))
     if next_sentence_labels is not None:
         (next_sentence_loss, next_sentence_example_loss,
          next_sentence_log_probs) = get_next_sentence_output(
              self._use_fp16, pooled_output,
              self._next_sentence_feedforward, next_sentence_labels)
         output_dict['next_sentence_loss'] = next_sentence_loss
         output_dict[
             'next_sentence_example_loss'] = next_sentence_example_loss
         output_dict['next_sentence_log_probs'] = next_sentence_log_probs
         self._next_sentence_accuracy(next_sentence_log_probs.float(),
                                      next_sentence_labels)
     output_dict["loss"] = masked_lm_loss
     return output_dict
Example No. 15
    def forward(
            self,  # type: ignore
            left: Dict[str, torch.LongTensor],
            right: Dict[str, torch.Tensor],
            label: torch.LongTensor = None) -> Dict[str, torch.Tensor]:
        """
        Parameters
        ----------
        left : Dict[str, torch.LongTensor], required
            The output of ``TextField.as_array()`` for the left text.
        right : Dict[str, torch.Tensor], required
            The output of ``TextField.as_array()`` for the right text.
        label : Variable, optional (default = None)
            A variable representing the label for each instance in the batch.

        Returns
        -------
        An output dictionary consisting of:
        class_probabilities : torch.FloatTensor
            A tensor of shape ``(batch_size, num_classes)`` representing a distribution over the
            label classes for each instance.
        loss : torch.FloatTensor, optional
            A scalar loss to be optimised.
        """

        left_embedded = self.text_field_embedder(left)
        left_mask = util.get_text_field_mask(left)

        right_embedded = self.text_field_embedder(right)
        right_mask = util.get_text_field_mask(right)

        v_l = self.left_encoder(left_embedded, left_mask)
        v_r = self.right_encoder(right_embedded, right_mask)

        sim = F.cosine_similarity(v_l, v_r) > self.prediction_threshold

        output_dict = {}

        if label is not None:
            output_dict['loss'] = self.loss(v_l, v_r, label)
            for metric in self.metrics.values():
                #logging.info(f"Sim {sim}")
                #logging.info(f"Label {label}")
                metric(sim.long(), label.long())

        return output_dict
Example No. 16
    def forward(self,
                images: torch.Tensor,
                objects: torch.LongTensor,
                segms: torch.Tensor,
                boxes: torch.Tensor,
                box_mask: torch.LongTensor,
                question: Dict[str, torch.Tensor],
                question_tags: torch.LongTensor,
                question_mask: torch.LongTensor,
                answers: Dict[str, torch.Tensor],
                answer_tags: torch.LongTensor,
                answer_mask: torch.LongTensor,
                metadata: List[Dict[str, Any]] = None,
                label: torch.LongTensor = None) -> Dict[str, torch.Tensor]:
        features = self.trunk.forward(
            images,
            objects,
            segms,
            boxes,
            box_mask,
            question,
            question_tags,
            question_mask,
            answers,
            answer_tags,
            answer_mask,
        )

        probs = features['probs']
        output_dict = {
            'label_probs': features['probs'],
            'cnn_regularization_loss': features['cnn_regularization_loss'],
            # Uncomment to visualize attention, if you want
            # 'qa_attention_weights': features['qa_attention_weights'],
            # 'atoo_attention_weights': features['atoo_attention_weights'],
        }

        if label is not None:
            self._accuracy(probs.argmax(dim=1), label)
            # We use NLLLoss as we don't have the logits.
            # Need to take log(softmax_probs) first.
            loss = self._loss(torch.log(probs), label.long().view(-1))
            output_dict["loss"] = loss[None]

        return output_dict
Example No. 17
    def forward(self,
                embedded_tokens: torch.FloatTensor,
                input_mask: torch.LongTensor,
                segment_ids: torch.LongTensor = None):  # pylint: disable=arguments-differ
        embedded_tokens = embedded_tokens * self.embed_scale
        embedded_tokens = common_attention.embedding_postprocessor(
            embedded_tokens,
            input_mask.long(),
            self._use_fp16,
            token_type_ids=segment_ids,
            use_token_type=self._use_token_type,
            token_type_embedding=self._token_type_embedding,
            use_position_embeddings=self._use_position_embeddings,
            position_embedding=self._position_embedding,
            norm_layer=self._norm_layer,
            dropout=self._dropout)
        encoder_self_attention_bias = common_attention.create_attention_mask_from_input_mask(
            embedded_tokens, input_mask, self._use_fp16)

        encoder_padding_mask = input_mask.eq(0)
        if not encoder_padding_mask.any():
            encoder_padding_mask = None
        prev_output = embedded_tokens
        for (attention, feedforward_output, feedforward,
             feedforward_intermediate, layer_norm_output, layer_norm) in zip(
                 self._attention_layers, self._feedforward_output_layers,
                 self._feedforward_layers,
                 self._feedforward_intermediate_layers,
                 self._layer_norm_output_layers, self._layer_norm_layers):
            layer_input = prev_output
            attention_output = attention(layer_input,
                                         encoder_self_attention_bias,
                                         key_padding_mask=encoder_padding_mask)
            attention_output = self._dropout(
                feedforward_output(attention_output))
            attention_output = layer_norm_output(attention_output +
                                                 layer_input)
            attention_intermediate = self._activation(
                feedforward_intermediate(attention_output))
            layer_output = self._dropout(feedforward(attention_intermediate))
            layer_output = layer_norm(layer_output + attention_output)
            prev_output = layer_output

        return prev_output
Example No. 18
    def forward(self,
                premise_img: torch.Tensor,
                hypothesis: Dict[str, torch.Tensor],
                label: torch.LongTensor = None) -> Dict[str, torch.Tensor]:
        """

        :param premise_img:
        :param hypothesis:
        :param label:
        :return:
        """
        embedded_hypothesis = self._text_field_embedder(hypothesis)
        hypothesis_mask = get_text_field_mask(hypothesis).float()

        if self.rnn_input_dropout:
            embedded_hypothesis = self.rnn_input_dropout(embedded_hypothesis)

        encoded_hypothesis = self._encoder(embedded_hypothesis, hypothesis_mask)

        hypothesis_hidden_state = get_final_encoder_states(
            encoded_hypothesis,
            hypothesis_mask,
            self._encoder.is_bidirectional()
        )

        img_feats = self.detector(premise_img)

        fused_features = torch.cat((img_feats, hypothesis_hidden_state), dim=-1)

        label_logits = self._output_feedforward(fused_features)
        label_probs = nn.functional.softmax(label_logits, dim=-1)

        output_dict = {
            "label_logits": label_logits,
            "label_probs": label_probs
        }

        if label is not None:
            loss = self._loss(label_logits, label.long().view(-1))
            self._accuracy(label_logits, label)
            output_dict["loss"] = loss

        return output_dict
Example No. 19
    def _get_permutation_indices(mask: torch.LongTensor):
        """
        Get the index for sorting with the length of the sequences.
        Empty sequences will be removed, but later restored with `restoration_idx`.
        """

        seq_lens = mask.long().sum(-1)

        sorted_seq_lens, perm_idx = seq_lens.sort(descending=True)

        # remove empty sequences
        num_non_zero_seqs = len(seq_lens.nonzero())
        truncated_sorted_seq_lens = sorted_seq_lens[:num_non_zero_seqs]
        truncated_perm_idx = perm_idx[:num_non_zero_seqs]

        # compute restoration index to sort tensors into the original order later.
        _, restoration_idx = perm_idx.sort()

        return truncated_perm_idx, truncated_sorted_seq_lens, restoration_idx
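
A hedged usage sketch with a toy mask (1 marks a real token), calling the helper as a plain function for illustration (in the model it is presumably a static helper). The empty third sequence is dropped and restoration_idx undoes the sort:

import torch

mask = torch.tensor([[1, 1, 0, 0],
                     [1, 1, 1, 1],
                     [0, 0, 0, 0]])
perm_idx, sorted_lens, restoration_idx = _get_permutation_indices(mask)
print(perm_idx)          # tensor([1, 0])
print(sorted_lens)       # tensor([4, 2])
print(restoration_idx)   # tensor([1, 0, 2])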
Example No. 20
    def _embed_source(self, source_tokens: Dict[str, torch.Tensor],
                      source_entity_length: torch.LongTensor):
        """
        :param source_tokens
        :param source_entity_length: (batch_size, max_token_num)
        :return
            (batch_size, max_token_num, embedding_dim)
        """
        token_ids = source_tokens['tokens']
        embedded = self._source_embedding(token_ids)

        batched_embedded = list()
        embedding_dim = embedded.size(-1)
        batch_size, max_token_num = source_entity_length.size()

        for _embedded, _length in zip(embedded, source_entity_length.long()):
            merged_embedded_input = list()
            idx = 0
            for length in _length:
                if length > 0:
                    embedding = torch.mean(_embedded[idx:idx + length, :],
                                           dim=0)
                    merged_embedded_input.append(embedding)
                    idx += length
                else:
                    break
            merged_embedded_input = torch.stack(merged_embedded_input, dim=0)
            pad_num = max_token_num - merged_embedded_input.size(0)
            if pad_num > 0:
                merged_embedded_input = torch.cat(
                    (merged_embedded_input,
                     merged_embedded_input.new_zeros([pad_num, embedding_dim
                                                      ])),
                    dim=0)
            batched_embedded.append(merged_embedded_input)

        # shape: (batch_size, max_token_num, embedding_dim)
        batched_embedded = torch.stack(batched_embedded, dim=0)
        assert batched_embedded.size(0) == embedded.size(
            0) and batched_embedded.size(1) == source_entity_length.size(1)
        # TODO: Dropout
        return batched_embedded
Example No. 21
    def _decoder_step(
            self, last_predictions: torch.Tensor,
            selective_weights: torch.Tensor, lang_indicator: torch.LongTensor,
            state: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
        # shape: (group_size, max_input_sequence_length, encoder_output_dim)
        encoder_outputs_mask = state["source_mask"].float()
        # shape: (group_size, target_embedding_dim)
        embedded_input = self._target_embedder(last_predictions)

        if self._language_embedding:
            embedded_lang_indicator = self._language_embedding(
                lang_indicator.long())
            # print("DEC", embedded_input.size(), embedded_lang_indicator.size())
            if len(embedded_lang_indicator.size()) == 3:
                if embedded_lang_indicator.size(1) == embedded_input.size(0):
                    embedded_lang_indicator = embedded_lang_indicator[0]
                else:
                    embedded_lang_indicator = embedded_lang_indicator.view(
                        embedded_input.size(0), -1)
            embedded_input = torch.cat(
                [embedded_input, embedded_lang_indicator], -1)
        # print("DEC2", embedded_input.size(), embedded_lang_indicator.size())
        # shape: (group_size, max_input_sequence_length)
        attentive_weights = self._attention(state["decoder_hidden"],
                                            state["encoder_outputs"],
                                            encoder_outputs_mask)
        # shape: (group_size, encoder_output_dim)
        attentive_read = util.weighted_sum(state["encoder_outputs"],
                                           attentive_weights)
        # shape: (group_size, encoder_output_dim)
        selective_read = util.weighted_sum(state["encoder_outputs"][:, 1:-1],
                                           selective_weights)
        # shape: (group_size, target_embedding_dim + encoder_output_dim * 2)
        decoder_input = torch.cat(
            (embedded_input, attentive_read, selective_read), -1)
        # shape: (group_size, decoder_input_dim)
        projected_decoder_input = self._input_projection_layer(decoder_input)

        state["decoder_hidden"], state["decoder_context"] = self._decoder_cell(
            projected_decoder_input,
            (state["decoder_hidden"], state["decoder_context"]))
        return state
Example No. 22
    def forward(self,  # type: ignore
                text: Dict[str, torch.LongTensor],
                predicate_indicator: torch.LongTensor,
                labeled_spans: torch.LongTensor = None,
                annotations: Dict = None,
                **kwargs):
        embedded_text_input = self.embedding_dropout(self.text_field_embedder(text))
        mask = get_text_field_mask(text)
        embedded_predicate_indicator = self.predicate_feature_embedding(predicate_indicator.long())

        embedded_text_with_predicate_indicator = torch.cat([embedded_text_input, embedded_predicate_indicator], -1)
        batch_size, sequence_length, embedding_dim_with_predicate_feature = embedded_text_with_predicate_indicator.size()

        if self.stacked_encoder.get_input_dim() != embedding_dim_with_predicate_feature:
            raise ConfigurationError("The SRL model uses an indicator feature, which makes "
                                     "the embedding dimension one larger than the value "
                                     "specified. Therefore, the 'input_dim' of the stacked_encoder "
                                     "must be equal to total_embedding_dim + 1.")

        encoded_text = self.stacked_encoder(embedded_text_with_predicate_indicator, mask)
        span_hidden, span_mask = self.span_hidden(encoded_text, encoded_text, mask, mask)

        logits = self.pred(F.relu(span_hidden)).squeeze()
        probs = torch.sigmoid(logits) * span_mask.float()

        output_dict = {"logits": logits, "probs": probs, 'span_mask': span_mask}
        if labeled_spans is not None:
            span_label_mask = (labeled_spans[:, :, 0] >= 0).squeeze(-1).long()
            prediction_mask = self.get_prediction_map(labeled_spans, span_label_mask, sequence_length,
                                                      annotations=annotations)
            loss = F.binary_cross_entropy_with_logits(logits, prediction_mask, weight=span_mask.float(),
                                                      reduction='sum')
            output_dict["loss"] = loss
            if not self.training:
                spans = self.to_scored_spans(probs, span_mask)
                self.threshold_metric(spans, annotations)

        # We need to retain the mask in the output dictionary
        # so that we can crop the sequences to remove padding
        # when we do viterbi inference in self.decode.
        output_dict["mask"] = mask
        return output_dict
Example No. 23
    def forward(self, text: Dict[str, torch.LongTensor],
                predicate_indicator: torch.LongTensor):
        # Shape: batch_size, num_tokens, embedding_dim
        embedded_text_input = self._embedding_dropout(
            self._text_field_embedder(text))
        # Shape: batch_size, num_tokens ?
        text_mask = get_text_field_mask(text)
        if self._predicate_feature_dim > 0:
            # Shape: batch_size, num_tokens, predicate_feature_dim ?
            embedded_predicate_indicator = self._predicate_feature_embedding(
                predicate_indicator.long())
            # Shape: batch_size, num_tokens, embedding_dim + predicate_feature_dim
            full_embedded_text = torch.cat(
                [embedded_text_input, embedded_predicate_indicator], -1)
        else:
            full_embedded_text = embedded_text_input

        if self._stacked_encoder is not None:
            # Shape: batch_size, num_tokens, encoder_output_dim
            encoded_text = self._stacked_encoder(full_embedded_text, text_mask)
        else:
            encoded_text = full_embedded_text

        return encoded_text, text_mask
Example No. 24
    def forward(
            self,  # type: ignore
            tokens: Dict[str, torch.LongTensor],
            verb_indicator: torch.LongTensor,
            tags: torch.LongTensor = None,
            metadata: List[Dict[str, Any]] = None) -> Dict[str, torch.Tensor]:
        # pylint: disable=arguments-differ
        """
        Parameters
        ----------
        tokens : Dict[str, torch.LongTensor], required
            The output of ``TextField.as_array()``, which should typically be passed directly to a
            ``TextFieldEmbedder``. This output is a dictionary mapping keys to ``TokenIndexer``
            tensors.  At its most basic, using a ``SingleIdTokenIndexer`` this is: ``{"tokens":
            Tensor(batch_size, num_tokens)}``. This dictionary will have the same keys as were used
            for the ``TokenIndexers`` when you created the ``TextField`` representing your
            sequence.  The dictionary is designed to be passed directly to a ``TextFieldEmbedder``,
            which knows how to combine different word representations into a single vector per
            token in your input.
        verb_indicator: torch.LongTensor, required.
            An integer ``SequenceFeatureField`` representation of the position of the verb
            in the sentence. This should have shape (batch_size, num_tokens) and importantly, can be
            all zeros, in the case that the sentence has no verbal predicate.
        tags : torch.LongTensor, optional (default = None)
            A torch tensor representing the sequence of integer gold class labels
            of shape ``(batch_size, num_tokens)``
        metadata : ``List[Dict[str, Any]]``, optional, (default = None)
            metadata containing the original words in the sentence and the verb to compute the
            frame for, under 'words' and 'verb' keys, respectively.

        Returns
        -------
        An output dictionary consisting of:
        logits : torch.FloatTensor
            A tensor of shape ``(batch_size, num_tokens, tag_vocab_size)`` representing
            unnormalised log probabilities of the tag classes.
        class_probabilities : torch.FloatTensor
            A tensor of shape ``(batch_size, num_tokens, tag_vocab_size)`` representing
            a distribution of the tag classes per word.
        loss : torch.FloatTensor, optional
            A scalar loss to be optimised.

        """
        embedded_text_input = self.embedding_dropout(
            self.text_field_embedder(tokens))
        mask = get_text_field_mask(tokens)
        embedded_verb_indicator = self.binary_feature_embedding(
            verb_indicator.long())
        # Concatenate the verb feature onto the embedded text. This now
        # has shape (batch_size, sequence_length, embedding_dim + binary_feature_dim).
        embedded_text_with_verb_indicator = torch.cat(
            [embedded_text_input, embedded_verb_indicator], -1)
        batch_size, sequence_length, _ = embedded_text_with_verb_indicator.size(
        )

        encoded_text = self.encoder(embedded_text_with_verb_indicator, mask)

        logits = self.tag_projection_layer(encoded_text)
        reshaped_log_probs = logits.view(-1, self.num_classes)
        class_probabilities = F.softmax(reshaped_log_probs, dim=-1).view(
            [batch_size, sequence_length, self.num_classes])
        output_dict = {
            "logits": logits,
            "class_probabilities": class_probabilities
        }
        if tags is not None:
            loss = sequence_cross_entropy_with_logits(
                logits, tags, mask, label_smoothing=self._label_smoothing)
            if not self.ignore_span_metric:
                self.span_metric(class_probabilities, tags, mask)
            output_dict["loss"] = loss

        # We need to retain the mask in the output dictionary
        # so that we can crop the sequences to remove padding
        # when we do viterbi inference in self.decode.
        output_dict["mask"] = mask

        if metadata is not None:
            words, verbs = zip(*[(x["words"], x["verb"]) for x in metadata])
            output_dict["words"] = list(words)
            output_dict["verb"] = list(verbs)
        return output_dict
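
A minimal sketch of the verb-indicator feature concatenation used above (and in several of the other SRL examples); dimensions and values are illustrative:

import torch
import torch.nn as nn

batch, seq_len, emb_dim, feat_dim = 2, 6, 8, 4
embedded_text = torch.randn(batch, seq_len, emb_dim)
verb_indicator = torch.zeros(batch, seq_len, dtype=torch.long)
verb_indicator[:, 2] = 1                         # mark the predicate position

binary_feature_embedding = nn.Embedding(2, feat_dim)
embedded_verb = binary_feature_embedding(verb_indicator)
combined = torch.cat([embedded_text, embedded_verb], dim=-1)
print(combined.shape)                            # torch.Size([2, 6, 12])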
Example No. 25
def paste(background: Tensor, patch: Tensor, x: LongTensor, y: LongTensor, mask: Optional[Tensor] = None):
    """
    Pastes the given patch into the background image tensor at the specified location.
    Optionally a mask of the same size as the patch can be passed in to blend the
    pasted contents with the background.

    :param background: A batch of image tensors of shape (B, C, H, W) that represent the background
    :param patch: A batch of image tensors of shape (B, C, h, w) which values get pasted into the background
    :param x: The horizontal integer coordinates relative to the top left corner of the background image.
        This tensor must be a one-dimensional tensor of shape (B, ).
    :param y: The vertical integer coordinates relative to the top left corner of the background image.
        This tensor must be a one-dimensional tensor of shape (B, ).
    :param mask: A mask of the same size as the patch that is used to blend foreground and background values.
        It is optional and defaults to ones (all is foreground).
    :return: The composite tensor of background and foreground values of shape (B, C, H, W).

    Note:
        1.  The X- and Y-coordinates can exceed the range of the background image (negative and positive).
            The background will be dynamically padded and cropped again after pasting such that the
            contents can go over the borders of the background image.
        2.  Currently it only supports integer locations.
        3.  All tensors must be on the same device.
    """
    # background: (B, C, H, W)
    # patch, mask: (B, C, h, w)
    # x, y: (B, )
    b, c, H, W = background.shape
    _, _, h, w = patch.shape
    mask = torch.ones_like(patch) if mask is None else mask
    device = background.device
    assert b == patch.size(0) == mask.size(0)
    assert b == x.size(0) == y.size(0)
    assert c == patch.size(1) == mask.size(1)
    assert h == mask.size(-2)
    assert w == mask.size(-1)
    assert 1 == x.ndimension() == y.ndimension()
    assert device == patch.device == x.device == y.device == mask.device
    x = x.long()
    y = y.long()

    # dynamically pad background for patches that go over borders
    left = max(int(-x.min().item()), 0)
    top = max(int(-y.min().item()), 0)
    right = max(x.max().item() + w - W, 0)
    bottom = max(y.max().item() + h - H, 0)
    background = nn.functional.pad(background, pad=[left, right, top, bottom])

    # generate indices
    gridb, gridc, gridy, gridx = torch.meshgrid(
        torch.arange(b, device=device),
        torch.arange(c, device=device),
        torch.arange(h, device=device),
        torch.arange(w, device=device)
    )
    x = x.view(b, 1, 1, 1).repeat(1, c, h, w)
    y = y.view(b, 1, 1, 1).repeat(1, c, h, w)
    x = x + gridx + left
    y = y + gridy + top

    # we need to ignore negative indices, or pasted content will be rolled to the other side
    mask = mask * (x >= 0) * (y >= 0)
    # paste
    one = torch.tensor(1, dtype=mask.dtype)
    background[(gridb, gridc, y, x)] = mask * patch + (one - mask) * background[(gridb, gridc, y, x)]
    # crop away the padded regions
    background = background[..., top:(top + H), left:(left + W)]
    return background
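
A hedged usage sketch of paste with tiny tensors (all values illustrative):

import torch

background = torch.zeros(1, 3, 8, 8)
patch = torch.ones(1, 3, 2, 2)
x = torch.tensor([3])             # patch top-left corner at column 3
y = torch.tensor([4])             # and row 4

composite = paste(background, patch, x, y)
print(composite.shape)                  # torch.Size([1, 3, 8, 8])
print(composite[0, 0, 4:6, 3:5])        # the pasted 2x2 block of ones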
Example No. 26
    def forward(
            self,  # type: ignore
            label_indices: torch.LongTensor,
            token_representations: torch.FloatTensor = None,
            raw_tokens: List[List[str]] = None,
            labels: torch.LongTensor = None,
            **kwargs) -> Dict[str, torch.Tensor]:
        """
        If ``token_representations`` is provided, ``tokens`` is not required. If
        ``token_representations`` is ``None``, then ``tokens`` is required.

        Parameters
        ----------
        label_indices : torch.LongTensor
            A LongTensor of shape (batch_size, max_num_adpositions) with the tokens
            to predict a label for for each element (sentence) in the batch.
        token_representations : torch.FloatTensor, optional (default = None)
            A tensor of shape (batch_size, sequence_length, representation_dim) with
            the representations of the tokens. If None, we use a contextualizer
            within this model to produce the token representations.
        raw_tokens : List[List[str]], optional (default = None)
            A batch of lists with the raw token strings. Used to compute
            token_representations, if it is None.
        labels : torch.LongTensor, optional (default = None)
            A torch tensor representing the sequence of integer gold class labels
            of shape ``(batch_size, num_label_indices)``.

        Returns
        -------
        An output dictionary consisting of:
        logits : torch.FloatTensor
            A tensor of shape ``(batch_size, num_label_indices,
            num_classes)`` representing unnormalized log probabilities
            of the classes.
        class_probabilities : torch.FloatTensor
            A tensor of shape ``(batch_size, num_label_indices,
            num_classes)`` representing a distribution of the tag classes.
        loss : torch.FloatTensor, optional
            A scalar loss to be optimized.
        """
        # Convert to LongTensor
        # TODO: add PR to ArrayField to preserve array types.
        label_indices = label_indices.long()
        if token_representations is None:
            if self._contextualizer is None:
                raise ConfigurationError(
                    "token_representation not provided as input to the model, and no "
                    "contextualizer was specified. Either add a contextualizer to your "
                    "dataset reader (preferred if your contextualizer is frozen) or to "
                    "this model (if you wish to train your contextualizer).")
            if raw_tokens is None:
                raise ValueError(
                    "Input raw_tokens is ``None`` --- make sure to set "
                    "include_raw_tokens in the DatasetReader to True.")
            if label_indices is None:
                raise ValueError("Did not recieve any token indices, needed "
                                 "if the contextualizer is within the model.")
            # Convert contextualizer output into a tensor
            # Shape: (batch_size, max_seq_len, representation_dim)
            token_representations, _ = pad_contextualizer_output(
                self._contextualizer(raw_tokens))

        # Move token representation to the same device as the
        # module (CPU or CUDA). TODO(nfliu): This only works if the module
        # is on one device.
        device = next(self._decoder._linear_layers[0].parameters()).device
        token_representations = token_representations.to(device)
        text_mask = get_text_mask_from_representations(token_representations)
        text_mask = text_mask.to(device)
        label_mask = self._get_label_mask_from_label_indices(label_indices)
        label_mask = label_mask.to(device)

        # Mask out the -1 padding in the label_indices, since that doesn't
        # work with indexing. Note that we can't 0 pad because 0 is actually
        # a valid label index, so we pad with -1 just for the purposes of
        # proper mask calculation and then convert to 0-padding by applying
        # the mask.
        label_indices = label_indices * label_mask

        # Encode the token representation.
        encoded_token_representations = self._encoder(token_representations,
                                                      text_mask)

        batch_size = label_indices.size(0)
        # Index into the encoded_token_representations to get tensors corresponding
        # to the representations of the tokens to predict labels for.
        # Shape: (batch_size, num_label_indices, representation_dim)
        range_vector = get_range_vector(
            batch_size, get_device_of(label_indices)).unsqueeze(1)
        selected_token_representations = encoded_token_representations[
            range_vector, label_indices]
        selected_token_representations = selected_token_representations.contiguous(
        )

        # Decode out a label from the token representation
        # Shape: (batch_size, num_label_indices, num_classes)
        logits = self._decoder(selected_token_representations)
        class_probabilities = F.softmax(logits, dim=-1)
        output_dict = {
            "logits": logits,
            "class_probabilities": class_probabilities
        }
        if labels is not None:
            loss = sequence_cross_entropy_with_logits(
                logits, labels, label_mask, average=self.loss_average)
            for name, metric in self.metrics.items():
                # When not running in error analysis mode, skip
                # metrics that start with "_"
                if not self.error_analysis and name.startswith("_"):
                    continue
                metric(logits, labels, label_mask.float())
            output_dict["loss"] = loss
        return output_dict
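
A minimal sketch of the batched index selection performed above with the range vector (hypothetical sizes):

import torch

batch_size, seq_len, dim = 2, 5, 3
encoded = torch.randn(batch_size, seq_len, dim)
label_indices = torch.tensor([[1, 3],
                              [0, 4]])           # tokens to classify per sentence

range_vector = torch.arange(batch_size).unsqueeze(1)   # (batch_size, 1)
selected = encoded[range_vector, label_indices]        # (batch_size, 2, dim)
assert torch.equal(selected[0, 1], encoded[0, 3])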
    def forward(
            self,  # type: ignore
            tokens: Dict[str, torch.LongTensor],
            verb_indicator: torch.LongTensor,
            target_index: torch.LongTensor,
            span_starts: torch.LongTensor,
            span_ends: torch.LongTensor,
            span_mask: torch.LongTensor,
            constituents: torch.LongTensor = None,
            tags: torch.LongTensor = None) -> Dict[str, torch.Tensor]:
        # pylint: disable=arguments-differ
        """
        Parameters
        ----------
        tokens : Dict[str, torch.LongTensor], required
            The output of ``TextField.as_array()``, which should typically be passed directly to a
            ``TextFieldEmbedder``. This output is a dictionary mapping keys to ``TokenIndexer``
            tensors.  At its most basic, using a ``SingleIdTokenIndexer`` this is: ``{"tokens":
            Tensor(batch_size, num_tokens)}``. This dictionary will have the same keys as were used
            for the ``TokenIndexers`` when you created the ``TextField`` representing your
            sequence.  The dictionary is designed to be passed directly to a ``TextFieldEmbedder``,
            which knows how to combine different word representations into a single vector per
            token in your input.
        verb_indicator: torch.LongTensor, required.
            An integer ``SequenceFeatureField`` representation of the position of the verb
            in the sentence. This should have shape (batch_size, num_tokens) and importantly, can be
            all zeros, in the case that the sentence has no verbal predicate.
        target_index : torch.LongTensor, required.
            The index of the target predicate token in each sentence.
        span_starts : torch.LongTensor, required.
            Start indices of the candidate spans, of shape ``(batch_size, num_spans)``.
        span_ends : torch.LongTensor, required.
            End indices of the candidate spans, of shape ``(batch_size, num_spans)``.
        span_mask : torch.LongTensor, required.
            A mask of shape ``(batch_size, num_spans)`` separating real spans from padding.
        constituents : torch.LongTensor, optional (default = None)
            Gold constituent labels for the candidate spans, of shape ``(batch_size, num_spans)``.
        tags : torch.LongTensor, optional (default = None)
            Gold semantic role labels for the candidate spans, of shape ``(batch_size, num_spans)``.

        Returns
        -------
        An output dictionary consisting of:
        srl_logits : torch.FloatTensor
            Unnormalised semantic role scores for each candidate span.
        constit_logits : torch.FloatTensor
            Unnormalised constituent scores for each candidate span.
        mask : torch.LongTensor
            The text mask, retained for decoding.
        loss : torch.FloatTensor, optional
            A scalar loss to be optimised (``srl_loss`` and ``constit_loss`` are also
            reported when the corresponding gold labels are given).

        """
        self.batch += 1
        embedded_text_input = self.embedding_dropout(
            self.text_field_embedder(tokens))
        batch_size = embedded_text_input.size(0)
        text_mask = util.get_text_field_mask(tokens)
        embedded_verb_indicator = self.binary_feature_embedding(
            verb_indicator.long())
        # Concatenate the verb feature onto the embedded text. This now
        # has shape (batch_size, sequence_length, embedding_dim + binary_feature_dim).
        embedded_text_with_verb_indicator = torch.cat(
            [embedded_text_input, embedded_verb_indicator], -1)
        embedding_dim_with_binary_feature = embedded_text_with_verb_indicator.size(2)

        if self.stacked_encoder.get_input_dim() != embedding_dim_with_binary_feature:
            raise ConfigurationError(
                "The SRL model uses an indicator feature, which makes "
                "the embedding dimension one larger than the value "
                "specified. Therefore, the 'input_dim' of the stacked_encoder "
                "must be equal to total_embedding_dim + 1.")
        encoded_text = self.stacked_encoder(embedded_text_with_verb_indicator,
                                            text_mask)

        span_starts = F.relu(span_starts.float()).long().view(batch_size, -1)
        span_ends = F.relu(span_ends.float()).long().view(batch_size, -1)
        target_index = F.relu(target_index.float()).long().view(batch_size)
        # shape (batch_size, sequence_length * max_span_width, embedding_dim)
        span_embeddings = span_srl_util.compute_span_representations(
            self.max_span_width, encoded_text, target_index, span_starts,
            span_ends, self.span_width_embedding,
            self.span_direction_embedding, self.span_distance_embedding,
            self.span_distance_bin, self.head_scorer)
        span_scores = self.span_feedforward(span_embeddings)

        srl_logits = self.srl_arg_projection_layer(span_scores)
        constit_logits = self.constit_arg_projection_layer(span_scores)
        output_dict = {
            "srl_logits": srl_logits,
            "constit_logits": constit_logits,
            "mask": text_mask
        }

        tags = tags.view(batch_size, -1, self.max_span_width)
        constituents = constituents.view(batch_size, -1, self.max_span_width)

        # Viterbi decoding
        if not self.training or not self.fast_mode:
            srl_prediction, srl_probabilities = self.semi_crf.viterbi_tags(
                srl_logits, text_mask)
            output_dict["srl_tags"] = srl_prediction
            output_dict["srl_tag_probabilities"] = srl_probabilities
            self.metrics["srl"](predictions=srl_prediction.view(
                batch_size, -1, self.max_span_width),
                                gold_labels=tags,
                                mask=text_mask)

            reshaped_constit_logits = constit_logits.view(
                -1, self.num_constit_tags)
            constit_probabilities = F.softmax(reshaped_constit_logits, dim=-1)
            constit_predictions = constit_probabilities.max(-1)[1]
            output_dict["constit_tags"] = constit_predictions
            output_dict["constit_probabilities"] = constit_probabilities

            constit_predictions = constit_predictions.view(
                batch_size, -1, self.max_span_width)
            self.metrics["constituents"](predictions=constit_predictions,
                                         gold_labels=constituents,
                                         mask=text_mask)

        # Loss computation
        if self.training or not self.fast_mode:
            if tags is not None:
                srl_log_likelihood, _ = self.semi_crf(srl_logits,
                                                      tags,
                                                      mask=text_mask)
                output_dict["srl_loss"] = -srl_log_likelihood
            if constituents is not None:
                # Flattening it out.
                constituents = constituents.view(batch_size, -1)
                constit_loss = util.sequence_cross_entropy_with_logits(
                    constit_logits, constituents, span_mask)
                output_dict["constit_loss"] = constit_loss
            if tags is not None and constituents is not None:
                if self.batch > self.cutoff_batch:
                    output_dict["loss"] = - srl_log_likelihood + self.mixing_ratio * \
                        constit_loss
                else:
                    output_dict["loss"] = -srl_log_likelihood
        if self.fast_mode and not self.training:
            output_dict["loss"] = Variable(torch.FloatTensor([0.00]))

        return output_dict
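
Note how span_starts, span_ends and target_index are passed through F.relu before being used as indices: assuming the dataset reader pads span fields with -1, relu on the float view clamps the padding to 0 so gathers stay in range, while a separate span mask keeps the dummy spans out of the loss. A small standalone sketch:

import torch
import torch.nn.functional as F

span_starts = torch.tensor([[0, 3, -1], [2, -1, -1]])

# relu leaves valid indices untouched and clamps the -1 padding to 0.
clamped_starts = F.relu(span_starts.float()).long()
# tensor([[0, 3, 0],
#         [2, 0, 0]])

# A mask derived from the original tensor still distinguishes real spans
# from the clamped padding entries.
span_mask = (span_starts >= 0).long()
# tensor([[1, 1, 0],
#         [1, 0, 0]])
print(clamped_starts, span_mask)
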
Example #28
    def forward(
            self,  # type: ignore
            tokens: Dict[str, torch.LongTensor],
            verb_indicator: torch.LongTensor,
            tags: torch.LongTensor = None) -> Dict[str, torch.Tensor]:
        # pylint: disable=arguments-differ
        """
        Parameters
        ----------
        tokens : Dict[str, torch.LongTensor], required
            The output of ``TextField.as_array()``, which should typically be passed directly to a
            ``TextFieldEmbedder``. This output is a dictionary mapping keys to ``TokenIndexer``
            tensors.  At its most basic, using a ``SingleIdTokenIndexer`` this is: ``{"tokens":
            Tensor(batch_size, num_tokens)}``. This dictionary will have the same keys as were used
            for the ``TokenIndexers`` when you created the ``TextField`` representing your
            sequence.  The dictionary is designed to be passed directly to a ``TextFieldEmbedder``,
            which knows how to combine different word representations into a single vector per
            token in your input.
        verb_indicator: torch.LongTensor, required.
            An integer ``SequenceFeatureField`` representation of the position of the verb
            in the sentence. This should have shape (batch_size, num_tokens) and importantly, can be
            all zeros, in the case that the sentence has no verbal predicate.
        tags : torch.LongTensor, optional (default = None)
            A torch tensor representing the sequence of integer gold class labels
            of shape ``(batch_size, num_tokens)``

        Returns
        -------
        An output dictionary consisting of:
        logits : torch.FloatTensor
            A tensor of shape ``(batch_size, num_tokens, tag_vocab_size)`` representing
            unnormalised log probabilities of the tag classes.
        class_probabilities : torch.FloatTensor
            A tensor of shape ``(batch_size, num_tokens, tag_vocab_size)`` representing
            a distribution of the tag classes per word.
        loss : torch.FloatTensor, optional
            A scalar loss to be optimised.

        """
        embedded_text_input = self.embedding_dropout(
            self.text_field_embedder(tokens))
        mask = get_text_field_mask(tokens)
        embedded_verb_indicator = self.binary_feature_embedding(
            verb_indicator.long())
        # Concatenate the verb feature onto the embedded text. This now
        # has shape (batch_size, sequence_length, embedding_dim + binary_feature_dim).
        embedded_text_with_verb_indicator = torch.cat(
            [embedded_text_input, embedded_verb_indicator], -1)
        batch_size, sequence_length, embedding_dim_with_binary_feature = \
            embedded_text_with_verb_indicator.size()

        if self.stacked_encoder.get_input_dim() != embedding_dim_with_binary_feature:
            raise ConfigurationError(
                "The SRL model uses an indicator feature, which makes "
                "the embedding dimension one larger than the value "
                "specified. Therefore, the 'input_dim' of the stacked_encoder "
                "must be equal to total_embedding_dim + 1.")

        encoded_text = self.stacked_encoder(embedded_text_with_verb_indicator,
                                            mask)

        logits = self.tag_projection_layer(encoded_text)
        reshaped_log_probs = logits.view(-1, self.num_classes)
        class_probabilities = F.softmax(reshaped_log_probs, dim=-1).view(
            [batch_size, sequence_length, self.num_classes])
        output_dict = {
            "logits": logits,
            "class_probabilities": class_probabilities,
            "encoded_text": encoded_text
        }
        if tags is not None:
            loss = sequence_cross_entropy_with_logits(logits, tags, mask)
            self.span_metric(class_probabilities, tags, mask)
            output_dict["loss"] = loss

        # We need to retain the mask in the output dictionary
        # so that we can crop the sequences to remove padding
        # when we do viterbi inference in self.decode.
        output_dict["mask"] = mask
        return output_dict
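
The flatten-softmax-reshape step above is equivalent to a softmax along the last dimension of the unflattened logits, but only when dim is given explicitly. A standalone sketch with toy shapes, not the model's own tensors:

import torch
import torch.nn.functional as F

batch_size, sequence_length, num_classes = 2, 3, 4
logits = torch.randn(batch_size, sequence_length, num_classes)

reshaped = logits.view(-1, num_classes)
class_probabilities = F.softmax(reshaped, dim=-1).view(
    batch_size, sequence_length, num_classes)

# Same result as normalising the last dimension directly.
assert torch.allclose(class_probabilities, F.softmax(logits, dim=-1))
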
Example #29
    def forward(
            self,  # type: ignore
            tokens: Dict[str, torch.LongTensor],
            verb_span: torch.LongTensor,
            entity_span: torch.LongTensor,
            state_change_type_labels: torch.LongTensor = None,
            state_change_tags: torch.LongTensor = None
    ) -> Dict[str, torch.Tensor]:
        # pylint: disable=arguments-differ
        """
        Parameters
        ----------
        tokens : Dict[str, torch.LongTensor], required
            The output of ``TextField.as_array()``, which should typically be passed directly to a
            ``TextFieldEmbedder``. This output is a dictionary mapping keys to ``TokenIndexer``
            tensors.  At its most basic, using a ``SingleIdTokenIndexer`` this is: ``{"tokens":
            Tensor(batch_size, num_tokens)}``. This dictionary will have the same keys as were used
            for the ``TokenIndexers`` when you created the ``TextField`` representing your
            sequence.  The dictionary is designed to be passed directly to a ``TextFieldEmbedder``,
            which knows how to combine different word representations into a single vector per
            token in your input.
        verb_span: torch.LongTensor, required.
            An integer ``SequenceLabelField`` representation of the position of the focus verb
            in the sentence. This should have shape (batch_size, num_tokens) and importantly, can be
            all zeros, in the case that pre-processing stage could not extract a verbal predicate.
        entity_span: torch.LongTensor, required.
            An integer ``SequenceLabelField`` representation of the position of the focus entity
            in the sentence. This should have shape (batch_size, num_tokens).
        state_change_type_labels: torch.LongTensor, optional (default = None)
            A torch tensor representing the state change type class labels of shape ``(batch_size, 1)``.
        state_change_tags : torch.LongTensor, optional (default = None)
            A torch tensor representing the sequence of integer gold class labels
            of shape ``(batch_size, num_tokens)``
            In the first implementation we focus only on state_change_types.

        Returns
        -------
        An output dictionary consisting of:
        type_probs : torch.FloatTensor
            A tensor of shape ``(batch_size, num_state_change_types)`` representing
            a distribution of state change types per datapoint.
        tags_class_probabilities : torch.FloatTensor
            A tensor of shape ``(batch_size, num_state_change_types, num_tokens)`` representing
            a distribution of location tags per token in a sentence.
        loss : torch.FloatTensor, optional
            A scalar loss to be optimised.
        """

        # Layer 1 = Word + Character embedding layer
        embedded_sentence = self.text_field_embedder(tokens)
        mask = get_text_field_mask(tokens).float()

        # Layer 2 = Add positional bit to encode position of focus verb and entity
        embedded_sentence_verb_entity = \
            torch.cat([embedded_sentence, verb_span.float().unsqueeze(-1), entity_span.float().unsqueeze(-1)], dim=-1)

        # Layer 3 = Contextual embedding layer using Bi-LSTM over the sentence
        contextual_embedding = self.seq2seq_encoder(
            embedded_sentence_verb_entity, mask)

        # Layer 4: Attention (Contextual embedding, BOW(verb span))
        verb_weight_matrix = verb_span.float() / (
            verb_span.float().sum(-1).unsqueeze(-1) + 1e-13)
        verb_vector = weighted_sum(
            contextual_embedding * verb_span.float().unsqueeze(-1),
            verb_weight_matrix)
        entity_weight_matrix = entity_span.float() / (
            entity_span.float().sum(-1).unsqueeze(-1) + 1e-13)
        entity_vector = weighted_sum(
            contextual_embedding * entity_span.float().unsqueeze(-1),
            entity_weight_matrix)
        verb_entity_vector = torch.cat([verb_vector, entity_vector], 1)
        batch_size, sequence_length, binary_feature_dim = \
            verb_span.float().unsqueeze(-1).size()

        # attention weights for type prediction
        attention_weights_types = self.attention_layer(verb_entity_vector,
                                                       contextual_embedding)
        attention_output_vector = weighted_sum(contextual_embedding,
                                               attention_weights_types)

        # contextual embedding + positional vectors for tag prediction
        context_positional_tags = torch.cat(
            [contextual_embedding,
             verb_span.float().unsqueeze(-1),
             entity_span.float().unsqueeze(-1)],
            dim=-1)

        # Layer 5 = Dense softmax layer to pick one state change type per datapoint,
        # and one tag per word in the sentence
        type_logits = self.aggregate_feedforward(attention_output_vector)
        type_probs = torch.nn.functional.softmax(type_logits, dim=-1)

        tags_logits = self.tag_projection_layer(context_positional_tags)
        reshaped_log_probs = tags_logits.view(-1, self.num_tags)
        tags_class_probabilities = F.softmax(reshaped_log_probs, dim=-1).view(
            [batch_size, sequence_length, self.num_tags])

        # Create output dictionary for the trainer
        # Compute loss and epoch metrics
        output_dict = {'type_probs': type_probs}
        loss = None
        if state_change_type_labels is not None:
            state_change_type_labels_loss = self._loss(
                type_logits,
                state_change_type_labels.long().view(-1))
            for type_label in self.type_labels_vocab.values():
                metric = self.type_f1_metrics["type_" + type_label]
                metric(type_probs, state_change_type_labels.squeeze(-1))

            self._type_accuracy(type_probs,
                                state_change_type_labels.squeeze(-1))
            loss = state_change_type_labels_loss

        if state_change_tags is not None:
            state_change_tags_loss = sequence_cross_entropy_with_logits(
                tags_logits, state_change_tags, mask)
            self.span_metric(tags_class_probabilities, state_change_tags, mask)
            output_dict["tags_class_probabilities"] = tags_class_probabilities
            loss = (state_change_tags_loss if loss is None
                    else loss + state_change_tags_loss)

        # Only attach a loss when at least one kind of gold label was provided.
        if loss is not None:
            output_dict['loss'] = loss

        return output_dict
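
The verb and entity vectors above are bag-of-words span representations: a 0/1 indicator over tokens, normalised to sum to one (a small epsilon guards against empty spans), is used as attention weights over the contextual embeddings. A standalone sketch that replaces allennlp's weighted_sum with an einsum of the same shape semantics:

import torch

def span_bow_vector(contextual: torch.Tensor, span: torch.Tensor) -> torch.Tensor:
    # contextual: (batch, seq_len, dim); span: (batch, seq_len), 1 on span tokens.
    span = span.float()
    weights = span / (span.sum(-1, keepdim=True) + 1e-13)
    # (batch, seq_len) x (batch, seq_len, dim) -> (batch, dim)
    return torch.einsum("bs,bsd->bd", weights, contextual)

contextual = torch.randn(1, 4, 8)
span = torch.tensor([[0, 1, 1, 0]])
vec = span_bow_vector(contextual, span)
# Equals the plain average of the embeddings of the two span tokens.
assert torch.allclose(vec[0], contextual[0, 1:3].mean(0), atol=1e-5)
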
Example #30
    def forward(
            self,  # type: ignore
            tokens: Dict[str, torch.LongTensor],
            targets: torch.LongTensor,
            target_index: torch.LongTensor,
            span_starts: torch.LongTensor,
            span_ends: torch.LongTensor,
            tags: torch.LongTensor = None,
            **kwargs) -> Dict[str, torch.Tensor]:
        # pylint: disable=arguments-differ
        """
        Parameters
        ----------
        tokens : Dict[str, torch.LongTensor], required
            The output of ``TextField.as_array()``, which should typically be passed directly to a
            ``TextFieldEmbedder``. This output is a dictionary mapping keys to ``TokenIndexer``
            tensors.  At its most basic, using a ``SingleIdTokenIndexer`` this is: ``{"tokens":
            Tensor(batch_size, num_tokens)}``. This dictionary will have the same keys as were used
            for the ``TokenIndexers`` when you created the ``TextField`` representing your
            sequence.  The dictionary is designed to be passed directly to a ``TextFieldEmbedder``,
            which knows how to combine different word representations into a single vector per
            token in your input.
        targets : torch.LongTensor, required.
            An integer ``SequenceFeatureField`` representation of the position of the target
            predicate in the sentence. This should have shape (batch_size, num_tokens) and
            importantly, can be all zeros, in the case that the sentence has no predicate.
        target_index : torch.LongTensor, required.
            The index of the target predicate token in each sentence.
        span_starts : torch.LongTensor, required.
            Start indices of the candidate spans, of shape ``(batch_size, num_spans)``.
        span_ends : torch.LongTensor, required.
            End indices of the candidate spans, of shape ``(batch_size, num_spans)``.
        tags : torch.LongTensor, optional (default = None)
            Gold span labels of shape ``(batch_size, num_spans)``: semantic role labels for
            FrameSRL batches, constituent labels for scaffold batches.

        Returns
        -------
        An output dictionary produced by ``compute_srl_graph`` (for FrameSRL batches) or
        ``compute_constit_graph`` (for scaffold batches), including:
        loss : torch.FloatTensor, optional
            A scalar loss to be optimised.

        """
        embedded_text_input = self.embedding_dropout(
            self.text_field_embedder(tokens))
        text_mask = util.get_text_field_mask(tokens)

        embedded_verb_indicator = self.binary_feature_embedding(targets.long())
        # Concatenate the verb feature onto the embedded text. This now
        # has shape (batch_size, sequence_length, embedding_dim + binary_feature_dim).
        embedded_text_with_verb_indicator = torch.cat(
            [embedded_text_input, embedded_verb_indicator], -1)
        embedding_dim_with_binary_feature = embedded_text_with_verb_indicator.size(2)

        if self.stacked_encoder.get_input_dim() != embedding_dim_with_binary_feature:
            raise ConfigurationError(
                "The SRL model uses an indicator feature, which makes "
                "the embedding dimension one larger than the value "
                "specified. Therefore, the 'input_dim' of the stacked_encoder "
                "must be equal to total_embedding_dim + 1.")

        encoded_text = self.stacked_encoder(embedded_text_with_verb_indicator,
                                            text_mask)

        batch_size, num_spans = tags.size()
        assert num_spans % self.max_span_width == 0
        tags = tags.view(batch_size, -1, self.max_span_width)

        span_starts = F.relu(span_starts.float()).long().view(batch_size, -1)
        span_ends = F.relu(span_ends.float()).long().view(batch_size, -1)
        target_index = F.relu(target_index.float()).long().view(batch_size)

        # shape (batch_size, sequence_length * max_span_width, embedding_dim)
        span_embeddings = span_srl_util.compute_span_representations(
            self.max_span_width, encoded_text, target_index, span_starts,
            span_ends, self.span_width_embedding,
            self.span_direction_embedding, self.span_distance_embedding,
            self.span_distance_bin, self.head_scorer)
        span_scores = self.span_feedforward(span_embeddings)

        # FN-specific parameters.
        fn_args = []
        for extra_arg in ['frame', 'valid_frame_elements']:
            if extra_arg in kwargs and kwargs[extra_arg] is not None:
                fn_args.append(kwargs[extra_arg])

        if fn_args:  # FrameSRL batch.
            frame, valid_frame_elements = fn_args
            output_dict = self.compute_srl_graph(
                span_scores=span_scores,
                frame=frame,
                valid_frame_elements=valid_frame_elements,
                tags=tags,
                text_mask=text_mask,
                target_index=target_index)
        else:  # Scaffold batch.
            span_mask = kwargs.get("span_mask")
            parent_tags = kwargs.get("parent_tags")
            if self.unlabeled_constits:
                not_a_constit = self.vocab.get_token_index(
                    "*", self.constit_label_namespace)
                tags = (tags != not_a_constit).float().view(
                    batch_size, -1, self.max_span_width)
            elif self.constit_label_namespace == "parent_labels":
                tags = parent_tags.view(batch_size, -1, self.max_span_width)
            elif self.np_pp_constits:
                tags = self.get_new_tags_np_pp(tags, batch_size)
            output_dict = self.compute_constit_graph(span_mask=span_mask,
                                                     span_scores=span_scores,
                                                     constit_tags=tags,
                                                     text_mask=text_mask)

        if self.fast_mode and not self.training:
            output_dict["loss"] = Variable(torch.FloatTensor([0.00]))

        return output_dict
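
When self.unlabeled_constits is set, the scaffold branch above collapses the constituent label space into a binary is-a-constituent target by comparing every span label against the index of the "*" (not-a-constituent) symbol. A toy sketch with a hypothetical vocabulary index:

import torch

not_a_constit = 7  # hypothetical index of "*" in the constituent label vocabulary
tags = torch.tensor([[7, 3, 7, 12],
                     [5, 7, 7, 7]])
binary_targets = (tags != not_a_constit).float()
# tensor([[0., 1., 0., 1.],
#         [1., 0., 0., 0.]])
print(binary_targets)
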
Example #31
    def forward(self,  # type: ignore
                tokens: Dict[str, torch.LongTensor],
                verb_indicator: torch.LongTensor,
                tags: torch.LongTensor = None,
                metadata: List[Dict[str, Any]] = None) -> Dict[str, torch.Tensor]:
        # pylint: disable=arguments-differ
        """
        Parameters
        ----------
        tokens : Dict[str, torch.LongTensor], required
            The output of ``TextField.as_array()``, which should typically be passed directly to a
            ``TextFieldEmbedder``. This output is a dictionary mapping keys to ``TokenIndexer``
            tensors.  At its most basic, using a ``SingleIdTokenIndexer`` this is: ``{"tokens":
            Tensor(batch_size, num_tokens)}``. This dictionary will have the same keys as were used
            for the ``TokenIndexers`` when you created the ``TextField`` representing your
            sequence.  The dictionary is designed to be passed directly to a ``TextFieldEmbedder``,
            which knows how to combine different word representations into a single vector per
            token in your input.
        verb_indicator: torch.LongTensor, required.
            An integer ``SequenceFeatureField`` representation of the position of the verb
            in the sentence. This should have shape (batch_size, num_tokens) and importantly, can be
            all zeros, in the case that the sentence has no verbal predicate.
        tags : torch.LongTensor, optional (default = None)
            A torch tensor representing the sequence of integer gold class labels
            of shape ``(batch_size, num_tokens)``
        metadata : ``List[Dict[str, Any]]``, optional, (default = None)
            metadata containg the original words in the sentence and the verb to compute the
            frame for, under 'words' and 'verb' keys, respectively.

        Returns
        -------
        An output dictionary consisting of:
        logits : torch.FloatTensor
            A tensor of shape ``(batch_size, num_tokens, tag_vocab_size)`` representing
            unnormalised log probabilities of the tag classes.
        class_probabilities : torch.FloatTensor
            A tensor of shape ``(batch_size, num_tokens, tag_vocab_size)`` representing
            a distribution of the tag classes per word.
        loss : torch.FloatTensor, optional
            A scalar loss to be optimised.

        """
        embedded_text_input = self.embedding_dropout(self.text_field_embedder(tokens))
        mask = get_text_field_mask(tokens)
        embedded_verb_indicator = self.binary_feature_embedding(verb_indicator.long())
        # Concatenate the verb feature onto the embedded text. This now
        # has shape (batch_size, sequence_length, embedding_dim + binary_feature_dim).
        embedded_text_with_verb_indicator = torch.cat([embedded_text_input, embedded_verb_indicator], -1)
        batch_size, sequence_length, _ = embedded_text_with_verb_indicator.size()

        encoded_text = self.encoder(embedded_text_with_verb_indicator, mask)

        logits = self.tag_projection_layer(encoded_text)
        reshaped_log_probs = logits.view(-1, self.num_classes)
        class_probabilities = F.softmax(reshaped_log_probs, dim=-1).view([batch_size,
                                                                          sequence_length,
                                                                          self.num_classes])
        output_dict = {"logits": logits, "class_probabilities": class_probabilities}
        if tags is not None:
            loss = sequence_cross_entropy_with_logits(logits,
                                                      tags,
                                                      mask,
                                                      label_smoothing=self._label_smoothing)
            if not self.ignore_span_metric:
                self.span_metric(class_probabilities, tags, mask)
            output_dict["loss"] = loss

        # We need to retain the mask in the output dictionary
        # so that we can crop the sequences to remove padding
        # when we do viterbi inference in self.decode.
        output_dict["mask"] = mask

        if metadata is not None:
            words, verbs = zip(*[(x["words"], x["verb"]) for x in metadata])
            output_dict["words"] = list(words)
            output_dict["verb"] = list(verbs)
        return output_dict
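
Keeping the mask in output_dict matters because decoding happens outside forward: a decode step, sketched below with hypothetical toy shapes rather than the model's actual method, recovers each sentence's true length from the mask and crops the padded positions before running viterbi over the per-token probabilities.

import torch

output_dict = {
    "class_probabilities": torch.rand(2, 5, 3),
    "mask": torch.tensor([[1, 1, 1, 0, 0],
                          [1, 1, 1, 1, 1]]),
}

sequence_lengths = output_dict["mask"].sum(-1).tolist()
cropped = [probs[:length]
           for probs, length in zip(output_dict["class_probabilities"],
                                    sequence_lengths)]
assert [c.size(0) for c in cropped] == [3, 5]
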