def forward(self,
            paragraph: torch.FloatTensor,
            query: torch.FloatTensor,
            dm: torch.FloatTensor = None,
            qm: torch.FloatTensor = None):
        """Compute bidirectional attention between a paragraph and a query.

        Parameters
        ----------
        paragraph : assumed shape (batch, passage_length, encoding_dim) -- TODO confirm
        query : assumed shape (batch, question_length, encoding_dim) -- TODO confirm
        dm : paragraph (document) mask; passed to ``masked_softmax``.
        qm : query mask; effectively required, since it is used unconditionally
            in ``qm.unsqueeze(1)`` below despite the ``None`` default.

        Returns
        -------
        tuple of (passage_question_vectors, tiled_question_passage_vector);
        both are shaped like the paragraph encoding. (The original return
        annotation claimed a single tensor, which was wrong.)
        """
        batch_size = paragraph.size(0)
        passage_length = paragraph.size(1)
        encoding_dim = paragraph.size(2)

        # Raw similarity between every passage word and every query word.
        # Presumably (batch, passage_length, question_length) -- TODO confirm
        passage_question_similarity = self.matrix_attention(paragraph, query).squeeze(-1)

        # Context-to-query: for each passage word, attend over query words.
        passage_question_attention = last_dim_softmax(passage_question_similarity, qm)
        passage_question_vectors = weighted_sum(query, passage_question_attention)

        # Mask padded query positions before the per-row max below.
        masked_similarity = replace_masked_values(passage_question_similarity,
                                                  qm.unsqueeze(1),
                                                  -1e7)

        # Query-to-context: best query match per passage word.
        # FIX: dropped the legacy ``.squeeze(-1)`` that followed the max; modern
        # torch.max(dim=...) already removes the reduced dim, so the squeeze was
        # a no-op except when passage_length == 1, where it wrongly collapsed
        # the passage axis.
        question_passage_similarity = masked_similarity.max(dim=-1)[0]
        question_passage_attention = masked_softmax(question_passage_similarity, dm)
        question_passage_vector = weighted_sum(paragraph, question_passage_attention)
        # Shape: (batch_size, passage_length, encoding_dim)
        tiled_question_passage_vector = question_passage_vector.unsqueeze(1).expand(batch_size,
                                                                                    passage_length,
                                                                                    encoding_dim)

        return passage_question_vectors, tiled_question_passage_vector
# Example #2
 def forward(self, query, value, mask=None):
     """Dot-product attention: score each `value` row against `query`."""
     # (batch, value_len, dim) x (batch, dim, 1) -> (batch, value_len)
     raw_scores = value.bmm(query.unsqueeze(-1)).squeeze(-1)
     if mask is None:
         return F.softmax(raw_scores, dim=-1)
     return masked_softmax(raw_scores, mask)
# Example #3
 def forward(self, query, value, mask=None):
     """Bilinear attention: softmax over tanh(q W V^T + b)."""
     # (batch, dim) @ (dim, dim) -> (batch, 1, dim) for the batched matmul.
     projected = query.mm(self._weight_matrix).unsqueeze(1)
     similarities = torch.tanh(projected.bmm(value.transpose(1, 2)).squeeze(1) + self._bias)
     if mask is None:
         return F.softmax(similarities, dim=-1)
     return masked_softmax(similarities, mask)
# Example #4
 def forward(self, query, value, mask=None):
     """Cosine attention: L2-normalize both inputs, dot, then softmax."""
     eps = 1e-13  # guards against division by zero for all-zero rows
     unit_query = query / (query.norm(p=2, dim=-1, keepdim=True) + eps)
     unit_value = value / (value.norm(p=2, dim=-1, keepdim=True) + eps)
     cosine = torch.bmm(unit_query.unsqueeze(1), unit_value.transpose(1, 2)).squeeze(1)
     if mask is None:
         return F.softmax(cosine, dim=-1)
     return masked_softmax(cosine, mask)
# Example #5
 def forward(self, query, value, mask=None):
     """Additive (MLP) attention over `value`, conditioned on `query`."""
     # Broadcast the projected query across every value position.
     combined = self.linear_query(query).unsqueeze(1) \
                + self.linear_value(value)
     activated = self.tanh(combined)
     raw_scores = self.linear_hidden(activated).squeeze(-1)
     if mask is None:
         return F.softmax(raw_scores, dim=-1)
     return masked_softmax(raw_scores, mask)
    def forward(self,
                paragraph, query,
                char_paragraph=None,
                char_query=None,
                dm=None, qm=None,
                sent_start=None, sent_end=None,
                sent_mask=None):
        """Full QA forward pass: embed, attend, model, and predict answer spans.

        Parameters
        ----------
        paragraph, query : word-level inputs for ``self.embedding_layer``
            (presumably token-id tensors of shape (batch, length) -- TODO
            confirm against the caller).
        char_paragraph, char_query : optional character-level inputs for the
            same embedding layer.
        dm, qm : paragraph (document) and query masks.
        sent_start, sent_end, sent_mask : sentence boundary indices and mask;
            only consumed when ``self.sent_attention`` is enabled.

        Returns
        -------
        dict with span start/end logits, probabilities, and log-probabilities,
        all masked by ``dm``.
        """
        batch_size = paragraph.size(0)
        paragraph_length = paragraph.size(1)

        # Stage 1: embed/encode both sequences. The query's hidden states are
        # kept for the optional sentence encoder below.
        passage_encoded, _ = self.embedding_layer(paragraph, char_paragraph, batch_size, mask=dm)
        query_encoded, query_hidden_states = self.embedding_layer(query, char_query, batch_size, mask=qm)

        # Stage 2: bidirectional attention flow (BiDAF-style concatenation of
        # the passage with both attention directions and their products).
        query2context, context2query = self.attention_flow(passage_encoded, query_encoded, dm=dm, qm=qm)
        attention_flow_vectors = torch.cat([passage_encoded, query2context,
                                            passage_encoded * query2context,
                                            passage_encoded * context2query], dim=-1)

        if self.sent_attention:
            # Optional sentence-level attention branch: build per-sentence
            # representations and attend between them and the passage words.
            sentence_encode_input = relu(self.start_residual_encoding(attention_flow_vectors))

            sent_represent_matrix = self.word_aligned_sentence_encoder_layer(sentence_encode_input,
                                                                             sent_start, sent_end,
                                                                             sent_mask,
                                                                             query_hidden_state=query_hidden_states)
            sent2context, context2sent = self.sent_attention_flow(sentence_encode_input,
                                                                  sent_represent_matrix, dm=dm, qm=sent_mask)
            sent_attention_flow_vectors = torch.cat([sentence_encode_input, sent2context,
                                                     sentence_encode_input * sent2context,
                                                     sentence_encode_input * context2sent], dim=-1)
            output_sent_attn_vectors = relu(self.end_residual_encoding(sent_attention_flow_vectors))
            # Residual connection around the sentence-attention sub-network.
            model_layer_input = sentence_encode_input + output_sent_attn_vectors
        else:
            model_layer_input = attention_flow_vectors

        # Stage 3: modeling layer (recurrent, given the explicit hidden-state
        # initialization) over the attended passage.
        model_hidden_state = self.model_layer.init_hidden(batch_size)
        modeled_passage, _ = self.model_layer(model_layer_input, mask=dm, hidden_state=model_hidden_state)
        modeled_passage_dim = modeled_passage.size(-1)

        # Stage 4a: span-start prediction from attended + modeled passage.
        start_logits_input = torch.cat([model_layer_input, modeled_passage], dim=-1)
        start_logits = self.start_logits_linear(start_logits_input).squeeze(-1)

        # Summarize the passage under the start distribution and tile it so it
        # can be concatenated at every passage position for end prediction.
        start_probs = masked_softmax(start_logits, dm)
        span_start_representation = weighted_sum(modeled_passage, start_probs)
        tiled_start_representation = span_start_representation.unsqueeze(1).expand(batch_size,
                                                                                   paragraph_length,
                                                                                   modeled_passage_dim)

        # Stage 4b: span-end prediction, conditioned on the start summary.
        encode_end_inputs = torch.cat([model_layer_input, modeled_passage, tiled_start_representation,
                                       modeled_passage * tiled_start_representation], dim=-1)

        encode_end_hidden_state = self.end_index_encoder_layer.init_hidden(batch_size)
        encoded_end_logits, _ = self.end_index_encoder_layer(encode_end_inputs, mask=dm, hidden_state=encode_end_hidden_state)

        end_logits_input = torch.cat([model_layer_input, encoded_end_logits], dim=-1)
        end_logits = self.end_logits_linear(end_logits_input).squeeze(-1)
        end_probs = masked_softmax(end_logits, dm)
        start_log = masked_log_softmax(start_logits, dm)
        end_log = masked_log_softmax(end_logits, dm)
        # Mask the raw logits last (after probs/log-probs were computed from
        # the unmasked values) so padded positions can never win an argmax.
        start_logits = replace_masked_values(start_logits, dm, -1e7)
        end_logits = replace_masked_values(end_logits, dm, -1e7)

        output_dict = {"span_start_logits": start_logits,
                       "span_start_probs": start_probs,
                       "span_end_logits": end_logits,
                       "span_end_probs": end_probs,
                       "span_start_log": start_log,
                       "span_end_log": end_log}

        return output_dict