def forward(self,
            metadata: Dict[str, torch.Tensor],
            bert0: Dict[str, torch.Tensor],
            bert1: Dict[str, torch.Tensor],
            label: Optional[torch.Tensor] = None
            ) -> Dict[str, torch.Tensor]:
    # Every sample in a batch has to have the same size (as it's a tensor),
    # so smaller entries are padded. The mask is used to counteract this
    # padding.
    t0_masks = util.get_text_field_mask(bert0)
    t1_masks = util.get_text_field_mask(bert1)

    # We create the embeddings from the input text
    t0_embs = self.word_embeddings(bert0)
    t1_embs = self.word_embeddings(bert1)

    # Each sentence is encoded into a vector, then the document encoder
    # combines the sequence of sentence encodings.
    t0_sentence_encodings = self.encoder_dropout(
        seq_over_seq(self.sentence_encoder, t0_embs, t0_masks))
    t1_sentence_encodings = self.encoder_dropout(
        seq_over_seq(self.sentence_encoder, t1_embs, t1_masks))

    t0_enc_out = self.encoder_dropout(
        self.document_encoder(t0_sentence_encodings, mask=None))
    t1_enc_out = self.encoder_dropout(
        self.document_encoder(t1_sentence_encodings, mask=None))

    logit0 = self.dense(t0_enc_out).squeeze(-1)
    logit1 = self.dense(t1_enc_out).squeeze(-1)
    logits = torch.stack((logit0, logit1), dim=-1)

    # We also compute the class probabilities (softmax over the two logits).
    prob = torch.softmax(logits, dim=-1)
    output = {"logits": logits, "prob": prob}

    # Labels are optional. If they're present, we calculate the accuracy
    # and the loss function.
    if label is not None:
        self.accuracy(prob, label)
        output["loss"] = self.loss(logits, label)

    # The output is the dict we've been building, with the logits, the
    # class probabilities and (optionally) the loss.
    return output
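# NOTE: `seq_over_seq` is used throughout these listings but not defined in
# them. The sketch below is an assumption about its behaviour, not the
# original helper: it applies a module to each sentence of a
# (batch, num_sentences, ...) tensor by folding the sentence dimension into
# the batch dimension and restoring it afterwards.
from typing import Optional

import torch

def seq_over_seq(module: torch.nn.Module,
                 inputs: torch.Tensor,
                 masks: Optional[torch.Tensor] = None) -> torch.Tensor:
    batch_size, num_sents = inputs.size(0), inputs.size(1)
    flat_inputs = inputs.reshape(batch_size * num_sents, *inputs.shape[2:])
    if masks is not None:
        flat_masks = masks.reshape(batch_size * num_sents, *masks.shape[2:])
        flat_encodings = module(flat_inputs, flat_masks)
    else:
        flat_encodings = module(flat_inputs)
    # Each sentence is reduced to a single vector, giving a
    # (batch, num_sentences, encoding_dim) result.
    return flat_encodings.reshape(batch_size, num_sents, -1)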
def forward(self, sentences: Dict[str, torch.Tensor]) -> torch.Tensor:
    # Mask for the padded tokens inside each (padded) sentence.
    sentences_msks = util.get_text_field_mask(sentences, num_wrapping_dims=1)
    sentences_embs = self.word_embeddings(sentences)
    sentences_embs = self.embedding_dropout(sentences_embs)

    # Encode each sentence into a single vector, then run the document
    # encoder over the sequence of sentence encodings.
    sentences_encs = seq_over_seq(self.sentence_encoder, sentences_embs,
                                  sentences_msks)
    sentences_encs = self.encoder_dropout(sentences_encs)

    document_enc = self.document_encoder(sentences_encs, mask=None)
    document_enc = self.encoder_dropout(document_enc)

    # Sum the two halves of the encoder output (e.g. the forward and
    # backward directions of a bidirectional encoder) into a single
    # vector per sentence.
    mid = document_enc.size(2) // 2
    document_enc = document_enc[:, :, :mid] + document_enc[:, :, mid:]

    return cast(torch.Tensor, document_enc)
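# NOTE: small illustration (not part of the model) of the directional sum at
# the end of the forward above, assuming `document_encoder` is bidirectional
# and concatenates both directions along the last dimension.
import torch

enc = torch.randn(2, 4, 6)                    # (batch, num_sentences, 2 * hidden)
mid = enc.size(2) // 2
summed = enc[:, :, :mid] + enc[:, :, mid:]    # (batch, num_sentences, hidden)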
def _forward_internal(self,
                      bert: Dict[str, torch.Tensor],
                      relations: Dict[str, torch.Tensor]) -> torch.Tensor:
    t_masks = util.get_text_field_mask(bert, num_wrapping_dims=1)
    t_embs = self.word_embeddings(bert)

    # Per-token hidden states for every sentence, then attention pooling
    # of each sentence into a single encoding.
    t_sentence_hiddens = self.encoder_dropout(
        hierarchical_seq_over_seq(self.sentence_encoder, t_embs, t_masks))
    t_sentence_encodings = seq_over_seq(self.sentence_attn,
                                        t_sentence_hiddens)

    # Document-level encoding: hidden states over the sentence sequence,
    # pooled with document-level attention.
    t_document_hiddens = self.encoder_dropout(
        self.document_encoder(t_sentence_encodings, mask=None))
    t_document_encoding = self.document_attn(t_document_hiddens)

    # Feed-forward block with a residual connection and layer norm, then
    # the final projection to a single logit.
    hidden = self.ffn(t_document_encoding)
    hidden = self.norm(hidden + t_document_encoding)
    logit = self.output(hidden).squeeze(-1)

    return cast(torch.Tensor, logit)
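# NOTE: `hierarchical_seq_over_seq` is also not defined in these listings. The
# sketch below is an assumption: unlike `seq_over_seq`, it keeps the per-token
# hidden states of each sentence, so a Seq2Seq encoder applied to a
# (batch, num_sentences, num_tokens, emb_dim) tensor returns a
# (batch, num_sentences, num_tokens, hidden_dim) tensor.
import torch

def hierarchical_seq_over_seq(encoder: torch.nn.Module,
                              inputs: torch.Tensor,
                              masks: torch.Tensor) -> torch.Tensor:
    batch_size, num_sents, num_tokens = inputs.shape[:3]
    flat_inputs = inputs.reshape(batch_size * num_sents, num_tokens, -1)
    flat_masks = masks.reshape(batch_size * num_sents, num_tokens)
    flat_hiddens = encoder(flat_inputs, flat_masks)
    return flat_hiddens.reshape(batch_size, num_sents, num_tokens, -1)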
def _forward_internal(self,
                      text: Dict[str, torch.Tensor],
                      relations: Dict[str, torch.Tensor]) -> torch.Tensor:
    # Encode the text and pool it into a single vector with attention.
    t_mask = util.get_text_field_mask(text)
    t_emb = self.word_embeddings(text)
    t_hiddens = self.encoder_dropout(self.text_encoder(t_emb, t_mask))
    t_encoding = self.text_attn(t_hiddens)

    # Encode each relation sentence independently.
    r_masks = util.get_text_field_mask(relations, num_wrapping_dims=1)
    r_embs = self.word_embeddings(relations)
    r_sentence_encodings = self.encoder_dropout(
        seq_over_seq(self.relation_encoder, r_embs, r_masks))

    # Attend over the relation encodings with the text encoding as the
    # query, and pool them into a single relation vector.
    r_attn = self.relation_attn(vector=t_encoding,
                                matrix=r_sentence_encodings)
    r_encoding = util.weighted_sum(r_sentence_encodings, r_attn)

    # The final representation concatenates the text and relation encodings.
    final = torch.cat((t_encoding, r_encoding), dim=-1)
    logit = self.output(final).squeeze(-1)

    return cast(torch.Tensor, logit)
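# NOTE: illustration only (not from the original code). Assuming
# `self.relation_attn` is an AllenNLP `Attention` module (e.g.
# DotProductAttention), the attention pooling above behaves like this
# standalone sketch.
import torch
from allennlp.modules.attention import DotProductAttention
from allennlp.nn import util

attn = DotProductAttention()
text_encoding = torch.randn(2, 8)              # (batch, enc_dim)
relation_encodings = torch.randn(2, 5, 8)      # (batch, num_relations, enc_dim)
weights = attn(text_encoding, relation_encodings)         # (batch, num_relations)
pooled = util.weighted_sum(relation_encodings, weights)   # (batch, enc_dim)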
def forward(self,
            metadata: Dict[str, torch.Tensor],
            bert0: Dict[str, torch.Tensor],
            bert1: Dict[str, torch.Tensor],
            p_a0_rel: Dict[str, torch.Tensor],
            p_a1_rel: Dict[str, torch.Tensor],
            label: Optional[torch.Tensor] = None
            ) -> Dict[str, torch.Tensor]:
    # Every sample in a batch has to have the same size (as it's a tensor),
    # so smaller entries are padded. The mask is used to counteract this
    # padding.
    r0_masks = util.get_text_field_mask(p_a0_rel, num_wrapping_dims=1)
    r1_masks = util.get_text_field_mask(p_a1_rel, num_wrapping_dims=1)

    r0_embs = self.rel_embeddings(p_a0_rel)
    r1_embs = self.rel_embeddings(p_a1_rel)

    r0_sentence_hiddens = self.encoder_dropout(
        hierarchical_seq_over_seq(self.relation_sentence_encoder, r0_embs,
                                  r0_masks))
    r1_sentence_hiddens = self.encoder_dropout(
        hierarchical_seq_over_seq(self.relation_sentence_encoder, r1_embs,
                                  r1_masks))
    r0_sentence_encodings = seq_over_seq(self.relation_sentence_attn,
                                         r0_sentence_hiddens)
    r1_sentence_encodings = seq_over_seq(self.relation_sentence_attn,
                                         r1_sentence_hiddens)

    # We create the embeddings from the input text
    t0_masks = util.get_text_field_mask(bert0, num_wrapping_dims=1)
    t1_masks = util.get_text_field_mask(bert1, num_wrapping_dims=1)

    t0_embs = self.word_embeddings(bert0)
    t1_embs = self.word_embeddings(bert1)

    t0_sentence_hiddens = self.encoder_dropout(
        hierarchical_seq_over_seq(self.sentence_encoder, t0_embs, t0_masks))
    t1_sentence_hiddens = self.encoder_dropout(
        hierarchical_seq_over_seq(self.sentence_encoder, t1_embs, t1_masks))
    t0_sentence_encodings = seq_over_seq(self.sentence_attn,
                                         t0_sentence_hiddens)
    t1_sentence_encodings = seq_over_seq(self.sentence_attn,
                                         t1_sentence_hiddens)

    # Joining text and knowledge
    t0_document_hiddens = self.relational_encoder(
        src=t0_sentence_encodings,
        kb=r0_sentence_encodings,
    )
    t1_document_hiddens = self.relational_encoder(
        src=t1_sentence_encodings,
        kb=r1_sentence_encodings,
    )

    t0_document_encoding = self.document_attn(t0_document_hiddens)
    t1_document_encoding = self.document_attn(t1_document_hiddens)

    # Joining everything and getting the result
    logit0 = self.output(t0_document_encoding).squeeze(-1)
    logit1 = self.output(t1_document_encoding).squeeze(-1)
    logits = torch.stack((logit0, logit1), dim=-1)

    # We also compute the class probabilities (softmax over the two logits).
    prob = torch.softmax(logits, dim=-1)
    output = {"logits": logits, "prob": prob}

    # Labels are optional. If they're present, we calculate the accuracy
    # and the loss function.
    if label is not None:
        self.accuracy(prob, label)
        output["loss"] = self.loss(logits, label)

    # The output is the dict we've been building, with the logits, the
    # class probabilities and (optionally) the loss.
    return output
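# NOTE: usage sketch (not part of the model). The predicted alternative is the
# argmax over the two class probabilities returned in `output["prob"]`; the
# dummy dict below stands in for an actual forward pass.
import torch

output = {"prob": torch.softmax(torch.randn(4, 2), dim=-1)}   # as returned above
prediction = output["prob"].argmax(dim=-1)                    # (batch,) chosen alternative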