Example #1
    def embed_target(self, target_ids, seq_lens, mask_lm=True):
        # Look up pretrained word embeddings for the target token ids.
        target_ids = get_tensor_from_array(target_ids).long()
        embedding_output = self.pretrained_word_embeddings(target_ids)

        predict_mask = None
        recon_target = None
        if mask_lm:
            # Sample masked-LM masks and replace the masked positions with the
            # pretrained mask embedding; keep a front-padded copy of the result
            # as the reconstruction target.
            input_mask, predict_mask = get_mlm_masks(target_ids,
                                                     self.mask_prob,
                                                     self.mask_but_no_prob)
            input_mask = input_mask.unsqueeze(2)
            embedding_output = self.use_pretrained_mask_embedding(
                embedding_output, input_mask)
            recon_target = embedding_output
            recon_target = self.pad_front(recon_target, 0)

        # Front-pad the ids so they stay aligned with the wrapped embeddings,
        # then add the remaining embeddings and apply layer normalization.
        target_ids = self.pad_front(target_ids, 0)
        embedding_output, seq_lens = self.wrap_with_embeddings(
            embedding_output, seq_lens)
        embedding_output = self.add_beside_word_embeddings(embedding_output)
        embedding_output = self.pretrained_embedding_layer_norm(
            embedding_output)

        # The prepended position is never a prediction target, so predict_mask
        # is front-padded with 1.
        attention_mask = get_attention_mask(seq_lens, target_ids.shape[1])
        if predict_mask is not None:
            predict_mask = self.pad_front(predict_mask, 1)
        return recon_target, embedding_output, attention_mask, predict_mask
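All of these snippets convert their NumPy inputs with get_tensor_from_array, which is not shown in the examples. A minimal sketch of what such a helper plausibly does, assuming it only wraps the array as a torch tensor and moves it to the GPU when one is available; the dtype handling is an assumption (callers cast ids back with `.long()`):

    import numpy as np
    import torch

    def get_tensor_from_array(arr):
        # Assumed helper: wrap a NumPy array (or nested list) as a float32
        # torch tensor and move it to the GPU when available.
        tensor = torch.from_numpy(np.asarray(arr, dtype=np.float32))
        if torch.cuda.is_available():
            tensor = tensor.cuda()
        return tensor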
Example #2
    def predict(self, frame_feat, lens):
        # Embed the frame features, add positional encodings, run the encoder
        # with an attention mask over the valid frames, and project the output
        # to the target space.
        frame_feat = get_tensor_from_array(frame_feat)
        mask = get_attention_mask(lens, frame_feat.shape[1])
        x = self.feat_embeddings(frame_feat)
        x = self.positional_encoding(x)
        outputs = self.forward(x, mask)
        outputs = self.target_out_layer(outputs)
        return outputs
Example #3
    def finetune_loss(self, frame_feat, frame_label, lens):
        outputs = self.predict(frame_feat, lens)
        # CrossEntropyLoss expects logits of shape (N, C, T) for per-frame labels.
        outputs = outputs.transpose(1, 2)
        frame_label = get_tensor_from_array(frame_label).long()
        loss = nn.CrossEntropyLoss(reduction='none')(outputs, frame_label)
        # Average the per-frame losses over valid (non-padded) frames only,
        # then over the batch.
        mask = get_attention_mask(lens, frame_feat.shape[1])
        loss = masked_reduce_mean(loss, mask)
        loss = loss.mean()
        return loss
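finetune_loss relies on masked_reduce_mean to ignore padded frames when averaging the per-frame cross-entropy. A sketch of the reduction implied by this call site, assuming loss and mask both have shape (N, T) and that the helper returns one value per utterance; the epsilon is an assumption to guard rows that are entirely padding:

    def masked_reduce_mean(x, mask, eps=1e-8):
        # Assumed reduction: average x over the time axis, counting only
        # positions where mask == 1.
        return (x * mask).sum(dim=-1) / (mask.sum(dim=-1) + eps)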
Example #4
    def test_create_attention_mask(self):
        seq_lens = np.array([3, 5, 1, 0, 9, 100])
        max_len = 9
        mask = get_attention_mask(seq_lens, max_len)
        mask = mask.data.numpy()
        # Row i should contain ones for the first min(seq_lens[i], max_len)
        # positions and zeros elsewhere.
        ans = np.array([
            [1, 1, 1, 0, 0, 0, 0, 0, 0],
            [1, 1, 1, 1, 1, 0, 0, 0, 0],
            [1, 0, 0, 0, 0, 0, 0, 0, 0],
            [0, 0, 0, 0, 0, 0, 0, 0, 0],
            [1, 1, 1, 1, 1, 1, 1, 1, 1],
            [1, 1, 1, 1, 1, 1, 1, 1, 1],
        ])
        self.assertTrue(np.all(mask == ans))
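The test above pins down the expected behaviour of get_attention_mask: row i contains ones for the first min(seq_lens[i], max_len) positions and zeros elsewhere. A minimal sketch that satisfies this test (the actual implementation may differ, e.g. in dtype or device handling):

    import torch

    def get_attention_mask(seq_lens, max_len):
        # Mark position t of row i with 1 when t < seq_lens[i], so row i gets
        # min(seq_lens[i], max_len) ones.
        seq_lens = torch.as_tensor(seq_lens).unsqueeze(1)   # (N, 1)
        positions = torch.arange(max_len).unsqueeze(0)      # (1, max_len)
        return (positions < seq_lens).float()               # (N, max_len)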
Example #5
    def embed_feats(self, feats, seq_lens, mask_lm=True):
        # Project the raw frame features into the embedding space.
        feats = get_tensor_from_array(feats)
        embedding_output = self.feat_embeddings(feats)

        predict_mask = None
        if mask_lm:
            # Sample masked-LM masks and replace the masked positions with the
            # pretrained mask embedding.
            input_mask, predict_mask = get_mlm_masks(feats, self.mask_prob,
                                                     self.mask_but_no_prob)
            embedding_output = self.use_pretrained_mask_embedding(
                embedding_output, input_mask)

        # Front-pad the raw features so they stay aligned with the wrapped
        # embeddings, then add the remaining embeddings.
        feats = self.pad_front(feats, 0)
        embedding_output, seq_lens = self.wrap_with_embeddings(
            embedding_output, seq_lens)
        embedding_output = self.add_beside_word_embeddings(embedding_output)

        # The prepended position is never a prediction target, so predict_mask
        # is front-padded with 1.
        attention_mask = get_attention_mask(seq_lens, feats.shape[1])
        if predict_mask is not None:
            predict_mask = self.pad_front(predict_mask, 1)
        return feats, embedding_output, attention_mask, predict_mask
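embed_target and embed_feats both call self.pad_front to keep the raw targets and masks aligned with the output of wrap_with_embeddings. A hypothetical sketch of such a helper, under the assumption that wrapping prepends exactly one extra step along the time dimension (dim 1); in the snippets it is a method on the model class, and the real implementation may pad differently:

    import torch

    def pad_front(x, value):
        # Assumed behaviour: prepend a single step filled with `value` along
        # the time dimension, for tensors of shape (N, T) or (N, T, ...).
        pad = torch.full_like(x[:, :1], value)
        return torch.cat([pad, x], dim=1)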
Example #6
    def pretrain_loss(self, input_feats, seq_lens):
        input_feats = get_tensor_from_array(input_feats)
        attention_mask = get_attention_mask(seq_lens, input_feats.shape[1])
        input_mask, predict_mask = get_mlm_masks(input_feats, self.mask_prob,
                                                 self.mask_but_no_prob)

        # Replace the masked positions with the learned mask vector and zero
        # out the padded frames.
        masked_input_feats = (input_mask * input_feats
                              + (1 - input_mask) * self.feat_mask_vec)
        masked_input_feats *= attention_mask.unsqueeze(2)

        x = self.feat_embeddings(masked_input_feats)
        x = self.positional_encoding(x)
        output = self.forward(x, attention_mask)
        output = self.feat_out_layer(output)

        # Compute the loss only on masked, non-padded positions; shape (N, T).
        to_predict = (1 - predict_mask.squeeze()) * attention_mask
        loss = cpc_loss(output, input_feats, to_predict, attention_mask)
        return loss
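The pretraining paths all depend on get_mlm_masks, which is not shown. Below is a hypothetical sketch of a BERT-style scheme consistent with how the masks are consumed above: a 0 in input_mask means "replace this position with the mask vector/embedding", and a 0 in predict_mask means "compute the reconstruction loss here". The exact sampling in the original code may differ; mask_prob and mask_but_no_prob are interpreted here as the masked-and-predicted and kept-but-predicted probabilities, respectively.

    import torch

    def get_mlm_masks(x, mask_prob, mask_but_no_prob):
        # Assumed sampling: draw one uniform number per (batch, time) position.
        #   rand < mask_prob                                   -> masked in input, predicted
        #   mask_prob <= rand < mask_prob + mask_but_no_prob   -> kept in input, still predicted
        #   otherwise                                          -> kept, not predicted
        n, t = x.shape[0], x.shape[1]
        rand = torch.rand(n, t, device=x.device)
        input_mask = (rand >= mask_prob).float()
        predict_mask = (rand >= mask_prob + mask_but_no_prob).float()
        if x.dim() == 3:
            # For feature inputs of shape (N, T, D), add a trailing axis so the
            # masks broadcast over the feature dimension (cf. the .squeeze()
            # and .unsqueeze(2) calls at the call sites above).
            input_mask = input_mask.unsqueeze(2)
            predict_mask = predict_mask.unsqueeze(2)
        return input_mask, predict_mask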