def process_x_pad_batch(self, x_a_pad_batch, x_t_pad_batch):
    """Mask the acoustic and text batches and keep rows usable in both.

    Args:
        x_a_pad_batch: Padded acoustic batch, forwarded to
            ``process_train_MAM_data`` wrapped in a one-element tuple.
        x_t_pad_batch: Text features accepted by ``self.tokenizer.pad``.

    Returns:
        A 3-tuple ``(batch_is_valid, acoustic, semantic)`` where
        ``batch_is_valid`` is ``True`` when at least one row survives,
        ``acoustic`` is ``(inputs, mask_labels, attention_mask, labels)``
        and ``semantic`` is ``(inputs, attention_mask, labels)``, all
        restricted to the surviving rows.
    """
    # Acoustic branch.
    # NOTE(review): the first returned value is used below as row indices of
    # usable samples -- confirm against process_train_MAM_data.
    (acoustic_ok_ids, acoustic_in, acoustic_mask_lbl,
     acoustic_attn, acoustic_lbl) = process_train_MAM_data(
        spec=(x_a_pad_batch, ), config=self.acoustic_config)

    # Semantic branch: pad, then mask tokens with the MLM collater.
    padded_text = self.tokenizer.pad(x_t_pad_batch, return_tensors="pt")
    semantic_in, semantic_lbl = self.mlm_collater.mask_tokens(
        padded_text['input_ids'])
    semantic_attn = padded_text['attention_mask']

    # A text row is usable when at least one of its labels differs from -100.
    labelled_per_row = torch.sum(semantic_lbl != -100, dim=1)
    semantic_ok_ids = torch.nonzero(labelled_per_row,
                                    as_tuple=False).view(-1)

    # Turn both index lists into 0/1 flags so they can be intersected.
    acoustic_flags = torch.zeros(acoustic_lbl.size(0))
    acoustic_flags[acoustic_ok_ids] = 1
    semantic_flags = torch.zeros(semantic_lbl.size(0))
    semantic_flags[semantic_ok_ids] = 1
    usable_in_both = acoustic_flags.long() & semantic_flags.long()
    keep = torch.nonzero(usable_in_both, as_tuple=False).view(-1)

    acoustic_out = tuple(
        tensor[keep]
        for tensor in (acoustic_in, acoustic_mask_lbl,
                       acoustic_attn, acoustic_lbl))
    semantic_out = tuple(
        tensor[keep]
        for tensor in (semantic_in, semantic_attn, semantic_lbl))

    return keep.size(0) > 0, acoustic_out, semantic_out
def process_x_pad_batch(self, x_a_pad_batch, x_t_pad_batch):
    """Mask the acoustic and text batches and keep rows usable in both.

    Both modalities are masked with ``self.mask_proportion`` and
    ``self.main_random_noise``; tail masking is disabled for both and the
    acoustic branch is called with downsampling disabled.

    Args:
        x_a_pad_batch: Padded acoustic batch, forwarded to
            ``process_train_MAM_data`` wrapped in a one-element tuple.
        x_t_pad_batch: Text features accepted by ``self.tokenizer.pad``.

    Returns:
        A 3-tuple ``(batch_is_valid, acoustic, semantic)`` where
        ``batch_is_valid`` is ``True`` when at least one row survives,
        ``acoustic`` is ``(inputs, mask_labels, attention_mask, labels)``
        and ``semantic`` is ``(inputs, attention_mask, labels, text_raw)``,
        all restricted to the surviving rows. ``text_raw`` is read from the
        padded ``input_ids`` after the masking call.
    """
    # Acoustic branch.
    # NOTE(review): the first returned value is used below as row indices of
    # usable samples -- confirm against process_train_MAM_data.
    (acoustic_ok_ids, acoustic_in, acoustic_mask_lbl,
     acoustic_attn, acoustic_lbl) = process_train_MAM_data(
        spec=(x_a_pad_batch, ),
        mask_proportion=self.mask_proportion,
        config=self.acoustic_config,
        tail_masking=False,
        main_random=self.main_random_noise,
        do_downsampling=False)

    # Semantic branch: pad, then mask tokens.
    padded_text = self.tokenizer.pad(x_t_pad_batch, return_tensors="pt")
    semantic_in, semantic_lbl = mask_tokens(
        inputs=padded_text['input_ids'],
        mlm_probability=self.mask_proportion,
        tokenizer=self.tokenizer,
        tail_masking=False,
        main_random=self.main_random_noise)
    semantic_attn = padded_text['attention_mask']

    # A text row is usable when at least one of its labels differs from -100.
    labelled_per_row = torch.sum(semantic_lbl != -100, dim=1)
    semantic_ok_ids = torch.nonzero(labelled_per_row,
                                    as_tuple=False).view(-1)

    # Turn both index lists into 0/1 flags so they can be intersected.
    acoustic_flags = torch.zeros(acoustic_lbl.size(0))
    acoustic_flags[acoustic_ok_ids] = 1
    semantic_flags = torch.zeros(semantic_lbl.size(0))
    semantic_flags[semantic_ok_ids] = 1
    usable_in_both = acoustic_flags.long() & semantic_flags.long()
    keep = torch.nonzero(usable_in_both, as_tuple=False).view(-1)

    acoustic_out = tuple(
        tensor[keep]
        for tensor in (acoustic_in, acoustic_mask_lbl,
                       acoustic_attn, acoustic_lbl))
    # The ids are looked up only now, after mask_tokens has run.
    semantic_out = tuple(
        tensor[keep]
        for tensor in (semantic_in, semantic_attn, semantic_lbl,
                       padded_text['input_ids']))

    return keep.size(0) > 0, acoustic_out, semantic_out
def process_x_pad_batch(self, x_a_pad_batch, x_t_pad_batch):
    """Mask the acoustic batch and pass the text batch through unmasked.

    The text tensor serves as both semantic input and semantic label, with
    an all-ones float attention mask. Row validity is decided by the
    acoustic branch alone.

    Args:
        x_a_pad_batch: Padded acoustic batch, forwarded to
            ``process_train_MAM_data`` wrapped in a one-element tuple.
        x_t_pad_batch: Text tensor indexed as ``[:, :, 0]``, so it has at
            least three dimensions.

    Returns:
        A 3-tuple ``(batch_is_valid, acoustic, semantic)`` where
        ``batch_is_valid`` is ``True`` when at least one row survives,
        ``acoustic`` is ``(inputs, mask_labels, attention_mask, labels)``
        and ``semantic`` is ``(inputs, attention_mask, labels,
        x_t_pad_batch)``, all restricted to the surviving rows.
    """
    # Acoustic branch.
    # NOTE(review): the first returned value is used below as row indices of
    # usable samples -- confirm against process_train_MAM_data.
    (acoustic_ok_ids, acoustic_in, acoustic_mask_lbl,
     acoustic_attn, acoustic_lbl) = process_train_MAM_data(
        spec=(x_a_pad_batch, ),
        mask_proportion=self.mask_proportion,
        config=self.acoustic_config,
        tail_masking=False,
        main_random=self.main_random_noise,
        do_downsampling=False)

    # Semantic branch: no masking, every position is attended to.
    semantic_attn = torch.ones_like(x_t_pad_batch[:, :, 0],
                                    dtype=torch.float)

    # Convert the acoustic index list to 0/1 flags, then back to indices.
    acoustic_flags = torch.zeros(acoustic_lbl.size(0))
    acoustic_flags[acoustic_ok_ids] = 1
    keep = torch.nonzero(acoustic_flags.long(), as_tuple=False).view(-1)

    acoustic_out = tuple(
        tensor[keep]
        for tensor in (acoustic_in, acoustic_mask_lbl,
                       acoustic_attn, acoustic_lbl))
    # The text tensor is indexed once per output slot so that each returned
    # element is its own tensor object.
    semantic_out = tuple(
        tensor[keep]
        for tensor in (x_t_pad_batch, semantic_attn,
                       x_t_pad_batch, x_t_pad_batch))

    return keep.size(0) > 0, acoustic_out, semantic_out