def __getitem__(self, index):
    """Assemble the padded feature batch and speaker labels stored at ``index``.

    ``self.X[index]`` is iterated as a collection of feature-file names; each
    name is joined onto ``self.root`` and read with ``np.load``.

    Returns:
        A ``(features, speaker_labels)`` tuple. ``features`` is the
        ``pad_sequence(..., batch_first=True)`` stack of the loaded arrays
        (shorter sequences are zero-padded), or the result of
        ``process_test_MAM_data`` on it when ``self.run_mam`` is truthy.
        ``speaker_labels`` is a ``LongTensor`` holding one
        ``self.speaker2idx`` entry per file, in file order.
    """
    file_names = self.X[index]

    # Read every utterance's acoustic features, then pad to a common length.
    features = []
    for file_name in file_names:
        array = np.load(os.path.join(self.root, file_name))
        features.append(torch.FloatTensor(array))
    padded_features = pad_sequence(features, batch_first=True)

    # The speaker name is derived from the file path and mapped to its index.
    speaker_ids = [
        self.speaker2idx[self.get_speaker_from_path(file_name)]
        for file_name in file_names
    ]
    speaker_labels = torch.LongTensor(speaker_ids)

    if self.run_mam:
        padded_features = process_test_MAM_data(
            spec=(padded_features,), config=self.mam_config
        )
    return padded_features, speaker_labels
def __getitem__(self, index):
    """Assemble the padded feature batch and phone labels stored at ``index``.

    ``self.X[index]`` is iterated as a collection of feature-file names; each
    name is joined onto ``self.root`` and read with ``np.load``. The label
    sequence for a file is ``self.Y[self.parse_x_name(file_name)]``.

    Returns:
        A ``(features, phone_labels)`` tuple, both as returned by
        ``self.match_sequence`` when given the padded features and padded
        labels. When ``self.run_mam`` is truthy the features are additionally
        passed through ``process_test_MAM_data``.
    """
    file_names = self.X[index]

    # Acoustic features: one float tensor per file, zero-padded into a batch.
    features = []
    for file_name in file_names:
        array = np.load(os.path.join(self.root, file_name))
        features.append(torch.FloatTensor(array))
    padded_features = pad_sequence(features, batch_first=True)

    # Phone labels: looked up by the parsed file name, padded the same way.
    labels = []
    for file_name in file_names:
        label_key = self.parse_x_name(file_name)
        labels.append(torch.LongTensor(self.Y[label_key]))
    padded_labels = pad_sequence(labels, batch_first=True)

    # Features and labels are padded independently, so reconcile them here.
    matched_features, matched_labels = self.match_sequence(
        padded_features, padded_labels
    )

    if self.run_mam:
        matched_features = process_test_MAM_data(
            spec=(matched_features,), config=self.mam_config
        )
    return matched_features, matched_labels
def __getitem__(self, index):
    """Assemble the padded, length-capped feature batch and labels at ``index``.

    ``self.X[index]`` is iterated as a collection of feature-file names; each
    name is joined onto ``self.npy_dir`` and read with ``np.load``. The padded
    batch is cut to ``self.config['truncate_length']`` along dimension 1 when
    it is longer than that.

    Returns:
        A ``(features, labels)`` tuple. ``features`` is the padded (and
        possibly truncated) batch, or the result of ``process_test_MAM_data``
        on it when ``self.run_mam`` is truthy. ``labels`` is built from
        ``self.Y[index]`` as a ``FloatTensor`` when
        ``self.config['label_mode']`` is ``'regression'`` and as a
        ``LongTensor`` otherwise.
    """
    # [(seq, feature), ...] -> (batch, seq, feature), zero-padded to the
    # longest sequence in this batch.
    features = []
    for file_name in self.X[index]:
        array = np.load(os.path.join(self.npy_dir, file_name))
        features.append(torch.FloatTensor(array))
    padded_features = pad_sequence(features, batch_first=True)

    # Cap the sequence dimension.
    max_length = self.config['truncate_length']
    if padded_features.size(1) > max_length:
        padded_features = padded_features[:, :max_length, :]

    # Regression targets are floats; any other label mode uses integer labels.
    is_regression = self.config['label_mode'] == 'regression'
    label_tensor_type = torch.FloatTensor if is_regression else torch.LongTensor
    labels = label_tensor_type(self.Y[index])  # (batch, )

    if self.run_mam:
        padded_features = process_test_MAM_data(
            spec=(padded_features,), config=self.mam_config
        )
    return padded_features, labels