def __getitem__(self, idx):
    """Load one utterance: transformed STFT features and a padded label matrix.

    The utterance record is read from ``<data_dir>/data/<uttname>.txt``.
    Only its first line is used; it must hold four whitespace-separated
    fields: (1) uttname (2) uttdur (3) label file (4) feature file.

    Args:
        idx: Index into ``self.uttlist``.

    Returns:
        A tuple ``(uttname, feat, label_padded, n_labels)``:
        ``feat`` is the result of ``feature.transform`` applied to the STFT
        of the loaded waveform; ``label_padded`` is a
        ``(self.padded_len, 3)`` array whose leading rows are the processed
        labels and whose remaining rows are zero; ``n_labels`` is the number
        of label rows kept (at most ``self.padded_len``).

    Raises:
        OSError: If the record file cannot be opened (``FileNotFoundError``
            when it does not exist).
        ValueError: If the record has fewer than four fields. A
            ``ValueError`` is used rather than letting an ``IndexError``
            escape, because an ``IndexError`` raised from ``__getitem__`` is
            treated as end-of-sequence by index-based iteration.
    """
    uttname = self.uttlist[idx]
    info_filename = "{}/data/{}.txt".format(self.data_dir, uttname)

    # open() itself reports a missing/unreadable file, so no separate
    # existence check is needed (and none that disappears under ``-O``).
    with open(info_filename, 'r') as fh:
        info = fh.readline().strip('\n')

    # each record has 4 fields
    # (1)uttname (2)uttdur (3)label file (4)feature file
    info_split = info.split(None, 3)
    if len(info_split) < 4:
        raise ValueError(
            "Malformed record in {}: expected 4 fields, got {}".format(
                info_filename, len(info_split)))
    label_file, feat_file = info_split[2], info_split[3]

    # compute STFT feature (the second value returned by load_wav is unused)
    data, _ = kaldi_data.load_wav(feat_file)
    Y = feature.stft(data, self.frame_size, self.frame_shift)
    feat = feature.transform(Y, self.input_transform)

    # prepare diarization label
    label = self.process_label_file(label_file)
    # Columns 0-1 are divided by the frame period and truncated to whole
    # frame indices (presumably start/end times in seconds -- confirm
    # against process_label_file).
    second_per_frame = self.frame_shift * 1.0 / self.rate
    label[:, :2] = (label[:, :2] / second_per_frame).astype(int)
    # Column 2 is shifted by one; the all-zero padding rows added below then
    # cannot collide with a value of 0 in this column.
    label[:, 2] = label[:, 2] + 1

    if len(label) > self.padded_len:
        print(
            "Warning: length of {} exceeds padded length".format(uttname))
        label = label[:self.padded_len, :]

    label_padded = np.zeros((self.padded_len, 3))
    label_padded[:len(label), :] = label
    return uttname, feat, label_padded, len(label)
def __getitem__(self, idx):
    """Return ``(uttname, feat, label)`` for the utterance at position ``idx``.

    The waveform referenced by ``self.utt2ark[uttname]`` is loaded and turned
    into features via ``feature.stft`` followed by ``feature.transform``.
    Labels come from ``self.process_label`` applied to
    ``self.utt2seg[uttname]``; no padding or truncation is applied here.

    Args:
        idx: Index into ``self.uttlist``.

    Returns:
        A tuple ``(uttname, feat, label)``. In ``label`` the first two
        columns have been divided by the frame period and truncated to
        integers, and the third column has been incremented by one.
    """
    utt_id = self.uttlist[idx]

    # Waveform -> STFT -> transformed features.
    # (the second value returned by load_wav is not needed)
    wav, _ = kaldi_data.load_wav(self.utt2ark[utt_id])
    spectrum = feature.stft(wav, self.frame_size, self.frame_shift)
    features = feature.transform(spectrum, self.input_transform)

    # Segment list -> label matrix.
    labels = self.process_label(self.utt2seg[utt_id])

    # Rescale columns 0-1 by the frame period and keep whole frame indices
    # (presumably start/end times in seconds -- confirm against
    # process_label).
    frame_period = self.frame_shift * 1.0 / self.rate
    frame_index = (labels[:, :2] / frame_period).astype(int)
    labels[:, :2] = frame_index

    # Shift column 2 up by one.
    shifted = labels[:, 2] + 1
    labels[:, 2] = shifted

    return utt_id, features, labels