Ejemplo n.º 1
0
    def __getitem__(self, idx):
        """Load one utterance: input feature plus a fixed-size, padded label.

        The record file ``<data_dir>/data/<uttname>.txt`` is opened and its
        first line is split on whitespace into four fields:
        (1) uttname, (2) uttdur, (3) label file, (4) feature file.
        At most three splits are made, so the fourth field may itself
        contain whitespace.

        Args:
            idx: Index into ``self.uttlist``.

        Returns:
            A 4-tuple ``(uttname, feat, label_padded, n_label)`` where
            ``feat`` is the result of ``feature.transform`` on the STFT of
            the data loaded from the feature file, ``label_padded`` is a
            float array of shape ``(self.padded_len, 3)`` whose leading
            ``n_label`` rows hold the label (remaining rows are zero), and
            ``n_label`` is the number of label rows kept after truncation
            to ``self.padded_len``.

        Raises:
            FileNotFoundError: If the record file does not exist.
            ValueError: If the first line of the record file has fewer
                than four fields.
        """
        uttname = self.uttlist[idx]
        info_filename = "{}/data/{}.txt".format(self.data_dir, uttname)
        # No assert-based existence check here: asserts are removed under
        # ``python -O``.  open() raises FileNotFoundError (naming the path)
        # in every mode, so the failure is the same however Python is run.
        with open(info_filename, 'r') as fh:
            info = fh.readline().strip('\n')
        info_split = info.split(None, 3)
        # each record has 4 fields
        # (1)uttname (2)uttdur (3)label file (4)feature file
        if len(info_split) < 4:
            # Fail with ValueError rather than letting an IndexError escape:
            # an IndexError raised from __getitem__ is read as "end of
            # sequence" by Python's index-based iteration fallback, which
            # would silently cut iteration short on a malformed record.
            raise ValueError(
                "{}: expected 4 fields (uttname, uttdur, label file, "
                "feature file), got {}".format(
                    info_filename, len(info_split)))
        feat_file, label_file = info_split[3], info_split[2]

        # compute STFT feature (second return value of load_wav is unused)
        data, _ = kaldi_data.load_wav(feat_file)
        Y = feature.stft(data, self.frame_size, self.frame_shift)
        feat = feature.transform(Y, self.input_transform)

        # prepare diarization label
        label = self.process_label_file(label_file)
        second_per_frame = self.frame_shift * 1.0 / self.rate
        # Columns 0-1 are rescaled by seconds-per-frame and passed through
        # an int cast (fraction dropped); presumably they are start/end
        # times in seconds -- TODO confirm against process_label_file.
        label[:, :2] = (label[:, :2] / second_per_frame).astype(int)
        # Column 2 is shifted up by one; presumably this keeps real entries
        # distinguishable from the zero padding below -- TODO confirm.
        label[:, 2] = label[:, 2] + 1

        if len(label) > self.padded_len:
            print(
                "Warning: length of {} exceeds padded length".format(uttname))
            label = label[:self.padded_len, :]
        label_padded = np.zeros((self.padded_len, 3))
        label_padded[:len(label), :] = label
        return uttname, feat, label_padded, len(label)
Ejemplo n.º 2
0
    def __getitem__(self, idx):
        """Return ``(uttname, feat, label)`` for the utterance at ``idx``.

        ``feat`` is ``feature.transform`` applied to the STFT of the data
        loaded from ``self.utt2ark[uttname]``.  ``label`` is the result of
        ``self.process_label`` on ``self.utt2seg[uttname]``, modified in
        place: its first two columns are divided by the seconds-per-frame
        value and passed through an int cast, and its third column is
        incremented by one.
        """
        utt_id = self.uttlist[idx]

        # Loaded data -> STFT -> transformed feature.  The second value
        # returned by load_wav is not used.
        wav, _ = kaldi_data.load_wav(self.utt2ark[utt_id])
        spectrum = feature.stft(wav, self.frame_size, self.frame_shift)
        feat = feature.transform(spectrum, self.input_transform)

        label = self.process_label(self.utt2seg[utt_id])

        # presumably columns 0-1 are times in seconds and column 2 is an
        # id -- TODO confirm against process_label
        frame_dur = self.frame_shift * 1.0 / self.rate
        frame_idx = (label[:, :2] / frame_dur).astype(int)
        label[:, :2] = frame_idx
        shifted = label[:, 2] + 1
        label[:, 2] = shifted

        return utt_id, feat, label