def collect_features(self,file_path):
        """Return the PyWorld analysis features of one audio file, cached on disk.

        The cache file is ``<preprocess_dir>/<speaker>/<basename of file_path>``.
        When it already exists it is loaded and returned as-is; otherwise the
        features are computed from the audio, written to that path and returned.

        Args:
            file_path: Path of the audio file to analyse.

        Returns:
            The horizontal stack of log-f0 (one column), the spectral-envelope
            coding produced with ``dim=24`` and ``ap``, in that column order.
        """
        sr = 16000

        save_path = os.path.join(self.preprocess_dir, self.speaker, os.path.basename(file_path))

        if os.path.exists(save_path):
            # The cache is written with ndarray.dump(), i.e. as a pickle, and
            # np.load() rejects pickled payloads unless allow_pickle=True.
            # NOTE(review): unpickling can execute code stored in the file, so
            # this is only safe while preprocess_dir holds trusted data.
            return np.load(save_path, allow_pickle=True)

        wav, _ = librosa.load(file_path, sr=sr, mono=True)
        wav_padded = wav_padding(wav, sr=sr, frame_period=5, multiple=4)
        f0, _, sp, ap = world_decompose(wav_padded, sr)

        mcep = world_encode_spectral_envelop(sp, sr, dim=24)

        # f0 gets a trailing axis so it can be stacked as a column. np.ma.log
        # masks non-positive entries instead of emitting -inf for zero f0.
        # NOTE(review): np.hstack is not mask-aware, so the mask is presumably
        # dropped in the stacked result - confirm.
        # TODO: better solution for zero f0.
        f0 = np.ma.log(f0[:, None])

        features = np.hstack((f0, mcep, ap))

        # Make sure the per-speaker cache directory exists before writing.
        os.makedirs(os.path.dirname(save_path), exist_ok=True)
        features.dump(save_path)

        return features
Example #2
0
def load_wav(wavfile, sr=16000):
    """Load ``wavfile`` as mono audio at rate ``sr`` and return it padded.

    The samples come from ``librosa.load``; the rate it reports is discarded.
    Padding is delegated to ``wav_padding`` with ``frame_period=5`` and
    ``multiple=4``.
    """
    samples, _rate = librosa.load(wavfile, sr=sr, mono=True)
    padded = wav_padding(samples, sr=sr, frame_period=5, multiple=4)
    return padded
Example #3
0
        }
        print('-------------------------------')
        wavs = os.listdir(data_root)
        self.wavs = [os.path.join(data_root, wav_name) for wav_name in wavs]
        self.samples = []
        self.samples_paths = []
        for wav_path in self.wavs:
            _sample = wav_loader(wav_path, period=3, stride=1)
            self.samples.extend(_sample)
            self.samples_paths.extend([wav_path for _ in range(len(_sample))])

    def __len__(self):
        """Return how many samples the dataset holds."""
        sample_count = len(self.samples)
        return sample_count

    def __getitem__(self, idx):
        """Return ``(sample, path)`` for position ``idx``.

        ``path`` is the entry of ``samples_paths`` stored at the same index as
        the sample.
        """
        return self.samples[idx], self.samples_paths[idx]


if __name__ == '__main__':
    # Manual smoke test: iterate the dataset and pass a two-copy list of each
    # sample through utils.wav_padding, printing the shapes along the way.
    import cv2
    import utils
    wavDataset = WavDataset(is_train=True)
    # __getitem__ returns a (sample, path) pair, so unpack exactly two values;
    # the former three-name unpacking raised ValueError on the first item.
    # NOTE(review): assumes WavDataset is the class that defines the
    # __getitem__ returning (sample, path) - confirm.
    for _ff, _path in wavDataset:
        print(_ff.shape, _path)
        pa_wav = utils.wav_padding([_ff, _ff])
        print(pa_wav.shape)
    #     cv2.imshow('', pa_wav[0])
    #     cv2.waitKey()