コード例 #1
0
ファイル: test_backend.py プロジェクト: KPatr1ck/models
def test_load():
    """Check ``paddleaudio.load`` for sample rate, dtype and clip duration.

    Two loads of ``TEST_FILE`` are verified: one with only the target sample
    rate given, and one that additionally requests an offset, a duration and
    an ``int16`` output dtype.
    """
    # Case 1: only the sample rate is requested.
    samples, rate = paddleaudio.load(TEST_FILE, sr=16000)
    assert rate == 16000
    assert samples.dtype == 'float32'

    # Case 2: a 2-second excerpt starting at offset 1, returned as int16.
    clip_options = dict(sr=16000, offset=1, duration=2, dtype='int16')
    samples, rate = paddleaudio.load(TEST_FILE, **clip_options)
    assert len(samples) / rate == 2.0
    assert rate == 16000
    assert samples.dtype == 'int16'
コード例 #2
0
    def _load(self, file):
        """Load one example from ``file`` and return its features, transposed.

        When ``self.data_type`` is ``'wav'`` the file is read with
        ``paddleaudio.load`` and turned into a dB-scaled mel representation
        using the settings found in the config mapping ``c`` (defined outside
        this method). For any other data type the file is read with
        ``np.load`` and used unchanged.

        In training mode the features are padded to ``c['mel_crop_len']`` and,
        when ``self.augment`` is set, randomly cropped and passed through
        ``augmentation.spect_augment``. Otherwise they are padded to
        ``c['max_mel_len']`` and truncated to that length along axis 1.

        Args:
            file: Path of the file to load.

        Returns:
            The transpose (``x.T``) of the processed feature array.
        """
        if self.data_type == 'wav':
            s, _ = paddleaudio.load(file, sr=c['sample_rate'])
            s = np.pad(s, ((0, 1), (0, 0)), 'constant', constant_values=(0, ))
            # A former `(np.exp(s) - 1)**2` assignment was always overwritten
            # by this line, so only the value that is actually used is kept.
            power = np.abs(s)**2
            melW = librosa.filters.mel(sr=c['sample_rate'],
                                       n_fft=c['window_size'],
                                       n_mels=c['mel_bins'],
                                       fmin=c['fmin'],
                                       fmax=c['fmax'])
            mel = np.matmul(melW, power)
            x = librosa.power_to_db(mel, ref=1.0, amin=1e-10, top_db=None)
        else:
            s = np.load(file)
            print(s.shape)
            x = s

        if self.training:
            x = self._pad(x, c['mel_crop_len'])
            if self.augment:
                x = augmentation.random_crop2d(x,
                                               c['mel_crop_len'],
                                               tempo_axis=1)
                x = augmentation.spect_augment(x, tempo_axis=1)
        else:
            # Evaluation uses the whole example, capped at max_mel_len.
            x = self._pad(x, c['max_mel_len'])
            x = x[:, :c['max_mel_len']]

        return x.T
コード例 #3
0
def load_audio(file):
    """Read an audio file from a local path as a paddle tensor.

    The file is loaded through ``paddleaudio.load`` at 16 kHz with
    ``normal=True`` and ``norm_type='gaussian'`` (re-normalisation to zero
    mean and unit variance), converted to a tensor, and given a new leading
    axis.

    Args:
        file: Local path of the audio file.

    Returns:
        The loaded audio as a paddle tensor with an extra axis at position 0.
    """
    waveform, _ = paddleaudio.load(file,
                                   sr=16000,
                                   normal=True,
                                   norm_type='gaussian')
    return paddle.to_tensor(waveform).unsqueeze(0)
コード例 #4
0
ファイル: test.py プロジェクト: cuicheng01/models
def get_feature(file, model, melspectrogram, random_sampling=False):
    """Return the normalised model output for ``file``, caching per path.

    Results are stored in the ``file2feature`` mapping (defined outside this
    function) keyed by ``file``; a cached entry is returned without touching
    the audio or the model again.

    Args:
        file: Path of the audio file.
        model: Callable applied to the mel features under ``paddle.no_grad()``.
        melspectrogram: Callable that converts the waveform tensor to features.
        random_sampling: Accepted but not used by this function.

    Returns:
        The model output divided by the square root of its sum of squares.
    """
    if file not in file2feature:
        waveform, _ = paddleaudio.load(file, sr=16000)
        # Add a leading axis before handing the waveform to the transform.
        batch = paddle.to_tensor(waveform[None, :])
        mel = melspectrogram(batch).astype('float32')
        with paddle.no_grad():
            embedding = model(mel)
        norm = paddle.sqrt(paddle.sum(embedding**2))
        file2feature[file] = embedding / norm

    return file2feature[file]
コード例 #5
0
def load_and_extract_feature(file):
    """Load ``file`` and return its mel features as a paddle tensor.

    All feature settings come from the config mapping ``c`` defined outside
    this function. The features from ``pa.features.mel_spect`` are transposed
    and then given two new leading axes.

    Args:
        file: Path of the audio file.

    Returns:
        A paddle tensor built from the transposed features, with axes added
        at positions 0 and 1.
    """
    sample_rate = c['sample_rate']
    waveform, _ = pa.load(file, sr=sample_rate)

    mel_options = dict(
        sample_rate=sample_rate,
        window_size=c['window_size'],
        hop_length=c['hop_size'],
        mel_bins=c['mel_bins'],
        fmin=c['fmin'],
        fmax=c['fmax'],
        window='hann',
        center=True,
        pad_mode='reflect',
        ref=1.0,
        amin=1e-10,
        top_db=None,
    )
    mel = pa.features.mel_spect(waveform, **mel_options)

    # The transpose is required before the tensor is built (flagged "!!" in
    # the original code).
    return paddle.Tensor(mel.T).unsqueeze((0, 1))
コード例 #6
0
ファイル: inference.py プロジェクト: cuicheng01/models
def load_and_extract_feature(file, c):
    """Load ``file`` and compute its dB-scaled mel features.

    Args:
        file: Path of the audio file.
        c: Config mapping providing ``sample_rate``, ``window_size``,
            ``hop_size``, ``mel_bins``, ``fmin`` and ``fmax``.

    Returns:
        The ``melspectrogram`` output with its last two axes swapped and a
        new axis inserted at position 0.
    """
    sample_rate = c['sample_rate']
    window_size = c['window_size']

    waveform, _ = pa.load(file, sr=sample_rate)
    mel_options = dict(
        sr=sample_rate,
        # The same value is used for the window length and the FFT size.
        win_length=window_size,
        n_fft=window_size,
        hop_length=c['hop_size'],
        n_mels=c['mel_bins'],
        f_min=c['fmin'],
        f_max=c['fmax'],
        window='hann',
        center=True,
        pad_mode='reflect',
        to_db=True,
        amin=1e-3,
        top_db=None,
    )
    mel = melspectrogram(paddle.to_tensor(waveform), **mel_options)
    return mel.transpose((0, 2, 1)).unsqueeze((0, ))
コード例 #7
0
def test_case(sr, n_fft, hop_length, win_length, window, center, pad_mode,
              power, n_mels, f_min, f_max, dtype, device):
    """Compare paddleaudio's mel spectrogram with librosa's on a test clip.

    Both implementations receive the same analysis settings, and the mean
    absolute difference of their outputs must stay below a dtype-dependent
    tolerance.

    Note: the ``sr`` argument is overwritten by the rate returned from
    ``paddleaudio.load`` and both calls use a fixed ``sr=16000``; ``power`` is
    not forwarded (librosa is always called with ``power=2.0``).
    """
    paddle.set_device(device)
    signal, sr = paddleaudio.load('./test/unit_test/test_audio.wav')
    signal_tensor = paddle.to_tensor(signal)

    # Settings both libraries accept under identical keyword names.
    shared_options = dict(
        sr=16000,
        n_fft=n_fft,
        hop_length=hop_length,
        win_length=win_length,
        window=window,
        center=center,
        n_mels=n_mels,
        pad_mode=pad_mode,
        htk=True,
        norm='slaney',
    )

    paddle_cpu_feat = paddleaudio.functional.melspectrogram(signal_tensor,
                                                            f_min=f_min,
                                                            f_max=f_max,
                                                            dtype=dtype,
                                                            **shared_options)
    librosa_feat = librosa.feature.melspectrogram(signal,
                                                  power=2.0,
                                                  fmin=f_min,
                                                  fmax=f_max,
                                                  **shared_options)

    err = np.mean(np.abs(librosa_feat - paddle_cpu_feat.numpy()))
    # float64 is held to a tighter tolerance than the other dtypes.
    tolerance = 1.0e-07 if dtype == 'float64' else 5.0e-07
    assert err < tolerance
コード例 #8
0
ファイル: dataset.py プロジェクト: cuicheng01/models
    def __getitem__(self, idx):
        """Return ``(wav, cls_idx)`` for the sample at ``idx``.

        ``idx`` is wrapped modulo ``len(self.keys)``. If the selected file
        cannot be loaded, the next key is tried instead (wrapping around), so
        one unreadable file neither retries forever nor stops iteration. The
        returned label then belongs to the sample that was actually loaded.

        A speed variant is picked at random: unchanged, or resampled from
        16000 to ``16000 * 0.9`` or ``16000 * 1.1``. Each variant gets its own
        label, ``cls_idx * 3 + variant``. Finally the waveform is randomly
        cropped/padded when ``self.augment`` is set, or centre cropped/padded
        when only ``self.duration`` is set.

        Args:
            idx: Integer index of the requested sample.

        Returns:
            Tuple ``(wav, cls_idx)``.

        Raises:
            RuntimeError: If no file could be loaded after trying every key.
        """
        num_keys = len(self.keys)
        idx = idx % num_keys

        # Without augmentation only the configured duration is read from disk.
        file_duration = None
        if not self.augment and self.duration:
            file_duration = self.duration

        last_error = None
        for _ in range(num_keys):
            key = self.keys[idx]
            spk = key.split('-')[0]
            cls_idx = self.spk2cls[spk]
            file = self.key2file[key]
            try:
                wav, sr = paddleaudio.load(file,
                                           sr=self.sample_rate,
                                           duration=file_duration)
                break
            except Exception as err:
                # `Exception` rather than a bare except, so Ctrl-C and
                # interpreter exit are not swallowed by the retry loop.
                last_error = err
                print(f'error loading file {file}')
                # Move on to a different sample; retrying the same file
                # would fail the same way again.
                idx = (idx + 1) % num_keys
        else:
            raise RuntimeError(
                'failed to load any audio file from the dataset'
            ) from last_error

        speed = random.choice([0, 1, 2])
        if speed == 1:
            wav = paddleaudio.resample(wav, 16000, 16000 * 0.9)
            cls_idx = cls_idx * 3 + 1
        elif speed == 2:
            wav = paddleaudio.resample(wav, 16000, 16000 * 1.1)
            cls_idx = cls_idx * 3 + 2
        else:
            cls_idx = cls_idx * 3

        if self.augment:
            wav = augments.random_crop_or_pad1d(wav, self.duration)
        elif self.duration:
            wav = augments.center_crop_or_pad1d(wav, self.duration)

        return wav, cls_idx
コード例 #9
0
ファイル: test_m4a.py プロジェクト: cuicheng01/models
import paddleaudio

# Script entry point: load the sample m4a file through paddleaudio. The
# returned value is discarded, so the run only checks that loading does not
# raise.
if __name__ == '__main__':
    paddleaudio.load('./test_audio.m4a')
コード例 #10
0
                                           amin=1e-10,
                                           top_db=None)
            dst_h5.create_dataset(key, data=x)
        src_h5.close()
        dst_h5.close()

# Convert the listed wav files: each file is loaded at the requested sample
# rate and passed through pa.melspectrogram. The remainder of the loop body
# is not visible in this excerpt.
if len(wav_files) > 0:

    # An output path is mandatory when the input is wav files.
    assert args.dst_h5_file != '', 'for using wav file or wav list, dst_h5_file must be specified'

    dst_file = args.dst_h5_file
    # Refuse to overwrite an existing output file.
    assert not os.path.exists(dst_file), f'target file {dst_file} existed'
    dst_h5 = h5py.File(dst_file, "w")
    print(f'{len(wav_files)} wav files listed')
    for f in tqdm.tqdm(wav_files):
        # Load at the target sample rate; the returned rate is not needed.
        s, _ = pa.load(f, sr=args.sample_rate)
        x = pa.melspectrogram(s,
                              sr=args.sample_rate,
                              window_size=args.window_size,
                              hop_length=args.hop_length,
                              n_mels=args.mel_bins,
                              fmin=args.fmin,
                              fmax=args.fmax,
                              window='hann',
                              center=True,
                              pad_mode='reflect',
                              ref=1.0,
                              amin=1e-10,
                              top_db=None)
        #         figure(figsize=(8,8))
        #         imshow(x)