Ejemplo n.º 1
0
def run_audio(data, audio_base, audio_path, audio_conf):
    """Compute a spectrogram for every item in ``data`` and dump them to HDF5.

    If ``audio_path`` already exists, nothing is done. Otherwise the parent
    directory is created and a new HDF5 file is written with two datasets:

    * ``melspec``     -- float, shape ``(n, num_mel_bins, target_length)``
    * ``melspec_len`` -- int64, shape ``(n,)``; the frame count reported by
      ``compute_spectrogram`` for each item

    Args:
        data: Sized iterable of mappings; each must provide a ``'wav'`` key
            with the audio path relative to ``audio_base``.
        audio_base: Directory prefix joined to each ``d['wav']`` with ``/``.
        audio_path: Destination HDF5 file path.
        audio_conf: Spectrogram settings passed to ``compute_spectrogram``.
            NOTE: this dict is updated in place with defaults for
            ``num_mel_bins`` (40), ``target_length`` (2048) and
            ``use_raw_length`` (False).

    Raises:
        AssertionError: if ``audio_conf['use_raw_length']`` is truthy; the
            fixed-shape ``melspec`` dataset cannot hold variable lengths.
    """
    if os.path.exists(audio_path):
        print('%s already exists. skip' % audio_path)
        return

    print('Dumping audio to HDF5 : %s' % audio_path)
    print('  audio_conf : %s' % audio_conf)

    # Fill in defaults without overriding values supplied by the caller.
    audio_conf.setdefault('num_mel_bins', 40)
    audio_conf.setdefault('target_length', 2048)
    audio_conf.setdefault('use_raw_length', False)
    # Every row of the dataset has the same shape, so raw (unpadded) lengths
    # are not supported here.
    assert not audio_conf['use_raw_length']

    # dump audio
    n = len(data)
    Path(os.path.dirname(audio_path)).mkdir(parents=True, exist_ok=True)
    dset_mel_shape = (n, audio_conf['num_mel_bins'],
                      audio_conf['target_length'])

    # The context manager guarantees the file is flushed and closed, even if
    # loading or spectrogram computation fails part-way through.
    with h5py.File(audio_path, 'w') as f:
        dset_mel = f.create_dataset('melspec', dset_mel_shape, dtype='f')
        dset_len = f.create_dataset('melspec_len', (n,), dtype='i8')

        start = time.time()
        for i, d in enumerate(data):
            # sr=None keeps the file's native sampling rate. It must be
            # passed by keyword: librosa >= 0.10 rejects positional args
            # after ``path``.
            y, sr = librosa.load('%s/%s' % (audio_base, d['wav']), sr=None)
            logspec, n_frames = compute_spectrogram(y, sr, audio_conf)
            dset_mel[i, :, :] = logspec
            dset_len[i] = n_frames

            if i % 100 == 0:
                t = time.time() - start
                print('processed %d / %d audios (%.fs)' % (i, n, t))
Ejemplo n.º 2
0
def load_raw_spectrogram(path):
    """Load an audio file and compute its raw-length spectrogram.

    The audio is resampled to 16 kHz on load and passed to
    ``compute_spectrogram`` with a fixed configuration (5 ms window,
    2 ms stride, 1024-point FFT, ``use_raw_length=True``).

    Args:
        path: Path of the audio file to load.

    Returns:
        A tuple ``(logspec, window_stride, sample_rate)`` where ``logspec``
        is the result of calling ``.numpy()`` on the spectrogram returned by
        ``compute_spectrogram`` (presumably a torch tensor -- TODO confirm),
        and the other two values are taken from the fixed configuration.
    """
    audio_conf = {
        'audio_type': 'spectrogram',
        'sample_rate': 16000,
        'window_size': 0.005,
        'window_stride': 0.002,
        'n_fft': 1024,
        'use_raw_length': True,
    }
    # ``sr`` must be passed by keyword: librosa >= 0.10 made every argument
    # after ``path`` keyword-only, so the positional form raises TypeError.
    y, sr = librosa.load(path, sr=audio_conf['sample_rate'])
    logspec, _ = compute_spectrogram(y, sr, audio_conf)
    logspec = logspec.numpy()
    return logspec, audio_conf['window_stride'], audio_conf['sample_rate']
Ejemplo n.º 3
0
 def load_mel_spectrogram_and_path(path):
     """Load ``path`` at 16 kHz and return its spectrogram with the path.

     ``audio_conf`` is not a parameter; it is read from the enclosing
     scope (not visible in this fragment).

     Returns:
         A tuple ``(logmelspec, n_frames, path)`` where the first two items
         are exactly what ``compute_spectrogram`` returns.
     """
     # ``sr`` must be passed by keyword: librosa >= 0.10 rejects positional
     # arguments after ``path``.
     y, sr = librosa.load(path, sr=16000)
     logmelspec, n_frames = compute_spectrogram(y, sr, audio_conf)
     return logmelspec, n_frames, path
Ejemplo n.º 4
0
 def _LoadAudio(self, path):
     """Load the audio at ``path`` and compute its spectrogram.

     The file is loaded at its native sampling rate (``sr=None``) and
     processed with ``self.audio_conf``.

     Returns:
         The ``(logspec, n_frames)`` pair produced by
         ``compute_spectrogram``.
     """
     # ``sr`` must be passed by keyword: librosa >= 0.10 rejects positional
     # arguments after ``path``.
     y, sr = librosa.load(path, sr=None)
     logspec, n_frames = compute_spectrogram(y, sr, self.audio_conf)
     return logspec, n_frames