def test_scan_bad():
    """Audio.scan raises ValueError on non-wav and missing files."""
    # a path that exists but is not a wav file
    with pytest.raises(ValueError) as err:
        Audio.scan(__file__)
    # match on the exception message itself, not the ExceptionInfo repr
    assert 'is it a wav?' in str(err.value)

    # a path that does not exist at all
    with pytest.raises(ValueError) as err:
        Audio.scan('/path/to/some/lost/place')
    assert 'file not found' in str(err.value)
def test_save(tmpdir, audio):
    """Audio.save writes a loadable wav and refuses to overwrite."""
    p = str(tmpdir.join('test.wav'))
    audio.save(p)

    # cannot overwrite an existing file
    with pytest.raises(ValueError) as err:
        audio.save(p)
    # assert on the exception message, not the ExceptionInfo wrapper
    assert 'file already exist' in str(err.value)

    # save/load round-trip preserves the audio
    audio2 = Audio.load(p)
    assert audio == audio2

    # test with float32 wav: impulses at +1/-1 must survive the round-trip
    signal = np.zeros((1000,), dtype=np.float32)
    signal[10] = 1.0
    signal[20] = -1.0
    p = str(tmpdir.join('test2.wav'))
    audio = Audio(signal, 1000)
    audio.save(p)

    meta = Audio.scan(p)
    assert meta.nsamples == 1000
    assert meta.nchannels == 1

    audio2 = Audio.load(p)
    assert audio2 == audio
    assert audio2.data.min() == -1.0
    assert audio2.data.max() == 1.0
def __init__(self, config, utterances, log=get_logger('manager', 'warning')):
    """Setup the manager from a pipeline configuration and utterances.

    Scans the audio files, validates them against the pipeline,
    selects the features type from the configuration and prepares the
    framing parameters and (optionally) the CMVN processors.

    """
    self._config = config
    self._utterances = utterances
    self._warps = {}
    self.log = log

    self._check_utterances()

    # keep each file's metadata at hand: processor instanciation needs
    # access to the sample rate
    self._audio_metadata = {}
    for audio_file in set(utt.audio_file for utt in utterances):
        log.debug('scanning %s', audio_file)
        self._audio_metadata[audio_file] = Audio.scan(audio_file)

    # ensure every audio file is usable by the pipeline
    log.info('scanning %s utterances...', len(self._utterances))
    self._check_audio_files()

    # the features type to extract: the first configuration key that
    # names a valid features processor
    candidates = [name for name in self.config if name in self.valid_features]
    self.features = candidates[0]

    # framing parameters are constant across processors, so read them
    # from a single features processor instance
    proc = self.get_features_processor(next(iter(self.utterances)))
    self.frame_length = proc.frame_length
    self.frame_shift = proc.frame_shift

    # one CMVN processor per speaker when normalizing by speaker,
    # otherwise one per utterance
    if 'cmvn' in self.config:
        if self.config['cmvn']['by_speaker']:
            keys = set(utt.speaker for utt in self.utterances)
        else:
            keys = (utt.name for utt in self.utterances)
        self._cmvn_processors = {
            key: self.get_processor_class('cmvn')(proc.ndims)
            for key in keys}
def __init__(self, config, utterances, log=get_logger()):
    """Setup the manager from a pipeline configuration and utterances.

    Scans the wav files, validates them against the pipeline, selects
    the features type from the configuration and prepares the framing
    parameters and (optionally) the CMVN processors.

    """
    self._config = config
    self._utterances = utterances
    self.log = log

    # the list of speakers, or None when no utterance carries speaker
    # information
    self._speakers = set(u.speaker for u in self.utterances.values())
    if self._speakers == {None}:
        self._speakers = None
    self._check_speakers()

    # store the metadata because we need to access the sample rate
    # for processors instanciation
    wavs = set(u.file for u in utterances.values())
    self._wavs_metadata = {w: Audio.scan(w) for w in wavs}

    # make sure all the wavs are compatible with the pipeline
    # (lazy %-formatting: the message is only built if INFO is enabled)
    log.info('scanning %s utterances...', len(self._utterances))
    self._check_wavs()

    # the features type to be extracted
    self.features = [
        k for k in self.config.keys() if k in self._valid_features][0]

    # get some framing parameters constant for all processors
    # (retrieve them from a features processor instance)
    p = self.get_features_processor(next(iter(self.utterances.keys())))
    self.frame_length = p.frame_length
    self.frame_shift = p.frame_shift

    # if CMVN by speaker, instanciate a CMVN processor by speaker
    # here, else instanciate a processor per utterance
    if 'cmvn' in self.config:
        if self.config['cmvn']['by_speaker']:
            self._cmvn_processors = {
                spk: self.get_processor_class('cmvn')(p.ndims)
                for spk in self.speakers}
        else:
            self._cmvn_processors = {
                utt: self.get_processor_class('cmvn')(p.ndims)
                for utt in self.utterances}
def test_scan(wav_file, audio):
    """Audio.scan reports the same metadata as the loaded audio."""
    scanned = Audio.scan(wav_file)
    # chained comparisons also check scan and load agree with each other
    assert scanned.sample_rate == audio.sample_rate == 16000
    assert scanned.nchannels == audio.nchannels == 1
    assert scanned.nsamples == audio.nsamples == 22713
    expected_duration = pytest.approx(1.419, rel=1e-3)
    assert scanned.duration == audio.duration == expected_duration