Example #1
def test_scan_bad():
    # scanning something that is not a wav file must fail
    with pytest.raises(ValueError) as err:
        Audio.scan(__file__)
    assert 'is it a wav?' in str(err)

    # scanning a non-existing file must fail as well
    with pytest.raises(ValueError) as err:
        Audio.scan('/path/to/some/lost/place')
    assert 'file not found' in str(err)
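As the test shows, Audio.scan() signals both a non-wav input and a missing file with a ValueError. Below is a minimal defensive-scanning sketch based on that behaviour; the import path is an assumption and may differ in your package layout:

# sketch only: the import below is assumed, not confirmed by the examples
from shennong.audio import Audio


def safe_scan(path):
    """Return the metadata of `path`, or None if it cannot be scanned."""
    try:
        return Audio.scan(path)
    except ValueError as err:
        # scan() reports missing files and non-wav content as ValueError
        print(f'cannot scan {path}: {err}')
        return None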
Example #2
def test_save(tmpdir, audio):
    p = str(tmpdir.join('test.wav'))
    audio.save(p)

    # cannot overwrite an existing file
    with pytest.raises(ValueError) as err:
        audio.save(p)
    assert 'file already exist' in str(err)

    audio2 = Audio.load(p)
    assert audio == audio2

    # test with float32 wav
    signal = np.zeros((1000,), dtype=np.float32)
    signal[10] = 1.0
    signal[20] = -1.0
    p = str(tmpdir.join('test2.wav'))
    audio = Audio(signal, 1000)
    audio.save(p)
    meta = Audio.scan(p)
    assert meta.nsamples == 1000
    assert meta.nchannels == 1

    audio2 = Audio.load(p)
    assert audio2 == audio
    assert audio2.data.min() == -1.0
    assert audio2.data.max() == 1.0
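The same save/scan/load round trip can be exercised outside pytest. A rough sketch, with the Audio import path assumed as above and a temporary directory standing in for pytest's tmpdir fixture:

# sketch only: import path and constructor signature inferred from test_save
import os
import tempfile

import numpy as np
from shennong.audio import Audio  # assumed import path

signal = np.zeros((1000,), dtype=np.float32)  # 1000 samples at 1 kHz = 1 second
signal[10], signal[20] = 1.0, -1.0            # two spikes to verify the round trip

with tempfile.TemporaryDirectory() as tmp:
    path = os.path.join(tmp, 'roundtrip.wav')
    audio = Audio(signal, 1000)   # (data, sample_rate), as in test_save above
    audio.save(path)              # calling save(path) again would raise ValueError

    meta = Audio.scan(path)
    assert meta.nsamples == 1000 and meta.nchannels == 1
    assert Audio.load(path) == audio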
Example #3
    def __init__(self,
                 config,
                 utterances,
                 log=get_logger('manager', 'warning')):
        self._config = config
        self._utterances = utterances
        self._warps = {}
        self.log = log

        self._check_utterances()

        # store the metadata because we need to access the sample rate
        # for processors instantiation
        audio_files = set(utt.audio_file for utt in utterances)
        self._audio_metadata = {}
        for audio in audio_files:
            log.debug('scanning %s', audio)
            self._audio_metadata[audio] = Audio.scan(audio)

        # make sure all the audio files are compatible with the pipeline
        log.info('scanning %s utterances...', len(self._utterances))
        self._check_audio_files()

        # the features type to be extracted
        self.features = [
            k for k in self.config.keys() if k in self.valid_features
        ][0]

        # get some framing parameters constant for all processors
        # (retrieve them from a features processor instance)
        proc = self.get_features_processor(next(iter(self.utterances)))
        self.frame_length = proc.frame_length
        self.frame_shift = proc.frame_shift

        # if CMVN by speaker, instantiate a CMVN processor per speaker
        # here, else instantiate a processor per utterance
        if 'cmvn' in self.config:
            if self.config['cmvn']['by_speaker']:
                self._cmvn_processors = {
                    spk: self.get_processor_class('cmvn')(proc.ndims)
                    for spk in set(utt.speaker for utt in self.utterances)
                }
            else:
                self._cmvn_processors = {
                    utt.name: self.get_processor_class('cmvn')(proc.ndims)
                    for utt in self.utterances
                }
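The features type is selected by intersecting the configuration keys with the set of supported features and keeping the first match, exactly as done in __init__ above. A stripped-down sketch of that selection logic (the feature and option names here are only illustrative):

# illustration only: names are hypothetical, the pattern mirrors __init__ above
valid_features = ('mfcc', 'filterbank', 'plp')   # assumed names of supported extractors
config = {'mfcc': {'frame_length': 0.025}, 'cmvn': {'by_speaker': True}}

# keep the first configuration key that names a known features extractor
features = [k for k in config.keys() if k in valid_features][0]
assert features == 'mfcc'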
Example #4
    def __init__(self, config, utterances, log=get_logger()):
        self._config = config
        self._utterances = utterances
        self.log = log

        # the list of speakers
        self._speakers = set(u.speaker for u in self.utterances.values())
        if self._speakers == {None}:
            self._speakers = None
        self._check_speakers()

        # store the metadata because we need to access the sample rate
        # for processors instantiation
        wavs = set(u.file for u in utterances.values())
        self._wavs_metadata = {w: Audio.scan(w) for w in wavs}

        # make sure all the wavs are compatible with the pipeline
        log.info(f'scanning {len(self._utterances)} utterances...')
        self._check_wavs()

        # the features type to be extracted
        self.features = [
            k for k in self.config.keys() if k in self._valid_features][0]

        # get some framing parameters constant for all processors
        # (retrieve them from a features processor instance)
        p = self.get_features_processor(next(iter(self.utterances.keys())))
        self.frame_length = p.frame_length
        self.frame_shift = p.frame_shift

        # if CMVN by speaker, instantiate a CMVN processor per speaker
        # here, else instantiate a processor per utterance
        if 'cmvn' in self.config:
            if self.config['cmvn']['by_speaker']:
                self._cmvn_processors = {
                    spk: self.get_processor_class('cmvn')(p.ndims)
                    for spk in self.speakers}
            else:
                self._cmvn_processors = {
                    utt: self.get_processor_class('cmvn')(p.ndims)
                    for utt in self.utterances}
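This older variant expects utterances as a mapping from utterance name to an object exposing .file and .speaker (Example #3 instead reads .audio_file, .speaker and .name). A guess at the minimal structure the constructor iterates over, inferred from those attribute accesses:

# illustration only: the real utterance type lives in the library, this namedtuple
# just mirrors the attributes (.file, .speaker) read by __init__ above
import collections

Utterance = collections.namedtuple('Utterance', ['file', 'speaker'])

utterances = {
    'utt1': Utterance(file='/data/spk1/utt1.wav', speaker='spk1'),
    'utt2': Utterance(file='/data/spk2/utt2.wav', speaker='spk2'),
}

speakers = set(u.speaker for u in utterances.values())  # {'spk1', 'spk2'}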
Example #5
def test_scan(wav_file, audio):
    meta = Audio.scan(wav_file)
    assert meta.sample_rate == audio.sample_rate == 16000
    assert meta.nchannels == audio.nchannels == 1
    assert meta.nsamples == audio.nsamples == 22713
    assert meta.duration == audio.duration == pytest.approx(1.419, rel=1e-3)
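As a sanity check on those numbers, the duration is just nsamples / sample_rate: 22713 / 16000 = 1.4195625 s, which falls within the rel=1e-3 tolerance of the 1.419 value asserted above.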