Ejemplo n.º 1
0
    def test_fbank(self):
        """Test Fbank using 16kHz && 8kHz wav.

        First runs Fbank with ``delta_delta`` disabled on ``sm1_cln.wav`` and
        prints the ``[0:2, 0:6, 0]`` slice of the result. Then, for both
        ``english.wav`` and ``sm1_cln.wav``, runs Fbank with ``delta_delta``
        enabled and disabled, printing the evaluated features and the value
        returned by ``num_channels()``.

        No numeric assertion is made; the test passes if nothing raises.
        """

        def _evaluate(tensor):
            # Eager tensors expose .numpy(); graph tensors must be evaluated
            # inside the active session.
            if tf.executing_eagerly():
                return tensor.numpy()
            return tensor.eval()

        main_root = Path(os.environ["MAIN_ROOT"])
        wav_path_16k = str(main_root.joinpath("examples/sm1_cln.wav"))
        wav_path_8k = str(main_root.joinpath("examples/english.wav"))

        with self.session():
            # value test
            read_wav = ReadWav.params().instantiate()
            input_data, sample_rate = read_wav(wav_path_16k)
            fbank = Fbank.params({"delta_delta": False}).instantiate()
            fbank_test = fbank(input_data, sample_rate)
            # TODO: the value comparison for this slice was already disabled
            # before this revision. Reference values, kept for re-enabling it
            # with assertAllClose(..., rtol=1e-05, atol=1e-05):
            #   [3.768338, 4.946218, 6.289874, 6.330853, 6.761764, 6.884573]
            #   [3.803553, 5.450971, 6.547878, 5.796172, 6.397846, 7.242926]
            print(_evaluate(fbank_test)[0:2, 0:6, 0])

            for wav_file in [wav_path_8k, wav_path_16k]:
                read_wav = ReadWav.params().instantiate()
                input_data, sample_rate = read_wav(wav_file)
                print(wav_file, _evaluate(sample_rate))

                for delta_delta in (True, False):
                    conf = {
                        "delta_delta": delta_delta,
                        "lower_frequency_limit": 100,
                        "upper_frequency_limit": 0,
                    }
                    fbank = Fbank.params(conf).instantiate()
                    fbank_test = fbank(input_data, sample_rate)
                    # Evaluate in both configurations so the op actually runs
                    # in graph mode instead of printing a symbolic tensor.
                    print(_evaluate(fbank_test))
                    print(fbank.num_channels())
Ejemplo n.º 2
0
    def test_framepow(self):
        """Compare the first five Framepow outputs against reference values.

        ``sm1_cln.wav`` is read with ReadWav and passed through Framepow
        configured with ``snip_edges`` set to 1. The leading five values are
        checked with a tolerance of 1e-05 and then printed.
        """

        def _materialize(tensor):
            # Works in eager mode (.numpy()) and in graph mode (.eval()).
            return tensor.numpy() if tf.executing_eagerly() else tensor.eval()

        sample_file = Path(os.environ["MAIN_ROOT"]).joinpath(
            "examples/sm1_cln.wav")
        wav_path_16k = str(sample_file)

        with self.session():
            reader = ReadWav.params().instantiate()
            input_data, sample_rate = reader(wav_path_16k)
            framepow = Framepow.params({"snip_edges": 1}).instantiate()
            framepow_test = framepow(input_data, sample_rate)

            expected_head = np.array(
                [9.819611, 9.328745, 9.247337, 9.26451, 9.266059])

            self.assertAllClose(
                _materialize(framepow_test)[0:5],
                expected_head,
                rtol=1e-05,
                atol=1e-05,
            )
            print(_materialize(framepow_test)[0:5])
Ejemplo n.º 3
0
    def test_mel_spectrum(self):
        """Run MelSpectrum on ``sm1_cln.wav`` and print part of the output.

        The transform is built from an explicit configuration dictionary and
        the ``[0:2, 0:10]`` slice of its result is printed. Nothing is
        asserted about the values.
        """
        # 16kHz test
        main_root = Path(os.environ['MAIN_ROOT'])
        wav_path_16k = str(main_root.joinpath('examples/sm1_cln.wav'))

        with self.session():
            # value test
            wav_reader = ReadWav.params().instantiate()
            input_data, sample_rate = wav_reader(wav_path_16k)

            config = {
                'type': 'MelSpectrum',
                'window_type': 'hann',
                'upper_frequency_limit': 7600,
                'filterbank_channel_count': 80,
                'lower_frequency_limit': 80,
                'dither': 0.0,
                'window_length': 0.025,
                'frame_length': 0.010,
                'remove_dc_offset': False,
                'preEph_coeff': 0.0,
                'output_type': 3
            }
            mel_spectrum = MelSpectrum.params(config).instantiate()
            mel_spectrum_test = mel_spectrum(input_data, sample_rate)

            # Pick the accessor that matches the current execution mode.
            if tf.executing_eagerly():
                values = mel_spectrum_test.numpy()
            else:
                values = mel_spectrum_test.eval()
            print(values[0:2, 0:10])
Ejemplo n.º 4
0
    def test_write_wav(self):
        """Read ``sm1_cln.wav`` with a speed of 1.1 and write it back out.

        The samples returned by ReadWav are divided by 32768 before being
        passed to WriteWav, and the output goes to
        ``examples/sm1_cln_resample.wav`` under ``MAIN_ROOT``. The write op is
        run in the session; nothing is asserted about the written file.
        """
        main_root = Path(os.environ["MAIN_ROOT"])
        wav_path = str(main_root.joinpath("examples/sm1_cln.wav"))

        with self.cached_session() as sess:
            reader = ReadWav.params({"speed": 1.1}).instantiate()
            input_data, sample_rate = reader(wav_path)

            writer = WriteWav.params().instantiate()
            new_path = str(
                main_root.joinpath("examples/sm1_cln_resample.wav"))
            writewav_op = writer(new_path, input_data / 32768, sample_rate)
            sess.run(writewav_op)
Ejemplo n.º 5
0
    def test_spectrum(self):
        """Test Pitch using the 16kHz wav.

        Despite the method name, this exercises the Pitch transform: it reads
        ``sm1_cln.wav``, runs Pitch with ``window_length`` 0.025 and
        ``soft_min_f0`` 10.0, checks that the output has rank 2 and compares
        the first five rows against reference values (tolerance 1e-05).
        """

        def _evaluate(tensor):
            # Eager tensors expose .numpy(); graph tensors must be evaluated
            # inside the active session.
            if tf.executing_eagerly():
                return tensor.numpy()
            return tensor.eval()

        wav_path_16k = str(
            Path(os.environ["MAIN_ROOT"]).joinpath("examples/sm1_cln.wav"))

        # Reference rows for the first five frames; printed under the
        # "kaldi:" label below.
        output_true = [
            [-0.1366025, 143.8855],
            [-0.0226383, 143.8855],
            [-0.08464742, 143.8855],
            [-0.08458386, 143.8855],
            [-0.1208689, 143.8855],
        ]

        with self.session():
            read_wav = ReadWav.params().instantiate()
            input_data, sample_rate = read_wav(wav_path_16k)

            pitch = Pitch.params({
                "window_length": 0.025,
                "soft_min_f0": 10.0
            }).instantiate()
            pitch_test = pitch(input_data, sample_rate)

            self.assertEqual(_evaluate(tf.rank(pitch_test)), 2)

            # Print and compare the same slice in both execution modes.
            pitch_head = _evaluate(pitch_test)[0:5, :]
            print("Transform: ", pitch_head)
            print("kaldi:", output_true)
            self.assertAllClose(
                pitch_head,
                output_true,
                rtol=1e-05,
                atol=1e-05,
            )
Ejemplo n.º 6
0
    def test_read_wav(self):
        """Read ``sm1_cln.wav`` through ReadWav with a speed of 0.9.

        The same file is also loaded with librosa at 16000 Hz as a reference.
        The comparison against that reference only runs when ``speed`` equals
        1.0, so with the value of 0.9 set here no assertion is executed.
        """

        def _fetch(tensor):
            # Works in eager mode (.numpy()) and in graph mode (.eval()).
            return tensor.numpy() if tf.executing_eagerly() else tensor.eval()

        wav_path = str(
            Path(os.environ['MAIN_ROOT']).joinpath('examples/sm1_cln.wav'))

        with self.session():
            speed = 0.9
            reader = ReadWav.params().instantiate()
            input_data, sample_rate = reader(wav_path, speed)

            audio_data_true, sample_rate_true = librosa.load(wav_path, sr=16000)

            # The reference is only comparable to an unmodified-speed read.
            if speed != 1.0:
                return
            self.assertAllClose(_fetch(input_data) / 32768, audio_data_true)
            self.assertAllClose(_fetch(sample_rate), sample_rate_true)
Ejemplo n.º 7
0
    def test_spectrum(self):
        """Run Spectrum on ``english.wav`` and ``sm1_cln.wav``.

        For each file the output rank is asserted to be 2. For ``sm1_cln.wav``
        only, the ``[0:2, 0:5]`` slice is additionally compared against
        reference values with a tolerance of 1e-05.
        """

        def _to_numpy(tensor):
            # Works in eager mode (.numpy()) and in graph mode (.eval()).
            return tensor.numpy() if tf.executing_eagerly() else tensor.eval()

        main_root = Path(os.environ["MAIN_ROOT"])
        wav_path_16k = str(main_root.joinpath("examples/sm1_cln.wav"))
        wav_path_8k = str(main_root.joinpath("examples/english.wav"))

        with self.session():
            for wav_file in (wav_path_8k, wav_path_16k):
                reader = ReadWav.params().instantiate()
                input_data, sample_rate = reader(wav_file)

                spectrum_conf = {"window_length": 0.025, "dither": 0.0}
                spectrum = Spectrum.params(spectrum_conf).instantiate()
                spectrum_test = spectrum(input_data, sample_rate)

                output_true = np.array([
                    [9.819611, 2.84503, 3.660894, 2.7779, 1.212233],
                    [9.328745, 2.553949, 3.276319, 3.000918, 2.499342],
                ])
                self.assertEqual(_to_numpy(tf.rank(spectrum_test)), 2)

                # Reference values are only compared for sm1_cln.wav.
                if wav_file != wav_path_16k:
                    continue
                self.assertAllClose(
                    _to_numpy(spectrum_test)[0:2, 0:5],
                    output_true,
                    rtol=1e-05,
                    atol=1e-05,
                )
Ejemplo n.º 8
0
    def test_FbankPitch(self):
        """Run FbankPitch on ``sm1_cln.wav`` and check the output rank.

        The result is asserted to have rank 3, and its ``[0:2, :, 0]`` slice
        is printed. No values are compared.
        """
        sample_file = Path(os.environ['MAIN_ROOT']).joinpath(
            'examples/sm1_cln.wav')
        wav_path = str(sample_file)

        with self.session():
            reader = ReadWav.params().instantiate()
            input_data, sample_rate = reader(wav_path)

            config = {
                'window_length': 0.025,
                'output_type': 1,
                'frame_length': 0.010,
                'dither': 0.0
            }
            fbank_pitch = FbankPitch.params(config).instantiate()
            fbank_pitch_test = fbank_pitch(input_data, sample_rate)

            # Decide once which accessor matches the execution mode.
            eager = tf.executing_eagerly()
            rank = tf.rank(fbank_pitch_test)
            self.assertEqual(rank.numpy() if eager else rank.eval(), 3)

            if eager:
                features = fbank_pitch_test.numpy()
            else:
                features = fbank_pitch_test.eval()
            print(features[0:2, :, 0])
Ejemplo n.º 9
0
    def test_mfcc(self):
        """Compare a slice of the Mfcc output against reference values.

        ``sm1_cln.wav`` is read with ReadWav and passed through Mfcc with
        ``use_energy`` enabled. The ``[0, 0:2, 0:6]`` slice of the result must
        match the stored reference within a tolerance of 1e-05.
        """
        sample_file = Path(os.environ["MAIN_ROOT"]).joinpath(
            "examples/sm1_cln.wav")
        wav_path_16k = str(sample_file)

        with self.session():
            reader = ReadWav.params().instantiate()
            input_data, sample_rate = reader(wav_path_16k)
            mfcc = Mfcc.params({"use_energy": True}).instantiate()
            mfcc_test = mfcc(input_data, sample_rate)

            first_row = [
                9.819611, -30.58736, -7.088838, -10.67966, -1.646479, -4.36086
            ]
            second_row = [
                9.328745, -30.73371, -6.128432, -7.930599, 3.208357, -1.086456
            ]
            real_mfcc_feats = np.array([first_row, second_row])

            # Pick the accessor that matches the current execution mode.
            if tf.executing_eagerly():
                observed = mfcc_test.numpy()
            else:
                observed = mfcc_test.eval()

            self.assertAllClose(
                observed[0, 0:2, 0:6],
                real_mfcc_feats,
                rtol=1e-05,
                atol=1e-05,
            )