예제 #1
0
    def test_pitch(self):
        """Compare the first 50 flattened Pitch outputs with stored values."""
        audio_file = str(
            Path(PACKAGE_ROOT_DIR) / 'layers/ops/data/sm1_cln.wav')
        pitch_config = {
            'window_length': 0.025,
            'frame_length': 0.010,
            'thres_autoc': 0.3
        }
        # Stored reference values for the first 50 flattened outputs.
        expected = np.array([
            0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
            0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
            0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
            0.000000, 0.000000, 122.823532, 117.647057, 116.788322,
            116.788322, 119.402985, 119.402985, 119.402985, 119.402985,
            119.402985, 123.076920, 124.031006, 125.000000, 132.065216,
            139.130432, 139.130432, 137.931030, 126.108368, 114.285713,
            115.107910, 122.070084, 129.032257, 130.081299, 130.081299,
            129.032257, 130.081299, 131.147537, 129.032257, 125.000000,
            120.300751, 115.107910
        ])

        with self.cached_session(use_gpu=False, force_gpu=False):
            reader = ReadWav.params().instantiate()
            samples, rate = reader.call(audio_file)
            # The samples are divided by 32768 before being fed to Pitch.
            scaled = samples / 32768
            extractor = Pitch.params(pitch_config).instantiate()
            pitch_test = extractor(scaled, rate)

            actual = pitch_test.eval().flatten()[:50]
            self.assertAllClose(actual, expected)
예제 #2
0
  def test_fbank(self):
    """Run Fbank on sm1_cln.wav and compare a 2x6 slice with stored values."""
    wav_file = str(Path(PACKAGE_OPS_DIR) / 'data/sm1_cln.wav')
    fbank_config = {
        'window_length': 0.025,
        'output_type': 1,
        'frame_length': 0.010,
        'snip_edges': True
    }
    expected_feats = np.array(
        [[3.768338, 4.946218, 6.289874, 6.330853, 6.761764, 6.884573],
         [3.803553, 5.450971, 6.547878, 5.796172, 6.397846, 7.242926]])

    with self.cached_session(use_gpu=False, force_gpu=False):
      reader = ReadWav.params().instantiate()
      samples, rate = reader(wav_file)
      extractor = Fbank.params(fbank_config).instantiate()
      fbank_test = extractor(samples, rate)

      # The output is required to be a rank-3 tensor.
      self.assertEqual(tf.rank(fbank_test).eval(), 3)

      actual_feats = np.squeeze(fbank_test.eval()[0:2, 0:6, 0])
      self.assertAllClose(
          actual_feats, expected_feats, rtol=1e-05, atol=1e-05)
예제 #3
0
    def test_framepow(self):
        """Compare the first 50 Framepow outputs with stored values."""
        audio_file = str(
            Path(os.environ['MAIN_ROOT']) /
            'delta/layers/ops/data/sm1_cln.wav')
        framepow_config = {'window_length': 0.025, 'frame_length': 0.010}
        # Stored reference values for the first 50 flattened outputs.
        expected = np.array([
            0.000018, 0.000011, 0.000010, 0.000010, 0.000010, 0.000010,
            0.000008, 0.000009, 0.000009, 0.000009, 0.000009, 0.000011,
            0.090164, 0.133028, 0.156547, 0.053551, 0.056670, 0.097706,
            0.405659, 2.119505, 4.296845, 6.139090, 6.623638, 6.136467,
            7.595072, 7.904415, 7.655983, 6.771016, 5.706427, 4.220942,
            3.259599, 2.218259, 1.911394, 2.234246, 3.056905, 2.534153,
            0.464354, 0.013493, 0.021231, 0.148362, 0.364829, 0.627266,
            0.494912, 0.366029, 0.315408, 0.312441, 0.323796, 0.267505,
            0.152856, 0.045305
        ])

        with self.session():
            reader = ReadWav.params().instantiate()
            samples, rate = reader(audio_file)

            extractor = Framepow.params(framepow_config).instantiate()
            framepow_test = extractor(samples, rate)

            # The output is required to be a rank-1 tensor.
            self.assertEqual(tf.rank(framepow_test).eval(), 1)
            actual = framepow_test.eval().flatten()[:50]
            self.assertAllClose(actual, expected)
예제 #4
0
    def test_plp(self):
        """Compare a 5x5 slice of the Plp output with stored values."""
        audio_file = str(
            Path(os.environ['MAIN_ROOT']) /
            'delta/layers/ops/data/sm1_cln.wav')
        plp_config = {
            'window_length': 0.025,
            'frame_length': 0.010,
            'plp_order': 12
        }
        expected = np.array(
            [[-0.209490, -0.326126, 0.010536, -0.027167, -0.117118],
             [-0.020293, -0.454695, -0.104243, 0.001560, -0.234854],
             [-0.015118, -0.444044, -0.156695, -0.086221, -0.319310],
             [-0.031856, -0.130708, 0.047435, -0.089916, -0.160247],
             [0.052763, -0.271487, 0.011329, 0.025320, 0.012851]])

        with self.cached_session(use_gpu=False, force_gpu=False):
            reader = ReadWav.params().instantiate()
            samples, rate = reader(audio_file)

            extractor = Plp.params(plp_config).instantiate()
            plp_test = extractor(samples, rate)

            # The output is required to be a rank-2 tensor.
            self.assertEqual(tf.rank(plp_test).eval(), 2)
            # Only rows 50-54 and columns 5-9 are compared.
            actual = plp_test.eval()[50:55, 5:10]
            self.assertAllClose(actual, expected)
예제 #5
0
  def test_plp(self):
    """Compare a 5x5 slice of the Plp output with stored values (1e-2 tolerance)."""
    audio_file = str(Path(PACKAGE_ROOT_DIR) / 'layers/ops/data/sm1_cln.wav')
    plp_config = {
        'window_length': 0.025,
        'frame_length': 0.010,
        'plp_order': 12
    }
    expected = np.array(
        [[-0.209490, -0.326126, 0.010536, -0.027167, -0.117118],
         [-0.020293, -0.454695, -0.104243, 0.001560, -0.234854],
         [-0.015118, -0.444044, -0.156695, -0.086221, -0.319310],
         [-0.031856, -0.130708, 0.047435, -0.089916, -0.160247],
         [0.052763, -0.271487, 0.011329, 0.025320, 0.012851]])

    with self.cached_session(use_gpu=False, force_gpu=False):
      reader = ReadWav.params().instantiate()
      samples, rate = reader(audio_file)
      # The samples are divided by 32768 before being fed to Plp.
      scaled = samples / 32768

      extractor = Plp.params(plp_config).instantiate()
      plp_test = extractor(scaled, rate)

      self.assertEqual(tf.rank(plp_test).eval(), 2)
      # The tolerance is loose because the povey window is used instead of
      # the hamming window in spectrum.
      actual = plp_test.eval()[50:55, 5:10]
      self.assertAllClose(actual, expected, rtol=1e-02, atol=1e-02)
  def test_add_rir_noise_aecres(self):
    """Apply Add_rir_noise_aecres to sm1_cln.wav and write the result to a wav.

    The clean and processed data are printed; nothing is asserted on the
    values.
    """
    ops_dir = Path(PACKAGE_OPS_DIR)
    clean_wav = str(ops_dir.joinpath('data/sm1_cln.wav'))

    # Reset the paths of the noise and rir lists.
    data_path = str(ops_dir.joinpath('data')) + '/'
    change_file_path(data_path, 'noiselist.scp', 'noiselist_new.scp')
    change_file_path(data_path, 'rirlist.scp', 'rirlist_new.scp')
    config = {
        'if_add_noise': True,
        'noise_filelist': data_path + 'noiselist_new.scp',
        'if_add_rir': True,
        'rir_filelist': data_path + 'rirlist_new.scp'
    }

    with self.cached_session(use_gpu=False, force_gpu=False) as sess:
      reader = ReadWav.params().instantiate()
      input_data, sample_rate = reader(clean_wav)
      augmenter = Add_rir_noise_aecres.params(config).instantiate()
      noisy_data = augmenter(input_data, sample_rate)
      print('Clean Data:', input_data.eval())
      print('Noisy Data:', noisy_data.eval())

      # The processed data is divided by 32768 before being written.
      noisy_wav = data_path + 'sm1_cln_noisy.wav'
      writer = WriteWav.params().instantiate()
      sess.run(writer(noisy_wav, noisy_data / 32768, sample_rate))
예제 #7
0
  def test_spectrum(self):
    """Run the Pitch op on sm1_cln.wav and check that its output has rank 2.

    Despite the method name, the op exercised here is Pitch.
    """
    audio_file = str(Path(PACKAGE_OPS_DIR) / 'data/sm1_cln.wav')
    pitch_config = {'window_length': 0.025, 'soft_min_f0': 10.0}

    with self.cached_session(use_gpu=False, force_gpu=False):
      reader = ReadWav.params().instantiate()
      samples, rate = reader(audio_file)

      extractor = Pitch.params(pitch_config).instantiate()
      pitch_test = extractor(samples, rate)

      self.assertEqual(tf.rank(pitch_test).eval(), 2)

      # NOTE(review): these reference values are built but never compared
      # against pitch_test in this block -- confirm whether an assertion is
      # missing.
      output_true = np.array([
          [0.03881124, 0.3000031, -0.02324523],
          [0.006756478, 0.3000097, 0.01047742],
          [0.02455365, 0.3000154, 0.00695902],
          [0.02453586, 0.3000221, 0.008448198],
          [0.03455311, 0.3000307, -0.07547269],
          [0.04293294, 0.3000422, -0.04193667],
      ])
예제 #8
0
    def test_mel_spectrum(self):
        """Run MelSpectrum on sm1_cln.wav and print a slice of its output.

        Nothing is asserted; the sample rate and the output slice are only
        printed.
        """
        # 16kHz test
        wav_path_16k = str(Path(PACKAGE_OPS_DIR) / 'data/sm1_cln.wav')
        mel_config = {
            'type': 'MelSpectrum',
            'window_type': 'hann',
            'upper_frequency_limit': 7600,
            'filterbank_channel_count': 80,
            'lower_frequency_limit': 80,
            'dither': 0.0,
            'window_length': 0.025,
            'frame_length': 0.010,
            'remove_dc_offset': False,
            'preEph_coeff': 0.0,
            'output_type': 3,
            'sample_rate': 16000
        }

        with self.cached_session(use_gpu=False, force_gpu=False):
            # value test
            reader = ReadWav.params().instantiate()
            samples, rate = reader(wav_path_16k)
            print(rate.eval())
            extractor = MelSpectrum.params(mel_config).instantiate()
            mel_spectrum_test = extractor(samples, rate)
            print(mel_spectrum_test.eval()[0:2, 0:10])
예제 #9
0
    def test_zcr(self):
        """Compare the first 50 flattened Zcr outputs with stored values."""
        audio_file = str(
            Path(os.environ['MAIN_ROOT']) /
            'delta/layers/ops/data/sm1_cln.wav')
        zcr_config = {'window_length': 0.025, 'frame_length': 0.010}
        # Stored reference values for the first 50 flattened outputs.
        expected = np.array([
            0.406250, 0.418750, 0.425000, 0.407500, 0.393750, 0.392500,
            0.388750, 0.417500, 0.427500, 0.456250, 0.447500, 0.386250,
            0.357500, 0.282500, 0.232500, 0.262500, 0.282500, 0.295000,
            0.220000, 0.157500, 0.125000, 0.107500, 0.100000, 0.092500,
            0.092500, 0.095000, 0.097500, 0.105000, 0.100000, 0.112500,
            0.120000, 0.132500, 0.130000, 0.135000, 0.112500, 0.120000,
            0.090000, 0.080000, 0.070000, 0.080000, 0.087500, 0.092500,
            0.097500, 0.097500, 0.112500, 0.090000, 0.065000, 0.087500,
            0.175000, 0.240000
        ])

        with self.cached_session(use_gpu=False, force_gpu=False):
            reader = ReadWav.params().instantiate()
            samples, rate = reader.call(audio_file)

            extractor = Zcr.params(zcr_config).instantiate()
            zcr_test = extractor(samples, rate)

            actual = zcr_test.eval().flatten()[:50]
            self.assertAllClose(actual, expected)
예제 #10
0
    def test_read_wav(self):
        """Check ReadWav on sm1_cln.wav against librosa.load with sr=16000."""
        audio_file = str(Path(PACKAGE_OPS_DIR) / 'data/sm1_cln.wav')

        with self.cached_session(use_gpu=False, force_gpu=False):
            reader = ReadWav.params({'sample_rate': 16000}).instantiate()
            audio_data, sample_rate = reader(audio_file)
            reference = librosa.load(audio_file, sr=16000)
            audio_data_true, sample_rate_true = reference
            # ReadWav samples are divided by 32768 before the comparison.
            scaled = audio_data.eval() / 32768
            self.assertAllClose(scaled, audio_data_true)
            self.assertAllClose(sample_rate.eval(), sample_rate_true)
예제 #11
0
  def test_FbankPitch(self):
    """Run FbankPitch on sm1_cln.wav, check rank 2 and print two rows."""
    audio_file = str(Path(PACKAGE_OPS_DIR) / 'data/sm1_cln.wav')
    fbank_pitch_config = {
        'window_length': 0.025,
        'output_type': 1,
        'frame_length': 0.010,
    }

    with self.cached_session(use_gpu=False, force_gpu=False):
      reader = ReadWav.params().instantiate()
      samples, rate = reader(audio_file)
      extractor = FbankPitch.params(fbank_pitch_config).instantiate()
      fbank_pitch_test = extractor(samples, rate)

      # The output is required to be a rank-2 tensor.
      self.assertEqual(tf.rank(fbank_pitch_test).eval(), 2)
      print(fbank_pitch_test.eval()[0:2])
예제 #12
0
    def test_read_wav(self):
        """Check ReadWav on sm1_cln.wav against librosa.load with sr=16000."""
        audio_file = str(
            Path(os.environ['MAIN_ROOT']) /
            'delta/layers/ops/data/sm1_cln.wav')

        with self.cached_session(use_gpu=False, force_gpu=False):
            reader = ReadWav.params({'sample_rate': 16000.0}).instantiate()
            audio_data, sample_rate = reader(audio_file)
            reference = librosa.load(audio_file, sr=16000)
            audio_data_true, sample_rate_true = reference
            # Samples and sample rate are compared directly, without scaling.
            self.assertAllClose(audio_data.eval(), audio_data_true)
            self.assertAllClose(sample_rate.eval(), sample_rate_true)
예제 #13
0
  def test_fbank(self):
    """Run Fbank on sm1_cln.wav and check that the output has rank 3."""
    audio_file = str(Path(PACKAGE_ROOT_DIR) / 'layers/ops/data/sm1_cln.wav')
    fbank_config = {
        'window_length': 0.025,
        'output_type': 1,
        'frame_length': 0.010,
    }

    with self.cached_session(use_gpu=False, force_gpu=False):
      reader = ReadWav.params().instantiate()
      samples, rate = reader(audio_file)
      extractor = Fbank.params(fbank_config).instantiate()
      fbank_test = extractor(samples, rate)

      self.assertEqual(tf.rank(fbank_test).eval(), 3)
예제 #14
0
  def test_write_wav(self):
    """Write sm1_cln.wav to a new file and check that it reads back unchanged."""
    main_root = Path(os.environ['MAIN_ROOT'])
    source_file = str(main_root.joinpath('delta/layers/ops/data/sm1_cln.wav'))
    target_file = str(
        main_root.joinpath('delta/layers/ops/data/sm1_cln_new.wav'))

    with self.session() as sess:
      reader = ReadWav.params().instantiate()
      input_data, sample_rate = reader(source_file)
      writer = WriteWav.params().instantiate()
      # Run the write op before reading the new file back.
      sess.run(writer(target_file, input_data, sample_rate))
      test_data, test_sample_rate = reader(target_file)
      self.assertAllEqual(input_data.eval(), test_data.eval())
      self.assertAllEqual(sample_rate.eval(), test_sample_rate.eval())
예제 #15
0
    def test_write_wav(self):
        """Round-trip sm1_cln.wav through WriteWav and ReadWav.

        The clip is read with the ``speed`` parameter set to 1.0, written to
        ``data/sm1_cln_speed.wav`` and read back with the same reader. Both
        the samples and the sample rate must equal the originals exactly.
        """
        wav_path = str(Path(PACKAGE_OPS_DIR).joinpath('data/sm1_cln.wav'))

        with self.cached_session(use_gpu=False, force_gpu=False) as sess:
            read_wav = ReadWav.params({'speed': 1.0}).instantiate()
            input_data, sample_rate = read_wav(wav_path)
            write_wav = WriteWav.params().instantiate()
            new_path = str(
                Path(PACKAGE_OPS_DIR).joinpath('data/sm1_cln_speed.wav'))
            writewav_op = write_wav(new_path, input_data, sample_rate)
            # Run the write op so the file exists before it is read back.
            sess.run(writewav_op)
            test_data, test_sample_rate = read_wav(new_path)
            self.assertAllEqual(input_data.eval(), test_data.eval())
            self.assertAllEqual(sample_rate.eval(), test_sample_rate.eval())
예제 #16
0
  def test_synthfiltbank(self):
    """Check that Synthfiltbank output matches the input on samples 500-549.

    The audio is passed through Analyfiltbank, then Synthfiltbank.
    """
    audio_file = str(
        Path(os.environ['MAIN_ROOT']) / 'delta/layers/ops/data/sm1_cln.wav')

    with self.session():
      reader = ReadWav.params().instantiate()
      input_data, sample_rate = reader(audio_file)

      analysis = Analyfiltbank.params().instantiate()
      power_spc, phase_spc = analysis(input_data.eval(), sample_rate.eval())

      synthesis = Synthfiltbank.params().instantiate()
      audio_data = synthesis(power_spc, phase_spc, sample_rate.eval())

      # Only flattened samples 500-549 are compared.
      rebuilt = audio_data.eval().flatten()[500:550]
      original = input_data.eval().flatten()[500:550]
      self.assertAllClose(rebuilt, original, rtol=1e-4, atol=1e-4)
예제 #17
0
  def test_cepstrum(self):
    """Run Cepstrum on sm1_cln.wav after dividing the samples by 32768."""
    audio_file = str(Path(PACKAGE_OPS_DIR) / 'data/sm1_cln.wav')

    with self.cached_session(use_gpu=False, force_gpu=False):
      reader = ReadWav.params().instantiate()
      samples, rate = reader.call(audio_file)
      scaled = samples / 32768
      extractor = Cepstrum.params({'window_length': 0.025}).instantiate()
      cepstrum_test = extractor(scaled, rate)

      # NOTE(review): these reference values are built but never compared
      # against cepstrum_test in this block -- confirm whether an assertion
      # is missing.
      output_true = np.array([
          [0.525808, 0.579537, 0.159656, 0.014726, -0.1866810],
          [0.225988, 1.557304, 3.381828, 0.132935, 0.7128600],
          [-1.832759, -1.045178, 0.753158, 0.116107, -0.9307780],
          [-0.696277, 1.333355, 1.590942, 2.041829, -0.0805630],
          [-0.377375, 2.984320, 0.036302, 3.676640, 1.1709290],
      ])
예제 #18
0
  def test_FbankPitch(self):
    """Run FbankPitch on sm1_cln.wav and check that the output has rank 2."""
    audio_file = str(
        Path(os.environ['MAIN_ROOT']) / 'delta/layers/ops/data/sm1_cln.wav')
    fbank_pitch_config = {
        'window_length': 0.025,
        'output_type': 1,
        'frame_length': 0.010,
        'thres_autoc': 0.4
    }

    with self.session():
      reader = ReadWav.params().instantiate()
      # The sample rate returned by the reader is not passed to the op here.
      samples, _ = reader(audio_file)
      extractor = FbankPitch.params(fbank_pitch_config).instantiate()
      fbank_pitch_test = extractor(samples)

      self.assertEqual(tf.rank(fbank_pitch_test).eval(), 2)
예제 #19
0
    def test_write_wav(self):
        """Write scaled sm1_cln.wav samples and check they read back equal.

        Samples are divided by 32768 before writing and again after reading
        the new file back.
        """
        root_dir = Path(PACKAGE_ROOT_DIR)
        source_file = str(root_dir.joinpath('layers/ops/data/sm1_cln.wav'))
        target_file = str(
            root_dir.joinpath('layers/ops/data/sm1_cln_new.wav'))

        with self.cached_session(use_gpu=False, force_gpu=False) as sess:
            reader = ReadWav.params().instantiate()
            raw_data, sample_rate = reader(source_file)
            input_data = raw_data / 32768
            writer = WriteWav.params().instantiate()
            # Run the write op before reading the new file back.
            sess.run(writer(target_file, input_data, sample_rate))
            raw_test_data, test_sample_rate = reader(target_file)
            test_data = raw_test_data / 32768
            self.assertAllEqual(input_data.eval(), test_data.eval())
            self.assertAllEqual(sample_rate.eval(), test_sample_rate.eval())
예제 #20
0
    def test_framepow(self):
        """Compare the first five Framepow outputs with stored values."""
        audio_file = str(Path(PACKAGE_OPS_DIR) / 'data/sm1_cln.wav')
        framepow_config = {'window_length': 0.025, 'frame_length': 0.010}
        expected_feats = np.array(
            [9.819611, 9.328745, 9.247337, 9.26451, 9.266059])

        with self.cached_session(use_gpu=False, force_gpu=False):
            reader = ReadWav.params().instantiate()
            samples, rate = reader(audio_file)

            extractor = Framepow.params(framepow_config).instantiate()
            framepow_test = extractor(samples, rate)

            # The output is required to be a rank-1 tensor.
            self.assertEqual(tf.rank(framepow_test).eval(), 1)
            actual_feats = framepow_test.eval()[0:5]
            self.assertAllClose(actual_feats, expected_feats)
예제 #21
0
    def test_spectrum(self):
        """Compare a 5x5 slice of the Spectrum output with stored values."""
        audio_file = str(
            Path(PACKAGE_ROOT_DIR) / 'layers/ops/data/sm1_cln.wav')
        expected = np.array(
            [[-16.863441, -16.910473, -17.077059, -16.371634, -16.845686],
             [-17.922068, -20.396345, -19.396944, -17.331493, -16.118851],
             [-17.017776, -17.551350, -20.332376, -17.403994, -16.617926],
             [-19.873854, -17.644503, -20.679525, -17.093716, -16.535091],
             [-17.074402, -17.295971, -16.896650, -15.995432, -16.560730]])

        with self.cached_session(use_gpu=False, force_gpu=False):
            reader = ReadWav.params().instantiate()
            samples, rate = reader(audio_file)

            extractor = Spectrum.params({'window_length': 0.025}).instantiate()
            spectrum_test = extractor(samples, rate)

            # The output is required to be a rank-2 tensor.
            self.assertEqual(tf.rank(spectrum_test).eval(), 2)
            # Only rows 4-8 and columns 4-8 are compared.
            actual = spectrum_test.eval()[4:9, 4:9]
            self.assertAllClose(actual, expected)
예제 #22
0
    def test_cepstrum(self):
        """Compare a 5x5 slice of the Cepstrum output with stored values."""
        audio_file = str(
            Path(os.environ['MAIN_ROOT']) /
            'delta/layers/ops/data/sm1_cln.wav')
        expected = np.array(
            [[0.525808, 0.579537, 0.159656, 0.014726, -0.1866810],
             [0.225988, 1.557304, 3.381828, 0.132935, 0.7128600],
             [-1.832759, -1.045178, 0.753158, 0.116107, -0.9307780],
             [-0.696277, 1.333355, 1.590942, 2.041829, -0.0805630],
             [-0.377375, 2.984320, 0.036302, 3.676640, 1.1709290]])

        with self.cached_session(use_gpu=False, force_gpu=False):
            reader = ReadWav.params().instantiate()
            samples, rate = reader.call(audio_file)
            extractor = Cepstrum.params({'window_length': 0.025}).instantiate()
            cepstrum_test = extractor(samples, rate)

            # Only rows 15-19 and columns 7-11 are compared.
            actual = cepstrum_test.eval()[15:20, 7:12]
            self.assertAllClose(actual, expected)
예제 #23
0
    def test_mfcc(self):
        """Run Mfcc with default params and compare a 2x5 slice with stored values."""
        audio_file = str(
            Path(PACKAGE_ROOT_DIR) / 'layers/ops/data/sm1_cln.wav')
        expected_feats = np.array(
            [[-30.58736, -7.088838, -10.67966, -1.646479, -4.36086],
             [-30.73371, -6.128432, -7.930599, 3.208357, -1.086456]])

        with self.session():
            reader = ReadWav.params().instantiate()
            samples, rate = reader(audio_file)
            # Mfcc is instantiated without overriding any parameter.
            extractor = Mfcc.params().instantiate()
            mfcc_test = extractor(samples, rate)

            # The output is required to be a rank-3 tensor.
            self.assertEqual(tf.rank(mfcc_test).eval(), 3)

            actual_feats = np.squeeze(mfcc_test.eval()[0, 0:2, 1:6])
            self.assertAllClose(
                actual_feats, expected_feats, rtol=1e-05, atol=1e-05)
예제 #24
0
  def test_spectrum(self):
    """Compare a 2x5 slice of the Spectrum output with stored values."""
    audio_file = str(Path(PACKAGE_ROOT_DIR) / 'layers/ops/data/sm1_cln.wav')
    spectrum_config = {'window_length': 0.025, 'snip_edges': 1}
    expected = np.array(
        [[9.819611, 2.84503, 3.660894, 2.7779, 1.212233],
         [9.328745, 2.553949, 3.276319, 3.000918, 2.499342]])

    with self.cached_session(use_gpu=False, force_gpu=False):
      reader = ReadWav.params().instantiate()
      samples, rate = reader(audio_file)

      extractor = Spectrum.params(spectrum_config).instantiate()
      spectrum_test = extractor(samples, rate)

      # The output is required to be a rank-2 tensor.
      self.assertEqual(tf.rank(spectrum_test).eval(), 2)
      actual = spectrum_test.eval()[0:2, 0:5]
      self.assertAllClose(actual, expected, rtol=1e-05, atol=1e-05)
예제 #25
0
    def test_fbank(self):
        """Run Fbank on sm1_cln.wav, check rank 3 and print a 2x6 slice."""
        audio_file = str(Path(PACKAGE_OPS_DIR) / 'data/sm1_cln.wav')
        fbank_config = {
            'window_length': 0.025,
            'output_type': 1,
            'frame_length': 0.010,
            'snip_edges': True
        }

        with self.cached_session(use_gpu=False, force_gpu=False):
            reader = ReadWav.params().instantiate()
            samples, rate = reader(audio_file)
            extractor = Fbank.params(fbank_config).instantiate()
            fbank_test = extractor(samples, rate)

            self.assertEqual(tf.rank(fbank_test).eval(), 3)

            # Fetch the values with numpy() in eager mode, eval() otherwise.
            if tf.executing_eagerly():
                values = fbank_test.numpy()
            else:
                values = fbank_test.eval()
            print(values[0:2, 0:6, 0])
예제 #26
0
    def test_synthfiltbank(self):
        """Check that Synthfiltbank output matches the input on samples 500-549.

        The samples are divided by 32768, then passed through Analyfiltbank
        and Synthfiltbank.
        """
        audio_file = str(Path(PACKAGE_OPS_DIR) / 'data/sm1_cln.wav')

        with self.cached_session(use_gpu=False, force_gpu=False):
            reader = ReadWav.params().instantiate()
            raw_data, sample_rate = reader(audio_file)
            input_data = raw_data / 32768

            analysis = Analyfiltbank.params().instantiate()
            power_spc, phase_spc = analysis(input_data.eval(),
                                            sample_rate.eval())

            synthesis = Synthfiltbank.params().instantiate()
            audio_data = synthesis(power_spc, phase_spc, sample_rate.eval())

            # Only flattened samples 500-549 are compared.
            rebuilt = audio_data.eval().flatten()[500:550]
            original = input_data.eval().flatten()[500:550]
            self.assertAllClose(rebuilt, original, rtol=1e-4, atol=1e-4)
예제 #27
0
  def test_spectrum(self):
    """Compare the first five rows of the Pitch output with stored values.

    Despite the method name, the op exercised here is Pitch.
    """
    audio_file = str(Path(PACKAGE_OPS_DIR) / 'data/sm1_cln.wav')
    pitch_config = {'window_length': 0.025, 'soft_min_f0': 10.0}
    expected = [
        [-0.1366025, 143.8855],
        [-0.0226383, 143.8855],
        [-0.08464742, 143.8855],
        [-0.08458386, 143.8855],
        [-0.1208689, 143.8855],
    ]

    with self.cached_session(use_gpu=False, force_gpu=False):
      reader = ReadWav.params().instantiate()
      samples, rate = reader(audio_file)

      extractor = Pitch.params(pitch_config).instantiate()
      pitch_test = extractor(samples, rate)

      # The output is required to be a rank-2 tensor.
      self.assertEqual(tf.rank(pitch_test).eval(), 2)

      actual = pitch_test.eval()[0:5, :]
      self.assertAllClose(actual, expected, rtol=1e-05, atol=1e-05)
예제 #28
0
    def test_mfcc(self):
        """Run Mfcc with use_energy=True and compare a 2x6 slice with stored values."""
        audio_file = str(Path(PACKAGE_OPS_DIR) / 'data/sm1_cln.wav')
        expected_feats = np.array([
            [9.819611, -30.58736, -7.088838, -10.67966, -1.646479, -4.36086],
            [9.328745, -30.73371, -6.128432, -7.930599, 3.208357, -1.086456],
        ])

        with self.cached_session(use_gpu=False, force_gpu=False):
            reader = ReadWav.params().instantiate()
            samples, rate = reader(audio_file)
            extractor = Mfcc.params({'use_energy': True}).instantiate()
            mfcc_test = extractor(samples, rate)

            # The output is required to be a rank-3 tensor.
            self.assertEqual(tf.rank(mfcc_test).eval(), 3)

            actual_feats = np.squeeze(mfcc_test.eval()[0, 0:2, 0:6])
            self.assertAllClose(
                actual_feats, expected_feats, rtol=1e-05, atol=1e-05)
예제 #29
0
 def __init__(self, config: dict):
     """Initialize the base class and build the helper objects from `config`.

     Args:
       config: configuration dict, passed unchanged to the parent
         constructor and to each helper created here.
     """
     super().__init__(config)
     # All three helpers are constructed from the same config dict.
     self.add_noise = Add_rir_noise_aecres(config)
     self.read_wav = ReadWav(config)
     self.write_wav = WriteWav(config)
예제 #30
0
  def test_analyfiltbank(self):
    """Compare Analyfiltbank power and phase outputs with stored values.

    The op is run on sm1_cln.wav. Both outputs must have rank 2, and the
    [:10, :10] corner of each transposed output is compared with the
    reference tables below.
    """
    wav_path = str(
        Path(os.environ['MAIN_ROOT']).joinpath(
            'delta/layers/ops/data/sm1_cln.wav'))

    with self.cached_session(use_gpu=False, force_gpu=False):

      read_wav = ReadWav.params().instantiate()
      audio_data, sample_rate = read_wav(wav_path)

      # The op is fed evaluated values rather than the tensors themselves.
      analyfiltbank = Analyfiltbank.params().instantiate()
      power_spc, phase_spc = analyfiltbank(audio_data.eval(),
                                           sample_rate.eval())

      # Reference values for the 10x10 corner of the transposed power output.
      power_spc_true = np.array(
          [[
              4.2182300e-04, 3.6964193e-04, 3.9906241e-05, 2.8196722e-05,
              3.3976138e-04, 3.7671626e-04, 2.2727624e-04, 7.2495081e-05,
              4.3451786e-05, 3.4654513e-06
          ],
           [
               1.4681223e-05, 2.8831255e-05, 3.5616580e-05, 3.9359711e-05,
               1.2714787e-04, 1.2794189e-04, 3.6509471e-05, 1.7578101e-05,
               5.9672035e-05, 2.9785692e-06
           ],
           [
               8.8715387e-05, 6.0998322e-05, 2.7695101e-05, 1.6866413e-04,
               4.6845453e-05, 3.3532990e-05, 5.7005627e-06, 5.1852752e-05,
               1.8390550e-05, 8.3459439e-05
           ],
           [
               1.1405386e-05, 1.8942148e-06, 1.6338145e-06, 1.8362705e-05,
               8.4106450e-06, 4.4174294e-06, 3.6533682e-05, 5.0541588e-05,
               1.6701326e-06, 1.8736981e-05
           ],
           [
               2.9108920e-05, 1.6862698e-05, 3.3437627e-05, 6.9332527e-05,
               5.0028186e-05, 5.9426224e-05, 2.1895030e-06, 2.3780794e-06,
               4.7786685e-05, 7.3811811e-05
           ],
           [
               1.6433882e-05, 9.5777386e-07, 2.0980822e-06, 4.8990279e-07,
               1.4232077e-05, 1.5986938e-05, 2.9042780e-05, 1.1719906e-05,
               2.4548817e-06, 5.3594176e-06
           ],
           [
               9.1289467e-06, 9.4249899e-06, 7.4781286e-07, 1.8923520e-05,
               6.5740237e-06, 4.3209452e-06, 3.9396346e-06, 1.2287317e-05,
               4.6807354e-06, 5.8512210e-06
           ],
           [
               1.6150383e-05, 2.6649790e-05, 1.8610657e-05, 2.2872716e-06,
               1.4209920e-05, 2.3279742e-06, 6.6038615e-06, 2.6169775e-05,
               2.8335158e-05, 1.7595910e-06
           ],
           [
               6.8095047e-05, 9.1859045e-05, 2.6713702e-05, 3.0580850e-05,
               1.4539381e-05, 4.2510033e-05, 2.2579852e-05, 1.4843822e-05,
               2.0883192e-05, 6.0624756e-05
           ],
           [
               1.6092306e-05, 1.4245335e-05, 2.4250150e-05, 6.0177539e-05,
               6.7926321e-06, 3.4922948e-07, 2.1843030e-06, 8.5554876e-07,
               2.6831965e-06, 2.0012436e-05
           ]])

      # Reference values for the 10x10 corner of the transposed phase output.
      phase_spc_true = np.array(
          [[
              3.1415927, 3.1415927, 3.1415927, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
              3.1415927
          ],
           [
               0.01752237, 1.6688037, 1.4971976, 1.4470094, 2.0516894,
               -2.3112175, -0.7115377, 2.9614341, -1.2494497, -0.7055688
           ],
           [
               2.614648, 0.63351387, -2.0660093, 1.7626916, -1.1257634,
               3.017448, -2.892095, -1.2209401, 1.7407895, -1.0281658
           ],
           [
               1.02424, -1.8967879, -0.6139833, 2.587602, 3.0070715, 1.5781559,
               -1.899145, -1.1459525, -0.24284656, -0.8106653
           ],
           [
               -0.08220324, 0.5497215, 1.7031444, -2.8960562, -1.3680246,
               0.4349923, 2.0676146, 1.2389332, 2.6312854, -1.7511902
           ],
           [
               0.17763095, 2.7475302, -0.20671827, 1.0719725, -2.388657,
               1.189566, -1.0643665, 2.5955305, -0.69036585, -0.5287417
           ],
           [
               -0.9477449, -2.7059674, 0.53469753, 1.9289348, 0.24833842,
               0.03517391, -1.4778724, -0.16577117, -1.7509687, -0.46875867
           ],
           [
               1.5570146, -2.9596932, -0.7975963, 3.0060582, -1.038453,
               0.14911443, -1.5873562, 0.7229206, 2.679422, -1.1890441
           ],
           [
               -2.2543156, 0.47845784, -2.8412538, -0.5494534, 1.6583048,
               -1.4567885, 1.0724461, -2.70243, -0.2690962, 1.8831034
           ],
           [
               -0.32710192, 0.01503609, 0.29720783, -0.7409194, -2.183623,
               2.3637679, 0.6405145, 1.4975713, 0.18241015, 2.2659144
           ]])

      # Both outputs are required to be rank-2 tensors.
      self.assertEqual(tf.rank(power_spc).eval(), 2)
      self.assertEqual(tf.rank(phase_spc).eval(), 2)
      # The outputs are transposed before the 10x10 corner is compared.
      self.assertAllClose(power_spc.eval().transpose()[:10, :10],
                          power_spc_true)
      self.assertAllClose(phase_spc.eval().transpose()[:10, :10],
                          phase_spc_true)