Ejemplo n.º 1
0
  def test_spectrum(self):
    """Run Pitch on the sample wav and check that the output has rank 2."""
    sample_wav = str(Path(PACKAGE_OPS_DIR) / 'data/sm1_cln.wav')

    with self.cached_session(use_gpu=False, force_gpu=False):
      wav_reader = ReadWav.params().instantiate()
      input_data, sample_rate = wav_reader(sample_wav)

      pitch_config = {'window_length': 0.025, 'soft_min_f0': 10.0}
      pitch = Pitch.params(pitch_config).instantiate()
      pitch_test = pitch(input_data, sample_rate)

      output_rank = tf.rank(pitch_test).eval()
      self.assertEqual(output_rank, 2)

      # NOTE(review): these reference rows are built but never compared with
      # pitch_test in the visible code -- the closing assertion may have been
      # cut off; confirm against the full test file.
      output_true = np.array([
          [0.03881124, 0.3000031, -0.02324523],
          [0.006756478, 0.3000097, 0.01047742],
          [0.02455365, 0.3000154, 0.00695902],
          [0.02453586, 0.3000221, 0.008448198],
          [0.03455311, 0.3000307, -0.07547269],
          [0.04293294, 0.3000422, -0.04193667],
      ])
Ejemplo n.º 2
0
    def test_pitch(self):
        """Check the first 50 flattened Pitch outputs against stored values."""
        wav_path = str(
            Path(PACKAGE_ROOT_DIR) / 'layers/ops/data/sm1_cln.wav')

        # The first 20 reference values are zero; the remaining 30 are not.
        nonzero_reference = [
            122.823532, 117.647057, 116.788322, 116.788322, 119.402985,
            119.402985, 119.402985, 119.402985, 119.402985, 123.076920,
            124.031006, 125.000000, 132.065216, 139.130432, 139.130432,
            137.931030, 126.108368, 114.285713, 115.107910, 122.070084,
            129.032257, 130.081299, 130.081299, 129.032257, 130.081299,
            131.147537, 129.032257, 125.000000, 120.300751, 115.107910,
        ]
        output_true = np.array([0.000000] * 20 + nonzero_reference)

        with self.cached_session(use_gpu=False, force_gpu=False):
            wav_reader = ReadWav.params().instantiate()
            raw_samples, sample_rate = wav_reader.call(wav_path)
            # Samples are divided by 32768 before being handed to Pitch.
            scaled_samples = raw_samples / 32768

            pitch_config = {
                'window_length': 0.025,
                'frame_length': 0.010,
                'thres_autoc': 0.3,
            }
            pitch = Pitch.params(pitch_config).instantiate()
            pitch_test = pitch(scaled_samples, sample_rate)

            leading_values = pitch_test.eval().flatten()[:50]
            self.assertAllClose(leading_values, output_true)
Ejemplo n.º 3
0
def compute_pitch():
    """Compute pitch features for every utterance and write them out.

    Command-line options are parsed with ``get_parser()``.  The pitch-related
    options are copied into a config dict used to instantiate ``Pitch``.
    Each ``(sample_rate, array)`` entry read from ``args.rspecifier`` is cast
    to float32, passed through ``Pitch``, squeezed, evaluated, and stored in
    the writer under its utterance id.

    A single TensorFlow session is opened for the whole run and closed on
    exit (including on error) instead of creating a new, never-closed
    session per utterance.
    """
    parser = get_parser()
    args = parser.parse_args()

    # Options forwarded unchanged from the command line to the Pitch config.
    passthrough_options = (
        'window_length',
        'frame_length',
        'snip_edges',
        'preemph_coeff',
        'min_f0',
        'max_f0',
        'soft_min_f0',
        'penalty_factor',
        'lowpass_cutoff',
        'resample_freq',
        'delta_pitch',
        'nccf_ballast',
        'lowpass_filter_width',
        'upsample_filter_width',
        'max_frames_latency',
        'frames_per_chunk',
        'simulate_first_pass_online',
        'recompute_frame',
        'nccf_ballast_online',
    )
    # sample_rate is the only option converted (to int) before being stored.
    config = {'sample_rate': int(args.sample_rate)}
    for option in passthrough_options:
        config[option] = getattr(args, option)

    pitch = Pitch.params(config).instantiate()

    with kaldiio.ReadHelper(args.rspecifier,
                            segments=args.segments) as reader, \
            KaldiWriter(args.wspecifier,
                        write_num_frames=args.write_num_frames,
                        compress=args.compress,
                        compression_method=args.compression_method) as writer, \
            tf.Session() as sess:
        for utt_id, (sample_rate, array) in reader:
            # The rate stored with the audio overrides the command-line value.
            if sample_rate != args.sample_rate:
                args.sample_rate = sample_rate
            array = array.astype(np.float32)
            audio_data = tf.constant(array, dtype=tf.float32)
            pitch_test = tf.squeeze(pitch(audio_data, args.sample_rate))
            pitch_feats = pitch_test.eval(session=sess)
            writer[utt_id] = pitch_feats
0
  def test_spectrum(self):
    """Check rank and the first five rows of Pitch output for the sample wav."""
    sample_wav = str(Path(PACKAGE_OPS_DIR) / 'data/sm1_cln.wav')

    # Reference values for rows 0..4 of the output.
    output_true = [
        [-0.1366025, 143.8855],
        [-0.0226383, 143.8855],
        [-0.08464742, 143.8855],
        [-0.08458386, 143.8855],
        [-0.1208689, 143.8855],
    ]

    with self.cached_session(use_gpu=False, force_gpu=False):
      wav_reader = ReadWav.params().instantiate()
      input_data, sample_rate = wav_reader(sample_wav)

      pitch_config = {'window_length': 0.025, 'soft_min_f0': 10.0}
      pitch = Pitch.params(pitch_config).instantiate()
      pitch_test = pitch(input_data, sample_rate)

      output_rank = tf.rank(pitch_test).eval()
      self.assertEqual(output_rank, 2)

      first_rows = pitch_test.eval()[0:5, :]
      self.assertAllClose(first_rows, output_true, rtol=1e-05, atol=1e-05)
0
 def __init__(self, config: dict) -> None:
     """Initialise the base class and create the Fbank and Pitch members.

     Args:
         config: Settings dict. The same object is handed to the parent
             initialiser, to ``Fbank`` and to ``Pitch``.
     """
     super().__init__(config)
     # NOTE(review): both members receive the same config object, not
     # copies -- confirm neither constructor mutates it.
     self.fbank = Fbank(config)
     self.pitch = Pitch(config)