Esempio n. 1
0
  def call(self, audio_data, sample_rate=None):
    """
    Calculate pitch features of audio data.
    :param audio_data: the audio signal from which to compute pitch. Should be an (1, N) tensor.
    :param sample_rate: [optional] the sample rate of the signal we are working with. When None,
                        the sample rate from the config is used. It must equal the configured
                        sample rate, otherwise the assertion op fails when the graph runs.
    :return: A float tensor containing the pitch feature of every frame in speech. For a 1-D
             op output of length num_frames, the result has shape (num_frames, 1).
    """

    p = self.config
    with tf.name_scope('pitch'):

      # Identity test: `== None` may be overloaded/elementwise for tensor or
      # array arguments, while `is None` only matches a missing argument.
      if sample_rate is None:
        sample_rate = tf.constant(p.sample_rate, dtype=float)

      # Build both operands as float so the equality assertion does not fail
      # on a dtype mismatch when the configured sample rate is an integer.
      assert_op = tf.compat.v1.assert_equal(
          tf.constant(p.sample_rate, dtype=float),
          tf.cast(sample_rate, dtype=float))
      # Make the pitch op depend on the sample-rate check.
      with tf.control_dependencies([assert_op]):

        pitch = py_x_ops.pitch(
            audio_data,
            sample_rate,
            window_length=p.window_length,
            frame_length=p.frame_length,
            thres_autoc=p.thres_autoc)

        # Drop size-1 dimensions, then turn the vector into a column:
        # (num_frames,) -> (1, num_frames) -> (num_frames, 1).
        pitch = tf.squeeze(pitch)
        pitch = tf.transpose(pitch[None, :])
        return pitch
  def test_pitch(self):
    """Check the pitch op output against reference values for the test wav."""
    # Reference for the first 50 output values: a run of 20 zeros followed by
    # 30 non-zero pitch values.
    leading_zeros = [0.0] * 20
    trailing_values = [
        122.823532, 117.647057, 116.788322, 116.788322, 119.402985,
        119.402985, 119.402985, 119.402985, 119.402985, 123.076920,
        124.031006, 125.000000, 132.065216, 139.130432, 139.130432,
        137.931030, 126.108368, 114.285713, 115.107910, 122.070084,
        129.032257, 130.081299, 130.081299, 129.032257, 130.081299,
        131.147537, 129.032257, 125.000000, 120.300751, 115.107910,
    ]
    expected = np.array(leading_zeros + trailing_values)

    with self.session():
      # Load the wav fixture and run the raw op on it.
      wav_rate, wav_samples = feat_lib.load_wav(self.wavpath, sr=16000)
      pitch_out = py_x_ops.pitch(wav_samples, wav_rate)

      # The op output must be rank 1.
      self.assertEqual(tf.rank(pitch_out).eval(), 1)
      # Only the first 50 values are compared with the reference.
      head = pitch_out.eval().flatten()[:50]
      self.assertAllClose(head, expected)