Ejemplo n.º 1
0
  def call(self, audio_data, sample_rate=None):
    """
    Calculate the cepstrum of audio data.

    :param audio_data: the audio signal from which to compute the cepstrum.
          Should be a (1, N) tensor.
    :param sample_rate: [optional] the sample rate of the signal we are working
          with. Defaults to the configured `self.config.sample_rate`. When
          given, it must equal the configured rate; this is enforced by an
          assertion op that the cepstrum computation depends on.
    :return: A float tensor of size (num_frames, ceps_subband_num) containing
          normalized cepstrum (tag_ceps_mean_norm = True) or cepstrum
          (tag_ceps_mean_norm = False) of every frame in speech.
    """

    p = self.config

    with tf.name_scope('cepstrum'):

      # Identity test on purpose: `== None` would be routed through the
      # operand's overloaded `__eq__` when a tensor/array is passed in.
      if sample_rate is None:
        sample_rate = tf.constant(p.sample_rate, dtype=float)

      # Build both operands with the same float dtype so the comparison does
      # not depend on whether the configured rate is stored as int or float.
      assert_op = tf.assert_equal(
          tf.constant(p.sample_rate, dtype=float),
          tf.cast(sample_rate, dtype=float))

      # The op is created under the control dependency so the sample-rate
      # check runs before the cepstrum is computed.
      with tf.control_dependencies([assert_op]):

        cepstrum = py_x_ops.cepstrum(
            audio_data,
            sample_rate,
            window_length=p.window_length,
            frame_length=p.frame_length,
            ceps_subband_num=p.ceps_subband_num,
            tag_ceps_mean_norm=p.tag_ceps_mean_norm)

        return cepstrum
Ejemplo n.º 2
0
    def test_cepstrum(self):
        """Check the cepstrum op on the wav file at ``self.wavpath``.

        The op output must be rank 2, and its ``[15:20, 7:12]`` slice must
        match the stored reference values within ``assertAllClose`` tolerance.
        """
        # Reference values for rows 15-19, columns 7-11 of the op output.
        expected_patch = np.array([
            [0.525808, 0.579537, 0.159656, 0.014726, -0.1866810],
            [0.225988, 1.557304, 3.381828, 0.132935, 0.7128600],
            [-1.832759, -1.045178, 0.753158, 0.116107, -0.9307780],
            [-0.696277, 1.333355, 1.590942, 2.041829, -0.0805630],
            [-0.377375, 2.984320, 0.036302, 3.676640, 1.1709290],
        ])

        with self.session():
            sample_rate, input_data = feat_lib.load_wav(
                self.wavpath, sr=16000)
            output = py_x_ops.cepstrum(input_data, sample_rate)

            output_rank = tf.rank(output).eval()
            self.assertEqual(output_rank, 2)

            logging.info('Shape of cepstrum: {}'.format(output.shape))

            actual_patch = output.eval()[15:20, 7:12]
            self.assertAllClose(actual_patch, expected_patch)