def call(self, audio_data, sample_rate=None):
    """Calculate the zero-crossing rate of speech.

    :param audio_data: the audio signal from which to compute the
        zero-crossing rate. Should be a (1, N) tensor.
    :param sample_rate: [optional] the sample rate of the signal. When
        omitted, the ``sample_rate`` from ``self.config`` is used. A value
        that differs from the configured one fails the equality assertion
        when the graph runs.
    :return: A tensor containing the zero-crossing rate of every frame in
        the speech signal (originally documented as shape
        (1, num_frames) -- TODO confirm against the zcr op).
    """
    p = self.config

    with tf.name_scope('zcr'):
        # Identity check: `== None` would dispatch to the overloaded
        # `__eq__` of tensor/ndarray inputs instead of testing for
        # "argument not supplied".
        if sample_rate is None:
            sample_rate = tf.constant(p.sample_rate, dtype=tf.int32)

        # Guard against a caller-supplied rate that disagrees with the
        # configuration the op parameters were chosen for.
        assert_op = tf.assert_equal(
            tf.constant(p.sample_rate),
            tf.cast(sample_rate, dtype=tf.int32))

        # Tie the op to the assertion so the check actually executes.
        with tf.control_dependencies([assert_op]):
            sample_rate = tf.cast(sample_rate, dtype=float)
            zcr = py_x_ops.zcr(
                audio_data,
                sample_rate,
                window_length=p.window_length,
                frame_length=p.frame_length)

    return zcr
def test_zcr(self):
    """Check the zcr op against reference values for the test wav file."""
    with self.session():
        sample_rate, input_data = feat_lib.load_wav(self.wavpath, sr=16000)
        output = py_x_ops.zcr(input_data, sample_rate)

        # Reference zero-crossing rates for the first 50 frames.
        output_true = np.array([
            0.406250, 0.418750, 0.425000, 0.407500, 0.393750,
            0.392500, 0.388750, 0.417500, 0.427500, 0.456250,
            0.447500, 0.386250, 0.357500, 0.282500, 0.232500,
            0.262500, 0.282500, 0.295000, 0.220000, 0.157500,
            0.125000, 0.107500, 0.100000, 0.092500, 0.092500,
            0.095000, 0.097500, 0.105000, 0.100000, 0.112500,
            0.120000, 0.132500, 0.130000, 0.135000, 0.112500,
            0.120000, 0.090000, 0.080000, 0.070000, 0.080000,
            0.087500, 0.092500, 0.097500, 0.097500, 0.112500,
            0.090000, 0.065000, 0.087500, 0.175000, 0.240000,
        ])

        self.assertEqual(tf.rank(output).eval(), 1)

        # Run the op once and reuse the result for both the log line and
        # the numeric comparison instead of re-evaluating the tensor.
        zcr_values = output.eval()
        logging.info('Shape of zero-cross-rate: {}'.format(
            zcr_values.shape))
        self.assertAllClose(zcr_values.flatten()[:50], output_true)