def test_delta_delta(self):
    ''' Check the output shape of delta_delta (order=2) on mel filterbank features. '''
    params = tffeat.speech_params(
        sr=self.sr_true,
        bins=40,
        cmvn=False,
        audio_desired_samples=1000,
        add_delta_deltas=False)

    # Every filterbank setting is taken from the params object built above.
    fbank_kwargs = dict(
        sample_rate=params.audio_sample_rate,
        preemphasis=params.audio_preemphasis,
        frame_length=params.audio_frame_length,
        frame_step=params.audio_frame_step,
        lower_edge_hertz=params.audio_lower_edge_hertz,
        upper_edge_hertz=params.audio_upper_edge_hertz,
        num_mel_bins=params.audio_num_mel_bins,
        apply_mask=False)

    with self.session():
        wav_tensor = tf.constant(self.wavpath)
        # The sample rate returned by read_wav is not used by this test.
        audio, _ = tffeat.read_wav(wav_tensor, self.hp)

        fbank = tffeat.compute_mel_filterbank_features(audio, **fbank_kwargs)
        with_deltas = tffeat.delta_delta(fbank, order=2)

        # eval() relies on the default session opened by the `with` above.
        self.assertEqual(with_deltas.eval().shape, (11, 40, 3))
def test_tf_spec(self):
    ''' Exercise the 'spec' feature extraction interface and delta_delta on its output. '''
    speech_feature.extract_feature(
        self.wavfile,
        winlen=self.winlen,
        winstep=self.winstep,
        sr=self.sr,
        feature_size=self.feature_size,
        feature_name='spec')

    # Features are read back from self.featfile
    # (presumably written by the extraction call above -- confirm).
    spec = np.load(self.featfile)
    self.assertEqual(spec.shape, (425, 129, 1))

    with self.session(use_gpu=False):
        spec_with_deltas = speech_ops.delta_delta(spec, 2)
        # Same leading dims as the input; the last dim goes from 1 to 3.
        self.assertEqual(spec_with_deltas.eval().shape, (425, 129, 3))
def test_tf_feat(self):
    ''' Exercise the filterbank extraction interface and delta_delta on its output. '''
    speech_feature.extract_filterbank(
        self.wavfile,
        winlen=self.winlen,
        winstep=self.winstep,
        sr=self.sr,
        feature_size=self.feature_size)

    # Features are read back from self.featfile
    # (presumably written by the extraction call above -- confirm).
    fbank = np.load(self.featfile)
    self.assertEqual(fbank.shape, (425, 40, 1))

    with self.session():
        fbank_with_deltas = speech_ops.delta_delta(fbank, 2)
        # Same leading dims as the input; the last dim goes from 1 to 3.
        self.assertEqual(fbank_with_deltas.eval().shape, (425, 40, 3))
def add_delta_delta(feat, feat_size, order=2):
    ''' Run speech_ops.delta_delta on `feat` in a throwaway graph and session.

    A new graph holding one float32 placeholder of shape
    [None, feat_size, 1] is built on every call, `feat` is fed into it, and
    the evaluated result is returned.

    Args:
      feat: value fed to the placeholder; it must be compatible with the
        shape [None, feat_size, 1].
      feat_size: size of the placeholder's second dimension.
      order: forwarded to speech_ops.delta_delta as `order`.

    Returns:
      The value produced by evaluating speech_ops.delta_delta on `feat`.
    '''
    graph = tf.Graph()
    #pylint: disable=not-context-manager
    with graph.as_default():
        fbank = tf.placeholder(
            dtype=tf.float32, shape=[None, feat_size, 1], name='fbank')
        feat_with_delta_delta = speech_ops.delta_delta(fbank, order=order)

    # The context manager closes the session even when run() raises, so a
    # failing call no longer leaks the session and its resources.
    with tf.Session(graph=graph) as sess:
        return sess.run(feat_with_delta_delta, feed_dict={fbank: feat})
def test_tf_fbank(self):
    ''' Exercise the 'fbank' feature extraction interface and delta_delta on its output. '''
    speech_feature.extract_feature(
        self.wavfile,
        winlen=self.winlen,
        winstep=self.winstep,
        sr=self.sr,
        feature_size=self.feature_size,
        feature_name='fbank')

    # Features are read back from self.featfile
    # (presumably written by the extraction call above -- confirm).
    feat = np.load(self.featfile)
    logging.info(f"feat : {feat}")
    self.assertEqual(feat.shape, (425, 40, 1))

    with self.session(use_gpu=False):
        feat_with_deltas = speech_ops.delta_delta(feat, 2)
        # Same leading dims as the input; the last dim goes from 1 to 3.
        self.assertEqual(feat_with_deltas.eval().shape, (425, 40, 3))
def add_delta_delta(feat, feat_size, order=2):
    ''' Run speech_ops.delta_delta on `feat` using a cached session.

    The graph is built only when no session is registered in `_global_sess`
    under this call's cache key. The key includes `feat_size` and `order`
    because both are baked into the graph when it is built; with a single
    fixed key, a later call using a different `order` would silently reuse
    the graph built for the first call.

    Args:
      feat: value fed to the 'fbank:0' placeholder; it must be compatible
        with the shape [None, feat_size, 1].
      feat_size: size of the placeholder's second dimension.
      order: forwarded to speech_ops.delta_delta as `order`.

    Returns:
      The value fetched from the session for the output tensor.
    '''
    # One cache entry (and one output tensor name) per configuration.
    feat_name = 'delta_delta_{}_{}'.format(feat_size, order)
    graph = None

    if feat_name not in _global_sess:
        graph = tf.Graph()
        #pylint: disable=not-context-manager
        with graph.as_default():
            fbank = tf.placeholder(
                dtype=tf.float32, shape=[None, feat_size, 1], name='fbank')
            feat_with_delta_delta = speech_ops.delta_delta(fbank, order=order)
            # Give the output a stable name so it can be fetched by name below.
            tf.identity(feat_with_delta_delta, name=feat_name)

    # NOTE(review): _get_session is presumed to return the session cached
    # under feat_name, creating it from `graph` when absent -- confirm.
    sess = _get_session(feat_name, graph)
    return sess.run(
        _get_out_tensor_name(feat_name, 0), feed_dict={'fbank:0': feat})