def preprocess(self, inputs):
    ''' Apply the configured CMVN normalization to the input features.

    The CMVN type is read from `self.audioconf['cmvn_type']` and falls back
    to 'global' when that key is absent.

    Args:
      inputs: features handed to the `utils` CMVN helpers, or returned
        untouched when the CMVN type is 'none'.

    Returns:
      The normalized features, `inputs` itself for 'none', or None when
      `self.input_type` is 'samples' (that path is still a stub).

    Raises:
      ValueError: for the not-yet-implemented 'sliding' type and for any
        unrecognized CMVN type.
    '''
    with tf.variable_scope('feature'):
        if self.input_type == 'samples':
            # FIXME: stub
            return None

        cmvn_type = (self.audioconf['cmvn_type']
                     if 'cmvn_type' in self.audioconf else 'global')
        logging.info('cmvn_type: %s' % (cmvn_type))

        if cmvn_type == 'global':
            # Statistics are loaded from the configured path and kept on the
            # instance before being applied.
            self.mean, self.std = utils.load_cmvn(self.audioconf['cmvn_path'])
            return utils.apply_cmvn(inputs, self.mean, self.std)

        if cmvn_type == 'local':
            return utils.apply_local_cmvn(inputs)

        if cmvn_type == 'sliding':
            raise ValueError('cmvn_type %s not implemented yet.' % (cmvn_type))

        if cmvn_type == 'none':
            return inputs

        raise ValueError('Error cmvn_type %s.' % (cmvn_type))
def testApplyLocalCmvn(self):  #pylint: disable=invalid-name
    ''' Compare utils.apply_local_cmvn() with a NumPy reference. '''
    # Fixed seeds keep the random input reproducible.
    np.random.seed(12)
    tf.set_random_seed(12)

    feat_size = 40
    delta_deltas = True
    last_dim = 3 if delta_deltas else 1
    # presumably [batch, time, feat_size, channels] -- TODO confirm
    feat_shape = [2, 10, feat_size, last_dim]
    feat_np = np.random.randn(*feat_shape).astype(np.float32)

    # Reference result: zero mean / unit variance along axis 1.
    eps = 1e-9
    axis1_mean = np.mean(feat_np, axis=1, keepdims=True)
    axis1_var = np.var(feat_np, axis=1, keepdims=True)
    expected = (feat_np - axis1_mean) / np.sqrt(axis1_var + eps)

    actual = utils.apply_local_cmvn(tf.constant(feat_np), epsilon=eps)

    with self.cached_session(use_gpu=False, force_gpu=False):
        self.assertAllClose(actual.eval(), expected)