def generate_cmvn(self, filelist=None, dry_run=False):  # pylint: disable=unused-argument
    ''' Generate mean and vars of features.

    Accumulates statistics over the items yielded by `self.generate_data()`
    and stops once `self.cmvn_max_samples` items have been consumed.

    Args:
      filelist: not used by this implementation.
      dry_run: if True, only log the destination path; otherwise write
        `(mean, var)` to `self._cmvn_path` with `np.save`.
    '''
    sums, square, count = utils.create_cmvn_statis(
        self.taskconf['audio']['feature_size'],
        self.taskconf['audio']['add_delta_deltas'])

    # NOTE: these sampler settings stay in effect after this method returns.
    self.sampler.chunk_size = 100000
    self.sampler.pad_chunks = False

    num_done = 0
    for inputs, _, _, _, _ in self.generate_data():
        # A rank-3 input gets a leading axis of size 1 before accumulation
        # (presumably a single unbatched example -- confirm with the sampler).
        if inputs.ndim == 3:
            inputs = np.expand_dims(inputs, axis=0)
        sums, square, count = utils.update_cmvn_statis(
            inputs, sums, square, count, axis=(0, 1))

        num_done += 1
        if num_done % 100 == 0:
            # Lazy %-args: the message is only formatted if it is emitted.
            logging.info('Done %d samples.', num_done)
        # Use `>=` so the cap is exact; with `>` the loop would consume
        # cmvn_max_samples + 1 items before stopping.
        if num_done >= self.cmvn_max_samples:
            break

    # compute cmvn
    mean, var = utils.compute_cmvn(sums, square, count)
    if dry_run:
        logging.info('save cmvn:{}'.format(self._cmvn_path))
    else:
        np.save(self._cmvn_path, (mean, var))
    logging.info('generate cmvn done')
    logging.info(mean)
    logging.info(var)
def generate_cmvn(self, filelist=None, dry_run=False):
    ''' Compute feature mean and variance with one pass over the input pipeline.

    Builds the `utils.INFER` input pipeline for a single epoch, runs it in a
    `tf.Session` until it is exhausted, and accumulates statistics with the
    `utils` cmvn helpers.

    Args:
      filelist: ignored (deleted immediately).
      dry_run: if True, skip writing `(mean, var)` to `self._cmvn_path`.
    '''
    del filelist
    assert self._stride == 1.0

    batch_size = self.config['solver']['optimizer']['batch_size']
    dataset_fn = self.input_fn(utils.INFER, batch_size, num_epoch=1)
    iterator = dataset_fn().make_one_shot_iterator()
    features, _ = iterator.get_next()

    if self.taskconf['suffix'] == '.npy':
        logging.info('generate cmvn from numpy')
        feature = features['inputs']
    else:
        logging.info('genearte cmvn from wav')
        # Build the in-graph feature extractor for raw waveforms.
        make_params = feat_lib.speech_ops.speech_params
        audio_conf = self.taskconf['audio']
        params = make_params(
            sr=audio_conf['sr'],
            bins=audio_conf['feature_size'],
            add_delta_deltas=audio_conf['add_delta_deltas'],
            audio_frame_length=audio_conf['winlen'],
            audio_frame_step=audio_conf['winstep'])

        # [batch, time] -> [batch, time, audio_channel]
        waveforms = tf.expand_dims(features['inputs'], axis=-1)
        # [batch, time, feat_size, channels]
        feature = feat_lib.speech_ops.batch_extract_feature(waveforms, params)

    # Running accumulators for the statistics.
    acc_sum, acc_square, acc_count = utils.create_cmvn_statis(
        self.taskconf['audio']['feature_size'],
        self.taskconf['audio']['add_delta_deltas'])

    try:
        with tf.Session() as sess:
            # Loop until the one-shot iterator signals exhaustion.
            while True:
                batch_feat = sess.run(feature)
                acc_sum, acc_square, acc_count = utils.update_cmvn_statis(
                    batch_feat, acc_sum, acc_square, acc_count, axis=(0, 1))
    except tf.errors.OutOfRangeError:
        # End of the single epoch: all data has been consumed.
        pass

    mean, var = utils.compute_cmvn(acc_sum, acc_square, acc_count)
    logging.info('mean:{}'.format(mean))
    logging.info('var:{}'.format(var))

    if not dry_run:
        np.save(self._cmvn_path, (mean, var))
    logging.info('save cmvn:{}'.format(self._cmvn_path))
    logging.info('generate cmvn done')
def testComputeCmvn(self):  #pylint: disable=invalid-name
    ''' Check the cmvn helpers against numpy's mean and variance. '''
    np.random.seed(12)
    feat_size, delta_deltas = 40, True
    num_channels = 3 if delta_deltas else 1
    shape = [2, 10, feat_size, num_channels]

    acc_sum, acc_square, acc_count = utils.create_cmvn_statis(
        feat_size, delta_deltas)

    feat = np.random.randn(*shape).astype(np.float32)
    acc_sum, acc_square, acc_count = utils.update_cmvn_statis(
        feat, acc_sum, acc_square, acc_count)
    mean, var = utils.compute_cmvn(acc_sum, acc_square, acc_count)

    # Reference statistics reduced over the first two axes.
    expected_mean = np.mean(feat, axis=(0, 1))
    expected_var = np.var(feat, axis=(0, 1))

    self.assertAllEqual(mean.shape, [1] + shape[2:])
    self.assertAllClose(np.squeeze(mean, axis=0), expected_mean)
    self.assertAllClose(np.squeeze(var, axis=0), expected_var)
def generate_cmvn(self, filelist=None, dry_run=False):
    ''' Compute mean and variance statistics of the features.

    Accumulates statistics over everything yielded by
    `self.generate_data()`.

    Args:
      filelist: not used by this implementation.
      dry_run: if True, only log the destination path; otherwise write
        `(mean, var)` to `self._cmvn_path` with `np.save`.
    '''
    acc_sum, acc_square, acc_count = utils.create_cmvn_statis(
        self.taskconf['audio']['feature_size'],
        self.taskconf['audio']['add_delta_deltas'])

    # NOTE: these sampler settings stay in effect after this method returns.
    self.sampler.set_chunk_size(100000)
    self.sampler.set_pad_chunks(False)

    for feats, _, _, _, _, _ in self.generate_data():
        # A rank-3 input gets a leading axis of size 1 before accumulation.
        feats = np.expand_dims(feats, axis=0) if feats.ndim == 3 else feats
        acc_sum, acc_square, acc_count = utils.update_cmvn_statis(
            feats, acc_sum, acc_square, acc_count, axis=(0, 1))

    mean, var = utils.compute_cmvn(acc_sum, acc_square, acc_count)

    if not dry_run:
        np.save(self._cmvn_path, (mean, var))
    else:
        logging.info('save cmvn:{}'.format(self._cmvn_path))

    logging.info('generate cmvn done')
    logging.info(mean)
    logging.info(var)