def transfer_bert_model(bert_model_dir, output_bert_model):
  """Rebuild a BERT inference graph and re-export the pre-trained weights.

  Constructs a fresh graph with named placeholder inputs, tags the encoder
  layers and pooled output with stable tensor names (so downstream code can
  fetch them by name), restores the original checkpoint, and saves it again
  under `output_bert_model`.

  Args:
    bert_model_dir: directory containing `bert_config.json` and
      `bert_model.ckpt`.
    output_bert_model: path prefix where the re-exported checkpoint is saved.
  """
  use_one_hot_embeddings = False
  graph = tf.Graph()
  with graph.as_default():
    input_ids = tf.placeholder(tf.int32, (None, None), 'input_ids')
    input_mask = tf.placeholder(tf.int32, (None, None), 'input_mask')
    segment_ids = tf.placeholder(tf.int32, (None, None), 'segment_ids')
    bert_config = modeling.BertConfig.from_json_file(
        os.path.join(bert_model_dir, 'bert_config.json'))
    model = modeling.BertModel(
        config=bert_config,
        is_training=False,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=use_one_hot_embeddings)
    all_encoder_layers = model.get_all_encoder_layers()
    input_x_bert_cls = model.get_pooled_output()
    # Attach stable, importer-friendly names to the tensors of interest.
    for idx, layer in enumerate(all_encoder_layers):
      layer = tf.identity(layer, "encoder_layers_" + str(idx))
      print("layer:", layer)
    input_x_bert_cls = tf.identity(input_x_bert_cls, "input_x_bert_cls")
    print("input_x_bert_cls", input_x_bert_cls)

    saver = tf.train.Saver()
    # NOTE: the original opened an extra, unused session around the whole
    # body and never closed it; a single session here is sufficient.
    with tf.Session() as sess:
      sess.run(tf.global_variables_initializer())
      saver.restore(sess, os.path.join(bert_model_dir, "bert_model.ckpt"))
      saver.save(sess, output_bert_model)
def eval_or_infer_once(self, mode):
  """Do evaluation or inference once.

  Builds the model for `mode`, attaches a fresh session and saver, runs
  the core eval/infer loop, and always closes the session afterwards.

  Args:
    mode: one of the solver modes (e.g. eval or infer) passed to
      `self.build` and `self.eval_or_infer_core`.
  """
  model = self.build(mode)
  model.sess = tf.Session(config=self.session_conf)
  model.saver = tf.train.Saver()
  try:
    self.eval_or_infer_core(model, mode)
  finally:
    # Close even when eval/infer raises so the session is never leaked.
    model.sess.close()
def train_and_eval(self):  # pylint: disable=too-many-locals
  """Train and evaluate the model.

  Builds two separate graphs — one for training, one for evaluation —
  then trains inside a MonitoredTrainingSession, running an eval pass
  every `save_checkpoint_steps` batches (right after a checkpoint is
  written by the session's hooks).
  """
  # train related
  g_train = tf.Graph()
  with g_train.as_default():
    logging.info("Compiling train model ...")
    train_model = self.build(utils.TRAIN)
  # eval related
  g_eval = tf.Graph()
  with g_eval.as_default():
    logging.info("Compiling eval model ...")
    eval_model = self.build(utils.EVAL)
    # Eval keeps its own long-lived session on the eval graph; it is
    # reused on every eval pass and closed at the end of training.
    eval_model.sess = tf.Session(config=self.session_conf, graph=g_eval)
    eval_model.saver = tf.train.Saver()

  # start train
  with g_train.as_default():
    # Supervisor
    with tf.name_scope("train"):
      global_step = tf.train.get_or_create_global_step()
      train_op = self.get_train_op(train_model.loss_op, global_step)

      checkpoint_dir = get_checkpoint_dir(self.config)

      # scaffold
      scaffold = self.get_scaffold(utils.TRAIN, global_step,
                                   train_model.iterator.initializer)

      with tf.train.MonitoredTrainingSession(
          checkpoint_dir=checkpoint_dir,
          scaffold=scaffold,
          save_checkpoint_steps=self.save_checkpoint_steps,
          config=self.session_conf) as sess:
        # Training loop. For each batch...
        train_data_size = self.config['data']['train_data_size']
        num_batch = math.ceil(train_data_size * self.num_epochs /
                              self.batch_size)
        num_batch_per_epoch = math.ceil(train_data_size / self.batch_size)
        logging.info("Total data size: {}, batch num: {}, "
                     "batch num per epoch: {}".format(train_data_size,
                                                      num_batch,
                                                      num_batch_per_epoch))
        for i in range(0, num_batch):
          # Run eval after each checkpoint save (skip the very first batch).
          if i % self.save_checkpoint_steps == 0 and i != 0:
            self.eval_or_infer_core(eval_model, utils.EVAL)
          _, _, out_loss = sess.run(
              [train_op, global_step, train_model.loss_op])
          # Log on the print interval, the final batch, and epoch edges.
          if i % self.print_every == 0 or i == num_batch - 1 or (
              i + 1
          ) % num_batch_per_epoch == 0 or i % num_batch_per_epoch == 0:
            logging.info(
                "Training for epoch {}: [ {:.2%} ] loss is {:g}".format(
                    int(i / num_batch_per_epoch),
                    (i % num_batch_per_epoch) / num_batch_per_epoch,
                    out_loss))
  eval_model.sess.close()
def compute_mfcc():
  """Compute MFCC (or fbank, per `is_fbank`) features for every utterance
  read from a Kaldi rspecifier and write them via a Kaldi wspecifier.

  All options come from the command-line parser. A single TF session is
  shared across utterances — the original opened a new, never-closed
  session per utterance, leaking resources.
  """
  parser = get_parser()
  args = parser.parse_args()

  config = {}
  config['sample_rate'] = int(args.sample_rate)
  config['upper_frequency_limit'] = float(args.upper_frequency_limit)
  config['lower_frequency_limit'] = float(args.lower_frequency_limit)
  config['filterbank_channel_count'] = float(args.filterbank_channel_count)
  config['window_length'] = args.window_length
  config['frame_length'] = args.frame_length
  config['output_type'] = args.output_type
  config['window_type'] = args.window_type
  config['snip_edges'] = args.snip_edges
  config['preeph_coeff'] = args.preeph_coeff
  config['remove_dc_offset'] = args.remove_dc_offset
  config['is_fbank'] = args.is_fbank
  config['cepstral_lifter'] = args.cepstral_lifter
  config['coefficient_count'] = args.coefficient_count

  mfcc = Mfcc.params(config).instantiate()

  with kaldiio.ReadHelper(args.rspecifier, segments=args.segments) as reader, \
      KaldiWriter(args.wspecifier, write_num_frames=args.write_num_frames,
                  compress=args.compress,
                  compression_method=args.compression_method) as writer, \
      tf.Session() as sess:
    for utt_id, (sample_rate, array) in reader:
      if sample_rate != args.sample_rate:
        # Trust the rate carried by the stream over the flag value.
        args.sample_rate = sample_rate
      array = array.astype(np.float32)
      audio_data = tf.constant(array, dtype=tf.float32)
      mfcc_test = tf.squeeze(mfcc(audio_data, args.sample_rate))
      writer[utt_id] = mfcc_test.eval(session=sess)
def compute_plp():
  """Compute PLP features for every utterance read from a Kaldi
  rspecifier and write them via a Kaldi wspecifier.

  A single TF session is shared across utterances — the original opened
  a new, never-closed session per utterance, leaking resources.
  """
  parser = get_parser()
  args = parser.parse_args()

  config = {}
  config['sample_rate'] = int(args.sample_rate)
  config['plp_order'] = int(args.plp_order)
  config['window_length'] = args.window_length
  config['frame_length'] = args.frame_length

  plp = Plp.params(config).instantiate()

  with kaldiio.ReadHelper(args.rspecifier, segments=args.segments) as reader, \
      KaldiWriter(args.wspecifier, write_num_frames=args.write_num_frames,
                  compress=args.compress,
                  compression_method=args.compression_method) as writer, \
      tf.Session() as sess:
    for utt_id, (sample_rate, array) in reader:
      if sample_rate != args.sample_rate:
        # Trust the rate carried by the stream over the flag value.
        args.sample_rate = sample_rate
      array = array.astype(np.float32)
      audio_data = tf.constant(array, dtype=tf.float32)
      plp_test = plp(audio_data, args.sample_rate)
      writer[utt_id] = plp_test.eval(session=sess)
def compute_spectrum():
  """Compute spectrum features for every utterance read from a Kaldi
  rspecifier and write them via a Kaldi wspecifier.

  Fixes two issues in the original: `config['output_type']` was assigned
  twice (the first `int(...)` cast was dead code, immediately overwritten
  by the raw flag), and a new, never-closed TF session was opened per
  utterance; one shared session is used instead.
  """
  parser = get_parser()
  args = parser.parse_args()

  config = {}
  config['sample_rate'] = int(args.sample_rate)
  config['window_length'] = args.window_length
  config['frame_length'] = args.frame_length
  config['output_type'] = args.output_type
  config['window_type'] = args.window_type
  config['snip_edges'] = args.snip_edges
  config['preeph_coeff'] = args.preeph_coeff
  config['remove_dc_offset'] = args.remove_dc_offset
  config['is_fbank'] = args.is_fbank
  config['dither'] = args.dither

  spectrum = Spectrum.params(config).instantiate()

  with kaldiio.ReadHelper(args.rspecifier, segments=args.segments) as reader, \
      KaldiWriter(args.wspecifier, write_num_frames=args.write_num_frames,
                  compress=args.compress,
                  compression_method=args.compression_method) as writer, \
      tf.Session() as sess:
    for utt_id, (sample_rate, array) in reader:
      if sample_rate != args.sample_rate:
        # Trust the rate carried by the stream over the flag value.
        args.sample_rate = sample_rate
      array = array.astype(np.float32)
      audio_data = tf.constant(array, dtype=tf.float32)
      spectrum_test = spectrum(audio_data, args.sample_rate)
      writer[utt_id] = spectrum_test.eval(session=sess)
def compute_stft():
  """Compute STFT power or phase spectrum (selected by `output_type`)
  for every utterance read from a Kaldi rspecifier and write the result
  via a Kaldi wspecifier.

  A single TF session is shared across utterances — the original opened
  a new, never-closed session per utterance, leaking resources.
  """
  parser = get_parser()
  args = parser.parse_args()

  config = {}
  config['sample_rate'] = int(args.sample_rate)
  config['window_length'] = args.window_length
  config['frame_length'] = args.frame_length

  stft = Analyfiltbank.params(config).instantiate()

  with kaldiio.ReadHelper(args.rspecifier, segments=args.segments) as reader, \
      KaldiWriter(args.wspecifier, write_num_frames=args.write_num_frames,
                  compress=args.compress,
                  compression_method=args.compression_method) as writer, \
      tf.Session() as sess:
    for utt_id, (sample_rate, array) in reader:
      if sample_rate != args.sample_rate:
        # Trust the rate carried by the stream over the flag value.
        args.sample_rate = sample_rate
      array = array.astype(np.float32)
      audio_data = tf.constant(array, dtype=tf.float32)
      power_spectrum, phase_spectrum = stft(audio_data, args.sample_rate)
      # output_type == 1 selects the power spectrum, otherwise the phase.
      if args.output_type == 1:
        out_feats = power_spectrum.eval(session=sess)
      else:
        out_feats = phase_spectrum.eval(session=sess)
      writer[utt_id] = out_feats
def compute_fbank_pitch():
  """Compute combined fbank+pitch features for every utterance read from
  a Kaldi rspecifier and write them via a Kaldi wspecifier.

  A single TF session is shared across utterances — the original opened
  a new, never-closed session per utterance, leaking resources.
  """
  parser = get_parser()
  args = parser.parse_args()

  config = {}
  # NOTE(review): sibling extractors cast sample_rate with int(); this one
  # used float() in the original — preserved, but confirm which is intended.
  config['sample_rate'] = float(args.sample_rate)
  config['upper_frequency_limit'] = float(args.upper_frequency_limit)
  config['lower_frequency_limit'] = float(args.lower_frequency_limit)
  config['filterbank_channel_count'] = float(args.filterbank_channel_count)
  config['window_length'] = args.window_length
  config['frame_length'] = args.frame_length
  config['thres_autoc'] = args.thres_autoc
  config['output_type'] = args.output_type

  fbank_pitch = FbankPitch.params(config).instantiate()

  with kaldiio.ReadHelper(args.rspecifier, segments=args.segments) as reader, \
      KaldiWriter(args.wspecifier, write_num_frames=args.write_num_frames,
                  compress=args.compress,
                  compression_method=args.compression_method) as writer, \
      tf.Session() as sess:
    for utt_id, (sample_rate, array) in reader:
      if sample_rate != args.sample_rate:
        # Trust the rate carried by the stream over the flag value.
        args.sample_rate = sample_rate
      array = array.astype(np.float32)
      audio_data = tf.constant(array, dtype=tf.float32)
      fbank_pitch_test = fbank_pitch(audio_data, args.sample_rate)
      writer[utt_id] = fbank_pitch_test.eval(session=sess)
def __init__(self, config):
  """Set up the solver: resolve paths and session config, create the
  shared TF session, register it with Keras, and build metrics."""
  super().__init__(config)
  self.model_compiled = False
  saver_conf = config['solver']['saver']
  self.model_path = saver_conf['model_path']
  self.checkpoint_dir = get_checkpoint_dir(self.config)
  self.session_conf = get_session_conf(self.config)
  session = tf.Session(config=self.session_conf)
  self.session = session
  # Make Keras use the same session as this solver.
  tf.keras.backend.set_session(session)
  self.metrics = self.get_metrics()
def init_session(self, model, gpu_str):
  """Initialize `self._graph` and `self._sess` from `model`.

  `model` may be: a SavedModel directory, a checkpoint directory, or a
  frozen-graph pb file. Exits the process when the path does not exist.

  Args:
    model: path to a model directory or frozen-graph file.
    gpu_str: visible GPU device list (e.g. "0,1"); empty means CPU only.
  """
  # The config for CPU usage
  config = tf.ConfigProto()
  if not gpu_str:
    config.gpu_options.visible_device_list = ''  # pylint: disable=no-member
  else:
    config.gpu_options.visible_device_list = gpu_str  # pylint: disable=no-member
    config.gpu_options.allow_growth = True  # pylint: disable=no-member
  #check model dir
  if os.path.isdir(model):
    self._graph = tf.Graph()
    if tf.saved_model.maybe_saved_model_directory(model):
      #saved model
      logging.info('saved model dir: {}'.format(model))
      self._sess = tf.Session(graph=self._graph, config=config)
      tf.saved_model.loader.load(self._sess,
                                 [tf.saved_model.tag_constants.SERVING], model)
    else:
      #checkpoint
      # NOTE(review): this branch ignores the `config` built above and
      # hardcodes log_device_placement=True — presumably a leftover debug
      # setting; confirm before changing.
      self._sess = tf.Session(
          graph=self._graph,
          config=tf.ConfigProto(
              allow_soft_placement=True, log_device_placement=True))
      ckpt_path = tf.train.latest_checkpoint(model)
      # self._graph, self._sess = utils.load_graph_session_from_ckpt(ckpt_path)
      # `model` is rebound to the .meta file path of the latest checkpoint.
      model = ckpt_path + '.meta'
      logging.info("meta : {}".format(model))
      saver = tf.train.import_meta_graph(model)
      saver.restore(self._sess, ckpt_path)
  else:
    if not os.path.exists(model):
      logging.info('{}, is not exist'.format(model))
      logging.info("frozen_graph : {} not exist".format(model))
      sys.exit(0)
    #frozen graph pb
    frozen_graph = model
    logging.info('frozen graph pb : {}'.format(frozen_graph))
    self._graph = utils.load_frozen_graph(frozen_graph)
    self._sess = tf.Session(graph=self._graph, config=config)
def _get_session(feat_name, graph=None):
  """Return the cached tf.Session for `feat_name`, creating and caching
  one on `graph` the first time the name is seen.

  `graph` must be provided on the first call for a given name."""
  global _global_sess
  cached = _global_sess.get(feat_name)
  if cached is not None:
    return cached
  assert graph is not None
  new_sess = tf.Session(graph=graph)
  _global_sess[feat_name] = new_sess
  return new_sess
def generate_cmvn(self, filelist=None, dry_run=False):
  """Compute dataset-level CMVN statistics and save them as (mean, var).

  Streams one epoch of features (either raw .npy features, or features
  extracted on-the-fly from wav via the TF speech ops), accumulates
  sum / sum-of-squares / count, then derives mean and variance and saves
  them to `self._cmvn_path` as a numpy file.

  Args:
    filelist: unused; kept for interface compatibility.
    dry_run: when True, compute and log the stats but do not save.
  """
  del filelist
  assert self._stride == 1.0
  batch_size = self.config['solver']['optimizer']['batch_size']
  features, labels = self.input_fn(
      utils.INFER, batch_size,
      num_epoch=1)().make_one_shot_iterator().get_next()
  del labels

  suffix = self.taskconf['suffix']
  if suffix == '.npy':
    logging.info('generate cmvn from numpy')
    feature = features['inputs']
  else:
    logging.info('genearte cmvn from wav')
    # tf extractor graph
    params = feat_lib.speech_ops.speech_params(
        sr=self.taskconf['audio']['sr'],
        bins=self.taskconf['audio']['feature_size'],
        add_delta_deltas=self.taskconf['audio']['add_delta_deltas'],
        audio_frame_length=self.taskconf['audio']['winlen'],
        audio_frame_step=self.taskconf['audio']['winstep'])

    #[batch, Time] -> [batch, time, audio_channel]
    waveforms = tf.expand_dims(features['inputs'], axis=-1)
    #[batch, Time, feat_size, channles]
    feature = feat_lib.speech_ops.batch_extract_feature(waveforms, params)

  # create stats vars
  sums, square, count = utils.create_cmvn_statis(
      self.taskconf['audio']['feature_size'],
      self.taskconf['audio']['add_delta_deltas'])
  try:
    with tf.Session() as sess:
      while True:
        feat_np = sess.run(feature)
        # update stats
        sums, square, count = utils.update_cmvn_statis(
            feat_np, sums, square, count, axis=(0, 1))
  except tf.errors.OutOfRangeError:
    # One full epoch consumed; accumulation is complete.
    pass

  # compute cmvn
  mean, var = utils.compute_cmvn(sums, square, count)
  logging.info('mean:{}'.format(mean))
  logging.info('var:{}'.format(var))
  if not dry_run:
    np.save(self._cmvn_path, (mean, var))
  logging.info('save cmvn:{}'.format(self._cmvn_path))
  logging.info('generate cmvn done')
def test_ngram_op_2_order(self):
  """Check the ngram op with word_ngrams=2 on a single example."""
  expected = [0, 0, 0, 0, 0, 0, 0]
  input_ph = tf.placeholder(shape=(4,), dtype=tf.int32)
  ngram_t = py_x_ops.ngram(
      input_ph, word_ngrams=2, vocab_size=5000, bucket_size=100000)
  logging.info("t_ngram: {}".format(ngram_t))
  with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    result = sess.run(ngram_t, feed_dict={input_ph: self.testcase[0]})
    self.assertAllEqual(result, expected)
def run_dataset(self, data_iterator, batch_num):
  """Run the text pre-process pipeline, fetch data in numpy array format."""
  next_op = data_iterator.get_next()
  batches = []
  with tf.Session(config=self.session_conf) as sess:
    sess.run(data_iterator.initializer, feed_dict=self.init_feed_dict)
    for _ in range(batch_num):
      try:
        batches.append(sess.run(next_op))
      except tf.errors.OutOfRangeError:
        # Dataset exhausted before batch_num batches were fetched.
        break
  return np.concatenate(batches, axis=0)
def export_model(self):
  """Export a model to tensorflow SavedModel.

  Builds the export graph, restores the inference checkpoint into a fresh
  session, writes the SavedModel via `to_saved_model`, and always closes
  the session (the original leaked it).
  """
  mode = utils.INFER
  graph = tf.Graph()
  with graph.as_default():
    infer_model = self.build_export_model()
    infer_model.sess = tf.Session(config=self.session_conf)
    infer_model.saver = tf.train.Saver()

    model_path = self.get_model_path(mode)
    try:
      infer_model.saver.restore(infer_model.sess, save_path=model_path)
      to_saved_model(self.config, infer_model.sess, infer_model.export_inputs,
                     infer_model.output_dict)
    finally:
      # Release the session even if restore/export raises.
      infer_model.sess.close()
def test_process_multi_label_dataset(self):
  """process_multi_label_dataset should map label strings to id rows."""
  label = ["O I-MISC I-MISC", "O B-MISC I-MISC"]
  # mkstemp instead of the deprecated, race-prone tempfile.mktemp; the
  # file is also removed afterwards (the original left it behind).
  fd, label_filepath = tempfile.mkstemp(suffix='label_file_for_unitest.txt')
  os.close(fd)
  try:
    with open(label_filepath, mode='w', encoding='utf-8') as fobj:
      for token in label:
        fobj.write(token)
        fobj.write('\n')

    label_ds = tf.data.TextLineDataset(label_filepath)
    true_res = [[0, 8, 8], [0, 7, 8]]
    label_ds = process_multi_label_dataset(label_ds, self.config)
    iterator = label_ds.make_initializable_iterator()
    label_res = iterator.get_next()
    with tf.Session() as sess:
      sess.run(iterator.initializer)
      for i in range(len(label)):
        self.assertEqual(list(sess.run(label_res)[:3]), true_res[i])
  finally:
    os.remove(label_filepath)
def test_batch_ngram_op_2_order(self):
  """Check the ngram op with word_ngrams=2 on a whole batch."""
  expected = [[0, 0, 0, 0, 0, 0, 0],
              [223, 0, 0, 0, 0, 0, 0],
              [0, 8, 5008, 0, 0, 0, 0],
              [4, 8, 102492, 0, 0, 0, 0],
              [0, 0, 10, 5000, 5010, 0, 0],
              [2, 5, 3, 103747, 51858, 0, 0],
              [7, 2, 1, 24, 50599, 103743, 54395]]
  input_ph = tf.placeholder(shape=(7, 4), dtype=tf.int32)
  ngram_t = py_x_ops.ngram(
      input_ph, word_ngrams=2, vocab_size=5000, bucket_size=100000)
  logging.info("batch t_ngram: {}".format(ngram_t))
  with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    rows = sess.run(ngram_t, feed_dict={input_ph: self.testcase})
    self.assertAllEqual([list(row) for row in rows], expected)
def compute_pitch():
  """Compute Kaldi-style pitch features for every utterance read from a
  Kaldi rspecifier and write them via a Kaldi wspecifier.

  A single TF session is shared across utterances — the original opened
  a new, never-closed session per utterance, leaking resources.
  """
  parser = get_parser()
  args = parser.parse_args()

  config = {}
  config['sample_rate'] = int(args.sample_rate)
  config['window_length'] = args.window_length
  config['frame_length'] = args.frame_length
  config['snip_edges'] = args.snip_edges
  config['preemph_coeff'] = args.preemph_coeff
  config['min_f0'] = args.min_f0
  config['max_f0'] = args.max_f0
  config['soft_min_f0'] = args.soft_min_f0
  config['penalty_factor'] = args.penalty_factor
  config['lowpass_cutoff'] = args.lowpass_cutoff
  config['resample_freq'] = args.resample_freq
  config['delta_pitch'] = args.delta_pitch
  config['nccf_ballast'] = args.nccf_ballast
  config['lowpass_filter_width'] = args.lowpass_filter_width
  config['upsample_filter_width'] = args.upsample_filter_width
  config['max_frames_latency'] = args.max_frames_latency
  config['frames_per_chunk'] = args.frames_per_chunk
  config['simulate_first_pass_online'] = args.simulate_first_pass_online
  config['recompute_frame'] = args.recompute_frame
  config['nccf_ballast_online'] = args.nccf_ballast_online

  pitch = Pitch.params(config).instantiate()

  with kaldiio.ReadHelper(args.rspecifier, segments=args.segments) as reader, \
      KaldiWriter(args.wspecifier, write_num_frames=args.write_num_frames,
                  compress=args.compress,
                  compression_method=args.compression_method) as writer, \
      tf.Session() as sess:
    for utt_id, (sample_rate, array) in reader:
      if sample_rate != args.sample_rate:
        # Trust the rate carried by the stream over the flag value.
        args.sample_rate = sample_rate
      array = array.astype(np.float32)
      audio_data = tf.constant(array, dtype=tf.float32)
      pitch_test = tf.squeeze(pitch(audio_data, args.sample_rate))
      writer[utt_id] = pitch_test.eval(session=sess)
def main(_):
  """Average the weights of several checkpoints into a new checkpoint.

  Checkpoints come either from the comma-separated FLAGS.checkpoints list
  (optionally prefixed with FLAGS.prefix) or from the last
  FLAGS.num_last_checkpoints of the checkpoint state at FLAGS.prefix.
  Variables are summed across checkpoints, divided by the checkpoint
  count, materialized as TF variables, and saved to FLAGS.output_path.
  Variables whose name starts with "global_step" are excluded.
  """
  if FLAGS.checkpoints:
    # Get the checkpoints list from flags and run some basic checks.
    checkpoints = [c.strip() for c in FLAGS.checkpoints.split(",")]
    checkpoints = [c for c in checkpoints if c]
    if not checkpoints:
      raise ValueError("No checkpoints provided for averaging.")
    if FLAGS.prefix:
      checkpoints = [FLAGS.prefix + c for c in checkpoints]
  else:
    assert FLAGS.num_last_checkpoints >= 1, "Must average at least one model"
    assert FLAGS.prefix, ("Prefix must be provided when averaging last"
                          " N checkpoints")
    checkpoint_state = tf.train.get_checkpoint_state(
        os.path.dirname(FLAGS.prefix))
    # Checkpoints are ordered from oldest to newest.
    checkpoints = checkpoint_state.all_model_checkpoint_paths[
        -FLAGS.num_last_checkpoints:]
  checkpoints = [c for c in checkpoints if checkpoint_exists(c)]
  if not checkpoints:
    if FLAGS.checkpoints:
      raise ValueError("None of the provided checkpoints exist. %s" %
                       FLAGS.checkpoints)
    else:
      raise ValueError("Could not find checkpoints at %s" %
                       os.path.dirname(FLAGS.prefix))

  # Read variables from all checkpoints and average them.
  logging.info("Reading variables and averaging checkpoints:")
  for c in checkpoints:
    logging.info("%s ", c)
  var_list = tf.train.list_variables(checkpoints[0])
  var_values, var_dtypes = {}, {}
  for (name, shape) in var_list:
    if not name.startswith("global_step"):
      # Accumulators start at zero; dtype is recorded on first read below.
      var_values[name] = np.zeros(shape)
  for checkpoint in checkpoints:
    reader = tf.train.load_checkpoint(checkpoint)
    for name in var_values:
      tensor = reader.get_tensor(name)
      var_dtypes[name] = tensor.dtype
      var_values[name] += tensor
    logging.info("Read from checkpoint %s", checkpoint)
  for name in var_values:  # Average.
    var_values[name] /= len(checkpoints)

  with tf.variable_scope(tf.get_variable_scope(), reuse=tf.AUTO_REUSE):
    tf_vars = [
        tf.get_variable(v, shape=var_values[v].shape, dtype=var_dtypes[v])
        for v in var_values
    ]
  placeholders = [tf.placeholder(v.dtype, shape=v.shape) for v in tf_vars]
  assign_ops = [tf.assign(v, p) for (v, p) in zip(tf_vars, placeholders)]
  global_step = tf.Variable(
      0, name="global_step", trainable=False, dtype=tf.int64)
  saver = tf.train.Saver(tf.all_variables())

  # Build a model consisting only of variables, set them to the average values.
  with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # Feed each averaged value into its variable via the paired placeholder.
    for p, assign_op, (name, value) in zip(placeholders, assign_ops,
                                           six.iteritems(var_values)):
      sess.run(assign_op, {p: value})
    # Use the built saver to save the averaged checkpoint.
    saver.save(sess, FLAGS.output_path, global_step=global_step)

  logging.info("Averaged checkpoints saved in %s", FLAGS.output_path)
def get_session(sess_config):
  """Create and return a new tf.Session built from `sess_config`."""
  new_sess = tf.Session(config=sess_config)
  return new_sess