def model_fn(features, mode):
    """Build an `EstimatorSpec` for train / eval / predict.

    Relies on the enclosing scope for `bert_config`, `create_model`,
    `num_labels`, `init_checkpoint`, `learning_rate`, `num_train_steps`
    and `num_warmup_steps` (standard BERT `model_fn_builder` closure).
    """
    input_ids = features.get("input_ids")
    input_mask = features.get("input_mask")
    label_ids = features.get("label_ids")
    is_training = (mode == tf.estimator.ModeKeys.TRAIN)

    total_loss, per_example_loss, logits, probabilities = create_model(
        bert_config, is_training, input_ids, input_mask, num_labels,
        label_ids)

    # Warm-start every variable that has a match in the BERT checkpoint.
    trainable = tf.trainable_variables()
    assignment_map, _ = modeling.get_assignment_map_from_checkpoint(
        trainable, init_checkpoint)
    tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

    if mode == tf.estimator.ModeKeys.TRAIN:
        train_op = optimization.create_optimizer(
            total_loss, learning_rate, num_train_steps, num_warmup_steps)
        return tf.estimator.EstimatorSpec(
            mode=mode, loss=total_loss, train_op=train_op)

    if mode == tf.estimator.ModeKeys.EVAL:
        predicted_ids = tf.argmax(logits, axis=-1, output_type=tf.int32)
        metric_ops = {
            "eval_accu": tf.metrics.accuracy(labels=label_ids,
                                             predictions=predicted_ids),
            "eval_loss": tf.metrics.mean(values=per_example_loss),
        }
        return tf.estimator.EstimatorSpec(
            mode=mode, loss=total_loss, eval_metric_ops=metric_ops)

    # PREDICT: expose only the class probabilities.
    return tf.estimator.EstimatorSpec(
        mode=mode, predictions={"prob": probabilities})
def _model_builder(self):
    """Create the input placeholders, the model ops and a GPU-friendly
    session, then arrange for the BERT weights to be restored from
    ``self.init_checkpoint`` on variable initialization."""
    seq_shape = [None, FLAGS.max_seq_length]
    self.ids_placeholder = tf.placeholder(tf.int32, shape=seq_shape)
    self.mask_placeholder = tf.placeholder(tf.int32, shape=seq_shape)
    self.segment_placeholder = tf.placeholder(tf.int32, shape=seq_shape)
    self.labels_placeholder = tf.placeholder(tf.int32, shape=seq_shape)

    self.loss, self.logits, self.output = self.create_model()

    # Session: soft placement, grow GPU memory on demand (up to 100%).
    session_config = tf.ConfigProto()
    session_config.allow_soft_placement = True
    session_config.log_device_placement = False
    session_config.gpu_options.allow_growth = True
    session_config.gpu_options.per_process_gpu_memory_fraction = 1
    self.sess_config = session_config
    self.sess = tf.Session(config=session_config)

    # Map checkpoint variables onto the freshly built graph.
    trainable = tf.trainable_variables()
    assignment_map, _ = \
        modeling.get_assignment_map_from_checkpoint(trainable,
                                                    self.init_checkpoint)
    tf.train.init_from_checkpoint(self.init_checkpoint, assignment_map)
    print("restore from the checkpoint {0}".format(self.init_checkpoint))
def fit(self):
    """Train the sequence-labelling model.

    Builds placeholders and the CRF model graph, warm-starts BERT weights
    from ``<bert_path>/bert_model.ckpt``, runs ``self.epoch`` epochs of
    mini-batch training, checkpoints after every epoch and finally calls
    ``self.evaluate``.

    :return: None
    """
    train_data = self.get_input_feature(self.train_path)

    input_ids = tf.placeholder(shape=[None, None], dtype=tf.int32,
                               name="input_ids")
    input_mask = tf.placeholder(shape=[None, None], dtype=tf.int32,
                                name="input_mask")
    seq_lens = tf.placeholder(tf.int32, [None], name='seq_lens')
    labels = tf.placeholder(tf.int32, [None, None], name='labels')

    preds_seq, log_likelihood = self.model(input_ids, input_mask,
                                           seq_lens, labels)

    # Warm-start from the pre-trained BERT checkpoint.
    init_checkpoint = os.path.join(self.bert_path, 'bert_model.ckpt')
    tvars = tf.trainable_variables()
    assignment_map, _ = modeling.get_assignment_map_from_checkpoint(
        tvars, init_checkpoint)
    tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
    tf.add_to_collection('preds_seq', preds_seq)

    # Negative CRF log-likelihood, normalised by sequence length.
    loss = -log_likelihood / tf.cast(seq_lens, tf.float32)
    loss = tf.reduce_mean(loss)

    # NOTE: ``self.loss`` holds the optimizer *name* ('sgd'/'adam').
    # The original 'sgd' branch and the fallback were identical, so a
    # single SGD default covers both.
    if 'adam' == self.loss.lower():
        train_op = tf.train.AdamOptimizer(self.rate).minimize(loss)
    else:
        train_op = tf.train.GradientDescentOptimizer(
            self.rate).minimize(loss)

    saver = tf.train.Saver(tf.global_variables())
    with tf.Session(config=self.tf_config) as sess:
        sess.run(tf.global_variables_initializer())
        # Fix: create the summary writer once. The original constructed a
        # new FileWriter inside the epoch loop, leaking one writer and
        # one duplicate event file per epoch, and never closed any.
        summary_writer = tf.summary.FileWriter(self.summary_path,
                                               sess.graph)
        for i in range(self.epoch):
            for step, (input_ids_batch, input_mask_batch, seq_lens_batch,
                       labels_batch) in enumerate(
                           self.batch_yield(train_data)):
                _, curr_loss = sess.run(
                    [train_op, loss],
                    feed_dict={
                        input_ids: input_ids_batch,
                        input_mask: input_mask_batch,
                        seq_lens: seq_lens_batch,
                        labels: labels_batch
                    })
                if step % 10 == 0:
                    self.logger.info(
                        'epoch:%d, batch: %d, current loss: %f' %
                        (i, step + 1, curr_loss))
            saver.save(sess, self.model_path)
        summary_writer.close()
        self.evaluate(sess, input_ids, input_mask, seq_lens, labels,
                      preds_seq)
def optimize_graph(params: Params):
    """Freeze the multi-task BERT prediction graph into one GraphDef file.

    Builds the model on CPU under an XLA jit scope, warm-starts weights
    from ``params.ckpt_dir``, converts all variables to constants and
    serialises the result to ``<ckpt_dir>/export_model``.

    Args:
        params: project Params object (ckpt_dir, max_seq_len, ...).

    Returns:
        Path of the written frozen-graph file.
    """
    config = tf.ConfigProto(device_count={'GPU': 0},
                            allow_soft_placement=True)
    init_checkpoint = params.ckpt_dir
    tf.logging.info('build graph...')
    # input placeholders, not sure if they are friendly to XLA
    input_ids = tf.placeholder(tf.int32, (None, params.max_seq_len),
                               'input_ids')
    input_mask = tf.placeholder(tf.int32, (None, params.max_seq_len),
                                'input_mask')
    input_type_ids = tf.placeholder(tf.int32, (None, params.max_seq_len),
                                    'segment_ids')
    jit_scope = tf.contrib.compiler.jit.experimental_jit_scope
    with jit_scope():
        features = {
            'input_ids': input_ids,
            'input_mask': input_mask,
            'segment_ids': input_type_ids,
        }
        model = BertMultiTask(params)
        hidden_feature = model.body(features,
                                    tf.estimator.ModeKeys.PREDICT)
        pred = model.top(features, hidden_feature,
                         tf.estimator.ModeKeys.PREDICT)
        output_tensors = [pred[k] for k in pred]

    tvars = tf.trainable_variables()
    (assignment_map, initialized_variable_names
     ) = modeling.get_assignment_map_from_checkpoint(tvars,
                                                     init_checkpoint)
    tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
    tmp_g = tf.get_default_graph().as_graph_def()

    with tf.Session(config=config) as sess:
        tf.logging.info('load parameters from checkpoint...')
        sess.run(tf.global_variables_initializer())
        tf.logging.info('freeze...')
        # Fix: `name[:-2]` only strips a single-digit ":N" suffix and
        # silently mangles tensor names with output index >= 10; split
        # on ':' to get the op name robustly.
        output_node_names = [t.name.split(':')[0] for t in output_tensors]
        tmp_g = tf.graph_util.convert_variables_to_constants(
            sess, tmp_g, output_node_names)

    tmp_file = os.path.join(params.ckpt_dir, 'export_model')
    tf.logging.info('write graph to a tmp file: %s' % tmp_file)
    with tf.gfile.GFile(tmp_file, 'wb') as f:
        f.write(tmp_g.SerializeToString())
    return tmp_file
def model_fn(features, mode):
    """Serving-only `model_fn`: returns probabilities plus a
    `PredictOutput` export signature.

    Uses `bert_config`, `create_model`, `num_labels` and
    `init_checkpoint` from the enclosing builder scope.
    """
    proba = create_model(
        bert_config,
        False,  # inference graph only — never training
        features["input_ids"],
        features["input_mask"],
        num_labels)

    # Warm-start from the pre-trained checkpoint.
    trainable = tf.trainable_variables()
    assignment_map, _ = modeling.get_assignment_map_from_checkpoint(
        trainable, init_checkpoint)
    tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

    export_outputs = {
        'predict': tf.estimator.export.PredictOutput(proba)
    }
    return tf.estimator.EstimatorSpec(mode=mode,
                                      predictions=proba,
                                      export_outputs=export_outputs)
def _model_builder(self):
    """Create the input placeholders, the multi-label model ops and a
    GPU-friendly session, then arrange for BERT weights to be restored
    from ``self.init_checkpoint`` on variable initialization."""
    seq_shape = [None, FLAGS.max_seq_length]
    self.ids_placeholder = tf.placeholder(tf.int32, shape=seq_shape)
    self.mask_placeholder = tf.placeholder(tf.int32, shape=seq_shape)
    self.segment_placeholder = tf.placeholder(tf.int32, shape=seq_shape)
    # Multi-label targets: one float per label.
    self.labels_placeholder = tf.placeholder(
        tf.float32, shape=[None, self.num_labels])

    self.loss, self.logits, self.sigmoid_logits = self.create_model()

    # Session: soft placement, grow GPU memory on demand (up to 100%).
    session_config = tf.ConfigProto()
    session_config.allow_soft_placement = True
    session_config.log_device_placement = False
    session_config.gpu_options.allow_growth = True
    session_config.gpu_options.per_process_gpu_memory_fraction = 1
    self.sess_config = session_config
    self.sess = tf.Session(config=session_config)

    # Map checkpoint variables onto the freshly built graph.
    trainable = tf.trainable_variables()
    assignment_map, _ = \
        modeling.get_assignment_map_from_checkpoint(trainable,
                                                    self.init_checkpoint)
    tf.train.init_from_checkpoint(self.init_checkpoint, assignment_map)
    print("restore from the checkpoint {0}".format(self.init_checkpoint))
def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """The `model_fn` for TPUEstimator (masked-LM pre-training only).

    Uses `bert_config`, `init_checkpoint`, `use_tpu`,
    `use_one_hot_embeddings`, `learning_rate`, `num_train_steps` and
    `num_warmup_steps` from the enclosing builder scope.

    Raises:
        ValueError: for any mode other than TRAIN or EVAL.
    """
    tf.logging.info("*** Features ***")
    for name in sorted(features.keys()):
        tf.logging.info("  name = %s, shape = %s" % (name, features[name].shape))

    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    masked_lm_positions = features["masked_lm_positions"]
    masked_lm_ids = features["masked_lm_ids"]
    masked_lm_weights = features["masked_lm_weights"]

    is_training = (mode == tf.estimator.ModeKeys.TRAIN)

    model = modeling.DTIBertModel(
        config=bert_config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        use_one_hot_embeddings=use_one_hot_embeddings)

    (masked_lm_loss, masked_lm_example_loss,
     masked_lm_log_probs) = get_masked_lm_output(
         bert_config, model.get_sequence_output(),
         model.get_embedding_table(), masked_lm_positions, masked_lm_ids,
         masked_lm_weights)

    # No next-sentence head here: the total loss is the masked-LM loss.
    total_loss = masked_lm_loss

    tvars = tf.trainable_variables()
    initialized_variable_names = {}
    scaffold_fn = None
    if init_checkpoint:
        (assignment_map, initialized_variable_names
         ) = modeling.get_assignment_map_from_checkpoint(
             tvars, init_checkpoint)
        if use_tpu:
            # On TPU the restore must run inside the Scaffold.
            def tpu_scaffold():
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
                return tf.train.Scaffold()

            scaffold_fn = tpu_scaffold
        else:
            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

    tf.logging.info("**** Trainable Variables ****")
    for var in tvars:
        init_string = ""
        if var.name in initialized_variable_names:
            init_string = ", *INIT_FROM_CKPT*"
        tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                        init_string)

    def metric_fn(masked_lm_example_loss, masked_lm_log_probs,
                  masked_lm_ids, masked_lm_weights):
        """Computes the loss and accuracy of the model."""
        masked_lm_log_probs = tf.reshape(
            masked_lm_log_probs, [-1, masked_lm_log_probs.shape[-1]])
        masked_lm_predictions = tf.argmax(masked_lm_log_probs, axis=-1,
                                          output_type=tf.int32)
        masked_lm_example_loss = tf.reshape(masked_lm_example_loss, [-1])
        masked_lm_ids = tf.reshape(masked_lm_ids, [-1])
        masked_lm_weights = tf.reshape(masked_lm_weights, [-1])
        masked_lm_accuracy = tf.metrics.accuracy(
            labels=masked_lm_ids,
            predictions=masked_lm_predictions,
            weights=masked_lm_weights)
        masked_lm_mean_loss = tf.metrics.mean(
            values=masked_lm_example_loss, weights=masked_lm_weights)
        return {
            "masked_lm_accuracy": masked_lm_accuracy,
            "masked_lm_loss": masked_lm_mean_loss,
        }

    if mode == tf.estimator.ModeKeys.TRAIN:
        train_op = optimization.create_optimizer(total_loss, learning_rate,
                                                 num_train_steps,
                                                 num_warmup_steps, use_tpu)
        # Fix: the original also built an `eval_metrics` tuple here that
        # was never attached to the TRAIN spec — dead code, removed.
        output_spec = tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            train_op=train_op,
            training_hooks=[],
            scaffold_fn=scaffold_fn)
    elif mode == tf.estimator.ModeKeys.EVAL:
        eval_metrics = (metric_fn, [
            masked_lm_example_loss, masked_lm_log_probs, masked_lm_ids,
            masked_lm_weights
        ])
        output_spec = tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            eval_metrics=eval_metrics,
            scaffold_fn=scaffold_fn)
    else:
        raise ValueError("Only TRAIN and EVAL modes are supported: %s" % (mode))
    return output_spec
def model_fn(features, labels, mode, params):
    """The `model_fn` for (TPU)Estimator: token-level sequence labelling
    over BERT.

    Uses `bert_config`, `num_labels`, `init_checkpoint`, `use_tpu`,
    `use_one_hot_embeddings`, `learning_rate`, `num_train_steps` and
    `num_warmup_steps` from the enclosing builder scope.
    """
    logging.info("*** Features ***")
    for name in sorted(features.keys()):
        logging.info("  name = %s, shape = %s" % (name, features[name].shape))
    input_ids = features["input_ids"]
    mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    label_ids = features["label_ids"]
    is_training = (mode == tf.estimator.ModeKeys.TRAIN)

    # Fix: the original branched on FLAGS.crf but both branches called
    # create_model with identical arguments; collapsed to one call.
    # NOTE(review): presumably create_model consults FLAGS.crf itself —
    # confirm before adding CRF-specific arguments here.
    (total_loss, logits, predicts) = create_model(
        bert_config, is_training, input_ids, mask, segment_ids, label_ids,
        num_labels, use_one_hot_embeddings)

    tvars = tf.trainable_variables()
    scaffold_fn = None
    # Fix: was `None`, which made `var.name in initialized_variable_names`
    # below raise TypeError whenever init_checkpoint is unset.
    initialized_variable_names = {}
    if init_checkpoint:
        (assignment_map, initialized_variable_names) = \
            modeling.get_assignment_map_from_checkpoint(tvars, init_checkpoint)
        # Fix: initialise from the checkpoint exactly once per path.
        # The original called init_from_checkpoint unconditionally and
        # then again in the non-TPU branch (twice on CPU/GPU, and eagerly
        # even on TPU where only the Scaffold call is correct).
        if use_tpu:
            def tpu_scaffold():
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
                return tf.train.Scaffold()

            scaffold_fn = tpu_scaffold
        else:
            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

    logging.info("**** Trainable Variables ****")
    for var in tvars:
        init_string = ""
        if var.name in initialized_variable_names:
            init_string = ", *INIT_FROM_CKPT*"
        logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                     init_string)

    if mode == tf.estimator.ModeKeys.TRAIN:
        train_op = optimization.create_optimizer(total_loss, learning_rate,
                                                 num_train_steps,
                                                 num_warmup_steps, use_tpu)
        output_spec = tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            train_op=train_op,
            scaffold_fn=scaffold_fn)
    elif mode == tf.estimator.ModeKeys.EVAL:
        def metric_fn(label_ids, logits, num_labels, mask):
            # Padding positions are masked out of the confusion matrix;
            # label index 0 is excluded via num_labels - 1.
            predictions = tf.math.argmax(logits, axis=-1,
                                         output_type=tf.int32)
            cm = metrics.streaming_confusion_matrix(label_ids, predictions,
                                                    num_labels - 1,
                                                    weights=mask)
            return {"confusion_matrix": cm}

        eval_metrics = (metric_fn, [label_ids, logits, num_labels, mask])
        output_spec = tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            eval_metrics=eval_metrics,
            scaffold_fn=scaffold_fn)
    else:
        output_spec = tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode, predictions=predicts, scaffold_fn=scaffold_fn)
    return output_spec
def optimize_graph(params):
    """Freeze and optimise the multi-task BERT prediction graph.

    Builds the model on CPU under an XLA jit scope, warm-starts weights
    from ``params.ckpt_dir``, converts variables to constants, runs the
    Graph Transform Tool over the result and serialises it to
    ``<ckpt_dir>/export_model``.

    Args:
        params: project params object (ckpt_dir, max_seq_len,
            problem_list, share_top, ...).

    Returns:
        Path of the written frozen-graph file.
    """
    config = tf.ConfigProto(device_count={'GPU': 0},
                            allow_soft_placement=True)
    init_checkpoint = params.ckpt_dir
    tf.logging.info('build graph...')
    # input placeholders, not sure if they are friendly to XLA
    input_ids = tf.placeholder(tf.int32, (None, params.max_seq_len),
                               'input_ids')
    input_mask = tf.placeholder(tf.int32, (None, params.max_seq_len),
                                'input_mask')
    input_type_ids = tf.placeholder(tf.int32, (None, params.max_seq_len),
                                    'segment_ids')
    jit_scope = tf.contrib.compiler.jit.experimental_jit_scope
    with jit_scope():
        features = {
            'input_ids': input_ids,
            'input_mask': input_mask,
            'segment_ids': input_type_ids,
        }
        model = BertMultiTask(params)
        hidden_feature = model.body(features,
                                    tf.estimator.ModeKeys.PREDICT)
        pred = model.top(features, hidden_feature,
                         tf.estimator.ModeKeys.PREDICT)
        output_tensors = [pred[k] for k in pred]

    tvars = tf.trainable_variables()
    (assignment_map, initialized_variable_names
     ) = modeling.get_assignment_map_from_checkpoint(tvars,
                                                     init_checkpoint)
    tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
    tmp_g = tf.get_default_graph().as_graph_def()

    input_node_names = ['input_ids', 'input_mask', 'segment_ids']
    output_node_names = [
        '%s_top/%s_predict' % (params.share_top[problem],
                               params.share_top[problem])
        for problem in params.problem_list
    ]
    # Graph Transform Tool pipeline; quantization passes are left
    # disabled as in the original.
    transforms = [
        'remove_nodes(op=Identity)',
        'fold_constants(ignore_errors=true)',
        'fold_batch_norms',
        # 'quantize_weights',
        # 'quantize_nodes',
        'merge_duplicate_nodes',
        'strip_unused_nodes',
        'sort_by_execution_order'
    ]

    with tf.Session(config=config) as sess:
        tf.logging.info('load parameters from checkpoint...')
        sess.run(tf.global_variables_initializer())
        tf.logging.info('freeze...')
        # Fix: `name[:-2]` only strips a single-digit ":N" suffix and
        # silently mangles tensor names with output index >= 10; split
        # on ':' to get the op name robustly.
        freeze_node_names = [t.name.split(':')[0] for t in output_tensors]
        tmp_g = tf.graph_util.convert_variables_to_constants(
            sess, tmp_g, freeze_node_names)
        tmp_g = TransformGraph(tmp_g, input_node_names, output_node_names,
                               transforms)

    tmp_file = os.path.join(params.ckpt_dir, 'export_model')
    tf.logging.info('write graph to a tmp file: %s' % tmp_file)
    with tf.gfile.GFile(tmp_file, 'wb') as f:
        f.write(tmp_g.SerializeToString())
    return tmp_file