def model_fn(features, labels, mode, params):
    """Estimator model_fn: build the BERT tagging graph and return an
    EstimatorSpec for TRAIN, EVAL, or PREDICT mode.

    Args:
        features: dict of input tensors with keys "input_ids",
            "input_mask", "segment_ids", "label_ids".
        labels: unused — the labels travel inside `features["label_ids"]`.
        mode: a `tf.estimator.ModeKeys` value.
        params: unused — hyper-parameters come from the enclosing scope
            (`args`, `bert_config`, `num_labels`, `init_checkpoint`, ...).

    Returns:
        A `tf.estimator.EstimatorSpec` configured for `mode`.
    """
    tf.logging.info("*** Features ***")
    for name in sorted(features.keys()):
        tf.logging.info("  name = %s, shape = %s" % (name, features[name].shape))
    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    label_ids = features["label_ids"]
    # BUG FIX: tf.logging.info uses lazy %-style formatting; the original
    # passed the shape as an extra argument to a message with no format
    # specifier, so the shape was never rendered.
    tf.logging.info('shape of input_ids %s', input_ids.shape)
    is_training = (mode == tf.estimator.ModeKeys.TRAIN)
    # Build the model from the parameters: input_ids is the id representation
    # of the input sample, label_ids is the id representation of the labels.
    total_loss, logits, trans, pred_ids = create_model(
        bert_config, is_training, input_ids, input_mask, segment_ids, label_ids,
        num_labels, False, args.dropout_rate, args.lstm_size, args.cell,
        args.num_layers)
    tvars = tf.trainable_variables()
    # Initialize weights from the pre-trained BERT checkpoint, if one is given.
    if init_checkpoint:
        (assignment_map, initialized_variable_names) = \
            modeling.get_assignment_map_from_checkpoint(tvars, init_checkpoint)
        tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
    output_spec = None
    if mode == tf.estimator.ModeKeys.TRAIN:
        train_op = optimization.create_optimizer(
            total_loss, learning_rate, num_train_steps, num_warmup_steps, False)
        # Periodically log the loss and global step during training.
        hook_dict = {
            'loss': total_loss,
            'global_steps': tf.train.get_or_create_global_step(),
        }
        logging_hook = tf.train.LoggingTensorHook(
            hook_dict, every_n_iter=args.save_summary_steps)
        output_spec = tf.estimator.EstimatorSpec(
            mode=mode,
            loss=total_loss,
            train_op=train_op,
            training_hooks=[logging_hook])
    elif mode == tf.estimator.ModeKeys.EVAL:
        # Modified for NER: MSE over predicted label ids is only a rough
        # proxy metric, not a proper sequence-labeling score.
        def metric_fn(label_ids, pred_ids):
            return {
                "eval_loss": tf.metrics.mean_squared_error(
                    labels=label_ids, predictions=pred_ids),
            }
        eval_metrics = metric_fn(label_ids, pred_ids)
        output_spec = tf.estimator.EstimatorSpec(
            mode=mode,
            loss=total_loss,
            eval_metric_ops=eval_metrics)
    else:
        output_spec = tf.estimator.EstimatorSpec(
            mode=mode, predictions=pred_ids)
    return output_spec
def eval_model():
    """Rebuild the model graph for evaluation (is_training=False), reusing
    the variables already created by the training graph (reuse=True)."""
    model_outputs = create_model(
        bert_config,
        False,  # is_training: evaluation graph
        input_ids,
        input_mask,
        segment_ids,
        label_ids,
        num_labels,
        False,  # use_one_hot_embeddings
        args.dropout_rate,
        args.lstm_size,
        args.cell,
        args.num_layers,
        reuse=True)
    return model_outputs
with graph.as_default(): print("going to restore checkpoint") # sess.run(tf.global_variables_initializer()) input_ids_p = tf.placeholder(tf.int32, [batch_size, args.max_seq_length], name="input_ids") input_mask_p = tf.placeholder(tf.int32, [batch_size, args.max_seq_length], name="input_mask") bert_config = modeling.BertConfig.from_json_file( os.path.join(bert_dir, 'bert_config.json')) (total_loss, logits, trans, pred_ids) = create_model(bert_config=bert_config, is_training=False, input_ids=input_ids_p, input_mask=input_mask_p, segment_ids=None, labels=None, num_labels=num_labels, use_one_hot_embeddings=False, dropout_rate=1.0) saver = tf.train.Saver() saver.restore(sess, tf.train.latest_checkpoint(model_dir)) tokenizer = tokenization.FullTokenizer(vocab_file=os.path.join( bert_dir, 'vocab.txt'), do_lower_case=args.do_lower_case) def predict_online(): """
def model_fn(features, labels, mode, params):
    """Estimator model_fn: build the BERT tagging graph and return an
    EstimatorSpec for TRAIN, EVAL, or PREDICT mode.

    Args:
        features: dict of input tensors with keys "input_ids",
            "input_mask", "segment_ids", "label_ids".
        labels: unused — the labels travel inside `features["label_ids"]`.
        mode: a `tf.estimator.ModeKeys` value.
        params: unused — hyper-parameters come from the enclosing scope
            (`args`, `bert_config`, `num_labels`, `init_checkpoint`, ...).

    Returns:
        A `tf.estimator.EstimatorSpec` configured for `mode`.
    """
    tf.logging.info("*** Features ***")
    for name in sorted(features.keys()):
        tf.logging.info("  name = %s, shape = %s" % (name, features[name].shape))
    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    label_ids = features["label_ids"]
    # CONSISTENCY FIX: use tf.logging (as the rest of this function does)
    # rather than print, and give the lazy %-format a specifier so the
    # shape is actually rendered.
    tf.logging.info('shape of input_ids %s', input_ids.shape)
    is_training = (mode == tf.estimator.ModeKeys.TRAIN)
    # Build the model from the parameters: total loss, logits (scores),
    # CRF transition parameters, and predicted label ids.
    total_loss, logits, trans, pred_ids = create_model(
        bert_config, is_training, input_ids, input_mask, segment_ids, label_ids,
        num_labels, False, args.dropout_rate, args.lstm_size, args.cell,
        args.num_layers)
    # All variables that require training.
    tvars = tf.trainable_variables()
    # Load the pre-trained BERT weights; assignment_map maps graph variables
    # to their counterparts in the checkpoint.
    if init_checkpoint:
        (assignment_map, initialized_variable_names) = \
            modeling.get_assignment_map_from_checkpoint(tvars, init_checkpoint)
        tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
    output_spec = None
    if mode == tf.estimator.ModeKeys.TRAIN:
        train_op = optimization.create_optimizer(
            total_loss, learning_rate, num_train_steps, num_warmup_steps, False)
        # Periodically log the loss and global step during training.
        hook_dict = {
            'loss': total_loss,
            'global_steps': tf.train.get_or_create_global_step(),
        }
        logging_hook = tf.train.LoggingTensorHook(
            hook_dict, every_n_iter=args.save_summary_steps)
        output_spec = tf.estimator.EstimatorSpec(
            mode=mode,
            loss=total_loss,
            train_op=train_op,
            training_hooks=[logging_hook])
    elif mode == tf.estimator.ModeKeys.EVAL:
        # Modified for NER: MSE over predicted label ids is only a rough
        # proxy metric, not a proper sequence-labeling score.
        def metric_fn(label_ids, pred_ids):
            return {
                "eval_loss": tf.metrics.mean_squared_error(
                    labels=label_ids, predictions=pred_ids),
            }
        eval_metrics = metric_fn(label_ids, pred_ids)
        output_spec = tf.estimator.EstimatorSpec(
            mode=mode,
            loss=total_loss,
            eval_metric_ops=eval_metrics)
    else:
        output_spec = tf.estimator.EstimatorSpec(mode=mode, predictions=pred_ids)
    return output_spec