def model_fn(features, labels, mode, params):
    logger.info("*** Features ***")
    if isinstance(features, dict):
        features = (features['words'], features['words_seq'],
                    features['text_length'])
    input_ids, input_word_ids, text_length_list = features
    is_training = (mode == tf.estimator.ModeKeys.TRAIN)
    tag_model = BILSTMONLY(params)
    output_spec = None
    if mode == tf.estimator.ModeKeys.TRAIN:
        loss, pred_ids, weight = tag_model(input_ids, input_word_ids, labels,
                                           text_length_list, is_training)
        train_op = optimization.create_optimizer(loss, args.lr,
                                                 params["decay_steps"],
                                                 args.clip_norm)
        # Log the loss and global step while training.
        hook_dict = {
            'loss': loss,
            'global_steps': tf.train.get_or_create_global_step(),
        }
        logging_hook = tf.train.LoggingTensorHook(
            hook_dict, every_n_iter=args.print_log_steps)
        output_spec = tf.estimator.EstimatorSpec(
            mode=mode, loss=loss, train_op=train_op,
            training_hooks=[logging_hook])
    elif mode == tf.estimator.ModeKeys.EVAL:
        loss, pred_ids, weight = tag_model(input_ids, input_word_ids, labels,
                                           text_length_list, is_training)
        # Micro-averaged F1 over the tag set, masked by the sequence weights.
        f1_score_val, f1_update_op_val = f1(labels=labels,
                                            predictions=pred_ids,
                                            num_classes=params["num_labels"],
                                            weights=weight,
                                            average="micro")
        eval_metric_ops = {"f1": (f1_score_val, f1_update_op_val)}
        eval_hook_dict = {"f1": f1_score_val, "loss": loss}
        eval_logging_hook = tf.train.LoggingTensorHook(
            eval_hook_dict, at_end=True, every_n_iter=args.print_log_steps)
        output_spec = tf.estimator.EstimatorSpec(
            mode=mode, loss=loss, eval_metric_ops=eval_metric_ops,
            evaluation_hooks=[eval_logging_hook])
    else:
        pred_ids = tag_model(input_ids, input_word_ids, labels,
                             text_length_list, is_training, True)
        output_spec = tf.estimator.EstimatorSpec(mode=mode,
                                                 predictions=pred_ids)
    return output_spec
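
# --- Illustrative usage sketch (editor's addition, hedged) ---
# A minimal example of how a model_fn like the one above is typically wired
# into tf.estimator.Estimator under the TF 1.x API. The model_dir,
# num_labels, and decay_steps values below are placeholders, not this
# project's real settings, and `train_input_fn` is a hypothetical input
# function supplied by the caller.
def build_bilstm_estimator_example(train_input_fn):
    """Hypothetical helper: builds and trains an Estimator around model_fn."""
    run_config = tf.estimator.RunConfig(
        model_dir="output/bilstm_only",   # placeholder checkpoint directory
        save_checkpoints_steps=1000)
    estimator = tf.estimator.Estimator(
        model_fn=model_fn,
        config=run_config,
        params={"num_labels": 10,         # assumption: size of the tag set
                "decay_steps": 10000})    # assumption: LR decay horizon
    # train_input_fn must yield the feature dict consumed above:
    # {"words": ..., "words_seq": ..., "text_length": ...}
    estimator.train(input_fn=train_input_fn, max_steps=20000)
    return estimator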
def model_fn(features, labels, mode, params):
    logger.info("*** Features ***")
    if isinstance(features, dict):
        features = (features['words'], features['text_length'],
                    features['query_length'], features['token_type_ids'])
    input_ids, text_length_list, query_length_list, token_type_id_list = features
    if labels is not None:
        start_labels, end_labels = labels
    else:
        start_labels, end_labels = None, None
    is_training = (mode == tf.estimator.ModeKeys.TRAIN)
    is_testing = (mode == tf.estimator.ModeKeys.PREDICT)
    bert_config = modeling.BertConfig.from_json_file(bert_config_file)
    tag_model = bertMRC(params, bert_config)
    if is_testing:
        pred_start_ids, pred_end_ids, weight = tag_model(
            input_ids, start_labels, end_labels, token_type_id_list,
            query_length_list, text_length_list, is_training, is_testing)
    else:
        loss, pred_start_ids, pred_end_ids, weight = tag_model(
            input_ids, start_labels, end_labels, token_type_id_list,
            query_length_list, text_length_list, is_training)
    tvars = tf.trainable_variables()
    # Initialize from the pretrained BERT checkpoint.
    if init_checkpoints:
        (assignment_map, initialized_variable_names) = \
            modeling.get_assignment_map_from_checkpoint(tvars, init_checkpoints)
        tf.train.init_from_checkpoint(init_checkpoints, assignment_map)
    output_spec = None
    if mode == tf.estimator.ModeKeys.TRAIN:
        train_op = optimization.create_optimizer(loss, args.lr,
                                                 params["decay_steps"],
                                                 args.clip_norm,
                                                 use_tpu=False)
        hook_dict = {
            'loss': loss,
            'global_steps': tf.train.get_or_create_global_step(),
        }
        logging_hook = tf.train.LoggingTensorHook(
            hook_dict, every_n_iter=args.print_log_steps)
        output_spec = tf.estimator.EstimatorSpec(
            mode=mode, loss=loss, train_op=train_op,
            training_hooks=[logging_hook])
    elif mode == tf.estimator.ModeKeys.EVAL:
        # Macro-averaged F1 on the binary start/end indicators, masked by
        # the sequence weights.
        f1_start_val, f1_update_op_val = f1(labels=start_labels,
                                            predictions=pred_start_ids,
                                            num_classes=2, weights=weight,
                                            average="macro")
        f1_end_val, f1_end_update_op_val = f1(labels=end_labels,
                                              predictions=pred_end_ids,
                                              num_classes=2, weights=weight,
                                              average="macro")
        eval_metric_ops = {
            "f1_start_macro": (f1_start_val, f1_update_op_val),
            "f1_end_macro": (f1_end_val, f1_end_update_op_val),
        }
        output_spec = tf.estimator.EstimatorSpec(
            mode=mode, loss=loss, eval_metric_ops=eval_metric_ops)
    else:
        output_spec = tf.estimator.EstimatorSpec(
            mode=mode,
            predictions={"start_ids": pred_start_ids,
                         "end_ids": pred_end_ids})
    return output_spec
def model_fn(features, labels, mode, params):
    logger.info("*** Features ***")
    if isinstance(features, dict):
        features = (features['words'], features['token_type_ids'],
                    features['text_length'])
    input_ids, token_type_ids, text_length_list = features
    is_training = (mode == tf.estimator.ModeKeys.TRAIN)
    is_testing = (mode == tf.estimator.ModeKeys.PREDICT)
    bert_config = modeling.BertConfig.from_json_file(bert_config_file)
    tag_model = bertEventType(params, bert_config)
    if is_testing:
        pred_ids = tag_model(input_ids, labels, text_length_list,
                             token_type_ids, is_training, is_testing)
    else:
        per_example_loss, loss, pred_ids = tag_model(
            input_ids, labels, text_length_list, token_type_ids, is_training)
    tvars = tf.trainable_variables()
    # Initialize from the pretrained BERT checkpoint.
    if init_checkpoints:
        (assignment_map, initialized_variable_names) = \
            modeling.get_assignment_map_from_checkpoint(tvars, init_checkpoints)
        tf.train.init_from_checkpoint(init_checkpoints, assignment_map)
    output_spec = None
    if mode == tf.estimator.ModeKeys.TRAIN:
        train_op = optimization.create_optimizer(loss, args.lr,
                                                 params["decay_steps"],
                                                 None, False)
        hook_dict = {
            'loss': loss,
            'global_steps': tf.train.get_or_create_global_step(),
        }
        logging_hook = tf.train.LoggingTensorHook(
            hook_dict, every_n_iter=args.print_log_steps)
        output_spec = tf.estimator.EstimatorSpec(
            mode=mode, loss=loss, train_op=train_op,
            training_hooks=[logging_hook])
    elif mode == tf.estimator.ModeKeys.EVAL:
        # Binarize the per-class sigmoid probabilities at 0.5 before scoring.
        pred_ids = tf.where(pred_ids > 0.5, tf.ones_like(pred_ids),
                            tf.zeros_like(pred_ids))
        f1_score_val_micro, f1_update_op_val_micro = f1(
            labels=labels, predictions=pred_ids, num_classes=2)
        eval_metrics = {
            "f1_score_micro": (f1_score_val_micro, f1_update_op_val_micro),
            "eval_loss": tf.metrics.mean(values=per_example_loss),
        }
        output_spec = tf.estimator.EstimatorSpec(
            mode=mode, loss=loss, eval_metric_ops=eval_metrics)
    else:
        output_spec = tf.estimator.EstimatorSpec(mode=mode,
                                                 predictions=pred_ids)
    return output_spec
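
# --- Illustrative post-processing sketch (editor's addition, hedged) ---
# The EVAL branch above binarizes the sigmoid outputs at 0.5; at prediction
# time the raw per-class probabilities come back from estimator.predict.
# This helper shows one way to map them to event-type names. `id2type` and
# the 0.5 threshold are assumptions, not this project's actual label map.
def decode_event_types_example(probs, id2type, threshold=0.5):
    """Return the event-type names whose probability clears the threshold.

    probs:   iterable of per-class probabilities for one example.
    id2type: dict mapping class index -> event-type name (hypothetical).
    """
    return [id2type[i] for i, p in enumerate(probs) if p > threshold]

# Example with a toy label map:
# decode_event_types_example([0.9, 0.2, 0.7],
#                            {0: "Attack", 1: "Transport", 2: "Meet"})
# -> ["Attack", "Meet"]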
def model_fn(features, labels, mode, params):
    logger.info("*** Features ***")
    if isinstance(features, dict):
        features = (features['words'], features['text_length'],
                    features['query_length'], features['token_type_ids'])
    input_ids, text_length_list, query_length_list, token_type_id_list = features
    if labels is not None:
        start_labels, end_labels, has_answer_label = labels
    else:
        start_labels, end_labels, has_answer_label = None, None, None
    is_training = (mode == tf.estimator.ModeKeys.TRAIN)
    is_testing = (mode == tf.estimator.ModeKeys.PREDICT)
    bert_config = modeling.BertConfig.from_json_file(bert_config_file)
    tag_model = VerfiyMRC(params, bert_config)
    if is_testing:
        (pred_start_ids, pred_end_ids, weight, predict_start_prob,
         predict_end_prob, has_answer_prob) = tag_model(
            input_ids, start_labels, end_labels, token_type_id_list,
            query_length_list, text_length_list, has_answer_label,
            is_training, is_testing)
    else:
        (loss, pred_start_ids, pred_end_ids, weight, predict_start_prob,
         predict_end_prob, has_answer_prob) = tag_model(
            input_ids, start_labels, end_labels, token_type_id_list,
            query_length_list, text_length_list, has_answer_label,
            is_training)
    tvars = tf.trainable_variables()
    # Initialize from the pretrained BERT checkpoint.
    if init_checkpoints:
        (assignment_map, initialized_variable_names) = \
            modeling.get_assignment_map_from_checkpoint(tvars, init_checkpoints)
        tf.train.init_from_checkpoint(init_checkpoints, assignment_map)
    output_spec = None
    if mode == tf.estimator.ModeKeys.TRAIN:
        train_op = optimization.create_optimizer(loss, args.lr,
                                                 params["train_steps"],
                                                 params["num_warmup_steps"],
                                                 args.clip_norm)
        hook_dict = {
            'loss': loss,
            'global_steps': tf.train.get_or_create_global_step(),
        }
        logging_hook = tf.train.LoggingTensorHook(
            hook_dict, every_n_iter=args.print_log_steps)
        output_spec = tf.estimator.EstimatorSpec(
            mode=mode, loss=loss, train_op=train_op,
            training_hooks=[logging_hook])
    elif mode == tf.estimator.ModeKeys.EVAL:
        # Threshold the answer-verification probability at 0.5.
        has_answer_pred = tf.where(has_answer_prob > 0.5,
                                   tf.ones_like(has_answer_prob),
                                   tf.zeros_like(has_answer_prob))
        # Macro-averaged F1 on the binary start/end indicators, masked by
        # the sequence weights.
        f1_start_val, f1_update_op_val = f1(labels=start_labels,
                                            predictions=pred_start_ids,
                                            num_classes=2, weights=weight,
                                            average="macro")
        f1_end_val, f1_end_update_op_val = f1(labels=end_labels,
                                              predictions=pred_end_ids,
                                              num_classes=2, weights=weight,
                                              average="macro")
        has_answer_label = tf.cast(has_answer_label, tf.float32)
        f1_has_val, f1_has_update_op_val = f1(labels=has_answer_label,
                                              predictions=has_answer_pred,
                                              num_classes=2)
        eval_metric_ops = {
            "f1_start_macro": (f1_start_val, f1_update_op_val),
            "f1_end_macro": (f1_end_val, f1_end_update_op_val),
            "f1_has_answer_macro": (f1_has_val, f1_has_update_op_val),
            "eval_loss": tf.metrics.mean(values=loss),
        }
        output_spec = tf.estimator.EstimatorSpec(
            mode=mode, loss=loss, eval_metric_ops=eval_metric_ops)
    else:
        output_spec = tf.estimator.EstimatorSpec(
            mode=mode,
            predictions={"start_ids": pred_start_ids,
                         "end_ids": pred_end_ids,
                         "start_probs": predict_start_prob,
                         "end_probs": predict_end_prob,
                         "has_answer_probs": has_answer_prob})
    return output_spec
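
# --- Illustrative answer-verification sketch (editor's addition, hedged) ---
# The PREDICT branch above emits span indicators plus has_answer_probs from
# the verification head. A natural way to use them together is to gate span
# extraction on answerability first. The 0.5 threshold, the helper name, and
# the start/end pairing rule are assumptions, not this project's decoder.
def verified_spans_example(prediction, threshold=0.5):
    """Return spans only when the verifier believes an answer exists.

    prediction: one dict yielded by estimator.predict with the
    "start_ids", "end_ids", and "has_answer_probs" keys produced above
    (has_answer_probs assumed to be a single probability per example).
    """
    if float(prediction["has_answer_probs"]) <= threshold:
        return []  # the verifier judges the query unanswerable here
    starts = [i for i, s in enumerate(prediction["start_ids"]) if s == 1]
    ends = [i for i, e in enumerate(prediction["end_ids"]) if e == 1]
    # Pair each start with the nearest end at or after it (inclusive span).
    spans = []
    for s in starts:
        candidates = [e for e in ends if e >= s]
        if candidates:
            spans.append((s, min(candidates)))
    return spans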