def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """Minimal `model_fn` for TPUEstimator used for pipeline debugging.

    Builds a trivial scalar loss directly from the integer input features so
    that the input pipeline / estimator wiring can be exercised without a real
    model.

    Args:
        features: dict of tensors; reads "input_ids", "input_mask",
            "segment_ids" and "next_sentence_labels".
        labels: unused.
        mode: estimator mode (only `loss` is populated regardless of mode).
        params: unused.

    Returns:
        A TPUEstimatorSpec carrying only `mode` and `loss`.
    """
    tf_logging.info("*** Features ***")
    for name in sorted(features.keys()):
        tf_logging.info(" name = %s, shape = %s" % (name, features[name].shape))
    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    next_sentence_labels = features["next_sentence_labels"]
    # Arbitrary arithmetic over the inputs; only serves to touch every feature
    # so the graph depends on the full input pipeline.
    t = input_ids + input_mask + segment_ids
    t = t * next_sentence_labels
    total_loss = tf.reduce_sum(tf.cast(t[:, 0], tf.float32))
    # Fix: removed dead `eval_metrics = (lambda x: x, [input_ids])` — it was
    # assigned but never passed to the spec below.
    scaffold_fn = None
    output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
        mode=mode,
        loss=total_loss,
        scaffold_fn=scaffold_fn)
    return output_spec
def main(_):
    """Entry point: assemble input_fn/model_fn from FLAGS and run training."""
    tf_logging.info("Train MLM with alternative embedding2")
    model_conf = JsonConfig.from_json_file(FLAGS.model_config_file)
    trainer_conf = TrainConfigEx.from_flags(FLAGS)
    do_train = FLAGS.do_train
    files = get_input_files_from_flags(FLAGS)
    input_fn = input_fn_builder_alt_emb2_classification(files, FLAGS, do_train)

    # Thin factory forwarding every argument straight to EmbeddingReplacer2.
    def model_constructor(config, is_training, input_ids, input_mask,
                          token_type_ids, use_one_hot_embeddings, features):
        return EmbeddingReplacer2(config, is_training, input_ids, input_mask,
                                  token_type_ids, use_one_hot_embeddings,
                                  features)

    # The two extra flags request feature feeding and tvar reporting from the
    # classification model_fn builder.
    special_flags = FLAGS.special_flags.split(",") + ["feed_features", "ask_tvar"]
    model_fn = model_fn_classification(model_conf, trainer_conf,
                                       model_constructor, special_flags)
    run_estimator(model_fn, input_fn)
def train_LMS(bert_hp, train_config, lms_config: LMSConfigI, save_dir, nli_data, modeling_option, init_fn):
    """Train an LMSModel with a manual TF1 session loop.

    Args:
        bert_hp: hyper-parameters object; reads `.lr`.
        train_config: reads `.max_steps` and `.num_gpu`.
        lms_config: LMSConfigI passed through to LMSModel.
        save_dir: checkpoint directory; its basename is used as the run name.
        nli_data: tuple of (train_batches, dev_batches).
        modeling_option: forwarded to LMSModel.
        init_fn: callable taking the session; run after variable init
            (presumably restores a checkpoint — confirm against callers).

    Returns:
        The result of the final `save_fn()` call (a checkpoint path, judging by
        save_fn_factory usage — confirm).
    """
    tf_logging.info("train_pairing ENTRY")
    train_batches, dev_batches = nli_data
    max_steps = train_config.max_steps
    num_gpu = train_config.num_gpu
    lms_model = LMSModel(modeling_option, bert_hp, lms_config, num_gpu)
    train_cls = lms_model.get_train_op(bert_hp.lr, max_steps)
    global_step = tf.train.get_or_create_global_step()
    run_name = os.path.basename(save_dir)
    train_writer, test_writer = setup_summary_writer(run_name)
    sess = init_session()
    sess.run(tf.global_variables_initializer())
    init_fn(sess)

    # NOTE(review): original comment read "make explain train_op does not
    # increase global step" — i.e. the global step is read explicitly here
    # rather than trusting the train op to advance it.
    def fetch_global_step():
        step, = sess.run([global_step])
        return step

    # Bind the session and model tensors into reusable train/eval/save callables.
    train_classification = partial(train_fn_factory, sess, lms_model.loss_tensor,
                                   lms_model.per_layer_loss, train_cls,
                                   lms_model.batch2feed_dict)
    # Evaluation uses only the first 20 dev batches.
    eval_acc = partial(eval_fn_factory, sess, dev_batches[:20],
                       lms_model.loss_tensor, lms_model.per_layer_loss,
                       lms_model.ex_score_tensor, lms_model.per_layer_logit_tensor,
                       global_step, lms_model.batch2feed_dict, test_writer)
    save_fn = partial(save_fn_factory, sess, save_dir, global_step)
    init_step, = sess.run([global_step])

    def train_fn(batch, step_i):
        # One optimization step plus a scalar 'loss' summary at the current
        # global step.
        loss_val, acc = train_classification(batch, step_i)
        summary = tf.Summary()
        summary.value.add(tag='loss', simple_value=loss_val)
        train_writer.add_summary(summary, fetch_global_step())
        train_writer.flush()
        return loss_val, acc

    def valid_fn():
        eval_acc()

    tf_logging.info("Initialize step to {}".format(init_step))
    tf_logging.info("{} train batches".format(len(train_batches)))
    valid_freq = 100  # validate every 100 steps
    save_interval = 300  # checkpoint every 300 steps
    loss, _ = step_runner(train_batches, train_fn, init_step,
                          valid_fn, valid_freq,
                          save_fn, save_interval,
                          max_steps)
    return save_fn()
def run_estimator(model_fn, input_fn, host_call=None):
    """Configure logging/run config and run estimator training from FLAGS.

    Args:
        model_fn: estimator model function.
        input_fn: estimator input function.
        host_call: accepted but not used anywhere in this body.
    """
    tf_logging.setLevel(logging.INFO)
    if FLAGS.log_debug:
        tf_logging.setLevel(logging.DEBUG)
    #FLAGS.init_checkpoint = auto_resolve_init_checkpoint(FLAGS.init_checkpoint)
    tf.io.gfile.makedirs(FLAGS.output_dir)
    if FLAGS.do_predict:
        # Suppress repetitive per-example log lines during prediction.
        tf_logging.addFilter(CounterFilter())
    tpu_cluster_resolver = None
    config = tf.compat.v1.ConfigProto(allow_soft_placement=False, )
    # NOTE(review): is_per_host is computed but never passed into RunConfig.
    is_per_host = tf.compat.v1.estimator.tpu.InputPipelineConfig.PER_HOST_V2
    run_config = tf.compat.v1.estimator.tpu.RunConfig(
        cluster=tpu_cluster_resolver,
        master=FLAGS.master,
        model_dir=FLAGS.output_dir,
        save_checkpoints_steps=FLAGS.save_checkpoints_steps,
        keep_checkpoint_every_n_hours=FLAGS.keep_checkpoint_every_n_hours,
        keep_checkpoint_max=FLAGS.keep_checkpoint_max,
        session_config=config,
        tf_random_seed=FLAGS.random_seed,
    )
    if FLAGS.random_seed is not None:
        tf_logging.info("Using random seed : {}".format(FLAGS.random_seed))
        tf.random.set_seed(FLAGS.random_seed)
    # If TPU is not available, this will fall back to normal Estimator on CPU
    # or GPU.
    # NOTE(review): batch_size is hard-coded to 16 here while
    # FLAGS.train_batch_size is only logged below — confirm this mismatch is
    # intentional.
    estimator = tf.compat.v1.estimator.Estimator(model_fn=model_fn,
                                                 config=run_config,
                                                 params={'batch_size': 16})
    if FLAGS.do_train:
        tf_logging.info("***** Running training *****")
        tf_logging.info("  Batch size = %d", FLAGS.train_batch_size)
        estimator.train(input_fn=input_fn, max_steps=FLAGS.num_train_steps)
def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """Experimental `model_fn`: repeated dense/dropout over stacked embeddings.

    Builds a toy embedding lookup, makes `n_of_t` shifted copies of it, and
    runs 20 rounds of dense+dropout mixing under one of two wiring schemes
    selected by the enclosing-scope name `modeling` ("A" = one shared dense
    over a stacked tensor, otherwise one dense per copy). The mean of the
    first copy becomes the training loss. Relies on closure names
    (`modeling`, `create_optimizer`, `OomReportingHook`) — this is a closure
    inside a builder; confirm against the outer function.
    """
    tf_logging.info("*** Features ***")
    for name in sorted(features.keys()):
        tf_logging.info(" name = %s, shape = %s" % (name, features[name].shape))
    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    # NOTE(review): next_sentence_labels is read but never used below.
    next_sentence_labels = features["next_sentence_labels"]
    initializer = tf.compat.v1.truncated_normal_initializer(stddev=0.02)
    vocab_size = 40000
    embedding_size = 512
    embedding_table = tf.compat.v1.get_variable(
        name="embedding",
        shape=[vocab_size, embedding_size],
        initializer=initializer)
    input_shape = get_shape_list(input_ids)
    # One-hot matmul embedding lookup (TPU-friendly form).
    flat_input_ids = tf.reshape(input_ids, [-1])
    one_hot_input_ids = tf.one_hot(flat_input_ids, depth=vocab_size)
    output = tf.matmul(one_hot_input_ids, embedding_table)
    output = tf.reshape(output, input_shape + [embedding_size])
    # n_of_t shifted copies of the embedded input.
    t_list = []
    n_of_t = 10
    for j in range(n_of_t):
        t_list.append(output + j)
    dense = tf.keras.layers.Dense(embedding_size,
                                  kernel_initializer=initializer,
                                  name="MDense")
    if modeling == "B":
        # Scheme B: independent dense layer per copy.
        dense_list = []
        for j in range(n_of_t):
            dense_list.append(
                tf.keras.layers.Dense(embedding_size,
                                      kernel_initializer=initializer,
                                      name="MDense_{}".format(j)))
    for i in range(20):
        if modeling == "A":
            # Scheme A: shared dense over all copies stacked on axis 0;
            # variables are reused after the first iteration.
            t = tf.stack(t_list, 0)
            with tf.compat.v1.variable_scope("scope_A", reuse=i > 0):
                t = dense(t)
            t = tf.nn.dropout(t, rate=0.5)
            # Copy 0 becomes the sum of copies 1..n-1; the rest pass through.
            t_0 = 0
            for j in range(1, n_of_t):
                t_0 += t[j]
            new_t_list = [t_0]
            for j in range(1, n_of_t):
                new_t_list.append(t[j])
            t_list = new_t_list
        else:
            with tf.compat.v1.variable_scope("scope_B", reuse=i > 0):
                temp_t = []
                for j in range(n_of_t):
                    t = dense_list[j](t_list[j])
                    t = tf.nn.dropout(t, rate=0.5)
                    temp_t.append(t)
            # Same mixing rule as scheme A.
            t_0 = 0
            for j in range(1, n_of_t):
                t_0 += temp_t[j]
            new_t_list = [t_0]
            for j in range(1, n_of_t):
                new_t_list.append(temp_t[j])
            t_list = new_t_list
    t = t_list[0]
    total_loss = tf.reduce_mean(t)
    # Debug: dump all trainable variables to stdout.
    for t in tf.compat.v1.trainable_variables():
        print(t)
    train_op = create_optimizer(total_loss, 1e-4, 1000, 1000, True)
    scaffold_fn = None
    output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
        mode=mode,
        train_op=train_op,
        loss=total_loss,
        training_hooks=[OomReportingHook()],
        scaffold_fn=scaffold_fn)
    return output_spec
def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """`model_fn` for a domain-adversarial BERT classifier.

    Trains a class head on pooled BERT output plus a domain head fed through
    a gradient-reversal layer (`grad_reverse`); total loss is
    pred_loss + alpha * domain_loss. Relies on closure names (`model_config`,
    `train_config`, `optimization`, `classification_metric_fn`, ...) from the
    enclosing builder.
    """
    tf_logging.info("*** Features ***")
    for name in sorted(features.keys()):
        tf_logging.info(" name = %s, shape = %s" % (name, features[name].shape))
    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    if mode == tf.estimator.ModeKeys.PREDICT:
        # No labels at predict time; fabricate ones so the graph still builds.
        label_ids = tf.ones([input_ids.shape[0]], dtype=tf.int32)
    else:
        label_ids = features["label_ids"]
        label_ids = tf.reshape(label_ids, [-1])
    if "is_real_example" in features:
        is_real_example = tf.cast(features["is_real_example"], dtype=tf.float32)
    else:
        is_real_example = tf.ones(tf.shape(label_ids), dtype=tf.float32)
    domain_ids = features["domain_ids"]
    domain_ids = tf.reshape(domain_ids, [-1])
    # Per-example mask selecting which examples contribute to the class loss.
    is_valid_label = features["is_valid_label"]
    is_training = (mode == tf.estimator.ModeKeys.TRAIN)
    model_1 = BertModel(
        config=model_config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=train_config.use_one_hot_embeddings,
    )
    pooled = model_1.get_pooled_output()
    if is_training:
        pooled = dropout(pooled, 0.1)
    logits = tf.keras.layers.Dense(train_config.num_classes,
                                   name="cls_dense")(pooled)
    pred_losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits, labels=label_ids)
    num_domain = 2
    # Gradient reversal: forward identity, backward negated gradient, so the
    # encoder is trained to be domain-invariant while the head discriminates.
    pooled_for_domain = grad_reverse(pooled)
    domain_logits = tf.keras.layers.Dense(
        num_domain, name="domain_dense")(pooled_for_domain)
    domain_losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=domain_logits, labels=domain_ids)
    pred_loss = tf.reduce_mean(pred_losses * tf.cast(is_valid_label, tf.float32))
    domain_loss = tf.reduce_mean(domain_losses)
    tf.compat.v1.summary.scalar('domain_loss', domain_loss)
    tf.compat.v1.summary.scalar('pred_loss', pred_loss)
    alpha = model_config.alpha  # weight of the adversarial term
    loss = pred_loss + alpha * domain_loss
    tvars = tf.compat.v1.trainable_variables()
    initialized_variable_names = {}
    scaffold_fn = None
    if train_config.init_checkpoint:
        initialized_variable_names, init_fn = get_init_fn(
            train_config, tvars)
        scaffold_fn = get_tpu_scaffold_or_init(init_fn, train_config.use_tpu)
    log_var_assignments(tvars, initialized_variable_names)
    TPUEstimatorSpec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec
    output_spec = None
    if mode == tf.estimator.ModeKeys.TRAIN:
        # tvars=None lets the optimizer pick up all trainable variables.
        tvars = None
        train_op = optimization.create_optimizer_from_config(
            loss, train_config, tvars)
        output_spec = TPUEstimatorSpec(mode=mode,
                                       loss=loss,
                                       train_op=train_op,
                                       scaffold_fn=scaffold_fn)
    elif mode == tf.estimator.ModeKeys.EVAL:
        eval_metrics = (classification_metric_fn,
                        [logits, label_ids, is_real_example])
        output_spec = TPUEstimatorSpec(mode=mode,
                                       loss=loss,
                                       eval_metrics=eval_metrics,
                                       scaffold_fn=scaffold_fn)
    else:
        predictions = {
            "input_ids": input_ids,
            "logits": logits,
        }
        if "data_id" in features:
            predictions['data_id'] = features['data_id']
        output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
            mode=mode,
            predictions=predictions,
            scaffold_fn=scaffold_fn)
    return output_spec
def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """`model_fn` for masked-LM training with fixed (precomputed) masks.

    Reshapes dictionary-batch features, builds either an APR or BERT encoder
    (selected by closure name `model_name`), and computes the masked-LM loss
    from mask positions supplied in the features rather than dynamic masking.
    Relies on closure names (`dict_run_config`, `bert_config`, `ssdr_config`,
    `train_config`, `model_name`, ...) from the enclosing builder.
    """
    log_features(features)

    def reform_a_input(raw_input):
        # Flatten to [inner_batch_size, -1] for main-sequence features.
        return tf.reshape(raw_input, [dict_run_config.inner_batch_size, -1])

    def reform_b_input(raw_input):
        # Flatten to [def_per_batch, -1] for definition-side features.
        # NOTE(review): defined but not used in this body.
        return tf.reshape(raw_input, [dict_run_config.def_per_batch, -1])

    input_ids = reform_a_input(features["input_ids"])
    input_mask = reform_a_input(features["input_mask"])
    segment_ids = reform_a_input(features["segment_ids"])
    tf_logging.info("input_ids, input_mask")
    # input_ids = features["input_ids"]
    # input_mask = features["input_mask"]
    # segment_ids = features["segment_ids"]
    if mode == tf.estimator.ModeKeys.PREDICT:
        # Deterministic graph at predict time.
        tf.random.set_seed(0)
        seed = 0
    else:
        seed = None
    # NOTE(review): `seed` is only consumed by the commented-out dynamic
    # masking below; with fixed masks it is currently unused.
    # tf_logging.info("Doing dynamic masking (random)")
    # masked_input_ids, masked_lm_positions, masked_lm_ids, masked_lm_weights \
    #     = random_masking(input_ids, input_mask, train_config.max_predictions_per_seq, MASK_ID, seed)
    # if dict_run_config.prediction_op == "loss_fixed_mask" or train_config.fixed_mask:
    masked_input_ids = input_ids
    masked_lm_positions = reform_a_input(features["masked_lm_positions"])
    masked_lm_ids = reform_a_input(features["masked_lm_ids"])
    masked_lm_weights = reform_a_input(features["masked_lm_weights"])
    is_training = (mode == tf.estimator.ModeKeys.TRAIN)
    if model_name == "APR":
        model = APR(
            masked_input_ids,
            input_mask,
            segment_ids,
            is_training,
            train_config.use_one_hot_embeddings,
            bert_config,
            ssdr_config,
            dict_run_config.def_per_batch,
            dict_run_config.inner_batch_size,
            dict_run_config.max_def_length,
        )
    elif model_name == "BERT":
        model = BertModel(
            config=bert_config,
            is_training=is_training,
            input_ids=masked_input_ids,
            input_mask=input_mask,
            token_type_ids=segment_ids,
            use_one_hot_embeddings=train_config.use_one_hot_embeddings,
        )
    else:
        assert False  # unknown model_name; fail fast at graph build time
    masked_lm_loss, masked_lm_example_loss, masked_lm_log_probs \
        = get_masked_lm_output(bert_config, model.get_sequence_output(),
                               model.get_embedding_table(),
                               masked_lm_positions, masked_lm_ids,
                               masked_lm_weights)
    loss = masked_lm_loss
    tvars = tf.compat.v1.trainable_variables()
    assignment_fn = dict_model_fn.get_bert_assignment_map_for_dict
    initialized_variable_names, init_fn = align_checkpoint_twice(
        tvars, train_config.init_checkpoint, assignment_fn)
    scaffold_fn = get_tpu_scaffold_or_init(init_fn, train_config.use_tpu)
    log_var_assignments(tvars, initialized_variable_names)
    TPUEstimatorSpec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec
    if mode == tf.estimator.ModeKeys.TRAIN:
        if ssdr_config.compare_attrib_value_safe("use_two_lr", True):
            # Separate learning rates for different parameter groups.
            tf_logging.info("Using two lr for each parts")
            train_op = create_optimizer_with_separate_lr(
                loss, train_config)
        else:
            tf_logging.info("Using single lr ")
            train_op = optimization.create_optimizer_from_config(
                loss, train_config)
        output_spec = TPUEstimatorSpec(mode=mode,
                                       loss=loss,
                                       train_op=train_op,
                                       training_hooks=[OomReportingHook()],
                                       scaffold_fn=scaffold_fn)
    elif mode == tf.estimator.ModeKeys.EVAL:
        eval_metrics = (metric_fn_lm, [
            masked_lm_example_loss,
            masked_lm_log_probs,
            masked_lm_ids,
            masked_lm_weights,
        ])
        output_spec = TPUEstimatorSpec(mode=mode,
                                       loss=loss,
                                       eval_metrics=eval_metrics,
                                       scaffold_fn=scaffold_fn)
    else:
        predictions = {
            "input_ids": input_ids,
            "masked_input_ids": masked_input_ids,
            "masked_lm_ids": masked_lm_ids,
            "masked_lm_example_loss": masked_lm_example_loss,
            "masked_lm_positions": masked_lm_positions,
        }
        output_spec = TPUEstimatorSpec(mode=mode,
                                       loss=loss,
                                       predictions=predictions,
                                       scaffold_fn=scaffold_fn)
    return output_spec
def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    tf_logging.info("model_fn_apr_lm")
    """The `model_fn` for TPUEstimator: APR encoder with dynamic random masking.

    Applies random masking to the inputs (deterministic seed 0 at PREDICT),
    runs the APR model, and trains on the masked-LM loss. Relies on closure
    names (`train_config`, `bert_config`, `ssdr_config`, `dict_run_config`,
    `MASK_ID`, ...) from the enclosing builder.
    """
    log_features(features)
    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    if mode == tf.estimator.ModeKeys.PREDICT:
        # Deterministic masking at predict time.
        tf.random.set_seed(0)
        seed = 0
    else:
        seed = None
    tf_logging.info("Doing dynamic masking (random)")
    masked_input_ids, masked_lm_positions, masked_lm_ids, masked_lm_weights \
        = random_masking(input_ids, input_mask,
                         train_config.max_predictions_per_seq, MASK_ID, seed)
    is_training = (mode == tf.estimator.ModeKeys.TRAIN)
    tf_logging.info("Using masked_input_ids")
    model = APR(
        masked_input_ids,
        input_mask,
        segment_ids,
        is_training,
        train_config.use_one_hot_embeddings,
        bert_config,
        ssdr_config,
        dict_run_config.def_per_batch,
        dict_run_config.inner_batch_size,
        dict_run_config.max_def_length,
        # MainTransformer,
        # SecondTransformerEmbeddingLess,
    )
    masked_lm_loss, masked_lm_example_loss, masked_lm_log_probs \
        = get_masked_lm_output(bert_config, model.get_sequence_output(),
                               model.get_embedding_table(),
                               masked_lm_positions, masked_lm_ids,
                               masked_lm_weights)
    loss = masked_lm_loss
    tvars = tf.compat.v1.trainable_variables()
    assignment_fn = dict_model_fn.get_bert_assignment_map_for_dict
    initialized_variable_names, init_fn = align_checkpoint_twice(
        tvars, train_config.init_checkpoint, assignment_fn)
    scaffold_fn = get_tpu_scaffold_or_init(init_fn, train_config.use_tpu)
    log_var_assignments(tvars, initialized_variable_names)
    TPUEstimatorSpec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec
    if mode == tf.estimator.ModeKeys.TRAIN:
        if ssdr_config.compare_attrib_value_safe("use_two_lr", True):
            # Separate learning rates for different parameter groups.
            tf_logging.info("Using two lr for each parts")
            train_op = create_optimizer_with_separate_lr(
                loss, train_config)
        else:
            tf_logging.info("Using single lr ")
            train_op = optimization.create_optimizer_from_config(
                loss, train_config)
        output_spec = TPUEstimatorSpec(mode=mode,
                                       loss=loss,
                                       train_op=train_op,
                                       training_hooks=[OomReportingHook()],
                                       scaffold_fn=scaffold_fn)
    elif mode == tf.estimator.ModeKeys.EVAL:
        eval_metrics = (metric_fn_lm, [
            masked_lm_example_loss,
            masked_lm_log_probs,
            masked_lm_ids,
            masked_lm_weights,
        ])
        output_spec = TPUEstimatorSpec(mode=mode,
                                       loss=loss,
                                       eval_metrics=eval_metrics,
                                       scaffold_fn=scaffold_fn)
    else:
        predictions = {
            "input_ids": input_ids,
            "masked_input_ids": masked_input_ids,
            "masked_lm_ids": masked_lm_ids,
            "masked_lm_example_loss": masked_lm_example_loss,
            "masked_lm_positions": masked_lm_positions,
        }
        output_spec = TPUEstimatorSpec(mode=mode,
                                       loss=loss,
                                       predictions=predictions,
                                       scaffold_fn=scaffold_fn)
    return output_spec
def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """`model_fn` for a two-tower (query/document) ranking model.

    Encodes two query/document pairs with separate "query" and "document"
    towers, scores them via a dot product of pooled outputs, and trains with
    a hinge loss on +/-1-mapped labels. Relies on closure names
    (`model_class`, `model_config`, `train_config`, `special_flags`, ...)
    from the enclosing builder.
    """
    tf_logging.info("*** Features ***")
    for name in sorted(features.keys()):
        tf_logging.info(" name = %s, shape = %s" % (name, features[name].shape))
    q_input_ids_1 = features["q_input_ids_1"]
    q_input_mask_1 = features["q_input_mask_1"]
    d_input_ids_1 = features["d_input_ids_1"]
    d_input_mask_1 = features["d_input_mask_1"]
    q_input_ids_2 = features["q_input_ids_2"]
    q_input_mask_2 = features["q_input_mask_2"]
    d_input_ids_2 = features["d_input_ids_2"]
    d_input_mask_2 = features["d_input_mask_2"]
    # Stack the two pair instances along a new leading axis so each tower
    # encodes both in one pass.
    q_input_ids = tf.stack([q_input_ids_1, q_input_ids_2], axis=0)
    q_input_mask = tf.stack([q_input_mask_1, q_input_mask_2], axis=0)
    q_segment_ids = tf.zeros_like(q_input_ids, tf.int32)
    d_input_ids = tf.stack([d_input_ids_1, d_input_ids_2], axis=0)
    d_input_mask = tf.stack([d_input_mask_1, d_input_mask_2], axis=0)
    d_segment_ids = tf.zeros_like(d_input_ids, tf.int32)
    label_ids = features["label_ids"]
    is_training = (mode == tf.estimator.ModeKeys.TRAIN)
    if "is_real_example" in features:
        is_real_example = tf.cast(features["is_real_example"], dtype=tf.float32)
    else:
        is_real_example = tf.ones(tf.shape(label_ids), dtype=tf.float32)
    with tf.compat.v1.variable_scope("query"):
        model_q = model_class(
            config=model_config,
            is_training=is_training,
            input_ids=q_input_ids,
            input_mask=q_input_mask,
            token_type_ids=q_segment_ids,
            use_one_hot_embeddings=train_config.use_one_hot_embeddings,
        )
    with tf.compat.v1.variable_scope("document"):
        model_d = model_class(
            config=model_config,
            is_training=is_training,
            input_ids=d_input_ids,
            input_mask=d_input_mask,
            token_type_ids=d_segment_ids,
            use_one_hot_embeddings=train_config.use_one_hot_embeddings,
        )
    pooled_q = model_q.get_pooled_output()
    pooled_d = model_d.get_pooled_output()
    # Score matrix of query-document dot products.
    logits = tf.matmul(pooled_q, pooled_d, transpose_b=True)
    # Map labels {0,1} -> {-1,+1} and apply a hinge loss.
    y = tf.cast(label_ids, tf.float32) * 2 - 1
    losses = tf.maximum(1.0 - logits * y, 0)
    loss = tf.reduce_mean(losses)
    pred = tf.cast(logits > 0, tf.int32)
    tvars = tf.compat.v1.trainable_variables()
    initialized_variable_names = {}
    scaffold_fn = None
    if train_config.init_checkpoint:
        initialized_variable_names, init_fn = get_init_fn(
            train_config, tvars)
        scaffold_fn = get_tpu_scaffold_or_init(init_fn, train_config.use_tpu)
    log_var_assignments(tvars, initialized_variable_names)
    TPUEstimatorSpec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec
    output_spec = None
    if mode == tf.estimator.ModeKeys.TRAIN:
        if "simple_optimizer" in special_flags:
            tf_logging.info("using simple optimizer")
            train_op = create_simple_optimizer(loss,
                                               train_config.learning_rate,
                                               train_config.use_tpu)
        else:
            train_op = optimization.create_optimizer_from_config(
                loss, train_config, tvars)
        output_spec = TPUEstimatorSpec(mode=mode,
                                       loss=loss,
                                       train_op=train_op,
                                       scaffold_fn=scaffold_fn)
    elif mode == tf.estimator.ModeKeys.EVAL:
        eval_metrics = (classification_metric_fn,
                        [pred, label_ids, is_real_example])
        output_spec = TPUEstimatorSpec(mode=mode,
                                       loss=loss,
                                       eval_metrics=eval_metrics,
                                       scaffold_fn=scaffold_fn)
    else:
        predictions = {
            "q_input_ids": q_input_ids,
            "d_input_ids": d_input_ids,
            "score": logits
        }
        # Pass through any identifiers present in the input features.
        useful_inputs = ["data_id", "input_ids2", "data_ids"]
        for input_name in useful_inputs:
            if input_name in features:
                predictions[input_name] = features[input_name]
        output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
            mode=mode,
            predictions=predictions,
            scaffold_fn=scaffold_fn)
    return output_spec
def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """`model_fn` for a dual-BERT classifier with a learned confidence head.

    Model 1 (scope `dual_model_prefix1`) classifies the first input; model 2
    (scope `dual_model_prefix2`) produces a confidence in [0,1] for the second
    input. The per-example loss is cls_loss * confidence + (1-confidence) * k,
    then reweighted by `apply_weighted_loss` with `alpha`. Relies on closure
    names (`model_config`, `train_config`, `dual_model_prefix1/2`, ...) from
    the enclosing builder.
    """
    tf_logging.info("*** Features ***")
    for name in sorted(features.keys()):
        tf_logging.info(" name = %s, shape = %s" % (name, features[name].shape))
    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    if mode == tf.estimator.ModeKeys.PREDICT:
        # No labels at predict time; fabricate ones so the graph still builds.
        label_ids = tf.ones([input_ids.shape[0]], dtype=tf.int32)
    else:
        label_ids = features["label_ids"]
        label_ids = tf.reshape(label_ids, [-1])
    if "is_real_example" in features:
        is_real_example = tf.cast(features["is_real_example"], dtype=tf.float32)
    else:
        is_real_example = tf.ones(tf.shape(label_ids), dtype=tf.float32)
    is_training = (mode == tf.estimator.ModeKeys.TRAIN)
    # Second input set, consumed by the confidence model.
    input_ids2 = features["input_ids2"]
    input_mask2 = features["input_mask2"]
    segment_ids2 = features["segment_ids2"]
    with tf.compat.v1.variable_scope(dual_model_prefix1):
        model_1 = BertModel(
            config=model_config,
            is_training=is_training,
            input_ids=input_ids,
            input_mask=input_mask,
            token_type_ids=segment_ids,
            use_one_hot_embeddings=train_config.use_one_hot_embeddings,
        )
        pooled = model_1.get_pooled_output()
        if is_training:
            pooled = dropout(pooled, 0.1)
        logits = tf.keras.layers.Dense(train_config.num_classes,
                                       name="cls_dense")(pooled)
    with tf.compat.v1.variable_scope(dual_model_prefix2):
        model_2 = BertModel(
            config=model_config,
            is_training=is_training,
            input_ids=input_ids2,
            input_mask=input_mask2,
            token_type_ids=segment_ids2,
            use_one_hot_embeddings=train_config.use_one_hot_embeddings,
        )
        pooled = model_2.get_pooled_output()
        if is_training:
            pooled = dropout(pooled, 0.1)
        conf_probs = tf.keras.layers.Dense(
            train_config.num_classes,
            name="cls_dense",
            activation=tf.keras.activations.softmax)(pooled)
        # Probability of class 1 is used as the confidence score.
        confidence = conf_probs[:, 1]
        confidence_loss = 1 - confidence
    cls_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits, labels=label_ids)
    k = model_config.k  # penalty weight for low confidence
    alpha = model_config.alpha
    loss_arr = cls_loss * confidence + confidence_loss * k
    loss_arr = apply_weighted_loss(loss_arr, label_ids, alpha)
    loss = tf.reduce_mean(input_tensor=loss_arr)
    tvars = tf.compat.v1.trainable_variables()
    initialized_variable_names = {}
    scaffold_fn = None
    if train_config.init_checkpoint:
        initialized_variable_names, init_fn = get_init_fn(
            train_config, tvars)
        scaffold_fn = get_tpu_scaffold_or_init(init_fn, train_config.use_tpu)
    log_var_assignments(tvars, initialized_variable_names)
    TPUEstimatorSpec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec

    def metric_fn(log_probs, label, is_real_example, confidence):
        # Standard classification metrics plus the mean confidence.
        r = classification_metric_fn(log_probs, label, is_real_example)
        r['confidence'] = tf.compat.v1.metrics.mean(confidence)
        return r

    output_spec = None
    if mode == tf.estimator.ModeKeys.TRAIN:
        # tvars=None lets the optimizer pick up all trainable variables.
        tvars = None
        train_op = optimization.create_optimizer_from_config(
            loss, train_config, tvars)
        output_spec = TPUEstimatorSpec(mode=mode,
                                       loss=loss,
                                       train_op=train_op,
                                       scaffold_fn=scaffold_fn)
    elif mode == tf.estimator.ModeKeys.EVAL:
        eval_metrics = (metric_fn,
                        [logits, label_ids, is_real_example, confidence])
        output_spec = TPUEstimatorSpec(mode=mode,
                                       loss=loss,
                                       eval_metrics=eval_metrics,
                                       scaffold_fn=scaffold_fn)
    else:
        predictions = {
            "input_ids": input_ids,
            "logits": logits,
            "confidence": confidence,
        }
        if "data_id" in features:
            predictions['data_id'] = features['data_id']
        output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
            mode=mode,
            predictions=predictions,
            scaffold_fn=scaffold_fn)
    return output_spec
def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """`model_fn` for per-token regression with masked MAE loss.

    Projects sequence outputs to one value per token, computes MAE against
    `label_ids`, masks out positions where `label_masks` is false, and
    averages. Eval reports sign-agreement accuracy. Relies on closure names
    (`model_class`, `bert_config`, `train_config`, `optimization`) from the
    enclosing builder.
    """
    tf_logging.info("*** Features ***")
    for name in sorted(features.keys()):
        tf_logging.info("name = %s, shape = %s" % (name, features[name].shape))
    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    label_ids = features["label_ids"]
    label_masks = features["label_masks"]
    is_training = (mode == tf.estimator.ModeKeys.TRAIN)
    model = model_class(
        config=bert_config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=train_config.use_one_hot_embeddings,
    )
    # One regression value per token. NOTE(review): the Dense output width is
    # train_config.num_classes; downstream metric_fn squeezes axis 2, which
    # implies num_classes == 1 here — confirm.
    logits = tf.keras.layers.Dense(train_config.num_classes,
                                   name="token_regression")(
                                       model.get_sequence_output())
    per_ex_losses = tf.keras.losses.MAE(tf.expand_dims(label_ids, 2), logits)
    # Zero out loss on positions without a label; normalize by the number of
    # labeled positions (epsilon guards against empty masks).
    masked_losses = per_ex_losses * tf.cast(label_masks, tf.float32)
    losses_sum = tf.reduce_sum(masked_losses, axis=1)
    denom = tf.reduce_sum(tf.cast(label_masks, tf.float32), axis=1) + 1e-5
    losses = losses_sum / denom
    total_loss = tf.reduce_mean(losses)
    tvars = tf.compat.v1.trainable_variables()
    initialized_variable_names = {}
    scaffold_fn = None
    if train_config.init_checkpoint:
        assignment_map, initialized_variable_names = get_bert_assignment_map(
            tvars, train_config.init_checkpoint)
        if train_config.use_tpu:
            def tpu_scaffold():
                tf.compat.v1.train.init_from_checkpoint(
                    train_config.init_checkpoint, assignment_map)
                return tf.compat.v1.train.Scaffold()
            scaffold_fn = tpu_scaffold
        else:
            tf.compat.v1.train.init_from_checkpoint(
                train_config.init_checkpoint, assignment_map)
    tf_logging.info("**** Trainable Variables ****")
    for var in tvars:
        init_string = ""
        if var.name in initialized_variable_names:
            init_string = ", *INIT_FROM_CKPT*"
        tf_logging.info("name = %s, shape = %s%s", var.name, var.shape,
                        init_string)
    output_spec = None
    if mode == tf.estimator.ModeKeys.TRAIN:
        train_op = optimization.create_optimizer_from_config(
            total_loss, train_config)
        output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            train_op=train_op,
            scaffold_fn=scaffold_fn)
    elif mode == tf.estimator.ModeKeys.EVAL:
        def metric_fn(logits, label_ids, label_masks):
            # A prediction is correct when it matches the sign of the label;
            # accuracy is averaged over labeled positions only.
            logits_reduced = tf.squeeze(logits, 2)
            is_neg_correct = tf.logical_and(tf.less(label_ids, 0.),
                                            tf.less(logits_reduced, 0.))
            is_pos_correct = tf.logical_and(tf.less(0., label_ids),
                                            tf.less(0., logits_reduced))
            is_correct = tf.logical_or(is_neg_correct, is_pos_correct)
            float_masks = tf.cast(label_masks, tf.float32)
            num_correct = tf.reduce_sum(tf.cast(is_correct, tf.float32)
                                        * float_masks, axis=1)
            num_problems = tf.reduce_sum(float_masks, axis=1) + 1e-5
            acc_list = num_correct / num_problems
            mean_acc = tf.compat.v1.metrics.mean(values=acc_list)
            return {'mean_acc': mean_acc}
        eval_metrics = (metric_fn, [logits, label_ids, label_masks])
        output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            eval_metrics=eval_metrics,
            scaffold_fn=scaffold_fn)
    else:
        predictions = {
            "logits": logits,
            "input_ids": input_ids,
            "labels": label_ids,
            "label_masks": label_masks,
        }
        output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            predictions=predictions,
            scaffold_fn=scaffold_fn)
    return output_spec
def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """`model_fn` for a model exposing a scalar probability per example.

    Converts the model's probability into 2-class logits via clip + log so the
    standard sparse softmax cross-entropy and classification metrics can be
    reused. Relies on closure names (`model_class`, `model_config`,
    `train_config`, `optimization`, `classification_metric_fn`) from the
    enclosing builder.

    Fix: removed leftover debug `print(probs)` / `print(prob0)` /
    `print(prob2d)` statements — they only printed symbolic tensors to stdout
    at graph-construction time.
    """
    tf_logging.info("*** Features ***")
    for name in sorted(features.keys()):
        tf_logging.info(" name = %s, shape = %s" % (name, features[name].shape))
    label_ids = features["label_ids"]
    label_ids = tf.reshape(label_ids, [-1])
    if "is_real_example" in features:
        is_real_example = tf.cast(features["is_real_example"], dtype=tf.float32)
    else:
        is_real_example = tf.ones(tf.shape(label_ids), dtype=tf.float32)
    is_training = (mode == tf.estimator.ModeKeys.TRAIN)
    model = model_class(
        config=model_config,
        is_training=is_training,
        use_one_hot_embeddings=train_config.use_one_hot_embeddings,
        features=features,
    )
    # Turn P(class=1) into a 2-column probability matrix [P(0), P(1)].
    probs = model.get_prob()
    prob0 = 1 - probs
    prob2d = tf.stack([prob0, probs], 1)
    # Clip before log so log(0) cannot produce -inf.
    epsilon = 1e-6
    probs_ad = tf.clip_by_value(prob2d, epsilon, 1.0 - epsilon)
    logits = tf.math.log(probs_ad)
    loss_arr = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits, labels=label_ids)
    loss = tf.reduce_mean(input_tensor=loss_arr)
    tvars = tf.compat.v1.trainable_variables()
    initialized_variable_names = {}
    scaffold_fn = None
    if train_config.init_checkpoint:
        initialized_variable_names, init_fn = get_init_fn(train_config, tvars)
        scaffold_fn = get_tpu_scaffold_or_init(init_fn, train_config.use_tpu)
    log_var_assignments(tvars, initialized_variable_names)
    TPUEstimatorSpec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec
    output_spec = None
    if mode == tf.estimator.ModeKeys.TRAIN:
        # tvars=None lets the optimizer pick up all trainable variables.
        tvars = None
        train_op = optimization.create_optimizer_from_config(loss,
                                                             train_config,
                                                             tvars)
        output_spec = TPUEstimatorSpec(mode=mode,
                                       loss=loss,
                                       train_op=train_op,
                                       scaffold_fn=scaffold_fn)
    elif mode == tf.estimator.ModeKeys.EVAL:
        eval_metrics = (classification_metric_fn, [
            prob2d, label_ids, is_real_example
        ])
        output_spec = TPUEstimatorSpec(mode=mode,
                                       loss=loss,
                                       eval_metrics=eval_metrics,
                                       scaffold_fn=scaffold_fn)
    else:
        predictions = {
            "label_ids": label_ids,
            "logits": logits,
        }
        if "data_id" in features:
            predictions['data_id'] = features['data_id']
        output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
            mode=mode,
            predictions=predictions,
            scaffold_fn=scaffold_fn)
    return output_spec
def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """`model_fn` delegating logits/loss to a window-aware model object.

    The model consumes the full `features` dict itself and exposes
    `get_logits()` / `get_loss(label_ids)` plus several per-window indicator
    tensors surfaced at predict time. Relies on closure names (`model_class`,
    `model_config`, `train_config`, ...) from the enclosing builder.
    """
    tf_logging.info("*** Features ***")
    for name in sorted(features.keys()):
        tf_logging.info(" name = %s, shape = %s" % (name, features[name].shape))
    label_ids = features["label_ids"]
    label_ids = tf.reshape(label_ids, [-1])
    if "is_real_example" in features:
        is_real_example = tf.cast(features["is_real_example"], dtype=tf.float32)
    else:
        is_real_example = tf.ones(tf.shape(label_ids), dtype=tf.float32)
    is_training = (mode == tf.estimator.ModeKeys.TRAIN)
    model = model_class(
        config=model_config,
        is_training=is_training,
        use_one_hot_embeddings=train_config.use_one_hot_embeddings,
        features=features,
    )
    logits = model.get_logits()
    loss = model.get_loss(label_ids)
    tvars = tf.compat.v1.trainable_variables()
    initialized_variable_names = {}
    scaffold_fn = None
    if train_config.init_checkpoint:
        initialized_variable_names, init_fn = get_init_fn(
            train_config, tvars)
        scaffold_fn = get_tpu_scaffold_or_init(init_fn, train_config.use_tpu)
    log_var_assignments(tvars, initialized_variable_names)
    TPUEstimatorSpec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec
    output_spec = None
    if mode == tf.estimator.ModeKeys.TRAIN:
        # tvars=None lets the optimizer pick up all trainable variables.
        tvars = None
        train_op = optimization.create_optimizer_from_config(
            loss, train_config, tvars)
        output_spec = TPUEstimatorSpec(mode=mode,
                                       loss=loss,
                                       train_op=train_op,
                                       scaffold_fn=scaffold_fn)
    elif mode == tf.estimator.ModeKeys.EVAL:
        eval_metrics = (classification_metric_fn,
                        [logits, label_ids, is_real_example])
        output_spec = TPUEstimatorSpec(mode=mode,
                                       loss=loss,
                                       eval_metrics=eval_metrics,
                                       scaffold_fn=scaffold_fn)
    else:
        # Expose the model's window bookkeeping tensors alongside logits.
        predictions = {
            "label_ids": label_ids,
            "logits": logits,
            "is_first_window": model.is_first_window,
            "num_content_tokens": model.num_content_tokens,
            "has_enough_evidence": model.has_enough_evidence,
            "is_valid_window": model.is_valid_window,
            "is_valid_window_mask": model.is_valid_window_mask
        }
        if "data_id" in features:
            predictions['data_id'] = features['data_id']
        output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
            mode=mode,
            predictions=predictions,
            scaffold_fn=scaffold_fn)
    return output_spec
def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """The `model_fn` for TPUEstimator.

    Pairwise ranking model: a shared BERT encoder scores a (good, bad) pair
    and a hinge-style pairwise head produces `pair_logits` with column 0 for
    the good side and column 1 for the bad side.
    """
    tf_logging.info("*** Features ***")
    for name in sorted(features.keys()):
        tf_logging.info("name = %s, shape = %s" % (name, features[name].shape))
    # Concatenate the paired examples into one batch for a single forward pass.
    input_ids, input_mask, segment_ids = combine_paired_input_features(
        features)
    # Per-example flags marking strictly-good / strictly-bad supervision.
    strict_good = features["strict_good"]
    strict_bad = features["strict_bad"]
    is_training = (mode == tf.estimator.ModeKeys.TRAIN)
    model = model_class(
        config=bert_config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=train_config.use_one_hot_embeddings,
    )
    pooled = model.get_pooled_output()
    losses, logits, pair_logits = pairwise_model(pooled, strict_good,
                                                 strict_bad)
    total_loss = tf.reduce_mean(losses)
    tvars = tf.compat.v1.trainable_variables()
    initialized_variable_names = {}
    scaffold_fn = None
    if train_config.init_checkpoint:
        assignment_map, initialized_variable_names = get_bert_assignment_map(
            tvars, train_config.init_checkpoint)
        if train_config.use_tpu:
            # On TPU, checkpoint restoration must happen inside a Scaffold fn.
            def tpu_scaffold():
                tf.compat.v1.train.init_from_checkpoint(
                    train_config.init_checkpoint, assignment_map)
                return tf.compat.v1.train.Scaffold()

            scaffold_fn = tpu_scaffold
        else:
            tf.compat.v1.train.init_from_checkpoint(
                train_config.init_checkpoint, assignment_map)
    tf_logging.info("**** Trainable Variables ****")
    for var in tvars:
        init_string = ""
        if var.name in initialized_variable_names:
            init_string = ", *INIT_FROM_CKPT*"
        tf_logging.info("name = %s, shape = %s%s", var.name, var.shape,
                        init_string)
    output_spec = None
    if mode == tf.estimator.ModeKeys.TRAIN:
        train_op = optimization.create_optimizer_from_config(
            total_loss, train_config)
        output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            train_op=train_op,
            scaffold_fn=scaffold_fn)
    elif mode == tf.estimator.ModeKeys.EVAL:
        def metric_fn(pair_logits, strict_good, strict_bad):
            # Pairwise correct when good logit exceeds bad logit.
            diff = pair_logits[:, 0] - pair_logits[:, 1]
            pairwise_correct = tf.less(0.0, diff)
            # Strict correctness uses margin thresholds (+1 / -1).
            strict_good_correct_raw = tf.reshape(
                tf.less(1.0, pair_logits[:, 0]), [-1, 1])
            strict_good_correct = cast_float_multiply(
                strict_good_correct_raw, strict_good)
            strict_bad_correct_raw = tf.reshape(
                tf.less(pair_logits[:, 1], -1.0), [-1, 1])
            strict_bad_correct = cast_float_multiply(
                strict_bad_correct_raw, strict_bad)
            pairwise_acc_raw = tf.cast(pairwise_correct, tf.float32)
            mean_acc = tf.compat.v1.metrics.mean(values=pairwise_acc_raw)

            def strict_accuracy(correctness, gold):
                # Accuracy weighted by the gold mask, so only strictly
                # labeled examples contribute.
                return tf.compat.v1.metrics.accuracy(
                    labels=tf.ones_like(gold, tf.int32),
                    predictions=tf.cast(correctness, tf.int32),
                    weights=tf.cast(gold, tf.float32))

            return {
                'mean_acc': mean_acc,
                'strict_good_acc': strict_accuracy(strict_good_correct,
                                                   strict_good),
                'strict_bad_acc': strict_accuracy(strict_bad_correct,
                                                  strict_bad)
            }

        eval_metrics = (metric_fn, [pair_logits, strict_good, strict_bad])
        output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            eval_metrics=eval_metrics,
            scaffold_fn=scaffold_fn)
    else:
        predictions = {
            "logits": logits,
            "input_ids": input_ids,
            "strict_good": strict_good,
            "strict_bad": strict_bad,
        }
        output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            predictions=predictions,
            scaffold_fn=scaffold_fn)
    return output_spec
def __init__(self, modeling_option, bert_hp, lms_config, num_gpu):
    """Build the LMS graph, on one GPU or replicated across `num_gpu` GPUs.

    modeling_option: 'ce' (cross-entropy) or 'co' (correlation) explanation
        modeling; selects the ex-model class.
    bert_hp: BERT hyperparameters passed through to build_model.
    lms_config: LMS configuration; num_tags drives the per-layer tensor lists.
    num_gpu: 1 builds a single tower, otherwise one tower per GPU with
        averaged/concatenated tensors.
    """
    ex_modeling_class = {
        'ce': CrossEntropyModeling,
        'co': CorrelationModeling
    }[modeling_option]

    def build_model_fn():
        # One tower: (task model, ex model, 3-way match predictor).
        return build_model(ex_modeling_class, bert_hp, lms_config)

    self.num_gpu = num_gpu
    self.match_predictor = None
    self.match_predictor_list = None
    self.bert_hp = bert_hp
    if num_gpu == 1:
        tf_logging.info("Using single GPU")
        task_model_, ex_model_, match_predictor_3way = build_model_fn()
        loss_tensor = match_predictor_3way.loss
        # List[Tensor[num_layer]]
        per_layer_loss_list = match_predictor_3way.all_losses
        batch2feed_dict = task_model_.batch2feed_dict
        logits = task_model_.logits
        ex_score_tensor = ex_model_.get_scores()
        # List[List[Tensor[batch, 2]]]
        per_layer_logit_list = match_predictor_3way.per_layer_logits_list
        self.match_predictor = match_predictor_3way
    else:
        # Multi-GPU: build one tower per device, then average losses and
        # concatenate per-example tensors across towers.
        main_models, ex_models, match_predictor_list = zip(
            *get_multiple_models(build_model_fn, num_gpu))
        loss_tensor = get_avg_loss(match_predictor_list)
        # One averaged loss tensor per tag. The lambda's `i` is evaluated
        # inside the comprehension iteration (the helper calls it
        # immediately), so late binding is not an issue here.
        per_layer_loss_list = [
            get_avg_tensors_from_models(
                match_predictor_list,
                lambda match_predictor: match_predictor.per_layer_loss_list[i])
            for i in range(lms_config.num_tags)
        ]
        batch2feed_dict = get_batch2feed_dict_for_multi_gpu(main_models)
        logits = get_concat_tensors_from_models(main_models,
                                                lambda model: model.logits)

        # NOTE(review): defined but never used in this branch — candidate
        # for removal; confirm no external caller relies on it.
        def get_loss_tensor(model):
            t = tf.expand_dims(tf.stack(model.get_losses()), 0)
            return t

        ex_score_tensor = get_concat_tensors_from_models(
            ex_models, lambda model: model.get_scores())
        per_layer_logit_list = [
            get_concat_tensors_list_from_models(
                match_predictor_list,
                lambda model: model.per_layer_logit_list[i])
            for i in range(lms_config.num_tags)
        ]
        self.match_predictor_list = match_predictor_list
    self.logits = logits
    self.batch2feed_dict = batch2feed_dict
    self.ex_score_tensor = ex_score_tensor
    self.loss_tensor = loss_tensor
    # List[List[Tensor[batch_size, 2]]]
    self.per_layer_logit_list = per_layer_logit_list
    # List[List[Tensor[1]]]
    self.per_layer_loss_list = per_layer_loss_list
def init_from_nli(sess):
    """Initialize the session: main model from the NLI checkpoint, dict reader from plain BERT."""
    tf_logging.info(
        "Initializing model with nli(main) and bert(dict reader)")
    bert_checkpoint = get_bert_full_path()
    init_dict_model_with_nli_and_bert(sess, nli_checkpoint_path,
                                      bert_checkpoint)
def load_last_saved_model(self, model_path):
    """Restore the newest checkpoint found under `model_path` into self.sess."""
    newest_checkpoint = get_latest_model_path_from_dir_path(model_path)
    load_model(self.sess, newest_checkpoint)
    tf_logging.info(
        "Loading previous model from {}".format(newest_checkpoint))
def eval_fn_factory(sess, dev_batches, loss_tensor, per_layer_loss,
                    ex_scores_tensor, per_layer_logits_tensor,
                    global_step_tensor, batch2feed_dict, test_writer):
    """Run evaluation over `dev_batches` and write TensorBoard summaries.

    Computes average loss, then per-layer token-level accuracy/precision/
    recall/F1 of the per-layer logits against thresholded ex-scores (> 0.5),
    broken down by token region (all / non-padding / seg1 / seg2 / neutral).
    Returns the average loss.
    """
    loss_list = []
    all_ex_scores: List[np.array] = []
    all_per_layer_logits: List[List[np.array]] = []
    input_mask_list = []
    per_layer_loss_list = []
    segment_ids_list = []
    label_list = []
    for batch in dev_batches:
        # assumes batch = (input_ids, input_mask, segment_ids, label)
        input_ids, input_mask, segment_ids, label = batch
        input_mask_list.append(input_mask)
        segment_ids_list.append(segment_ids)
        label_list.append(label)
        loss_val, per_layer_loss_val, ex_scores, per_layer_logits, g_step_val \
            = sess.run([loss_tensor, per_layer_loss, ex_scores_tensor,
                        per_layer_logits_tensor, global_step_tensor],
                       feed_dict=batch2feed_dict(batch)
                       )
        loss_list.append(loss_val)
        all_ex_scores.append(ex_scores)
        per_layer_loss_list.append(per_layer_loss_val)
        all_per_layer_logits.append(per_layer_logits)
    # Stitch per-batch arrays back into full-eval-set arrays.
    all_segment_ids = np.concatenate(segment_ids_list, axis=0)
    all_input_mask = np.concatenate(input_mask_list, axis=0)
    all_ex_scores_np = np.concatenate(all_ex_scores, axis=0)
    all_per_layer_loss = np.stack(per_layer_loss_list)
    all_label = np.concatenate(label_list, axis=0)
    num_layer = len(per_layer_logits_tensor)
    # Regroup logits: batch-major -> layer-major.
    logits_grouped_by_layer = []
    for layer_no in range(num_layer):
        t = np.concatenate([batch[layer_no] for batch in all_per_layer_logits],
                           axis=0)
        logits_grouped_by_layer.append(t)
    avg_loss = np.average(loss_list)
    summary = tf.Summary()
    # g_step_val comes from the last batch's sess.run above.
    tf_logging.info("Step dev step={0} loss={1:.04f}".format(
        g_step_val, avg_loss))
    summary.value.add(tag='loss', simple_value=avg_loss)
    # Binarize explanation scores to serve as gold token labels.
    gold_ex_binary = all_ex_scores_np > 0.5
    for layer_no, logits in enumerate(logits_grouped_by_layer):
        # Predicted positive when logit[..., 1] > logit[..., 0].
        pred_binary = np.less(logits[:, :, 0], logits[:, :, 1])
        # score_per_data_point = []
        score_list_d = defaultdict(list)
        for data_point_idx in range(len(gold_ex_binary)):
            # per_data_point = get_acc_prec_recall(pred_binary[data_point_idx], gold_ex_binary[data_point_idx])
            # score_per_data_point.append(per_data_point)
            padding_start = find_padding(all_input_mask[data_point_idx])
            seg2_start = find_seg2(all_segment_ids[data_point_idx])
            assert 1 < seg2_start < padding_start
            # Score the same prediction under several token-range conditions.
            for st, ed, name in [(0, len(pred_binary), "all"),
                                 (0, padding_start, "non-padding"),
                                 (1, seg2_start, "seg1"),
                                 (seg2_start, padding_start, "seg2")]:
                conditioned_score = get_acc_prec_recall(
                    pred_binary[data_point_idx][st:ed],
                    gold_ex_binary[data_point_idx][st:ed])
                score_list_d[name].append(conditioned_score)
            # NOTE(review): label == 1 is scored under the tag "neutral"
            # on the seg2 range — presumably label 1 is the neutral NLI
            # class; confirm against the data pipeline.
            if all_label[data_point_idx] == 1:
                st = seg2_start
                ed = padding_start
                conditioned_score = get_acc_prec_recall(
                    pred_binary[data_point_idx][st:ed],
                    gold_ex_binary[data_point_idx][st:ed])
                score_list_d["neutral"].append(conditioned_score)
        for condition_name in score_list_d:
            score_list = score_list_d[condition_name]
            scores = {}
            for metric in ["accuracy", "precision", "recall", "f1"]:
                # with tf.name_scope(metric):
                scores[metric] = average([d[metric] for d in score_list])
                tag_name = '{}-{}/Layer{}'.format(condition_name, metric,
                                                  layer_no)
                summary.value.add(tag=tag_name, simple_value=scores[metric])
            if condition_name == "all":
                tf_logging.info(
                    "Layer {0} acc={1:.02f}, prec={2:.02f} recall={3:.02f} f1={4:.02f}"
                    .format(layer_no, scores['accuracy'], scores['precision'],
                            scores['recall'], scores['f1']))
        layer_loss = np.mean(all_per_layer_loss[:, layer_no])
        summary.value.add(tag="loss/Layer{}".format(layer_no),
                          simple_value=layer_loss)
    test_writer.add_summary(summary, g_step_val)
    test_writer.flush()
    return avg_loss
def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """The `model_fn` for TPUEstimator.

    Masked LM with an extra "label_token" prediction head: random masking is
    applied first, then label-token positions are extracted and scored by a
    second LM head whose loss is mixed in with weight `model_config.ratio`.
    """
    tf_logging.info("*** Features ***")
    for name in sorted(features.keys()):
        tf_logging.info(" name = %s, shape = %s" % (name, features[name].shape))
    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    if "next_sentence_labels" in features:
        next_sentence_labels = features["next_sentence_labels"]
    else:
        # NSP labels are not always present; substitute dummies.
        next_sentence_labels = get_dummy_next_sentence_labels(input_ids)
    if mode == tf.estimator.ModeKeys.PREDICT:
        # Deterministic masking at prediction time for reproducibility.
        tf.random.set_seed(0)
        seed = 0
        print("Seed as zero")
    else:
        seed = None
    tf_logging.info("Doing dynamic masking (random)")
    # Label tokens must never be chosen as regular MLM targets.
    special_tokens = [LABEL_UNK, LABEL_0, LABEL_1, LABEL_2]
    masked_input_ids, masked_lm_positions, masked_lm_ids, masked_lm_weights \
        = random_masking(input_ids, input_mask,
                         train_config.max_predictions_per_seq, MASK_ID, seed,
                         special_tokens)
    # Locate label-token positions for the auxiliary head; is_test_inst
    # doubles as the weight vector for that head's loss.
    masked_input_ids, masked_lm_positions_label, masked_label_ids_label, is_test_inst \
        = get_label_indices(masked_input_ids)
    is_training = (mode == tf.estimator.ModeKeys.TRAIN)
    model = model_class(
        config=model_config,
        is_training=is_training,
        input_ids=masked_input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=train_config.use_one_hot_embeddings,
    )
    (masked_lm_loss, masked_lm_example_loss,
     masked_lm_log_probs) = get_masked_lm_output_fn(
         model_config, model.get_sequence_output(),
         model.get_embedding_table(), masked_lm_positions, masked_lm_ids,
         masked_lm_weights)
    # Separate variable scope so the label head gets its own projection.
    with tf.compat.v1.variable_scope("label_token"):
        (masked_lm_loss_label, masked_lm_example_loss_label,
         masked_lm_log_probs_label) = get_masked_lm_output_fn(
             model_config, model.get_sequence_output(),
             model.get_embedding_table(), masked_lm_positions_label,
             masked_label_ids_label, is_test_inst)
    # NOTE(review): NSP loss is computed but not added to total_loss below —
    # presumably intentional; confirm.
    (next_sentence_loss, next_sentence_example_loss,
     next_sentence_log_probs) = get_next_sentence_output(
         model_config, model.get_pooled_output(), next_sentence_labels)
    total_loss = masked_lm_loss + masked_lm_loss_label * model_config.ratio
    tvars = tf.compat.v1.trainable_variables()
    initialized_variable_names, initialized_variable_names2, init_fn\
        = align_checkpoint_for_lm(tvars,
                                  train_config.checkpoint_type,
                                  train_config.init_checkpoint,
                                  train_config.second_init_checkpoint,
                                  )
    scaffold_fn = get_tpu_scaffold_or_init(init_fn, train_config.use_tpu)
    log_var_assignments(tvars, initialized_variable_names,
                        initialized_variable_names2)
    output_spec = None
    if mode == tf.estimator.ModeKeys.TRAIN:
        train_op = optimization.create_optimizer_from_config(
            total_loss, train_config)
        output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            train_op=train_op,
            training_hooks=[OomReportingHook()],
            scaffold_fn=scaffold_fn)
    elif mode == tf.estimator.ModeKeys.EVAL:
        eval_metrics = (metric_fn, [
            masked_lm_example_loss, masked_lm_log_probs, masked_lm_ids,
            masked_lm_weights, masked_lm_example_loss_label,
            masked_lm_log_probs_label, masked_label_ids_label, is_test_inst
        ])
        output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            eval_metrics=eval_metrics,
            scaffold_fn=scaffold_fn)
    else:
        predictions = {
            "input_ids": input_ids,
            "masked_input_ids": masked_input_ids,
            "masked_lm_ids": masked_lm_ids,
            "masked_lm_example_loss": masked_lm_example_loss,
            "masked_lm_positions": masked_lm_positions,
            "masked_lm_example_loss_label": masked_lm_example_loss_label,
            "masked_lm_log_probs_label": masked_lm_log_probs_label,
            "masked_label_ids_label": masked_label_ids_label,
            "is_test_inst": is_test_inst,
        }
        output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            predictions=predictions,
            scaffold_fn=scaffold_fn)
    return output_spec
def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """The `model_fn` for TPUEstimator.

    Targeted LM (TLM): a frozen priority model (under scope `target_task`)
    scores tokens, and masking is biased toward high-priority tokens before
    standard MLM + NSP training.
    """
    tf_logging.info("*** Features ***")
    for name in sorted(features.keys()):
        tf_logging.info(" name = %s, shape = %s" % (name, features[name].shape))
    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    if "next_sentence_labels" in features:
        next_sentence_labels = features["next_sentence_labels"]
    else:
        next_sentence_labels = get_dummy_next_sentence_labels(input_ids)
    tlm_prefix = "target_task"
    with tf.compat.v1.variable_scope(tlm_prefix):
        # stop_gradient: the priority model is scoring-only, never trained.
        priority_score = tf.stop_gradient(priority_model(features))
    # Amplify scores before converting them into masking probabilities.
    priority_score = priority_score * target_model_config.amp
    masked_input_ids, masked_lm_positions, masked_lm_ids, masked_lm_weights\
        = biased_masking(input_ids, input_mask, priority_score,
                         target_model_config.alpha,
                         train_config.max_predictions_per_seq, MASK_ID)
    is_training = (mode == tf.estimator.ModeKeys.TRAIN)
    model = model_class(
        config=bert_config,
        is_training=is_training,
        input_ids=masked_input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=train_config.use_one_hot_embeddings,
    )
    (masked_lm_loss, masked_lm_example_loss,
     masked_lm_log_probs) = get_masked_lm_output(
         bert_config, model.get_sequence_output(),
         model.get_embedding_table(), masked_lm_positions, masked_lm_ids,
         masked_lm_weights)
    (next_sentence_loss, next_sentence_example_loss,
     next_sentence_log_probs) = get_next_sentence_output(
         bert_config, model.get_pooled_output(), next_sentence_labels)
    total_loss = masked_lm_loss + next_sentence_loss
    # all_variables (not just trainables) so the priority model's variables
    # are included in checkpoint assignment.
    all_vars = tf.compat.v1.all_variables()
    tf_logging.info("We assume priority model is from v2")
    if train_config.checkpoint_type == "v2":
        # Main LM from init_checkpoint; priority model (remapped under
        # tlm_prefix) from second_init_checkpoint.
        assignment_map, initialized_variable_names = assignment_map_v2_to_v2(
            all_vars, train_config.init_checkpoint)
        assignment_map2, initialized_variable_names2 = get_assignment_map_remap_from_v2(
            all_vars, tlm_prefix, train_config.second_init_checkpoint)
    else:
        assignment_map, assignment_map2, initialized_variable_names \
            = get_tlm_assignment_map_v2(all_vars, tlm_prefix,
                                        train_config.init_checkpoint,
                                        train_config.second_init_checkpoint)
        initialized_variable_names2 = None

    def init_fn():
        # Restore both checkpoints (each is optional).
        if train_config.init_checkpoint:
            tf.compat.v1.train.init_from_checkpoint(
                train_config.init_checkpoint, assignment_map)
        if train_config.second_init_checkpoint:
            tf.compat.v1.train.init_from_checkpoint(
                train_config.second_init_checkpoint, assignment_map2)

    scaffold_fn = get_tpu_scaffold_or_init(init_fn, train_config.use_tpu)
    # Exclude the priority model from training.
    tvars = [v for v in all_vars if not v.name.startswith(tlm_prefix)]
    log_var_assignments(tvars, initialized_variable_names,
                        initialized_variable_names2)
    output_spec = None
    if mode == tf.estimator.ModeKeys.TRAIN:
        train_op = optimization.create_optimizer_from_config(
            total_loss, train_config, tvars)
        output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            train_op=train_op,
            scaffold_fn=scaffold_fn)
    elif mode == tf.estimator.ModeKeys.EVAL:
        eval_metrics = (metric_fn, [
            masked_lm_example_loss, masked_lm_log_probs, masked_lm_ids,
            masked_lm_weights, next_sentence_example_loss,
            next_sentence_log_probs, next_sentence_labels
        ])
        output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            eval_metrics=eval_metrics,
            scaffold_fn=scaffold_fn)
    else:
        # PREDICT requires features "loss1"/"loss2" to be present.
        predictions = {
            "input_ids": input_ids,
            "masked_input_ids": masked_input_ids,
            "priority_score": priority_score,
            "lm_loss1": features["loss1"],
            "lm_loss2": features["loss2"],
        }
        output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            predictions=predictions,
            scaffold_fn=scaffold_fn)
    return output_spec
def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """The `model_fn` for TPUEstimator.

    Dual-encoder ranking: a query encoder (scope `dual_model_prefix1`) and a
    document encoder (scope `dual_model_prefix2`) produce pooled vectors; the
    query vector is scored against `num_docs` document vectors per example by
    dot product, and a loss head (hinge_all / sigmoid_all / hinge_max,
    selected via `special_flags`) turns the scores into logits and a loss.
    """
    tf_logging.info("*** Features ***")
    for name in sorted(features.keys()):
        tf_logging.info(" name = %s, shape = %s" % (name, features[name].shape))
    q_input_ids = features["q_input_ids"]
    q_input_mask = features["q_input_mask"]
    d_input_ids = features["d_input_ids"]
    d_input_mask = features["d_input_mask"]
    input_shape = get_shape_list(q_input_ids, expected_rank=2)
    batch_size = input_shape[0]
    doc_length = model_config.max_doc_length
    num_docs = model_config.num_docs
    # Reshape packed docs: [batch, num_docs * doc_length]
    # -> [batch * num_docs, doc_length] for a single encoder pass.
    d_input_ids_unpacked = tf.reshape(d_input_ids,
                                      [-1, num_docs, doc_length])
    d_input_mask_unpacked = tf.reshape(d_input_mask,
                                       [-1, num_docs, doc_length])
    d_input_ids_flat = tf.reshape(d_input_ids_unpacked, [-1, doc_length])
    d_input_mask_flat = tf.reshape(d_input_mask_unpacked, [-1, doc_length])
    q_segment_ids = tf.zeros_like(q_input_ids, tf.int32)
    d_segment_ids = tf.zeros_like(d_input_ids_flat, tf.int32)
    label_ids = features["label_ids"]
    is_training = (mode == tf.estimator.ModeKeys.TRAIN)
    if "is_real_example" in features:
        is_real_example = tf.cast(features["is_real_example"],
                                  dtype=tf.float32)
    else:
        is_real_example = tf.ones(tf.shape(label_ids), dtype=tf.float32)
    with tf.compat.v1.variable_scope(dual_model_prefix1):
        q_model_config = copy.deepcopy(model_config)
        q_model_config.max_seq_length = model_config.max_sent_length
        model_q = model_class(
            # BUG FIX: was config=model_config, which silently discarded the
            # deep-copied config with the query-specific max_seq_length.
            config=q_model_config,
            is_training=is_training,
            input_ids=q_input_ids,
            input_mask=q_input_mask,
            token_type_ids=q_segment_ids,
            use_one_hot_embeddings=train_config.use_one_hot_embeddings,
        )
    with tf.compat.v1.variable_scope(dual_model_prefix2):
        d_model_config = copy.deepcopy(model_config)
        d_model_config.max_seq_length = model_config.max_doc_length
        model_d = model_class(
            # BUG FIX: was config=model_config (doc-specific copy unused).
            config=d_model_config,
            is_training=is_training,
            input_ids=d_input_ids_flat,
            input_mask=d_input_mask_flat,
            token_type_ids=d_segment_ids,
            use_one_hot_embeddings=train_config.use_one_hot_embeddings,
        )
    pooled_q = model_q.get_pooled_output()  # [batch, vector_size]
    pooled_d_flat = model_d.get_pooled_output(
    )  # [batch * num_docs, vector_size]
    pooled_d = tf.reshape(pooled_d_flat, [batch_size, num_docs, -1])
    pooled_q_t = tf.expand_dims(pooled_q, 1)
    pooled_d_t = tf.transpose(pooled_d, [0, 2, 1])
    # Dot product of query against each doc: [batch, 1, num_docs].
    all_logits = tf.matmul(pooled_q_t, pooled_d_t)
    if "hinge_all" in special_flags:
        apply_loss_modeling = hinge_all
    elif "sigmoid_all" in special_flags:
        apply_loss_modeling = sigmoid_all
    else:
        apply_loss_modeling = hinge_max
    logits, loss = apply_loss_modeling(all_logits, label_ids)
    pred = tf.cast(logits > 0, tf.int32)
    tvars = tf.compat.v1.trainable_variables()
    initialized_variable_names = {}
    scaffold_fn = None
    if train_config.init_checkpoint:
        initialized_variable_names, init_fn = get_init_fn(
            train_config, tvars)
        scaffold_fn = get_tpu_scaffold_or_init(init_fn, train_config.use_tpu)
    log_var_assignments(tvars, initialized_variable_names)
    TPUEstimatorSpec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec
    output_spec = None
    if mode == tf.estimator.ModeKeys.TRAIN:
        if "simple_optimizer" in special_flags:
            tf_logging.info("using simple optimizer")
            train_op = create_simple_optimizer(loss,
                                               train_config.learning_rate,
                                               train_config.use_tpu)
        else:
            train_op = optimization.create_optimizer_from_config(
                loss, train_config, tvars)
        output_spec = TPUEstimatorSpec(mode=mode,
                                       loss=loss,
                                       train_op=train_op,
                                       scaffold_fn=scaffold_fn)
    elif mode == tf.estimator.ModeKeys.EVAL:
        eval_metrics = (classification_metric_fn,
                        [pred, label_ids, is_real_example])
        output_spec = TPUEstimatorSpec(mode=mode,
                                       loss=loss,
                                       eval_metrics=eval_metrics,
                                       scaffold_fn=scaffold_fn)
    else:
        predictions = {
            "q_input_ids": q_input_ids,
            "d_input_ids": d_input_ids,
            "logits": logits
        }
        # Pass through any auxiliary id features the pipeline provides.
        useful_inputs = ["data_id", "input_ids2", "data_ids"]
        for input_name in useful_inputs:
            if input_name in features:
                predictions[input_name] = features[input_name]
        output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
            mode=mode, predictions=predictions, scaffold_fn=scaffold_fn)
    return output_spec
def run_estimator(model_fn, input_fn, host_call=None):
    """Configure a TPUEstimator from FLAGS and run train / eval / predict.

    Args:
        model_fn: estimator model_fn to run.
        input_fn: estimator input_fn supplying the dataset.
        host_call: accepted for API compatibility; not used in this body.

    Returns:
        Eval result dict when FLAGS.do_eval is set; otherwise None.
    """
    tf_logging.setLevel(logging.INFO)
    if FLAGS.log_debug:
        tf_logging.setLevel(logging.DEBUG)
    #FLAGS.init_checkpoint = auto_resolve_init_checkpoint(FLAGS.init_checkpoint)
    tf.io.gfile.makedirs(FLAGS.output_dir)
    if FLAGS.do_predict:
        # Suppress repetitive per-step log lines during prediction.
        tf_logging.addFilter(CounterFilter())
    tpu_cluster_resolver = None
    if FLAGS.use_tpu and FLAGS.tpu_name:
        tpu_cluster_resolver = tf.distribute.cluster_resolver.TPUClusterResolver(
            FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)
    print("FLAGS.save_checkpoints_steps", FLAGS.save_checkpoints_steps)
    config = tf.compat.v1.ConfigProto(allow_soft_placement=False, )
    is_per_host = tf.compat.v1.estimator.tpu.InputPipelineConfig.PER_HOST_V2
    run_config = tf.compat.v1.estimator.tpu.RunConfig(
        cluster=tpu_cluster_resolver,
        master=FLAGS.master,
        model_dir=FLAGS.output_dir,
        save_checkpoints_steps=FLAGS.save_checkpoints_steps,
        keep_checkpoint_every_n_hours=FLAGS.keep_checkpoint_every_n_hours,
        keep_checkpoint_max=FLAGS.keep_checkpoint_max,
        session_config=config,
        tf_random_seed=FLAGS.random_seed,
        tpu_config=tf.compat.v1.estimator.tpu.TPUConfig(
            iterations_per_loop=FLAGS.iterations_per_loop,
            num_shards=FLAGS.num_tpu_cores,
            per_host_input_for_training=is_per_host))
    if FLAGS.random_seed is not None:
        tf_logging.info("Using random seed : {}".format(FLAGS.random_seed))
        tf.random.set_seed(FLAGS.random_seed)
    # If TPU is not available, this will fall back to normal Estimator on CPU
    # or GPU.
    estimator = tf.compat.v1.estimator.tpu.TPUEstimator(
        use_tpu=FLAGS.use_tpu,
        model_fn=model_fn,
        config=run_config,
        train_batch_size=FLAGS.train_batch_size,
        eval_batch_size=FLAGS.eval_batch_size,
        # Predictions reuse the eval batch size.
        predict_batch_size=FLAGS.eval_batch_size,
    )
    if FLAGS.do_train:
        tf_logging.info("***** Running training *****")
        tf_logging.info("  Batch size = %d", FLAGS.train_batch_size)
        estimator.train(input_fn=input_fn, max_steps=FLAGS.num_train_steps)
    if FLAGS.do_eval:
        tf_logging.info("***** Running evaluation *****")
        tf_logging.info("  Batch size = %d", FLAGS.eval_batch_size)
        # With initialize_to_predict set, evaluate the init checkpoint
        # directly instead of the latest checkpoint in model_dir.
        if FLAGS.initialize_to_predict:
            checkpoint = FLAGS.init_checkpoint
        else:
            checkpoint = None
        result = estimator.evaluate(input_fn=input_fn,
                                    steps=FLAGS.max_eval_steps,
                                    checkpoint_path=checkpoint)
        output_eval_file = os.path.join(FLAGS.output_dir, "eval_results.txt")
        with tf.io.gfile.GFile(output_eval_file, "w") as writer:
            tf_logging.info("***** Eval results *****")
            for key in sorted(result.keys()):
                tf_logging.info("  %s = %s", key, str(result[key]))
                writer.write("%s = %s\n" % (key, str(result[key])))
        # NOTE: early return — do_predict below is skipped when do_eval runs.
        return result
    if FLAGS.do_predict:
        tf_logging.info("***** Running prediction *****")
        tf_logging.info("  Batch size = %d", FLAGS.eval_batch_size)
        if not FLAGS.initialize_to_predict:
            verify_checkpoint(estimator.model_dir)
            checkpoint = None
            time.sleep(1)
        else:
            checkpoint = FLAGS.init_checkpoint
        result = estimator.predict(input_fn=input_fn,
                                   checkpoint_path=checkpoint,
                                   yield_single_examples=False)
        # Materializes the whole prediction stream in memory before dumping.
        pickle.dump(list(result), open(FLAGS.out_file, "wb"))
        tf_logging.info("Prediction saved at {}".format(FLAGS.out_file))
def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """The `model_fn` for TPUEstimator.

    Scores per-instance masked-LM loss for export: applies random masking,
    computes the MLM example loss, and (in PREDICT mode) emits it together
    with the instance id.
    """
    tf_logging.info("*** Features ***")
    for name in sorted(features.keys()):
        tf_logging.info(" name = %s, shape = %s" % (name, features[name].shape))
    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    instance_id = features["instance_id"]
    next_sentence_labels = get_dummy_next_sentence_labels(input_ids)
    tf_logging.info("Doing dynamic masking (random)")
    masked_input_ids, masked_lm_positions, masked_lm_ids, masked_lm_weights \
        = random_masking(input_ids, input_mask,
                         train_config.max_predictions_per_seq, MASK_ID)
    is_training = (mode == tf.estimator.ModeKeys.TRAIN)
    model = model_class(
        config=model_config,
        is_training=is_training,
        input_ids=masked_input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=train_config.use_one_hot_embeddings,
    )
    (masked_lm_loss, masked_lm_example_loss,
     masked_lm_log_probs) = get_masked_lm_output(
         model_config, model.get_sequence_output(),
         model.get_embedding_table(), masked_lm_positions, masked_lm_ids,
         masked_lm_weights)
    total_loss = masked_lm_loss
    tvars = tf.compat.v1.trainable_variables()
    use_multiple_checkpoint = is_multiple_checkpoint(
        train_config.checkpoint_type)
    initialized_variable_names, initialized_variable_names2, init_fn\
        = align_checkpoint_for_lm(tvars,
                                  train_config.checkpoint_type,
                                  train_config.init_checkpoint,
                                  train_config.second_init_checkpoint,
                                  use_multiple_checkpoint)
    scaffold_fn = get_tpu_scaffold_or_init(init_fn, train_config.use_tpu)
    log_var_assignments(tvars, initialized_variable_names,
                        initialized_variable_names2)
    output_spec = None
    # NOTE(review): only PREDICT is handled; TRAIN/EVAL would return None
    # and crash the estimator — presumably this model_fn is predict-only.
    if mode == tf.estimator.ModeKeys.PREDICT:
        predictions = {
            "input_ids": input_ids,
            "masked_lm_example_loss": masked_lm_example_loss,
            "instance_id": instance_id
        }
        output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            predictions=predictions,
            scaffold_fn=scaffold_fn)
    return output_spec
def log_features(features):
    """Log the name and shape of each feature tensor, in key order."""
    tf_logging.info("*** Features ***")
    for feature_name in sorted(features):
        shape = features[feature_name].shape
        tf_logging.info(" name = %s, shape = %s" % (feature_name, shape))
def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """The `model_fn` for TPUEstimator.

    3-way classification on top of an APR encoder; checkpoint variables are
    restored as-is (no name remapping).
    """
    log_features(features)
    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    is_training = (mode == tf.estimator.ModeKeys.TRAIN)
    model = APR(input_ids, input_mask, segment_ids, is_training,
                train_config.use_one_hot_embeddings, bert_config,
                ssdr_config, dict_run_config.def_per_batch,
                dict_run_config.inner_batch_size,
                dict_run_config.max_def_length)
    task = Classification(3, features, model.get_pooled_output(), is_training)
    loss = task.loss
    tvars = tf.compat.v1.trainable_variables()
    # Restore checkpoint variables under their original names.
    assignment_fn = tlm.training.assignment_map.get_assignment_map_as_is
    initialized_variable_names, init_fn = get_init_fn(
        tvars, train_config.init_checkpoint, assignment_fn)
    scaffold_fn = get_tpu_scaffold_or_init(init_fn, train_config.use_tpu)
    log_var_assignments(tvars, initialized_variable_names)
    output_spec = None
    TPUEstimatorSpec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec
    if mode == tf.estimator.ModeKeys.TRAIN:
        if ssdr_config.compare_attrib_value_safe("use_two_lr", True):
            # Separate learning rates for the two parameter groups.
            tf_logging.info("Using two lr for each parts")
            train_op = create_optimizer_with_separate_lr(
                loss, train_config)
        else:
            tf_logging.info("Using single lr ")
            train_op = optimization.create_optimizer_from_config(
                loss, train_config)
        output_spec = TPUEstimatorSpec(mode=mode,
                                       loss=loss,
                                       train_op=train_op,
                                       scaffold_fn=scaffold_fn)
    elif mode == tf.estimator.ModeKeys.EVAL:
        # BUG FIX: was mode=model (the APR instance); TPUEstimatorSpec
        # requires the estimator mode here.
        output_spec = TPUEstimatorSpec(mode=mode,
                                       loss=loss,
                                       eval_metrics=task.eval_metrics(),
                                       scaffold_fn=scaffold_fn)
    elif mode == tf.estimator.ModeKeys.PREDICT:
        # BUG FIX: same mode=model typo as the EVAL branch.
        output_spec = TPUEstimatorSpec(mode=mode,
                                       loss=loss,
                                       predictions={"loss": task.loss_arr},
                                       scaffold_fn=scaffold_fn)
    return output_spec
def train_nli_w_dict(run_name, model: DictReaderInterface, model_path,
                     model_config, data_feeder_loader, model_init_fn):
    """Train an NLI classifier that can consult a dictionary-lookup module.

    Alternates classification steps with (optionally) lookup-training steps.
    Once the moving average of the lookup loss drops below
    `model_config.lookup_threshold` (and past `lookup_min_step`), both
    training and validation switch to the lookup-augmented path.

    Args:
        run_name: name used for summary writers / logging.
        model: DictReaderInterface instance providing losses, logits and
            batch2feed_dict.
        model_path: directory to save checkpoints to / resume from.
        model_config: config object (lookup_min_step, lookup_threshold,
            lookup_train_frequency, optional use_two_lr).
        data_feeder_loader: provides train and dev data feeders.
        model_init_fn: callable(sess) used to initialize weights when no
            previous checkpoint exists; may be None.

    Returns:
        The path of the final saved model (result of save_fn()).
    """
    print("Train nli :", run_name)  # fixed typo: was "Train nil :"
    batch_size = FLAGS.train_batch_size
    f_train_lookup = "lookup" in FLAGS.train_op
    tf_logging.debug("Building graph")

    with tf.compat.v1.variable_scope("optimizer"):
        lr = FLAGS.learning_rate
        lr2 = lr * 0.1  # lookup head trains with a 10x smaller lr
        if model_config.compare_attrib_value_safe("use_two_lr", True):
            tf_logging.info("Using two lr for each parts")
            train_cls, global_step = get_train_op_sep_lr(
                model.get_cls_loss(), lr, 5, "dict")
        else:
            train_cls, global_step = train_module.get_train_op(
                model.get_cls_loss(), lr)
        train_lookup_op, global_step = train_module.get_train_op(
            model.get_lookup_loss(), lr2, global_step)

    sess = train_module.init_session()
    sess.run(tf.compat.v1.global_variables_initializer())
    train_writer, test_writer = setup_summary_writer(run_name)

    # Resume from the latest checkpoint if one exists; otherwise run the
    # caller-provided initializer (e.g. load pretrained BERT).
    last_saved = get_latest_model_path_from_dir_path(model_path)
    if last_saved:
        tf_logging.info("Loading previous model from {}".format(last_saved))
        load_model(sess, last_saved)
    elif model_init_fn is not None:
        model_init_fn(sess)

    log = log_module.train_logger()
    train_data_feeder = data_feeder_loader.get_train_feeder()
    dev_data_feeder = data_feeder_loader.get_dev_feeder()
    lookup_train_feeder = train_data_feeder
    valid_runner = WSSDRRunner(model, dev_data_feeder.augment_dict_info, sess)

    # Two dev sets: plain batches, and batches carrying dictionary info.
    dev_batches = []
    n_dev_batch = 100
    dev_batches_w_dict = dev_data_feeder.get_all_batches(
        batch_size, True)[:n_dev_batch]
    for _ in range(n_dev_batch):
        dev_batches.append(dev_data_feeder.get_random_batch(batch_size))
        dev_batches_w_dict.append(dev_data_feeder.get_lookup_batch(batch_size))

    def get_summary_obj(loss, acc):
        # Scalar summaries for the classification path.
        summary = tf.compat.v1.Summary()
        summary.value.add(tag='loss', simple_value=loss)
        summary.value.add(tag='accuracy', simple_value=acc)
        return summary

    def get_summary_obj_lookup(loss, p_at_1):
        # Scalar summaries for the lookup path.
        summary = tf.compat.v1.Summary()
        summary.value.add(tag='lookup_loss', simple_value=loss)
        summary.value.add(tag='P@1', simple_value=p_at_1)
        return summary

    def train_lookup(step_i):
        # One lookup-training step: use per-example cls losses as
        # supervision for which dictionary entries to look up.
        batches, info = lookup_train_feeder.get_lookup_train_batches(
            batch_size)
        if not batches:
            raise NoLookupException()

        def get_cls_loss(batch):
            return sess.run([model.get_cls_loss_arr()],
                            feed_dict=model.batch2feed_dict(batch))

        loss_array = get_loss_from_batches(batches, get_cls_loss)
        supervision_for_lookup = train_data_feeder.get_lookup_training_batch(
            loss_array, batch_size, info)

        def lookup_train(batch):
            return sess.run(
                [model.get_lookup_loss(), model.get_p_at_1(), train_lookup_op],
                feed_dict=model.batch2feed_dict(batch))

        avg_loss, p_at_1, _ = lookup_train(supervision_for_lookup)
        train_writer.add_summary(get_summary_obj_lookup(avg_loss, p_at_1),
                                 step_i)
        log.info("Step {0} lookup loss={1:.04f}".format(step_i, avg_loss))
        return avg_loss

    def train_classification(step_i):
        # Plain classification step without dictionary augmentation.
        batch = train_data_feeder.get_random_batch(batch_size)
        loss_val, acc, _ = sess.run(
            [model.get_cls_loss(), model.get_acc(), train_cls],
            feed_dict=model.batch2feed_dict(batch))
        log.info("Step {0} train loss={1:.04f} acc={2:.03f}".format(
            step_i, loss_val, acc))
        train_writer.add_summary(get_summary_obj(loss_val, acc), step_i)
        return loss_val, acc

    lookup_loss_window = MovingWindow(20)

    def train_classification_w_lookup(step_i):
        # Classification step where the batch is first augmented with the
        # top-ranked dictionary terms predicted by the lookup head.
        data_indices, batch = train_data_feeder.get_lookup_batch(batch_size)
        logits, = sess.run([model.get_lookup_logits()],
                           feed_dict=model.batch2feed_dict(batch))
        term_ranks = np.flip(np.argsort(logits[:, :, 1], axis=1))
        batch = train_data_feeder.augment_dict_info(data_indices, term_ranks)
        loss_val, acc, _ = sess.run(
            [model.get_cls_loss(), model.get_acc(), train_cls],
            feed_dict=model.batch2feed_dict(batch))
        log.info("ClsW]Step {0} train loss={1:.04f} acc={2:.03f}".format(
            step_i, loss_val, acc))
        train_writer.add_summary(get_summary_obj(loss_val, acc), step_i)
        return loss_val, acc

    def lookup_enabled(lookup_loss_window, step_i):
        # Lookup path turns on only after warm-up and once the lookup loss
        # has converged below the configured threshold.
        return step_i > model_config.lookup_min_step \
            and lookup_loss_window.get_average() < model_config.lookup_threshold

    def train_fn(step_i):
        if lookup_enabled(lookup_loss_window, step_i):
            loss, acc = train_classification_w_lookup(step_i)
        else:
            loss, acc = train_classification(step_i)

        if f_train_lookup and step_i % model_config.lookup_train_frequency == 0:
            try:
                lookup_loss = train_lookup(step_i)
                lookup_loss_window.append(lookup_loss, 1)
            except NoLookupException:
                log.warning("No possible lookup found")
        return loss, acc

    def debug_fn(batch):
        # Debug helper (not wired into the loop): dump lookup labels.
        y_lookup, = sess.run([model.y_lookup, ],
                             feed_dict=model.batch2feed_dict(batch))
        print(y_lookup)
        return 0, 0

    def valid_fn(step_i):
        if lookup_enabled(lookup_loss_window, step_i):
            valid_fn_w_lookup(step_i)
        else:
            valid_fn_wo_lookup(step_i)

    def valid_fn_wo_lookup(step_i):
        loss_val, acc = valid_runner.run_batches_wo_lookup(dev_batches)
        log.info("Step {0} Dev loss={1:.04f} acc={2:.03f}".format(
            step_i, loss_val, acc))
        test_writer.add_summary(get_summary_obj(loss_val, acc), step_i)
        return acc

    def valid_fn_w_lookup(step_i):
        loss_val, acc = valid_runner.run_batches_w_lookup(dev_batches_w_dict)
        log.info("Step {0} DevW loss={1:.04f} acc={2:.03f}".format(
            step_i, loss_val, acc))
        test_writer.add_summary(get_summary_obj(loss_val, acc), step_i)
        return acc

    def save_fn():
        # Sync the global_step variable to the loop counter before saving,
        # since the lookup train op also advances global_step.
        op = tf.compat.v1.assign(global_step, step_i)
        sess.run([op])
        return save_model_to_dir_path(sess, model_path, global_step)

    n_data = train_data_feeder.get_data_len()
    step_per_epoch = int((n_data + batch_size - 1) / batch_size)
    tf_logging.debug("{} data point -> {} batches / epoch".format(
        n_data, step_per_epoch))
    train_steps = step_per_epoch * FLAGS.num_train_epochs
    tf_logging.debug("Max train step : {}".format(train_steps))
    valid_freq = 100
    save_interval = 60 * 20  # seconds between periodic checkpoints
    last_save = time.time()

    init_step, = sess.run([global_step])
    print("Initial step : ", init_step)
    # BUG FIX: `save_fn` closes over `step_i`; if the loop below never runs
    # (already trained past train_steps) the final save_fn() would raise
    # NameError. Bind it before the loop.
    step_i = init_step
    for step_i in range(init_step, train_steps):
        # BUG FIX: the original guarded on an undefined name `dev_fn`
        # (NameError on the first iteration); validation is valid_fn.
        if (step_i + 1) % valid_freq == 0:
            valid_fn(step_i)
        if time.time() - last_save > save_interval:
            save_fn()
            last_save = time.time()
        loss, acc = train_fn(step_i)

    return save_fn()
def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """The `model_fn` for TPUEstimator."""
    # Regression model_fn: BERT pooled output -> dense head, trained with a
    # weighted MAE loss against scaled real-valued labels.
    tf_logging.info("*** Features ***")
    for name in sorted(features.keys()):
        tf_logging.info(" name = %s, shape = %s" % (name, features[name].shape))
    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    if mode == tf.estimator.ModeKeys.PREDICT:
        # No labels at predict time; feed dummy ones so the graph builds.
        label_ids = tf.ones([input_ids.shape[0]], dtype=tf.float32)
    else:
        label_ids = features["label_ids"]
        label_ids = tf.reshape(label_ids, [-1])
    if "is_real_example" in features:
        # Padding examples (TPU batch fill) get weight 0 in eval metrics.
        is_real_example = tf.cast(features["is_real_example"],
                                  dtype=tf.float32)
    else:
        is_real_example = tf.ones(tf.shape(label_ids), dtype=tf.float32)
    is_training = (mode == tf.estimator.ModeKeys.TRAIN)
    model = BertModel(
        config=model_config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=train_config.use_one_hot_embeddings,
    )
    pooled = model.get_pooled_output()
    if is_training:
        pooled = dropout(pooled, 0.1)
    logits = tf.keras.layers.Dense(train_config.num_classes,
                                   name="cls_dense")(pooled)
    # Labels are scaled up and each example is weighted by the scaled
    # label magnitude, so larger-magnitude targets dominate the loss.
    scale = model_config.scale
    label_ids = scale * label_ids
    weight = tf.abs(label_ids)
    loss_arr = tf.keras.losses.MAE(y_true=label_ids, y_pred=logits)
    loss_arr = loss_arr * weight
    loss = tf.reduce_mean(input_tensor=loss_arr)
    tvars = tf.compat.v1.trainable_variables()
    initialized_variable_names = {}
    scaffold_fn = None
    if train_config.init_checkpoint:
        initialized_variable_names, init_fn = get_init_fn(train_config, tvars)
        scaffold_fn = get_tpu_scaffold_or_init(init_fn, train_config.use_tpu)
    log_var_assignments(tvars, initialized_variable_names)
    TPUEstimatorSpec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec

    def metric_fn(logits, label, is_real_example):
        # NOTE(review): compares logits directly to labels, which assumes
        # num_classes == 1 (or broadcasting) — confirm against config.
        mae = tf.compat.v1.metrics.mean_absolute_error(
            labels=label, predictions=logits, weights=is_real_example)
        return {
            "mae": mae
        }

    output_spec = None
    if mode == tf.estimator.ModeKeys.TRAIN:
        # tvars=None lets the optimizer pick up all trainable variables.
        tvars = None
        train_op = optimization.create_optimizer_from_config(loss,
                                                             train_config,
                                                             tvars)
        output_spec = TPUEstimatorSpec(mode=mode,
                                       loss=loss,
                                       train_op=train_op,
                                       scaffold_fn=scaffold_fn)
    elif mode == tf.estimator.ModeKeys.EVAL:
        eval_metrics = (metric_fn, [
            logits, label_ids, is_real_example
        ])
        output_spec = TPUEstimatorSpec(mode=mode,
                                       loss=loss,
                                       eval_metrics=eval_metrics,
                                       scaffold_fn=scaffold_fn)
    else:
        predictions = {
            "input_ids": input_ids,
            "logits": logits,
        }
        # Pass through data_id so predictions can be joined back to inputs.
        if "data_id" in features:
            predictions['data_id'] = features['data_id']
        output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
            mode=mode,
            predictions=predictions,
            scaffold_fn=scaffold_fn)
    return output_spec
def init_from_bert(sess):
    """Initialize the dictionary model's weights in `sess` from the full BERT checkpoint."""
    tf_logging.info("Initializing model with bert ")
    checkpoint_path = get_bert_full_path()
    init_dict_model_with_bert(sess, checkpoint_path)
def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """The `model_fn` for TPUEstimator: classify a long doc in segments.

    Splits (query, doc) into fixed-length overlapping segments via
    `iterate_over`, runs `model_class` over the segments, and attaches a
    softmax classification head. Behavior toggles come from `special_flags`
    ("feed_features", "new_pooling", "bias_loss", "simple_optimizer",
    "ask_tvar").

    Args:
        features: dict with "query", "doc", "doc_mask", "data_id" and
            (except in PREDICT) "label_ids"; optionally "is_real_example".
        labels: unused (labels come from `features`).
        mode: one of tf.estimator.ModeKeys.{TRAIN, EVAL, PREDICT}.
        params: unused, required by the TPUEstimator model_fn signature.

    Returns:
        tf.compat.v1.estimator.tpu.TPUEstimatorSpec for the given mode.
    """
    tf_logging.info("*** Features ***")
    for name in sorted(features.keys()):
        tf_logging.info(" name = %s, shape = %s" % (name, features[name].shape))

    query = features["query"]
    doc = features["doc"]
    doc_mask = features["doc_mask"]
    data_ids = features["data_id"]
    # Room for [CLS], [SEP], [SEP] alongside the query in each segment.
    segment_len = max_seq_length - query_len - 3
    step_size = model_config.step_size
    input_ids, input_mask, segment_ids, n_segments = \
        iterate_over(query, doc, doc_mask, total_doc_len, segment_len, step_size)
    if mode == tf.estimator.ModeKeys.PREDICT:
        # Dummy labels so the loss graph still builds at predict time.
        label_ids = tf.ones([input_ids.shape[0]], dtype=tf.int32)
    else:
        label_ids = features["label_ids"]
        label_ids = tf.reshape(label_ids, [-1])
    if "is_real_example" in features:
        # Padding examples (TPU batch fill) get weight 0 in eval metrics.
        is_real_example = tf.cast(features["is_real_example"], dtype=tf.float32)
    else:
        is_real_example = tf.ones(tf.shape(label_ids), dtype=tf.float32)
    is_training = (mode == tf.estimator.ModeKeys.TRAIN)

    if "feed_features" in special_flags:
        model = model_class(
            config=model_config,
            is_training=is_training,
            input_ids=input_ids,
            input_mask=input_mask,
            token_type_ids=segment_ids,
            use_one_hot_embeddings=train_config.use_one_hot_embeddings,
            features=features,
        )
    else:
        model = model_class(
            config=model_config,
            is_training=is_training,
            input_ids=input_ids,
            input_mask=input_mask,
            token_type_ids=segment_ids,
            use_one_hot_embeddings=train_config.use_one_hot_embeddings,
        )
    if "new_pooling" in special_flags:
        pooled = mimic_pooling(model.get_sequence_output(),
                               model_config.hidden_size,
                               model_config.initializer_range)
    else:
        pooled = model.get_pooled_output()

    if train_config.checkpoint_type != "bert_nli" and train_config.use_old_logits:
        tf_logging.info("Use old version of logistic regression")
        if is_training:
            pooled = dropout(pooled, 0.1)
        logits = tf.keras.layers.Dense(train_config.num_classes,
                                       name="cls_dense")(pooled)
    else:
        tf_logging.info("Use fixed version of logistic regression")
        # Explicit variables so names match the "bert_nli" checkpoint layout.
        output_weights = tf.compat.v1.get_variable(
            "output_weights",
            [train_config.num_classes, model_config.hidden_size],
            initializer=tf.compat.v1.truncated_normal_initializer(stddev=0.02))
        output_bias = tf.compat.v1.get_variable(
            "output_bias",
            [train_config.num_classes],
            initializer=tf.compat.v1.zeros_initializer())
        if is_training:
            pooled = dropout(pooled, 0.1)
        logits = tf.matmul(pooled, output_weights, transpose_b=True)
        logits = tf.nn.bias_add(logits, output_bias)

    loss_arr = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits, labels=label_ids)
    if "bias_loss" in special_flags:
        tf_logging.info("Using special_flags : bias_loss")
        loss_arr = reweight_zero(label_ids, loss_arr)
    loss = tf.reduce_mean(input_tensor=loss_arr)

    tvars = tf.compat.v1.trainable_variables()
    initialized_variable_names = {}
    scaffold_fn = None
    if train_config.init_checkpoint:
        initialized_variable_names, init_fn = get_init_fn(train_config, tvars)
        scaffold_fn = get_tpu_scaffold_or_init(init_fn, train_config.use_tpu)
    log_var_assignments(tvars, initialized_variable_names)

    TPUEstimatorSpec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec
    output_spec = None
    if mode == tf.estimator.ModeKeys.TRAIN:
        if "simple_optimizer" in special_flags:
            tf_logging.info("using simple optimizer")
            train_op = create_simple_optimizer(loss,
                                               train_config.learning_rate,
                                               train_config.use_tpu)
        else:
            if "ask_tvar" in special_flags:
                # Let the model restrict which variables are trained.
                tvars = model.get_trainable_vars()
            else:
                tvars = None
            train_op = optimization.create_optimizer_from_config(
                loss, train_config, tvars)
        output_spec = TPUEstimatorSpec(mode=mode,
                                       loss=loss,
                                       train_op=train_op,
                                       scaffold_fn=scaffold_fn)
    elif mode == tf.estimator.ModeKeys.EVAL:
        eval_metrics = (classification_metric_fn,
                        [logits, label_ids, is_real_example])
        # BUG FIX: was `mode=model` (the model object), which is not a valid
        # ModeKeys value and would make TPUEstimatorSpec construction fail.
        output_spec = TPUEstimatorSpec(mode=mode,
                                       loss=loss,
                                       eval_metrics=eval_metrics,
                                       scaffold_fn=scaffold_fn)
    else:
        predictions = {
            "logits": logits,
            "doc": doc,
            "data_ids": data_ids,
        }
        # Pass through identifying inputs so predictions can be joined back.
        useful_inputs = ["data_id", "input_ids2", "data_ids"]
        for input_name in useful_inputs:
            if input_name in features:
                predictions[input_name] = features[input_name]
        if override_prediction_fn is not None:
            predictions = override_prediction_fn(predictions, model)
        output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
            mode=mode,
            predictions=predictions,
            scaffold_fn=scaffold_fn)
    return output_spec
def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """The `model_fn` for TPUEstimator: joint masked-LM + NLI training.

    Shares an encoder between a dynamically-masked LM objective and a 3-way
    NLI classification head, combines the two losses, and additionally
    computes `h_overlap`: per-layer overlap between the LM-loss gradient and
    the gradient of the "argument-like" probability mass (classes 1 and 2),
    exported at predict time.

    Args:
        features: dict with "input_ids", "input_mask", "segment_ids" and
            optionally "nli_input_ids"/"nli_input_mask"/"nli_segment_ids"
            (when absent, the LM inputs are reused and dummy label_ids are
            injected).
        labels: unused (labels come from `features`).
        mode: one of tf.estimator.ModeKeys.{TRAIN, EVAL, PREDICT}.
        params: unused, required by the TPUEstimator model_fn signature.

    Returns:
        tf.compat.v1.estimator.tpu.TPUEstimatorSpec for the given mode.
    """
    tf_logging.info("model_fn_nli_lm")
    log_features(features)

    input_ids = features["input_ids"]  # [batch_size, seq_length]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    batch_size, seq_max = get_shape_list2(input_ids)
    if "nli_input_ids" in features:
        nli_input_ids = features["nli_input_ids"]  # [batch_size, seq_length]
        nli_input_mask = features["nli_input_mask"]
        nli_segment_ids = features["nli_segment_ids"]
    else:
        # No separate NLI inputs: reuse LM inputs and inject dummy labels.
        nli_input_ids = input_ids
        nli_input_mask = input_mask
        nli_segment_ids = segment_ids
        features["label_ids"] = tf.ones([batch_size], tf.int32)
    if mode == tf.estimator.ModeKeys.PREDICT:
        # Fixed seed so the random masking is reproducible at predict time.
        tf.random.set_seed(0)
        seed = 0
    else:
        seed = None
    is_training = (mode == tf.estimator.ModeKeys.TRAIN)

    tf_logging.info("Doing dynamic masking (random)")
    masked_input_ids, masked_lm_positions, masked_lm_ids, masked_lm_weights \
        = random_masking(input_ids, input_mask,
                         train_config.max_predictions_per_seq, MASK_ID, seed)

    sharing_model = sharing_model_factory(
        config, train_config.use_one_hot_embeddings, is_training,
        masked_input_ids, input_mask, segment_ids,
        nli_input_ids, nli_input_mask, nli_segment_ids)

    sequence_output_lm = sharing_model.lm_sequence_output()
    nli_feature = sharing_model.get_tt_feature()

    masked_lm_loss, masked_lm_example_loss, masked_lm_log_probs \
        = get_masked_lm_output(config, sequence_output_lm,
                               sharing_model.get_embedding_table(),
                               masked_lm_positions, masked_lm_ids,
                               masked_lm_weights)
    masked_lm_log_probs = tf.reshape(masked_lm_log_probs, [batch_size, -1])
    masked_lm_per_inst_loss = tf.reshape(masked_lm_example_loss,
                                         [batch_size, -1])

    task = Classification(3, features, nli_feature, is_training)
    nli_loss = task.loss
    task_prob = tf.nn.softmax(task.logits, axis=-1)
    # Probability mass of classes 1 and 2 ("argument-like").
    arg_like = task_prob[:, 1] + task_prob[:, 2]

    # Renamed from `vars` (shadowed the builtin).
    layer_outputs = sharing_model.model.all_layer_outputs
    grads_lm = tf.gradients(ys=masked_lm_loss, xs=layer_outputs)
    grads_arg = tf.gradients(ys=arg_like, xs=layer_outputs)
    # NOTE(review): the reshape to [batch_size * 2, seq_max, -1] assumes the
    # shared encoder stacks LM and NLI examples along the batch axis — the
    # first half is the LM pass, the second half the NLI pass; confirm
    # against sharing_model_factory.
    per_layer_overlap = []
    for g_lm, g_arg in zip(grads_lm, grads_arg):
        if g_lm is not None and g_arg is not None:
            a = tf.reshape(g_lm, [batch_size * 2, seq_max, -1])[:batch_size]
            a = a / masked_lm_per_inst_loss
            b = tf.reshape(g_arg, [batch_size * 2, seq_max, -1])[batch_size:]
            per_layer_overlap.append(tf.abs(a * b))
    h_overlap = tf.stack(per_layer_overlap, axis=1)
    h_overlap = tf.reduce_sum(h_overlap, axis=2)

    loss = combine_loss_fn(masked_lm_loss, nli_loss)

    tvars = tf.compat.v1.trainable_variables()
    assignment_fn = get_bert_assignment_map
    initialized_variable_names, init_fn = get_init_fn(
        tvars, train_config.init_checkpoint, assignment_fn)
    scaffold_fn = get_tpu_scaffold_or_init(init_fn, train_config.use_tpu)
    log_var_assignments(tvars, initialized_variable_names)

    TPUEstimatorSpec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec
    if mode == tf.estimator.ModeKeys.TRAIN:
        train_op = optimization.create_optimizer_from_config(loss, train_config)
        output_spec = TPUEstimatorSpec(mode=mode,
                                       loss=loss,
                                       train_op=train_op,
                                       scaffold_fn=scaffold_fn)
    elif mode == tf.estimator.ModeKeys.EVAL:
        eval_metrics = (metric_fn_lm, [
            masked_lm_example_loss,
            masked_lm_log_probs,
            masked_lm_ids,
            masked_lm_weights,
        ])
        output_spec = TPUEstimatorSpec(mode=mode,
                                       loss=loss,
                                       eval_metrics=eval_metrics,
                                       scaffold_fn=scaffold_fn)
    else:
        predictions = {
            "input_ids": input_ids,
            "masked_input_ids": masked_input_ids,
            "masked_lm_ids": masked_lm_ids,
            "masked_lm_example_loss": masked_lm_example_loss,
            "masked_lm_positions": masked_lm_positions,
            "masked_lm_log_probs": masked_lm_log_probs,
            "h_overlap": h_overlap,
        }
        output_spec = TPUEstimatorSpec(mode=mode,
                                       predictions=predictions,
                                       scaffold_fn=scaffold_fn)
    return output_spec