def model_fn(features, labels, mode, params): num_train_steps = 100 lr = 0.0001 k = 5 weights = tf.Variable(tf.random_normal_initializer()(shape=[k, k], dtype=tf.float32)) cvals = tf.constant(np.random.uniform(-1, 1), tf.float32) reader_module_path = '/data/hldai/data/realm_data/cc_news_pretrained/bert' # vals_ragged = tf.ragged.constant(list(features)) # vals_tensor = vals_ragged.to_tensor() # vals_tensor = features + global_vals vals_tensor = features['vals'] # tok_id_seq_batch = features['tok_id_seq_batch'].to_tensor() # input_mask = features['input_mask'] z = tf.matmul(vals_tensor, weights) + cvals predictions = z loss = tf.reduce_mean(z) eval_metric_ops = None # rand_vals = tf.constant(np.random.randint(100, 105, (4, 5)), dtype=tf.int32) # tok_id_seq_batch = tf.concat((features['tok_id_seq_batch'], rand_vals), axis=1).to_tensor() # with tf.device("/cpu:0"): # blocks_np = datautils.load_pickle_data(os.path.join(config.DATA_DIR, 'realm_data/blocks_tok_id_seqs.pkl')) # blocks = tf.ragged.constant(blocks_np) # # blocks = tf.ragged.constant([[3, 4], [1], [2, 3, 7], [4]], dtype=tf.int32, ragged_rank=1) # retrieved_block_ids = tf.constant([0, 2]) # retrieved_blocks = tf.gather(blocks, retrieved_block_ids).to_tensor() # logging_hook = tf.estimator.LoggingTensorHook({"pred": predictions, 'feat': features}, every_n_iter=1) # logging_hook = tf.estimator.LoggingTensorHook( # {"pred": predictions, 'labels': labels, 'feat': features['tok_id_seq_batch'], # 'ids': retrieved_block_ids}, every_n_iter=1) ls = tf.reduce_sum(labels, axis=1) logging_hook = tf.estimator.LoggingTensorHook({ 'z': z, 'ls': ls}, every_n_iter=1) train_op = optimization.create_optimizer( loss=loss, init_lr=lr, num_train_steps=num_train_steps, num_warmup_steps=min(10000, max(100, int(num_train_steps / 10))), use_tpu=False) return tf.estimator.EstimatorSpec( mode=mode, loss=loss, train_op=train_op, predictions=predictions, # training_hooks=[logging_hook], evaluation_hooks=[logging_hook], eval_metric_ops=eval_metric_ops)
def model_fn(features, labels, mode, params): """Model function.""" del labels # ============================== # Input features # ============================== # [batch_size, query_seq_len] query_inputs = features["query_inputs"] # [batch_size, num_candidates, candidate_seq_len] candidate_inputs = features["candidate_inputs"] # [batch_size, num_candidates, query_seq_len + candidate_seq_len] joint_inputs = features["joint_inputs"] # [batch_size, num_masks] mlm_targets = features["mlm_targets"] mlm_positions = features["mlm_positions"] mlm_mask = features["mlm_mask"] # ============================== # Create modules. # ============================== bert_module = hub.Module( spec=params["bert_hub_module_handle"], name="locbert", tags={"train"} if mode == tf.estimator.ModeKeys.TRAIN else {}, trainable=True) hub.register_module_for_export(bert_module, "locbert") embedder_module = hub.Module( spec=params["embedder_hub_module_handle"], name="embedder", tags={"train"} if mode == tf.estimator.ModeKeys.TRAIN else {}, trainable=True) hub.register_module_for_export(embedder_module, "embedder") if params["share_embedders"]: query_embedder_module = embedder_module else: query_embedder_module = hub.Module( spec=params["embedder_hub_module_handle"], name="embedder", tags={"train"} if mode == tf.estimator.ModeKeys.TRAIN else {}, trainable=True) hub.register_module_for_export(embedder_module, "query_embedder") # ============================== # Retrieve. # ============================== # [batch_size, projected_size] query_emb = query_embedder_module( inputs=dict( input_ids=query_inputs.token_ids, input_mask=query_inputs.mask, segment_ids=query_inputs.segment_ids), signature="projected") # [batch_size * num_candidates, candidate_seq_len] flat_candidate_inputs, unflatten = flatten_bert_inputs( candidate_inputs) # [batch_size * num_candidates, projected_size] flat_candidate_emb = embedder_module( inputs=dict( input_ids=flat_candidate_inputs.token_ids, input_mask=flat_candidate_inputs.mask, segment_ids=flat_candidate_inputs.segment_ids), signature="projected") # [batch_size, num_candidates, projected_size] unflattened_candidate_emb = unflatten(flat_candidate_emb) # [batch_size, num_candidates] retrieval_score = tf.einsum("BD,BND->BN", query_emb, unflattened_candidate_emb) # ============================== # Read. # ============================== # [batch_size * num_candidates, query_seq_len + candidate_seq_len] flat_joint_inputs, unflatten = flatten_bert_inputs(joint_inputs) # [batch_size * num_candidates, num_masks] flat_mlm_positions, _ = tensor_utils.flatten( tf.tile( tf.expand_dims(mlm_positions, 1), [1, params["num_candidates"], 1])) batch_size, num_masks = tensor_utils.shape(mlm_targets) # [batch_size * num_candidates, query_seq_len + candidates_seq_len] flat_joint_bert_outputs = bert_module( inputs=dict( input_ids=flat_joint_inputs.token_ids, input_mask=flat_joint_inputs.mask, segment_ids=flat_joint_inputs.segment_ids, mlm_positions=flat_mlm_positions), signature="mlm", as_dict=True) # [batch_size, num_candidates] candidate_score = retrieval_score # [batch_size, num_candidates] candidate_log_probs = tf.math.log_softmax(candidate_score) # ============================== # Compute marginal log-likelihood. # ============================== # [batch_size * num_candidates, num_masks] flat_mlm_logits = flat_joint_bert_outputs["mlm_logits"] # [batch_size, num_candidates, num_masks, vocab_size] mlm_logits = tf.reshape( flat_mlm_logits, [batch_size, params["num_candidates"], num_masks, -1]) mlm_log_probs = tf.math.log_softmax(mlm_logits) # [batch_size, num_candidates, num_masks] tiled_mlm_targets = tf.tile( tf.expand_dims(mlm_targets, 1), [1, params["num_candidates"], 1]) # [batch_size, num_candidates, num_masks, 1] tiled_mlm_targets = tf.expand_dims(tiled_mlm_targets, -1) # [batch_size, num_candidates, num_masks, 1] gold_log_probs = tf.batch_gather(mlm_log_probs, tiled_mlm_targets) # [batch_size, num_candidates, num_masks] gold_log_probs = tf.squeeze(gold_log_probs, -1) # [batch_size, num_candidates, num_masks] joint_gold_log_probs = ( tf.expand_dims(candidate_log_probs, -1) + gold_log_probs) # [batch_size, num_masks] marginal_gold_log_probs = tf.reduce_logsumexp(joint_gold_log_probs, 1) # [batch_size, num_masks] float_mlm_mask = tf.cast(mlm_mask, tf.float32) # [] loss = -tf.div_no_nan( tf.reduce_sum(marginal_gold_log_probs * float_mlm_mask), tf.reduce_sum(float_mlm_mask)) # ============================== # Optimization # ============================== num_warmup_steps = min(10000, max(100, int(params["num_train_steps"] / 10))) train_op = optimization.create_optimizer( loss=loss, init_lr=params["learning_rate"], num_train_steps=params["num_train_steps"], num_warmup_steps=num_warmup_steps, use_tpu=params["use_tpu"]) # ============================== # Evaluation # ============================== eval_metric_ops = None if params["use_tpu"] else dict() if mode != tf.estimator.ModeKeys.PREDICT: # [batch_size, num_masks] retrieval_utility = marginal_gold_log_probs - gold_log_probs[:, 0] retrieval_utility *= tf.cast(features["mlm_mask"], tf.float32) # [] retrieval_utility = tf.div_no_nan( tf.reduce_sum(retrieval_utility), tf.reduce_sum(float_mlm_mask)) add_mean_metric("retrieval_utility", retrieval_utility, eval_metric_ops) has_timestamp = tf.cast( tf.greater(features["export_timestamp"], 0), tf.float64) off_policy_delay_secs = ( tf.timestamp() - tf.cast(features["export_timestamp"], tf.float64)) off_policy_delay_mins = off_policy_delay_secs / 60.0 off_policy_delay_mins *= tf.cast(has_timestamp, tf.float64) add_mean_metric("off_policy_delay_mins", off_policy_delay_mins, eval_metric_ops) # Create empty predictions to avoid errors when running in prediction mode. predictions = dict() if params["use_tpu"]: return tf.estimator.tpu.TPUEstimatorSpec( mode=mode, loss=loss, train_op=train_op, predictions=predictions) else: if eval_metric_ops is not None: # Make sure the eval metrics are updated during training so that we get # quick feedback from tensorboard summaries when debugging locally. with tf.control_dependencies([u for _, u in eval_metric_ops.values()]): loss = tf.identity(loss) return tf.estimator.EstimatorSpec( mode=mode, loss=loss, train_op=train_op, eval_metric_ops=eval_metric_ops, predictions=predictions)
def model_fn(features, labels, mode, params): # print('MMMMMMMMMMMMMMMMMModel_fn', mode) embedder_module_path = params['embedder_module_path'] reader_module_path = params['reader_module_path'] # embedder_module_path = os.path.join(config.DATA_DIR, 'realm_data/cc_news_pretrained/embedder') # reader_module_path = os.path.join(config.DATA_DIR, 'realm_data/cc_news_pretrained/bert') lr = params['lr'] num_train_steps = params['num_train_steps'] max_seq_len = params['max_seq_len'] bert_dim = params['bert_dim'] n_types = params['n_types'] sep_tok_id = params['sep_tok_id'] retriever_beam_size = params['retriever_beam_size'] block_records_path = params['block_records_path'] num_block_records = params['num_block_records'] train_log_steps = params['train_log_steps'] eval_log_steps = params['eval_log_steps'] # token_ids = tf.constant([[101, 2002, 2003, 1037, 3836, 1012, 102]], dtype=tf.int32) tok_id_seq_batch_tensor = features['tok_id_seq_batch'].to_tensor() input_mask = 1 - tf.cast(tf.equal(tok_id_seq_batch_tensor, tf.constant(0)), tf.int32) block_labels_np = pre_load_data.get('labels', None) block_labels = tf.constant( block_labels_np, tf.int32) if block_labels_np is not None else None # input_mask = features['input_mask'] with tf.device("/cpu:0"): # retriever_outputs = retrieve(tok_id_seq_batch, input_mask, embedder_module_path, mode, retriever_beam_size) scaffold, retrieved_block_ids, retrieved_blocks, zx_logits = retrieve( tok_id_seq_batch_tensor, input_mask, embedder_module_path, mode, block_records_path, retriever_beam_size, num_block_records) # scaffold, question_emb, retrieved_block_ids = retrieve( # tok_id_seq_batch, input_mask, embedder_module_path, mode, retriever_beam_size) retrieved_labels = tf.gather( block_labels, retrieved_block_ids[0]) if block_labels is not None else None retrieved_label_vecs = get_one_hot_label_vecs(retrieved_labels, n_types) tokenizer, vocab_lookup_table = bert_utils.get_tf_tokenizer( reader_module_path) block_tok_id_seqs = tokenizer.tokenize(retrieved_blocks) block_tok_id_seqs = tf.cast( block_tok_id_seqs.merge_dims(2, 3).to_tensor(), tf.int32) # batch_size = tf.shape(tok_id_seq_batch_tensor)[0] blocks_max_seq_len = tf.shape(block_tok_id_seqs)[-1] block_tok_id_seqs_flat = tf.reshape(block_tok_id_seqs, (-1, blocks_max_seq_len)) tok_id_seqs_repeat = features['tok_id_seqs_repeat'] # tok_id_seqs_repeat = features['tok_id_seqs_repeat'].to_tensor() q_doc_tok_id_seqs = tf.concat((tok_id_seqs_repeat, block_tok_id_seqs_flat), axis=1).to_tensor() q_doc_tok_id_seqs = q_doc_tok_id_seqs[:, :max_seq_len - 1] q_doc_tok_id_seqs = pad_sep_to_tensor(q_doc_tok_id_seqs, sep_tok_id) q_doc_input_mask = 1 - tf.cast( tf.equal(q_doc_tok_id_seqs, tf.constant(0, dtype=tf.int32)), tf.int32) reader_module = hub.Module( reader_module_path, tags={"train"} if mode == tf.estimator.ModeKeys.TRAIN else {}, trainable=True) # input_mask = tf.sequence_mask(lengths, seqs_shape[1]) # input_mask = tf.cast(input_mask, tf.int32) concat_outputs = reader_module( dict( # input_ids=tok_id_seq_batch, # input_mask=tf.ones_like(tok_id_seq_batch), # segment_ids=tf.zeros_like(tok_id_seq_batch) # segment_ids=concat_inputs.segment_ids input_ids=q_doc_tok_id_seqs, # input_mask=tf.ones_like(r_tok_id_seqs), input_mask=q_doc_input_mask, segment_ids=tf.zeros_like(q_doc_tok_id_seqs)), signature="tokens", as_dict=True) # # predictions = retriever_outputs.logits # # concat_token_emb = concat_outputs["sequence_output"] qd_reps = concat_outputs['sequence_output'][:, 0, :] # dense_layer = tf.keras.layers.Dense(n_types) # yzx_logits = dense_layer(qd_reps) dense_weights = tf.Variable(initial_value=np.random.uniform( -0.1, 0.1, (bert_dim, n_types)), trainable=True, dtype=tf.float32) zx_logits = tf.reshape(zx_logits, (-1, retriever_beam_size)) log_softmax_zx_logits = tf.nn.log_softmax(zx_logits, axis=1) yzx_logits = tf.matmul(qd_reps, dense_weights) # weight_sum = tf.reduce_sum(dense_layer.weights) yzx_logits = tf.reshape(yzx_logits, (-1, retriever_beam_size, n_types)) log_sig_yzx_logits = tf.math.log_sigmoid(yzx_logits) z_log_probs = log_sig_yzx_logits + tf.expand_dims(log_softmax_zx_logits, 2) log_probs = tf.reduce_logsumexp(z_log_probs, axis=1) log_neg_probs = tfp.math.log1mexp(log_probs) # prob_sum = tf.math.exp(log_probs) + tf.math.exp(log_neg_probs) # kernel_initializer = tf.truncated_normal_initializer(stddev=0.02) # projection = tf.layers.dense( # qd_reps, # bert_dim, # kernel_initializer=kernel_initializer) # yzx_logits = probs = tf.exp(log_probs) # loss = tf.reduce_mean(predictions) loss = None eval_metric_ops = None train_op = None if mode != tf.estimator.ModeKeys.PREDICT: loss_samples = -tf.reduce_sum( labels * log_probs + (1 - labels) * log_neg_probs, axis=1) loss = tf.reduce_mean(loss_samples) # loss = tf.reduce_mean(loss_samples) + 0.00001 * tf.reduce_mean(question_emb) train_op = optimization.create_optimizer( loss=loss, init_lr=lr, num_train_steps=num_train_steps, num_warmup_steps=min(10000, max(100, int(num_train_steps / 10))), use_tpu=False) small_constant = tf.constant(0.00001) pos_preds = tf.cast(tf.less(tf.constant(0.5), probs), tf.float32) n_pred_pos = tf.reduce_sum(pos_preds, axis=1) + small_constant n_true_pos = tf.reduce_sum(labels, axis=1) + small_constant n_corrects = tf.reduce_sum(pos_preds * labels, axis=1) precision = tf.reduce_mean(n_corrects / n_pred_pos) recall = tf.reduce_mean(n_corrects / n_true_pos) p_mean, p_op = tf.compat.v1.metrics.mean(precision) r_mean, r_op = tf.compat.v1.metrics.mean(recall) f1 = 2 * p_mean * r_mean / (p_mean + r_mean + small_constant) eval_metric_ops = { # 'precision': tf.compat.v1.metrics.mean(precision), # 'recall': tf.compat.v1.metrics.mean(recall) 'precision': (p_mean, p_op), 'recall': (r_mean, r_op), 'f1': (f1, tf.group(p_op, r_op)) } # tmp_blocks = tf.constant(['i you date', 'sh ij ko', 'day in day']) # blocks_has_answer = orqa_ops.has_answer(blocks=tmp_blocks, answers=features['labels'][0]) # tmp_blocks = tf.constant([[1, 2], [3, 4]], tf.int32) # blocks_has_answer = orqa_ops.zero_out(features['tmp']) train_logging_hook = tf.estimator.LoggingTensorHook( { 'batch_id': features['batch_id'], 'loss': loss, 'ws': tf.reduce_sum(dense_weights), 'yzx_logits': tf.reduce_sum(yzx_logits), 'tmp': retrieved_label_vecs, 'tmp1': retrieved_labels, }, every_n_iter=train_log_steps) logging_hook = tf.estimator.LoggingTensorHook( { 'batch_id': features['batch_id'], 'loss': loss, # 'pred': tf.reduce_mean(predictions), # 'pred': log_probs, 'tmp': tf.reduce_sum(dense_weights), }, every_n_iter=eval_log_steps) pred_logging_hook = tf.estimator.LoggingTensorHook( { 'labels': features['labels'][0], # 'ha': blocks_has_answer, # 'hatmp': features['tmp'], # 'bl': retrieved_blocks }, every_n_iter=eval_log_steps) predictions = { 'probs': probs, 'text_ids': features['text_ids'], 'block_ids': retrieved_block_ids } return tf.estimator.EstimatorSpec( mode=mode, loss=loss, train_op=train_op, predictions=predictions, training_hooks=[train_logging_hook], evaluation_hooks=[logging_hook], # prediction_hooks=[pred_logging_hook], eval_metric_ops=eval_metric_ops, scaffold=scaffold)
def model_fn(features, labels, mode, params): """Model function.""" if labels is None: labels = tf.constant([""]) reader_beam_size = params["reader_beam_size"] if mode == tf.estimator.ModeKeys.PREDICT: retriever_beam_size = reader_beam_size else: retriever_beam_size = params["retriever_beam_size"] assert reader_beam_size <= retriever_beam_size with tf.device("/cpu:0"): retriever_outputs = retrieve(features=features, retriever_beam_size=retriever_beam_size, mode=mode, params=params) with tf.variable_scope("reader"): reader_outputs = read( features=features, retriever_logits=retriever_outputs.logits[:reader_beam_size], blocks=retriever_outputs.blocks[:reader_beam_size], mode=mode, params=params, labels=labels) predictions = get_predictions(reader_outputs, params) if mode == tf.estimator.ModeKeys.PREDICT: loss = None train_op = None eval_metric_ops = None else: # [retriever_beam_size] retriever_correct = orqa_ops.has_answer( blocks=retriever_outputs.blocks, answers=labels) # [reader_beam_size, num_candidates] reader_correct = compute_correct_candidates( candidate_starts=reader_outputs.candidate_starts, candidate_ends=reader_outputs.candidate_ends, gold_starts=reader_outputs.gold_starts, gold_ends=reader_outputs.gold_ends) eval_metric_ops = compute_eval_metrics( labels=labels, predictions=predictions, retriever_correct=retriever_correct, reader_correct=reader_correct) # [] loss = compute_loss(retriever_logits=retriever_outputs.logits, retriever_correct=retriever_correct, reader_logits=reader_outputs.logits, reader_correct=reader_correct) train_op = optimization.create_optimizer( loss=loss, init_lr=params["learning_rate"], num_train_steps=params["num_train_steps"], num_warmup_steps=min(10000, max(100, int(params["num_train_steps"] / 10))), use_tpu=False) return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op, predictions=predictions, eval_metric_ops=eval_metric_ops)