def base_model(model_config, features, labels, mode, target, reuse=None):
    """Build a BERT model over the feature columns suffixed with `target`.

    Reads "input_ids_{target}", "input_mask_{target}" and
    "segment_ids_{target}" from `features`, constructs embedder, encoder
    and pooler, and returns the built `bert.Bert` instance.

    Dropout is active only in TRAIN mode; all dropout probabilities are
    forced to 0.0 otherwise.
    """
    suffix = "_{}".format(target)
    input_ids = features["input_ids" + suffix]
    input_mask = features["input_mask" + suffix]
    segment_ids = features["segment_ids" + suffix]

    is_training = mode == tf.estimator.ModeKeys.TRAIN
    if is_training:
        hidden_dropout_prob = model_config.hidden_dropout_prob
        attention_probs_dropout_prob = model_config.attention_probs_dropout_prob
        # NOTE(review): dropout_prob is assigned but never used in this
        # function — presumably kept for symmetry with sibling model_fns.
        dropout_prob = model_config.dropout_prob
    else:
        hidden_dropout_prob = 0.0
        attention_probs_dropout_prob = 0.0
        dropout_prob = 0.0

    model = bert.Bert(model_config)
    model.build_embedder(input_ids,
                         segment_ids,
                         hidden_dropout_prob,
                         attention_probs_dropout_prob,
                         reuse=reuse)
    model.build_encoder(input_ids,
                        input_mask,
                        hidden_dropout_prob,
                        attention_probs_dropout_prob,
                        reuse=reuse)
    model.build_pooler(reuse=reuse)
    return model
def bert_encoder(model_config, features, labels, mode, target, reuse=None, **kargs):
    """Build a BERT encoder, optionally over target-suffixed feature keys.

    When `target` is truthy the inputs are read from
    "input_ids_{target}" / "input_mask_{target}" / "segment_ids_{target}",
    otherwise from the unsuffixed keys.

    Recognized **kargs:
      - 'ues_token_type' ('yes'/other, default 'yes'): when not 'yes',
        segment_ids are zeroed out so all tokens share one segment type.
        NOTE(review): this key looks like a typo of 'use_token_type', but
        that spelling is already used below as a separate boolean flag with
        different semantics — confirm the intended key before renaming.
      - 'use_token_type' (bool, default True): forwarded to the embedder.
      - 'attention_type' (str, default 'normal_attention'): forwarded to
        the encoder.
    """
    if target:
        input_ids = features["input_ids_{}".format(target)]
        input_mask = features["input_mask_{}".format(target)]
        segment_ids = features["segment_ids_{}".format(target)]
    else:
        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]

    if kargs.get('ues_token_type', 'yes') == 'yes':
        tf.logging.info(" using segment embedding with different types ")
    else:
        tf.logging.info(" using segment embedding with same types ")
        segment_ids = tf.zeros_like(segment_ids)

    if mode == tf.estimator.ModeKeys.TRAIN:
        hidden_dropout_prob = model_config.hidden_dropout_prob
        attention_probs_dropout_prob = model_config.attention_probs_dropout_prob
        dropout_prob = model_config.dropout_prob
    else:
        # Disable all dropout outside of training.
        hidden_dropout_prob = 0.0
        attention_probs_dropout_prob = 0.0
        dropout_prob = 0.0

    use_token_type = kargs.get('use_token_type', True)
    if use_token_type:
        tf.logging.info(" use token type ")
    else:
        tf.logging.info(" not use token type ")

    model = bert.Bert(model_config)
    model.build_embedder(input_ids,
                         segment_ids,
                         hidden_dropout_prob,
                         attention_probs_dropout_prob,
                         use_token_type=use_token_type,
                         reuse=reuse)
    model.build_encoder(input_ids,
                        input_mask,
                        hidden_dropout_prob,
                        attention_probs_dropout_prob,
                        reuse=reuse,
                        attention_type=kargs.get('attention_type', 'normal_attention'))
    model.build_pooler(reuse=reuse)
    return model
def model_fn(features, labels, mode):
    """Closure-style model_fn returning [train_op/loss, loss, per_example_loss, logits].

    Builds BERT + a classification head, optionally loads pretrained
    weights, and in TRAIN mode also builds the training op. Relies on
    enclosing-scope names: model_config, model_io_config, opt_config,
    reuse, num_labels, model_io_fn, not_storage_params, load_pretrained,
    init_checkpoint, exclude_scope.

    Returns:
      TRAIN: [train_op, loss, per_example_loss, logits]
      other: [loss, loss, per_example_loss, logits]
             (loss fills the train_op slot so the tuple shape is stable)
    """
    print(features)
    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    label_ids = features["label_ids"]

    is_training = mode == tf.estimator.ModeKeys.TRAIN
    if is_training:
        hidden_dropout_prob = model_config.hidden_dropout_prob
        attention_probs_dropout_prob = model_config.attention_probs_dropout_prob
        dropout_prob = model_config.dropout_prob
    else:
        hidden_dropout_prob = 0.0
        attention_probs_dropout_prob = 0.0
        dropout_prob = 0.0

    model = bert.Bert(model_config)
    model.build_embedder(input_ids,
                         segment_ids,
                         hidden_dropout_prob,
                         attention_probs_dropout_prob,
                         reuse=reuse)
    model.build_encoder(input_ids,
                        input_mask,
                        hidden_dropout_prob,
                        attention_probs_dropout_prob,
                        reuse=reuse)
    model.build_pooler(reuse=reuse)

    # When the language model is frozen, the head lives under a
    # "_finetuning"-suffixed scope so its variables stay separate.
    if model_io_config.fix_lm == True:
        scope = model_config.scope + "_finetuning"
    else:
        scope = model_config.scope

    with tf.variable_scope(scope, reuse=reuse):
        (loss, per_example_loss, logits) = classifier.classifier(
            model_config,
            model.get_pooled_output(),
            num_labels,
            label_ids,
            dropout_prob)

    pretrained_tvars = model_io_fn.get_params(
        model_config.scope, not_storage_params=not_storage_params)
    if load_pretrained:
        model_io_fn.load_pretrained(
            pretrained_tvars, init_checkpoint, exclude_scope=exclude_scope)

    tvars = pretrained_tvars
    model_io_fn.set_saver(var_lst=tvars)

    if not is_training:
        model_io_fn.print_params(tvars, string=", trainable params")
        return [loss, loss, per_example_loss, logits]

    model_io_fn.print_params(tvars, string=", trainable params")
    # Run batch-norm style update ops before the optimizer step.
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        optimizer_fn = optimizer.Optimizer(opt_config)
        train_op = optimizer_fn.get_train_op(
            loss, tvars, opt_config.init_lr, opt_config.num_train_steps)
        return [train_op, loss, per_example_loss, logits]
def model_fn(features, labels, mode):
    """Closure-style model_fn returning a tf.estimator.EstimatorSpec.

    Builds BERT + a classification head and optionally loads pretrained
    weights. Relies on enclosing-scope names: model_config,
    model_io_config, opt_config, reuse, num_labels, model_io_fn,
    not_storage_params, load_pretrained, init_checkpoint, exclude_scope,
    optimizer_fn.

    Returns:
      TRAIN:   EstimatorSpec(loss, train_op)
      PREDICT: EstimatorSpec(predictions={"classes", "probabilities"})
      EVAL:    None — metric ops are not implemented yet; see
               https://github.com/google/seq2seq/blob/7f485894d412e8d81ce0e07977831865e44309ce/seq2seq/metrics/metric_specs.py
    """
    print(features)
    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    label_ids = features["label_ids"]

    is_training = mode == tf.estimator.ModeKeys.TRAIN
    if is_training:
        hidden_dropout_prob = model_config.hidden_dropout_prob
        attention_probs_dropout_prob = model_config.attention_probs_dropout_prob
        dropout_prob = model_config.dropout_prob
    else:
        # Disable all dropout outside of training.
        hidden_dropout_prob = 0.0
        attention_probs_dropout_prob = 0.0
        dropout_prob = 0.0

    model = bert.Bert(model_config)
    model.build_embedder(input_ids,
                         segment_ids,
                         hidden_dropout_prob,
                         attention_probs_dropout_prob,
                         reuse=reuse)
    model.build_encoder(input_ids,
                        input_mask,
                        hidden_dropout_prob,
                        attention_probs_dropout_prob,
                        reuse=reuse)
    model.build_pooler(reuse=reuse)

    # When the language model is frozen, the head lives under a
    # "_finetuning"-suffixed scope so its variables stay separate.
    if model_io_config.fix_lm == True:
        scope = model_config.scope + "_finetuning"
    else:
        scope = model_config.scope

    with tf.variable_scope(scope, reuse=reuse):
        (loss, per_example_loss, logits) = classifier.classifier(
            model_config,
            model.get_pooled_output(),
            num_labels,
            label_ids,
            dropout_prob)

    pretrained_tvars = model_io_fn.get_params(
        model_config.scope, not_storage_params=not_storage_params)
    if load_pretrained:
        model_io_fn.load_pretrained(
            pretrained_tvars, init_checkpoint, exclude_scope=exclude_scope)

    tvars = pretrained_tvars
    model_io_fn.set_saver(var_lst=tvars)

    if mode == tf.estimator.ModeKeys.TRAIN:
        model_io_fn.print_params(tvars, string=", trainable params")
        # Run batch-norm style update ops before the optimizer step.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            train_op = optimizer_fn.get_train_op(
                loss, tvars, opt_config.init_lr, opt_config.num_train_steps)
            return tf.estimator.EstimatorSpec(
                mode=mode, loss=loss, train_op=train_op)
    elif mode == tf.estimator.ModeKeys.PREDICT:
        predictions = {
            # Hard class decisions (for PREDICT and EVAL mode).
            "classes": tf.argmax(input=logits, axis=1),
            # `softmax_tensor` is used for PREDICT and by the
            # `logging_hook`; exp(log_softmax) is the softmax itself.
            "probabilities": tf.exp(
                tf.nn.log_softmax(logits, name="softmax_tensor"))
        }
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)
    elif mode == tf.estimator.ModeKeys.EVAL:
        # Needs manually written metric ops; see
        # https://github.com/google/seq2seq/blob/7f485894d412e8d81ce0e07977831865e44309ce/seq2seq/metrics/metric_specs.py
        return None
def model_fn(features, labels, mode):
    """Closure-style model_fn returning a tf.estimator.EstimatorSpec.

    Builds BERT + a classification head and optionally loads pretrained
    weights. Relies on enclosing-scope names: model_config,
    model_io_config, opt_config, reuse, num_labels, model_io_fn,
    load_pretrained, init_checkpoint, optimizer.

    Returns:
      TRAIN: EstimatorSpec(loss, train_op)
      other: EstimatorSpec(predictions={'pred_label', 'label_ids', 'max_prob'})
    """
    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    label_ids = features["label_ids"]

    if mode == tf.estimator.ModeKeys.TRAIN:
        hidden_dropout_prob = model_config.hidden_dropout_prob
        attention_probs_dropout_prob = model_config.attention_probs_dropout_prob
        dropout_prob = model_config.dropout_prob
    else:
        # Disable all dropout outside of training.
        hidden_dropout_prob = 0.0
        attention_probs_dropout_prob = 0.0
        dropout_prob = 0.0

    model = bert.Bert(model_config)
    model.build_embedder(input_ids,
                         segment_ids,
                         hidden_dropout_prob,
                         attention_probs_dropout_prob,
                         reuse=reuse)
    model.build_encoder(input_ids,
                        input_mask,
                        hidden_dropout_prob,
                        attention_probs_dropout_prob,
                        reuse=reuse)
    model.build_pooler(reuse=reuse)

    # When the language model is frozen, the head lives under a
    # "_finetuning"-suffixed scope so its variables stay separate.
    if model_io_config.fix_lm == True:
        scope = model_config.scope + "_finetuning"
    else:
        scope = model_config.scope

    with tf.variable_scope(scope, reuse=reuse):
        (loss, per_example_loss, logits) = classifier.classifier(
            model_config,
            model.get_pooled_output(),
            num_labels,
            label_ids,
            dropout_prob)

    pretrained_tvars = model_io_fn.get_params(model_config.scope)
    if load_pretrained:
        model_io_fn.load_pretrained(pretrained_tvars, init_checkpoint)

    tvars = model_io_fn.get_params(scope)
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

    if mode == tf.estimator.ModeKeys.TRAIN:
        model_io_fn.print_params(tvars, string=", trainable params")
        # Run batch-norm style update ops before the optimizer step.
        with tf.control_dependencies(update_ops):
            optimizer_fn = optimizer.Optimizer(opt_config)
            train_op = optimizer_fn.get_train_op(
                loss, tvars, opt_config.init_lr, opt_config.num_train_steps)
        # BUG FIX: the original fell through and returned the
        # predictions-only EstimatorSpec in TRAIN mode too, discarding
        # `train_op` and raising ValueError (TRAIN mode requires `loss`
        # and `train_op`). Return a proper training spec instead.
        return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)

    print(logits.get_shape(), "===logits shape===")
    pred_label = tf.argmax(logits, axis=-1, output_type=tf.int32)
    prob = tf.nn.softmax(logits)
    max_prob = tf.reduce_max(prob, axis=-1)
    output_spec = tf.estimator.EstimatorSpec(
        mode=mode,
        predictions={
            'pred_label': pred_label,
            "label_ids": label_ids,
            "max_prob": max_prob
        })
    return output_spec