def create_model(albert_config, is_training, input_ids, input_mask, segment_ids,
                 labels, num_labels, use_one_hot_embeddings, task_name,
                 entity_a, entity_b):
  """Creates a classification model."""
  model = modeling.AlbertModel(
      config=albert_config,
      is_training=is_training,
      input_ids=input_ids,
      input_mask=input_mask,
      token_type_ids=segment_ids,
      use_one_hot_embeddings=use_one_hot_embeddings)

  # In the demo, we are doing a simple classification task on the entire
  # segment.
  #
  # If you want to use the token-level output, use model.get_sequence_output()
  # instead.
  output_layer = model.get_pooled_output()
  tf.logging.info("entities type: %s, %s", entity_a.dtype, entity_b.dtype)
  entity_cos = tf.multiply(entity_a, entity_b)
  output_layer = tf.concat([output_layer, entity_cos], 1)

  hidden_size = output_layer.shape[-1].value

  output_weights = tf.get_variable(
      "output_weights", [num_labels, hidden_size],
      initializer=tf.truncated_normal_initializer(stddev=0.02))

  output_bias = tf.get_variable(
      "output_bias", [num_labels], initializer=tf.zeros_initializer())

  with tf.variable_scope("loss"):
    if is_training:
      # I.e., 0.1 dropout
      output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)

    logits = tf.matmul(output_layer, output_weights, transpose_b=True)
    logits = tf.nn.bias_add(logits, output_bias)
    epsilon = tf.constant(1e-8)
    logits = logits + epsilon
    if task_name != "regression":
      probabilities = tf.nn.softmax(logits, axis=-1)
      predictions = tf.argmax(probabilities, axis=-1, output_type=tf.int32)
      log_probs = tf.nn.log_softmax(logits, axis=-1)
      one_hot_labels = tf.one_hot(labels, depth=num_labels, dtype=tf.float32)
      per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs, axis=-1)
    else:
      probabilities = logits
      logits = tf.squeeze(logits, [-1])
      predictions = logits
      per_example_loss = tf.square(logits - labels)
    loss = tf.reduce_mean(per_example_loss)

  return (loss, per_example_loss, probabilities, logits, predictions)
def create_model(albert_config, is_training, input_ids, input_mask, segment_ids,
                 labels, num_labels, use_one_hot_embeddings, max_seq_length,
                 dropout_prob):
  """Creates a classification model."""
  bsz_per_core = tf.shape(input_ids)[0]

  model = modeling.AlbertModel(
      config=albert_config,
      is_training=is_training,
      input_ids=tf.reshape(input_ids,
                           [bsz_per_core * num_labels, max_seq_length]),
      input_mask=tf.reshape(input_mask,
                            [bsz_per_core * num_labels, max_seq_length]),
      token_type_ids=tf.reshape(segment_ids,
                                [bsz_per_core * num_labels, max_seq_length]),
      use_one_hot_embeddings=use_one_hot_embeddings)

  # In the demo, we are doing a simple classification task on the entire
  # segment.
  #
  # If you want to use the token-level output, use model.get_sequence_output()
  # instead.
  output_layer = model.get_pooled_output()

  hidden_size = output_layer.shape[-1].value

  output_weights = tf.get_variable(
      "output_weights", [1, hidden_size],
      initializer=tf.truncated_normal_initializer(stddev=0.02))

  output_bias = tf.get_variable(
      "output_bias", [1], initializer=tf.zeros_initializer())

  with tf.variable_scope("loss"):
    if is_training:
      # I.e., 0.1 dropout
      output_layer = tf.nn.dropout(output_layer, keep_prob=1 - dropout_prob)

    logits = tf.matmul(output_layer, output_weights, transpose_b=True)
    logits = tf.nn.bias_add(logits, output_bias)
    logits = tf.reshape(logits, [bsz_per_core, num_labels])
    probabilities = tf.nn.softmax(logits, axis=-1)
    predictions = tf.argmax(probabilities, axis=-1, output_type=tf.int32)
    log_probs = tf.nn.log_softmax(logits, axis=-1)

    one_hot_labels = tf.one_hot(
        labels, depth=tf.cast(num_labels, dtype=tf.int32), dtype=tf.float32)

    per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs, axis=-1)
    loss = tf.reduce_mean(per_example_loss)

  return (loss, per_example_loss, probabilities, logits, predictions)
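# --- Hedged shape sketch (not from the original source) ---
# The multiple-choice variant above packs `num_labels` candidate sequences per
# example, flattens them to (batch * num_labels, seq_len) for the encoder, and
# folds the single per-candidate logit back to (batch, num_labels) before the
# softmax over choices. A minimal numpy stand-in with assumed sizes:
import numpy as np

batch_size, num_labels, max_seq_length = 4, 4, 128
input_ids = np.zeros([batch_size, num_labels, max_seq_length], dtype=np.int32)
flat_ids = input_ids.reshape([batch_size * num_labels, max_seq_length])
assert flat_ids.shape == (16, 128)
# One logit per flattened candidate, regrouped per example.
flat_logits = np.zeros([batch_size * num_labels, 1], dtype=np.float32)
choice_logits = flat_logits.reshape([batch_size, num_labels])
assert choice_logits.shape == (4, 4)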
def _create_model_from_scratch(albert_config, is_training, input_ids,
                               input_mask, segment_ids,
                               use_one_hot_embeddings):
  """Creates an ALBERT model from scratch/config."""
  model = modeling.AlbertModel(
      config=albert_config,
      is_training=is_training,
      input_ids=input_ids,
      input_mask=input_mask,
      token_type_ids=segment_ids,
      use_one_hot_embeddings=use_one_hot_embeddings)
  return (model.get_pooled_output(), model.get_sequence_output())
def _create_model_from_scratch(albert_config, is_training, input_ids,
                               input_mask, segment_ids,
                               use_one_hot_embeddings):
  """Creates an ALBERT model from scratch (as opposed to hub)."""
  model = modeling.AlbertModel(
      config=albert_config,
      is_training=is_training,
      input_ids=input_ids,
      input_mask=input_mask,
      token_type_ids=segment_ids,
      use_one_hot_embeddings=use_one_hot_embeddings)
  output_layer = model.get_pooled_output()
  return output_layer
def create_model(albert_config, is_training, input_ids, input_mask, segment_ids,
                 labels, num_labels, use_one_hot_embeddings):
  """Creates a classification model."""
  model = modeling.AlbertModel(
      config=albert_config,
      is_training=is_training,
      input_ids=input_ids,
      input_mask=input_mask,
      token_type_ids=segment_ids,
      use_one_hot_embeddings=use_one_hot_embeddings)

  # In the demo, we are doing a simple classification task on the entire
  # segment.
  #
  # If you want to use the token-level output, use model.get_sequence_output()
  # instead.
  if FLAGS.use_pooled_output:
    tf.logging.info("using pooled output")
    output_layer = model.get_pooled_output()
  else:
    tf.logging.info("using meaned output")
    output_layer = tf.reduce_mean(model.get_sequence_output(), axis=1)

  hidden_size = output_layer.shape[-1].value

  output_weights = tf.get_variable(
      "output_weights", [num_labels, hidden_size],
      initializer=tf.truncated_normal_initializer(stddev=0.02))

  output_bias = tf.get_variable(
      "output_bias", [num_labels], initializer=tf.zeros_initializer())

  with tf.variable_scope("loss"):
    if is_training:
      # I.e., 0.1 dropout
      output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)

    logits = tf.matmul(output_layer, output_weights, transpose_b=True)
    logits = tf.nn.bias_add(logits, output_bias)
    predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
    probabilities = tf.nn.softmax(logits, axis=-1)
    log_probs = tf.nn.log_softmax(logits, axis=-1)

    one_hot_labels = tf.one_hot(labels, depth=num_labels, dtype=tf.float32)

    per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs, axis=-1)
    loss = tf.reduce_mean(per_example_loss)

  return (loss, per_example_loss, probabilities, predictions)
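# --- Hedged illustration (not from the original source) ---
# The `use_pooled_output` branch above chooses between the [CLS] pooled vector
# and a mean over the token-level sequence output; either way the classifier
# sees a (batch, hidden) matrix. A numpy stand-in with assumed sizes:
import numpy as np

sequence_output = np.random.rand(2, 4, 8)      # (batch, seq_len, hidden)
meaned_output = sequence_output.mean(axis=1)   # mirrors tf.reduce_mean(..., axis=1)
assert meaned_output.shape == (2, 8)           # same shape as the pooled output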
def module_fn(is_training):
  """Module function."""
  input_ids = tf.placeholder(tf.int32, [None, None], "input_ids")
  input_mask = tf.placeholder(tf.int32, [None, None], "input_mask")
  segment_ids = tf.placeholder(tf.int32, [None, None], "segment_ids")
  mlm_positions = tf.placeholder(tf.int32, [None, None], "mlm_positions")

  albert_config = modeling.AlbertConfig.from_json_file(
      os.path.join(FLAGS.albert_directory, "albert_config.json"))
  model = modeling.AlbertModel(
      config=albert_config,
      is_training=is_training,
      input_ids=input_ids,
      input_mask=input_mask,
      token_type_ids=segment_ids,
      use_one_hot_embeddings=False)

  mlm_logits = get_mlm_logits(model, albert_config, mlm_positions)

  assert tf.gfile.Exists(FLAGS.vocab_path)
  vocab_file = tf.constant(
      value=FLAGS.vocab_path, dtype=tf.string, name="vocab_file")

  # By adding `vocab_file` to the ASSET_FILEPATHS collection, TF-Hub will
  # rewrite this tensor so that this asset is portable.
  tf.add_to_collection(tf.GraphKeys.ASSET_FILEPATHS, vocab_file)

  hub.add_signature(
      name="tokens",
      inputs=dict(
          input_ids=input_ids,
          input_mask=input_mask,
          segment_ids=segment_ids),
      outputs=dict(
          sequence_output=model.get_sequence_output(),
          pooled_output=model.get_pooled_output()))

  hub.add_signature(
      name="mlm",
      inputs=dict(
          input_ids=input_ids,
          input_mask=input_mask,
          segment_ids=segment_ids,
          mlm_positions=mlm_positions),
      outputs=dict(
          sequence_output=model.get_sequence_output(),
          pooled_output=model.get_pooled_output(),
          mlm_logits=mlm_logits))

  hub.add_signature(
      name="tokenization_info",
      inputs={},
      outputs=dict(
          vocab_file=vocab_file,
          do_lower_case=tf.constant(FLAGS.do_lower_case)))
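# --- Hedged usage sketch (not from the original source) ---
# Consuming the "tokens" signature of a module exported from `module_fn`
# above; the module path is an assumption and the placeholders are created
# here only to make the sketch self-contained.
ex_input_ids = tf.placeholder(tf.int32, [None, None], "ex_input_ids")
ex_input_mask = tf.placeholder(tf.int32, [None, None], "ex_input_mask")
ex_segment_ids = tf.placeholder(tf.int32, [None, None], "ex_segment_ids")
albert_module = hub.Module("/tmp/exported_albert_module")  # assumed path
albert_outputs = albert_module(
    inputs=dict(input_ids=ex_input_ids,
                input_mask=ex_input_mask,
                segment_ids=ex_segment_ids),
    signature="tokens",
    as_dict=True)
pooled_output = albert_outputs["pooled_output"]      # (batch, hidden)
sequence_output = albert_outputs["sequence_output"]  # (batch, seq_len, hidden)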
def build_model(sess):
  """Builds the ALBERT graph and initializes it from a checkpoint."""
  input_ids = tf.placeholder(tf.int32, [None, None], "input_ids")
  input_mask = tf.placeholder(tf.int32, [None, None], "input_mask")
  segment_ids = tf.placeholder(tf.int32, [None, None], "segment_ids")
  mlm_positions = tf.placeholder(tf.int32, [None, None], "mlm_positions")

  albert_config_path = os.path.join(FLAGS.albert_directory,
                                    "albert_config.json")
  albert_config = modeling.AlbertConfig.from_json_file(albert_config_path)
  model = modeling.AlbertModel(
      config=albert_config,
      is_training=False,
      input_ids=input_ids,
      input_mask=input_mask,
      token_type_ids=segment_ids,
      use_one_hot_embeddings=False)

  get_mlm_logits(model.get_sequence_output(), albert_config, mlm_positions,
                 model.get_embedding_table())
  get_sentence_order_logits(model.get_pooled_output(), albert_config)

  checkpoint_path = os.path.join(FLAGS.albert_directory, FLAGS.checkpoint_name)
  tvars = tf.trainable_variables()
  (assignment_map,
   initialized_variable_names) = modeling.get_assignment_map_from_checkpoint(
       tvars, checkpoint_path)

  tf.logging.info("**** Trainable Variables ****")
  for var in tvars:
    init_string = ""
    if var.name in initialized_variable_names:
      init_string = ", *INIT_FROM_CKPT*"
    tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                    init_string)
  tf.train.init_from_checkpoint(checkpoint_path, assignment_map)
  init = tf.global_variables_initializer()
  sess.run(init)
  return sess
def create_model(self):
  input_ids = AlbertModelTest.ids_tensor([self.batch_size, self.seq_length],
                                         self.vocab_size)

  input_mask = None
  if self.use_input_mask:
    input_mask = AlbertModelTest.ids_tensor(
        [self.batch_size, self.seq_length], vocab_size=2)

  token_type_ids = None
  if self.use_token_type_ids:
    token_type_ids = AlbertModelTest.ids_tensor(
        [self.batch_size, self.seq_length], self.type_vocab_size)

  config = modeling.AlbertConfig(
      vocab_size=self.vocab_size,
      embedding_size=self.embedding_size,
      hidden_size=self.hidden_size,
      num_hidden_layers=self.num_hidden_layers,
      num_attention_heads=self.num_attention_heads,
      intermediate_size=self.intermediate_size,
      hidden_act=self.hidden_act,
      hidden_dropout_prob=self.hidden_dropout_prob,
      attention_probs_dropout_prob=self.attention_probs_dropout_prob,
      max_position_embeddings=self.max_position_embeddings,
      type_vocab_size=self.type_vocab_size,
      initializer_range=self.initializer_range)

  model = modeling.AlbertModel(
      config=config,
      is_training=self.is_training,
      input_ids=input_ids,
      input_mask=input_mask,
      token_type_ids=token_type_ids,
      scope=self.scope)

  outputs = {
      "embedding_output": model.get_embedding_output(),
      "sequence_output": model.get_sequence_output(),
      "pooled_output": model.get_pooled_output(),
      "all_encoder_layers": model.get_all_encoder_layers(),
  }
  return outputs
def create_model(bert_config, is_training, input_ids, input_mask, segment_ids,
                 labels, num_labels, use_one_hot_embeddings, dropout_rate=1.0,
                 lstm_size=1, cell='lstm', num_layers=1):
  """Creates a NER model."""
  model = modeling.AlbertModel(
      config=bert_config,
      is_training=is_training,
      input_ids=input_ids,
      input_mask=input_mask,
      token_type_ids=segment_ids,
      use_one_hot_embeddings=use_one_hot_embeddings)
  # embedding.shape = [batch_size, seq_length, embedding_size]
  embedding = model.get_sequence_output()
  max_seq_length = embedding.shape[1].value
  # Compute the true sequence lengths: non-zero token ids mark real positions.
  used = tf.sign(tf.abs(input_ids))
  # A [batch_size] vector holding the length of each sequence in the batch.
  lengths = tf.reduce_sum(used, reduction_indices=1)
  # add CRF output layer
  blstm_crf = BLSTM_CRF(
      embedded_chars=embedding,
      hidden_unit=lstm_size,
      cell_type=cell,
      num_layers=num_layers,
      dropout_rate=dropout_rate,
      initializers=initializers,
      num_labels=num_labels,
      seq_length=max_seq_length,
      labels=labels,
      lengths=lengths,
      is_training=is_training)
  rst = blstm_crf.add_blstm_crf_layer(crf_only=True)
  return rst
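# --- Hedged illustration (not from the original source) ---
# The length computation above relies on id 0 being the padding token:
# tf.sign(tf.abs(ids)) is 1 at real positions and 0 at padding, so a row sum
# recovers each sequence's true length. A numpy stand-in with toy ids:
import numpy as np

toy_input_ids = np.array([[101, 2054, 102, 0, 0],
                          [101, 102, 0, 0, 0]])
toy_lengths = np.sign(np.abs(toy_input_ids)).sum(axis=1)
assert toy_lengths.tolist() == [3, 2]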
def build_model(topk, albert_config_path, checkpoint_path):
  """Builds the MLM/SOP scoring graph and its init op."""
  input_ids = tf.placeholder(tf.int32, [None, None], "input_ids")
  input_mask = tf.placeholder(tf.int32, [None, None], "input_mask")
  segment_ids = tf.placeholder(tf.int32, [None, None], "segment_ids")
  mlm_positions = tf.placeholder(tf.int32, [None, None], "mlm_positions")

  albert_config = modeling.AlbertConfig.from_json_file(albert_config_path)
  model = modeling.AlbertModel(
      config=albert_config,
      is_training=False,
      input_ids=input_ids,
      input_mask=input_mask,
      token_type_ids=segment_ids,
      use_one_hot_embeddings=False)

  mlm_logits = get_mlm_logits(model.get_sequence_output(), albert_config,
                              mlm_positions, model.get_embedding_table())
  nsp_logits = get_sentence_order_logits(model.get_pooled_output(),
                                         albert_config)

  mlm_scores = tf.nn.softmax(mlm_logits)
  nsp_scores = tf.nn.softmax(nsp_logits)
  mlm_topk_scores, mlm_topk_indices = tf.math.top_k(mlm_scores, k=topk)
  nsp_predictions = nsp_scores[:, 0]

  tvars = tf.trainable_variables()
  (assignment_map,
   initialized_variable_names) = modeling.get_assignment_map_from_checkpoint(
       tvars, checkpoint_path)

  tf.logging.info("**** Trainable Variables ****")
  for var in tvars:
    init_string = ""
    if var.name in initialized_variable_names:
      init_string = ", *INIT_FROM_CKPT*"
    tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                    init_string)
  tf.train.init_from_checkpoint(checkpoint_path, assignment_map)
  init = tf.global_variables_initializer()
  return init, (mlm_topk_scores, mlm_topk_indices), nsp_predictions
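# --- Hedged driver sketch (not from the original source) ---
# A hypothetical way to run the graph built by `build_model` above; the config
# and checkpoint paths, token ids, and masked position are all assumptions.
with tf.Graph().as_default():
  init, (mlm_topk_scores, mlm_topk_indices), nsp_predictions = build_model(
      topk=5,
      albert_config_path="albert_config.json",   # assumed path
      checkpoint_path="model.ckpt-best")         # assumed path
  with tf.Session() as sess:
    sess.run(init)
    scores, indices, sop = sess.run(
        [mlm_topk_scores, mlm_topk_indices, nsp_predictions],
        feed_dict={
            "input_ids:0": [[2, 13, 9, 4, 3]],   # toy ids
            "input_mask:0": [[1, 1, 1, 1, 1]],
            "segment_ids:0": [[0, 0, 0, 0, 0]],
            "mlm_positions:0": [[2]],            # score the token at index 2
        })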
def build_model():
  """Builds the MLM loss graph."""
  input_ids = tf.placeholder(tf.int32, [None, None], "input_ids")
  # input_mask = tf.placeholder(tf.int32, [None, None], "input_mask")
  # segment_ids = tf.placeholder(tf.int32, [None, None], "segment_ids")
  mlm_positions = tf.placeholder(tf.int32, [None, None], "mlm_positions")
  mlm_ids = tf.placeholder(tf.int32, [None, None], "mlm_ids")
  mlm_weights = tf.placeholder(tf.float32, [None, None], "mlm_weights")

  albert_config_path = os.path.join(FLAGS.config_dir, "albert_config.json")
  albert_config = modeling.AlbertConfig.from_json_file(albert_config_path)
  model = modeling.AlbertModel(
      config=albert_config,
      is_training=False,
      input_ids=input_ids,
      # input_mask=input_mask,
      # token_type_ids=segment_ids,
      use_one_hot_embeddings=False)

  loss = get_mlm_output(model.get_sequence_output(), albert_config,
                        mlm_positions, model.get_embedding_table(), mlm_ids,
                        mlm_weights)
  return loss
def module_fn(is_training):
  """Module function."""
  input_ids = tf.placeholder(tf.int32, [None, None], "input_ids")
  input_mask = tf.placeholder(tf.int32, [None, None], "input_mask")
  segment_ids = tf.placeholder(tf.int32, [None, None], "segment_ids")
  mlm_positions = tf.placeholder(tf.int32, [None, None], "mlm_positions")

  albert_config_path = os.path.join(FLAGS.albert_directory,
                                    "albert_config.json")
  albert_config = modeling.AlbertConfig.from_json_file(albert_config_path)
  model = modeling.AlbertModel(
      config=albert_config,
      is_training=is_training,
      input_ids=input_ids,
      input_mask=input_mask,
      token_type_ids=segment_ids,
      use_one_hot_embeddings=False,
      use_einsum=FLAGS.use_einsum)

  mlm_logits = get_mlm_logits(model, albert_config, mlm_positions)
  sop_log_probs = get_sop_log_probs(model, albert_config)

  vocab_model_path = os.path.join(FLAGS.albert_directory, "30k-clean.model")
  vocab_file_path = os.path.join(FLAGS.albert_directory, "30k-clean.vocab")

  config_file = tf.constant(
      value=albert_config_path, dtype=tf.string, name="config_file")
  vocab_model = tf.constant(
      value=vocab_model_path, dtype=tf.string, name="vocab_model")
  # This is only for visualization purpose.
  vocab_file = tf.constant(
      value=vocab_file_path, dtype=tf.string, name="vocab_file")

  # By adding `config_file, vocab_model and vocab_file`
  # to the ASSET_FILEPATHS collection, TF-Hub will
  # rewrite this tensor so that this asset is portable.
  tf.add_to_collection(tf.GraphKeys.ASSET_FILEPATHS, config_file)
  tf.add_to_collection(tf.GraphKeys.ASSET_FILEPATHS, vocab_model)
  tf.add_to_collection(tf.GraphKeys.ASSET_FILEPATHS, vocab_file)

  hub.add_signature(
      name="tokens",
      inputs=dict(
          input_ids=input_ids,
          input_mask=input_mask,
          segment_ids=segment_ids),
      outputs=dict(
          sequence_output=model.get_sequence_output(),
          pooled_output=model.get_pooled_output()))

  hub.add_signature(
      name="sop",
      inputs=dict(
          input_ids=input_ids,
          input_mask=input_mask,
          segment_ids=segment_ids),
      outputs=dict(
          sequence_output=model.get_sequence_output(),
          pooled_output=model.get_pooled_output(),
          sop_log_probs=sop_log_probs))

  hub.add_signature(
      name="mlm",
      inputs=dict(
          input_ids=input_ids,
          input_mask=input_mask,
          segment_ids=segment_ids,
          mlm_positions=mlm_positions),
      outputs=dict(
          sequence_output=model.get_sequence_output(),
          pooled_output=model.get_pooled_output(),
          mlm_logits=mlm_logits))

  hub.add_signature(
      name="tokenization_info",
      inputs={},
      outputs=dict(
          vocab_file=vocab_model,
          do_lower_case=tf.constant(FLAGS.do_lower_case)))
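# --- Hedged export sketch (not from the original source) ---
# `module_fn` above takes `is_training`, which matches the TF-Hub v1 pattern of
# exporting one graph variant per tag set; the export and checkpoint paths are
# assumptions.
tags_and_args = []
for is_training in (True, False):
  tags = {"train"} if is_training else set()
  tags_and_args.append((tags, dict(is_training=is_training)))
spec = hub.create_module_spec(module_fn, tags_and_args=tags_and_args)
spec.export("/tmp/albert_hub_module",                 # assumed export path
            checkpoint_path="/tmp/model.ckpt-best")   # assumed checkpoint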
def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
  """The `model_fn` for TPUEstimator."""

  tf.logging.info("*** Features ***")
  for name in sorted(features.keys()):
    tf.logging.info("  name = %s, shape = %s" % (name, features[name].shape))

  input_ids = features["input_ids"]
  input_mask = features["input_mask"]
  segment_ids = features["segment_ids"]
  masked_lm_positions = features["masked_lm_positions"]
  masked_lm_ids = features["masked_lm_ids"]
  masked_lm_weights = features["masked_lm_weights"]
  # Note: We keep this feature name `next_sentence_labels` to be compatible
  # with the original data created by lanzhzh@. However, in the ALBERT case
  # it does represent sentence_order_labels.
  sentence_order_labels = features["next_sentence_labels"]

  is_training = (mode == tf.estimator.ModeKeys.TRAIN)

  model = modeling.AlbertModel(
      config=albert_config,
      is_training=is_training,
      input_ids=input_ids,
      input_mask=input_mask,
      token_type_ids=segment_ids,
      use_one_hot_embeddings=use_one_hot_embeddings)

  (masked_lm_loss, masked_lm_example_loss,
   masked_lm_log_probs) = get_masked_lm_output(
       albert_config, model.get_sequence_output(), model.get_embedding_table(),
       masked_lm_positions, masked_lm_ids, masked_lm_weights)

  (sentence_order_loss, sentence_order_example_loss,
   sentence_order_log_probs) = get_sentence_order_output(
       albert_config, model.get_pooled_output(), sentence_order_labels)

  total_loss = masked_lm_loss + sentence_order_loss

  tvars = tf.trainable_variables()

  initialized_variable_names = {}
  scaffold_fn = None
  if init_checkpoint:
    tf.logging.info("number of hidden group %d to initialize",
                    albert_config.num_hidden_groups)
    num_of_initialize_group = 1
    if FLAGS.init_from_group0:
      num_of_initialize_group = albert_config.num_hidden_groups
      if albert_config.net_structure_type > 0:
        num_of_initialize_group = albert_config.num_hidden_layers
    (assignment_map, initialized_variable_names
    ) = modeling.get_assignment_map_from_checkpoint(
        tvars, init_checkpoint, num_of_initialize_group)
    if use_tpu:

      def tpu_scaffold():
        for gid in range(num_of_initialize_group):
          tf.logging.info("initialize the %dth layer", gid)
          tf.logging.info(assignment_map[gid])
          tf.train.init_from_checkpoint(init_checkpoint, assignment_map[gid])
        return tf.train.Scaffold()

      scaffold_fn = tpu_scaffold
    else:
      for gid in range(num_of_initialize_group):
        tf.logging.info("initialize the %dth layer", gid)
        tf.logging.info(assignment_map[gid])
        tf.train.init_from_checkpoint(init_checkpoint, assignment_map[gid])

  tf.logging.info("**** Trainable Variables ****")
  for var in tvars:
    init_string = ""
    if var.name in initialized_variable_names:
      init_string = ", *INIT_FROM_CKPT*"
    tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                    init_string)

  output_spec = None
  if mode == tf.estimator.ModeKeys.TRAIN:
    train_op = optimization.create_optimizer(total_loss, learning_rate,
                                             num_train_steps, num_warmup_steps,
                                             use_tpu, optimizer, poly_power,
                                             start_warmup_step)
    logging_hook = tf.train.LoggingTensorHook({"loss": total_loss},
                                              every_n_iter=10)
    output_spec = contrib_tpu.TPUEstimatorSpec(
        mode=mode,
        loss=total_loss,
        train_op=train_op,
        training_hooks=[logging_hook],
        scaffold_fn=scaffold_fn)
  elif mode == tf.estimator.ModeKeys.EVAL:

    def metric_fn(*args):
      """Computes the loss and accuracy of the model."""
      (masked_lm_example_loss, masked_lm_log_probs, masked_lm_ids,
       masked_lm_weights, sentence_order_example_loss,
       sentence_order_log_probs, sentence_order_labels) = args[:7]

      masked_lm_log_probs = tf.reshape(masked_lm_log_probs,
                                       [-1, masked_lm_log_probs.shape[-1]])
      masked_lm_predictions = tf.argmax(
          masked_lm_log_probs, axis=-1, output_type=tf.int32)
      masked_lm_example_loss = tf.reshape(masked_lm_example_loss, [-1])
      masked_lm_ids = tf.reshape(masked_lm_ids, [-1])
      masked_lm_weights = tf.reshape(masked_lm_weights, [-1])
      masked_lm_accuracy = tf.metrics.accuracy(
          labels=masked_lm_ids,
          predictions=masked_lm_predictions,
          weights=masked_lm_weights)
      masked_lm_mean_loss = tf.metrics.mean(
          values=masked_lm_example_loss, weights=masked_lm_weights)

      metrics = {
          "masked_lm_accuracy": masked_lm_accuracy,
          "masked_lm_loss": masked_lm_mean_loss,
      }

      sentence_order_log_probs = tf.reshape(
          sentence_order_log_probs, [-1, sentence_order_log_probs.shape[-1]])
      sentence_order_predictions = tf.argmax(
          sentence_order_log_probs, axis=-1, output_type=tf.int32)
      sentence_order_labels = tf.reshape(sentence_order_labels, [-1])
      sentence_order_accuracy = tf.metrics.accuracy(
          labels=sentence_order_labels,
          predictions=sentence_order_predictions)
      sentence_order_mean_loss = tf.metrics.mean(
          values=sentence_order_example_loss)
      metrics.update({
          "sentence_order_accuracy": sentence_order_accuracy,
          "sentence_order_loss": sentence_order_mean_loss
      })
      return metrics

    metric_values = [
        masked_lm_example_loss, masked_lm_log_probs, masked_lm_ids,
        masked_lm_weights, sentence_order_example_loss,
        sentence_order_log_probs, sentence_order_labels
    ]

    eval_metrics = (metric_fn, metric_values)

    output_spec = contrib_tpu.TPUEstimatorSpec(
        mode=mode,
        loss=total_loss,
        eval_metrics=eval_metrics,
        scaffold_fn=scaffold_fn)
  else:
    raise ValueError("Only TRAIN and EVAL modes are supported: %s" % (mode))

  return output_spec
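# --- Hedged note (not from the original source) ---
# TPUEstimatorSpec's `eval_metrics` is a (metric_fn, tensors) pair: the listed
# tensors are gathered across TPU cores and re-passed to metric_fn, which must
# return a dict of tf.metrics-style (value, update_op) tuples. A minimal
# sketch of the same contract with hypothetical label/prediction tensors:
def simple_metric_fn(label_ids, predictions):
  return {
      "accuracy": tf.metrics.accuracy(labels=label_ids,
                                      predictions=predictions),
  }

# eval_metrics = (simple_metric_fn, [label_ids, predictions])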
def __init__(self, is_training):
  # Training or not
  self.is_training = is_training
  # Placeholder
  self.input_ids = tf.placeholder(
      tf.int32, shape=[None, hp.sequence_length], name='input_ids')
  self.input_masks = tf.placeholder(
      tf.int32, shape=[None, hp.sequence_length], name='input_masks')
  self.segment_ids = tf.placeholder(
      tf.int32, shape=[None, hp.sequence_length], name='segment_ids')
  self.label_ids = tf.placeholder(
      tf.float32, shape=[None, hp.num_labels], name='label_ids')
  # Load BERT model
  self.model = modeling.AlbertModel(
      config=bert_config,
      is_training=self.is_training,
      input_ids=self.input_ids,
      input_mask=self.input_masks,
      token_type_ids=self.segment_ids,
      use_one_hot_embeddings=False)
  # Get the feature vector by BERT
  output_layer = self.model.get_pooled_output()
  print("***********************")
  print(output_layer.shape)
  print("****************************")
  print(self.model.get_sequence_output().shape)
  # Hidden size
  hidden_size = output_layer.shape[-1].value
  with tf.name_scope("Full-connection"):
    output_weights = tf.get_variable(
        "output_weights", [num_labels, hidden_size],
        initializer=tf.truncated_normal_initializer(stddev=0.02))
    output_bias = tf.get_variable(
        "output_bias", [num_labels], initializer=tf.zeros_initializer())
    logits = tf.nn.bias_add(
        tf.matmul(output_layer, output_weights, transpose_b=True), output_bias)
    # Prediction sigmoid (multi-label)
    self.probabilities = tf.nn.sigmoid(logits)
  with tf.variable_scope("Prediction"):
    # Prediction
    zero = tf.zeros_like(self.probabilities)
    one = tf.ones_like(self.probabilities)
    self.predictions = tf.where(self.probabilities < 0.5, x=zero, y=one)
  with tf.variable_scope("loss"):
    # Summary for tensorboard
    if self.is_training:
      self.accuracy = tf.reduce_mean(
          tf.to_float(tf.equal(self.predictions, self.label_ids)))
      tf.summary.scalar('accuracy', self.accuracy)
    # Initial embedding by BERT
    ckpt = tf.train.get_checkpoint_state(hp.saved_model_path)
    checkpoint_suffix = ".index"
    if ckpt and tf.gfile.Exists(ckpt.model_checkpoint_path +
                                checkpoint_suffix):
      print('=' * 10, 'Restoring model from checkpoint!', '=' * 10)
      print("%s - Restoring model from checkpoint ~%s" %
            (time_now_string(), ckpt.model_checkpoint_path))
    else:
      print('=' * 10, 'First time load BERT model!', '=' * 10)
      tvars = tf.trainable_variables()
      if hp.init_checkpoint:
        (assignment_map, initialized_variable_names) = \
            modeling.get_assignment_map_from_checkpoint(tvars,
                                                        hp.init_checkpoint)
        tf.train.init_from_checkpoint(hp.init_checkpoint, assignment_map)
    # Loss and Optimizer
    if self.is_training:
      # Global_step
      self.global_step = tf.Variable(0, name='global_step', trainable=False)
      per_example_loss = tf.nn.sigmoid_cross_entropy_with_logits(
          labels=self.label_ids, logits=logits)
      self.loss = tf.reduce_mean(per_example_loss)
      # Optimizer BERT
      train_examples = processor.get_train_examples(hp.data_dir)
      num_train_steps = int(
          len(train_examples) / hp.batch_size * hp.num_train_epochs)
      # num_train_steps = 10000
      num_warmup_steps = int(num_train_steps * hp.warmup_proportion)
      print('num_train_steps', num_train_steps)
      self.optimizer = optimization.create_optimizer(
          self.loss, hp.learning_rate, num_train_steps, num_warmup_steps,
          hp.use_tpu, Global_step=self.global_step)
      # Summary for tensorboard
      tf.summary.scalar('loss', self.loss)
      self.merged = tf.summary.merge_all()
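# --- Hedged illustration (not from the original source) ---
# The multi-label head above thresholds each sigmoid probability at 0.5
# independently; a numpy stand-in for the tf.where call:
import numpy as np

toy_probabilities = np.array([[0.9, 0.2, 0.7],
                              [0.1, 0.6, 0.4]])
toy_predictions = np.where(toy_probabilities < 0.5, 0.0, 1.0)
assert toy_predictions.tolist() == [[1.0, 0.0, 1.0], [0.0, 1.0, 0.0]]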
def __init__(self, is_training):
  self.is_training = is_training
  self.input_ids = tf.compat.v1.placeholder(
      tf.int32, shape=[None, hp.sequence_length], name='input_ids')
  self.input_masks = tf.compat.v1.placeholder(
      tf.int32, shape=[None, hp.sequence_length], name='input_masks')
  self.segment_ids = tf.compat.v1.placeholder(
      tf.int32, shape=[None, hp.sequence_length], name='segment_ids')
  self.label_ids = tf.compat.v1.placeholder(
      tf.int32, shape=[None], name='label_ids')
  # Load the pre-trained BERT LM
  self.model = modeling.AlbertModel(
      config=bert_config,
      is_training=self.is_training,
      input_ids=self.input_ids,
      input_mask=self.input_masks,
      token_type_ids=self.segment_ids,
      use_one_hot_embeddings=False)
  # Get the 3-D feature tensor: (batch_size, sequence_length, hidden_size)
  output_layer_init = self.model.get_sequence_output()
  # Cell textcnn
  output_layer = cell_textcnn(output_layer_init, self.is_training)
  # Hidden size
  # hidden_size = output_layer.shape[-1].value
  hidden_size = output_layer.shape[-1]
  # Dense
  with tf.name_scope("Full-connection"):
    output_weights = tf.compat.v1.get_variable(
        "output_weights", [num_labels, hidden_size],
        initializer=tf.compat.v1.truncated_normal_initializer(stddev=0.02))
    output_bias = tf.compat.v1.get_variable(
        "output_bias", [num_labels], initializer=tf.zeros_initializer())
    # Logit
    logits = tf.matmul(output_layer, output_weights, transpose_b=True)
    self.logits = tf.nn.bias_add(logits, output_bias)
    print('logits: ', self.logits)
    self.probabilities = tf.nn.softmax(self.logits, axis=-1)
  # Prediction
  with tf.compat.v1.variable_scope("Prediction"):
    self.preds = tf.argmax(self.logits, axis=-1, output_type=tf.int32)
    print('preds:', self.preds)
  # Summary for tensorboard
  with tf.compat.v1.variable_scope("Loss"):
    if self.is_training:
      self.accuracy = tf.reduce_mean(
          tf.compat.v1.to_float(tf.equal(self.preds, self.label_ids)))
      tf.summary.scalar('Accuracy', self.accuracy)
  # Check whether a model has already been saved
  ckpt = tf.train.get_checkpoint_state(hp.saved_model_path)
  checkpoint_suffix = ".index"
  if ckpt and tf.compat.v1.gfile.Exists(ckpt.model_checkpoint_path +
                                        checkpoint_suffix):
    print('=' * 10, 'Restoring model from checkpoint!', '=' * 10)
    print("%s - Restoring model from checkpoint ~%s" %
          (time_now_string(), ckpt.model_checkpoint_path))
  else:
    # Load the pre-trained BERT LM
    print('=' * 10, 'First time load BERT model!', '=' * 10)
    tvars = tf.compat.v1.trainable_variables()
    if hp.init_checkpoint:
      (assignment_map, initialized_variable_names) = \
          modeling.get_assignment_map_from_checkpoint(tvars,
                                                      hp.init_checkpoint)
      tf.compat.v1.train.init_from_checkpoint(hp.init_checkpoint,
                                              assignment_map)
  # Optimization
  if self.is_training:
    # Global_step
    self.global_step = tf.Variable(0, name='global_step', trainable=False)
    # Loss
    log_probs = tf.nn.log_softmax(self.logits, axis=-1)  # predicted log-probs
    # One-hot labels, used below for the loss (and for accuracy).
    one_hot_labels = tf.one_hot(
        self.label_ids, depth=num_labels, dtype=tf.float32)
    per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs, axis=-1)
    self.loss = tf.reduce_mean(per_example_loss)
    # Optimizer
    train_examples = processor.get_train_examples(hp.data_dir)
    num_train_steps = int(
        len(train_examples) / hp.batch_size * hp.num_train_epochs)
    num_warmup_steps = int(num_train_steps * hp.warmup_proportion)
    print('num_train_steps', num_train_steps)
    self.optimizer = optimization.create_optimizer(
        self.loss, hp.learning_rate, num_train_steps, num_warmup_steps,
        hp.use_tpu,
    )
    # Summary for tensorboard
    tf.summary.scalar('loss', self.loss)
    self.merged = tf.compat.v1.summary.merge_all()
  # Count the parameters
  count_model_params()
  vs = tf.compat.v1.trainable_variables()
  for l in vs:
    print(l)
  print('=' * 40)
def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
  """The `model_fn` for TPUEstimator."""

  tf.logging.info("*** Features ***")
  for name in sorted(features.keys()):
    tf.logging.info("  name = %s, shape = %s" % (name, features[name].shape))

  input_ids = features["input_ids"]
  input_mask = features["input_mask"]
  segment_ids = features["segment_ids"]
  masked_lm_positions = features["masked_lm_positions"]
  masked_lm_ids = features["masked_lm_ids"]
  hydrophobicities = features["hydrophobicities"]
  solubilities = features["solubilities"]
  charges = features["charges"]
  pks = features["pks"]
  masked_lm_weights = features["masked_lm_weights"]
  hydrophobicity_weights = features["hydrophobicity_weights"]
  solubility_weights = features["solubility_weights"]
  charge_weights = features["charge_weights"]
  pk_weights = features["pk_weights"]

  is_training = (mode == tf.estimator.ModeKeys.TRAIN)
  k = albert_config.k  # NOTE: this is the length of the k-mer.

  model = modeling.AlbertModel(
      config=albert_config,
      is_training=is_training,
      input_ids=input_ids,
      input_mask=input_mask,
      token_type_ids=segment_ids,
      use_one_hot_embeddings=use_one_hot_embeddings)

  (masked_lm_loss, masked_lm_example_loss,
   masked_lm_log_probs) = get_masked_lm_output(
       albert_config, model.get_sequence_output(), model.get_embedding_table(),
       masked_lm_positions, masked_lm_ids, masked_lm_weights)

  if do_hydro:
    (hydrophobicity_loss, hydrophobicity_example_loss,
     hydrophobicity_log_probs) = get_hydrophobicity_output(
         albert_config, model.get_sequence_output(), masked_lm_positions,
         hydrophobicities, hydrophobicity_weights)
  else:
    (hydrophobicity_loss, hydrophobicity_example_loss,
     hydrophobicity_log_probs) = (0, 0, None)

  if do_charge:
    (charge_loss, charge_example_loss, charge_log_probs) = get_charge_output(
        albert_config, model.get_sequence_output(), masked_lm_positions,
        charges, charge_weights)
  else:
    (charge_loss, charge_example_loss, charge_log_probs) = (0, 0, None)

  if do_pks:
    (pk_loss, pk_example_loss, pk_log_probs) = get_pk_output(
        albert_config, model.get_sequence_output(), masked_lm_positions, pks,
        pk_weights)
  else:
    (pk_loss, pk_example_loss, pk_log_probs) = (0, 0, None)

  if do_solubility:
    (solubility_loss, solubility_example_loss,
     solubility_log_probs) = get_solubility_output(
         albert_config, model.get_sequence_output(), masked_lm_positions,
         solubilities, solubility_weights)
  else:
    (solubility_loss, solubility_example_loss,
     solubility_log_probs) = (0, 0, None)

  total_loss = (masked_lm_loss + hydrophobicity_loss + charge_loss + pk_loss +
                solubility_loss)

  tvars = tf.trainable_variables()

  initialized_variable_names = {}
  scaffold_fn = None
  if init_checkpoint:
    tf.logging.info("number of hidden group %d to initialize",
                    albert_config.num_hidden_groups)
    num_of_initialize_group = 1
    if FLAGS.init_from_group0:
      num_of_initialize_group = albert_config.num_hidden_groups
      if albert_config.net_structure_type > 0:
        num_of_initialize_group = albert_config.num_hidden_layers
    (assignment_map, initialized_variable_names
    ) = modeling.get_assignment_map_from_checkpoint(
        tvars, init_checkpoint, num_of_initialize_group)
    if use_tpu:

      def tpu_scaffold():
        for gid in range(num_of_initialize_group):
          tf.logging.info("initialize the %dth layer", gid)
          tf.logging.info(assignment_map[gid])
          tf.train.init_from_checkpoint(init_checkpoint, assignment_map[gid])
        return tf.train.Scaffold()

      scaffold_fn = tpu_scaffold
    else:
      for gid in range(num_of_initialize_group):
        tf.logging.info("initialize the %dth layer", gid)
        tf.logging.info(assignment_map[gid])
        tf.train.init_from_checkpoint(init_checkpoint, assignment_map[gid])

  tf.logging.info("**** Trainable Variables ****")
  for var in tvars:
    init_string = ""
    if var.name in initialized_variable_names:
      init_string = ", *INIT_FROM_CKPT*"
    tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                    init_string)

  output_spec = None
  if mode == tf.estimator.ModeKeys.TRAIN:
    train_op = optimization.create_optimizer(total_loss, learning_rate,
                                             num_train_steps, num_warmup_steps,
                                             use_tpu, optimizer, poly_power,
                                             start_warmup_step)
    output_spec = contrib_tpu.TPUEstimatorSpec(
        mode=mode, loss=total_loss, train_op=train_op, scaffold_fn=scaffold_fn)
  elif mode == tf.estimator.ModeKeys.EVAL:

    def metric_fn(*args):
      """Computes the loss and accuracy of the model."""
      (masked_lm_example_loss, masked_lm_log_probs, masked_lm_ids,
       masked_lm_weights, hydrophobicity_example_loss,
       hydrophobicity_log_probs, hydrophobicities, hydrophobicity_weights,
       charge_example_loss, charge_log_probs, charges, charge_weights,
       pk_example_loss, pk_log_probs, pks, pk_weights,
       solubility_example_loss, solubility_log_probs, solubilities,
       solubility_weights) = args[:20]

      masked_lm_log_probs = tf.reshape(masked_lm_log_probs,
                                       [-1, masked_lm_log_probs.shape[-1]])
      masked_lm_predictions = tf.argmax(
          masked_lm_log_probs, axis=-1, output_type=tf.int32)
      masked_lm_example_loss = tf.reshape(masked_lm_example_loss, [-1])
      masked_lm_ids = tf.reshape(masked_lm_ids, [-1])
      masked_lm_weights = tf.reshape(masked_lm_weights, [-1])
      masked_lm_accuracy = tf.metrics.accuracy(
          labels=masked_lm_ids,
          predictions=masked_lm_predictions,
          weights=masked_lm_weights)
      masked_lm_mean_loss = tf.metrics.mean(
          values=masked_lm_example_loss, weights=masked_lm_weights)

      if do_hydro:
        hydrophobicity_log_probs = tf.reshape(
            hydrophobicity_log_probs, [-1, hydrophobicity_log_probs.shape[-1]])
        hydrophobicity_predictions = tf.argmax(
            hydrophobicity_log_probs, axis=-1, output_type=tf.int32)
        hydrophobicity_example_loss = tf.reshape(hydrophobicity_example_loss,
                                                 [-1])
        hydrophobicities = tf.reshape(hydrophobicities, [-1])
        hydrophobicity_weights = tf.reshape(hydrophobicity_weights, [-1])
        hydrophobicity_accuracy = tf.metrics.accuracy(
            labels=hydrophobicities,
            predictions=hydrophobicity_predictions,
            weights=hydrophobicity_weights)
        hydrophobicity_mean_loss = tf.metrics.mean(
            values=hydrophobicity_example_loss,
            weights=hydrophobicity_weights)
      else:
        hydrophobicity_accuracy = 0
        hydrophobicity_mean_loss = 0

      if do_charge:
        charge_log_probs = tf.reshape(charge_log_probs,
                                      [-1, charge_log_probs.shape[-1]])
        charge_predictions = tf.argmax(
            charge_log_probs, axis=-1, output_type=tf.int32)
        charge_example_loss = tf.reshape(charge_example_loss, [-1])
        charges = tf.reshape(charges, [-1])
        charge_weights = tf.reshape(charge_weights, [-1])
        charge_accuracy = tf.metrics.accuracy(
            labels=charges,
            predictions=charge_predictions,
            weights=charge_weights)
        charge_mean_loss = tf.metrics.mean(
            values=charge_example_loss, weights=charge_weights)
      else:
        charge_accuracy = 0
        charge_mean_loss = 0

      if do_pks:
        pk_log_probs = tf.reshape(pk_log_probs, [-1, pk_log_probs.shape[-1]])
        pk_predictions = tf.argmax(pk_log_probs, axis=-1,
                                   output_type=tf.int32)
        pk_example_loss = tf.reshape(pk_example_loss, [-1])
        pks = tf.reshape(pks, [-1])
        pk_weights = tf.reshape(pk_weights, [-1])
        pk_accuracy = tf.metrics.accuracy(
            labels=pks, predictions=pk_predictions, weights=pk_weights)
        pk_mean_loss = tf.metrics.mean(
            values=pk_example_loss, weights=pk_weights)
      else:
        pk_accuracy = 0
        pk_mean_loss = 0

      if do_solubility:
        solubility_log_probs = tf.reshape(
            solubility_log_probs, [-1, solubility_log_probs.shape[-1]])
        solubility_predictions = tf.argmax(
            solubility_log_probs, axis=-1, output_type=tf.int32)
        solubility_example_loss = tf.reshape(solubility_example_loss, [-1])
        solubilities = tf.reshape(solubilities, [-1])
        solubility_weights = tf.reshape(solubility_weights, [-1])
        solubility_accuracy = tf.metrics.accuracy(
            labels=solubilities,
            predictions=solubility_predictions,
            weights=solubility_weights)
        solubility_mean_loss = tf.metrics.mean(
            values=solubility_example_loss, weights=solubility_weights)
      else:
        solubility_accuracy = 0
        solubility_mean_loss = 0

      metrics = {
          "masked_lm_accuracy": masked_lm_accuracy,
          "masked_lm_loss": masked_lm_mean_loss,
          "hydrophobicity_accuracy": hydrophobicity_accuracy,
          "hydrophobicity_loss": hydrophobicity_mean_loss,
          "charge_accuracy": charge_accuracy,
          "charge_loss": charge_mean_loss,
          "pk_accuracy": pk_accuracy,
          "pk_loss": pk_mean_loss,
          "solubility_accuracy": solubility_accuracy,
          "solubility_loss": solubility_mean_loss
      }
      return metrics

    metric_values = [
        masked_lm_example_loss, masked_lm_log_probs, masked_lm_ids,
        masked_lm_weights, hydrophobicity_example_loss,
        hydrophobicity_log_probs, hydrophobicities, hydrophobicity_weights,
        charge_example_loss, charge_log_probs, charges, charge_weights,
        pk_example_loss, pk_log_probs, pks, pk_weights,
        solubility_example_loss, solubility_log_probs, solubilities,
        solubility_weights
    ]

    eval_metrics = (metric_fn, metric_values)

    output_spec = contrib_tpu.TPUEstimatorSpec(
        mode=mode,
        loss=total_loss,
        eval_metrics=eval_metrics,
        scaffold_fn=scaffold_fn)
  else:
    raise ValueError("Only TRAIN and EVAL modes are supported: %s" % (mode))

  return output_spec
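# --- Hedged illustration (not from the original source) ---
# The total loss above is a plain sum over the task heads; a disabled head
# contributes a literal 0, so the sum reduces to the enabled tasks. Toy
# numbers for illustration only:
do_hydro, do_charge = True, False
masked_lm_loss = 2.31
hydrophobicity_loss = 0.42 if do_hydro else 0
charge_loss = 0.57 if do_charge else 0
total_loss = masked_lm_loss + hydrophobicity_loss + charge_loss
assert abs(total_loss - 2.73) < 1e-9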