def built_model(self):
    bert_config = modeling.BertConfig.from_json_file(self.__bert_config_path)

    model = modeling.BertModel(config=bert_config,
                               is_training=self.__is_training,
                               input_ids=self.input_ids,
                               input_mask=self.input_masks,
                               token_type_ids=self.segment_ids,
                               use_one_hot_embeddings=False)

    # Take the output of BERT's last encoder layer:
    # [batch_size, seq_length, hidden_size].
    output_layer = model.get_sequence_output()
    hidden_size = output_layer.shape[-1].value

    if self.__is_training:
        output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)

    # Feed the contextual token embeddings into a BiLSTM-CRF tagging layer.
    ner_model = BiLSTMCRF(embedded_chars=output_layer,
                          hidden_sizes=self.__ner_hidden_sizes,
                          layers=self.__ner_layers,
                          keep_prob=self.keep_prob,
                          num_labels=self.__num_classes,
                          max_len=self.__max_len,
                          labels=self.label_ids,
                          sequence_lens=self.sequence_len,
                          is_training=self.__is_training)

    self.loss, self.true_y, self.predictions = ner_model.construct_graph()

    with tf.name_scope('train_op'):
        self.train_op = optimization.create_optimizer(
            self.loss, self.__learning_rate, self.__num_train_step,
            self.__num_warmup_step, use_tpu=False)
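# For context, a minimal, hypothetical sketch of the feed tensors this method
# assumes were built elsewhere in the class. The names mirror the attribute
# usages above; the shapes, dtypes, and max_len value are assumptions.
import tensorflow as tf

max_len = 128  # assumed maximum sequence length

input_ids = tf.placeholder(tf.int32, [None, max_len], name="input_ids")
input_masks = tf.placeholder(tf.int32, [None, max_len], name="input_masks")
segment_ids = tf.placeholder(tf.int32, [None, max_len], name="segment_ids")
label_ids = tf.placeholder(tf.int32, [None, max_len], name="label_ids")
sequence_len = tf.placeholder(tf.int32, [None], name="sequence_len")  # true lengths for the CRF
keep_prob = tf.placeholder(tf.float32, name="keep_prob")  # LSTM dropout keep probability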
def built_model(self):
    bert_config = modeling.BertConfig.from_json_file(self.__bert_config_path)

    model = modeling.BertModel(config=bert_config,
                               is_training=self.__is_training,
                               input_ids=self.input_ids,
                               input_mask=self.input_masks,
                               token_type_ids=self.segment_ids,
                               use_one_hot_embeddings=False)

    output_layer = model.get_pooled_output()
    hidden_size = output_layer.shape[-1].value

    if self.__is_training:
        # I.e., 0.1 dropout
        output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)

    with tf.name_scope("output"):
        output_weights = tf.get_variable(
            "output_weights", [self.__num_classes, hidden_size],
            initializer=tf.truncated_normal_initializer(stddev=0.02))
        output_bias = tf.get_variable(
            "output_bias", [self.__num_classes],
            initializer=tf.zeros_initializer())

        logits = tf.matmul(output_layer, output_weights, transpose_b=True)
        logits = tf.nn.bias_add(logits, output_bias)

        if self.__num_classes == 1:
            self.predictions = tf.cast(tf.greater_equal(logits, 0.0),
                                       dtype=tf.int32, name="predictions")
        else:
            self.predictions = tf.argmax(logits, axis=-1, name="predictions")

    if self.__is_training:
        with tf.name_scope("loss"):
            if self.__num_classes == 1:
                losses = tf.nn.sigmoid_cross_entropy_with_logits(
                    logits=tf.reshape(logits, [-1]),
                    labels=tf.cast(self.label_ids, dtype=tf.float32))
            else:
                losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
                    logits=logits, labels=self.label_ids)
            self.loss = tf.reduce_mean(losses, name="loss")

        with tf.name_scope('train_op'):
            self.train_op = optimization.create_optimizer(
                self.loss, self.__learning_rate, self.__num_train_step,
                self.__num_warmup_step, use_tpu=False)
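# Why tf.greater_equal(logits, 0.0) works for the binary branch above:
# sigmoid(0) = 0.5, so thresholding the raw logit at 0 is identical to
# thresholding the sigmoid probability at 0.5. A small NumPy check
# (illustrative only, not part of the model graph):
import numpy as np

logits = np.array([-2.3, -0.1, 0.0, 1.7])
probs = 1.0 / (1.0 + np.exp(-logits))  # [0.091, 0.475, 0.500, 0.846]
assert np.array_equal(logits >= 0.0, probs >= 0.5)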
def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """The `model_fn` for TPUEstimator."""

    guids = features["guids"]
    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]

    jit_scope = tf.contrib.compiler.jit.experimental_jit_scope
    with jit_scope():
        model = modeling.BertModel(config=bert_config,
                                   is_training=False,
                                   input_ids=input_ids,
                                   input_mask=input_mask,
                                   token_type_ids=segment_ids)

        if mode != tf.estimator.ModeKeys.PREDICT:
            raise ValueError("Only PREDICT modes are supported: %s" % (mode))

        tvars = tf.trainable_variables()
        (assignment_map, initialized_variable_names
         ) = modeling.get_assignment_map_from_checkpoint(tvars, init_checkpoint)
        # Actually load the pre-trained weights; without this call the
        # assignment map is computed but never applied.
        tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

        tf.logging.info("**** Trainable Variables ****")
        for var in tvars:
            init_string = ""
            if var.name in initialized_variable_names:
                init_string = ", *INIT_FROM_CKPT*"
            tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                            init_string)

        all_layers = model.get_all_encoder_layers()

        predictions = {
            "guid": guids,
        }
        for (i, layer_index) in enumerate(layer_indexes):
            predictions["layer_output_%d" % i] = all_layers[layer_index]

        output_spec = tf.estimator.EstimatorSpec(mode=mode,
                                                 predictions=predictions)
        return output_spec
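# A hedged sketch of how a PREDICT-only model_fn like the one above is
# typically driven. The input_fn is assumed to yield the four feature
# tensors consumed above; estimator.predict() emits one dict per example,
# keyed by "guid" and "layer_output_%d".
def extract_features(input_fn):
    """Sketch: stream per-layer encoder outputs for each input example."""
    estimator = tf.estimator.Estimator(model_fn=model_fn)
    for result in estimator.predict(input_fn, yield_single_examples=True):
        yield result["guid"], [result["layer_output_%d" % i]
                               for i in range(len(layer_indexes))]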
def built_model(self):
    bert_config = modeling.BertConfig.from_json_file(self.__bert_config_path)

    model = modeling.BertModel(config=bert_config,
                               is_training=self.__is_training,
                               input_ids=self.concat_input_ids,
                               input_mask=self.concat_input_masks,
                               token_type_ids=self.concat_segment_ids,
                               use_one_hot_embeddings=False)

    concat_output = model.get_pooled_output()
    output_a, output_b = tf.split(concat_output, [self.__batch_size] * 2,
                                  axis=0)

    # -------------------------------------------------------------------
    # Cosine similarity + contrastive loss
    # -------------------------------------------------------------------
    with tf.name_scope("cosine_similarity"):
        # [batch_size]
        norm_a = tf.sqrt(tf.reduce_sum(tf.square(output_a), axis=-1))
        # [batch_size]
        norm_b = tf.sqrt(tf.reduce_sum(tf.square(output_b), axis=-1))
        # [batch_size]
        dot = tf.reduce_sum(tf.multiply(output_a, output_b), axis=-1)
        # [batch_size]
        norm = norm_a * norm_b
        # [batch_size]
        self.similarity = tf.div(dot, norm, name="similarity")
        self.predictions = tf.cast(
            tf.greater_equal(self.similarity, self.__neg_threshold),
            tf.int32, name="predictions")

    with tf.name_scope("loss"):
        # Penalty when the pair is labeled positive
        pred_pos_prob = tf.square(1 - self.similarity)
        # Negative pairs are only penalized above the threshold
        cond = (self.similarity > self.__neg_threshold)
        zeros = tf.zeros_like(self.similarity, dtype=tf.float32)
        pred_neg_prob = tf.where(cond, tf.square(self.similarity), zeros)
        self.label_ids = tf.cast(self.label_ids, dtype=tf.float32)
        losses = (self.label_ids * pred_pos_prob
                  + (1. - self.label_ids) * pred_neg_prob)
        self.loss = tf.reduce_mean(losses, name="loss")

    # -------------------------------------------------------------------
    # Manhattan distance + binary cross-entropy (alternative head)
    # -------------------------------------------------------------------
    # with tf.name_scope("manhattan_distance"):
    #     man_distance = tf.reduce_sum(tf.abs(output_a - output_b), -1)
    #     self.similarity = tf.exp(-man_distance)
    #     self.predictions = tf.cast(tf.greater_equal(self.similarity, 0.5),
    #                                tf.int32, name="predictions")
    #
    # with tf.name_scope("loss"):
    #     losses = self.label_ids * tf.log(self.similarity) + \
    #         (1 - self.label_ids) * tf.log(1 - self.similarity)
    #     self.loss = tf.reduce_mean(-losses, name="loss")

    with tf.name_scope('train_op'):
        self.train_op = optimization.create_optimizer(
            self.loss, self.__learning_rate, self.__num_train_step,
            self.__num_warmup_step, use_tpu=False)
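# A worked NumPy example of the contrastive loss above (illustrative numbers;
# neg_threshold assumed 0.5). Positive pairs (label 1) are pulled toward
# similarity 1 via (1 - s)^2; negative pairs (label 0) are only penalized by
# s^2 once their similarity exceeds the threshold.
import numpy as np

neg_threshold = 0.5
similarity = np.array([0.9, 0.3, 0.8, 0.2])
labels = np.array([1.0, 1.0, 0.0, 0.0])

pred_pos_prob = (1 - similarity) ** 2                  # [0.01, 0.49, 0.04, 0.64]
pred_neg_prob = np.where(similarity > neg_threshold,
                         similarity ** 2, 0.0)         # [0.81, 0.00, 0.64, 0.00]
losses = labels * pred_pos_prob + (1 - labels) * pred_neg_prob
print(losses.mean())  # (0.01 + 0.49 + 0.64 + 0.0) / 4 = 0.285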
def built_model(self):
    bert_config = modeling.BertConfig.from_json_file(self.__bert_config_path)

    model = modeling.BertModel(config=bert_config,
                               is_training=self.__is_training,
                               input_ids=self.input_ids,
                               input_mask=self.input_masks,
                               token_type_ids=self.segment_ids,
                               use_one_hot_embeddings=False)

    final_hidden = model.get_sequence_output()
    final_hidden_shape = modeling.get_shape_list(final_hidden,
                                                 expected_rank=3)
    seq_length = final_hidden_shape[1]
    hidden_size = final_hidden_shape[2]

    with tf.name_scope("output"):
        output_weights = tf.get_variable(
            "output_weights", [2, hidden_size],
            initializer=tf.truncated_normal_initializer(stddev=0.02))
        output_bias = tf.get_variable("output_bias", [2],
                                      initializer=tf.zeros_initializer())

        final_hidden_matrix = tf.reshape(final_hidden, [-1, hidden_size])
        logits = tf.matmul(final_hidden_matrix, output_weights,
                           transpose_b=True)
        logits = tf.nn.bias_add(logits, output_bias)

        logits = tf.reshape(logits, [-1, seq_length, 2])
        logits = tf.transpose(logits, [2, 0, 1])
        unstacked_logits = tf.unstack(logits, axis=0)

        # [batch_size, seq_length]
        start_logits, end_logits = (unstacked_logits[0], unstacked_logits[1])
        self.start_logits = start_logits
        self.end_logits = end_logits

    if self.__is_training:
        with tf.name_scope("loss"):
            start_losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=start_logits, labels=self.start_position)
            end_losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=end_logits, labels=self.end_position)
            losses = tf.concat([start_losses, end_losses], axis=0)
            self.loss = tf.reduce_mean(losses, name="loss")

        with tf.name_scope('train_op'):
            self.train_op = optimization.create_optimizer(
                self.loss, self.__learning_rate, self.__num_train_step,
                self.__num_warmup_step, use_tpu=False)
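# A hedged sketch of span decoding at inference time (not from the source):
# pick the (start, end) pair with the highest combined logit, subject to
# start <= end and an assumed maximum answer length.
import numpy as np

def best_span(start_logits, end_logits, max_answer_len=30):
    """start_logits, end_logits: [seq_length] arrays for one example."""
    best = (0, 0, -np.inf)
    for s in range(len(start_logits)):
        for e in range(s, min(s + max_answer_len, len(end_logits))):
            score = start_logits[s] + end_logits[e]
            if score > best[2]:
                best = (s, e, score)
    return best[:2]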
def built_model(self):
    bert_config = modeling.BertConfig.from_json_file(self.__bert_config_path)

    model = modeling.BertModel(config=bert_config,
                               is_training=self.__is_training,
                               input_ids=self.concat_input_ids,
                               input_mask=self.concat_input_masks,
                               token_type_ids=self.concat_segment_ids,
                               use_one_hot_embeddings=False)

    concat_output = model.get_pooled_output()
    output_a, output_b = tf.split(
        concat_output,
        [self.__batch_size, self.__batch_size * self.__num_samples],
        axis=0)

    with tf.name_scope("reshape_output_b"):
        # batch_size tensors, each of shape [num_samples, hidden_size]
        split_output_b = tf.split(output_b,
                                  [self.__num_samples] * self.__batch_size,
                                  axis=0)
        # batch_size tensors, each of shape [1, num_samples, hidden_size]
        expand_output_b = [tf.expand_dims(tensor, 0)
                           for tensor in split_output_b]
        # [batch_size, num_samples, hidden_size]
        reshape_output_b = tf.concat(expand_output_b, axis=0)

    with tf.name_scope("cosine_similarity"):
        # [batch_size, 1, hidden_size]
        expand_output_a = tf.expand_dims(output_a, 1)
        # [batch_size, 1]
        norm_a = tf.sqrt(tf.reduce_sum(tf.square(expand_output_a), -1))
        # [batch_size, num_samples]
        norm_b = tf.sqrt(tf.reduce_sum(tf.square(reshape_output_b), -1))
        # [batch_size, num_samples]
        dot = tf.reduce_sum(tf.multiply(expand_output_a, reshape_output_b),
                            axis=-1)
        # [batch_size, num_samples]
        norm = norm_a * norm_b
        self.similarity = tf.div(dot, norm, name="similarity")
        self.predictions = tf.argmax(self.similarity, -1, name="predictions")

    with tf.name_scope("loss"):
        if self.__num_samples == 2:
            # One positive and one negative per anchor: hinge (triplet) loss.
            pos_similarity = tf.reshape(
                tf.slice(self.similarity, [0, 0], [self.__batch_size, 1]),
                [self.__batch_size])
            neg_similarity = tf.reshape(
                tf.slice(self.similarity, [0, 1],
                         [self.__batch_size, self.__num_samples - 1]),
                [self.__batch_size])
            distance = self.__margin - pos_similarity + neg_similarity
            zeros = tf.zeros_like(distance, dtype=tf.float32)
            cond = (distance >= zeros)
            losses = tf.where(cond, distance, zeros)
            self.loss = tf.reduce_mean(losses, name="loss")
        else:
            # Several negatives per anchor: softmax-style loss, normalizing
            # the positive similarity by the summed negative similarities.
            pos_similarity = tf.exp(
                tf.reshape(
                    tf.slice(self.similarity, [0, 0],
                             [self.__batch_size, 1]),
                    [self.__batch_size]))
            neg_similarity = tf.exp(
                tf.slice(self.similarity, [0, 1],
                         [self.__batch_size, self.__num_samples - 1]))
            norm_neg_similarity = tf.reduce_sum(neg_similarity, axis=-1)
            pos_prob = tf.div(pos_similarity, norm_neg_similarity)
            self.loss = tf.reduce_mean(-tf.log(pos_prob), name="loss")

    with tf.name_scope('train_op'):
        self.train_op = optimization.create_optimizer(
            self.loss, self.__learning_rate, self.__num_train_step,
            self.__num_warmup_step, use_tpu=False)
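# A worked example of the num_samples == 2 branch above: hinge loss
# max(0, margin - pos + neg), with an assumed margin of 0.3. The loss is
# zero once the positive beats the negative by at least the margin.
import numpy as np

margin = 0.3
pos_similarity = np.array([0.9, 0.6])
neg_similarity = np.array([0.2, 0.5])
distance = margin - pos_similarity + neg_similarity  # [-0.4, 0.2]
losses = np.maximum(distance, 0.0)                   # [0.0, 0.2]
print(losses.mean())                                 # 0.1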
def create_model(bert_config, is_training, input_ids, input_mask, segment_ids,
                 token_label_ids, predicate_matrix_ids, num_token_labels,
                 num_predicate_labels, use_one_hot_embeddings):
    """Creates a classification model."""
    model = modeling.BertModel(config=bert_config,
                               is_training=is_training,
                               input_ids=input_ids,
                               input_mask=input_mask,
                               token_type_ids=segment_ids,
                               use_one_hot_embeddings=use_one_hot_embeddings)

    # We could "pool" the model by simply taking the hidden state of the
    # first token, a float Tensor of shape [batch_size, hidden_size]:
    # model_pooled_output = model.get_pooled_output()
    #
    # Here we instead take the final hidden layer of the encoder: a float
    # Tensor of shape [batch_size, seq_length, hidden_size].
    sequence_bert_encode_output = model.get_sequence_output()
    if is_training:
        sequence_bert_encode_output = tf.nn.dropout(
            sequence_bert_encode_output, keep_prob=0.9)

    with tf.variable_scope("predicate_head_select_loss"):
        bert_sequence_length = sequence_bert_encode_output.shape[-2].value
        # shape [batch_size, sequence_length, sequence_length, num_predicate_labels]
        predicate_score_matrix = getHeadSelectionScores(
            encode_input=sequence_bert_encode_output,
            hidden_size_n1=100,
            label_number=num_predicate_labels)
        predicate_head_probabilities = tf.nn.sigmoid(predicate_score_matrix)
        # predicate_head_prediction = tf.argmax(predicate_head_probabilities, axis=3)
        predicate_head_predictions_round = tf.round(
            predicate_head_probabilities)
        predicate_head_predictions = tf.cast(predicate_head_predictions_round,
                                             tf.int32)
        # shape [batch_size, sequence_length, sequence_length]
        predicate_matrix = tf.reshape(
            predicate_matrix_ids,
            [-1, bert_sequence_length, bert_sequence_length])
        gold_predicate_matrix_one_hot = tf.one_hot(
            predicate_matrix, depth=num_predicate_labels, dtype=tf.float32)
        # shape [batch_size, sequence_length, sequence_length, num_predicate_labels]
        predicate_sigmoid_cross_entropy_with_logits = \
            tf.nn.sigmoid_cross_entropy_with_logits(
                logits=predicate_score_matrix,
                labels=gold_predicate_matrix_one_hot)

        def batch_sequence_matrix_max_sequence_length(batch_sequence_matrix):
            """Get the longest effective length of the input sequences (excluding padding)."""
            mask = tf.math.logical_not(tf.math.equal(batch_sequence_matrix, 0))
            mask = tf.cast(mask, tf.float32)
            mask_length = tf.reduce_sum(mask, axis=1)
            mask_length = tf.cast(mask_length, tf.int32)
            mask_max_length = tf.reduce_max(mask_length)
            return mask_max_length

        mask_max_length = batch_sequence_matrix_max_sequence_length(
            token_label_ids)
        # Only score positions inside the longest real sequence in the batch.
        predicate_sigmoid_cross_entropy_with_logits = \
            predicate_sigmoid_cross_entropy_with_logits[
                :, :mask_max_length, :mask_max_length, :]
        # shape []
        predicate_head_select_loss = tf.reduce_sum(
            predicate_sigmoid_cross_entropy_with_logits)

    with tf.variable_scope("token_label_loss"):
        bert_encode_hidden_size = sequence_bert_encode_output.shape[-1].value
        token_label_output_weight = tf.get_variable(
            "token_label_output_weights",
            [num_token_labels, bert_encode_hidden_size],
            initializer=tf.truncated_normal_initializer(stddev=0.02))
        token_label_output_bias = tf.get_variable(
            "token_label_output_bias", [num_token_labels],
            initializer=tf.zeros_initializer())
        sequence_bert_encode_output = tf.reshape(
            sequence_bert_encode_output, [-1, bert_encode_hidden_size])
        token_label_logits = tf.matmul(sequence_bert_encode_output,
                                       token_label_output_weight,
                                       transpose_b=True)
        token_label_logits = tf.nn.bias_add(token_label_logits,
                                            token_label_output_bias)
        token_label_logits = tf.reshape(
            token_label_logits, [-1, FLAGS.max_seq_length, num_token_labels])
        token_label_log_probs = tf.nn.log_softmax(token_label_logits, axis=-1)
        token_label_one_hot_labels = tf.one_hot(token_label_ids,
                                                depth=num_token_labels,
                                                dtype=tf.float32)
        token_label_per_example_loss = -tf.reduce_sum(
            token_label_one_hot_labels * token_label_log_probs, axis=-1)
        token_label_loss = tf.reduce_sum(token_label_per_example_loss)
        token_label_probabilities = tf.nn.softmax(token_label_logits, axis=-1)
        token_label_predictions = tf.argmax(token_label_probabilities,
                                            axis=-1)
        # return (token_label_loss, token_label_per_example_loss,
        #         token_label_logits, token_label_predict)

    loss = predicate_head_select_loss + token_label_loss
    return (loss, predicate_head_select_loss, predicate_head_probabilities,
            predicate_head_predictions, token_label_loss,
            token_label_per_example_loss, token_label_logits,
            token_label_predictions)
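# getHeadSelectionScores is defined elsewhere in the source. Below is a
# plausible sketch of the head-selection scoring it implies (output shape
# [batch, seq, seq, label_number], per the comments above), in the additive
# style of Bekoulis et al.'s joint entity-relation model; the variable names
# and exact formulation are assumptions, not the author's implementation.
def head_selection_scores_sketch(encode_input, hidden_size_n1, label_number):
    # encode_input: [batch, seq, hidden]
    left = tf.layers.dense(encode_input, hidden_size_n1)   # token i as head
    right = tf.layers.dense(encode_input, hidden_size_n1)  # token j as dependent
    # Broadcast-add to a [batch, seq, seq, hidden_size_n1] pair tensor.
    pair = tf.tanh(tf.expand_dims(left, 2) + tf.expand_dims(right, 1))
    # Project each (i, j) pair to one logit per predicate label.
    return tf.layers.dense(pair, label_number)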