def built_model(self):
    bert_config = modeling.BertConfig.from_json_file(self.__bert_config_path)
    model = modeling.BertModel(config=bert_config,
                               is_training=self.__is_training,
                               input_ids=self.input_ids,
                               input_mask=self.input_masks,
                               token_type_ids=self.segment_ids,
                               use_one_hot_embeddings=False)

    # Sequence output of BERT's last layer
    output_layer = model.get_sequence_output()

    hidden_size = output_layer.shape[-1].value

    if self.__is_training:
        output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)

    ner_model = BiLSTMCRF(embedded_chars=output_layer,
                          hidden_sizes=self.__ner_hidden_sizes,
                          layers=self.__ner_layers,
                          keep_prob=self.keep_prob,
                          num_labels=self.__num_classes,
                          max_len=self.__max_len,
                          labels=self.label_ids,
                          sequence_lens=self.sequence_len,
                          is_training=self.__is_training)

    self.loss, self.true_y, self.predictions = ner_model.construct_graph()

    with tf.name_scope('train_op'):
        self.train_op = optimization.create_optimizer(
            self.loss, self.__learning_rate, self.__num_train_step,
            self.__num_warmup_step, use_tpu=False)
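The BiLSTMCRF graph is defined elsewhere in the project, so its internals are not shown here. As a rough, self-contained illustration of the CRF step such a model typically ends with, the following TF 1.x sketch uses tf.contrib.crf with made-up shapes; it is an assumption about the general technique, not the repo's actual BiLSTMCRF implementation.

import tensorflow as tf

# Hypothetical shapes: batch of 2 sequences, max length 5, 4 labels.
batch_size, max_len, num_labels = 2, 5, 4

logits = tf.random_normal([batch_size, max_len, num_labels])        # emission scores per token
labels = tf.constant([[1, 2, 0, 0, 0], [3, 1, 2, 2, 0]], tf.int32)  # gold tag ids (padded)
seq_lens = tf.constant([3, 5], tf.int32)                            # true sequence lengths

# CRF log-likelihood; a learnable transition matrix is created internally.
log_likelihood, transition = tf.contrib.crf.crf_log_likelihood(
    inputs=logits, tag_indices=labels, sequence_lengths=seq_lens)
loss = tf.reduce_mean(-log_likelihood)

# Viterbi decoding yields the predicted tag sequence for each example.
pred_ids, _ = tf.contrib.crf.crf_decode(logits, transition, seq_lens)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run([loss, pred_ids]))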
def built_model(self):
    bert_config = modeling.BertConfig.from_json_file(self.__bert_config_path)
    model = modeling.BertModel(config=bert_config,
                               is_training=self.__is_training,
                               input_ids=self.input_ids,
                               input_mask=self.input_masks,
                               token_type_ids=self.segment_ids,
                               use_one_hot_embeddings=False)

    # Pooled [CLS] output of the last layer
    output_layer = model.get_pooled_output()

    hidden_size = output_layer.shape[-1].value

    if self.__is_training:
        # I.e., 0.1 dropout
        output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)

    with tf.name_scope("output"):
        output_weights = tf.get_variable(
            "output_weights", [self.__num_classes, hidden_size],
            initializer=tf.truncated_normal_initializer(stddev=0.02))
        output_bias = tf.get_variable(
            "output_bias", [self.__num_classes],
            initializer=tf.zeros_initializer())

        logits = tf.matmul(output_layer, output_weights, transpose_b=True)
        logits = tf.nn.bias_add(logits, output_bias)
        self.predictions = tf.argmax(logits, axis=-1, name="predictions")

    if self.__is_training:
        with tf.name_scope("loss"):
            losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=logits, labels=self.label_ids)
            self.loss = tf.reduce_mean(losses, name="loss")

        with tf.name_scope('train_op'):
            self.train_op = optimization.create_optimizer(
                self.loss, self.__learning_rate, self.__num_train_step,
                self.__num_warmup_step, use_tpu=False)
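To make the shapes concrete, here is a small numpy sketch of what the classification head above computes: a matmul against the transposed weight matrix, a bias add, an argmax for predictions, and sparse softmax cross-entropy for the loss. All sizes and values are made up for illustration.

import numpy as np

# Hypothetical sizes: hidden_size=4, 3 classes, batch of 2 pooled [CLS] vectors.
pooled = np.array([[0.1, -0.2, 0.3, 0.5],
                   [0.4,  0.1, -0.3, 0.2]])              # [batch, hidden]
W = np.random.RandomState(0).normal(0.0, 0.02, (3, 4))   # [num_classes, hidden]
b = np.zeros(3)

logits = pooled @ W.T + b                 # tf.matmul(..., transpose_b=True) + bias_add
predictions = logits.argmax(axis=-1)      # argmax over the class dimension

# Sparse softmax cross-entropy against gold labels [2, 0]
labels = np.array([2, 0])
log_probs = logits - np.log(np.exp(logits).sum(axis=-1, keepdims=True))
loss = -log_probs[np.arange(len(labels)), labels].mean()
print(predictions, loss)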
def built_model(self):
    bert_config = modeling.BertConfig.from_json_file(self.__bert_config_path)
    model = modeling.BertModel(config=bert_config,
                               is_training=self.__is_training,
                               input_ids=self.concat_input_ids,
                               input_mask=self.concat_input_masks,
                               token_type_ids=self.concat_segment_ids,
                               use_one_hot_embeddings=False)

    concat_output = model.get_pooled_output()
    output_a, output_b = tf.split(concat_output, [self.__batch_size] * 2, axis=0)

    # -------------------------------------------------------------------------
    # Cosine similarity + contrastive loss
    # -------------------------------------------------------------------------
    with tf.name_scope("cosine_similarity"):
        # [batch_size]
        norm_a = tf.sqrt(tf.reduce_sum(tf.square(output_a), axis=-1))
        # [batch_size]
        norm_b = tf.sqrt(tf.reduce_sum(tf.square(output_b), axis=-1))
        # [batch_size]
        dot = tf.reduce_sum(tf.multiply(output_a, output_b), axis=-1)
        # [batch_size]
        norm = norm_a * norm_b
        # [batch_size]
        self.similarity = tf.div(dot, norm, name="similarity")
        self.predictions = tf.cast(
            tf.greater_equal(self.similarity, self.__neg_threshold),
            tf.int32, name="predictions")

    with tf.name_scope("loss"):
        # Loss term for pairs labelled positive (pushes similarity toward 1)
        pred_pos_prob = tf.square(1 - self.similarity)
        cond = (self.similarity > self.__neg_threshold)
        zeros = tf.zeros_like(self.similarity, dtype=tf.float32)
        pred_neg_prob = tf.where(cond, tf.square(self.similarity), zeros)
        self.label_ids = tf.cast(self.label_ids, dtype=tf.float32)
        losses = self.label_ids * pred_pos_prob + (1. - self.label_ids) * pred_neg_prob
        self.loss = tf.reduce_mean(losses, name="loss")

    # -------------------------------------------------------------------------
    # Manhattan distance + binary cross-entropy (alternative, kept commented out)
    # -------------------------------------------------------------------------
    # with tf.name_scope("manhattan_distance"):
    #     man_distance = tf.reduce_sum(tf.abs(output_a - output_b), -1)
    #     self.similarity = tf.exp(-man_distance)
    #     self.predictions = tf.cast(tf.greater_equal(self.similarity, 0.5), tf.int32, name="predictions")
    #
    # with tf.name_scope("loss"):
    #     losses = self.label_ids * tf.log(self.similarity) + (1 - self.label_ids) * tf.log(1 - self.similarity)
    #     self.loss = tf.reduce_mean(-losses, name="loss")

    with tf.name_scope('train_op'):
        self.train_op = optimization.create_optimizer(
            self.loss, self.__learning_rate, self.__num_train_step,
            self.__num_warmup_step, use_tpu=False)
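For reference, this is a minimal numpy sketch of the contrastive loss wired up above: positive pairs are pulled toward similarity 1, and negative pairs are only penalised while their similarity is still above the threshold. The similarity values, labels, and the contrastive_loss helper are made up for illustration and are not part of the class.

import numpy as np

def contrastive_loss(similarity, labels, neg_threshold=0.5):
    # Mirrors label * (1 - sim)^2 + (1 - label) * (sim^2 if sim > threshold else 0)
    pos_term = np.square(1.0 - similarity)
    neg_term = np.where(similarity > neg_threshold, np.square(similarity), 0.0)
    return np.mean(labels * pos_term + (1.0 - labels) * neg_term)

sims = np.array([0.9, 0.2, 0.7, 0.4])   # cosine similarities of four sentence pairs
labels = np.array([1., 1., 0., 0.])     # 1 = same meaning, 0 = different
# per-pair terms: [0.01, 0.64, 0.49, 0.0]
print(contrastive_loss(sims, labels))   # 0.285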
def built_model(self):
    bert_config = modeling.BertConfig.from_json_file(self.__bert_config_path)
    model = modeling.BertModel(config=bert_config,
                               is_training=self.__is_training,
                               input_ids=self.input_ids,
                               input_mask=self.input_masks,
                               token_type_ids=self.segment_ids,
                               use_one_hot_embeddings=False)

    final_hidden = model.get_sequence_output()

    final_hidden_shape = modeling.get_shape_list(final_hidden, expected_rank=3)
    seq_length = final_hidden_shape[1]
    hidden_size = final_hidden_shape[2]

    with tf.name_scope("output"):
        output_weights = tf.get_variable(
            "output_weights", [2, hidden_size],
            initializer=tf.truncated_normal_initializer(stddev=0.02))
        output_bias = tf.get_variable(
            "output_bias", [2], initializer=tf.zeros_initializer())

        final_hidden_matrix = tf.reshape(final_hidden, [-1, hidden_size])
        logits = tf.matmul(final_hidden_matrix, output_weights, transpose_b=True)
        logits = tf.nn.bias_add(logits, output_bias)

        logits = tf.reshape(logits, [-1, seq_length, 2])
        logits = tf.transpose(logits, [2, 0, 1])

        unstacked_logits = tf.unstack(logits, axis=0)

        # Each is [batch_size, seq_length]
        start_logits, end_logits = (unstacked_logits[0], unstacked_logits[1])

        self.start_logits = start_logits
        self.end_logits = end_logits

    if self.__is_training:
        with tf.name_scope("loss"):
            start_losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=start_logits, labels=self.start_position)
            end_losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=end_logits, labels=self.end_position)
            losses = tf.concat([start_losses, end_losses], axis=0)
            self.loss = tf.reduce_mean(losses, name="loss")

        with tf.name_scope('train_op'):
            self.train_op = optimization.create_optimizer(
                self.loss, self.__learning_rate, self.__num_train_step,
                self.__num_warmup_step, use_tpu=False)
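The graph only exposes start_logits and end_logits; turning them into an answer span usually happens outside the graph at inference time. The hypothetical best_span helper below shows one common way to consume these logits (pick the (start, end) pair with start <= end that maximises the summed logits, capped at a maximum answer length); it is an illustration, not part of this class.

import numpy as np

def best_span(start_logits, end_logits, max_answer_len=30):
    # Exhaustive search over valid (start, end) pairs within max_answer_len tokens.
    best, best_score = (0, 0), -np.inf
    for i, s in enumerate(start_logits):
        for j in range(i, min(i + max_answer_len, len(end_logits))):
            score = s + end_logits[j]
            if score > best_score:
                best_score, best = score, (i, j)
    return best, best_score

start = np.array([0.1, 2.3, 0.2, -1.0, 0.5])
end   = np.array([0.0, 0.4, 1.9,  0.3, 0.1])
print(best_span(start, end))   # ((1, 2), 4.2): tokens 1..2 form the predicted answer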
def built_model(self):
    bert_config = modeling.BertConfig.from_json_file(self.__bert_config_path)
    model = modeling.BertModel(config=bert_config,
                               is_training=self.__is_training,
                               input_ids=self.concat_input_ids,
                               input_mask=self.concat_input_masks,
                               token_type_ids=self.concat_segment_ids,
                               use_one_hot_embeddings=False)

    concat_output = model.get_pooled_output()

    output_a, output_b = tf.split(
        concat_output,
        [self.__batch_size, self.__batch_size * self.__num_samples],
        axis=0)

    with tf.name_scope("reshape_output_b"):
        # batch_size tensors, each [num_samples, hidden_size]
        split_output_b = tf.split(output_b,
                                  [self.__num_samples] * self.__batch_size,
                                  axis=0)
        # batch_size tensors, each [1, num_samples, hidden_size]
        expand_output_b = [tf.expand_dims(tensor, 0) for tensor in split_output_b]
        # [batch_size, num_samples, hidden_size]
        reshape_output_b = tf.concat(expand_output_b, axis=0)

    with tf.name_scope("cosine_similarity"):
        # [batch_size, 1, hidden_size]
        expand_output_a = tf.expand_dims(output_a, 1)
        # [batch_size, 1]
        norm_a = tf.sqrt(tf.reduce_sum(tf.square(expand_output_a), -1))
        # [batch_size, num_samples]
        norm_b = tf.sqrt(tf.reduce_sum(tf.square(reshape_output_b), -1))
        # [batch_size, num_samples]
        dot = tf.reduce_sum(tf.multiply(expand_output_a, reshape_output_b), axis=-1)
        # [batch_size, num_samples]
        norm = norm_a * norm_b
        self.similarity = tf.div(dot, norm, name="similarity")
        self.predictions = tf.argmax(self.similarity, -1, name="predictions")

    with tf.name_scope("loss"):
        if self.__num_samples == 2:
            # One positive and one negative per query: margin (hinge) loss
            pos_similarity = tf.reshape(
                tf.slice(self.similarity, [0, 0], [self.__batch_size, 1]),
                [self.__batch_size])
            neg_similarity = tf.reshape(
                tf.slice(self.similarity, [0, 1],
                         [self.__batch_size, self.__num_samples - 1]),
                [self.__batch_size])
            distance = self.__margin - pos_similarity + neg_similarity
            zeros = tf.zeros_like(distance, dtype=tf.float32)
            cond = (distance >= zeros)
            losses = tf.where(cond, distance, zeros)
            self.loss = tf.reduce_mean(losses, name="loss")
        else:
            # Several negatives per query: softmax-style loss over the candidates
            pos_similarity = tf.exp(
                tf.reshape(
                    tf.slice(self.similarity, [0, 0], [self.__batch_size, 1]),
                    [self.__batch_size]))
            neg_similarity = tf.exp(
                tf.slice(self.similarity, [0, 1],
                         [self.__batch_size, self.__num_samples - 1]))
            norm_seg_similarity = tf.reduce_sum(neg_similarity, axis=-1)
            pos_prob = tf.div(pos_similarity, norm_seg_similarity)
            self.loss = tf.reduce_mean(-tf.log(pos_prob), name="loss")

    with tf.name_scope('train_op'):
        self.train_op = optimization.create_optimizer(
            self.loss, self.__learning_rate, self.__num_train_step,
            self.__num_warmup_step, use_tpu=False)
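To see what the two loss branches compute, here is a numpy sketch with a made-up similarity matrix in which column 0 is the positive candidate and the remaining columns are sampled negatives. Note that, as written in the graph above, the softmax-style branch normalises exp(positive) by the summed exp(negatives) only, rather than by all candidates.

import numpy as np

# Hypothetical [batch_size, num_samples] similarities (num_samples = 4 here).
similarity = np.array([[0.8, 0.3, 0.1, 0.4],
                       [0.5, 0.6, 0.2, 0.1]])
margin = 0.5

# num_samples == 2 branch: pairwise hinge loss on (positive, single negative)
pos, neg = similarity[:, 0], similarity[:, 1]
hinge = np.maximum(0.0, margin - pos + neg)          # per-row: [0.0, 0.6]
print(hinge.mean())                                  # 0.3

# num_samples > 2 branch: exp(positive) over the summed exp(negatives)
pos_exp = np.exp(similarity[:, 0])
neg_exp_sum = np.exp(similarity[:, 1:]).sum(axis=-1)
loss = np.mean(-np.log(pos_exp / neg_exp_sum))
print(loss)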