def create_model(bert_config, is_training, input_ids, input_mask, num_labels, labels=None):
    model = modeling.BertModel(
        config=bert_config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        use_one_hot_embeddings=False)
    # Pooled [CLS] representation, shape [batch_size, hidden_size].
    output_layer = model.get_pooled_output()
    hidden_size = output_layer.shape[-1].value
    output_weights = tf.get_variable(
        "output_weights", [num_labels, hidden_size],
        initializer=tf.truncated_normal_initializer(stddev=0.02))
    output_bias = tf.get_variable(
        "output_bias", [num_labels], initializer=tf.zeros_initializer())
    with tf.variable_scope("loss"):
        if is_training:
            output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)
        logits = tf.matmul(output_layer, output_weights, transpose_b=True)
        logits = tf.nn.bias_add(logits, output_bias)
        probabilities = tf.nn.sigmoid(logits)
        if labels is None:
            return probabilities
        one_hot_labels = tf.one_hot(labels, depth=num_labels, dtype=tf.float32)
        # Sigmoid cross-entropy per label, summed over the label dimension;
        # this matches the sigmoid probabilities returned above.
        per_example_loss = tf.reduce_sum(
            tf.nn.sigmoid_cross_entropy_with_logits(
                labels=one_hot_labels, logits=logits),
            axis=-1)
        loss = tf.reduce_mean(per_example_loss)
    return loss, per_example_loss, logits, probabilities
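
# A minimal usage sketch for the classifier above, assuming the standard
# google-research/bert package layout. The config path, sequence length,
# label count, and learning rate are illustrative assumptions, not values
# from the snippet.
import tensorflow as tf
from bert import modeling  # assumed import path

MAX_SEQ_LENGTH = 128  # assumed
NUM_LABELS = 5        # assumed

bert_config = modeling.BertConfig.from_json_file("bert_config.json")  # assumed path
input_ids = tf.placeholder(tf.int32, [None, MAX_SEQ_LENGTH], name="input_ids")
input_mask = tf.placeholder(tf.int32, [None, MAX_SEQ_LENGTH], name="input_mask")
labels = tf.placeholder(tf.int32, [None], name="labels")

loss, per_example_loss, logits, probabilities = create_model(
    bert_config, is_training=True, input_ids=input_ids,
    input_mask=input_mask, num_labels=NUM_LABELS, labels=labels)
train_op = tf.train.AdamOptimizer(learning_rate=2e-5).minimize(loss)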
def create_model(self, is_training=False, use_one_hot_embedding=True):
    """Create a classification model."""
    model = modeling.BertModel(
        config=self.bert_config,
        is_training=is_training,
        input_ids=self.ids_placeholder,
        input_mask=self.mask_placeholder,
        token_type_ids=self.segment_placeholder,
        use_one_hot_embeddings=use_one_hot_embedding)
    # Pooled [CLS] representation, shape [batch_size, hidden_size].
    output_layer = model.get_pooled_output()
    hidden_size = output_layer.shape[-1].value
    output_weights = tf.get_variable(
        "output_weights", [self.num_labels, hidden_size],
        initializer=tf.truncated_normal_initializer(stddev=0.02))
    output_bias = tf.get_variable(
        "output_bias", [self.num_labels], initializer=tf.zeros_initializer())
    with tf.variable_scope("loss"):
        if is_training:
            output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)
        logits = tf.matmul(output_layer, output_weights, transpose_b=True)
        logits = tf.nn.bias_add(logits, output_bias)
        # Multi-label objective: independent sigmoid cross-entropy per label.
        label_loss = tf.nn.sigmoid_cross_entropy_with_logits(
            labels=self.labels_placeholder, logits=logits)
        loss = tf.reduce_mean(label_loss)
        sigmoid_logits = tf.math.sigmoid(logits)
    return loss, logits, sigmoid_logits
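
# Sketch of initializing the variables above from a pretrained BERT
# checkpoint via get_assignment_map_from_checkpoint from the
# google-research/bert modeling module. The `classifier` instance and the
# checkpoint path are assumptions for illustration.
loss, logits, sigmoid_logits = classifier.create_model(is_training=True)
init_checkpoint = "bert_model.ckpt"  # assumed path
tvars = tf.trainable_variables()
assignment_map, _ = modeling.get_assignment_map_from_checkpoint(
    tvars, init_checkpoint)
tf.train.init_from_checkpoint(init_checkpoint, assignment_map)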
def create_model(self, use_one_hot_embeddings=True):
    model = modeling.BertModel(
        config=self.bert_config,
        is_training=self.is_training,
        input_ids=self.ids_placeholder,
        input_mask=self.mask_placeholder,
        token_type_ids=self.segment_placeholder,
        use_one_hot_embeddings=use_one_hot_embeddings)
    # output_layer shape is [batch_size, max_seq_length, hidden_size]
    output_layer = model.get_sequence_output()
    if self.is_training:
        output_layer = tf.keras.layers.Dropout(rate=0.1)(output_layer)
    logits = hidden2tag(output_layer, self.num_labels)  # TODO test shape
    logits = tf.reshape(logits, [-1, FLAGS.max_seq_length, self.num_labels])
    if FLAGS.crf:
        mask2len = tf.reduce_sum(self.mask_placeholder, axis=1)
        loss, trans = crf_loss(logits, self.labels_placeholder,
                               self.mask_placeholder, self.num_labels, mask2len)
        predict, viterbi_score = tf.contrib.crf.crf_decode(logits, trans, mask2len)
        return loss, logits, predict
    else:
        loss, predict = softmax_layer(logits, self.labels_placeholder,
                                      self.num_labels, self.mask_placeholder)
        return loss, logits, predict
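
# hidden2tag is referenced above but not defined in this snippet. A minimal
# sketch of what it plausibly does: project each token's hidden vector onto
# the tag space with a shared dense layer. This is an assumption, not the
# original helper.
def hidden2tag(hidden_output, num_labels):
    # hidden_output: [batch_size, seq_length, hidden_size]
    # returns per-token tag logits: [batch_size, seq_length, num_labels]
    return tf.layers.dense(hidden_output, num_labels, activation=None)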
def create_model(bert_config, is_training, input_ids, input_mask, num_labels,
                 filters, kernel_size, strides, pool_size, labels=None):
    model = modeling.BertModel(
        config=bert_config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        use_one_hot_embeddings=False)
    # Last encoder layer, shape [batch_size, max_len, hidden_size].
    bert_embedding = model.get_all_encoder_layers()[-1]
    _, max_len, dim = bert_embedding.shape
    # Add a channel axis so the token sequence can be fed to conv2d.
    x_input = tf.reshape(bert_embedding, shape=[-1, max_len, dim, 1])
    with tf.variable_scope("loss"):
        if is_training:
            dropout = 0.1
        else:
            dropout = 0
        # The kernel spans the full hidden dimension, so the convolution
        # slides over token positions only.
        conv = tf.layers.conv2d(x_input, filters, kernel_size=(kernel_size, dim),
                                strides=strides, activation=tf.nn.relu)
        pool = tf.layers.max_pooling2d(conv, pool_size=(pool_size, 1), strides=strides)
        fc1 = tf.layers.flatten(pool, name="fc1")
        fc2 = tf.layers.dense(fc1, 128)
        fc2 = tf.layers.dropout(fc2, rate=dropout)
        out = tf.layers.dense(fc2, num_labels)
        probabilities = tf.nn.softmax(out, axis=-1)
        if labels is None:
            return probabilities
        log_probs = tf.nn.log_softmax(out, axis=-1)
        one_hot_labels = tf.one_hot(labels, depth=num_labels, dtype=tf.float32)
        per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs, axis=-1)
        loss = tf.reduce_mean(per_example_loss)
    return loss, per_example_loss, out, probabilities
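
# Illustrative call for the CNN head above; every value here is an assumed
# hyperparameter chosen to make the shape flow concrete. With max_len=128,
# hidden dim=768, kernel_size=3, strides=1, pool_size=2, filters=64:
#   conv: [batch, 126, 1, 64], pool: [batch, 125, 1, 64].
loss, per_example_loss, out, probabilities = create_model(
    bert_config, is_training=True, input_ids=input_ids, input_mask=input_mask,
    num_labels=2, filters=64, kernel_size=3, strides=1, pool_size=2, labels=labels)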
def model(self, input_ids, input_mask, seq_lens, labels):
    """Build the model.

    :param input_ids:
    :param input_mask:
    :param seq_lens:
    :param labels:
    :return:
    """
    bert_config_file = os.path.join(self.bert_path, 'bert_config.json')
    bert_config = modeling.BertConfig.from_json_file(bert_config_file)
    bert_model = modeling.BertModel(
        config=bert_config,
        is_training=False,
        input_ids=input_ids,
        input_mask=input_mask,
        use_one_hot_embeddings=False)
    # Contextual embeddings from the configured encoder layer.
    bert_embedding = bert_model.get_all_encoder_layers()[self.encoder_layer]
    cell_fw = tf.nn.rnn_cell.LSTMCell(self.num_units)
    cell_fw = tf.nn.rnn_cell.DropoutWrapper(cell_fw, output_keep_prob=1 - self.dropout)
    cell_bw = tf.nn.rnn_cell.LSTMCell(self.num_units)
    cell_bw = tf.nn.rnn_cell.DropoutWrapper(cell_bw, output_keep_prob=1 - self.dropout)
    # Strip the [CLS] and [SEP] positions before the BiLSTM.
    ((rnn_fw_outputs, rnn_bw_outputs),
     (rnn_fw_final_state, rnn_bw_final_state)) = tf.nn.bidirectional_dynamic_rnn(
        cell_fw=cell_fw,
        cell_bw=cell_bw,
        inputs=bert_embedding[:, 1:-1, :],
        sequence_length=seq_lens,
        dtype=tf.float32)
    rnn_outputs = tf.add(rnn_fw_outputs, rnn_bw_outputs)
    logits_seq = tf.layers.dense(rnn_outputs, len(self.tag2label))
    log_likelihood, transition_matrix = tf.contrib.crf.crf_log_likelihood(
        logits_seq, labels, seq_lens)
    preds_seq, crf_scores = tf.contrib.crf.crf_decode(
        logits_seq, transition_matrix, seq_lens)
    return preds_seq, log_likelihood
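
# The function above returns the raw CRF log-likelihood rather than a loss.
# A standard way to train it (an assumption, not shown in the snippet) is to
# minimize the negative mean log-likelihood. `tagger` is an assumed instance.
preds_seq, log_likelihood = tagger.model(input_ids, input_mask, seq_lens, labels)
loss = -tf.reduce_mean(log_likelihood)
train_op = tf.train.AdamOptimizer(learning_rate=1e-5).minimize(loss)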
def model(self, input_ids, input_mask, seq_lens, labels):
    """Build the model.

    :param input_ids:
    :param input_mask:
    :param seq_lens:
    :param labels:
    :return:
    """
    bert_config_file = os.path.join(self.bert_path, 'bert_config.json')
    bert_config = modeling.BertConfig.from_json_file(bert_config_file)
    bert_model = modeling.BertModel(
        config=bert_config,
        is_training=False,
        input_ids=input_ids,
        input_mask=input_mask,
        use_one_hot_embeddings=False)
    bert_embedding = bert_model.get_all_encoder_layers()[self.encoder_layer]
    # Project BERT token embeddings (minus [CLS]/[SEP]) straight to tag logits.
    logits_seq = tf.layers.dense(bert_embedding[:, 1:-1, :], len(self.tag2label))
    log_likelihood, transition_matrix = tf.contrib.crf.crf_log_likelihood(
        logits_seq, labels, seq_lens)
    preds_seq, crf_scores = tf.contrib.crf.crf_decode(
        logits_seq, transition_matrix, seq_lens)
    return preds_seq, log_likelihood
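
# Sketch of running Viterbi decoding for the variant above and mapping tag
# ids back to tag names. Decoding needs no gold labels, so only the input
# tensors are fed; the numpy batches and the id-to-tag inversion are
# assumptions for illustration.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    pred_ids = sess.run(preds_seq, feed_dict={
        input_ids: batch_ids,          # assumed numpy batch
        input_mask: batch_mask,
        seq_lens: batch_seq_lens,
    })
    id2tag = {v: k for k, v in tagger.tag2label.items()}
    tags = [[id2tag[i] for i in row[:n]]
            for row, n in zip(pred_ids, batch_seq_lens)]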