def __init__(self, batch_size, n_class, ball_num, w_size, embedding_size, words_size, hidden_size, layer_size):
    """Build the CRF tagging graph: inputs -> embedding -> per-ball LSTM
    encoders -> stacked sequence LSTMs -> dense emissions -> CRF loss/decode.

    Args:
        batch_size: fixed batch size used by all Input layers.
        n_class: number of CRF tag classes (dense output units).
        ball_num: number of balls, i.e. the CRF sequence length.
        w_size: window size (time steps fed to each per-ball LSTM).
        embedding_size: embedding vector dimension.
        words_size: vocabulary size of the embedding.
        hidden_size: hidden units of every LSTM.
        layer_size: number of stacked sequence-level LSTM layers.
    """
    self._inputs = tf.keras.layers.Input(
        shape=(w_size, ball_num), batch_size=batch_size, name="red_inputs"
    )
    self._tag_indices = tf.keras.layers.Input(
        shape=(ball_num,), batch_size=batch_size, dtype=tf.int32, name="red_tag_indices"
    )
    self._sequence_length = tf.keras.layers.Input(
        shape=(), batch_size=batch_size, dtype=tf.int32, name="sequence_length"
    )
    # Feature extraction: embed tokens, then run one LSTM per ball over its window.
    embedding = tf.keras.layers.Embedding(words_size, embedding_size)(self._inputs)
    first_lstm = tf.convert_to_tensor(
        [tf.keras.layers.LSTM(hidden_size)(embedding[:, :, i, :]) for i in range(ball_num)]
    )
    # (ball_num, batch, hidden) -> (batch, ball_num, hidden)
    first_lstm = tf.transpose(first_lstm, perm=[1, 0, 2])
    # BUG FIX: the original loop applied every LSTM layer to `first_lstm`, so
    # all but the last layer's output was discarded and `layer_size` had no
    # effect beyond 1. Chain each layer on the previous output to truly stack.
    second_lstm = first_lstm
    for _ in range(layer_size):
        second_lstm = tf.keras.layers.LSTM(hidden_size, return_sequences=True)(second_lstm)
    self._outputs = tf.keras.layers.Dense(n_class)(second_lstm)
    # CRF loss (sum of per-example negative log-likelihood).
    self._log_likelihood, self._transition_params = crf_log_likelihood(
        self._outputs, self._tag_indices, self._sequence_length
    )
    self._loss = tf.reduce_sum(-self._log_likelihood)
    # Viterbi decoding for prediction.
    self._pred_sequence, self._viterbi_score = crf_decode(
        self._outputs, self._transition_params, self._sequence_length
    )
def compute_loss(self, x, y, sample_weight=None, training=True):
    """Return (mean CRF negative log-likelihood, softmax cross-entropy loss).

    The model forward pass is expected to yield a 5-tuple of
    (viterbi_sequence, potentials, sequence_length, chain_kernel, logits).
    """
    viterbi_sequence, potentials, sequence_length, chain_kernel, logits = self(x, training=training)
    log_likelihood = crf_log_likelihood(potentials, y, sequence_length, chain_kernel)[0]
    crf_loss = -log_likelihood
    cc_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(y, logits)
    # NOTE(review): cc_loss is reduced to a weighted scalar only when
    # sample_weight is given; otherwise the per-token tensor is returned
    # as-is — confirm callers expect that asymmetry.
    if sample_weight is not None:
        cc_loss = tf.reduce_sum(cc_loss * sample_weight) / tf.reduce_sum(sample_weight)
    return tf.reduce_mean(crf_loss), cc_loss
def crf_loss(self, input, tag_ids, sentence_len_list):
    """Mean CRF negative log-likelihood; refreshes self.trans_p in place."""
    log_likelihood, self.trans_p = crf.crf_log_likelihood(
        inputs=input,
        tag_indices=tag_ids,
        transition_params=self.trans_p,
        sequence_lengths=sentence_len_list,
    )
    return tf.reduce_mean(-log_likelihood)
def compute_loss(self, x, y, training=False):
    """Return (mean CRF loss, sum of the model's regularization losses)."""
    _, potentials, sequence_length, chain_kernel = self(x, training=training)
    neg_log_likelihood = -crf_log_likelihood(potentials, y, sequence_length, chain_kernel)[0]
    return tf.reduce_mean(neg_log_likelihood), sum(self.losses)
def get_negative_log_likelihood(self, y_true):
    """Per-example CRF negative log-likelihood for integer tag sequences."""
    labels = tf.cast(y_true, tf.int32)
    # Side effect kept from the original: sequence_length is re-cast in place.
    self.sequence_length = tf.cast(self.sequence_length, tf.int32)
    log_likelihood, _ = crf_log_likelihood(
        self.potentials, labels, self.sequence_length, self.chain_kernel
    )
    return -log_likelihood
def loss(self, y_true, y_pred):
    """Mean CRF negative log-likelihood.

    y_true is one-hot encoded (argmax'ed to indices here); y_pred holds the
    emission scores. Updates self.transitions with the fitted parameters.
    """
    y_pred = tf.convert_to_tensor(y_pred, dtype=self.dtype)
    tag_ids = tf.cast(tf.keras.backend.argmax(y_true), dtype=tf.int32)
    log_likelihood, self.transitions = crf_log_likelihood(
        y_pred,
        tag_ids,
        self.sequence_lengths,
        transition_params=self.transitions,
    )
    return tf.reduce_mean(-log_likelihood)
def compute_loss(self, x, y, sample_weight, training=False):
    """Forward pass plus CRF loss.

    Returns (viterbi_sequence, sequence_length, mean CRF loss), with the
    per-example loss optionally scaled by sample_weight before averaging.
    """
    viterbi_sequence, potentials, sequence_length, chain_kernel = self(x, training=training)
    # Per-example CRF negative log-likelihood.
    neg_log_likelihood = -crf_log_likelihood(potentials, y, sequence_length, chain_kernel)[0]
    if sample_weight is not None:
        neg_log_likelihood = neg_log_likelihood * sample_weight
    return viterbi_sequence, sequence_length, tf.reduce_mean(neg_log_likelihood)
def call(self, y_true, y_pred):
    """Keras loss entry point: mean CRF negative log-likelihood.

    y_pred is the 4-tuple emitted by the CRF layer
    (decoded_sequences, potentials, sequence_lengths, chain_kernel).
    """
    tags = tf.convert_to_tensor(y_true)
    _, potentials, sequence_lengths, chain_kernel = y_pred
    log_likelihood, _ = crf_log_likelihood(
        inputs=potentials,
        tag_indices=tags,
        sequence_lengths=sequence_lengths,
        transition_params=chain_kernel,
    )
    return -tf.reduce_mean(log_likelihood)
def get_negative_log_likelihood(self, y_true):
    """Per-example CRF negative log-likelihood.

    Accepts either integer tag indices or (rank-3) one-hot labels.
    """
    labels = tf.cast(y_true, tf.int32)
    # Rank 3 means one-hot encoding: collapse the class axis to indices.
    if len(K.int_shape(labels)) == 3:
        labels = K.argmax(labels, axis=-1)
    self.sequence_length = tf.cast(self.sequence_length, tf.int32)
    log_likelihood, _ = crf_log_likelihood(
        self.potentials, labels, self.sequence_length, self.chain_kernel
    )
    return -log_likelihood
def call(self, inputs, inputs_length, targets, training=None):
    """Forward pass: embeddings -> dropout -> BiLSTM -> dense -> CRF.

    When use_bert is set, `inputs` is assumed to already be embeddings.
    Returns (logits, log_likelihood, transition_params) and updates
    self.transition_params in place.
    """
    if self.use_bert:
        embedded = inputs
    else:
        embedded = self.embedding(inputs)
    dropped = self.dropout(embedded, training)
    logits = self.dense(self.bilstm(dropped))
    labels = tf.convert_to_tensor(targets, dtype=tf.int64)
    log_likelihood, self.transition_params = crf_log_likelihood(
        logits, labels, inputs_length, transition_params=self.transition_params
    )
    return logits, log_likelihood, self.transition_params
def get_negative_log_likelihood(self, y_true):
    """Per-example CRF negative log-likelihood for one-hot encoded labels."""
    # Convert the 3D one-hot tensor to a 2D matrix of tag indices, which is
    # the form crf_log_likelihood expects.
    labels = tf.keras.backend.argmax(y_true, axis=-1)
    labels = tf.cast(labels, tf.int32)
    self.sequence_length = tf.cast(self.sequence_length, tf.int32)
    log_likelihood, _ = crf_log_likelihood(
        self.potentials, labels, self.sequence_length, self.chain_kernel
    )
    return -log_likelihood
def get_negative_log_likelihood(self, y_true):
    """Per-example CRF negative log-likelihood with explicit dtype coercion."""
    # TODO: remove these typing casts once dtypes are fixed upstream.
    self.potentials = tf.keras.backend.cast(self.potentials, tf.float32)
    self.sequence_length = tf.keras.backend.cast(self.sequence_length, tf.int32)
    labels = tf.keras.backend.cast(y_true, tf.int32)
    log_likelihood, _ = crf_log_likelihood(
        self.potentials, labels, self.sequence_length, self.chain_kernel
    )
    return -log_likelihood
def _compute_loss(self, x, y, sample_weight, training=False):
    """Forward pass plus CRF loss.

    Args:
        x: model inputs.
        y: gold tag indices.
        sample_weight: optional per-example weight tensor, or None.
        training: forwarded to the model's forward pass.

    Returns:
        (mean CRF loss, sum of layer regularization losses, y_pred).
    """
    # BUG FIX: the original hard-coded self(x, training=True), silently
    # ignoring the `training` argument (so dropout etc. stayed on in eval).
    y_pred, potentials, sequence_length, chain_kernel = self(x, training=training)
    crf_loss = -crf_log_likelihood(potentials, y, sequence_length, chain_kernel)[0]
    # BUG FIX: `if sample_weight:` evaluates tensor truthiness, which is
    # ambiguous (and raises in graph mode); compare against None explicitly.
    if sample_weight is not None:
        crf_loss = crf_loss * sample_weight
    crf_loss = tf.reduce_mean(crf_loss)
    return crf_loss, sum(self.losses), y_pred
def crf_loss(inputs, tag_indices, transition_params, lengths):
    """Mean CRF negative log-likelihood over the batch.

    Args:
        inputs: emission scores, shape [B, L, N].
        tag_indices: gold tag indices per step.
        transition_params: CRF transition matrix.
        lengths: true sequence lengths, shape [B].

    Returns:
        Scalar loss tensor.
    """
    log_likelihood, _ = crf_log_likelihood(
        inputs=inputs,
        tag_indices=tag_indices,
        sequence_lengths=lengths,
        transition_params=transition_params,
    )
    return -tf.reduce_mean(log_likelihood)
def loss(self, y_true, y_pred):
    """Mean negative log-likelihood of tag sequences under the CRF.

    Args:
        y_true: gold tags (passed through as-is; enable the commented cast
            below if labels arrive one-hot encoded).
        y_pred: (batch_size, n_steps, n_classes) emission scores.

    Returns:
        Scalar loss; also updates self.transition_params in place.
    """
    # y_true = tf.cast(tf.argmax(y_true, axis=-1), tf.int32)  # for one-hot labels
    log_likelihood, self.transition_params = crf.crf_log_likelihood(
        y_pred, y_true, self.sequence_lengths, self.transition_params
    )
    return -tf.reduce_mean(log_likelihood)
def return_crf_result(self, labels: tf.Tensor, logits: tf.Tensor, mode: str,
                      input_mask: tf.Tensor):
    """Run CRF decoding; outside PREDICT mode also register loss and accuracy.

    Returns a one-hot tensor so the CRF prediction has the same shape as the
    non-CRF prediction head.
    """
    input_mask.set_shape([None, None])
    logits = create_dummy_if_empty(logits)
    input_mask = create_dummy_if_empty(input_mask)
    viterbi_decoded, potentials, sequence_length, chain_kernel = self.crf(
        logits, input_mask)
    if mode != tf.estimator.ModeKeys.PREDICT:
        neg_log_likelihood = -crf_log_likelihood(
            potentials, labels, sequence_length, chain_kernel)[0]
        mean_loss = nan_loss_handling(tf.reduce_mean(neg_log_likelihood))
        self.add_loss(mean_loss)
        accuracy = self.metric_fn(labels, viterbi_decoded, sample_weight=input_mask)
        self.add_metric(accuracy)
    return tf.one_hot(
        viterbi_decoded,
        name='%s_predict' % self.problem_name,
        depth=self.params.num_classes[self.problem_name],
    )
def call(self, inputs, inputs_length, targets, training=None):
    """Forward pass: embeddings (fine-tuned BERT, precomputed BERT, or a
    learned table) -> dropout -> optional BiLSTM -> dense -> CRF.

    Returns (logits, log_likelihood, transition_params) and updates
    self.transition_params in place.
    """
    if self.use_bert:
        if self.finetune:
            # inputs = (token_ids, attention_mask); take the sequence output.
            embedded = self.bert_model(inputs[0], attention_mask=inputs[1])[0]
        else:
            embedded = inputs
    else:
        embedded = self.embedding(inputs)
    features = self.dropout(embedded, training)
    if self.use_bilstm:
        features = self.bilstm(features)
    logits = self.dense(features)
    labels = tf.convert_to_tensor(targets, dtype=tf.int32)
    log_likelihood, self.transition_params = crf_log_likelihood(
        logits, labels, inputs_length, transition_params=self.transition_params
    )
    return logits, log_likelihood, self.transition_params
def compute_loss(self, inputs: TensorLike, tag_indices: TensorLike,
                 sequence_lengths: TensorLike,
                 transition_params: Optional[TensorLike]) -> tf.Tensor:
    """Mean CRF negative log-likelihood over the batch."""
    log_likelihood, _ = crf_log_likelihood(
        inputs, tag_indices, sequence_lengths, transition_params
    )
    return tf.reduce_mean(-log_likelihood)