class BertSlu(Model):
    def __init__(self, pretrained_layer: TransformersPretrainedLayer,
                 dropout_rate, vocab: Vocabulary, *args, **kwargs):
        super().__init__(vocab=vocab, *args, **kwargs)
        # _vocab_size = self._vocab.get_vocab_size("text")
        _intent_size = self._vocab.get_vocab_size("intent")
        _slot_size = self._vocab.get_vocab_size("tags")
        self.pretrained_layer = pretrained_layer
        self.dropout1 = tf.keras.layers.Dropout(rate=dropout_rate)
        self.dropout2 = tf.keras.layers.Dropout(rate=dropout_rate)
        self.intent_output_dense = tf.keras.layers.Dense(_intent_size)
        self.slot_output_dense = tf.keras.layers.Dense(_slot_size)
        self.intent_loss = SequenceClassificationLoss()
        self.slot_loss = TokenClassificationLoss()

    def init_metrics(self) -> Dict[str, Metric]:
        return {"nlu_acc": NluAccMetric()}

    def call(self,
             input_ids,
             attention_mask,
             token_type_ids,
             intent_ids=None,
             tags_ids=None,
             training=True,
             mask=None) -> Dict:
        _bert_output = self.pretrained_layer(input_ids=input_ids,
                                             attention_mask=attention_mask,
                                             token_type_ids=token_type_ids,
                                             training=training)
        hidden_states, pooler = _bert_output.last_hidden_state, _bert_output.pooler_output
        pooler = self.dropout1(pooler, training=training)
        intent_logits = self.intent_output_dense(pooler)
        hidden_states = self.dropout2(hidden_states, training=training)
        slot_logits = self.slot_output_dense(hidden_states)
        output_dict = {
            "intent_logits": intent_logits,
            "slot_logits": slot_logits
        }
        if intent_ids is not None and tags_ids is not None:
            _intent_loss = self.intent_loss.compute_loss(y_true=intent_ids,
                                                         y_pred=intent_logits)
            _slot_loss = self.slot_loss.compute_loss(y_true=tags_ids,
                                                     y_pred=slot_logits)
            output_dict["loss"] = _intent_loss + _slot_loss
            self._metrics["nlu_acc"].update_state(
                y_true=[intent_ids, tags_ids],
                y_pred=[intent_logits, slot_logits])
        return output_dict
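
# --- Shape sketch for BertSlu's two heads (illustrative only, not model code) ---
# BertSlu puts two heads on a BERT-style encoder: the pooled [CLS] vector feeds the
# intent classifier and the per-token hidden states feed the slot tagger. The encoder
# is mocked with random tensors and toy label counts here, since the real
# TransformersPretrainedLayer and Vocabulary are project-specific.
def _bert_slu_head_sketch():
    import tensorflow as tf
    batch, seq_len, hidden = 2, 16, 768
    pooler_output = tf.random.normal([batch, hidden])               # stand-in for _bert_output.pooler_output
    last_hidden_state = tf.random.normal([batch, seq_len, hidden])  # stand-in for _bert_output.last_hidden_state
    intent_dense = tf.keras.layers.Dense(7)                         # 7 intents (toy)
    slot_dense = tf.keras.layers.Dense(12)                          # 12 slot tags (toy)
    intent_logits = intent_dense(pooler_output)                     # (batch, num_intents)
    slot_logits = slot_dense(last_hidden_state)                     # (batch, seq_len, num_tags)
    return intent_logits, slot_logits
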
class SfId(Model):
    def __init__(self, embedding_dim, hidden_dim, dropout_rate,
                 vocab: Vocabulary, priority_order: str = "slot_first",
                 iteration_num: int = 1, use_crf: bool = False,
                 *args, **kwargs):
        super().__init__(vocab=vocab, *args, **kwargs)
        _vocab_size = self._vocab.get_vocab_size("text")
        _intent_size = self._vocab.get_vocab_size("intent")
        _slot_size = self._vocab.get_vocab_size("tags")
        self._use_crf = use_crf
        self.embedding = tf.keras.layers.Embedding(input_dim=_vocab_size,
                                                   output_dim=embedding_dim,
                                                   mask_zero=True)
        self.bi_lstm = tf.keras.layers.Bidirectional(
            tf.keras.layers.LSTM(hidden_dim,
                                 return_sequences=True,
                                 return_state=True))
        self.dropout = tf.keras.layers.Dropout(rate=dropout_rate)
        self.slot_gated_attention = SlotGatedAttention(attn_size=2 * hidden_dim,
                                                       remove_slot_attn=False)
        self.sf_id_subnet_stack = [
            SfIdSubnet(attn_size=2 * hidden_dim,
                       priority_order=priority_order,
                       id=i,
                       iteration_num=iteration_num)
            for i in range(iteration_num)
        ]
        self.intent_output_dense = tf.keras.layers.Dense(_intent_size)
        self.slot_output_dense = tf.keras.layers.Dense(_slot_size)
        self.iteration_num = iteration_num
        self.intent_loss = SequenceClassificationLoss()
        if use_crf:
            self.crf = tfa.layers.CRF(_slot_size, use_kernel=False)
            self.slot_loss = CrfLoss()
        else:
            self.slot_loss = TokenClassificationLoss()

    def init_metrics(self) -> Dict[str, Metric]:
        return {
            "nlu_acc": NluAccMetric(),
            "f1_score":
            SeqEvalF1Metric(label_map=self._vocab._index_to_token["tags"])
        }

    def call(self, input_ids, intent_ids=None, tags_ids=None, mask=None,
             training=True) -> Dict:
        inputs = self.embedding(input_ids)
        hidden, forward_h, forward_c, backword_h, backword_c = self.bi_lstm(
            inputs)  # (b, s, 2*e) (b, e) (b, e) (b, e) (b, e)
        hidden = self.dropout(hidden, training=training)
        final_state = tf.concat([forward_h, backword_h], axis=-1)  # (b, 2*e)
        c_slot, c_intent = self.slot_gated_attention(hidden, final_state)
        for _id, _sf_id_subnet in enumerate(self.sf_id_subnet_stack):
            if _id == self.iteration_num - 1:
                slot_output, intent_output, r_intent, slot_reinforce_state = _sf_id_subnet(
                    lstm_enc=hidden,
                    final_state=final_state,
                    c_slot=c_slot,
                    c_intent=c_intent)
            else:
                r_intent, slot_reinforce_state = _sf_id_subnet(
                    lstm_enc=hidden,
                    final_state=final_state,
                    c_slot=c_slot,
                    c_intent=c_intent)
            c_slot = slot_reinforce_state
            c_intent = r_intent
        y_slot = self.slot_output_dense(slot_output)
        y_intent = self.intent_output_dense(intent_output)
        output_dict = {"intent_logits": y_intent, "slot_logits": y_slot}
        if self._use_crf:
            decoded_sequence, potentials, sequence_length, chain_kernel = self.crf(
                y_slot)
            output_dict["decoded_sequence"] = decoded_sequence
        if intent_ids is not None and tags_ids is not None:
            _intent_loss = self.intent_loss.compute_loss(y_true=intent_ids,
                                                         y_pred=y_intent)
            if self._use_crf:
                _slot_loss = self.slot_loss.compute_loss(
                    potentials, tags_ids, sequence_length, chain_kernel)
            else:
                _slot_loss = self.slot_loss.compute_loss(y_true=tags_ids,
                                                         y_pred=y_slot)
            output_dict["loss"] = _intent_loss + _slot_loss
            slot_pred = decoded_sequence if self._use_crf else y_slot
            self._metrics["nlu_acc"].update_state(
                y_true=[intent_ids, tags_ids], y_pred=[y_intent, slot_pred])
            if not training:
                self._metrics["f1_score"].update_state(y_true=tags_ids,
                                                       y_pred=slot_pred)
        return output_dict
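
# --- CRF-path sketch (illustrative only, not model code) ---
# When use_crf=True, tfa.layers.CRF returns the decoded tag sequence plus the
# potentials, sequence lengths, and transition kernel that the slot loss consumes.
# The negative log-likelihood below is what a CrfLoss implementation would typically
# compute; this is an assumption, the project's CrfLoss may differ in detail.
def _crf_path_sketch():
    import tensorflow as tf
    import tensorflow_addons as tfa
    batch, seq_len, num_tags = 2, 7, 5
    crf = tfa.layers.CRF(num_tags, use_kernel=False)
    slot_logits = tf.random.normal([batch, seq_len, num_tags])
    decoded_sequence, potentials, sequence_length, chain_kernel = crf(slot_logits)
    tags_ids = tf.random.uniform([batch, seq_len], maxval=num_tags, dtype=tf.int32)
    log_likelihood, _ = tfa.text.crf_log_likelihood(potentials, tags_ids,
                                                    sequence_length, chain_kernel)
    slot_loss = -tf.reduce_mean(log_likelihood)   # negative log-likelihood as slot loss
    return decoded_sequence, slot_loss
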
class StackPropagationSlu(Model):
    def __init__(self, embedding_dim, hidden_dim, dropout_rate,
                 vocab: Vocabulary, *args, **kwargs):
        super().__init__(vocab=vocab, *args, **kwargs)
        _vocab_size = self._vocab.get_vocab_size("text")
        _intent_size = self._vocab.get_vocab_size("intent")
        _slot_size = self._vocab.get_vocab_size("tags")
        self.embedding = tf.keras.layers.Embedding(_vocab_size,
                                                   embedding_dim,
                                                   mask_zero=True)
        self.bi_lstm = tf.keras.layers.Bidirectional(
            tf.keras.layers.LSTM(hidden_dim, return_sequences=True))
        self.attention_layer = SelfAttentionLayer(hidden_dim=1024,
                                                  output_dim=128,
                                                  dropout_rate=dropout_rate)
        self.dropout1 = tf.keras.layers.Dropout(rate=dropout_rate)
        self.concat = tf.keras.layers.Concatenate()
        self.intent_decoder_cell = tf.keras.layers.LSTMCell(units=64)
        self.slot_decoder_cell = tf.keras.layers.LSTMCell(units=64)
        self.intent_decoder_dropout = tf.keras.layers.Dropout(
            rate=dropout_rate)
        self.slot_decoder_dropout = tf.keras.layers.Dropout(rate=dropout_rate)
        self.intent_liner_layer = tf.keras.layers.Dense(units=_intent_size)
        self.slot_liner_layer = tf.keras.layers.Dense(units=_slot_size)
        self.intent_embedding = tf.keras.layers.Embedding(_intent_size, 8)
        self.slot_embedding = tf.keras.layers.Embedding(_slot_size, 32)
        self._intent_loss = TokenClassificationLoss()
        self._slot_loss = TokenClassificationLoss()

    def init_metrics(self) -> Dict[str, Metric]:
        return {"nlu_acc": NluAccMetric()}

    # @tf.function()
    def call(self, input_ids, intent_ids=None, tags_ids=None, mask=None,
             training=True):
        x = self.embedding(input_ids)  # (b, s, e)
        x = self.dropout1(x, training=training)
        h = self.bi_lstm(x)  # (b, s, 2e)
        c = self.attention_layer(h)  # (b, s, 2e)
        e = self.concat([h, c])
        # intent_decoder
        _intent_h_state, _intent_c_state = tf.zeros(
            [x.shape[0], 64]), tf.zeros([x.shape[0], 64])  # (b, 64)
        _slot_h_state, _slot_c_state = tf.zeros(
            [x.shape[0], 64]), tf.zeros([x.shape[0], 64])  # (b, 64)
        # https://stackoverflow.com/questions/64567161/tensorflow-cannot-be-accessed-here-it-is-defined-in-another-function-or-code-b
        y_intent, y_slot = tf.TensorArray(
            dtype=tf.float32, size=0, dynamic_size=True), tf.TensorArray(
                dtype=tf.float32, size=0, dynamic_size=True)
        # y_intent, y_slot = [], []
        prev_intent_tensor = tf.zeros([x.shape[0], 8])
        prev_slot_tensor = tf.zeros([x.shape[0], 32])
        for i in tf.range(x.shape[1]):
            _hidden = e[:, i, :]
            _intent_hidden = tf.concat([_hidden, prev_intent_tensor], axis=-1)
            # apply dropout
            _intent_hidden = self.intent_decoder_dropout(_intent_hidden,
                                                         training=training)
            _intent_h_state, (_intent_h_state,
                              _intent_c_state) = self.intent_decoder_cell(
                                  _intent_hidden,
                                  states=[_intent_h_state, _intent_c_state])
            _h_intent_i = self.intent_liner_layer(_intent_h_state)
            y_intent = y_intent.write(i, _h_intent_i)
            # y_intent.append(_h_intent_i)
            prev_intent_tensor = self.intent_embedding(
                tf.argmax(_h_intent_i, axis=-1))
            # slot_decoder
            _slot_hidden = tf.concat([_hidden, _h_intent_i, prev_slot_tensor],
                                     axis=-1)
            # apply dropout
            _slot_hidden = self.slot_decoder_dropout(_slot_hidden,
                                                     training=training)
            _slot_h_state, (_slot_h_state,
                            _slot_c_state) = self.slot_decoder_cell(
                                _slot_hidden,
                                states=[_slot_h_state, _slot_c_state])
            _h_slot_i = self.slot_liner_layer(_slot_h_state)
            y_slot = y_slot.write(i, _h_slot_i)
            # y_slot.append(_h_slot_i)
            prev_slot_tensor = self.slot_embedding(
                tf.argmax(_h_slot_i, axis=-1))
        # Note: reshape must not be used here; transpose and reshape give different results.
        # Wrong: tf.reshape(y_intent.stack(), [x.shape[0], x.shape[1], -1])
        y_intent = tf.transpose(y_intent.stack(), [1, 0, 2])
        y_slot = tf.transpose(y_slot.stack(), [1, 0, 2])
        o_intent = self.get_o_intent(intent_pred=y_intent, mask=x._keras_mask)
        output_dict = {"intent_logits": o_intent, "slot_logits": y_slot}
        if intent_ids is not None and tags_ids is not None:
            _intent_ids = tf.broadcast_to(intent_ids, tags_ids.shape)
            active_loss = tags_ids != -100
            _intent_loss = self._intent_loss.compute_loss(
                y_true=tf.boolean_mask(_intent_ids, active_loss),
                y_pred=tf.boolean_mask(y_intent, active_loss))
            _slot_loss = self._slot_loss.compute_loss(y_true=tags_ids,
                                                      y_pred=y_slot)
            output_dict["loss"] = _intent_loss + _slot_loss
            self._metrics["nlu_acc"].update_state(
                y_true=[intent_ids, tags_ids], y_pred=[o_intent, y_slot])
        return output_dict

    @staticmethod
    def get_o_intent(intent_pred, mask):
        mask = tf.cast(mask, dtype=tf.int32)
        o_intent = tf.argmax(intent_pred, axis=-1)
        seq_lengths = tf.reduce_sum(mask, axis=-1)

        # Take the most common token-level intent as the query (sentence-level) intent.
        # https://www.tensorflow.org/api_docs/python/tf/unique_with_counts
        def get_max_count_intent(_intent):
            _y, _idx, _count = tf.unique_with_counts(_intent)
            _intent = _y[tf.argmax(_count)]
            return [_intent]

        o_intent = tf.convert_to_tensor([
            get_max_count_intent(o_intent[i][:seq_lengths[i]])
            for i in range(len(seq_lengths))
        ], dtype=tf.int32)
        return o_intent
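
# --- Majority-vote sketch (illustrative only, not model code) ---
# get_o_intent reduces the per-token intent predictions of Stack-Propagation to one
# sentence-level intent by majority vote over the unpadded tokens; with toy ids:
def _intent_vote_sketch():
    import tensorflow as tf
    token_intents = tf.constant([3, 3, 1, 3, 2])         # per-token intent ids of one utterance
    values, _, counts = tf.unique_with_counts(token_intents)
    sentence_intent = values[tf.argmax(counts)]          # -> 3, the most frequent intent
    return sentence_intent
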
class SlotGated(Model):
    def __init__(self, embedding_dim, hidden_dim, dropout_rate,
                 vocab: Vocabulary, *args, **kwargs):
        super().__init__(vocab=vocab, *args, **kwargs)
        _vocab_size = self._vocab.get_vocab_size("text")
        _intent_size = self._vocab.get_vocab_size("intent")
        _slot_size = self._vocab.get_vocab_size("tags")
        self.embedding = tf.keras.layers.Embedding(_vocab_size,
                                                   embedding_dim,
                                                   mask_zero=True)
        self.bi_lstm = tf.keras.layers.Bidirectional(
            tf.keras.layers.LSTM(hidden_dim,
                                 return_sequences=True,
                                 return_state=True))
        self.dropout = tf.keras.layers.Dropout(rate=dropout_rate)
        self.slot_gated_attention = SlotGatedAttention(attn_size=2 * hidden_dim,
                                                       remove_slot_attn=False)
        # TODO: add a remove_slot_attn variant
        self.v = self.add_weight(name="v",
                                 shape=(2 * hidden_dim, ),
                                 initializer="glorot_uniform")
        self.intent_liner_layer = tf.keras.layers.Dense(2 * hidden_dim)
        self.intent_output_dense = tf.keras.layers.Dense(_intent_size)
        self.slot_output_dense = tf.keras.layers.Dense(_slot_size)
        self.intent_loss = SequenceClassificationLoss()
        self.slot_loss = TokenClassificationLoss()

    def init_metrics(self):
        return {
            "nlu_acc": NluAccMetric(),
            "f1_score":
            SeqEvalF1Metric(label_map=self._vocab._index_to_token["tags"])
        }

    def call(self, input_ids, intent_ids=None, tags_ids=None, mask=None,
             training=True) -> Dict:
        inputs = self.embedding(input_ids)  # (b, s, e)
        hidden, forward_h, forward_c, backword_h, backword_c = self.bi_lstm(
            inputs)  # (b, s, 2*e) (b, e) (b, e) (b, e) (b, e)
        hidden = self.dropout(hidden, training=training)
        final_state = tf.concat([forward_h, backword_h], axis=-1)  # (b, 2*e)
        c_slot, c_intent = self.slot_gated_attention(
            hidden, final_state)  # (b, s, 2*e) (b, 2*e)
        # formula (6) in paper: g = \sum(v * tanh(C_slot + W * C_intent))
        _c_intent = tf.expand_dims(c_intent, axis=1)
        _c_intent = tf.broadcast_to(_c_intent, c_slot.shape)
        _c_intent = self.intent_liner_layer(_c_intent)  # (b, s, 2*e)
        g = self.v * tf.nn.tanh(c_slot + _c_intent)
        g = tf.reduce_sum(g, axis=-1)  # (b, s)
        g = tf.expand_dims(g, axis=-1)
        # formula (7) in paper: y^S_i = softmax(W_{hy}^S(h_i + c_i^S * g))
        y_slot = self.slot_output_dense(hidden + c_slot * g)
        y_intent = self.intent_output_dense(final_state + c_intent)
        output_dict = {"intent_logits": y_intent, "slot_logits": y_slot}
        if intent_ids is not None and tags_ids is not None:
            _intent_loss = self.intent_loss.compute_loss(y_true=intent_ids,
                                                         y_pred=y_intent)
            _slot_loss = self.slot_loss.compute_loss(y_true=tags_ids,
                                                     y_pred=y_slot)
            output_dict["loss"] = _intent_loss + _slot_loss
            self._metrics["nlu_acc"].update_state(
                y_true=[intent_ids, tags_ids], y_pred=[y_intent, y_slot])
            if not training:
                self._metrics["f1_score"].update_state(y_true=tags_ids,
                                                       y_pred=y_slot)
        return output_dict
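
# --- Slot-gate sketch (illustrative only, not model code) ---
# Standalone version of formulas (6)/(7) above with toy shapes: the scalar gate g_i
# weights how much of the slot context c_i^S is mixed into each hidden state before
# the slot output layer. The local `v` and Dense layer stand in for self.v and
# self.intent_liner_layer.
def _slot_gate_sketch():
    import tensorflow as tf
    batch, seq_len, attn_size = 2, 3, 4
    hidden = tf.random.normal([batch, seq_len, attn_size])
    c_slot = tf.random.normal([batch, seq_len, attn_size])    # slot attention context
    c_intent = tf.random.normal([batch, attn_size])           # intent attention context
    v = tf.random.normal([attn_size])
    intent_proj = tf.keras.layers.Dense(attn_size)
    _c_intent = tf.broadcast_to(tf.expand_dims(c_intent, 1), c_slot.shape)
    g = tf.reduce_sum(v * tf.nn.tanh(c_slot + intent_proj(_c_intent)), axis=-1)
    g = tf.expand_dims(g, axis=-1)                            # (batch, seq_len, 1)
    gated = hidden + c_slot * g                               # input to the slot output dense
    return gated
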