Example #1
    def __init__(self, embedding_dim, hidden_dim, dropout_rate,
                 vocab: Vocabulary, *args, **kwargs):
        super().__init__(vocab=vocab, *args, **kwargs)

        _vocab_size = self._vocab.get_vocab_size("text")
        _intent_size = self._vocab.get_vocab_size("intent")
        _slot_size = self._vocab.get_vocab_size("tags")

        self.embedding = tf.keras.layers.Embedding(_vocab_size,
                                                   embedding_dim,
                                                   mask_zero=True)
        self.bi_lstm = tf.keras.layers.Bidirectional(
            tf.keras.layers.LSTM(hidden_dim, return_sequences=True))
        self.attention_layer = SelfAttentionLayer(hidden_dim=1024,
                                                  output_dim=128,
                                                  dropout_rate=dropout_rate)
        self.dropout1 = tf.keras.layers.Dropout(rate=dropout_rate)

        self.concat = tf.keras.layers.Concatenate()

        self.intent_decoder_cell = tf.keras.layers.LSTMCell(units=64)
        self.slot_decoder_cell = tf.keras.layers.LSTMCell(units=64)
        self.intent_decoder_dropout = tf.keras.layers.Dropout(
            rate=dropout_rate)
        self.slot_decoder_dropout = tf.keras.layers.Dropout(rate=dropout_rate)

        self.intent_liner_layer = tf.keras.layers.Dense(units=_intent_size)
        self.slot_liner_layer = tf.keras.layers.Dense(units=_slot_size)

        self.intent_embedding = tf.keras.layers.Embedding(_intent_size, 8)
        self.slot_embedding = tf.keras.layers.Embedding(_slot_size, 32)
        self._intent_loss = TokenClassificationLoss()
        self._slot_loss = TokenClassificationLoss()
Example #2
    def __init__(self, embedding_dim, hidden_dim, dropout_rate,
                 vocab: Vocabulary, *args, **kwargs):

        super().__init__(vocab=vocab, *args, **kwargs)

        _vocab_size = self._vocab.get_vocab_size("text")
        _intent_size = self._vocab.get_vocab_size("intent")
        _slot_size = self._vocab.get_vocab_size("tags")

        self.embedding = tf.keras.layers.Embedding(_vocab_size,
                                                   embedding_dim,
                                                   mask_zero=True)
        self.bi_lstm = tf.keras.layers.Bidirectional(
            tf.keras.layers.LSTM(hidden_dim,
                                 return_sequences=True,
                                 return_state=True))
        self.dropout = tf.keras.layers.Dropout(rate=dropout_rate)
        self.slot_gated_attention = SlotGatedAttention(
            attn_size=2 * hidden_dim, remove_slot_attn=False)
        # TODO: add a remove_slot_attn variant

        self.v = self.add_weight(name="v",
                                 shape=(2 * hidden_dim, ),
                                 initializer="glorot_uniform")
        self.intent_liner_layer = tf.keras.layers.Dense(2 * hidden_dim)

        self.intent_output_dense = tf.keras.layers.Dense(_intent_size)
        self.slot_output_dense = tf.keras.layers.Dense(_slot_size)
        self.intent_loss = SequenceClassificationLoss()
        self.slot_loss = TokenClassificationLoss()
Example #3
    def __init__(self, pretrained_layer: TransformersPretrainedLayer,
                 dropout_rate, vocab: Vocabulary, *args, **kwargs):
        super().__init__(vocab=vocab, *args, **kwargs)

        # _vocab_size = self._vocab.get_vocab_size("text")
        _intent_size = self._vocab.get_vocab_size("intent")
        _slot_size = self._vocab.get_vocab_size("tags")

        self.pretrained_layer = pretrained_layer
        self.dropout1 = tf.keras.layers.Dropout(rate=dropout_rate)
        self.dropout2 = tf.keras.layers.Dropout(rate=dropout_rate)

        self.intent_output_dense = tf.keras.layers.Dense(_intent_size)
        self.slot_output_dense = tf.keras.layers.Dense(_slot_size)

        self.intent_loss = SequenceClassificationLoss()
        self.slot_loss = TokenClassificationLoss()
Example #4
    def __init__(self,
                 embedding_dim,
                 hidden_dim,
                 dropout_rate,
                 vocab: Vocabulary,
                 priority_order: str = "slot_first",
                 iteration_num: int = 1,
                 use_crf: bool = False,
                 *args,
                 **kwargs):
        super().__init__(vocab=vocab, *args, **kwargs)

        _vocab_size = self._vocab.get_vocab_size("text")
        _intent_size = self._vocab.get_vocab_size("intent")
        _slot_size = self._vocab.get_vocab_size("tags")

        self._use_crf = use_crf
        self.embedding = tf.keras.layers.Embedding(input_dim=_vocab_size,
                                                   output_dim=embedding_dim,
                                                   mask_zero=True)
        self.bi_lstm = tf.keras.layers.Bidirectional(
            tf.keras.layers.LSTM(hidden_dim,
                                 return_sequences=True,
                                 return_state=True))
        self.dropout = tf.keras.layers.Dropout(rate=dropout_rate)
        self.slot_gated_attention = SlotGatedAttention(
            attn_size=2 * hidden_dim, remove_slot_attn=False)

        self.sf_id_subnet_stack = [
            SfIdSubnet(attn_size=2 * hidden_dim,
                       priority_order=priority_order,
                       id=i,
                       iteration_num=iteration_num)
            for i in range(iteration_num)
        ]
        self.intent_output_dense = tf.keras.layers.Dense(_intent_size)
        self.slot_output_dense = tf.keras.layers.Dense(_slot_size)
        self.iteration_num = iteration_num
        self.intent_loss = SequenceClassificationLoss()
        if use_crf:
            self.crf = tfa.layers.CRF(_slot_size, use_kernel=False)
            self.slot_loss = CrfLoss()
        else:
            self.slot_loss = TokenClassificationLoss()
Example #5
class BertSlu(Model):
    def __init__(self, pretrained_layer: TransformersPretrainedLayer,
                 dropout_rate, vocab: Vocabulary, *args, **kwargs):
        super().__init__(vocab=vocab, *args, **kwargs)

        # _vocab_size = self._vocab.get_vocab_size("text")
        _intent_size = self._vocab.get_vocab_size("intent")
        _slot_size = self._vocab.get_vocab_size("tags")

        self.pretrained_layer = pretrained_layer
        self.dropout1 = tf.keras.layers.Dropout(rate=dropout_rate)
        self.dropout2 = tf.keras.layers.Dropout(rate=dropout_rate)

        self.intent_output_dense = tf.keras.layers.Dense(_intent_size)
        self.slot_output_dense = tf.keras.layers.Dense(_slot_size)

        self.intent_loss = SequenceClassificationLoss()
        self.slot_loss = TokenClassificationLoss()

    def init_metrics(self) -> Dict[str, Metric]:
        return {"nlu_acc": NluAccMetric()}

    def call(self,
             input_ids,
             attention_mask,
             token_type_ids,
             intent_ids=None,
             tags_ids=None,
             training=True,
             mask=None) -> Dict:
        _bert_output = self.pretrained_layer(input_ids=input_ids,
                                             attention_mask=attention_mask,
                                             token_type_ids=token_type_ids,
                                             training=training)
        hidden_states, pooler = _bert_output.last_hidden_state, _bert_output.pooler_output
        pooler = self.dropout1(pooler, training=training)
        intent_logits = self.intent_output_dense(pooler)

        hidden_states = self.dropout2(hidden_states, training=training)
        slot_logits = self.slot_output_dense(hidden_states)

        output_dict = {
            "intent_logits": intent_logits,
            "slot_logits": slot_logits
        }
        if intent_ids is not None and tags_ids is not None:
            _intent_loss = self.intent_loss.compute_loss(y_true=intent_ids,
                                                         y_pred=intent_logits)
            _slot_loss = self.slot_loss.compute_loss(y_true=tags_ids,
                                                     y_pred=slot_logits)
            output_dict["loss"] = _intent_loss + _slot_loss

            self._metrics["nlu_acc"].update_state(
                y_true=[intent_ids, tags_ids],
                y_pred=[intent_logits, slot_logits])
        return output_dict
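
BertSlu above follows the usual joint-NLU pattern: one shared encoder feeds a sequence-level intent head (from the pooled output) and a token-level slot head (from the per-token hidden states), and the two cross-entropy losses are simply summed. The framework classes (TransformersPretrainedLayer, Vocabulary, the loss wrappers) are not shown in these examples, so the sketch below reproduces only that pattern with stock Keras layers; every name and size in it is an illustrative assumption, not the repository's API.

import tensorflow as tf

# Toy sizes standing in for the Vocabulary lookups; purely illustrative.
_vocab_size, _intent_size, _slot_size, encoder_dim = 1000, 7, 20, 64

input_ids = tf.keras.Input(shape=(None,), dtype=tf.int32)
# Stand-in encoder: embedding + BiLSTM instead of a pretrained transformer.
states = tf.keras.layers.Embedding(_vocab_size, encoder_dim, mask_zero=True)(input_ids)
states = tf.keras.layers.Bidirectional(
    tf.keras.layers.LSTM(encoder_dim, return_sequences=True))(states)
pooled = tf.keras.layers.GlobalAveragePooling1D()(states)

intent_logits = tf.keras.layers.Dense(_intent_size)(pooled)   # (b, intents)
slot_logits = tf.keras.layers.Dense(_slot_size)(states)       # (b, s, slots)
model = tf.keras.Model(input_ids, [intent_logits, slot_logits])

ids = tf.constant([[5, 8, 9, 0]])              # (b=1, s=4), 0 is padding
intent_out, slot_out = model(ids)
print(intent_out.shape, slot_out.shape)        # (1, 7) (1, 4, 20)

In BertSlu the stand-in encoder would be the pretrained transformer, and intent_ids/tags_ids drive the summed loss exactly as in call() above.
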
Example #6
class SfId(Model):
    def __init__(self,
                 embedding_dim,
                 hidden_dim,
                 dropout_rate,
                 vocab: Vocabulary,
                 priority_order: str = "slot_first",
                 iteration_num: int = 1,
                 use_crf: bool = False,
                 *args,
                 **kwargs):
        super().__init__(vocab=vocab, *args, **kwargs)

        _vocab_size = self._vocab.get_vocab_size("text")
        _intent_size = self._vocab.get_vocab_size("intent")
        _slot_size = self._vocab.get_vocab_size("tags")

        self._use_crf = use_crf
        self.embedding = tf.keras.layers.Embedding(input_dim=_vocab_size,
                                                   output_dim=embedding_dim,
                                                   mask_zero=True)
        self.bi_lstm = tf.keras.layers.Bidirectional(
            tf.keras.layers.LSTM(hidden_dim,
                                 return_sequences=True,
                                 return_state=True))
        self.dropout = tf.keras.layers.Dropout(rate=dropout_rate)
        self.slot_gated_attention = SlotGatedAttention(
            attn_size=2 * hidden_dim, remove_slot_attn=False)

        self.sf_id_subnet_stack = [
            SfIdSubnet(attn_size=2 * hidden_dim,
                       priority_order=priority_order,
                       id=i,
                       iteration_num=iteration_num)
            for i in range(iteration_num)
        ]
        self.intent_output_dense = tf.keras.layers.Dense(_intent_size)
        self.slot_output_dense = tf.keras.layers.Dense(_slot_size)
        self.iteration_num = iteration_num
        self.intent_loss = SequenceClassificationLoss()
        if use_crf:
            self.crf = tfa.layers.CRF(_slot_size, use_kernel=False)
            self.slot_loss = CrfLoss()
        else:
            self.slot_loss = TokenClassificationLoss()

    def init_metrics(self) -> Dict[str, Metric]:
        return {
            "nlu_acc": NluAccMetric(),
            "f1_score": SeqEvalF1Metric(
                label_map=self._vocab._index_to_token["tags"]),
        }

    def call(self,
             input_ids,
             intent_ids=None,
             tags_ids=None,
             mask=None,
             training=True) -> Dict:
        inputs = self.embedding(input_ids)
        hidden, forward_h, forward_c, backword_h, backword_c = self.bi_lstm(
            inputs)  # (b, s, 2*e) (b, e) (b, e) (b, e) (b, e)
        hidden = self.dropout(hidden, training=training)
        final_state = tf.concat([forward_h, backword_h], axis=-1)
        # (b, 2*e)
        c_slot, c_intent = self.slot_gated_attention(hidden, final_state)
        for _id, _sf_id_subnet in enumerate(self.sf_id_subnet_stack):
            if _id == self.iteration_num - 1:
                slot_output, intent_output, r_intent, slot_reinforce_state = _sf_id_subnet(
                    lstm_enc=hidden,
                    final_state=final_state,
                    c_slot=c_slot,
                    c_intent=c_intent)
            else:
                r_intent, slot_reinforce_state = _sf_id_subnet(
                    lstm_enc=hidden,
                    final_state=final_state,
                    c_slot=c_slot,
                    c_intent=c_intent)
                c_slot = slot_reinforce_state
                c_intent = r_intent
        y_slot = self.slot_output_dense(slot_output)
        y_intent = self.intent_output_dense(intent_output)
        output_dict = {"intent_logits": y_intent, "slot_logits": y_slot}
        if self._use_crf:
            decoded_sequence, potentials, sequence_length, chain_kernel = self.crf(
                y_slot)
            output_dict["decoded_sequence"] = decoded_sequence
        if intent_ids is not None and tags_ids is not None:
            _intent_loss = self.intent_loss.compute_loss(y_true=intent_ids,
                                                         y_pred=y_intent)
            if self._use_crf:
                _slot_loss = self.slot_loss.compute_loss(
                    potentials, tags_ids, sequence_length, chain_kernel)
            else:
                _slot_loss = self.slot_loss.compute_loss(y_true=tags_ids,
                                                         y_pred=y_slot)
            output_dict["loss"] = _intent_loss + _slot_loss

            slot_pred = decoded_sequence if self._use_crf else y_slot
            self._metrics["nlu_acc"].update_state(
                y_true=[intent_ids, tags_ids], y_pred=[y_intent, slot_pred])
            if not training:
                self._metrics["f1_score"].update_state(y_true=tags_ids,
                                                       y_pred=slot_pred)
        return output_dict
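
When use_crf=True, SfId routes the slot logits through tfa.layers.CRF, keeps the Viterbi decode for metrics, and switches the slot loss to a CRF negative log-likelihood. CrfLoss itself is not defined in these examples; the sketch below is one plausible implementation consistent with how it is called (compute_loss(potentials, tags_ids, sequence_length, chain_kernel)), built on tensorflow_addons' crf_log_likelihood. The class name CrfLossSketch and the toy shapes are assumptions.

import tensorflow as tf
import tensorflow_addons as tfa

class CrfLossSketch:
    """Hypothetical stand-in for CrfLoss: CRF negative log-likelihood."""
    def compute_loss(self, potentials, tag_ids, sequence_length, chain_kernel):
        log_likelihood, _ = tfa.text.crf_log_likelihood(
            potentials, tag_ids, sequence_length, chain_kernel)
        return -tf.reduce_mean(log_likelihood)

# The CRF layer returns the Viterbi decode, the unary potentials,
# the per-example lengths and the transition matrix, as used in call() above.
crf = tfa.layers.CRF(5, use_kernel=False)
y_slot = tf.random.normal([2, 7, 5])                         # (b, s, _slot_size)
decoded, potentials, seq_len, chain_kernel = crf(y_slot)
tags = tf.random.uniform([2, 7], maxval=5, dtype=tf.int32)   # gold tag ids
loss = CrfLossSketch().compute_loss(potentials, tags, seq_len, chain_kernel)
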
Example #7
class StackPropagationSlu(Model):
    def __init__(self, embedding_dim, hidden_dim, dropout_rate,
                 vocab: Vocabulary, *args, **kwargs):
        super().__init__(vocab=vocab, *args, **kwargs)

        _vocab_size = self._vocab.get_vocab_size("text")
        _intent_size = self._vocab.get_vocab_size("intent")
        _slot_size = self._vocab.get_vocab_size("tags")

        self.embedding = tf.keras.layers.Embedding(_vocab_size,
                                                   embedding_dim,
                                                   mask_zero=True)
        self.bi_lstm = tf.keras.layers.Bidirectional(
            tf.keras.layers.LSTM(hidden_dim, return_sequences=True))
        self.attention_layer = SelfAttentionLayer(hidden_dim=1024,
                                                  output_dim=128,
                                                  dropout_rate=dropout_rate)
        self.dropout1 = tf.keras.layers.Dropout(rate=dropout_rate)

        self.concat = tf.keras.layers.Concatenate()

        self.intent_decoder_cell = tf.keras.layers.LSTMCell(units=64)
        self.slot_decoder_cell = tf.keras.layers.LSTMCell(units=64)
        self.intent_decoder_dropout = tf.keras.layers.Dropout(
            rate=dropout_rate)
        self.slot_decoder_dropout = tf.keras.layers.Dropout(rate=dropout_rate)

        self.intent_liner_layer = tf.keras.layers.Dense(units=_intent_size)
        self.slot_liner_layer = tf.keras.layers.Dense(units=_slot_size)

        self.intent_embedding = tf.keras.layers.Embedding(_intent_size, 8)
        self.slot_embedding = tf.keras.layers.Embedding(_slot_size, 32)
        self._intent_loss = TokenClassificationLoss()
        self._slot_loss = TokenClassificationLoss()

    def init_metrics(self) -> Dict[str, Metric]:
        return {"nlu_acc": NluAccMetric()}

    # @tf.function()
    def call(self,
             input_ids,
             intent_ids=None,
             tags_ids=None,
             mask=None,
             training=True):
        x = self.embedding(input_ids)  # (b, s, e)
        x = self.dropout1(x, training=training)
        h = self.bi_lstm(x)  # (b, s, 2e)
        c = self.attention_layer(h)  # (b, s, 2e)
        e = self.concat([h, c])

        # intent_decoder
        _intent_h_state = tf.zeros([x.shape[0], 64])  # (b, 64)
        _intent_c_state = tf.zeros([x.shape[0], 64])  # (b, 64)
        _slot_h_state = tf.zeros([x.shape[0], 64])  # (b, 64)
        _slot_c_state = tf.zeros([x.shape[0], 64])  # (b, 64)
        # https://stackoverflow.com/questions/64567161/tensorflow-cannot-be-accessed-here-it-is-defined-in-another-function-or-code-b
        y_intent = tf.TensorArray(dtype=tf.float32, size=0, dynamic_size=True)
        y_slot = tf.TensorArray(dtype=tf.float32, size=0, dynamic_size=True)
        # y_intent, y_slot = [], []
        prev_intent_tensor = tf.zeros([x.shape[0], 8])
        prev_slot_tensor = tf.zeros([x.shape[0], 32])
        for i in tf.range(x.shape[1]):
            _hidden = e[:, i, :]
            _intent_hidden = tf.concat([_hidden, prev_intent_tensor], axis=-1)
            # apply dropout
            _intent_hidden = self.intent_decoder_dropout(_intent_hidden,
                                                         training=training)
            _intent_h_state, (_intent_h_state,
                              _intent_c_state) = self.intent_decoder_cell(
                                  _intent_hidden,
                                  states=[_intent_h_state, _intent_c_state])
            _h_intent_i = self.intent_liner_layer(_intent_h_state)
            y_intent = y_intent.write(i, _h_intent_i)
            # y_intent.append(_h_intent_i)
            prev_intent_tensor = self.intent_embedding(
                tf.argmax(_h_intent_i, axis=-1))
            # slot_decoder
            _slot_hidden = tf.concat([_hidden, _h_intent_i, prev_slot_tensor],
                                     axis=-1)
            # apply dropout
            _slot_hidden = self.slot_decoder_dropout(_slot_hidden,
                                                     training=training)
            _slot_h_state, (_slot_h_state,
                            _slot_c_state) = self.slot_decoder_cell(
                                _slot_hidden,
                                states=[_slot_h_state, _slot_c_state])
            _h_slot_i = self.slot_liner_layer(_slot_h_state)
            y_slot = y_slot.write(i, _h_slot_i)
            # y_slot.append(_h_slot_i)
            prev_slot_tensor = self.slot_embedding(
                tf.argmax(_h_slot_i, axis=-1))
        # Note: transpose must be used here; reshape gives a different result
        # Wrong: tf.reshape(y_intent.stack(), [x.shape[0], x.shape[1], -1])
        y_intent = tf.transpose(y_intent.stack(), [1, 0, 2])
        y_slot = tf.transpose(y_slot.stack(), [1, 0, 2])

        o_intent = self.get_o_intent(intent_pred=y_intent, mask=x._keras_mask)

        output_dict = {"intent_logits": o_intent, "slot_logits": y_slot}
        if intent_ids is not None and tags_ids is not None:
            _intent_ids = tf.broadcast_to(intent_ids, tags_ids.shape)
            active_loss = tags_ids != -100

            _intent_loss = self._intent_loss.compute_loss(
                y_true=tf.boolean_mask(_intent_ids, active_loss),
                y_pred=tf.boolean_mask(y_intent, active_loss))
            _slot_loss = self._slot_loss.compute_loss(y_true=tags_ids,
                                                      y_pred=y_slot)
            output_dict["loss"] = _intent_loss + _slot_loss
            self._metrics["nlu_acc"].update_state(
                y_true=[intent_ids, tags_ids], y_pred=[o_intent, y_slot])
        return output_dict

    @staticmethod
    def get_o_intent(intent_pred, mask):
        mask = tf.cast(mask, dtype=tf.int32)
        o_intent = tf.argmax(intent_pred, axis=-1)
        seq_lengths = tf.reduce_sum(mask, axis=-1)

        # take the most common token-level intent as the query intent
        # https://www.tensorflow.org/api_docs/python/tf/unique_with_counts

        def get_max_count_intent(_intent):
            _y, _idx, _count = tf.unique_with_counts(_intent)
            _intent = _y[tf.argmax(_count)]
            return [_intent]

        o_intent = tf.convert_to_tensor(
            [
                get_max_count_intent(o_intent[i][:seq_lengths[i]])
                for i in range(len(seq_lengths))
            ],
            dtype=tf.int32)
        return o_intent
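
get_o_intent collapses the per-token intent predictions into a single utterance-level intent by majority vote over the non-padded positions, using tf.unique_with_counts. A tiny standalone illustration of that vote (toy values, not from the original code):

import tensorflow as tf

# Token-level intent predictions for one utterance of length 5 (toy values).
token_intents = tf.constant([3, 3, 1, 3, 2])
values, _, counts = tf.unique_with_counts(token_intents)
query_intent = values[tf.argmax(counts)]
print(int(query_intent))  # 3, the most frequent token-level prediction
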
Example #8
class SlotGated(Model):
    def __init__(self, embedding_dim, hidden_dim, dropout_rate,
                 vocab: Vocabulary, *args, **kwargs):

        super().__init__(vocab=vocab, *args, **kwargs)

        _vocab_size = self._vocab.get_vocab_size("text")
        _intent_size = self._vocab.get_vocab_size("intent")
        _slot_size = self._vocab.get_vocab_size("tags")

        self.embedding = tf.keras.layers.Embedding(_vocab_size,
                                                   embedding_dim,
                                                   mask_zero=True)
        self.bi_lstm = tf.keras.layers.Bidirectional(
            tf.keras.layers.LSTM(hidden_dim,
                                 return_sequences=True,
                                 return_state=True))
        self.dropout = tf.keras.layers.Dropout(rate=dropout_rate)
        self.slot_gated_attention = SlotGatedAttention(
            attn_size=2 * hidden_dim, remove_slot_attn=False)
        # TODO: add a remove_slot_attn variant

        self.v = self.add_weight(name="v",
                                 shape=(2 * hidden_dim, ),
                                 initializer="glorot_uniform")
        self.intent_liner_layer = tf.keras.layers.Dense(2 * hidden_dim)

        self.intent_output_dense = tf.keras.layers.Dense(_intent_size)
        self.slot_output_dense = tf.keras.layers.Dense(_slot_size)
        self.intent_loss = SequenceClassificationLoss()
        self.slot_loss = TokenClassificationLoss()

    def init_metrics(self):
        return {
            "nlu_acc": NluAccMetric(),
            "f1_score": SeqEvalF1Metric(
                label_map=self._vocab._index_to_token["tags"]),
        }

    def call(self,
             input_ids,
             intent_ids=None,
             tags_ids=None,
             mask=None,
             training=True) -> Dict:
        inputs = self.embedding(input_ids)  # b, s, e
        hidden, forward_h, forward_c, backword_h, backword_c = self.bi_lstm(
            inputs)  # (b, s, 2*e) (b, e) (b, e) (b, e) (b, e)
        hidden = self.dropout(hidden, training=training)
        final_state = tf.concat([forward_h, backword_h], axis=-1)
        # (b, 2*e)
        c_slot, c_intent = self.slot_gated_attention(hidden, final_state)
        # (b, s, 2*e) (b, 2*e)
        # formula(6) in paper: g = \sum(v * tanh(C_slot + W * C_intent))
        _c_intent = tf.expand_dims(c_intent, axis=1)
        _c_intent = tf.broadcast_to(_c_intent, c_slot.shape)
        _c_intent = self.intent_liner_layer(_c_intent)
        # (b, s, 2*e)
        g = self.v * tf.nn.tanh(c_slot + _c_intent)
        g = tf.reduce_sum(g, axis=-1)  # (b, s)
        g = tf.expand_dims(g, axis=-1)
        # formula(7) in paper: y^S_i = softmax(W_{hy}^S(h_i + c_i^S.g))
        y_slot = self.slot_output_dense(hidden + c_slot * g)
        y_intent = self.intent_output_dense(final_state + c_intent)

        output_dict = {"intent_logits": y_intent, "slot_logits": y_slot}
        if intent_ids is not None and tags_ids is not None:
            _intent_loss = self.intent_loss.compute_loss(y_true=intent_ids,
                                                         y_pred=y_intent)
            _slot_loss = self.slot_loss.compute_loss(y_true=tags_ids,
                                                     y_pred=y_slot)
            output_dict["loss"] = _intent_loss + _slot_loss

            self._metrics["nlu_acc"].update_state(
                y_true=[intent_ids, tags_ids], y_pred=[y_intent, y_slot])
            if not training:
                self._metrics["f1_score"].update_state(y_true=tags_ids,
                                                       y_pred=y_slot)
        return output_dict
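
The slot gate in formula (6) reduces to one scalar per token that rescales the slot context vector before the slot classifier. A self-contained numeric sketch of just that computation with random tensors (shapes and variable names here are illustrative, not the repository's API):

import tensorflow as tf

b, s, d = 2, 6, 8                        # batch, sequence length, 2 * hidden_dim
c_slot = tf.random.normal([b, s, d])     # per-token slot context
c_intent = tf.random.normal([b, d])      # utterance-level intent context
v = tf.random.normal([d])                # plays the role of self.v above
W = tf.keras.layers.Dense(d)             # plays the role of intent_liner_layer

# g = sum_d( v * tanh(c_slot + W c_intent) ), one scalar per token
proj = W(tf.broadcast_to(c_intent[:, None, :], [b, s, d]))
g = tf.reduce_sum(v * tf.nn.tanh(c_slot + proj), axis=-1, keepdims=True)  # (b, s, 1)
gated_context = c_slot * g               # added to hidden before slot_output_dense
print(g.shape, gated_context.shape)      # (2, 6, 1) (2, 6, 8)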