Example #1: multi-task intent classification and slot tagging (BiLSTM + CRF)
    def build(self,
              word_length,
              num_labels,
              num_intent_labels,
              word_vocab_size,
              char_vocab_size,
              word_emb_dims=100,
              char_emb_dims=30,
              char_lstm_dims=30,
              tagger_lstm_dims=100,
              dropout=0.2):
        """
        Build the multi-task intent and slot tagging model

        Args:
            word_length (int): max word length (in characters)
            num_labels (int): number of slot labels
            num_intent_labels (int): number of intent classes
            word_vocab_size (int): word vocabulary size
            char_vocab_size (int): character vocabulary size
            word_emb_dims (int, optional): word embedding dimensions
            char_emb_dims (int, optional): character embedding dimensions
            char_lstm_dims (int, optional): character feature LSTM hidden size
            tagger_lstm_dims (int, optional): tagger LSTM hidden size
            dropout (float, optional): dropout rate
        """
        self.word_length = word_length
        self.num_labels = num_labels
        self.num_intent_labels = num_intent_labels
        self.word_vocab_size = word_vocab_size
        self.char_vocab_size = char_vocab_size
        self.word_emb_dims = word_emb_dims
        self.char_emb_dims = char_emb_dims
        self.char_lstm_dims = char_lstm_dims
        self.tagger_lstm_dims = tagger_lstm_dims
        self.dropout = dropout

        words_input = Input(shape=(None, ), name='words_input')
        embedding_layer = Embedding(self.word_vocab_size,
                                    self.word_emb_dims,
                                    name='word_embedding')
        word_embeddings = embedding_layer(words_input)
        word_embeddings = Dropout(self.dropout)(word_embeddings)

        # create word character input and embeddings layer
        word_chars_input = Input(shape=(None, self.word_length),
                                 name='word_chars_input')
        char_embedding_layer = Embedding(self.char_vocab_size,
                                         self.char_emb_dims,
                                         input_length=self.word_length,
                                         name='char_embedding')
        # apply embedding to each word
        char_embeddings = char_embedding_layer(word_chars_input)
        # feed dense char vectors into BiLSTM
        char_embeddings = TimeDistributed(
            Bidirectional(self._rnn_cell(
                self.char_lstm_dims)))(char_embeddings)
        char_embeddings = Dropout(self.dropout)(char_embeddings)

        # first BiLSTM layer (used for intent classification)
        first_bilstm_layer = Bidirectional(
            self._rnn_cell(self.tagger_lstm_dims,
                           return_sequences=True,
                           return_state=True))
        first_lstm_out = first_bilstm_layer(word_embeddings)

        lstm_y_sequence = first_lstm_out[0]  # sequence outputs (y) of the LSTM layer
        states = first_lstm_out[1:]
        hf, _, hb, _ = states  # extract last hidden states
        h_state = concatenate([hf, hb], axis=-1)
        intents = Dense(self.num_intent_labels,
                        activation='softmax',
                        name='intent_classifier_output')(h_state)

        # create the 2nd feature vectors
        combined_features = concatenate([lstm_y_sequence, char_embeddings],
                                        axis=-1)

        # 2nd BiLSTM layer for label classification
        second_bilstm_layer = Bidirectional(
            self._rnn_cell(self.tagger_lstm_dims,
                           return_sequences=True))(combined_features)
        second_bilstm_layer = Dropout(self.dropout)(second_bilstm_layer)
        bilstm_out = Dense(self.num_labels)(second_bilstm_layer)

        # feed BiLSTM vectors into CRF
        with tf.device('/cpu:0'):
            crf = CRF(self.num_labels, name='intent_slot_crf')
            labels = crf(bilstm_out)

        # compile the model
        model = Model(inputs=[words_input, word_chars_input],
                      outputs=[intents, labels])

        # define losses and metrics
        loss_f = {
            'intent_classifier_output': 'categorical_crossentropy',
            'intent_slot_crf': crf.loss
        }
        metrics = {
            'intent_classifier_output': 'categorical_accuracy',
            'intent_slot_crf': crf.viterbi_accuracy
        }

        model.compile(loss=loss_f,
                      optimizer=tf.train.AdamOptimizer(),
                      metrics=metrics)
        self.model = model
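
A minimal usage sketch for this build method; the class name and all sizes below are illustrative assumptions, not values from the source:

    # Hypothetical usage of the intent/slot model's build method.
    model = MultiTaskIntentModel()       # assumed enclosing class name
    model.build(word_length=12,          # max characters per word
                num_labels=40,           # slot label count
                num_intent_labels=7,
                word_vocab_size=10000,
                char_vocab_size=80)
    model.model.summary()                # inspect the compiled Keras graph
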
Example #2: joint POS tagging and chunking with stacked BiLSTMs (softmax or CRF chunk classifier)
    def build(self,
              vocabulary_size,
              num_pos_labels,
              num_chunk_labels,
              char_vocab_size=None,
              max_word_len=25,
              feature_size=100,
              dropout=0.5,
              classifier='softmax',
              optimizer=None):
        """
        Build a chunker/POS model

        Args:
            vocabulary_size (int): the size of the input vocabulary
            num_pos_labels (int): the number of POS labels
            num_chunk_labels (int): the number of chunk labels
            char_vocab_size (int, optional): character vocabulary size
            max_word_len (int, optional): max characters in a word
            feature_size (int, optional): feature size - determines the embedding/LSTM layer \
                hidden state size
            dropout (float, optional): dropout rate
            classifier (str, optional): classifier layer, 'softmax' for a softmax classifier or \
                'crf' for a conditional random field classifier. Default is 'softmax'.
            optimizer (tensorflow.python.training.optimizer.Optimizer, optional): optimizer, if \
                None the Adam optimizer is used
        """
        self.vocabulary_size = vocabulary_size
        self.char_vocab_size = char_vocab_size
        self.num_pos_labels = num_pos_labels
        self.num_chunk_labels = num_chunk_labels
        self.max_word_len = max_word_len
        self.feature_size = feature_size
        self.dropout = dropout
        self.classifier = classifier

        word_emb_layer = Embedding(self.vocabulary_size,
                                   self.feature_size,
                                   name='embedding',
                                   mask_zero=False)
        word_input = Input(shape=(None, ))
        word_embedding = word_emb_layer(word_input)
        input_src = word_input
        features = word_embedding

        # add char input if present
        if self.char_vocab_size is not None:
            char_input = Input(shape=(None, self.max_word_len))
            char_emb_layer = Embedding(self.char_vocab_size,
                                       30,
                                       name='char_embedding',
                                       mask_zero=False)
            char_embedding = char_emb_layer(char_input)
            char_embedding = TimeDistributed(Conv1D(
                30, 3, padding='same'))(char_embedding)
            char_embedding = TimeDistributed(
                GlobalMaxPooling1D())(char_embedding)

            input_src = [input_src, char_input]
            features = concatenate([word_embedding, char_embedding])

        rnn_layer_1 = Bidirectional(
            self._rnn_cell(return_sequences=True))(features)
        rnn_layer_2 = Bidirectional(
            self._rnn_cell(return_sequences=True))(rnn_layer_1)
        rnn_layer_3 = Bidirectional(
            self._rnn_cell(return_sequences=True))(rnn_layer_2)

        # outputs
        pos_out = Dense(self.num_pos_labels,
                        activation='softmax',
                        name='pos_output')(rnn_layer_1)
        losses = {'pos_output': 'categorical_crossentropy'}
        metrics = {'pos_output': 'categorical_accuracy'}

        if 'crf' in self.classifier:
            with tf.device('/cpu:0'):
                chunk_crf = CRF(self.num_chunk_labels, name='chunk_crf')
                rnn_layer_3_dense = Dense(self.num_chunk_labels)(Dropout(
                    self.dropout)(rnn_layer_3))
                chunks_out = chunk_crf(rnn_layer_3_dense)
                losses['chunk_crf'] = chunk_crf.loss
                metrics['chunk_crf'] = chunk_crf.viterbi_accuracy
        else:
            chunks_out = TimeDistributed(Dense(self.num_chunk_labels,
                                               activation='softmax'),
                                         name='chunk_out')(rnn_layer_3)
            losses['chunk_out'] = 'categorical_crossentropy'
            metrics['chunk_out'] = 'categorical_accuracy'

        model = keras.Model(input_src, [pos_out, chunks_out])
        if optimizer is None:
            self.optimizer = tf.train.AdamOptimizer()
        else:
            self.optimizer = optimizer
        model.compile(optimizer=self.optimizer, loss=losses, metrics=metrics)
        self.model = model
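
A minimal usage sketch; the class name and label counts are illustrative assumptions. Passing char_vocab_size switches on the char-CNN feature branch:

    # Hypothetical usage of the chunker/POS build method.
    chunker = SequenceChunker()          # assumed enclosing class name
    chunker.build(vocabulary_size=20000,
                  num_pos_labels=45,
                  num_chunk_labels=23,
                  char_vocab_size=100,   # omit to drop the char-CNN branch
                  classifier='crf')      # or 'softmax' for the default head
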
Example #3: NER tagger with char-CNN features and a CRF fed explicit sequence lengths
    def build(self,
              word_length,
              target_label_dims,
              word_vocab_size,
              char_vocab_size,
              word_embedding_dims=100,
              char_embedding_dims=16,
              tagger_lstm_dims=200,
              dropout=0.5):
        """
        Build an NERCRF model

        Args:
            word_length (int): max word length in characters
            target_label_dims (int): number of entity labels (for classification)
            word_vocab_size (int): word vocabulary size
            char_vocab_size (int): character vocabulary size
            word_embedding_dims (int): word embedding dimensions
            char_embedding_dims (int): character embedding dimensions
            tagger_lstm_dims (int): word tagger LSTM output dimensions
            dropout (float): dropout rate
        """
        self.word_length = word_length
        self.target_label_dims = target_label_dims
        self.word_vocab_size = word_vocab_size
        self.char_vocab_size = char_vocab_size
        self.word_embedding_dims = word_embedding_dims
        self.char_embedding_dims = char_embedding_dims
        self.tagger_lstm_dims = tagger_lstm_dims
        self.dropout = dropout

        # build word input
        words_input = tf.keras.layers.Input(shape=(None, ), name='words_input')
        embedding_layer = tf.keras.layers.Embedding(self.word_vocab_size,
                                                    self.word_embedding_dims,
                                                    name='word_embedding')
        word_embeddings = embedding_layer(words_input)

        # create word character embeddings
        word_chars_input = tf.keras.layers.Input(shape=(None,
                                                        self.word_length),
                                                 name='word_chars_input')
        char_embedding_layer = tf.keras.layers.Embedding(
            self.char_vocab_size,
            self.char_embedding_dims,
            name='char_embedding')(word_chars_input)
        char_embeddings = tf.keras.layers.TimeDistributed(
            tf.keras.layers.Conv1D(128, 3, padding='same',
                                   activation='relu'))(char_embedding_layer)
        char_embeddings = tf.keras.layers.TimeDistributed(
            tf.keras.layers.GlobalMaxPooling1D())(char_embeddings)

        # create the final feature vectors
        features = tf.keras.layers.concatenate(
            [word_embeddings, char_embeddings], axis=-1)

        # encode using a bi-LSTM
        features = tf.keras.layers.Dropout(self.dropout)(features)
        bilstm = tf.keras.layers.Bidirectional(
            self._rnn_cell(self.tagger_lstm_dims,
                           return_sequences=True))(features)
        bilstm = tf.keras.layers.Bidirectional(
            self._rnn_cell(self.tagger_lstm_dims,
                           return_sequences=True))(bilstm)
        bilstm = tf.keras.layers.Dropout(self.dropout)(bilstm)
        bilstm = tf.keras.layers.Dense(self.target_label_dims)(bilstm)

        inputs = [words_input, word_chars_input]

        sequence_lengths = tf.keras.layers.Input(shape=(1, ),
                                                 dtype='int32',
                                                 name='seq_lens')
        inputs.append(sequence_lengths)
        crf = CRF(self.target_label_dims, name='ner_crf')
        predictions = crf(inputs=bilstm, sequence_lengths=sequence_lengths)

        # compile the model
        model = tf.keras.Model(inputs=inputs, outputs=predictions)
        model.compile(loss={'ner_crf': crf.loss},
                      optimizer=tf.keras.optimizers.Adam(0.001, clipnorm=5.))

        self.model = model
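
A minimal usage sketch; the class name and sizes are illustrative assumptions. Because the CRF here takes explicit sequence lengths, the compiled model expects three inputs:

    # Hypothetical usage of the NERCRF build method.
    ner = NERCRF()                       # assumed enclosing class name
    ner.build(word_length=12,
              target_label_dims=17,      # entity label count (illustrative)
              word_vocab_size=25000,
              char_vocab_size=100)
    # training batches must supply word ids, padded char ids, and seq_lens
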
Example #4: NER tagger with a CRF supporting 'pad'/'reg' operation modes
    def build(self,
              word_length,
              target_label_dims,
              word_vocab_size,
              char_vocab_size,
              word_embedding_dims=100,
              char_embedding_dims=16,
              word_lstm_dims=20,
              tagger_lstm_dims=200,
              dropout=0.5,
              crf_mode='pad'):
        """
        Build an NERCRF model

        Args:
            word_length (int): max word length in characters
            target_label_dims (int): number of entity labels (for classification)
            word_vocab_size (int): word vocabulary size
            char_vocab_size (int): character vocabulary size
            word_embedding_dims (int): word embedding dimensions
            char_embedding_dims (int): character embedding dimensions
            word_lstm_dims (int): character LSTM feature extractor output dimensions
            tagger_lstm_dims (int): word tagger LSTM output dimensions
            dropout (float): dropout rate
            crf_mode (string): CRF operation mode, 'pad' for input sequences with explicitly
                supplied lengths or 'reg' for full-sequence tagging ('reg' is forced when
                use_cudnn=True)
        """
        self.word_length = word_length
        self.target_label_dims = target_label_dims
        self.word_vocab_size = word_vocab_size
        self.char_vocab_size = char_vocab_size
        self.word_embedding_dims = word_embedding_dims
        self.char_embedding_dims = char_embedding_dims
        self.word_lstm_dims = word_lstm_dims
        self.tagger_lstm_dims = tagger_lstm_dims
        self.dropout = dropout
        self.crf_mode = crf_mode

        assert crf_mode in ('pad', 'reg'), 'crf_mode is invalid'

        # build word input
        words_input = Input(shape=(None, ), name='words_input')
        embedding_layer = Embedding(self.word_vocab_size,
                                    self.word_embedding_dims,
                                    name='word_embedding')
        word_embeddings = embedding_layer(words_input)

        # create word character embeddings
        word_chars_input = Input(shape=(None, self.word_length),
                                 name='word_chars_input')
        char_embedding_layer = Embedding(
            self.char_vocab_size,
            self.char_embedding_dims,
            name='char_embedding')(word_chars_input)
        char_embeddings = TimeDistributed(
            Conv1D(128, 3, padding='same',
                   activation='relu'))(char_embedding_layer)
        char_embeddings = TimeDistributed(
            GlobalMaxPooling1D())(char_embeddings)

        # create the final feature vectors
        features = concatenate([word_embeddings, char_embeddings], axis=-1)

        # encode using a bi-LSTM
        features = Dropout(self.dropout)(features)
        bilstm = Bidirectional(
            self._rnn_cell(self.tagger_lstm_dims,
                           return_sequences=True))(features)
        bilstm = Bidirectional(
            self._rnn_cell(self.tagger_lstm_dims,
                           return_sequences=True))(bilstm)
        bilstm = Dropout(self.dropout)(bilstm)
        bilstm = Dense(self.target_label_dims)(bilstm)

        inputs = [words_input, word_chars_input]

        if self.use_cudnn:
            self.crf_mode = 'reg'
        with tf.device('/cpu:0'):
            crf = CRF(self.target_label_dims,
                      mode=self.crf_mode,
                      name='ner_crf')
            if self.crf_mode == 'pad':
                sequence_lengths = Input(batch_shape=(None, 1), dtype='int32')
                predictions = crf([bilstm, sequence_lengths])
                inputs.append(sequence_lengths)
            else:
                predictions = crf(bilstm)

        # compile the model
        model = tf.keras.Model(inputs=inputs, outputs=predictions)
        model.compile(loss={'ner_crf': crf.loss},
                      optimizer=tf.keras.optimizers.Adam(0.001, clipnorm=5.),
                      metrics=[crf.viterbi_accuracy])
        self.model = model
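
A minimal usage sketch; the class name is an assumption, and the instance must already provide the self.use_cudnn flag and self._rnn_cell helper that the snippet relies on:

    # Hypothetical usage of the NERCRF build method with a padded-mode CRF.
    ner = NERCRF()                       # assumed enclosing class name
    ner.use_cudnn = False                # 'pad' mode survives only when False
    ner.build(word_length=12,
              target_label_dims=17,
              word_vocab_size=25000,
              char_vocab_size=100,
              crf_mode='pad')            # adds a sequence-lengths input
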