def build(self,
          word_length,
          num_labels,
          num_intent_labels,
          word_vocab_size,
          char_vocab_size,
          word_emb_dims=100,
          char_emb_dims=30,
          char_lstm_dims=30,
          tagger_lstm_dims=100,
          dropout=0.2):
    """
    Build a model

    Args:
        word_length (int): max word length (in characters)
        num_labels (int): number of slot labels
        num_intent_labels (int): number of intent classes
        word_vocab_size (int): word vocabulary size
        char_vocab_size (int): character vocabulary size
        word_emb_dims (int, optional): word embedding dimensions
        char_emb_dims (int, optional): character embedding dimensions
        char_lstm_dims (int, optional): character feature LSTM hidden size
        tagger_lstm_dims (int, optional): tagger LSTM hidden size
        dropout (float, optional): dropout rate
    """
    self.word_length = word_length
    self.num_labels = num_labels
    self.num_intent_labels = num_intent_labels
    self.word_vocab_size = word_vocab_size
    self.char_vocab_size = char_vocab_size
    self.word_emb_dims = word_emb_dims
    self.char_emb_dims = char_emb_dims
    self.char_lstm_dims = char_lstm_dims
    self.tagger_lstm_dims = tagger_lstm_dims
    self.dropout = dropout

    words_input = Input(shape=(None,), name='words_input')
    embedding_layer = Embedding(self.word_vocab_size,
                                self.word_emb_dims,
                                name='word_embedding')
    word_embeddings = embedding_layer(words_input)
    word_embeddings = Dropout(self.dropout)(word_embeddings)

    # create word character input and embeddings layer
    word_chars_input = Input(shape=(None, self.word_length),
                             name='word_chars_input')
    char_embedding_layer = Embedding(self.char_vocab_size,
                                     self.char_emb_dims,
                                     input_length=self.word_length,
                                     name='char_embedding')
    # apply embedding to each word
    char_embeddings = char_embedding_layer(word_chars_input)
    # feed dense char vectors into BiLSTM
    char_embeddings = TimeDistributed(
        Bidirectional(self._rnn_cell(self.char_lstm_dims)))(char_embeddings)
    char_embeddings = Dropout(self.dropout)(char_embeddings)

    # first BiLSTM layer (used for intent classification)
    first_bilstm_layer = Bidirectional(
        self._rnn_cell(self.tagger_lstm_dims,
                       return_sequences=True,
                       return_state=True))
    first_lstm_out = first_bilstm_layer(word_embeddings)

    lstm_y_sequence = first_lstm_out[0]  # save y states of the LSTM layer
    states = first_lstm_out[1:]
    hf, _, hb, _ = states  # extract last hidden states
    h_state = concatenate([hf, hb], axis=-1)
    intents = Dense(self.num_intent_labels,
                    activation='softmax',
                    name='intent_classifier_output')(h_state)

    # create the 2nd feature vectors
    combined_features = concatenate([lstm_y_sequence, char_embeddings],
                                    axis=-1)

    # 2nd BiLSTM layer for label classification
    second_bilstm_layer = Bidirectional(
        self._rnn_cell(self.tagger_lstm_dims,
                       return_sequences=True))(combined_features)
    second_bilstm_layer = Dropout(self.dropout)(second_bilstm_layer)
    bilstm_out = Dense(self.num_labels)(second_bilstm_layer)

    # feed BiLSTM vectors into CRF
    with tf.device('/cpu:0'):
        crf = CRF(self.num_labels, name='intent_slot_crf')
        labels = crf(bilstm_out)

    # compile the model
    model = Model(inputs=[words_input, word_chars_input],
                  outputs=[intents, labels])

    # define losses and metrics
    loss_f = {
        'intent_classifier_output': 'categorical_crossentropy',
        'intent_slot_crf': crf.loss
    }
    metrics = {
        'intent_classifier_output': 'categorical_accuracy',
        'intent_slot_crf': crf.viterbi_accuracy
    }
    model.compile(loss=loss_f,
                  optimizer=tf.train.AdamOptimizer(),
                  metrics=metrics)
    self.model = model
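
# Hypothetical usage sketch (not part of the source): assumes the build()
# above belongs to a multi-task intent/slot model class, here called
# `MultiTaskIntentModel`, and that vocabulary sizes come from a dataset
# loader. All concrete values below are illustrative placeholders.
#
#     model = MultiTaskIntentModel()
#     model.build(word_length=12,          # longest word, in characters
#                 num_labels=32,           # slot tag set size
#                 num_intent_labels=7,     # intent class count
#                 word_vocab_size=10000,
#                 char_vocab_size=80)
#     model.model.fit([train_words, train_word_chars],
#                     [train_intents, train_slot_tags],
#                     batch_size=32, epochs=5)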
def build(self,
          vocabulary_size,
          num_pos_labels,
          num_chunk_labels,
          char_vocab_size=None,
          max_word_len=25,
          feature_size=100,
          dropout=0.5,
          classifier='softmax',
          optimizer=None):
    """
    Build a chunker/POS model

    Args:
        vocabulary_size (int): the size of the input vocabulary
        num_pos_labels (int): the number of POS labels
        num_chunk_labels (int): the number of chunk labels
        char_vocab_size (int, optional): character vocabulary size
        max_word_len (int, optional): max characters in a word
        feature_size (int, optional): feature size - determines the embedding/LSTM layer \
            hidden state size
        dropout (float, optional): dropout rate
        classifier (str, optional): classifier layer, 'softmax' for softmax or 'crf' for \
            conditional random fields classifier. default is 'softmax'.
        optimizer (tensorflow.python.training.optimizer.Optimizer, optional): optimizer, if \
            None the default Adam optimizer is used
    """
    self.vocabulary_size = vocabulary_size
    self.char_vocab_size = char_vocab_size
    self.num_pos_labels = num_pos_labels
    self.num_chunk_labels = num_chunk_labels
    self.max_word_len = max_word_len
    self.feature_size = feature_size
    self.dropout = dropout
    self.classifier = classifier

    word_emb_layer = Embedding(self.vocabulary_size,
                               self.feature_size,
                               name='embedding',
                               mask_zero=False)
    word_input = Input(shape=(None,))
    word_embedding = word_emb_layer(word_input)
    input_src = word_input
    features = word_embedding

    # add char input if present
    if self.char_vocab_size is not None:
        char_input = Input(shape=(None, self.max_word_len))
        char_emb_layer = Embedding(self.char_vocab_size,
                                   30,
                                   name='char_embedding',
                                   mask_zero=False)
        char_embedding = char_emb_layer(char_input)
        # extract per-word character features with a CNN + max pooling
        char_embedding = TimeDistributed(
            Conv1D(30, 3, padding='same'))(char_embedding)
        char_embedding = TimeDistributed(GlobalMaxPooling1D())(char_embedding)
        input_src = [input_src, char_input]
        features = concatenate([word_embedding, char_embedding])

    rnn_layer_1 = Bidirectional(
        self._rnn_cell(return_sequences=True))(features)
    rnn_layer_2 = Bidirectional(
        self._rnn_cell(return_sequences=True))(rnn_layer_1)
    rnn_layer_3 = Bidirectional(
        self._rnn_cell(return_sequences=True))(rnn_layer_2)

    # outputs
    pos_out = Dense(self.num_pos_labels,
                    activation='softmax',
                    name='pos_output')(rnn_layer_1)
    losses = {'pos_output': 'categorical_crossentropy'}
    metrics = {'pos_output': 'categorical_accuracy'}

    if 'crf' in self.classifier:
        with tf.device('/cpu:0'):
            chunk_crf = CRF(self.num_chunk_labels, name='chunk_crf')
            rnn_layer_3_dense = Dense(self.num_chunk_labels)(
                Dropout(self.dropout)(rnn_layer_3))
            chunks_out = chunk_crf(rnn_layer_3_dense)
        losses['chunk_crf'] = chunk_crf.loss
        metrics['chunk_crf'] = chunk_crf.viterbi_accuracy
    else:
        chunks_out = TimeDistributed(Dense(self.num_chunk_labels,
                                           activation='softmax'),
                                     name='chunk_out')(rnn_layer_3)
        losses['chunk_out'] = 'categorical_crossentropy'
        metrics['chunk_out'] = 'categorical_accuracy'

    model = keras.Model(input_src, [pos_out, chunks_out])
    if optimizer is None:
        self.optimizer = tf.train.AdamOptimizer()
    else:
        self.optimizer = optimizer
    model.compile(optimizer=self.optimizer, loss=losses, metrics=metrics)
    self.model = model
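
# Hypothetical usage sketch (illustrative only): assumes this build() belongs
# to a chunker class, here called `SequenceChunker`. Passing char_vocab_size
# enables the char-CNN branch, and classifier='crf' swaps the softmax chunk
# head for a CRF layer. All values are placeholders.
#
#     chunker = SequenceChunker()
#     chunker.build(vocabulary_size=20000,
#                   num_pos_labels=45,
#                   num_chunk_labels=23,
#                   char_vocab_size=80,    # enables the char-CNN features
#                   classifier='crf')      # CRF chunk head instead of softmax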
def build(self,
          word_length,
          target_label_dims,
          word_vocab_size,
          char_vocab_size,
          word_embedding_dims=100,
          char_embedding_dims=16,
          tagger_lstm_dims=200,
          dropout=0.5):
    """
    Build a NERCRF model

    Args:
        word_length (int): max word length in characters
        target_label_dims (int): number of entity labels (for classification)
        word_vocab_size (int): word vocabulary size
        char_vocab_size (int): character vocabulary size
        word_embedding_dims (int): word embedding dimensions
        char_embedding_dims (int): character embedding dimensions
        tagger_lstm_dims (int): word tagger LSTM output dimensions
        dropout (float): dropout rate
    """
    self.word_length = word_length
    self.target_label_dims = target_label_dims
    self.word_vocab_size = word_vocab_size
    self.char_vocab_size = char_vocab_size
    self.word_embedding_dims = word_embedding_dims
    self.char_embedding_dims = char_embedding_dims
    self.tagger_lstm_dims = tagger_lstm_dims
    self.dropout = dropout

    # build word input
    words_input = tf.keras.layers.Input(shape=(None,), name='words_input')
    embedding_layer = tf.keras.layers.Embedding(self.word_vocab_size,
                                                self.word_embedding_dims,
                                                name='word_embedding')
    word_embeddings = embedding_layer(words_input)

    # create word character embeddings
    word_chars_input = tf.keras.layers.Input(shape=(None, self.word_length),
                                             name='word_chars_input')
    char_embedding_layer = tf.keras.layers.Embedding(
        self.char_vocab_size,
        self.char_embedding_dims,
        name='char_embedding')(word_chars_input)
    char_embeddings = tf.keras.layers.TimeDistributed(
        tf.keras.layers.Conv1D(128, 3, padding='same',
                               activation='relu'))(char_embedding_layer)
    char_embeddings = tf.keras.layers.TimeDistributed(
        tf.keras.layers.GlobalMaxPooling1D())(char_embeddings)

    # create the final feature vectors
    features = tf.keras.layers.concatenate(
        [word_embeddings, char_embeddings], axis=-1)

    # encode using a bi-LSTM
    features = tf.keras.layers.Dropout(self.dropout)(features)
    bilstm = tf.keras.layers.Bidirectional(
        self._rnn_cell(self.tagger_lstm_dims,
                       return_sequences=True))(features)
    bilstm = tf.keras.layers.Bidirectional(
        self._rnn_cell(self.tagger_lstm_dims,
                       return_sequences=True))(bilstm)
    bilstm = tf.keras.layers.Dropout(self.dropout)(bilstm)
    bilstm = tf.keras.layers.Dense(self.target_label_dims)(bilstm)

    inputs = [words_input, word_chars_input]
    sequence_lengths = tf.keras.layers.Input(shape=(1,),
                                             dtype='int32',
                                             name='seq_lens')
    inputs.append(sequence_lengths)

    crf = CRF(self.target_label_dims, name='ner_crf')
    predictions = crf(inputs=bilstm, sequence_lengths=sequence_lengths)

    # compile the model
    model = tf.keras.Model(inputs=inputs, outputs=predictions)
    model.compile(loss={'ner_crf': crf.loss},
                  optimizer=tf.keras.optimizers.Adam(0.001, clipnorm=5.))

    self.model = model
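
# Hypothetical usage sketch (illustrative only): assumes a NER model class,
# here called `NERCRF`, wrapping the build() above. Note the compiled model
# takes a third input of per-sentence lengths, consumed by the CRF layer.
# All values are placeholders.
#
#     ner = NERCRF()
#     ner.build(word_length=12,
#               target_label_dims=17,      # entity tag set size
#               word_vocab_size=30000,
#               char_vocab_size=100)
#     ner.model.fit([train_words, train_word_chars, train_seq_lens],
#                   train_entity_tags, batch_size=32)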
def build(self,
          word_length,
          target_label_dims,
          word_vocab_size,
          char_vocab_size,
          word_embedding_dims=100,
          char_embedding_dims=16,
          word_lstm_dims=20,
          tagger_lstm_dims=200,
          dropout=0.5,
          crf_mode='pad'):
    """
    Build a NERCRF model

    Args:
        word_length (int): max word length in characters
        target_label_dims (int): number of entity labels (for classification)
        word_vocab_size (int): word vocabulary size
        char_vocab_size (int): character vocabulary size
        word_embedding_dims (int): word embedding dimensions
        char_embedding_dims (int): character embedding dimensions
        word_lstm_dims (int): character LSTM feature extractor output dimensions
        tagger_lstm_dims (int): word tagger LSTM output dimensions
        dropout (float): dropout rate
        crf_mode (str): CRF operation mode; 'pad' for input sequences with
            supplied lengths, 'reg' for full sequence tagging
            ('reg' is forced when use_cudnn=True)
    """
    self.word_length = word_length
    self.target_label_dims = target_label_dims
    self.word_vocab_size = word_vocab_size
    self.char_vocab_size = char_vocab_size
    self.word_embedding_dims = word_embedding_dims
    self.char_embedding_dims = char_embedding_dims
    self.word_lstm_dims = word_lstm_dims
    self.tagger_lstm_dims = tagger_lstm_dims
    self.dropout = dropout
    self.crf_mode = crf_mode
    assert crf_mode in ('pad', 'reg'), 'crf_mode is invalid'

    # build word input
    words_input = Input(shape=(None,), name='words_input')
    embedding_layer = Embedding(self.word_vocab_size,
                                self.word_embedding_dims,
                                name='word_embedding')
    word_embeddings = embedding_layer(words_input)

    # create word character embeddings
    word_chars_input = Input(shape=(None, self.word_length),
                             name='word_chars_input')
    char_embedding_layer = Embedding(self.char_vocab_size,
                                     self.char_embedding_dims,
                                     name='char_embedding')(word_chars_input)
    char_embeddings = TimeDistributed(
        Conv1D(128, 3, padding='same',
               activation='relu'))(char_embedding_layer)
    char_embeddings = TimeDistributed(GlobalMaxPooling1D())(char_embeddings)

    # create the final feature vectors
    features = concatenate([word_embeddings, char_embeddings], axis=-1)

    # encode using a bi-LSTM
    features = Dropout(self.dropout)(features)
    bilstm = Bidirectional(
        self._rnn_cell(self.tagger_lstm_dims,
                       return_sequences=True))(features)
    bilstm = Bidirectional(
        self._rnn_cell(self.tagger_lstm_dims,
                       return_sequences=True))(bilstm)
    bilstm = Dropout(self.dropout)(bilstm)
    bilstm = Dense(self.target_label_dims)(bilstm)

    inputs = [words_input, word_chars_input]

    if self.use_cudnn:
        self.crf_mode = 'reg'
    with tf.device('/cpu:0'):
        crf = CRF(self.target_label_dims, mode=self.crf_mode, name='ner_crf')
        if self.crf_mode == 'pad':
            sequence_lengths = Input(batch_shape=(None, 1), dtype='int32')
            predictions = crf([bilstm, sequence_lengths])
            inputs.append(sequence_lengths)
        else:
            predictions = crf(bilstm)

    # compile the model
    model = tf.keras.Model(inputs=inputs, outputs=predictions)
    model.compile(loss={'ner_crf': crf.loss},
                  optimizer=tf.keras.optimizers.Adam(0.001, clipnorm=5.),
                  metrics=[crf.viterbi_accuracy])
    self.model = model
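
# Hypothetical usage sketch (illustrative only): same assumed `NERCRF`-style
# class as above, built in 'pad' mode, so an extra sequence-lengths input is
# appended to the model inputs; with crf_mode='reg' (or use_cudnn=True) only
# the two token inputs are needed. All values are placeholders.
#
#     ner = NERCRF(use_cudnn=False)
#     ner.build(word_length=12,
#               target_label_dims=17,
#               word_vocab_size=30000,
#               char_vocab_size=100,
#               crf_mode='pad')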