def BertCrfModel(transformer_model, max_sentence_length, label_nums):
    input_ids = tf.keras.Input(
        name="input_ids", shape=(max_sentence_length,), dtype=tf.int32
    )
    attention_mask = tf.keras.Input(
        name="attention_mask", shape=(max_sentence_length,), dtype=tf.int32
    )
    transformer = transformer_model([input_ids, attention_mask])
    hidden_states = transformer[1]

    # Select the last `hidden_states_size` hidden layers and concatenate them.
    hidden_states_size = 1
    hidden_states_ind = list(range(-hidden_states_size, 0, 1))
    selected_hidden_states = tf.keras.layers.concatenate(
        tuple([hidden_states[i] for i in hidden_states_ind])
    )

    crf = CRF(dtype="float32")
    output = tf.keras.layers.Dense(label_nums, activation="relu")(
        selected_hidden_states
    )
    output = crf(output)
    model = tf.keras.models.Model(
        inputs=[input_ids, attention_mask], outputs=output
    )
    model = ModelWithCRFLoss(model)
    return model

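# A minimal usage sketch of the builder above, under stated assumptions: `CRF`
# and `ModelWithCRFLoss` come from the tf2crf package, and `transformer_model`
# is a Hugging Face TFBertModel loaded with output_hidden_states=True so that
# the per-layer hidden states are available. Note the `transformer[1]` indexing
# above is an assumption about the output layout; depending on the transformers
# version/config the hidden-states tuple may sit at a different index (e.g.
# after the pooler output). Checkpoint name and label count are placeholders.
import tensorflow as tf
from transformers import TFBertModel

bert = TFBertModel.from_pretrained("bert-base-cased", output_hidden_states=True)
ner_model = BertCrfModel(bert, max_sentence_length=128, label_nums=9)
# ModelWithCRFLoss computes the CRF negative log-likelihood internally,
# so compile() takes only the optimizer.
ner_model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=3e-5))
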
def build(self, hp):
    # Model definition
    inpt = Input(shape=(MAX_LEN,))  # MAX_LEN, VECT_SIZE

    # input_dim: size of the vocabulary, i.e. maximum integer index + 1
    # output_dim: dimension of the dense embedding
    # input_shape: 2D tensor with shape (batch_size, input_length)
    # doc_vocab: vocabulary size (number of words) of the train dataset
    model = Embedding(doc_vocab,  # n_words + 2 (PAD & UNK)
                      output_dim=100,
                      input_length=MAX_LEN,
                      weights=[embedding_matrix],  # use GloVe vectors as initial weights
                      mask_zero=True,
                      trainable=True,
                      activity_regularizer=l1(0.0000001))(inpt)  # name='word_embedding'
    # hp.Choice('activity_regularizer_1', values=[0.0, 0.00001, 0.000001, 0.0000001])

    # NOTE: this hyperparameter is registered but its value is never used below.
    hp.Choice('activity_regularizer_2',
              values=[0.0, 0.0000001, 0.00000001, 0.000000001])

    # recurrent_dropout=0.1: 10% chance of dropping the connections that simulate LSTM memory cells
    # units = 100 / 0.55 = 182 neurons (to account for 0.55 dropout)
    model = Bidirectional(
        LSTM(units=100,
             return_sequences=True,
             activity_regularizer=l1(0.000000001),
             recurrent_constraint=max_norm(2)))(model)  # input_shape=(1, MAX_LEN, VECT_SIZE)
    model = Dropout(hp.Choice('dropout', values=[0.0, 0.3, 0.5]))(model)

    # model = TimeDistributed(Dense(number_labels, activation="relu"))(model)  # a dense layer as suggested by neuralNer
    model = Dense(number_labels, activation=None)(model)  # activation=None is the same as 'linear'

    crf = CRF()  # CRF layer; open question: should this be number_labels + 1 (+1 for PAD)?
    out = crf(model)  # output
    model = Model(inputs=inpt, outputs=out)

    # set learning rate
    # lr_rate = InverseTimeDecay(initial_learning_rate=0.05, decay_rate=4, decay_steps=steps_per_epoch)
    # lr_rate = ExponentialDecay(initial_learning_rate=0.01, decay_rate=0.5, decay_steps=10000)

    # set optimizer; decay = learning_rate / epochs
    # CASE 1: decay=0.01, CASE 2: decay=0.1/5
    opt = SGD(learning_rate=0.0, momentum=0.9,
              clipvalue=5.0)  # gradient clipping: clip gradients to [-5, 5]
    # opt = SGD(learning_rate=0.01, decay=0.01/steps_per_epoch, momentum=0.9, clipvalue=10.0)
    # opt = SGD(learning_rate=lr_rate, clipvalue=3.0, clipnorm=2.0, momentum=0.9)

    # compile Bi-LSTM-CRF
    model.compile(optimizer=opt, loss=crf.loss, metrics=[crf.accuracy])  # , f1score()
    # model.compile(optimizer=opt, loss=crf.loss, metrics=[crf.viterbi_accuracy])

    # tuned initial learning rate, stored for use outside build()
    self.initial_lrate = hp.Choice('learning_rate', [0.05, 0.01])
    return model

def main():
    X, y, words, tags = load_dataset(DATA_PATH)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15)

    inp = Input(shape=(MAX_LEN,))
    model = Embedding(input_dim=len(words) + 2,
                      output_dim=EMBEDDING_SIZE,
                      input_length=MAX_LEN,
                      mask_zero=True)(inp)
    model = Bidirectional(
        LSTM(units=50, return_sequences=True, recurrent_dropout=0.1))(model)
    model = TimeDistributed(Dense(50, activation="relu"))(model)
    crf = CRF(len(tags) + 1)  # CRF layer (+1 for PAD)
    out = crf(model)  # output
    model = Model(inp, out)
    model.compile(optimizer="rmsprop", loss=crf.loss, metrics=[crf.accuracy])
    model.summary()

    checkpointer = ModelCheckpoint(filepath='model.h5',
                                   verbose=0,
                                   mode='auto',
                                   save_best_only=True,
                                   monitor='val_loss')
    history = model.fit(X_train,
                        np.array(y_train),
                        batch_size=BATCH_SIZE,
                        epochs=EPOCHS,
                        validation_split=0.1,
                        callbacks=[checkpointer])

def build_crf(self):
    reg = tf.keras.regularizers.L2(1e-3)
    inputs = Input(shape=(self.seq_len, self.feat_dim), dtype='float32')
    mid = Dense(self.num_classes,
                input_shape=(self.seq_len, self.feat_dim),
                activation='linear',
                kernel_regularizer=reg)(inputs)
    crf = CRF(dtype='float32', sparse_target=True)
    crf.sequence_lengths = self.seq_len
    crf.output_dim = self.num_classes
    output = crf(mid)
    model = Model(inputs, output)
    opt = tf.keras.optimizers.Adam(learning_rate=0.01)
    model.compile(loss=crf.loss, optimizer=opt, metrics=[crf.accuracy])
    model.load_weights(self.crf_weights)
    self.crf = model

def get_compiled_model(self, vectorizer_model_name, missing_values_handled,
                       max_sentence_length, max_word_length, n_words, n_chars,
                       n_tags, word2idx):
    vectorizer_model_settings = models[vectorizer_model_name]
    vectorizer_model_size = vectorizer_model_settings['vector_size']

    # input and embeddings for words
    word_in = Input(shape=(max_sentence_length,))
    if not vectorizer_model_settings['precomputed_vectors']:
        emb_word = Embedding(input_dim=n_words + 2,
                             output_dim=vectorizer_model_size,
                             input_length=max_sentence_length,
                             mask_zero=True)(word_in)
    else:
        embedding_weights = get_embedding_weights(vectorizer_model_name,
                                                  vectorizer_model_size,
                                                  missing_values_handled,
                                                  word2idx)
        emb_word = Embedding(input_dim=n_words + 2,
                             output_dim=vectorizer_model_size,
                             input_length=max_sentence_length,
                             mask_zero=True,
                             weights=[embedding_weights],
                             trainable=False)(word_in)

    # input and embeddings for characters
    char_in = Input(shape=(max_sentence_length, max_word_length))
    emb_char = TimeDistributed(
        Embedding(input_dim=n_chars + 2, output_dim=10,
                  mask_zero=True))(char_in)

    # character LSTM to get word encodings by characters
    char_enc = TimeDistributed(
        LSTM(units=20, return_sequences=False,
             recurrent_dropout=0.5))(emb_char)

    # main LSTM
    x = concatenate([emb_word, char_enc])
    # x = SpatialDropout1D(0.3)(x)
    model = Bidirectional(
        LSTM(units=50, return_sequences=True, recurrent_dropout=0.1))(x)
    model = TimeDistributed(Dense(50, activation='relu'))(model)

    crf = CRF(n_tags + 1)
    out = crf(model)
    model = Model([word_in, char_in], out)
    model.summary()
    model.compile(optimizer="rmsprop", loss=crf.loss, metrics=[crf.accuracy])
    return model

def test_model():
    inputs = Input(shape=(None,), dtype='int32')
    output = Embedding(100, 40, trainable=True, mask_zero=True)(inputs)
    output = Bidirectional(GRU(64, return_sequences=True))(output)
    output = Dense(9, activation=None)(output)
    crf = CRF(dtype='float32', name='crf')
    output = crf(output)
    base_model = Model(inputs, output)
    model = ModelWithCRFLoss(base_model)
    model.compile(optimizer='adam')
    return model

def __init__(self, config, *inputs, **kwargs):
    super().__init__(config, *inputs, **kwargs)
    self.num_labels = config.num_labels
    self.config = config
    self.bert = TFBertMainLayer(self.config, name="bert")
    self.bilstm = Bidirectional(
        LSTM(units=50, return_sequences=True, recurrent_dropout=0.1))
    self.dropout = Dropout(0.2)
    self.time_distributed = TimeDistributed(
        Dense(self.num_labels, activation="relu"))
    self.crf = CRF(self.num_labels + 1)

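# Only the constructor is shown above. For readability, here is a minimal
# sketch of the forward pass these layers imply. The method name `call` and
# the use of BERT's first output (the per-token sequence output) follow the
# usual Keras/transformers conventions, but they are assumptions, not part of
# the original snippet.
def call(self, inputs, training=False):
    sequence_output = self.bert(inputs)[0]  # (batch, seq_len, hidden_size)
    x = self.bilstm(sequence_output)
    x = self.dropout(x, training=training)
    x = self.time_distributed(x)
    return self.crf(x)  # per-token tag scores / decoded tags from the CRF
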
def __call__(self):
    self.reg = tf.keras.regularizers.L2(self.L2)
    inputs = Input(shape=(self.seq_dim, self.feat_dim), dtype='float32')
    mid = Dense(self.num_classes,
                input_shape=(self.seq_dim, self.feat_dim),
                activation='softmax',
                kernel_regularizer=self.reg)(inputs)
    self.crf = CRF(dtype='float32', sparse_target=True)
    self.crf.sequence_lengths = self.seq_dim
    self.crf.output_dim = self.num_classes
    output = self.crf(mid)
    model = Model(inputs, output)
    self.model = model
    return model

def test():
    inputs = Input(shape=(None,), dtype='int32')
    output = Embedding(100, 40, trainable=True, mask_zero=True)(inputs)
    output = Bidirectional(GRU(64, return_sequences=True))(output)
    output = Dense(9, activation=None)(output)
    crf = CRF(dtype='float32')
    output = crf(output)
    model = Model(inputs, output)
    model.compile(loss=crf.loss, optimizer='adam', metrics=[crf.accuracy])
    # use arrays, not nested lists: Keras treats a top-level list as multiple inputs
    x = np.array([[5, 2, 3] * 3] * 10)
    y = np.array([[1, 2, 3] * 3] * 10)
    model.fit(x=x, y=y, epochs=2, batch_size=2)
    model.save('model')

def test():
    inputs = Input(shape=(None,), dtype='int32')
    output = Embedding(100, 40, trainable=True, mask_zero=True)(inputs)
    output = Bidirectional(GRU(64, return_sequences=True))(output)
    output = Dense(9, activation=None)(output)
    crf = CRF(dtype='float32')
    output = crf(output)
    base_model = Model(inputs, output)
    model = ModelWithCRFLoss(base_model)
    model.compile(optimizer='adam')
    x = np.array([[5, 2, 3] * 3] * 100)
    y = np.array([[1, 2, 3] * 3] * 100)
    model.fit(x=x, y=y, epochs=10, batch_size=4, validation_split=0.1)
    model.save('model')

def _bi_lstm_crf_model(n_words: int, n_tags: int, max_len: int):
    """Bi-LSTM-CRF model."""
    input_ = tf.keras.layers.Input(shape=(max_len,), name='input_layer')
    embedding_layer = tf.keras.layers.Embedding(
        input_dim=n_words + 2,
        output_dim=50,
        mask_zero=True,
        name='embedding_layer')(input_)
    lstm_layer = tf.keras.layers.Bidirectional(
        tf.keras.layers.LSTM(units=128,
                             return_sequences=True,
                             activation="relu",
                             recurrent_dropout=0.4))(embedding_layer)
    tensor = tf.keras.layers.Dropout(0.4)(lstm_layer)
    tensor = tf.keras.layers.Dense(n_tags)(tensor)
    crf = CRF(n_tags)
    output = crf(tensor)
    model = tf.keras.models.Model(input_, output)
    return model

def __init__(self, vocab_size=10, num_states=4, embedding_dim=16, rnn_units=8):
    super(BiRNNCRF, self).__init__()
    # 0 for `pad`, 1 for `unk`
    self.embedding_layer = Embedding(input_dim=vocab_size + 2,
                                     output_dim=embedding_dim)
    # merge_mode: sum, mul, concat, ave. Default is `concat`.
    self.bi_rnn_layer = Bidirectional(GRU(units=rnn_units,
                                          return_sequences=True),
                                      merge_mode="ave")
    # 4-tag scheme: B, M, E, S, plus `0` for pad
    self.dense_layer = Dense(units=5, activation='softmax')
    self.crf_layer = CRF()

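# As with the previous subclassed model, only the constructor is given. A
# minimal forward-pass sketch under the usual Keras subclassing convention
# might look as follows; the `call` signature is an assumption.
def call(self, inputs, training=False):
    x = self.embedding_layer(inputs)             # (batch, time, embedding_dim)
    x = self.bi_rnn_layer(x, training=training)  # averaged Bi-GRU states
    x = self.dense_layer(x)                      # per-step scores over the 5 tags
    return self.crf_layer(x)
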
def get_compiled_model(self, vectorizer_model_name, missing_values_handled,
                       max_sentence_length, max_word_length, n_words, n_tags,
                       word2idx):
    vectorizer_model_settings = models[vectorizer_model_name]
    vectorizer_model_size = vectorizer_model_settings['vector_size']

    word_in = Input(shape=(max_sentence_length,))
    if not vectorizer_model_settings['precomputed_vectors']:
        emb_word = Embedding(input_dim=n_words + 2,
                             output_dim=vectorizer_model_size,
                             input_length=max_sentence_length,
                             mask_zero=True)(word_in)
    else:
        embedding_weights = get_embedding_weights(vectorizer_model_name,
                                                  vectorizer_model_size,
                                                  missing_values_handled,
                                                  word2idx)
        emb_word = Embedding(input_dim=n_words + 2,
                             output_dim=vectorizer_model_size,
                             input_length=max_sentence_length,
                             mask_zero=True,
                             weights=[embedding_weights],
                             trainable=False)(word_in)

    model = Bidirectional(
        LSTM(units=50, return_sequences=True,
             recurrent_dropout=0.1))(emb_word)
    model = TimeDistributed(Dense(50, activation='relu'))(model)

    crf = CRF(n_tags + 1)
    out = crf(model)
    model = Model(word_in, out)
    model.summary()
    model.compile(optimizer="rmsprop", loss=crf.loss, metrics=[crf.accuracy])
    return model

with open('w2i.json', encoding="utf8") as json_file:
    word2idx = json.load(json_file)
with open('i2w.json', encoding="utf8") as json_file:
    idx2word = json.load(json_file)

max_len = 223
model = Sequential()
model.add(Embedding(input_dim=len(word2idx),
                    output_dim=40,
                    input_length=max_len,
                    mask_zero=False))
model.add(Bidirectional(LSTM(units=64,
                             return_sequences=True,
                             recurrent_dropout=0.2,
                             dropout=0.2)))
model.add(TimeDistributed(Dense(64, activation="relu")))
model.add(Dropout(0.2))
model.add(TimeDistributed(Dense(64, activation="relu")))
model.add(Dropout(0.2))
crf = CRF(4)  # CRF layer
model.add(crf)
model.load_weights('model.h5')

name = []
for s in sentences:
    # map words to ids, falling back to the UNK index
    s_ids = []
    for w in s:
        if w in word2idx:
            s_ids.append(word2idx[w])
        else:
            s_ids.append(word2idx['UNK'])
    X = pad_sequences(maxlen=max_len,
                      sequences=[s_ids],
                      padding="post",
                      value=word2idx["PAD"])
    predict = model.predict(X)
    for i in range(max_len):
        if predict[0][i] == 1:
            ...

# word embedding layer (GloVe-initialised, as in the tuner build() above)
model = Embedding(doc_vocab,
                  output_dim=100,
                  input_length=MAX_LEN,
                  weights=[embedding_matrix],
                  mask_zero=True,
                  trainable=True,
                  activity_regularizer=l1(0.0000001))(inpt)  # name='word_embedding'

# recurrent_dropout=0.1: 10% chance of dropping the connections that simulate LSTM memory cells
# units = 100 / 0.55 = 182 neurons (to account for 0.55 dropout)
model = Bidirectional(
    LSTM(units=100,
         return_sequences=True,
         activity_regularizer=l1(0.0000000001),
         recurrent_constraint=max_norm(2)))(model)  # input_shape=(1, MAX_LEN, VECT_SIZE)
# model = Dropout(0.3)(model)  # 0.5
# model = TimeDistributed(Dense(number_labels, activation="relu"))(model)  # a dense layer as suggested by neuralNer
model = Dense(number_labels, activation=None)(model)  # activation=None is the same as 'linear'

crf = CRF()  # CRF layer; open question: should this be number_labels + 1 (+1 for PAD)?
out = crf(model)  # output
model = Model(inputs=inpt, outputs=out)

# set optimizer; decay = learning_rate / epochs
opt = SGD(learning_rate=0.0, momentum=0.9,
          clipvalue=5.0)  # gradient clipping: clip gradients to [-5, 5]
# opt = SGD(learning_rate=0.05, decay=0.01, momentum=0.9, clipvalue=5.0)

# compile Bi-LSTM-CRF
model.compile(optimizer=opt, loss=crf.loss, metrics=[crf.accuracy])
# model.compile(optimizer=opt, loss=crf.loss, metrics=[crf.viterbi_accuracy])

print('BEFORE TRAINING', model.get_weights())

"""# CRF ## Model """ inputs = Input((MAX_SEQ_LENGTH, )) output = Embedding(len(train_val_vocab), embedding_dimension, embeddings_initializer=tf.keras.initializers.Constant( embedding_matrix_train_validate), trainable=False, mask_zero=True)(inputs) output = Bidirectional(LSTM(crf_rnn_units, return_sequences=True))(output) output = TimeDistributed(Dense(len(pos_tags)))(output) crf = CRF(dtype='float32') output = crf(output) base_model = Model(inputs, output) model_CRF = ModelWithCRFLoss(base_model) model_CRF.compile(optimizer='adam') model_CRF.build((None, MAX_SEQ_LENGTH)) model_CRF.summary() """## Fit""" Y_train_crf = flatten_y(Y_train) Y_validate_crf = flatten_y(Y_validate) model_CRF.fit(X_train, Y_train_crf, validation_data=(X_validate, Y_validate_crf), batch_size=crf_batch_size, epochs=gru_epochs,
st.set_option('deprecation.showPyplotGlobalUse', False)

# Setting global network params
SEQ_LEN = 1024
NUM_CLASSES = 25
FEAT_DIM = 128

# Since tf2crf's CRF is a custom class, rebuild the CRF model and load the
# weights (the saved model cannot be loaded directly).
reg = tf.keras.regularizers.L2(1e-3)
inputs = Input(shape=(SEQ_LEN, FEAT_DIM), dtype='float32')
mid = Dense(NUM_CLASSES,
            input_shape=(SEQ_LEN, FEAT_DIM),
            activation='linear',
            kernel_regularizer=reg)(inputs)
crf = CRF(dtype='float32', sparse_target=True)
crf.sequence_lengths = SEQ_LEN
crf.output_dim = NUM_CLASSES
output = crf(mid)
model = Model(inputs, output)
opt = tf.keras.optimizers.Adam(learning_rate=0.01)
model.compile(loss=crf.loss, optimizer=opt, metrics=[crf.accuracy])

load_from = './model_01'
model.load_weights(load_from)
crf = model

# Loading the FCNN feature extractor
cnn = load_model('cnn_extractor.h5')

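# The two models loaded above are presumably chained at inference time: the
# CNN extracts per-frame features and the CRF decodes a class sequence from
# them. A minimal sketch of that pipeline follows; the input variable and its
# shape are placeholders, and only `cnn` and `crf` come from the script above.
import numpy as np

raw = np.random.rand(1, SEQ_LEN, 128)  # stand-in for real input data
features = cnn.predict(raw)            # assumed to yield (1, SEQ_LEN, FEAT_DIM)
tags = crf.predict(features)           # decoded class sequence per timestep
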
def __init__(self):
    super().__init__(name='intent')
    self.bert = TFBertModel.from_pretrained('bert-base-uncased')
    self.dropout = Dropout(0.1)
    self.dense = Dense(9, activation='relu')
    self.crf = CRF(9)

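# Again only the constructor is shown. Under the same assumptions as the
# earlier subclassed models (a conventional `call` method, BERT's first output
# used as the per-token representation), the forward pass would plausibly be:
def call(self, inputs, training=False):
    sequence_output = self.bert(inputs)[0]  # per-token BERT embeddings
    x = self.dropout(sequence_output, training=training)
    x = self.dense(x)                       # project to the 9 tag classes
    return self.crf(x)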