def create_model(self, vocab_words_size, vocab_pos_size, sequence_length, model_params=None):
    if model_params is None:
        model_params = self._get_default_params()

    # Word pipeline: embedding -> BiLSTM
    word_input = Input(shape=(sequence_length,), name='words_input')
    word_pipe = Embedding(input_dim=vocab_words_size + 1,
                          output_dim=model_params['embedding_dim'],
                          input_length=sequence_length,
                          trainable=True)(word_input)
    word_pipe = Bidirectional(LSTM(model_params['lstm_cells'],
                                   return_sequences=True,
                                   dropout=model_params['word_lstm_dropout'],
                                   recurrent_dropout=model_params['word_lstm_rec_dropout']),
                              merge_mode='concat')(word_pipe)
    word_pipe = TimeDistributed(Flatten())(word_pipe)

    # POS pipeline: embedding -> BiLSTM
    pos_input = Input(shape=(sequence_length,), name='pos_input')
    pos_pipe = Embedding(input_dim=vocab_pos_size + 1,
                         output_dim=model_params['embedding_dim'],
                         input_length=sequence_length,
                         trainable=True)(pos_input)
    pos_pipe = Bidirectional(LSTM(model_params['lstm_cells'],
                                  return_sequences=True,
                                  dropout=model_params['pos_lstm_dropout'],
                                  recurrent_dropout=model_params['pos_lstm_rec_dropout']),
                             merge_mode='concat')(pos_pipe)
    pos_pipe = TimeDistributed(Flatten())(pos_pipe)

    # Concatenate both inputs
    comb_pipe = concatenate([word_pipe, pos_pipe])

    # Main BiLSTM model
    comb_pipe = Bidirectional(LSTM(model_params['lstm_cells'], return_sequences=True),
                              merge_mode='concat')(comb_pipe)
    comb_pipe = TimeDistributed(Dense(64))(comb_pipe)

    output = CRF(2, name='output')(comb_pipe)

    model = Model(inputs=[word_input, pos_input], outputs=output)
    model.compile(loss=crf_loss, optimizer='adam', metrics=[crf_accuracy])
    return model
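# Hedged usage sketch for create_model above (not from the source): builds the
# two-input model on dummy data to show the expected input/output shapes. The
# `tagger` instance and the model_params keys are assumptions inferred from the
# parameter names the function reads.
import numpy as np

params = {'embedding_dim': 100, 'lstm_cells': 64,
          'word_lstm_dropout': 0.2, 'word_lstm_rec_dropout': 0.2,
          'pos_lstm_dropout': 0.2, 'pos_lstm_rec_dropout': 0.2}
model = tagger.create_model(vocab_words_size=5000, vocab_pos_size=40,
                            sequence_length=50, model_params=params)
X_words = np.random.randint(1, 5001, size=(8, 50))   # word-index sequences
X_pos = np.random.randint(1, 41, size=(8, 50))       # POS-index sequences
preds = model.predict([X_words, X_pos])              # shape: (8, 50, 2)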
def _mail_model_two(self):
    output_size = 2
    # Batches of an arbitrary number of line vectors of size self.line_embedding_size.
    in_mail = Input(shape=(None, self.line_embedding_size), dtype='float32')
    mask = Masking()(in_mail)
    hidden = Bidirectional(GRU(32 // 2, return_sequences=True, implementation=0))(mask)
    crf = CRF(output_size, sparse_target=False)  # , test_mode='marginal', learn_mode='marginal')
    output = crf(hidden)
    model = KerasModel(inputs=in_mail, outputs=output)
    # model.compile(loss=crf.loss_function, optimizer='adam', metrics=[crf.accuracy])
    return model
def load(self, path):
    """Load model from a path.

    Args:
        path (str): model file path
    """
    adam = k.optimizers.Adam(lr=0.0005, beta_1=0.9, beta_2=0.999)
    # A fresh CRF layer is built only to borrow its loss/accuracy when recompiling,
    # since the model is loaded with compile=False.
    crf = CRF(self.n_tags)
    self.model = load_model(path, compile=False, custom_objects={'CRF': CRF})
    self.model.compile(optimizer=adam,
                       loss=crf.loss_function,
                       metrics=[crf.accuracy, 'accuracy'])
    self.model.summary()
def model(train=True):
    (train_X, train_Y), (dev_X, dev_Y), (vocab, chunks) = load_data()
    model = Sequential()
    model.add(Embedding(len(vocab), EMBED_DIM, mask_zero=True))
    model.add(Bidirectional(LSTM(BIRNN_UNITS // 2, return_sequences=True)))
    crf = CRF(len(chunks), sparse_target=True)
    model.add(crf)
    model.summary()
    model.compile('adam', loss=crf.loss_function, metrics=[crf.accuracy])
    if train:
        return model
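# Hedged note on sparse_target=True in model() above: keras-contrib's CRF then
# expects integer labels of shape (batch, timesteps, 1) rather than one-hot
# vectors. A minimal fit call under the assumption that load_data() yields
# integer tag matrices of shape (batch, timesteps):
import numpy as np

(train_X, train_Y), (dev_X, dev_Y), _ = load_data()
m = model(train=True)
m.fit(train_X, np.expand_dims(train_Y, -1), batch_size=16, epochs=1,
      validation_data=(dev_X, np.expand_dims(dev_Y, -1)))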
def create_model2():
    model = Sequential()
    model.add(Embedding(input_dim=len(char_dict), output_dim=10,
                        input_length=100, mask_zero=False))
    model.add(Bidirectional(LSTM(units=32, return_sequences=True)))
    crf = CRF(7, sparse_target=False)
    model.add(crf)
    model.compile('adam', loss=crf.loss_function, metrics=[crf.accuracy])
    model.summary()
    return model
def bilstm_cnn_crf(maxlen, useful_word_len, class_label_count, embedding_size,
                   embedding_weights=None, is_train=True):
    word_input = Input(shape=(maxlen,), dtype="int32", name="word_input")
    if is_train:
        word_emb = Embedding(useful_word_len, output_dim=embedding_size,
                             input_length=maxlen, weights=[embedding_weights],
                             name="word_emb")(word_input)
    else:
        word_emb = Embedding(useful_word_len, output_dim=embedding_size,
                             input_length=maxlen, name="word_emb")(word_input)

    # bilstm
    bilstm = Bidirectional(LSTM(64, return_sequences=True))(word_emb)
    bilstm_drop = Dropout(0.1)(bilstm)
    bilstm_dense = TimeDistributed(Dense(embedding_size))(bilstm_drop)

    # cnn (Keras 2 API: `filters`/`kernel_size` replace `nb_filter`/`filter_length`)
    half_window_size = 2
    filter_kernel_number = 64
    padding_layer = ZeroPadding1D(padding=half_window_size)(word_emb)
    conv = Conv1D(filters=filter_kernel_number,
                  kernel_size=2 * half_window_size + 1,
                  padding="valid")(padding_layer)
    conv_drop = Dropout(0.1)(conv)
    conv_dense = TimeDistributed(Dense(filter_kernel_number))(conv_drop)

    # merge the two branches along the feature axis
    rnn_cnn_merge = concatenate([bilstm_dense, conv_dense], axis=2)
    dense = TimeDistributed(Dense(class_label_count))(rnn_cnn_merge)

    # crf
    crf = CRF(class_label_count, sparse_target=False)
    crf_output = crf(dense)

    # model (Keras 2 API: `inputs`/`outputs`)
    model = Model(inputs=[word_input], outputs=crf_output)
    model.compile(loss=crf.loss_function, optimizer="adam", metrics=[crf.accuracy])
    return model
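# Illustrative call of bilstm_cnn_crf above (a sketch, not from the source): a
# random matrix stands in for pre-trained embeddings; all sizes are made up.
import numpy as np

emb = np.random.uniform(-0.25, 0.25, size=(8000, 128)).astype('float32')
m = bilstm_cnn_crf(maxlen=80, useful_word_len=8000, class_label_count=7,
                   embedding_size=128, embedding_weights=emb)
m.summary()  # final output shape should be (None, 80, 7)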
def compile(self, tokenizer, data_dir='./data/', embedding_dim=300,
            dropout_fraction=0.2, hidden_dim=64,
            embedding_file='glove-sbwc.i25.vec'):
    # Load pre-trained Spanish embeddings
    print('Loading spanish embedding...')
    embeddings_index = {}
    with open(os.path.join(data_dir, embedding_file), 'r') as f:
        for line in f:
            values = line.split()
            word = values[0]
            coefs = np.asarray(values[1:], dtype='float32')
            embeddings_index[word] = coefs
    print('Found %s word vectors.' % len(embeddings_index))

    # Create embedding matrix; words not found in the index stay all-zeros
    print('Creating embedding layer...')
    embedding_matrix = np.zeros((len(tokenizer.tokenizer.word_index) + 1, embedding_dim))
    for word, i in tokenizer.tokenizer.word_index.items():
        embedding_vector = embeddings_index.get(word)
        if embedding_vector is not None:
            embedding_matrix[i] = embedding_vector

    # Create network
    print('Creating network...')
    self.model = Sequential()
    self.model.add(Embedding(len(tokenizer.tokenizer.word_index) + 1,
                             embedding_dim,
                             weights=[embedding_matrix],
                             input_length=tokenizer.max_sequence_length,
                             trainable=False,
                             mask_zero=True))
    self.model.add(Bidirectional(LSTM(hidden_dim, return_sequences=True)))
    self.model.add(TimeDistributed(Dense(hidden_dim, activation='relu')))

    # CRF output layer
    crf = CRF(len(self.tag_map), learn_mode='marginal')
    self.model.add(crf)

    # Compile model
    print('Compiling network...')
    self.model.compile(loss=crf.loss_function, optimizer='adam', metrics=[crf.accuracy])
    self.model.summary()
def CNN_CRF_char(charvocabsize, targetvocabsize, posivocabsize, wordvobsize,
                 char_W, posi_W, word_W,
                 input_seq_lenth, char_k, posi_k, word_k, batch_size=16):
    char_input = Input(shape=(input_seq_lenth,), dtype='int32')
    char_embedding_RNN = Embedding(input_dim=charvocabsize + 1,
                                   output_dim=char_k,
                                   input_length=input_seq_lenth,
                                   mask_zero=False,
                                   trainable=True,
                                   weights=[char_W])(char_input)
    char_embedding = Dropout(0.5)(char_embedding_RNN)

    # Note: posi_input is declared as a model input but never used in the graph.
    posi_input = Input(shape=(input_seq_lenth,), dtype='int32')

    # Multi-width character CNNs over the embedded sequence
    cnn3 = Conv1D(100, 3, activation='relu', strides=1, padding='same')(char_embedding)
    cnn4 = Conv1D(50, 4, activation='relu', strides=1, padding='same')(char_embedding)
    cnn2 = Conv1D(50, 2, activation='relu', strides=1, padding='same')(char_embedding)
    cnn5 = Conv1D(50, 5, activation='relu', strides=1, padding='same')(char_embedding)
    cnns = concatenate([cnn5, cnn3, cnn4, cnn2], axis=-1)
    cnns = BatchNormalization(axis=1)(cnns)
    cnns = Dropout(0.5)(cnns)

    TimeD = TimeDistributed(Dense(targetvocabsize + 1))(cnns)

    crflayer = CRF(targetvocabsize + 1, sparse_target=False)
    crf_output = crflayer(TimeD)

    model = Model([char_input, posi_input], crf_output)
    model.compile(loss=crflayer.loss_function,
                  optimizer=optimizers.Adam(lr=0.001),
                  metrics=[crflayer.accuracy])
    return model
def prepare_kadjk_model(max_mini_batch_size, max_conversation_length, timesteps,
                        num_word_dimensions, word_to_index, word_vec_dict,
                        num_tags, loss_function, optimizer):
    # Hyperparameters
    m = timesteps
    h = timesteps

    model = Sequential()

    dictionary_size = len(word_to_index) + 1
    print('dictionary_size:' + str(dictionary_size))

    embedding_weights = numpy.zeros((dictionary_size, num_word_dimensions))
    for word, index in word_to_index.items():
        embedding_weights[index, :] = word_vec_dict[word]

    # define inputs here
    embedding_layer = Embedding(dictionary_size, num_word_dimensions,
                                weights=[embedding_weights],
                                embeddings_regularizer=regularizers.l2(0.0001))
    model.add(TimeDistributed(embedding_layer,
                              input_shape=(max_conversation_length, timesteps)))

    # Alternative utterance encoder (unused):
    # model.add(TimeDistributed(Bidirectional(LSTM(m // 2, return_sequences=True,
    #                                              kernel_regularizer=regularizers.l2(0.0001)))))
    # model.add(TimeDistributed(Dropout(0.2)))
    # model.add(TimeDistributed(GlobalAveragePooling1D()))

    # Utterance encoder: one BiLSTM vector per conversation step
    model.add(TimeDistributed(
        Bidirectional(LSTM(m // 2, kernel_regularizer=regularizers.l2(0.0001)))))
    model.add(Dropout(0.2))

    # Conversation-level BiLSTM over the utterance vectors
    model.add(Bidirectional(LSTM(h // 2, return_sequences=True,
                                 kernel_regularizer=regularizers.l2(0.0001)),
                            merge_mode='concat'))
    model.add(Dropout(0.2))

    crf = CRF(num_tags, sparse_target=False,
              kernel_regularizer=regularizers.l2(0.0001))
    print("Before CRF: %s" % str(model.output_shape))
    model.add(crf)

    model.compile(optimizer, loss=crf_loss, metrics=[crf_accuracy])
    # TODO: Can we support providing custom loss functions like the Lee-Dernoncourt model?
    return model
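# Hedged shape check for prepare_kadjk_model above: inputs are word-index tensors
# of shape (batch, max_conversation_length, timesteps); targets are one-hot tag
# tensors of shape (batch, max_conversation_length, num_tags) because
# sparse_target=False. All toy values below are assumptions.
import numpy

w2i = {'hello': 1, 'world': 2}
wv = {w: numpy.ones(10) for w in w2i}
m = prepare_kadjk_model(max_mini_batch_size=4, max_conversation_length=6,
                        timesteps=8, num_word_dimensions=10,
                        word_to_index=w2i, word_vec_dict=wv,
                        num_tags=5, loss_function=None, optimizer='adam')
x = numpy.random.randint(0, 3, size=(4, 6, 8))
y = numpy.zeros((4, 6, 5))
y[..., 0] = 1.0  # dummy one-hot tags
m.train_on_batch(x, y)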
def build_model(self):
    input = Input(shape=(self.max_len,))
    model = Embedding(input_dim=self.n_words, output_dim=50,
                      input_length=self.max_len, mask_zero=True)(input)  # 50-dim embedding
    model = Dropout(0.1)(model)
    model = Bidirectional(LSTM(units=100, return_sequences=True,
                               recurrent_dropout=0.1))(model)
    model = TimeDistributed(Dense(50, activation="relu"))(model)  # a dense layer as suggested by neuralNer
    crf = CRF(self.n_tags)  # CRF layer
    out = crf(model)  # output
    model = Model(input, out)
    self.model = model
def stackedLSTM(embeddings=[], word2iddict=w2id, drop=tr_dropout,
                hidden_layers=hdn_layers, embed_dims=dimensions):
    seq_input = Input(batch_shape=BATCH_SHAPE)
    seq = Embedding(input_dim=len(word2iddict), output_dim=embed_dims,
                    weights=[embeddings], name='seq')(seq_input)
    kGEN = Bidirectional(LSTM(dimensions, return_sequences=True,
                              activation='relu'))(seq)
    kGEN = LSTM(N_CLASSES, return_sequences=True, activation='softmax')(kGEN)
    kGEN = TimeDistributed(Dense(N_CLASSES, activation='relu'))(kGEN)
    crf = CRF(N_CLASSES)
    seq_out = crf(kGEN)
    model = Model(inputs=seq_input, outputs=seq_out)
    model.compile(optimizer='rmsprop', loss=crf.loss_function,
                  metrics=[crf.accuracy])
    print(model.metrics_names)
    return model
def model_create(self):
    inputs = Input(shape=(data_prossor.maxlen,), dtype='int32')
    x = Embedding(self.max_wnum, self.embd_dim, mask_zero=True)(inputs)
    x = Bidirectional(LSTM(self.lstmunit, return_sequences=True))(x)
    x = Dropout(self.drop_rate)(x)
    # Dense projection of the Bi-LSTM outputs at every timestep
    x = Dense(self.label_num)(x)
    crf_output = CRF(units=self.label_num, sparse_target=True)(x)
    model = Model(inputs=inputs, outputs=crf_output)
    model.summary()
    model.compile(optimizer='adam', loss=losses.crf_loss,
                  metrics=[metrics.crf_accuracy])
    plot_model(model, to_file="{}{}".format(self.model_path, 'bilstm-crf.png'))
    return model
def modeling(train=True):
    model = Sequential()
    model.add(Embedding(len(load_vocab()) + 1, EMBED_DIM, mask_zero=True))
    model.add(Bidirectional(LSTM(BiRNN_UNITS // 2, return_sequences=True)))
    crf = CRF(len(chunk_tags), sparse_target=True)
    model.add(crf)
    model.summary()
    plot_model(model, to_file='model/model.png', show_shapes=True)
    model.compile(Adam(), loss=crf.loss_function, metrics=[crf.accuracy])
    if exists(filepath):
        model.load_weights(filepath)
    if train:
        x, y = load_data()
        model.fit(x, y, batch_size=batch_size, epochs=epochs, verbose=verbose,
                  callbacks=callbacks, validation_split=validation_split)
        model.save_weights(filepath)
    return model
def get_model(self):
    # main
    # char_input = Input(shape=(None,), name='main_input')
    x = self.embedding.model.output
    x = Bidirectional(CuDNNLSTM(self.parms['n_lstm'], return_sequences=True))(x)
    crf = CRF(self.parms['n_entity'], sparse_target=True)
    output = crf(x)
    # embedding.model.inputs is already a list, so pass it through unwrapped
    model = Model(inputs=self.embedding.model.inputs, outputs=output)
    model.compile(optimizer="adam", loss=crf.loss_function,
                  metrics=[crf.accuracy])
    model.summary()
    return model
def NamedEntityRecognizer(seq_len, voc_dim, emb_dim, n_class):
    print("Initializing the Named Entity Recognizer...")
    inputs = Input((seq_len,))
    embeddings = Embedding(voc_dim + 2, emb_dim)(inputs)
    bilstm = Bidirectional(LSTM(units=50, return_sequences=True,
                                recurrent_dropout=0.1))(embeddings)
    hidden = TimeDistributed(Dense(150, activation="relu"))(bilstm)
    outputs = CRF(n_class)(hidden)
    return keras.Model(inputs=inputs, outputs=outputs)
def build_model(self):
    input_layer = Input(shape=(self.max_sent_len,), name='input_layer')
    embed_layer = Embedding(self.vocab_size, self.emb_dims, mask_zero=True,
                            name='embedding_layer')(input_layer)
    birnn_layer = Bidirectional(LSTM(self.birnn_unit // 2, return_sequences=True,
                                     name='bilstm_layer'))(embed_layer)
    crf = CRF(self.tag_num, sparse_target=False, name='crf_layer')
    crf_layer = crf(birnn_layer)
    model = Model(inputs=input_layer, outputs=crf_layer)
    model.compile(optimizer='adam', loss=crf.loss_function,
                  metrics=[crf.accuracy])
    return model
def BiLSTM_CRF_attention(self, wordsids, classes, train, val, train_label,
                         val_label, istrain=True):
    now = datetime.now()
    self.gpu_config()

    output_dim = 50
    lstm_cell = 50
    span = 2 * 60

    inputs = Input(shape=(span,))
    emb = Embedding(len(wordsids), output_dim, mask_zero=False)(inputs)
    bd = Bidirectional(LSTM(lstm_cell, return_sequences=True))(emb)
    bd_d = Dropout(0.5)(bd)
    td = TimeDistributed(Dense(len(classes)))(bd_d)
    at = AttentionLayer()(td)
    td_d = Dropout(0.5)(at)

    # Keep a reference to the CRF layer itself so its loss/accuracy can be used below.
    crf = CRF(len(classes), sparse_target=True)
    crf_output = crf(td_d)
    model = Model(inputs, crf_output)

    checkpoint = ModelCheckpoint("model/model_{}.h5".format(now.strftime("%d_%H_%M")),
                                 monitor="val_acc", verbose=1,
                                 save_best_only=True, mode="max")
    model.compile(keras.optimizers.Adam(1e-2), loss=crf.loss_function,
                  metrics=[crf.accuracy])

    if istrain:
        history = model.fit(train, train_label, self.batch_size,
                            epochs=self.epochs,
                            validation_data=(val, val_label),
                            # steps_per_epoch=1,
                            # validation_steps=1,
                            callbacks=[checkpoint])
        return model, history
    else:
        return model
def build_model(vectorizer, embed_dim, num_layers, recurrent_dim, lr, dropout,
                no_crf=False, num_classes=3):
    input_ = Input(shape=(vectorizer.max_len,), dtype='int32')
    m = Embedding(input_dim=len(vectorizer.syll2idx), output_dim=embed_dim,
                  mask_zero=True, input_length=vectorizer.max_len)(input_)
    m = Dropout(dropout)(m)

    # Stack of bidirectional LSTM encoders
    for i in range(num_layers):
        if i == 0:
            curr_input = m
        else:
            curr_input = curr_enc_out
        curr_enc_out = Bidirectional(LSTM(units=recurrent_dim,
                                          return_sequences=True,
                                          activation='tanh',
                                          recurrent_dropout=dropout,
                                          name='enc_lstm_' + str(i + 1)),
                                     merge_mode='sum')(curr_input)
        curr_enc_out = Dropout(dropout)(curr_enc_out)

    dense = TimeDistributed(Dense(num_classes, activation='relu'),
                            name='dense')(curr_enc_out)
    optim = Adam(lr=lr)

    if not no_crf:
        crf = CRF(num_classes)
        output_ = crf(dense)
        model = Model(inputs=input_, outputs=output_)
        model.compile(optimizer=optim, loss=crf_loss,
                      metrics=[crf_viterbi_accuracy])
    else:
        output_ = Activation('softmax', name='out')(dense)
        model = Model(inputs=input_, outputs=output_)
        model.compile(optimizer=optim,
                      loss={'out': 'categorical_crossentropy'},
                      metrics=['accuracy'])
    return model
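# Illustrative use of the no_crf switch in build_model above: the same encoder
# stack gets either a CRF head (crf_loss / crf_viterbi_accuracy) or a softmax
# head (categorical_crossentropy). `vec` is a hypothetical vectorizer exposing
# max_len and syll2idx, as the function assumes.
crf_model = build_model(vec, embed_dim=64, num_layers=2, recurrent_dim=128,
                        lr=1e-3, dropout=0.25)
softmax_model = build_model(vec, embed_dim=64, num_layers=2, recurrent_dim=128,
                            lr=1e-3, dropout=0.25, no_crf=True)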
def embed_bilstm_crf_model_with_pos(self):
    input_char = Input(shape=(self.max_len,))
    input_word = Input(shape=(self.max_word_len,))
    input_pos = Input(shape=(self.max_word_len,))
    input_bigram = Input(shape=(self.max_bigram_len,))

    char_embedding = Embedding(input_dim=self.max_vocab_size,
                               output_dim=self.embed_dim,
                               weights=[self.char_text_embeddings],
                               trainable=self.embed_trainable,
                               mask_zero=False)
    word_embedding = Embedding(input_dim=self.max_word_vocab_size,
                               output_dim=self.word_embed_dim,
                               weights=[self.word_text_embeddings],
                               trainable=self.embed_trainable,
                               mask_zero=False)
    pos_embedding = Embedding(input_dim=60, output_dim=self.word_embed_dim)
    # bigram_embedding = Embedding(input_dim=self.max_bigram_vocab_size,
    #                              output_dim=self.bigram_embed_dim)

    char_embed_output = char_embedding(input_char)
    word_embed_output = word_embedding(input_word)
    pos_embed_output = pos_embedding(input_pos)
    # bigram_embed_output = bigram_embedding(input_bigram)

    # Concatenate the three embedded sequences along the time axis
    concat_output = Concatenate(axis=1)([char_embed_output, word_embed_output,
                                         pos_embed_output])
    lstm_out = Bidirectional(LSTM(self.lstm_units,
                                  return_sequences=True))(concat_output)
    att_out = SeqSelfAttention(attention_activation="sigmoid")(lstm_out)
    dropout = Dropout(0.3)(att_out)

    # Project the concatenated time axis back down to max_len via two Permutes
    permute_concat_output = Permute((2, 1))(dropout)
    dense_out = Dense(self.max_len, activation="relu")(permute_concat_output)
    permute_dense_out = Permute((2, 1))(dense_out)
    dense = TimeDistributed(Dense(9, activation="relu"))(permute_dense_out)  # 9 is the number of POS tag classes
    crf_tags = CRF(9, learn_mode="marginal", sparse_target=True)(dense)

    model = Model(inputs=[input_char, input_word, input_pos], outputs=crf_tags)
    model.summary()
    model.compile(optimizer=Adam(lr=0.01), loss=crf_loss, metrics=[crf_accuracy])
    plot_model(model,
               to_file=os.path.join(self.saved_model_dir, 'embed_bilstm_crf_model.png'),
               show_shapes=True)
    return model
def build(self):
    # build word embedding
    word_ids = Input(batch_shape=(None, None), dtype='int32')
    lengths = Input(batch_shape=(None, None), dtype='int32')
    inputs = [word_ids]
    if self._embeddings is None:
        word_embeddings = Embedding(input_dim=self._word_vocab_size,
                                    output_dim=self._word_embedding_dim)(word_ids)
    else:
        word_embeddings = Embedding(input_dim=self._embeddings.shape[0],
                                    output_dim=self._embeddings.shape[1],
                                    weights=[self._embeddings])(word_ids)

    # build character-based word embedding
    if self._use_char:
        char_ids = Input(batch_shape=(None, None, None), dtype='int32')
        inputs.append(char_ids)
        char_embeddings = Embedding(input_dim=self._char_vocab_size,
                                    output_dim=self._char_embedding_dim)(char_ids)
        s = K.shape(char_embeddings)
        char_embeddings = Lambda(lambda x: K.reshape(
            x, shape=(-1, s[-2], self._char_embedding_dim)))(char_embeddings)
        fwd_state = LSTM(self._char_lstm_size, return_state=True)(char_embeddings)[-2]
        bwd_state = LSTM(self._char_lstm_size, return_state=True,
                         go_backwards=True)(char_embeddings)[-2]
        char_embeddings = Concatenate(axis=-1)([fwd_state, bwd_state])
        # shape = (batch size, max sentence length, char hidden size)
        char_embeddings = Lambda(lambda x: K.reshape(
            x, shape=[-1, s[1], 2 * self._char_lstm_size]))(char_embeddings)

        # combine characters and word
        word_embeddings = Concatenate(axis=-1)([word_embeddings, char_embeddings])

    inputs.append(lengths)

    word_embeddings = Dropout(self._dropout)(word_embeddings)
    z = Bidirectional(LSTM(units=self._word_lstm_size,
                           return_sequences=True))(word_embeddings)
    z = Dropout(self._dropout)(z)
    z = Dense(self._fc_dim, activation='tanh')(z)
    z = Dense(self._fc_dim, activation='tanh')(z)

    if self._use_crf:
        crf_layer = CRF(self._num_labels, sparse_target=False)
        self._loss = crf_layer.loss_function
        pred = crf_layer(z)
    else:
        self._loss = 'categorical_crossentropy'
        pred = Dense(self._num_labels, activation='softmax')(z)

    self.model = Model(inputs=inputs, outputs=pred)
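# Sketch (an assumption, not from the source) of compiling the model that build()
# above produces: the CRF branch stores its loss in self._loss, and the model
# inputs are [word_ids, char_ids, lengths] in that order when _use_char is True.
# `tagger` is a hypothetical instance of the surrounding class.
tagger.build()
tagger.model.compile(loss=tagger._loss, optimizer='adam')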
def create_conditional_random_field(max_sentence_length: int, n_classes: int,
                                    l2_kernel: float, l2_chain: float) -> Sequential:
    model = Sequential()
    crf = CRF(units=n_classes,
              learn_mode='join',
              test_mode='viterbi',
              kernel_regularizer=(l2(l2_kernel) if l2_kernel > 0.0 else None),
              chain_regularizer=(l2(l2_chain) if l2_chain > 0.0 else None))
    model.add(Masking(mask_value=0.0,
                      input_shape=(max_sentence_length, EMBEDDING_SIZE)))
    model.add(crf)
    model.compile(optimizer='rmsprop', loss=crf.loss_function,
                  metrics=[crf.accuracy])
    return model
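# Hedged usage of create_conditional_random_field above: sentences shorter than
# max_sentence_length are padded with all-zero rows so Masking(mask_value=0.0)
# skips them. EMBEDDING_SIZE comes from the surrounding module; the sizes and
# one-hot targets below are made up.
import numpy as np

crf_model = create_conditional_random_field(max_sentence_length=30, n_classes=4,
                                            l2_kernel=1e-4, l2_chain=1e-4)
X = np.zeros((2, 30, EMBEDDING_SIZE), dtype='float32')
X[:, :10, :] = np.random.randn(2, 10, EMBEDDING_SIZE)  # first 10 steps are real
y = np.zeros((2, 30, 4), dtype='float32')
y[..., 0] = 1.0  # dummy one-hot tags
crf_model.fit(X, y, epochs=1)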
def _create_model(self):
    """Create the model used for training."""
    model = Sequential()
    model.add(Embedding(len(self.vocab), self.embedding_dim, mask_zero=True))
    model.add(Bidirectional(LSTM(self.birnn_units // 2, return_sequences=True)))
    crf = CRF(len(self.tags), sparse_target=True)
    model.add(crf)
    model.summary()
    # model.compile('adam', loss=crf.loss_function, metrics=[crf.accuracy])
    rms_prop = optimizers.RMSprop(lr=0.01, decay=1e-4)
    model.compile(optimizer=rms_prop, loss=crf.loss_function,
                  metrics=[crf.accuracy])
    return model
def get_model_lstm():
    caseEmbeddings = np.identity(len(case2Idx), dtype='float32')

    words_input = Input(shape=(None, nb_embedding_dims), dtype='float32',
                        name='words_input')
    casing_input = Input(shape=(None,), dtype='int32', name='casing_input')
    casing = Embedding(output_dim=caseEmbeddings.shape[1],
                       input_dim=caseEmbeddings.shape[0],
                       weights=[caseEmbeddings],
                       trainable=False,
                       name='case_embed')(casing_input)

    character_input = Input(shape=(None, nb_char_embeddings,), name='char_input')
    embed_char_out = TimeDistributed(
        Embedding(len(char2Idx), 32,
                  embeddings_initializer=RandomUniform(minval=-0.5, maxval=0.5)),
        name='char_embedding')(character_input)
    char_lstm = TimeDistributed(Bidirectional(LSTM(50, name='char_lstm')))(embed_char_out)

    output = concatenate([words_input, casing, char_lstm])
    output = Bidirectional(LSTM(200, return_sequences=True, dropout=0.50,
                                recurrent_dropout=0.5, name='token_lstm'))(output)
    # output = TimeDistributed(Dense(len(label2Idx), activation='relu', name='token_dense'))(output)
    crf = CRF(len(label2Idx), name='crf')
    output = crf(output)

    model = Model(inputs=[words_input, casing_input, character_input],
                  outputs=[output])
    model.compile(loss=crf.loss_function, optimizer='nadam',
                  metrics=[crf.accuracy])
    model.summary()
    return model
def build_model(self):
    model = Sequential()
    if self.__pre_trained:
        model.add(Embedding(input_dim=self.__words_weights.shape[0],
                            output_dim=self.__words_weights.shape[1],
                            input_length=self.__max_len,
                            weights=[self.__words_weights],
                            trainable=False))
    else:
        # Note: this randomly initialized embedding is also frozen (trainable=False).
        model.add(Embedding(input_dim=self.__num_words,
                            output_dim=self.__max_len,
                            input_length=self.__max_len,
                            trainable=False))
    model.add(Bidirectional(LSTM(units=self.__max_len // 2,
                                 return_sequences=True,
                                 recurrent_dropout=0.1)))
    model.add(Dropout(self.__dropout))
    model.add(TimeDistributed(Dense(units=self.__num_tags, activation='relu')))
    if self.__isa_crf:
        from keras_contrib.layers import CRF
        from keras_contrib.losses import crf_loss
        model.add(CRF(units=self.__num_tags))
        # metrics such as crf_viterbi_accuracy / crf_accuracy could be added here
        model.compile(optimizer=Adam(learning_rate=self.__learning_rate),
                      loss=crf_loss)
    else:
        model.add(Dense(units=self.__num_tags, activation='softmax'))
        model.compile(optimizer=Adam(learning_rate=self.__learning_rate),
                      loss="categorical_crossentropy")
    return model
def run(X_train, Y_train, X_val, Y_val, embedding_matrix, vocab_size, maxlen=40,
        emb_dim=300, neg_ratio=0, hidden_dim=300, drop=0.2, r_drop=0.1):
    # Build the model. (An earlier variant used a TimeDistributed sigmoid Dense
    # output instead of the CRF head below.)
    input = Input(shape=(maxlen,))
    model = Embedding(vocab_size, emb_dim, weights=[embedding_matrix],
                      input_length=maxlen, trainable=False)(input)
    model = Bidirectional(LSTM(hidden_dim, return_sequences=True,
                               recurrent_dropout=r_drop))(model)
    model = TimeDistributed(Dense(hidden_dim // 4, activation='relu'))(model)
    model = TimeDistributed(Dropout(drop))(model)

    # Use CRF instead of Dense
    crf = CRF(2)
    out = crf(model)
    model = Model(input, out)

    Y_train_2 = keras.utils.to_categorical(Y_train)
    Y_val_2 = keras.utils.to_categorical(Y_val)

    model.compile(optimizer='adam', loss=crf.loss_function, metrics=[crf.accuracy])
    earlyStop = [EarlyStopping(monitor='val_loss', patience=1)]
    history = model.fit(X_train, Y_train_2, batch_size=64, epochs=10,
                        validation_data=(X_val, Y_val_2), callbacks=earlyStop)

    preds = model.predict(X_val)
    test = [[np.argmax(y) for y in x] for x in preds]
    test_arr = np.asarray(test)
    test_arr = np.reshape(test_arr, (-1))
    print(metrics.precision_recall_fscore_support(np.reshape(Y_val, (-1)),
                                                  test_arr, average=None,
                                                  labels=[0, 1]))

    Y_val_ = np.squeeze(Y_val)
    print("Evaluate: dev seg exact")
    pred_out_dir = out_dir + 'seg_' + str(neg_ratio) + 'neg'
    gold_dir = '../../data/val_segs/' + 'seg_' + str(neg_ratio) + 'neg'
    p, r, f = seg_exact_match(test, Y_val_, pred_out_dir, gold_dir)
    return model, history, p, r, f
def build_model(tag_num, max_seq_len, features=1024, embedding_dim=100,
                embedding_weights=None, rnn_units=100, return_attention=False,
                lr=1e-3):
    """Build the model for predicting tags.

    :param tag_num: Number of tags.
    :param max_seq_len: Maximum sequence length.
    :param features: Dimensionality of the precomputed input feature vectors.
    :param embedding_dim: Output dimension of an embedding layer (currently
        unused; the model consumes precomputed features directly).
    :param embedding_weights: Initial weights for an embedding layer (currently
        unused).
    :param rnn_units: The number of RNN units in a single direction.
    :param return_attention: Whether to also return the attention matrix.
    :param lr: Learning rate of the optimizer.
    :return model: The built model.
    """
    input_layer = keras.layers.Input(shape=(max_seq_len, features))
    lstm_layer = keras.layers.Bidirectional(
        keras.layers.LSTM(units=rnn_units,
                          recurrent_dropout=0.4,
                          return_sequences=True),
        name='Bi-LSTM')(input_layer)
    attention_layer = SeqSelfAttention(attention_activation='sigmoid',
                                       attention_width=9,
                                       return_attention=return_attention,
                                       name='Attention')(lstm_layer)
    if return_attention:
        attention_layer, attention = attention_layer

    crf = CRF(tag_num, sparse_target=True, name='CRF')
    outputs = [crf(attention_layer)]
    loss = {'CRF': crf.loss_function}
    if return_attention:
        outputs.append(attention)
        loss['Attention'] = 'categorical_crossentropy'

    model = keras.models.Model(inputs=input_layer, outputs=outputs)
    model.compile(
        optimizer=keras.optimizers.Adam(lr=lr),
        loss=loss,
        metrics={'CRF': crf.accuracy},
    )
    return model
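# Hedged example of the return_attention switch in build_model above: with
# return_attention=True the model has two outputs (CRF tags plus the attention
# matrix), each with its own loss entry. Sizes are illustrative only.
m = build_model(tag_num=10, max_seq_len=60, features=1024, rnn_units=100,
                return_attention=True)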
def Bilstm_CNN_Crf(maxlen, char_value_dict_len, class_label_count,
                   embedding_weights=None, is_train=True):
    word_input = Input(shape=(maxlen,), dtype='int32', name='word_input')
    if is_train:
        word_emb = Embedding(char_value_dict_len + 2, output_dim=100,
                             input_length=maxlen, weights=[embedding_weights],
                             name='word_emb')(word_input)
    else:
        word_emb = Embedding(char_value_dict_len + 2, output_dim=100,
                             input_length=maxlen, name='word_emb')(word_input)

    # bilstm
    bilstm = Bidirectional(LSTM(64, return_sequences=True))(word_emb)
    bilstm_d = Dropout(0.1)(bilstm)

    # cnn branch (currently disabled)
    # half_window_size = 2
    # padding_layer = ZeroPadding1D(padding=half_window_size)(word_emb)
    # conv = Conv1D(filters=50, kernel_size=2 * half_window_size + 1,
    #               padding='valid')(padding_layer)
    # conv_d = Dropout(0.1)(conv)
    # dense_conv = TimeDistributed(Dense(50))(conv_d)
    # rnn_cnn_merge = concatenate([bilstm_d, dense_conv], axis=2)
    # dense = TimeDistributed(Dense(class_label_count))(rnn_cnn_merge)

    dense = TimeDistributed(Dense(class_label_count))(bilstm_d)

    # crf
    crf = CRF(class_label_count, sparse_target=False)
    crf_output = crf(dense)

    # build model (Keras 2 API: `inputs`/`outputs`)
    model = Model(inputs=[word_input], outputs=[crf_output])
    model.compile(loss=crf.loss_function, optimizer='adam',
                  metrics=[crf.accuracy])
    # model.summary()
    return model
def create(self, train_file_path: str, output_summary: bool = False):
    if not os.path.exists(train_file_path):
        raise FileNotFoundError
    with open(train_file_path, 'r') as train_file:
        self.max_words_count_in_sentence = pd.read_csv(train_file).groupby(
            "article_id").size().max()

    input_layer = Input(shape=(self.max_words_count_in_sentence,))
    word_embedding_size = 150
    model = Embedding(input_dim=len(self.lang.vocab),
                      output_dim=word_embedding_size,
                      input_length=self.max_words_count_in_sentence)(input_layer)
    model = Bidirectional(LSTM(units=word_embedding_size,
                               return_sequences=True,
                               dropout=0.5,
                               recurrent_dropout=0.5,
                               kernel_initializer=keras.initializers.he_normal()))(model)
    model = LSTM(units=word_embedding_size * 2,
                 return_sequences=True,
                 dropout=0.5,
                 recurrent_dropout=0.5,
                 kernel_initializer=keras.initializers.he_normal())(model)
    model = TimeDistributed(Dense(len(self._tags), activation="relu"))(model)
    crf = CRF(len(self._tags))
    model = Model(input_layer, crf(model))
    model.compile(optimizer=keras.optimizers.Adam(lr=0.0005, beta_1=0.9, beta_2=0.999),
                  loss=crf.loss_function,
                  metrics=[crf.accuracy, 'accuracy'])
    if output_summary:
        model.summary()
    self._model = model
def bert_BiLSTM_CRF_model():
    ner_model = load_trained_model_from_checkpoint(config_path, checkpoint_path,
                                                   training=True, seq_len=maxlen_ner)
    bert_output = ner_model.layers[-9].output
    X = Lambda(lambda x: x[:, 0:input_shape[0]])(bert_output)
    X = Bidirectional(LSTM(128, return_sequences=True))(X)
    output = CRF(3, sparse_target=True)(X)
    ner_model = Model(ner_model.input, output)
    # Freeze everything except the CRF and BiLSTM layers added on top of BERT
    for layer in ner_model.layers:
        layer.trainable = False
    ner_model.layers[-1].trainable = True
    ner_model.layers[-2].trainable = True
    return ner_model
def __init__(self, input_size, max_len, embedding_size=32, gru_size=128):
    self.model = Sequential()
    # self.model.add(Embedding(input_size, embedding_size, input_length=max_len))
    self.model.add(Bidirectional(GRU(units=gru_size,
                                     return_sequences=True,
                                     recurrent_dropout=0.2,
                                     dropout=0.2)))
    self.model.add(TimeDistributed(Dense(gru_size, activation="relu")))
    # Keras 2 API: `pool_size` replaces the old `pool_length` argument
    self.model.add(MaxPooling1D(pool_size=max_len))
    crf = CRF(4)
    self.model.add(crf)
    self.model.compile(loss=crf.loss_function, optimizer='adam',
                       metrics=['accuracy'])