def fit_lstm_model(X_train, y_train, n_words, n_tags, seq_len, class_weights, epochs):
    '''Set up LSTM model with one input - equal length sequences of encoded text'''
    input_seq = Input(shape=(seq_len, ))

    '''Pass the GloVe pretrained model weights into the embedding layer'''
    # NOTE: embedding_matrix is expected to be defined in the enclosing scope
    embedding = Embedding(input_dim=n_words,
                          output_dim=300,
                          weights=[embedding_matrix],
                          trainable=True)(input_seq)
    embedding = Dropout(0.1)(embedding)

    '''Add Bidirectional LSTM layer, dense hidden layer, and final output layer'''
    model = Bidirectional(
        LSTM(units=64, return_sequences=True, recurrent_dropout=0.1))(embedding)
    model = TimeDistributed(Dense(64, activation='relu'))(model)
    output = Dense(n_tags, activation="softmax")(model)

    '''Compile and fit deep neural network'''
    model = Model(inputs=input_seq, outputs=output)
    model.compile(optimizer="adam",
                  loss="categorical_crossentropy",
                  metrics=["accuracy"])
    history = model.fit(X_train, y_train,
                        epochs=epochs,
                        batch_size=32,
                        validation_split=0.1,
                        verbose=1,
                        class_weight=class_weights)  # Keras expects a dict, not a list

    '''Create simple performance report from the held-out validation split'''
    val_loss = history.history['val_loss'][-1]
    val_acc = history.history.get('val_accuracy', history.history.get('val_acc'))[-1]
    print(f'Model validation loss was {val_loss}')
    print(f'Model validation accuracy was {val_acc}')
    return model, history
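# A minimal sketch (assumption, not from the original source) of how the
# module-level `embedding_matrix` consumed by fit_lstm_model could be built from
# a GloVe text file and a fitted Keras tokenizer. `glove_path`, `tokenizer` and
# `build_embedding_matrix` are hypothetical names introduced for illustration.
import numpy as np


def build_embedding_matrix(glove_path, tokenizer, n_words, dim=300):
    # Load GloVe vectors into a dict: word -> float32 vector of length `dim`
    vectors = {}
    with open(glove_path, encoding='utf-8') as fh:
        for line in fh:
            parts = line.rstrip().split(' ')
            vectors[parts[0]] = np.asarray(parts[1:], dtype='float32')
    # Rows are indexed by the tokenizer's word ids; out-of-vocabulary rows stay zero
    matrix = np.zeros((n_words, dim))
    for word, idx in tokenizer.word_index.items():
        if idx < n_words and word in vectors:
            matrix[idx] = vectors[word]
    return matrix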
def build_qt_inference_model(model_settings):
    # architecture
    _input = Input(shape=(model_settings['max_len'], model_settings['emb_dim']),
                   name='input')
    model = Bidirectional(LSTM(units=100, return_sequences=True,
                               dropout=0.5, recurrent_dropout=0.5),
                          name='bilstm1')(_input)  # biLSTM
    model = Bidirectional(LSTM(units=100, return_sequences=False,
                               dropout=0.5, recurrent_dropout=0.5),
                          name='bilstm2')(model)  # 2nd biLSTM
    _output = Dense(model_settings['n_tags'], activation='softmax',
                    name='output')(model)  # a dense layer
    model = Model(_input, _output)
    model.compile(optimizer=Nadam(clipnorm=1),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    model.summary()
    return model
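# Hypothetical usage sketch for build_qt_inference_model (illustrative, not from
# the original source): the builder only reads the three keys below, so a plain
# dict of settings is enough. Values are arbitrary examples.
model_settings = {
    'max_len': 100,   # number of timesteps in each pre-embedded input sequence
    'emb_dim': 300,   # dimensionality of each token embedding
    'n_tags': 5,      # number of output classes
}
qt_model = build_qt_inference_model(model_settings)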
def build_ep_inference_model(model_settings):
    # architecture
    input = Input(shape=(model_settings['max_len'], model_settings['emb_dim']),
                  name='input')
    model = Bidirectional(LSTM(units=100, return_sequences=True),
                          name='bilstm1')(input)  # biLSTM
    model = Bidirectional(LSTM(units=100, return_sequences=True),
                          name='bilstm2')(model)  # 2nd biLSTM
    model = TimeDistributed(Dense(model_settings['n_tags'], activation=None),
                            name='td')(model)  # a dense layer
    crf = CRF(model_settings['n_tags'], name='crf')  # CRF layer
    out = crf(model)  # output
    model = Model(input, out)
    model.compile(optimizer=Nadam(lr=0.01, clipnorm=1),
                  loss=losses.crf_loss,
                  metrics=[metrics.crf_accuracy])
    model.summary()
    return model
def get_model():
    """ Create and return the model. """
    # INPUTS
    inputs = []
    models = []
    for embed_mod_id, _ in enumerate(embedding_models):
        inputs.append(
            Input(shape=(seq_max_length, feature_vec_lengths[embed_mod_id],)))
        models.append(
            Masking(mask_value=[0] * feature_vec_lengths[embed_mod_id])(inputs[-1]))
    if args.pos:
        inputs.append(Input(shape=(seq_max_length, len(Utils.poss) + 2,)))
        models.append(Masking(mask_value=[0] * (len(Utils.poss) + 2))(inputs[-1]))

    # Combine INPUTS (including masks)
    if len(models) > 1:
        model = concatenate(models)
    else:
        model = models[0]

    # CORE MODEL
    model = Bidirectional(
        LSTM(
            50,
            return_sequences=True,
            dropout=0,  # !
            # dropout=0.1,
            # dropout=0.25,
            recurrent_dropout=recurrent_dropout))(model)

    # (unfold LSTM and) one-hot encode binary label
    outputs = TimeDistributed(Dense(2, activation="softmax"))(model)

    model = Model(inputs=inputs, outputs=outputs)
    model.compile(optimizer=optimizer, loss=loss, metrics=metrics)
    return model
def initialize(self, ner_corpus, parameters):
    '''
    A method to initialize the NER model.

    Attributes
    ----------
    ner_corpus : NER_Corpus
        | Fully developed NER corpus.
    parameters : dict
        | Hyperparameters for Bi-LSTM layers.
    '''
    self.word2vector = ner_corpus.word2vector
    self.max_sent_len = ner_corpus.max_sent_len
    self.feature_size = ner_corpus.feature_size
    self.ner_labels = ner_corpus.ner_labels
    self.word2id = ner_corpus.word2id
    self.id2word = ner_corpus.id2word
    self.X = ner_corpus.X_embedded
    self.Y = ner_corpus.Y_embedded
    del ner_corpus

    self.lstm_units = parameters.get('lstm_units')
    self.lstm_return_sequences = parameters.get('lstm_return_sequences')
    self.lstm_recurrent_dropout = parameters.get('lstm_recurrent_dropout')
    self.dense_units = parameters.get('dense_units')
    self.dense_activation = parameters.get('dense_activation')

    _input = Input(shape=(self.max_sent_len, self.feature_size))
    model = Bidirectional(
        LSTM(units=self.lstm_units,
             return_sequences=self.lstm_return_sequences,
             recurrent_dropout=self.lstm_recurrent_dropout))(_input)
    model = TimeDistributed(
        Dense(units=self.dense_units, activation=self.dense_activation))(model)
    crf = CRF(len(self.ner_labels))
    _output = crf(model)

    model = Model(inputs=_input, outputs=_output)
    model.compile(optimizer='rmsprop',
                  loss=crf.loss_function,
                  metrics=[crf.accuracy])
    self.model = model
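# Hypothetical hyperparameter dict for initialize() (illustrative values only):
# the keys match exactly what the method reads via parameters.get(...). For the
# TimeDistributed + CRF head used above, lstm_return_sequences should stay True.
# `ner_model` and `ner_corpus` are assumed, pre-built objects.
parameters = {
    'lstm_units': 128,
    'lstm_return_sequences': True,
    'lstm_recurrent_dropout': 0.2,
    'dense_units': 64,
    'dense_activation': 'relu',
}
# ner_model.initialize(ner_corpus, parameters)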
def boundary_sensitive_TCN(k):
    # shot feature extraction
    # NOTE: the input shape is left unspecified here; the LSTM below expects a
    # 3D batch, i.e. shape=(timesteps, n_features)
    inputs = Input(shape=())
    model = inputs
    model = Bidirectional(
        LSTM(k, dropout=0.25, recurrent_dropout=0.25,
             return_sequences=True))(model)
    model = TimeDistributed(Dense(2, activation='softmax'))(model)
    model = Model(inputs=inputs, outputs=model)
    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    return model
def train_model(self, X, y, labels, word_index, MAX_SEQUENCE_LENGTH,
                model_save_directory='./models/'):
    """ Train deep learning model """
    embedding_matrix, nb_words = get_embedding('glove', word_index)

    input1 = Input(shape=(MAX_SEQUENCE_LENGTH,))
    embedding = Embedding(input_dim=len(embedding_matrix),
                          output_dim=self.embedding_dim,
                          weights=[embedding_matrix],
                          input_length=MAX_SEQUENCE_LENGTH,
                          trainable=False)(input1)
    # embedding = Dropout(self.drop_rate_embedding)(embedding)
    model = Bidirectional(LSTM(units=self.num_lstm_units,
                               return_sequences=True,
                               recurrent_dropout=self.drop_rate_lstm))(embedding)
    model = TimeDistributed(Dense(units=self.num_lstm_units,
                                  activation=self.activation_function))(model)
    crf = CRF(units=len(labels))
    output1 = crf(model)
    model = Model(input1, output1)
    model.compile(optimizer='rmsprop',
                  loss=crf.loss_function,
                  metrics=[crf.accuracy])
    print(model.summary())

    early_stopping = EarlyStopping(monitor='val_loss', patience=3)
    STAMP = 'lstm_%f_%.2f' % (self.num_lstm_units, self.drop_rate_lstm)
    checkpoint_dir = model_save_directory + 'checkpoints/' + str(int(time())) + '/'
    if not os.path.exists(checkpoint_dir):
        os.makedirs(checkpoint_dir)
    # NOTE: bst_model_path (used for the serialized architecture below) is
    # expected to be defined elsewhere, typically from checkpoint_dir and STAMP
    with open(bst_model_path + ".json", "w") as json_file:
        json_file.write(model.to_json())
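# A minimal continuation sketch (assumption, not from the original source) of how
# bst_model_path and the training call are commonly wired up inside train_model,
# reusing checkpoint_dir, STAMP and early_stopping defined above. ModelCheckpoint
# would come from keras.callbacks.
# bst_model_path = checkpoint_dir + STAMP + '.h5'
# model_checkpoint = ModelCheckpoint(bst_model_path, monitor='val_loss',
#                                    save_best_only=True, save_weights_only=True)
# history = model.fit(X, np.array(y),
#                     batch_size=32, epochs=25, validation_split=0.1,
#                     callbacks=[early_stopping, model_checkpoint], verbose=1)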
def BidirLSTM(n_nodes,
              n_classes,
              n_feat,
              max_len=None,
              causal=True,
              loss=loss_fct,
              optimizer="adam",  # adam
              return_param_str=False):
    inputs = Input(shape=(None, n_feat))
    model = Bidirectional(
        LSTM(n_nodes, return_sequences=True,
             kernel_initializer=glorot_normal()))(inputs)

    # model = LSTM(n_nodes, return_sequences=True)(inputs)
    # # Bidirectional LSTM
    # if not causal:
    #     print("--------------------- Bi-directional ------------------------------")
    #     model_backwards = LSTM(n_nodes, return_sequences=True, go_backwards=True)(inputs)
    #     # model = Merge(mode="concat")([model, model_backwards])  # deprecated
    #     model = Concatenate(axis=2)([model, model_backwards])
    #     # model = concatenate([model, model_backwards], axis=2)

    model = TimeDistributed(Dense(n_classes, activation="softmax"))(model)
    model = Model(inputs=inputs, outputs=model)
    model.compile(optimizer=optimizer,
                  loss=loss,
                  sample_weight_mode="temporal",
                  metrics=['accuracy'])

    if return_param_str:
        param_str = "LSTM_N{}".format(n_nodes)
        if causal:
            param_str += "_causal"
        return model, param_str
    else:
        return model
def define_model():
    wordseq = Input(shape=(max_sent_length, ))
    charSeq = Input(shape=(max_sent_length, max_wrd_len))

    wm = wordmodel(max_sent_length)(wordseq)
    cm = TimeDistributed(charmodel(max_wrd_len))(charSeq)
    cm = Reshape((max_sent_length, -1))(cm)

    combined_input = concatenate([wm, cm])
    model = Bidirectional(LSTM(units=100, recurrent_dropout=0.1))(combined_input)
    out = Dense(10, activation="softmax")(model)  # softmax output layer

    model = Model([wordseq, charSeq], out)

    # load existing weights if they exist
    if os.path.isfile(outFileName + "-best.hdf5"):
        model.load_weights(outFileName + "-best.hdf5")

    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    print(model.summary())
    # plot_model(model, show_shapes=True, to_file=outFileName+'-plot.png')
    return model
def draw(self):
    with open(self.conf.train_dict, "rb") as fp:
        vocabulary = pickle.load(fp)

    # Model Configuration
    input_character = Input(shape=(None, ), name="character")
    feature_character = Embedding(len(vocabulary.keys()) + 1,
                                  self.embedding_dim,
                                  mask_zero=True)(input_character)
    feature_character = Dropout(0.1)(feature_character)
    feature_character = Bidirectional(
        LSTM(self.bi_rnn_units // 2,
             return_sequences=True,
             recurrent_dropout=0.1))(feature_character)

    input_construction = Input(shape=(None, 4), name="cxn")

    model = concatenate([feature_character, input_construction])
    model = Bidirectional(
        LSTM(self.bi_rnn_units // 2,
             return_sequences=True,
             recurrent_dropout=0.6))(model)
    output = TimeDistributed(Dense(5, activation="softmax"))(model)

    model = Model(inputs=[input_character, input_construction], outputs=output)
    plot_model(model,
               self.conf.model_image.format("multi_input_and_output_model.png"),
               show_shapes=True)
    model.compile("rmsprop",
                  loss="categorical_crossentropy",
                  metrics=["accuracy"])
    model.summary()
    return model
def train(self, epochs, embedding=None):
    # Embedded Words
    txt_input = Input(shape=(None, ), name='txt_input')
    txt_embed = Embedding(input_dim=self.num_words,
                          output_dim=MAX_LEN,
                          input_length=None,
                          name='txt_embedding',
                          trainable=False,
                          weights=([embedding]))(txt_input)
    txt_drpot = Dropout(0.1, name='txt_dropout')(txt_embed)

    # Embedded Part of Speech
    pos_input = Input(shape=(None, ), name='pos_input')
    pos_embed = Embedding(input_dim=self.num_pos,
                          output_dim=MAX_LEN,
                          input_length=None,
                          name='pos_embedding')(pos_input)
    pos_drpot = Dropout(0.1, name='pos_dropout')(pos_embed)

    # Embedded Characters
    char_in = Input(shape=(None, MAX_LEN_CHAR, ), name="char_input")
    emb_char = TimeDistributed(
        Embedding(input_dim=self.num_chars,
                  output_dim=MAX_LEN_CHAR,
                  input_length=None))(char_in)
    char_enc = TimeDistributed(
        LSTM(units=20, return_sequences=False,
             recurrent_dropout=0.5))(emb_char)

    # Concatenate inputs
    x = concatenate([txt_drpot, pos_drpot, char_enc], axis=2)
    x = SpatialDropout1D(0.3)(x)

    # Deep Layers
    model = Bidirectional(
        LSTM(units=100, return_sequences=True, recurrent_dropout=0.1))(x)
    model = Bidirectional(
        LSTM(units=100, return_sequences=True, recurrent_dropout=0.1))(model)

    # Output
    out = TimeDistributed(Dense(self.num_entities, activation="softmax"))(model)

    model = Model(inputs=[txt_input, pos_input, char_in], outputs=[out])
    model.compile(optimizer="rmsprop",
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    plot_model(model, to_file=self.save_path + 'model_structure.png')
    print(model.summary())

    history = model.fit(
        [self.X_train, self.train_pos, self.train_characters],
        np.array(self.Y_train),
        batch_size=32,
        epochs=epochs,
        validation_data=([self.X_validation, self.valid_pos, self.valid_characters],
                         np.array(self.Y_validation)),
        verbose=1)

    model.save(self.save_path + 'model_ner')

    test_eval = model.evaluate(
        [self.X_test, self.test_pos, self.test_characters],
        np.array(self.Y_test))
    print('Test loss:', test_eval[0])
    print('Test accuracy:', test_eval[1])

    return model, history
crf = CRF(n_tags)  # CRF layer
# crf = CRF(n_tags, sparse_target=True)  # CRF layer
out = crf(dense)  # output
# out = CRF(n_tags)(dense)

### 3. Build Model
model = Model(inputs=[word_input, char_input], outputs=out)

batch_size = 32
epochs = 15

# learning rate decay
dataset_size = train_data.shape[0]
batches_per_epoch = dataset_size / batch_size
lr_decay = (1. / (1 / 32) - 1) / batches_per_epoch

model.compile(
    optimizer=Adam(lr=0.012, decay=lr_decay),
    loss=crf.loss_function,
    metrics=[crf.accuracy]
)
model.summary()

from keras.utils.vis_utils import plot_model

history = model.fit(
    [X_w_tr, np.array(X_c_tr).reshape((len(X_c_tr), max_len, max_len_char))],
    np.array(y_tr),
    batch_size=batch_size,
    epochs=epochs,
    validation_data=(
        [X_w_v, np.array(X_c_v).reshape((len(X_c_v), max_len, max_len_char))],
        np.array(y_v)),
    verbose=1,
)
# history.history is a dictionary; keys are val_loss, val_acc, loss, acc
X_train, X_test = data.split_train_test(features)
y_train, _ = data.split_train_test(y)

input = Input(shape=(features.shape[1], features.shape[2], ))
model = Bidirectional(
    LSTM(units=50, return_sequences=True, recurrent_dropout=0.1))(input)
model = TimeDistributed(Dense(50, activation="relu"))(model)
crf = CRF_2nd(len(data.tag_to_index))
out_layer = crf(model)

model = Model(input, out_layer)
model.compile(optimizer="rmsprop",
              loss=crf.loss_function,
              metrics=[crf.accuracy])
model.summary()

BATCH_SIZE = 64
EPOCHS = 10

history = model.fit(X_train,
                    np.array(y_train),
                    batch_size=BATCH_SIZE,
                    epochs=EPOCHS,
                    validation_split=0.1,
                    verbose=2)


def pred2label(pred):
    out = []
def bilstm(X_train, X_test, Y_train, Y_test, wordembeddings):
    np.random.seed(1234)
    tf.random.set_seed(1234)
    random.seed(1234)

    max_length_sentence = X_train.str.split().str.len().max()
    tokenizer = Tokenizer(filters='!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n\'',
                          lower=True)
    tokenizer.fit_on_texts(X_train)
    word_index = tokenizer.word_index
    EMBEDDING_DIM = 300
    vocabulary_size = len(word_index) + 1
    print('Found %s unique tokens.' % len(word_index))

    sequences_train = tokenizer.texts_to_sequences(X_train)
    sequences_valid = tokenizer.texts_to_sequences(X_test)
    X_train = pad_sequences(sequences_train, maxlen=max_length_sentence)
    X_val = pad_sequences(sequences_valid, maxlen=X_train.shape[1])
    y_train = np.asarray(Y_train)
    y_val = np.asarray(Y_test)
    # print(word_index)
    '''
    print('Shape of data tensor:', X_train.shape)
    print('Shape of data tensor:', X_val.shape)
    print('Shape of data tensor:', y_train.shape)
    print('Shape of data tensor:', y_val.shape)
    print(X_train)
    print("*"*100)
    print(X_val)
    print("*"*100)
    print(y_train)
    print("*"*100)
    print(y_val)
    '''

    embedding_matrix = np.zeros((vocabulary_size, EMBEDDING_DIM))
    for word, i in word_index.items():
        if (word in wordembeddings.keys()):
            embedding_vector = wordembeddings[word]
            if len(embedding_vector) == 0:  # if array is empty
                embedding_vector = wordembeddings[word.title()]
                if len(embedding_vector) == 0:
                    embedding_vector = wordembeddings[word.upper()]
                    if len(embedding_vector) == 0:
                        embedding_vector = np.array([
                            round(np.random.rand(), 8) for i in range(0, 300)
                        ])
        else:
            # print("WORD NOT IN DICT", word)
            embedding_vector = np.array(
                [round(np.random.rand(), 8) for i in range(0, 300)])

        if len(embedding_vector) != 0:
            embedding_matrix[i] = embedding_vector

    embedding_layer = Embedding(vocabulary_size,
                                EMBEDDING_DIM,
                                weights=[embedding_matrix],
                                trainable=False)  # Try with True

    inputs = Input(shape=(X_train.shape[1], ))
    model = (Embedding(vocabulary_size,
                       EMBEDDING_DIM,
                       input_length=max_length_sentence,
                       weights=[embedding_matrix]))(inputs)
    model = Bidirectional(GRU(64))(model)  # !!!!!!! CHANGE THIS FOR OTHER MODELS
    model = (Dense(900, activation='relu'))(model)
    model = (Dense(400, activation='relu'))(model)
    model = (Dense(250, activation='relu'))(model)
    model = (Dense(204, activation='softmax'))(model)
    model = Model(inputs=inputs, outputs=model)
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    model.summary()

    callbacks = [EarlyStopping(monitor='val_loss')]
    hist_adam = model.fit(
        X_train,
        y_train,
        batch_size=1000,  # !!!!!!! CHANGE BATCH SIZE TO 1000, epochs to 200
        epochs=200,
        verbose=1,
        validation_data=(X_val, y_val),
        callbacks=callbacks)

    model.save(config.bigru_prepocessed_dataset1_chai)  # !!!!!!! CHANGE THIS FOR OTHER MODELS

    y_pred = model.predict(X_val)
    print(y_pred)
    y_val_class = pd.DataFrame(y_val).idxmax(axis=1)
    print(y_val_class)

    y_val_class_argmax = np.argmax(y_val, axis=1)
    y_pred_class_argmax = np.argmax(y_pred, axis=1)

    y_pred_class = pd.DataFrame(y_pred).idxmax(axis=1)
    print(y_pred_class)

    print(classification_report(y_val_class, y_pred_class))

    plt.suptitle('Optimizer : Adam', fontsize=10)
    plt.ylabel('Loss', fontsize=16)
    plt.xlabel('Epoch', fontsize=14)
    plt.plot(hist_adam.history['loss'], color='b', label='Training Loss')
    plt.plot(hist_adam.history['val_loss'], color='r', label='Validation Loss')
    plt.legend(loc='upper right')
    plt.savefig(
        '/home/ubuntu/asset_classification/results/bigru_model_dataset1_preprocessed_chai.png'
    )  # !!!!!!! CHANGE THIS FOR OTHER MODELS

    tf.keras.utils.plot_model(model,
                              to_file=config.bigru_architecture,
                              show_shapes=True)  # !!!!!!! CHANGE THIS FOR OTHER MODELS

    return (y_pred, y_val_class, y_pred_class, y_val_class_argmax,
            y_pred_class_argmax)
#                            input_shape=(batch_size, None, n_feat),
#                            dropout=0.5,
#                            name='bilstm',
#                            recurrent_dropout=0.25)(inputs)

# attention layer
# model = attention_3d_block(model)

# Output FC layer
model = TimeDistributed(Dense(nb_classes, activation="softmax"))(model)
model = Model(inputs=inputs, outputs=model)
# model = multi_gpu_model(model, gpus=2)
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              sample_weight_mode="temporal",
              metrics=['accuracy'])
model.summary()

# train on videos with sample weighting
# model.fit(x=X_train_m,
#           y=Y_train_,
#           validation_data=(X_vali_m, Y_vali_, M_vali[:, :, 0]),
#           epochs=nb_epoch,
#           batch_size=batch_size,
#           verbose=1,
#           # sample_weight=M_train[:, :, 0],
#           sample_weight=sample_weights,
#           callbacks=[lr_reducer, early_stopper, tensor_board, checkpointer])
def main(self, glove):
    # get word embeddings
    utils = wordUtils.Utils()

    if glove:
        # use glove
        self.words_list, self.embedding_matrix = utils.load_glove()
        unword_n = len(self.words_list)
    else:
        self.words_list, self.embedding_matrix = utils.load_word2vec()
        unword_n = len(self.words_list)

    # get the training corpus
    cr = corpusreader.CorpusReader(self.textfile, self.annotfile)
    corpus = cr.trainseqs
    print(len(corpus))

    train = []
    print("Processing training data", datetime.now())
    for doc in corpus:
        tmp_dic = {}
        tmp_dic['tokens'] = doc['tokens']

        # convert SOBIE tags to numbers
        tags = doc['bio']
        tags = [self.lablist[i] for i in tags]
        tmp_dic['bion'] = tags
        train.append(tmp_dic)

    n_emb = 0
    n_unk = 0
    # get the number of the embedding
    for idx in range(len(train)):
        words = train[idx]['tokens']
        words_id = []
        for i in words:
            # get the number of the embedding
            try:
                # the index of the word in the embedding matrix
                index = self.words_list.index(i)
                n_emb = n_emb + 1
            except ValueError:
                # use the embedding full of zeros to identify an unknown word
                n_unk = n_unk + 1
                index = unword_n  # the index of the word in the embedding matrix
            words_id.append(index)
        train[idx]['tokens'] = words_id

    # get all sizes from the sequences with training data
    train_l_d = {}
    train_l_labels = {}
    for seq in train:
        # corpus
        l = len(seq['tokens'])
        if l not in train_l_d:
            train_l_d[l] = []
        train_l_d[l].append(seq['tokens'])

        # labels
        l1 = len(seq['bion'])
        if l1 not in train_l_labels:
            train_l_labels[l1] = []
        train_l_labels[l1].append(seq['bion'])

    sizes = list(train_l_d.keys())
    for i in sizes:
        if len(train_l_d[i]) != len(train_l_labels[i]):
            print("mismatch between sequence and label counts")
        for m in range(len(train_l_d[i])):
            if len(train_l_d[i][m]) != len(train_l_labels[i][m]):
                print("XXX")

    input = Input(shape=(None,))
    el = Embedding(len(self.words_list) + 1, 200,
                   weights=[self.embedding_matrix],
                   trainable=False)(input)
    model = Bidirectional(LSTM(units=50, return_sequences=True,
                               recurrent_dropout=0.1))(el)  # variational biLSTM
    model = TimeDistributed(Dense(50, activation="relu"))(model)  # a dense layer as suggested by neuralNer
    crf = CRF(self.lab_len)  # CRF layer
    out = crf(model)  # output

    model = Model(input, out)
    model.compile(optimizer="rmsprop", loss=crf.loss_function, metrics=[crf.accuracy])
    model.summary()

    f_best = -1
    f_index = -1

    # OK, start actually training
    for epoch in range(self.epochsN):
        print("Epoch", epoch, "start at", datetime.now())
        # Train in batches of different sizes - randomize the order of sizes
        # Except for the first few epochs
        if epoch > 2:
            random.shuffle(sizes)
        for size in sizes:
            batch = train_l_d[size]
            labs = train_l_labels[size]

            tx = np.array([seq for seq in batch])
            y = [seq for seq in labs]
            ty = [to_categorical(i, num_classes=self.lab_len) for i in y]

            # This trains in mini-batches
            model.fit(tx, np.array(ty), verbose=0, epochs=1)
        print("Trained at", datetime.now())

        # save all epochs
        save_load_utils.save_all_weights(model, 'words-results/epoch_%s.h5' % epoch)

        # test the results
        test_data = 'corpus_char/tmVarCorpus/treated/test_data.txt'
        test_labels = 'corpus_char/tmVarCorpus/treated/test_labels.tsv'
        self.test_model(test_data, test_labels, model, glove)
        f = self.eval()

        if f > f_best:
            f_best = f
            f_index = epoch

    # Pick the best model, and save it with a useful name
    print("Choosing the best epoch")
    shutil.copyfile("words-results/epoch_%s.h5" % f_index,
                    "words_glove_%s.h5" % f_index)
# pu.db
# Y = list(df("X"))

input = Input(shape=(110, 100))
model = Bidirectional(
    LSTM(units=50, return_sequences=True,
         recurrent_dropout=0.1))(input)  # variational biLSTM
model = TimeDistributed(Dense(50, activation="relu"))(model)  # a dense layer as suggested by neuralNer
crf = CRF(1)  # CRF layer
out = crf(model)  # output

model = Model(input, out)
model.compile(optimizer="rmsprop", loss=crf.loss_function)
model.summary()
# pu.db

Y = keras.utils.to_categorical(Y, num_classes=110)
Y = Y.reshape((Y.shape[0], Y.shape[1], 1))

all_train = all[:int(0.8 * all.shape[0]), ...]
Y_train = Y[:int(0.8 * all.shape[0]), ...]
Y_train_dense = np.reshape(Y_train, (Y_train.shape[0], Y_train.shape[1]))
Y_train_dense = np.argmax(Y_train_dense, axis=-1)

all_test = all[int(0.8 * all.shape[0]):, ...]
Y_test = Y[int(0.8 * all.shape[0]):, ...]
# pu.db
Y_test_dense = np.reshape(Y_test, (Y_test.shape[0], Y_test.shape[1]))