def TrainLstmCrf(data_name, model_name):
    """Train a BiLSTM + CRF tagger over the tags S/B/I/E and save it.

    Sentences and the vocabulary are loaded through ``get_sents``; every
    sentence is padded/truncated to 75 tokens.  The model is fitted one
    minibatch at a time, a checkpoint is written under ``./models/`` every
    50 minibatches, and the final model is saved to ``model_name``.

    Args:
        data_name: Dataset identifier forwarded to ``get_sents(datasets=...)``.
        model_name: Path of the final saved model; also used as the suffix
            of the intermediate checkpoint file names.
    """
    import os  # local import: only needed to create the checkpoint folder

    n_classes = 4
    max_len = 75
    batch_size = 128
    epoch = 100
    tags = ['S', 'B', 'I', 'E']

    sentences, words = get_sents(datasets=data_name)
    print(len(sentences), len(words))

    # Word ids start at 1, so the largest id is len(words).
    word2idx = {w: i + 1 for i, w in enumerate(words)}
    tag2idx = {t: i for i, t in enumerate(tags)}
    vocab_size = len(words)

    X = [[word2idx[w[0]] for w in s] for s in sentences]
    # NOTE(review): the padding id (vocab_size - 1) is also the id of a real
    # word; id 0 is never assigned and looks like the intended padding value.
    # Left unchanged here -- confirm before switching.
    X = pad_sequences(maxlen=max_len, sequences=X, padding="post",
                      value=vocab_size - 1)
    y = [[tag2idx[w[1]] for w in s] for s in sentences]
    y = pad_sequences(maxlen=max_len, sequences=y, padding="post",
                      value=tag2idx["E"])
    y = [to_categorical(i, num_classes=n_classes) for i in y]

    # Split the data into train and test parts.
    X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=0.1)
    print(len(X_tr), len(y_tr), len(X_te), len(y_te))

    # Every sequence is declared to have the full padded length.
    s = np.asarray([max_len] * batch_size, dtype='int32')

    # Build the model.
    word_ids = Input(batch_shape=(batch_size, max_len), dtype='int32')
    sequence_lengths = Input(batch_shape=[batch_size, 1], dtype='int32')
    print(sequence_lengths)
    # input_dim must exceed the largest id (len(words)); with the former
    # input_dim == vocab_size the last word of the vocabulary was out of range.
    word_embeddings = Embedding(vocab_size + 1, n_classes)(word_ids)
    blstm = Bidirectional(LSTM(units=50, return_sequences=True))(word_embeddings)
    model = TimeDistributed(Dense(n_classes, activation='tanh'))(blstm)
    crf = CrfModel()
    pred = crf(inputs=[model, sequence_lengths])
    model = Model(inputs=[word_ids, sequence_lengths], outputs=[pred])
    print("word_ids:{}".format(word_ids))
    print("sequence_lengths:{}".format(sequence_lengths))
    model.compile(optimizer="rmsprop", loss=crf.loss, metrics=['accuracy'])
    # summary() prints by itself and returns None; do not print its result.
    model.summary()

    # NOTE(review): each minibatch is fitted for `epoch` (100) epochs before
    # moving to the next one, and `s` / the Input layers assume every
    # minibatch holds exactly `batch_size` rows -- confirm both are intended.
    k = 0
    for batch_x, batch_y in minibatches(X_tr, y_tr, batch_size=batch_size):
        model.fit([batch_x, s], np.array(batch_y), epochs=epoch,
                  batch_size=batch_size)
        k += 1
        if k % 50 == 0:
            os.makedirs("./models", exist_ok=True)
            model.save("./models/{}_{}".format(k, model_name))
            print("saved")

    # Save the final model.
    model.save(model_name)
# --- Model: word embedding + POS embedding -> BiLSTM -> dense -> CRF ---
pos_embedding_layer = Embedding(input_dim=len(pos),
                                output_dim=10,
                                input_length=max_len)
pos_emb = pos_embedding_layer(pos_input)

# Word and POS embeddings are joined into a single input tensor.
modified_input = keras.layers.concatenate([word_emb, pos_emb])

bilstm_layer = Bidirectional(
    LSTM(units=50, return_sequences=True, recurrent_dropout=0.1))
model_1 = bilstm_layer(modified_input)

# A per-timestep dense layer (as suggested by neuralNer) feeds the CRF.
dense_layer = TimeDistributed(Dense(50, activation="relu"))
model = dense_layer(model_1)

crf = CRF(n_tags)
out = crf(model)

model = Model([input, pos_input], out)
model.compile(optimizer="rmsprop",
              loss=crf.loss_function,
              metrics=[crf.accuracy])
print(model.summary())

# --- Training ---
history = model.fit(
    [X_tr, X_pos_tr],
    np.array(y_tr),
    batch_size=32,
    epochs=60,
    validation_split=0.1,
    verbose=1,
)

# --- Testing ---
test_pred = model.predict([X_te, X_pos_te], verbose=1)

# Reverse lookup from tag index to tag string.
idx2tag = dict((i, w) for w, i in tag2idx.items())

pred_labels = pred2label(test_pred)
test_labels = pred2label(y_te)
print("Recall, Precision and F-score are",
      get_recall_precision(test_labels, pred_labels, "Destination"))

model.save("BILSTM+CRF_with_pos_without_embeddings.model")
# Compile with the CRF layer's own loss and accuracy.
model.compile(optimizer=rmsprop,
              loss=crf.loss_function,
              metrics=[crf.accuracy])
model.summary()

# Early stopping / checkpointing (currently disabled):
# filepath = "ner_ch_em_v4.hdf5"
# checkpoint = ModelCheckpoint(filepath, monitor='val_loss', verbose=2,
#                              save_best_only=True, mode='min',
#                              save_weights_only=True)  # or monitor val_acc
# early_stop = EarlyStopping(monitor='val_loss', patience=5, mode='min')
# callbacks_list = [checkpoint, early_stop]

# Character input is reshaped to (samples, MAX_LEN, max_len_char) and the
# targets to (samples, MAX_LEN, 5) before fitting.
char_features_tr = np.array(X_char_tr).reshape(
    (len(X_char_tr), MAX_LEN, max_len_char))
tag_targets_tr = np.array(y_tr).reshape(len(y_tr), MAX_LEN, 5)

history = model.fit(
    [X_word_tr, char_features_tr],
    tag_targets_tr,
    batch_size=BATCH_SIZE,
    epochs=3,
    validation_split=0.1,
    verbose=2,
    shuffle=True,
)  # add callbacks=callbacks_list to re-enable the callbacks above

# Saving (currently disabled):
# model_json = model.to_json()
# with open("ner_ch_em_v4_json.json", "w") as json_file:
#     json_file.write(model_json)
# # serialize weights to HDF5
# model.save('ner_ch_em_v4_json.h5')
# print("Saved model to disk")
# save_load_utils.save_all_weights(model, 'ner_ch_em_v4.h5')
def create_model(X_train, y_train, X_test, y_test, look_back, num_features):
    """Build, train and return a TimeDistributed-CNN -> LSTM -> dense regressor.

    The network is compiled with MSE loss and the MAPE metric and trained
    on ``X_train``/``y_train`` with 20 % of the rows held out for
    validation.  The best models (by validation loss and validation MAPE)
    and the fit history are written into the ``models`` directory.

    Args:
        X_train: Training inputs; ``shape[1]`` and ``shape[2]`` define the
            model input shape.  The Reshape layer requires
            ``shape[1] == 24 * 5``.
        y_train: Training targets (one value per sample, as the last layer
            is ``Dense(1)``).
        X_test, y_test, look_back, num_features: Unused here; kept so
            existing callers keep working.

    Returns:
        The trained Keras ``Model``.
    """
    import os  # local import: used only for portable output paths

    def add_norm_act_dropout(stack, rate):
        """Append BatchNormalization, LeakyReLU and Dropout(rate) to stack."""
        stack.add(BatchNormalization())
        stack.add(LeakyReLU())
        stack.add(Dropout(rate))

    # CNN Model
    cnn = Sequential()
    cnn.add(Conv2D(filters=2, kernel_size=(10, 2), padding='same',
                   kernel_initializer='TruncatedNormal'))
    add_norm_act_dropout(cnn, 0.240)
    cnn.add(Conv2D(filters=8, kernel_size=(2, 10), padding='same',
                   kernel_initializer='TruncatedNormal'))
    add_norm_act_dropout(cnn, 0.434)
    cnn.add(Flatten())

    # RNN Model (the former `for _ in range(0)` middle stack never ran and
    # has been dropped)
    rnn = Sequential()
    rnn.add(CuDNNLSTM(3, return_sequences=True,
                      kernel_initializer='TruncatedNormal'))
    add_norm_act_dropout(rnn, 0.622)
    rnn.add(CuDNNLSTM(4, kernel_initializer='TruncatedNormal',
                      return_sequences=False))
    add_norm_act_dropout(rnn, 0.281)

    # DNN Model: (units, dropout rate, number of repeated layers)
    dnn = Sequential()
    for units, rate, repeats in ((128, 0.006, 4),
                                 (16, 0.08, 4),
                                 (256, 0.171, 4),
                                 (512, 0.257, 1)):
        for _ in range(repeats):
            dnn.add(Dense(units, kernel_initializer='TruncatedNormal'))
            add_norm_act_dropout(dnn, rate)
    dnn.add(Dense(1))

    # Putting it all together
    main_input = Input(shape=(X_train.shape[1], X_train.shape[2]))
    # Each sample is split into 24 slices of shape (5, X_train.shape[2], 1);
    # TimeDistributed applies the CNN to every slice along that first axis.
    reshaped_to_smaller_images = Reshape(
        target_shape=(24, 5, X_train.shape[2], 1))(main_input)
    model = TimeDistributed(cnn)(reshaped_to_smaller_images)
    model = rnn(model)  # combine timedistributed cnn with rnn
    model = dnn(model)  # add dense

    # create the model, specify in and output
    model = Model(inputs=main_input, outputs=model)
    model.compile(loss='mse', metrics=['mape'], optimizer='adam')

    # Defined but deliberately not passed to fit() for now.
    early_stopping_monitor = EarlyStopping(patience=50000)

    bs = 256
    validation_split = 0.2
    # SGDRScheduler expects the number of mini-batches per epoch.  The old
    # expression ceil(14 / 256) always gave 1, so the cosine cycle was far
    # shorter than intended.  Keras trains on the first
    # int(n * (1 - validation_split)) rows, so derive the count from that.
    n_train_samples = int(X_train.shape[0] * (1 - validation_split))
    steps_per_epoch = np.ceil(n_train_samples / bs)

    schedule = SGDRScheduler(
        min_lr=4.6e-6,  # 1e-5
        max_lr=4.8e-2,  # 1e-2
        steps_per_epoch=steps_per_epoch,
        lr_decay=0.9,
        cycle_length=5,  # 5
        mult_factor=1.5)

    # Build paths with os.path.join instead of hard-coded backslashes so the
    # files land inside `models` on every platform; create the folder first.
    output_dir = "models"
    os.makedirs(output_dir, exist_ok=True)

    def best_checkpoint(file_name, monitored):
        """Return a ModelCheckpoint keeping the minimum of `monitored`."""
        return ModelCheckpoint(os.path.join(output_dir, file_name),
                               monitor=monitored,
                               verbose=1,
                               save_best_only=True,
                               mode='min')

    checkpoint1 = best_checkpoint("timedist.val_loss.hdf5", 'val_loss')
    checkpoint2 = best_checkpoint("timedist.val_mape.hdf5", 'val_mape')
    # Training-metric checkpoints: available but currently not used in fit().
    checkpoint4 = best_checkpoint("timedist.train_loss.hdf5", 'loss')
    checkpoint5 = best_checkpoint("timedist.train_mape.hdf5", 'mape')

    result = model.fit(
        X_train,
        y_train,
        batch_size=bs,
        epochs=4000,  # should take 24h ish
        verbose=1,
        validation_split=validation_split,
        callbacks=[schedule, checkpoint1, checkpoint2])

    pd.DataFrame(result.history).to_csv(
        os.path.join(output_dir, 'timedist_fit_history.csv'))

    # Lowest validation loss over all training epochs.
    validation_loss = np.amin(result.history['val_loss'])
    print('validation loss of epoch:', validation_loss)
    return model
flag_use_iterator_or_function_to_generate_batches = 2
numer_of_epochs = 10
if flag_method_of_fitting == 1:
    # (1). Fit using large data already in the form of numpy arrays.

    # Generate batches, either from the iterator (1) or from the generator
    # object's function (2).
    if flag_use_iterator_or_function_to_generate_batches == 1:
        [X, y] = next(training_generator)
    elif flag_use_iterator_or_function_to_generate_batches == 2:
        [X, y] = training_generator_object.generate_function(number_of_batches)

    # Fit.  validation_data is passed as the documented (x, y) tuple.
    history = conv_model_time_distributed.fit(
        x=X,
        y=y,
        batch_size=batch_size,
        epochs=1,
        callbacks=callbacks_list,
        validation_data=(validation_X, validation_y))

    # Plot what's going on.  The per-epoch values live in the `history`
    # dict of the returned History object; the object itself is neither
    # subscriptable nor is that dict callable.
    figure(1)
    subplot(2, 1, 1)
    plot(history.history['loss'])
    ylabel('loss')
    xlabel('epochs')
    subplot(2, 1, 2)
    plot(sqrt(history.history['mse']))
    ylabel('std')
    xlabel('epochs')
def create_model(X_train, y_train, X_test, y_test):
    """Hyperas model template: TimeDistributed CNN -> LSTM -> dense regressor.

    The double-brace expressions are hyperas search-space templates; their
    order and surrounding syntax are kept unchanged.  The model is trained
    with MSE loss (MAPE metric, nadam optimizer), early stopping and an
    SGDR learning-rate schedule, with 20 % of the rows held out for
    validation.

    Returns:
        dict with the best (lowest) validation loss under 'loss' and
        STATUS_OK under 'status'.  The model itself is not returned, to
        save RAM.
    """
    # CNN Model
    cnn = Sequential()

    ks1_first = {{choice([2, 3, 4, 5, 8, 10])}}
    ks1_second = {{choice([2, 3, 4, 5, 8, 10])}}

    ks2_first = {{choice([2, 3, 4, 5, 8, 10])}}
    ks2_second = {{choice([2, 3, 4, 5, 8, 10])}}

    cnn.add(
        Conv2D(filters=({{choice([1, 2, 3, 4, 5, 8])}}),
               kernel_size=(ks1_first, ks1_second),
               padding='same',
               kernel_initializer='TruncatedNormal'))
    cnn.add(BatchNormalization())
    cnn.add(LeakyReLU())
    cnn.add(Dropout({{uniform(0, 1)}}))

    for _ in range({{choice([0, 1, 2, 3])}}):
        cnn.add(
            Conv2D(filters=({{choice([4, 8])}}),
                   kernel_size=(ks2_first, ks2_second),
                   padding='same',
                   kernel_initializer='TruncatedNormal'))
        cnn.add(BatchNormalization())
        cnn.add(LeakyReLU())
        cnn.add(Dropout({{uniform(0, 1)}}))

    cnn.add(Flatten())

    # RNN Model
    rnn = Sequential()
    rnn.add(
        CuDNNLSTM({{choice([1, 2, 3, 4, 5, 6, 7, 8])}},
                  return_sequences=True,
                  kernel_initializer='TruncatedNormal'))
    rnn.add(BatchNormalization())
    rnn.add(LeakyReLU())
    rnn.add(Dropout({{uniform(0, 1)}}))

    for _ in range({{choice([0, 1, 2, 3, 4, 8])}}):
        rnn.add(
            CuDNNLSTM({{choice([4, 8])}},
                      kernel_initializer='TruncatedNormal',
                      return_sequences=True))
        rnn.add(BatchNormalization())
        rnn.add(LeakyReLU())
        rnn.add(Dropout({{uniform(0, 1)}}))

    rnn.add(
        CuDNNLSTM({{choice([1, 2, 3, 4, 5, 6, 7, 8])}},
                  kernel_initializer='TruncatedNormal',
                  return_sequences=False))
    rnn.add(BatchNormalization())
    rnn.add(LeakyReLU())
    rnn.add(Dropout({{uniform(0, 1)}}))

    # DNN Model
    dnn = Sequential()

    for _ in range({{choice([0, 1, 2, 3, 4, 8, 16, 32])}}):
        dnn.add(
            Dense({{choice([4, 8, 16, 32, 64, 128, 256, 512])}},
                  kernel_initializer='TruncatedNormal'))
        dnn.add(BatchNormalization())
        dnn.add(LeakyReLU())
        dnn.add(Dropout({{uniform(0, 1)}}))

    for _ in range({{choice([0, 1, 2, 3, 4, 8, 16, 32])}}):
        dnn.add(
            Dense({{choice([4, 8, 16, 32, 64, 128, 256, 512])}},
                  kernel_initializer='TruncatedNormal'))
        dnn.add(BatchNormalization())
        dnn.add(LeakyReLU())
        dnn.add(Dropout({{uniform(0, 1)}}))

    for _ in range({{choice([0, 1, 2, 3, 4, 8, 16, 32])}}):
        dnn.add(
            Dense({{choice([4, 8, 16, 32, 64, 128, 256, 512])}},
                  kernel_initializer='TruncatedNormal'))
        dnn.add(BatchNormalization())
        dnn.add(LeakyReLU())
        dnn.add(Dropout({{uniform(0, 1)}}))

    for _ in range({{choice([0, 1, 2, 3, 4, 8, 16, 32])}}):
        dnn.add(
            Dense({{choice([4, 8, 16, 32, 64, 128, 256, 512])}},
                  kernel_initializer='TruncatedNormal'))
        dnn.add(BatchNormalization())
        dnn.add(LeakyReLU())
        dnn.add(Dropout({{uniform(0, 1)}}))

    dnn.add(
        Dense({{choice([8, 16, 32, 64, 128, 256, 512, 1024])}},
              kernel_initializer='TruncatedNormal'))
    dnn.add(BatchNormalization())
    dnn.add(LeakyReLU())
    dnn.add(Dropout({{uniform(0, 1)}}))

    dnn.add(Dense(1))

    # Putting it all together
    main_input = Input(shape=(X_train.shape[1], X_train.shape[2]))
    # Each sample is split into 24 slices of shape (5, X_train.shape[2], 1);
    # TimeDistributed applies the CNN to every slice along that first axis.
    reshaped_to_smaller_images = Reshape(target_shape=(24, 5,
                                                       X_train.shape[2],
                                                       1))(main_input)
    model = TimeDistributed(cnn)(reshaped_to_smaller_images)
    model = rnn(model)  # combine timedistributed cnn with rnn
    model = dnn(model)  # add dense

    # create the model, specify in and output
    model = Model(inputs=main_input, outputs=model)
    model.compile(loss='mse', metrics=['mape'], optimizer='nadam')

    # Stop a trial once validation loss has not improved for 15 epochs.
    early_stopping_monitor = EarlyStopping(patience=15)

    validation_split = 0.2
    bs = {{choice([32, 64, 128, 256])}}

    # SGDRScheduler expects the number of mini-batches per epoch.  The old
    # lookup table (109/56/28/14) already held batch counts and was then
    # divided by bs again, giving 4 or 1.  Keras trains on the first
    # int(n * (1 - validation_split)) rows, so derive the count from that.
    n_train_samples = int(X_train.shape[0] * (1 - validation_split))
    steps_per_epoch = np.ceil(n_train_samples / bs)

    schedule = SGDRScheduler(
        min_lr={{uniform(1e-8, 1e-5)}},  #1e-5
        max_lr={{uniform(1e-3, 1e-1)}},  # 1e-2
        steps_per_epoch=steps_per_epoch,
        lr_decay=0.9,
        cycle_length=5,  # 5
        mult_factor=1.5)

    result = model.fit(X_train,
                       y_train,
                       batch_size=bs,
                       epochs=100,
                       verbose=1,
                       validation_split=validation_split,
                       callbacks=[early_stopping_monitor, schedule])

    # Lowest validation loss over the epochs; NaN losses count as 9999 so
    # diverged trials rank last.
    hist = pd.DataFrame(result.history['val_loss']).fillna(9999)
    val_loss = np.amin(hist.values)
    print('Best validation loss of epoch:', val_loss)

    K.clear_session()  # Clear the tensorflow session (Free up RAM)

    return {
        'loss': val_loss,
        'status': STATUS_OK
    }  # Not returning model to save RAM
def model_with_padding(self, DICT, n_char):
    """Train a 3-layer BiLSTM + CRF tagger on sequences padded to a fixed length.

    Sequences and their numeric labels come from ``self.get_seq(DICT)``;
    both are post-padded to ``self.maxSeqLength`` and the labels are
    one-hot encoded over ``self.lab_len`` classes.  After
    ``self.epochsN`` epochs (batch size 32, no validation split) all
    weights are saved to ``max_seq_<epochsN>_32b.h5``.

    Args:
        DICT: Mapping forwarded to ``self.get_seq``.
        n_char: Vocabulary size; the embedding has ``n_char + 1`` rows.
    """
    token_seqs, tag_seqs = self.get_seq(DICT)

    # Post-pad inputs and labels to the same fixed length.
    X = pad_sequences(token_seqs, maxlen=self.maxSeqLength, padding='post')
    padded_tags = pad_sequences(tag_seqs,
                                maxlen=self.maxSeqLength,
                                padding='post')
    onehot_tags = [
        to_categorical(row, num_classes=self.lab_len) for row in padded_tags
    ]

    # No early-stopping / checkpoint callbacks are used for this run.

    # Network: embedding -> 3 stacked BiLSTMs -> per-step dense -> CRF.
    char_ids = Input(shape=(self.maxSeqLength, ))
    hidden = Embedding(n_char + 1, 200, name="embed")(char_ids)
    for units, layer_name in ((128, "lstm1"), (64, "lstm2"), (64, "lstm3")):
        recurrent = LSTM(units,
                         return_sequences=True,
                         recurrent_dropout=0.5,
                         dropout=0.5)
        hidden = Bidirectional(recurrent,
                               merge_mode="concat",
                               name=layer_name)(hidden)

    emissions = TimeDistributed(Dense(self.lab_len,
                                      activation="relu"))(hidden)
    crf = CRF(self.lab_len)
    tagger = Model(char_ids, crf(emissions))
    tagger.compile(optimizer="rmsprop",
                   loss=crf.loss_function,
                   metrics=[crf.accuracy])
    tagger.summary()

    # Batch sizes tried so far: 32, 147, 245, 735.
    tagger.fit(X,
               np.array(onehot_tags),
               batch_size=32,
               epochs=self.epochsN,
               validation_split=0.0,
               verbose=1)

    # Persist the weights reached after the final epoch.
    save_load_utils.save_all_weights(tagger,
                                     'max_seq_%s_32b.h5' % self.epochsN)
def model_no_padding(self, DICT, n_char):
    """Train a 3-layer BiLSTM + CRF tagger without padding.

    Training sequences are grouped by length and each group is fitted as
    its own batch, so no padding is needed.  After every epoch the
    weights are saved to ``mini-batch-results/epoch_<n>.h5``, the model is
    tested via ``self.test_minibatch`` and scored via ``self.eval``.  The
    weights of the best-scoring epoch are finally copied to
    ``minibatch_<n>.h5``.

    Side effect: ``self.train_data[i]['corpus']`` is replaced in place by
    the list of ``DICT`` indices, so the method must not be run twice on
    the same data.

    Args:
        DICT: Mapping from corpus symbol to integer index (looked up with
            ``DICT.get``, so unknown symbols become ``None``).
        n_char: Vocabulary size; the embedding has ``n_char + 1`` rows.
    """
    import os  # local import: only needed to create the output folder

    # convert BIO tags to numbers
    self.convert_tags()

    # Replace every corpus symbol by its DICT index (in place).
    for sample in self.train_data:
        sample['corpus'] = [DICT.get(c) for c in sample['corpus']]

    # Group inputs and labels by sequence length.
    # NOTE(review): inputs are keyed by len(corpus) and labels by
    # len(bion); this assumes both lengths agree per sample.
    train_l_d = {}
    train_l_labels = {}
    for seq in self.train_data:
        train_l_d.setdefault(len(seq['corpus']), []).append(seq['corpus'])
        train_l_labels.setdefault(len(seq['bion']), []).append(seq['bion'])

    sizes = list(train_l_d)

    # Set up the keras model
    il = Input(shape=(None, ), dtype='int32')
    el = Embedding(n_char + 1, 200, name="embed")(il)
    bl1 = Bidirectional(LSTM(128,
                             return_sequences=True,
                             recurrent_dropout=0.5,
                             dropout=0.5),
                        merge_mode="concat",
                        name="lstm1")(el)
    bl2 = Bidirectional(LSTM(64,
                             return_sequences=True,
                             recurrent_dropout=0.5,
                             dropout=0.5),
                        merge_mode="concat",
                        name="lstm2")(bl1)
    bl3 = Bidirectional(LSTM(64,
                             return_sequences=True,
                             recurrent_dropout=0.5,
                             dropout=0.5),
                        merge_mode="concat",
                        name="lstm3")(bl2)
    model = TimeDistributed(Dense(self.num_labs, activation="relu"))(bl3)
    crf = CRF(self.num_labs)  # CRF layer
    out = crf(model)  # output
    model = Model(il, out)
    model.compile(optimizer="rmsprop",
                  loss=crf.loss_function,
                  metrics=[crf.accuracy])
    model.summary()

    # The per-epoch weight files are written here; make sure it exists.
    os.makedirs("mini-batch-results", exist_ok=True)

    f_best = -1
    f_index = -1

    # OK, start actually training
    for epoch in range(self.epochsN):
        print("Epoch", epoch, "start at", datetime.now())
        # Train in batches of different sizes - randomize the order of
        # sizes, except for the first few epochs.
        if epoch > 2:
            random.shuffle(sizes)
        for size in sizes:
            tx = np.array(train_l_d[size])
            ty = [
                to_categorical(labels, num_classes=self.num_labs)
                for labels in train_l_labels[size]
            ]
            # This trains in mini-batches
            model.fit(tx, np.array(ty), verbose=0, epochs=1)
        print("Trained at", datetime.now())

        # save all epochs
        save_load_utils.save_all_weights(
            model, 'mini-batch-results/epoch_%s.h5' % epoch)

        # test the results
        self.test_minibatch(DICT, model)
        f = self.eval()
        if f > f_best:
            f_best = f
            f_index = epoch

    # Pick the best model, and save it with a useful name
    print("Choosing the best epoch")
    if f_index < 0:
        # No epoch ran or none scored above -1: there is no epoch file to
        # copy (the old code tried to copy the non-existent epoch_-1.h5).
        print("No best epoch found; nothing to copy")
        return
    shutil.copyfile("mini-batch-results/epoch_%s.h5" % f_index,
                    "minibatch_%s.h5" % f_index)