def dataset_build(self, opt):
    fields = onmt.IO.get_fields("text", 0, 0)
    train = preprocess.build_dataset('train', fields, opt)
    onmt.IO.build_vocab(train, opt.data_type, opt.share_vocab,
                        opt.src_vocab_size, opt.src_words_min_frequency,
                        opt.tgt_vocab_size, opt.tgt_words_min_frequency)
    preprocess.build_dataset('valid', fields, opt)
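# Hedged usage sketch: `opt` only needs the attributes read above. The
# values below are illustrative placeholders, not taken from the source.
from argparse import Namespace

opt = Namespace(data_type='text', share_vocab=False,
                src_vocab_size=50000, src_words_min_frequency=0,
                tgt_vocab_size=50000, tgt_words_min_frequency=0)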
def train_it(train_path, checkpoint_filepath, model_path, start, span):
    dataset = build_dataset(train_path)
    train_x, train_y = [], []
    valid_x, valid_y = [], []
    rng = np.random.RandomState(0)
    k = 0
    # Split examples into train/validation with a seeded random draw:
    # draws falling in [start, start + span) go to validation.
    for x, y in dataset.as_numpy_iterator():
        x = [str(i, 'utf-8') for i in x]
        y = [str(i, 'utf-8') for i in y]
        rnum = rng.rand()
        k += 1
        if rnum < start or rnum >= start + span:
            train_x += [x]
            train_y += [y]
        else:
            valid_x += [x]
            valid_y += [y]

    print('====' * 8)
    print('total = ', k)
    print('start, span = ', (start, span))
    print('len train = ', len(train_x))

    if not os.path.exists(os.path.dirname(checkpoint_filepath)):
        os.makedirs(os.path.dirname(checkpoint_filepath))

    # Bug fix: the embedding was referenced but never built in this scope.
    bert_embed = BertEmbedding(bert_path)
    model = BiLSTM_CRF_Model(bert_embed, sequence_length=100)
    evaluator = Evaluator(model, checkpoint_filepath, valid_x, valid_y)
    model.fit(train_x, train_y, valid_x, valid_y,
              batch_size=64, epochs=20, callbacks=[evaluator])
    model.save(model_path)
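# Hedged usage sketch (the paths below are hypothetical, not from the
# source): because the split above replays RandomState(0) every call,
# sweeping `start` in steps of `span` carves the same random stream into
# disjoint validation folds.
if __name__ == '__main__':
    n_folds = 5
    span = 1.0 / n_folds
    for fold in range(n_folds):
        train_it('data/train.tsv',                           # hypothetical path
                 './checkpoint/fold{}/weights'.format(fold),  # hypothetical path
                 'models/ner_fold{}'.format(fold),            # hypothetical path
                 start=fold * span, span=span)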
def predict_it(test_path, model_path, output_path):
    bert_embed = BertEmbedding(bert_path)
    dataset = build_dataset(test_path)
    test_x, test_y = [], []
    for x, y in dataset.as_numpy_iterator():
        x = [str(i, 'utf-8') for i in x]
        y = [str(i, 'utf-8') for i in y]
        test_x += [x]
        test_y += [y]

    # Load the saved model, then restore the checkpointed weights.
    loaded_model = kashgari.utils.load_model('saved_ner_model')
    loaded_model.tf_model.load_weights(model_path)

    # Predict with the model. Bug fix: write the predictions to a new
    # variable instead of overwriting the gold labels in test_y.
    pred_y = loaded_model.predict(test_x)
    with open(output_path, 'w') as f:
        for y in pred_y:
            f.write('\t'.join(y) + '\n')
    print('predict_it done {} {} {}'.format(test_path, model_path, output_path))
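# Hedged follow-up sketch: a token-level accuracy check between a gold-label
# file and the file written by predict_it. It assumes both files hold one
# tab-separated tag sequence per line, matching the writer above; the
# function itself is illustrative glue, not part of the source.
def tag_accuracy(gold_path, pred_path):
    correct, total = 0, 0
    with open(gold_path) as gold_f, open(pred_path) as pred_f:
        for gold_line, pred_line in zip(gold_f, pred_f):
            gold_tags = gold_line.rstrip('\n').split('\t')
            pred_tags = pred_line.rstrip('\n').split('\t')
            for gt, pt in zip(gold_tags, pred_tags):
                correct += (gt == pt)
                total += 1
    return correct / total if total else 0.0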
def train():
    X, Y = preprocess.build_dataset()
    print('X shape:', X.shape)
    print('Y shape:', Y.shape)

    # Resume from a saved model if one exists; otherwise create a new one.
    temp_model_file = os.path.join(config.PATH, "temp_model.h5")
    if os.path.exists(temp_model_file):
        model = load_model(temp_model_file)
        print("LSTM Network loaded")
    else:
        model = create_model()
        print("LSTM Network created")

    print("Model Summary:")
    model.summary()  # summary() prints directly; wrapping it in print() emits "None"

    # Define the checkpoint and learning-rate callbacks. Learning-rate decay
    # is handled by ReduceLROnPlateau rather than a fixed epoch schedule.
    filepath = os.path.join(
        config.CHKPT_PATH, "weights-improvement-{epoch:03d}-{val_acc:.4f}.hdf5")
    checkpoint = ModelCheckpoint(filepath, monitor='val_acc', verbose=1,
                                 save_best_only=True, mode='max')
    reduce_lr = ReduceLROnPlateau(monitor='val_acc', factor=0.1,
                                  patience=10, min_lr=0.000001)
    logfilepath = os.path.join(config.PATH, "logs.csv")
    logger = CSVLogger(logfilepath)
    stopper = EarlyStopping(monitor='val_acc', min_delta=0.00001,
                            patience=15, verbose=1, mode='auto')
    callbacks_list = [checkpoint, reduce_lr, logger, LearningRatePrinter(), stopper]

    # Fit the model, holding out 20% of the data for validation.
    history = model.fit(X, Y, batch_size=config.BATCH_SIZE,
                        validation_split=0.2, verbose=2,
                        epochs=config.NUM_EPOCHS, callbacks=callbacks_list)
    print("LSTM Network trained")

    # Save the trained model and serialize its architecture to JSON.
    model.save(config.MODEL_FILE)
    print("LSTM Network saved")
    model_json = model.to_json()
    json_filename = os.path.join(config.PATH, "model_json.json")
    with open(json_filename, "w") as json_file:
        json_file.write(model_json)
    del model

    # Save the training history.
    pkl_filename = os.path.join(config.PATH, "history.pkl")
    with open(pkl_filename, "wb") as pkl_file:
        pickle.dump(history.history, pkl_file)

    # Plot training statistics: accuracy, then loss. The curves come from
    # the 80/20 validation split, so the held-out curve is labeled
    # 'validation' rather than 'test'.
    plt.plot(history.history['acc'])
    plt.plot(history.history['val_acc'])
    plt.title('LSTM accuracy')
    plt.ylabel('accuracy')
    plt.xlabel('epoch')
    plt.legend(['train', 'validation'], loc='upper left')
    plt.savefig(os.path.join(config.PATH, 'LSTM accuracy.png'))
    plt.show()

    plt.plot(history.history['loss'])
    plt.plot(history.history['val_loss'])
    plt.title('LSTM loss')
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.legend(['train', 'validation'], loc='upper left')
    plt.savefig(os.path.join(config.PATH, 'LSTM loss.png'))
    plt.show()
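# `LearningRatePrinter` is referenced above but not defined in this snippet.
# A minimal sketch of what it might look like, assuming the standard Keras
# callback interface and an optimizer that exposes an `lr` variable:
from keras import backend as K
from keras.callbacks import Callback

class LearningRatePrinter(Callback):
    def on_epoch_begin(self, epoch, logs=None):
        # Read the current learning rate from the optimizer and print it.
        print('epoch {}: learning rate = {}'.format(
            epoch, K.eval(self.model.optimizer.lr)))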
categories = [
    'comp.os.ms-windows.misc',
    'rec.motorcycles',
    'sci.space',
    'sci.crypt',
    'sci.electronics',
    'soc.religion.christian',
    'talk.politics.guns',
    'talk.politics.mideast',
]
vocab, train_set, test_set = build_dataset(train_size=4000, test_rate=0.1,
                                           categories=categories)
vocab_size = len(vocab)
input_dim = 256
hidden_dim = 128
output_dim = len(categories)

# CNN-only hyperparameters (unused by the RNN below).
in_channels = 1
out_channels = 256
kernel_sizes = [3, 4, 5]
keep_proba = 0.5

print('vocab size: ', vocab_size, ' output_dim: ', output_dim)
print('train size: ', len(train_set), ' test size: ', len(test_set))

model = RNN(vocab_size, input_dim, hidden_dim, output_dim)
# Alternatively, swap in the CNN classifier:
# model = CNN(vocab_size, input_dim, output_dim, in_channels,
#             out_channels, kernel_sizes, keep_proba)
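# Hedged training-loop sketch: the constructor calls above suggest a
# PyTorch-style model, so this assumes `model` is an nn.Module and that each
# batch is an (input_tensor, label_tensor) pair; `make_batches` is
# hypothetical glue, not part of the source.
import torch
import torch.nn as nn

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
for inputs, labels in make_batches(train_set, batch_size=64):  # hypothetical helper
    optimizer.zero_grad()
    logits = model(inputs)            # forward pass
    loss = criterion(logits, labels)  # cross-entropy over the categories
    loss.backward()                   # backpropagate
    optimizer.step()                  # update parameters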
@author: cbasu
"""
from preprocess import build_dataset, get_corpus, make_mappings
import numpy as np
import argparse
import os

parser = argparse.ArgumentParser(description='Trains an LSTM on the text provided.')
parser.add_argument('file_path', metavar='F', type=str,
                    help='A path to a text file.')
args = parser.parse_args()

corpus = get_corpus(args.file_path)
inputs, outputs = build_dataset(corpus)
word_to_id, id_to_word = make_mappings(corpus)

from lstm import LSTM

clf = LSTM(inputs.shape[1], outputs.shape[1], 250, 128)
for i in range(10000):
    clf.fit(inputs, outputs, learning_rate=.001, epochs=1)

# Seed generation from a random training example and the last hidden state.
generated = []
index = np.random.randint(len(inputs))
init = inputs[index]
hprev, cprev = clf.hidden_states[-1], clf.internal_memory[-1]
generated.append(id_to_word[np.argmax(init)])
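# Hedged continuation sketch: one way the sampling loop might proceed.
# `clf.step` is hypothetical (the LSTM class API is not shown here); it is
# assumed to take a one-hot input plus the previous hidden/cell states and
# return output probabilities along with the next states.
vocab_size = inputs.shape[1]
x = init
for _ in range(50):
    probs, hprev, cprev = clf.step(x, hprev, cprev)  # hypothetical API
    next_id = np.random.choice(vocab_size, p=np.ravel(probs))
    generated.append(id_to_word[next_id])
    x = np.eye(vocab_size)[next_id]  # one-hot encode the sampled word
print(' '.join(generated))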