def main(_):
    converter = TextConverter(filename=FLAGS.converter_path)
    if os.path.isdir(FLAGS.checkpoint_path):
        FLAGS.checkpoint_path = \
            tf.train.latest_checkpoint(FLAGS.checkpoint_path)

    model = CharRNN(converter.vocab_size, sampling=True,
                    lstm_size=FLAGS.lstm_size,
                    num_layers=FLAGS.num_layers,
                    use_embedding=FLAGS.use_embedding,
                    embedding_size=FLAGS.embedding_size)
    model.load(FLAGS.checkpoint_path)

    start = converter.text_to_arr(FLAGS.start_string)
    arr = model.sample(FLAGS.max_length, start, converter.vocab_size)
    print(converter.arr_to_text(arr))
def main(_):
    convert = TextConvert(fname=FLAGS.convert_path)
    if os.path.isdir(FLAGS.checkpoint_path):
        FLAGS.checkpoint_path = tf.train.latest_checkpoint(
            FLAGS.checkpoint_path)

    model = CharRNN(convert.vocab_size, sampling=True,
                    lstm_size=FLAGS.lstm_size,
                    num_layers=FLAGS.num_layers,
                    use_embedding=FLAGS.use_embedding,
                    embedding_size=FLAGS.embedding_size)
    model.load(FLAGS.checkpoint_path)

    start = convert.text2arr(FLAGS.start_string)
    arr = model.sample(FLAGS.max_length, start, convert.vocab_size)
    res = convert.arr2text(arr)
    print('get result: \n', res)
def generate():
    tf.compat.v1.disable_eager_execution()
    converter = TextConverter(filename=FLAGS.converter_path)
    if os.path.isdir(FLAGS.checkpoint_path):
        FLAGS.checkpoint_path = \
            tf.train.latest_checkpoint(FLAGS.checkpoint_path)

    model = CharRNN(converter.vocab_size, sampling=True,
                    lstm_size=FLAGS.lstm_size,
                    num_layers=FLAGS.num_layers,
                    use_embedding=FLAGS.use_embedding,
                    embedding_size=FLAGS.embedding_size)
    model.load(FLAGS.checkpoint_path)

    start = converter.text_to_arr(FLAGS.start_string)
    arr = model.sample(FLAGS.max_length, start, converter.vocab_size)
    return converter.arr_to_text(arr)
def main(_):
    converter = TextConverter(filename=FLAGS.converter_path)  # create the text converter
    if os.path.isdir(FLAGS.checkpoint_path):
        FLAGS.checkpoint_path = tf.train.latest_checkpoint(
            FLAGS.checkpoint_path)  # pick up the latest checkpoint

    model = CharRNN(converter.vocab_size, sampling=True,
                    lstm_size=FLAGS.lstm_size,
                    num_layers=FLAGS.num_layers,
                    use_embedding=FLAGS.use_embedding,
                    embedding_size=FLAGS.embedding_size)
    model.load(FLAGS.checkpoint_path)  # load the model

    # FLAGS.start_string.decode('utf-8') is a Python 2 idiom; in Python 3 the
    # flag is already a unicode string and can be used directly.
    start = converter.text_to_arr(FLAGS.start_string)  # convert the input text to ids
    arr = model.sample(FLAGS.max_length, start, converter.vocab_size)  # generated sequence
    print(converter.arr_to_text(arr))
def run():
    model_path = os.path.join(args.model_path, args.model_name)
    if not os.path.exists(model_path):
        os.makedirs(model_path)

    dict_path = os.path.join(model_path, 'dictionary.txt')
    word2id_path = os.path.join(model_path, 'word2id.pkl')
    feature_path = os.path.join(model_path, 'feature.npy')
    label_path = os.path.join(model_path, 'label.npy')

    print("data_path " + args.data_path)
    print("model_path " + model_path)
    print("dict_path " + dict_path)
    print("word2id_path " + word2id_path)
    print("feature_path " + feature_path)
    print("label_path " + label_path)

    data_maker = MakeData(raw_path=args.data_path,
                          dict_path=dict_path,
                          word2id_path=word2id_path,
                          feature_path=feature_path,
                          label_path=label_path,
                          low_frequency=0)
    data_maker.pretreatment_data()
    data_maker.delete_low_frequency()

    with open(args.data_path, 'r', encoding='utf-8') as file:
        content = file.read()

    text_array = data_maker.text2array(content)
    data_maker.save_dictionary()
    data_maker.make_data(text_array, args.batch_size, args.steps)

    model = CharRNN(vocab_size=data_maker.vocab_size,
                    feature_path=feature_path,
                    label_path=label_path,
                    lstm_size=args.lstm_size,
                    dropout_rate=args.dropout_rate,
                    embedding_size=args.embedding_size)
    model.train(data_stream=data_stream(feature_path, label_path,
                                        data_maker.vocab_size),
                epochs=args.epochs,
                model_path=os.path.join(model_path, 'checkpoint'))
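The call above hands `model.train` a `data_stream` generator that this snippet does not define. A minimal sketch under stated assumptions: `feature.npy` and `label.npy` hold pre-batched index arrays of shape `(n_batches, batch_size, steps)`, labels are one-hot encoded to match a `vocab_size`-wide softmax, and the generator loops forever in the Keras fit-generator style. The shapes and the endless loop are assumptions, not part of the original code.

import numpy as np

def data_stream(feature_path, label_path, vocab_size):
    features = np.load(feature_path)  # assumed shape: (n_batches, batch_size, steps)
    labels = np.load(label_path)      # same layout as features
    while True:  # fit-style generators are typically expected to loop forever
        for x, y in zip(features, labels):
            # One-hot encode targets so they match a vocab_size-wide softmax.
            yield x, np.eye(vocab_size, dtype=np.float32)[y]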
def main():
    logging.root.setLevel(logging.NOTSET)
    inputs, token_to_idx, idx_to_token = load_dataset(file_name=sys.argv[2])
    # coloredlogs.install(level='DEBUG')

    num_layers = 2
    rnn_type = 'lstm'
    dropout = 0.5
    emb_size = 50
    hidden_size = 256
    learning_rate = 0.001
    n_tokens = len(idx_to_token)

    model = CharRNN(num_layers=num_layers, rnn_type=rnn_type, dropout=dropout,
                    n_tokens=n_tokens, emb_size=emb_size,
                    hidden_size=hidden_size, pad_id=token_to_idx[PAD_TOKEN])
    if torch.cuda.is_available():
        model = model.cuda()
    optimiser = optim.Adam(model.parameters(), lr=learning_rate)

    s1 = "bababac bababa bacc bac bacc"
    s2 = "bababac baba bac bacc bac"
    s3 = "baba"
    s4 = "ccab cab ccab ababab cababab"

    try:
        model, optimiser, epoch, valid_loss_min = load_ckp(
            checkpoint_fpath=sys.argv[1], model=model, optimiser=optimiser)
        score(model, token_to_idx, idx_to_token, seed_phrase=s1)
        score(model, token_to_idx, idx_to_token, seed_phrase=s2)
        score(model, token_to_idx, idx_to_token, seed_phrase=s3)
        score(model, token_to_idx, idx_to_token, seed_phrase=s4)
    except KeyboardInterrupt:
        print('Aborted!')
def main(_):
    converter = TextConverter(filename=FLAGS.converter_path)
    if os.path.isdir(FLAGS.checkpoint_path):
        FLAGS.checkpoint_path = \
            tf.train.latest_checkpoint(FLAGS.checkpoint_path)

    model = CharRNN(converter.vocab_size, sampling=True,
                    state_size=FLAGS.state_size,
                    n_layers=FLAGS.n_layers,
                    use_embedding=FLAGS.use_embedding,
                    embedding_size=FLAGS.embedding_size)
    model.load(FLAGS.checkpoint_path)

    start = converter.text_to_data(FLAGS.start_string)
    data = model.sample(FLAGS.max_length, start, converter.vocab_size)
    # To print the output character by character with a delay:
    # for c in converter.data_to_text(data):
    #     for d in c:
    #         # print(d, end="")
    #         time.sleep(0.5)
    print(converter.data_to_text(data))
def main(_):
    converter = TextConverter(filename=FLAGS.converter_path)
    if os.path.isdir(FLAGS.checkpoint_path):
        FLAGS.checkpoint_path = \
            tf.train.latest_checkpoint(FLAGS.checkpoint_path)

    model = CharRNN(converter.vocab_size, sampling=True,
                    lstm_size=FLAGS.lstm_size,
                    num_layers=FLAGS.num_layers,
                    use_embedding=FLAGS.use_embedding,
                    embedding_size=FLAGS.embedding_size)
    model.load(FLAGS.checkpoint_path)

    start = converter.text_to_arr(FLAGS.start_string)

    # JS/HTML/CSS
    for i in range(0, 1):
        print('Generating: ' + str(i))
        file_path = ('../../BrowserFuzzingData/generated/' + FLAGS.file_type +
                     '/' + str(i) + '.' + FLAGS.file_type)
        arr = model.sample(FLAGS.max_length, start, converter.vocab_size)
        content = converter.arr_to_text(arr)
        # Turn escaped whitespace sequences back into real characters.
        content = content.replace("\\t", "\t")
        content = content.replace("\\r", "\r")
        content = content.replace("\\n", "\n")
        if FLAGS.file_type == 'js':
            print(content)
        elif FLAGS.file_type == 'html':
            content = post_process(content)
            with open(file_path, "x") as f:
                f.write(content)
        else:
            # TODO: reserved for CSS; no processing for now
            pass
def main(_):
    model_path = os.path.join('model', FLAGS.name)
    if not os.path.exists(model_path):
        os.makedirs(model_path)

    with open(FLAGS.input_file, encoding='utf-8') as f:
        text = f.read()

    converter = TextConverter(text, FLAGS.max_vocab)
    converter.save_to_file(os.path.join(model_path, 'converter.pkl'))

    arr = converter.text_to_arr(text)
    g = batch_generator(arr, FLAGS.num_seqs, FLAGS.num_steps)

    model = CharRNN(converter.vocab_size,
                    num_seqs=FLAGS.num_seqs,
                    num_steps=FLAGS.num_steps,
                    lstm_size=FLAGS.lstm_size,
                    num_layers=FLAGS.num_layers,
                    learning_rate=FLAGS.learning_rate,
                    train_keep_prob=FLAGS.train_keep_prob,
                    use_embedding=FLAGS.use_embedding,
                    embedding_size=FLAGS.embedding_size)
    model.train(g, FLAGS.max_steps, model_path, FLAGS.save_every_n,
                FLAGS.log_every_n)
def main(_):
    if not os.path.exists(FLAGS.model_name):
        os.mkdir(FLAGS.model_name)

    converter = TextTransform(FLAGS.input_file, FLAGS.max_vocab)
    converter.save_to_file(os.path.join(FLAGS.model_name, 'converter.pkl'))

    arr = converter.text_to_arr(converter.text)
    g = batch_generate(arr, FLAGS.num_seqs, FLAGS.num_steps, FLAGS.epoch_size)

    model = CharRNN(converter.vocab_size,
                    num_seqs=FLAGS.num_seqs,
                    num_steps=FLAGS.num_steps,
                    lstm_size=FLAGS.lstm_size,
                    num_layers=FLAGS.num_layers,
                    learning_rate=FLAGS.learning_rate,
                    train_keep_prob=FLAGS.train_keep_prob,
                    use_embedding=FLAGS.use_embedding,
                    embedding_size=FLAGS.embedding_size)
    model.train(g, FLAGS.model_name, FLAGS.log_every_n)
def train():
    # Preprocessing
    with open('holmes.txt', 'r') as f:
        text = f.read()
    vocab = set(text)
    vocab_to_int = {c: i for i, c in enumerate(vocab)}
    int_to_vocab = dict(enumerate(vocab))
    no_classes = len(vocab)
    pickle.dump((int_to_vocab, vocab_to_int, no_classes),
                open('./saves/data.p', 'wb'))
    encoded = np.array([vocab_to_int[c] for c in text], dtype=np.int32)

    # Initialize the model
    model = CharRNN(no_classes=no_classes)
    saver = tf.train.Saver(max_to_keep=100)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        counter = 0
        # Training
        for e in range(epochs):
            new_state = sess.run(model.initial_state)
            for x, y in get_batches(encoded, model.no_seqs, model.seq_len):
                counter += 1
                start = time.time()
                feed = {model.inputs: x,
                        model.targets: y,
                        model.initial_state: new_state}
                batch_loss, new_state, _ = sess.run(
                    [model.loss, model.final_state, model.train_op],
                    feed_dict=feed)
                end = time.time()
                print('Epoch: {} '.format(e + 1),
                      'Loss: {:.4f} '.format(batch_loss),
                      '{:.4f} sec/batch'.format(end - start))
                if counter % save_every_n == 0:
                    saver.save(sess, "saves/{}.ckpt".format(counter))
        saver.save(sess, "saves/{}.ckpt".format(counter))
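The training loop above (and the one at the end of this section) relies on a `get_batches` helper that is not shown. A minimal sketch of the usual char-RNN batching scheme, assuming `arr` is a 1-D array of character ids; the `no_seqs`/`seq_len` arguments above map to `n_seqs`/`n_steps` here:

import numpy as np

def get_batches(arr, n_seqs, n_steps):
    # Number of characters each full batch consumes.
    chars_per_batch = n_seqs * n_steps
    n_batches = len(arr) // chars_per_batch
    # Trim the remainder and lay the text out as n_seqs parallel rows.
    arr = arr[:n_batches * chars_per_batch].reshape((n_seqs, -1))
    for n in range(0, arr.shape[1], n_steps):
        x = arr[:, n:n + n_steps]
        # Targets are the inputs shifted one character to the left;
        # the last column wraps around to the window's first column.
        y = np.zeros_like(x)
        y[:, :-1], y[:, -1] = x[:, 1:], x[:, 0]
        yield x, y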
def main(_):
    model_path = os.path.join('model', FLAGS.name)
    if not os.path.exists(model_path):
        os.makedirs(model_path)

    with open(FLAGS.input_file_path, 'r', encoding='utf-8') as f:
        text = f.read()

    tc = util.TextConverter(text, FLAGS.max_vocab)
    tc.save_vocab(os.path.join('vocab', FLAGS.name))
    output_size = tc.vocab_size
    batch_generator = util.batch_generator(tc.text_to_arr(text),
                                           FLAGS.batch_size, FLAGS.seq_size)

    model = CharRNN(output_size=output_size,
                    batch_size=FLAGS.batch_size,
                    seq_size=FLAGS.seq_size,
                    lstm_size=FLAGS.lstm_size,
                    num_layers=FLAGS.num_layers,
                    learning_rate=FLAGS.learning_rate,
                    train_keep_prob=FLAGS.train_keep_prob)
    model.train(batch_generator,
                max_steps=FLAGS.max_steps,
                model_save_path=model_path,
                save_with_steps=FLAGS.save_every_n_steps,
                log_with_steps=FLAGS.log_every_n_steps)
def sample(checkpoint, n_samples, lstm_size, word_size, prime="The "):
    samples = [c for c in prime]
    mdl = CharRNN(len(words), lstm_size=lstm_size, sampling=True)
    saver = tf.train.Saver()
    with tf.Session() as sess:
        saver.restore(sess, checkpoint)
        new_state = sess.run(mdl.init_state)
        # Feed the priming characters through the network one at a time.
        for c in prime:
            x = np.zeros((1, 1))
            x[0, 0] = word_to_id[c]
            feed = {mdl.inputs: x,
                    mdl.keep_prob: 1.,
                    mdl.init_state: new_state}
            preds, new_state = sess.run([mdl.prediction, mdl.final_state],
                                        feed_dict=feed)

        c = pick_top_n(preds, len(words))
        # add character to samples
        samples.append(id_to_word[c])

        # generate characters
        for i in range(n_samples):
            x[0, 0] = c
            feed = {mdl.inputs: x,
                    mdl.keep_prob: 1.,
                    mdl.init_state: new_state}
            preds, new_state = sess.run([mdl.prediction, mdl.final_state],
                                        feed_dict=feed)
            c = pick_top_n(preds, len(words))
            samples.append(id_to_word[c])

    return ''.join(samples)
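`pick_top_n` is used above but not defined in this snippet. A minimal sketch of the common top-n sampling helper, assuming `preds` holds a softmax distribution of shape `(1, vocab_size)`; the `top_n=5` default is an assumption:

import numpy as np

def pick_top_n(preds, vocab_size, top_n=5):
    p = np.squeeze(preds)
    # Zero out everything except the top_n most likely characters,
    # renormalise, and sample one id from what remains.
    p[np.argsort(p)[:-top_n]] = 0
    p = p / np.sum(p)
    return np.random.choice(vocab_size, 1, p=p)[0]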
def main(_):
    model_path = os.path.join('model', FLAGS.name)
    arr, converter = initialize_converter(model_path)
    g = batch_generator(arr, FLAGS.num_seqs, FLAGS.num_steps)
    model = CharRNN(
        num_classes=converter.vocab_size,
        num_seqs=FLAGS.num_seqs,
        num_steps=FLAGS.num_steps,
        lstm_size=FLAGS.lstm_size,
        num_layers=FLAGS.num_layers,
        learning_rate=FLAGS.learning_rate,
        train_keep_prob=FLAGS.train_keep_prob,
        use_embedding=FLAGS.use_embedding,
        embedding_size=FLAGS.embedding_size,
        text_converter=converter
    )
    model.train(
        g,
        FLAGS.max_steps,
        model_path,
        FLAGS.save_every_n,
        FLAGS.log_every_n
    )
def main(_):
    converter = TextConverter(filename=FLAGS.converter_path)
    if os.path.isdir(FLAGS.checkpoint_path):
        FLAGS.checkpoint_path = \
            tf.train.latest_checkpoint(FLAGS.checkpoint_path)

    model = CharRNN(converter.vocab_size, sampling=True,
                    lstm_size=FLAGS.lstm_size,
                    num_layers=FLAGS.num_layers,
                    use_embedding=FLAGS.use_embedding,
                    embedding_size=FLAGS.embedding_size)
    model.load(FLAGS.checkpoint_path)

    start = converter.text_to_arr(FLAGS.start_string)
    arr = model.predict(FLAGS.max_length, start, converter.vocab_size, 10)
    for c, p in arr:
        prediction = converter.arr_to_text(c)
        prediction = remove_return(prediction)
        # If Chinese characters are generated, change {1:^14} to {1:{4}^14}
        # to fix the alignment. The 14 in {1:^14} depends on how many
        # characters are generated; character count + 4 is usually enough.
        print("{0} -> {1:^14} {2} {3}".format(FLAGS.start_string, prediction,
                                              "probability:", p, chr(12288)))
def main(_):
    ## Preprocess the data: use the TextConverter class from read_utils.py to
    ## keep the most frequent characters and map each one to an index, then
    ## call batch_generator to obtain a batch generator.
    model_path = os.path.join('model', FLAGS.name)  # join path components
    print("Model save path: ", model_path)
    if not os.path.exists(model_path):
        os.makedirs(model_path)  # create the directory recursively

    # To read Chinese characters from a file, use codecs with encoding='utf-8'.
    with codecs.open(FLAGS.input_file, encoding='utf-8') as f:
        print("Training data source:", FLAGS.input_file)
        text = f.read()

    converter = TextConverter(text, FLAGS.max_vocab)  # build the vocabulary
    # Serialize the frequency-filtered character set.
    converter.save_to_file(os.path.join(model_path, 'converter.pkl'))

    arr = converter.text_to_arr(text)  # map each character to its index
    g = batch_generator(arr, FLAGS.num_seqs, FLAGS.num_steps)  # get a batch generator
    print(converter.vocab_size)  # print the vocabulary size

    ## With preprocessing done, build the recurrent network with the CharRNN
    ## class from model.py, then train it with train().
    model = CharRNN(converter.vocab_size,            # number of character classes
                    num_seqs=FLAGS.num_seqs,         # sequences per batch
                    num_steps=FLAGS.num_steps,       # characters per sequence
                    lstm_size=FLAGS.lstm_size,       # hidden units per cell
                    num_layers=FLAGS.num_layers,     # number of RNN layers
                    learning_rate=FLAGS.learning_rate,
                    train_keep_prob=FLAGS.train_keep_prob,
                    use_embedding=FLAGS.use_embedding,
                    embedding_size=FLAGS.embedding_size
                    )
    model.train(g,
                FLAGS.max_steps,
                model_path,
                FLAGS.save_every_n,
                FLAGS.log_every_n,
                )
def experiment_fn(run_config, params):
    char_rnn = CharRNN()
    estimator = tf.estimator.Estimator(model_fn=char_rnn.model_fn,
                                       model_dir=Config.train.model_dir,
                                       params=params,
                                       config=run_config)

    data_loader = TextLoader(Config.data.data_dir,
                             batch_size=params.batch_size,
                             seq_length=params.seq_length)
    Config.data.vocab_size = data_loader.vocab_size

    train_X, test_X, train_y, test_y = data_loader.make_train_and_test_set()

    train_input_fn, train_input_hook = dataset.get_train_inputs(train_X,
                                                                train_y)
    test_input_fn, test_input_hook = dataset.get_test_inputs(test_X, test_y)

    experiment = tf.contrib.learn.Experiment(
        estimator=estimator,
        train_input_fn=train_input_fn,
        eval_input_fn=test_input_fn,
        train_steps=Config.train.train_steps,
        # min_eval_frequency=Config.train.min_eval_frequency,
        train_monitors=[
            train_input_hook,
            hook.print_variables(
                variables=['training/output_0', 'prediction_0'],
                vocab=data_loader.vocab,
                every_n_iter=Config.train.check_hook_n_iter)
        ],
        eval_hooks=[test_input_hook],
        # eval_steps=None
    )
    return experiment
def create_model(session, num_classes, sampling, model_path):
    model = CharRNN(num_classes,
                    FLAGS.batch_size,
                    # max_time,
                    FLAGS.lstm_size,
                    FLAGS.num_layers,
                    FLAGS.learning_rate,
                    FLAGS.learning_rate_decay_factor,
                    FLAGS.grad_clip,
                    sampling,
                    FLAGS.keep_prob,
                    FLAGS.use_embedding,
                    FLAGS.embedding_size,
                    FLAGS.use_sample_loss)

    ckpt = tf.train.get_checkpoint_state(model_path)
    if ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path):
        print("Reading model parameters from %s" % ckpt.model_checkpoint_path)
        model.saver.restore(session, ckpt.model_checkpoint_path)
    else:
        print("Created model with fresh parameters.")
        session.run(tf.global_variables_initializer())
    return model
import os
from multiprocessing import Value

import numpy as np
import torch
from flask import Flask, request, jsonify, render_template
from torch.nn import functional as F

from model import CharRNN
from settings import *
from utils import load_dict, create_tune_header

app = Flask(__name__)
print("Environment:", app.config["ENV"])

# Create and load model
model = CharRNN(n_char)
model.load_state_dict(torch.load(default_model_path, map_location='cpu'))
model.eval()

# Load necessary dictionaries
int2char = load_dict(int2char_path)
char2int = load_dict(char2int_path)

counter = Value("i", 0)
error_message = ("We created some tunes, but it seems like we can't create "
                 "music from these melodies. Please try again!")
print("Ready!")


@app.route("/")
                    help='initialize network from checkpoint')
args = parser.parse_args()

if not os.path.isdir(args.save_dir):
    raise OSError(f'Directory {args.save_dir} does not exist.')

with open(args.in_file, 'r') as f:
    text = f.read()

int2char, char2int = get_lookup_tables(text)
encoded = np.array([char2int[ch] for ch in text])
chars = tuple(char2int.keys())

if args.init_from is None:
    net = CharRNN(chars, n_hidden=args.rnn_size, n_layers=args.num_layers)
else:
    net = load_model(args.init_from)

val_loss = train(net, encoded,
                 epochs=args.num_epochs,
                 n_seqs=args.batch_size,
                 n_steps=args.seq_length,
                 lr=args.learning_rate,
                 cuda=args.gpu,
                 print_every=args.print_every)

save_file = 'checkpoint1.ckpt'
save_model(net, os.path.join(args.save_dir, save_file))
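`save_model` and `load_model` are assumed above but not shown. A minimal sketch, assuming the checkpoint follows the dict layout used by the `rnn_50_epoch.net` loader later in this section (`n_hidden`, `n_layers`, `state_dict`, plus the vocabulary); the attribute names on `net` are assumptions:

import torch

def save_model(net, path):
    # Persist the hyperparameters alongside the weights so the network
    # can be rebuilt without external configuration.
    checkpoint = {'n_hidden': net.n_hidden,
                  'n_layers': net.n_layers,
                  'state_dict': net.state_dict(),
                  'tokens': net.chars}
    torch.save(checkpoint, path)

def load_model(path):
    checkpoint = torch.load(path, map_location='cpu')
    net = CharRNN(checkpoint['tokens'],
                  n_hidden=checkpoint['n_hidden'],
                  n_layers=checkpoint['n_layers'])
    net.load_state_dict(checkpoint['state_dict'])
    return net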
included_extensions = ['.json']
# files = [directory + '/' + fn for fn in os.listdir(directory)
#          if any(fn.endswith(ext) for ext in included_extensions)]
files = ['raw_data/out.json']
random.shuffle(files)

print('Preparing data...')
provider = DataProvider(files, batch_size=batch_size, padding_value=0)
vocab = provider.vocab
vocab_size = len(vocab)
print('Vocab size: ', vocab_size)

model = CharRNN(vocab_size=vocab_size,
                target_size=vocab_size,
                embedding_dim=embedding_dim,
                hidden_size=hidden_size,
                num_layers=num_layers)

criterion = nn.NLLLoss(ignore_index=vocab.padding_idx)
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
# optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9)
# optimizer = optim.Adadelta(model.parameters(), lr=learning_rate)
# optimizer = optim.RMSprop(model.parameters(), lr=learning_rate)

e = 0
try:
    checkpoint = torch.load(checkpoint_path)
    model.load_state_dict(checkpoint['model_state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    e = checkpoint['epoch']
# Read command line args
mode = sys.argv[1]
model_dir = sys.argv[2]

# Use most recent checkpoint
checkpoint_filename = [x for x in os.listdir(model_dir)
                       if x[-4:] == 'ckpt'][-1]

# Load vocabulary
print('Loading vocabulary...')
with open(os.path.join('.', model_dir, 'char-map.txt')) as f:
    vocab = [chr(int(line)) for line in f.readlines()]

# Restore model
print('Loading model...')
model = CharRNN(vocab)

# Start session and poll for text to autocomplete
with tf.Session() as sess:
    model.restore_checkpoint(sess, os.path.join('.', model_dir,
                                                checkpoint_filename))
    input_seq = None
    while True:
        if mode == 'autocomplete':
            print('---')
            print('Enter context (or type \'quit\' to quit):')
            lines = []
            # Collect input lines until an empty line ends the context.
            while True:
                line = input()
                if line:
                    lines.append(line)
                else:
                    break
from load import words, word_to_id, encoded
from model import CharRNN, get_batches

batch_size = 30
n_steps = 20  # one-to-one RNN; affects the loss calculation
lstm_size = 1024
n_layers = 2
learning_rate = 0.001
keep_prob = 0.5
epochs = 5
report_every_n = 50

mdl = CharRNN(len(words), batch_size=batch_size, n_steps=n_steps,
              lstm_size=lstm_size, n_layers=n_layers,
              learning_rate=learning_rate)

train_logs = './train_logs'
train_summary_writer = tf.summary.FileWriter(train_logs,
                                             graph=tf.get_default_graph())
tf.summary.scalar("loss", mdl.loss)
merged_sum = tf.summary.merge_all()

saver = tf.train.Saver(max_to_keep=100)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    counter = 0
    for e in range(epochs):
        new_state = sess.run(mdl.init_state)
def read_file(file, charenc='utf-8'):
    return open(file, 'r', encoding=charenc).read()


quotes = read_file(file)
tokens = list(set(''.join(quotes)))
token_to_id = {token: idx for idx, token in enumerate(tokens)}
id_to_token = {idx: token for token, idx in token_to_id.items()}
num_tokens = len(tokens)

# encode the text
encoded = np.array([token_to_id[ch] for ch in quotes])

# Load a model trained for 50 epochs (`rnn_50_epoch.net`)
with open('rnn_50_epoch.net', 'rb') as f:
    checkpoint = torch.load(f)

loaded = CharRNN(num_tokens,
                 n_hidden=checkpoint['n_hidden'],
                 n_layers=checkpoint['n_layers'])
loaded.load_state_dict(checkpoint['state_dict'])


def predict(net, char, h=None, top_k=None):
    '''Given a character, predict the next character.
    Returns the predicted character and the hidden state.
    '''
    # tensor inputs
    x = np.array([[token_to_id[char]]])
    inputs = torch.from_numpy(x)
    if train_on_gpu:
        inputs = inputs.cuda()
    # detach hidden state from history
encoded = np.array([token_to_id[ch] for ch in quotes])

# check if GPU is available
train_on_gpu = torch.cuda.is_available()
if train_on_gpu:
    print('Training on GPU!')
else:
    print('No GPU available, training on CPU; '
          'consider making n_epochs very small.')

# define and print the net
n_hidden = 512
n_layers = 4
net = CharRNN(num_tokens=num_tokens, n_hidden=n_hidden, n_layers=n_layers)
print(net)


def train(net, data, token_to_id=token_to_id,
          epochs=10, batch_size=10, seq_length=50,
          lr=0.001, clip=5, val_frac=0.1, print_every=10):
    '''Training a network
import numpy as np

from model import CharRNN
import preprocess as pre

if __name__ == '__main__':
    classes, classDict = pre.build_class()
    model = CharRNN()
    model.load_model()

    sent = ("The mobile phase consisted of methanol, acetonitrile and "
            "dichloromethane (42:42:16) with a flow rate of 1.0 mL/min "
            "at 30°C.")
    words = sent.split(" ")
    sample = np.array(pre.words2sample(words), dtype=int)
    labels = model.predict(sample)
    tags = [classes[label] for label in labels]

    size = len(words)
    for i in range(size):
        print(words[i], ":::", tags[i])
# Encode the text and map each character to an integer and vice versa.
# We create two dictionaries:
# 1. int2char, which maps integers to characters
# 2. char2int, which maps characters to integers
chars = tuple(set(text))
int2char = dict(enumerate(chars))
char2int = {ch: ii for ii, ch in int2char.items()}

# encode the text
encoded = np.array([char2int[ch] for ch in text])

# init model
n_hidden = args.n_hidden
n_layers = args.n_layers
net = CharRNN(chars, n_hidden, n_layers)

# declaring the hyperparameters
batch_size = args.batch_size
seq_length = args.seq_length
n_epochs = args.n_epochs

# train the model
train(net, encoded, epochs=n_epochs, batch_size=batch_size,
      seq_length=seq_length, lr=0.001, print_every=50)
def main():
    # Parse command line arguments
    argparser = argparse.ArgumentParser()
    argparser.add_argument('--train_set', type=str, required=True)
    argparser.add_argument('--valid_set', type=str, required=True)
    argparser.add_argument('--model', type=str, default="gru")
    argparser.add_argument('--model_file', type=str, default='None')
    argparser.add_argument('--n_epochs', type=int, default=30)
    argparser.add_argument('--hidden_size', type=int, default=200)
    argparser.add_argument('--n_layers', type=int, default=3)
    argparser.add_argument('--learning_rate', type=float, default=0.01)
    argparser.add_argument('--chunk_len', type=int, default=200)
    argparser.add_argument('--batch_size', type=int, default=300)
    argparser.add_argument('--num_workers', type=int, default=8)
    argparser.add_argument('--cuda', action='store_true')
    argparser.add_argument('--cpu', action='store_true')
    args = argparser.parse_args()

    # Initialize models and start training
    if args.model_file == 'None':
        decoder = CharRNN(
            n_characters,
            args.hidden_size,
            n_characters,
            model=args.model,
            n_layers=args.n_layers,
        )
        epoch_from = 1
        prev_valid_loss = sys.maxsize
        old_filename = None
    else:
        if args.cpu:
            decoder = torch.load(args.model_file,
                                 map_location=lambda storage, loc: storage)
        else:
            decoder = torch.load(args.model_file)
        # Recover the training settings encoded in the checkpoint file name.
        info = args.model_file.split('_')
        args.model = info[0]
        epoch_from = int(info[1][5:]) + 1
        args.n_layers = int(info[2][7:])
        args.hidden_size = int(info[5][2:])
        prev_valid_loss = float(info[7][4:-3])
        old_filename = args.model_file
        print("successfully loaded model! "
              "Continuing from epoch {0} with valid loss {1}"
              .format(epoch_from, prev_valid_loss))

    optimizer = torch.optim.Adam(decoder.parameters(), lr=args.learning_rate)
    criterion = nn.CrossEntropyLoss()

    if args.cuda:
        decoder.cuda()

    start = time.time()
    train_dataset = WordDataset(args.train_set, args.chunk_len)
    train_dataloader = DataLoader(train_dataset,
                                  batch_size=args.batch_size,
                                  shuffle=True,
                                  num_workers=args.num_workers,
                                  drop_last=True)
    valid_dataset = WordDataset(args.valid_set, args.chunk_len)
    valid_dataloader = DataLoader(valid_dataset,
                                  batch_size=args.batch_size,
                                  shuffle=True,
                                  num_workers=args.num_workers,
                                  drop_last=True)

    try:
        print('Training for maximum {} epochs...'.format(args.n_epochs))
        for epoch in range(epoch_from, args.n_epochs + 1):
            train_loss, num_samples = 0, 0
            for s in tqdm(train_dataloader):
                input_, target = prep_data(s['input'], s['target'], args.cuda)
                train_loss += train(decoder, optimizer, criterion, input_,
                                    target, args.batch_size, args.chunk_len,
                                    args.cuda)
                num_samples += 1
            train_loss /= num_samples

            valid_loss, num_samples = 0, 0
            for s in valid_dataloader:
                input_, target = prep_data(s['input'], s['target'], args.cuda)
                valid_loss += evaluate(decoder, criterion, input_, target,
                                       args.batch_size, args.chunk_len,
                                       args.cuda)
                num_samples += 1
            valid_loss /= num_samples

            elapsed = time_since(start)
            pcnt = epoch / args.n_epochs * 100
            log = ('{} elapsed - epoch #{} ({:.1f}%) - '
                   'training loss (BPC) {:.2f} - validation loss (BPC) {:.2f}')
            print(log.format(elapsed, epoch, pcnt, train_loss, valid_loss))

            if valid_loss > prev_valid_loss:
                print('No longer learning, just overfitting, stopping here.')
                break
            else:
                filename = model_file_name(decoder, epoch, train_loss,
                                           valid_loss)
                torch.save(decoder, filename)
                print('Saved as {}'.format(filename))
                if old_filename:
                    os.remove(old_filename)
                old_filename = filename
                prev_valid_loss = valid_loss
    except KeyboardInterrupt:
        print("Saving before quit...")
        try:
            valid_loss
        except NameError:
            valid_loss = 'no_val'
        filename = model_file_name(decoder, epoch, train_loss, valid_loss)
        torch.save(decoder, filename)
        print('Saved as {}'.format(filename))
def main(): """ Main function Here, you should instantiate 1) DataLoaders for training and validation. Try SubsetRandomSampler to create these DataLoaders. 3) model 4) optimizer 5) cost function: use torch.nn.CrossEntropyLoss """ parser = argparse.ArgumentParser() parser.add_argument('--val_ratio', type=float, default=.5, help='The ratio for valid set') parser.add_argument('--n_layers', type=int, default=4, help='Number of stacked RNN layers') parser.add_argument('--n_hidden', type=int, default=512, help='Number of hidden neurons of RNN cells') parser.add_argument('--drop_prob', type=float, default=0.1, help='Dropout probability') parser.add_argument('--num_epochs', type=int, default=100, help='The number of epochs') parser.add_argument('--lr', type=float, default=0.001, help='Learning rate') parser.add_argument('--device', type=str, default='gpu', help='For cpu: \'cpu\', for gpu: \'gpu\'') parser.add_argument('--batch_size', type=int, default=256, help='Size of batches for training') parser.add_argument('--model_save_dir', type=str, default='../model', help='Directory for saving model.') parser.add_argument('--results_save_dir', type=str, default='../results', help='Directory for saving results.') parser.add_argument('--rnn', type=bool, default=True, help='Train vanilla rnn model') parser.add_argument('--lstm', type=bool, default=True, help='Train lstm model') parser.add_argument('--chunk_size', type=int, default=30, help='Chunk size(sequence length)') parser.add_argument('--s_step', type=int, default=3, help='Sequence step') args = parser.parse_args() n_cpu = multiprocessing.cpu_count() if args.device == 'gpu': args.device = 'cuda' device = torch.device(args.device) chunk_size = args.chunk_size s_step = args.s_step num_epochs = args.num_epochs batch_size = args.batch_size val_ratio = args.val_ratio shuffle_dataset = True random_seed = 42 datasets = dataset.Shakespeare('shakespeare_train.txt', chunk_size, s_step) dataset_size = len(datasets) indices = list(range(dataset_size)) split = int(np.floor(val_ratio * dataset_size)) if shuffle_dataset: np.random.seed(random_seed) np.random.shuffle(indices) train_indices, val_indices = indices[split:], indices[:split] train_sampler = SubsetRandomSampler(train_indices) valid_sampler = SubsetRandomSampler(val_indices) trn_loader = DataLoader(datasets, batch_size=batch_size, sampler=train_sampler, num_workers=n_cpu) val_loader = DataLoader(datasets, batch_size=batch_size, sampler=valid_sampler, num_workers=n_cpu) chars = datasets.chars print('-----Train Vanilla RNN Model-----') if args.rnn: model = CharRNN(chars, args).to(device) optimizer = Adam(model.parameters(), lr=args.lr) criterion = nn.CrossEntropyLoss() rnn_trn_loss, rnn_val_loss = [], [] best_val_loss = np.inf for epoch in range(args.num_epochs): epoch_time = time.time() trn_loss = train(model, trn_loader, device, criterion, optimizer) val_loss = validate(model, val_loader, device, criterion) rnn_trn_loss.append(trn_loss) rnn_val_loss.append(val_loss) print('Epoch: %3s/%s...' % (epoch + 1, num_epochs), 'Train Loss: %.4f...' % trn_loss, 'Val Loss: %.4f...' 
% val_loss, 'Time: %.4f' % (time.time() - epoch_time)) if val_loss < best_val_loss: best_val_loss = val_loss torch.save(model.state_dict(), '%s/rnn.pt' % args.model_save_dir) value, idx = np.array(rnn_val_loss).min(), np.array( rnn_val_loss).argmin() plt.figure(figsize=(8, 6)) plt.title('Vanilla RNN Model training and validation loss') plt.plot(np.arange(1, args.num_epochs + 1), rnn_trn_loss, 'g', label='Train Loss') plt.plot(np.arange(1, args.num_epochs + 1), rnn_val_loss, 'r', label='Val Loss') plt.grid(True) plt.legend(loc='upper right') plt.annotate('min epoch: %s \n\ min valid loss: %.5f' % (idx, value), (idx, value), xytext=(-60, 20), textcoords='offset points', arrowprops={'arrowstyle': '->'}) plt.savefig('%s/rnn_loss.png' % args.results_save_dir, dpi=300) print('-----Train LSTM Model-----') if args.lstm: model = CharLSTM(chars, args).to(device) optimizer = Adam(model.parameters(), lr=args.lr) criterion = nn.CrossEntropyLoss() lstm_trn_loss, lstm_val_loss = [], [] best_val_loss = np.inf for epoch in range(args.num_epochs): epoch_time = time.time() trn_loss = train(model, trn_loader, device, criterion, optimizer) val_loss = validate(model, val_loader, device, criterion) lstm_trn_loss.append(trn_loss) lstm_val_loss.append(val_loss) print('Epoch: %3s/%s...' % (epoch + 1, num_epochs), 'Train Loss: %.4f...' % trn_loss, 'Val Loss: %.4f...' % val_loss, 'Time: %.4f' % (time.time() - epoch_time)) if val_loss < best_val_loss: best_val_loss = val_loss torch.save(model.state_dict(), '%s/lstm.pt' % args.model_save_dir) value, idx = np.array(lstm_val_loss).min(), np.array( lstm_val_loss).argmin() plt.figure(figsize=(8, 6)) plt.title('LSTM Model training and validation loss') plt.plot(np.arange(1, args.num_epochs + 1), lstm_trn_loss, 'g', label='Train Loss') plt.plot(np.arange(1, args.num_epochs + 1), lstm_val_loss, 'r', label='Val Loss') plt.grid(True) plt.legend(loc='upper right') plt.annotate('min epoch: %s \n\ min valid loss: %.5f' % (idx, value), (idx, value), xytext=(-60, 20), textcoords='offset points', arrowprops={'arrowstyle': '->'}) plt.savefig('%s/lstm_loss.png' % args.results_save_dir, dpi=300)
# integer-to-character mapping
int_to_vocab = dict(enumerate(vocab))

# encode the text
encoded = np.array([vocab_to_int[c] for c in text], dtype=np.int32)

# initialize the hyperparameters
batch_size = 100
num_steps = 100
lstm_size = 512
num_layers = 2
learning_rate = 0.001
keep_prob = 0.5
epochs = 20
# save variables every n iterations
save_every_n = 200

model = CharRNN(len(vocab),
                batch_size=batch_size,
                num_steps=num_steps,
                lstm_size=lstm_size,
                num_layers=num_layers,
                learning_rate=learning_rate)

saver = tf.train.Saver(max_to_keep=100)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    counter = 0
    for e in range(epochs):
        # train network
        new_state = sess.run(model.initial_state)
        loss = 0
        for x, y in get_batches(encoded, batch_size, num_steps):
            counter += 1
            start = time.time()
            feed = {model.inputs: x,
                    model.targets: y,
                    model.initial_state: new_state,
                    model.keep_prob: keep_prob}