def experiment_fn(run_config, params):
    data = Data(FLAGS)
    data.initialize_word_vectors()
    model = Seq2seq(data.vocab_size, FLAGS, data.embeddings_mat)
    estimator = tf.estimator.Estimator(
        model_fn=model.make_graph,
        # model_dir=FLAGS.model_dir,
        config=run_config,
        params=FLAGS)
    train_input_fn, train_feed_fn = data.make_input_fn('train')
    eval_input_fn, eval_feed_fn = data.make_input_fn('test')
    print_vars = [
        'source',
        'predict'
        # 'decoder_output',
        # 'actual'
    ]
    print_inputs = tf.train.LoggingTensorHook(
        print_vars,
        every_n_iter=FLAGS.print_every,
        formatter=data.get_formatter(['source', 'predict']))
    experiment = tf.contrib.learn.Experiment(
        estimator=estimator,
        train_input_fn=train_input_fn,
        eval_input_fn=eval_input_fn,
        train_steps=FLAGS.iterations,
        min_eval_frequency=FLAGS.print_every,
        train_monitors=[tf.train.FeedFnHook(train_feed_fn), print_inputs],
        eval_hooks=[tf.train.FeedFnHook(eval_feed_fn)],
        eval_steps=10)
    return experiment

def train_eval(x_train, x_test, is_peeky):
    if is_peeky:
        model = PeekySeq2seq(vocab_size, wordvec_size, hidden_size)
    else:
        model = Seq2seq(vocab_size, wordvec_size, hidden_size)
    optimizer = Adam()
    trainer = Trainer(model, optimizer)

    acc_list = []
    for epoch in range(max_epoch):
        trainer.fit(x_train, t_train, max_epoch=1,
                    batch_size=batch_size, max_grad=max_grad)
        correct_num = 0
        for i in range(len(x_test)):
            question, correct = x_test[[i]], t_test[[i]]
            verbose = i < 10
            correct_num += eval_seq2seq(model, question, correct,
                                        id_to_char, verbose)
        acc = float(correct_num) / len(x_test)
        acc_list.append(acc)
        print('val acc %.3f%%' % (acc * 100))
    return acc_list

def main() -> None:
    (x_train, t_train), (x_test, t_test) = sequence.load_data('addition.txt')
    char_to_id, id_to_char = sequence.get_vocab()

    vocab_size = len(char_to_id)
    wordvec_size = 16
    hidden_size = 128
    batch_size = 128
    max_epoch = 25
    max_grad = 5.0

    model = Seq2seq(vocab_size, wordvec_size, hidden_size)
    optimizer = Adam()
    trainer = Trainer(model, optimizer)

    acc_list = []
    for epoch in range(1, max_epoch + 1):
        trainer.fit(x_train, t_train, max_epoch=1,
                    batch_size=batch_size, max_grad=max_grad)
        correct_num = 0
        for i in range(len(x_test)):
            question, correct = x_test[[i]], t_test[[i]]
            verbose = i < 10
            correct_num += eval_seq2seq(model, question, correct,
                                        id_to_char, verbose)
        acc = float(correct_num) / len(x_test)
        acc_list.append(acc)
        print(f'val acc {acc * 100}%')
    print('DONE')

def __init__(self, trainable=True):
    self.trainable = trainable
    self.seq2seq = Seq2seq(trainable=False)
    self.seq2seq.build()
    init_op = tf.global_variables_initializer()
    self.sess = tf.Session()
    self.sess.run(init_op)
    self.seq2seq.init(self.sess)

def __init__(self):
    self.seq2seq = Seq2seq()
    self.seq2seq.build()
    init_op = tf.global_variables_initializer()
    self.sess = tf.Session()
    self.sess.run(init_op)
    self.reverse_vocab = preprocessor.load_reverse_vocab(
        Config.vocab_file_path)
    self.restore_variables(self.sess)

def evaluate(model, data, k=1):
    beam_search = Seq2seq(model.encoder, TopKDecoder(model.decoder, k))
    input_vocab = data.fields[GlobalNames.src_field_name].vocab
    output_vocab = data.fields[GlobalNames.tgt_field_name].vocab
    pred_machine = Predictor(beam_search, input_vocab, output_vocab)
    result = [
        " ".join(pred_machine.predict(item.src)) for item in data.examples
    ]
    return result

def main(args, load_exclude_set, restoreCallback):
    logging.basicConfig(
        filename=0,
        level=logging.DEBUG,
        format='%(asctime)s %(filename)s[line:%(lineno)d] %(message)s',
        datefmt='%H:%M:%S')
    if args.debug:
        debug()
    logging.info(json.dumps(args, indent=2))
    cuda_init(0, args.cuda)

    volatile = Storage()
    volatile.load_exclude_set = load_exclude_set
    volatile.restoreCallback = restoreCallback

    data_class = SingleTurnDialog.load_class(args.dataset)
    data_arg = Storage()
    data_arg.file_id = args.datapath
    wordvec_class = WordVector.load_class(args.wvclass)
    if wordvec_class is None:
        wordvec_class = Glove

    def load_dataset(data_arg, wvpath, embedding_size):
        wv = wordvec_class(wvpath)
        dm = data_class(**data_arg)
        return dm, wv.load(embedding_size, dm.vocab_list)

    if args.cache:
        dm, volatile.wordvec = try_cache(
            load_dataset, (data_arg, args.wvpath, args.embedding_size),
            args.cache_dir,
            data_class.__name__ + "_" + wordvec_class.__name__)
    else:
        dm, volatile.wordvec = load_dataset(data_arg, args.wvpath,
                                            args.embedding_size)
    volatile.dm = dm

    param = Storage()
    param.args = args
    param.volatile = volatile

    model = Seq2seq(param)
    if args.mode == "train":
        model.train_process()
    elif args.mode == "test":
        test_res = model.test_process()
        for key, val in test_res.items():
            if isinstance(val, bytes):
                test_res[key] = str(val)
        json.dump(test_res, open("./result.json", "w"))
    else:
        raise ValueError("Unknown mode")

def main(args, load_exclude_set, restoreCallback):
    logging.basicConfig(
        filename=0,
        level=logging.DEBUG,
        format='%(asctime)s %(filename)s[line:%(lineno)d] %(message)s',
        datefmt='%H:%M:%S')
    if args.debug:
        debug()
    logging.info(json.dumps(args, indent=2))
    cuda_init(0, args.cuda)

    volatile = Storage()
    volatile.load_exclude_set = load_exclude_set
    volatile.restoreCallback = restoreCallback

    data_class = SingleTurnDialog.load_class(args.dataset)
    data_arg = Storage()
    data_arg.file_id = args.datapath + "#OpenSubtitles"
    data_arg.tokenizer = PretrainedTokenizer(
        BertTokenizer.from_pretrained(args.bert_vocab))
    data_arg.pretrained = "bert"
    wordvec_class = WordVector.load_class(args.wvclass)
    if wordvec_class is None:
        wordvec_class = Glove

    def load_dataset(data_arg, wvpath, embedding_size):
        wv = wordvec_class(wvpath)
        dm = data_class(**data_arg)
        return dm, wv.load_matrix(embedding_size, dm.frequent_vocab_list)

    if args.cache:
        dm, volatile.wordvec = try_cache(
            load_dataset, (data_arg, args.wvpath, args.embedding_size),
            args.cache_dir,
            data_class.__name__ + "_" + wordvec_class.__name__)
    else:
        dm, volatile.wordvec = load_dataset(data_arg, args.wvpath,
                                            args.embedding_size)
    volatile.dm = dm

    param = Storage()
    param.args = args
    param.volatile = volatile

    model = Seq2seq(param)
    if args.mode == "train":
        model.train_process()
    elif args.mode == "test":
        model.test_process()
    else:
        raise ValueError("Unknown mode")

def load_model(model_path, device):
    state = torch.load(model_path, map_location=device)
    params = state['parameter']
    if params['name'] == 'Transformer':
        params.pop('name')
        model = Transformer(**params)
    else:
        model = Seq2seq(**params)
    model.to(device)
    model.load_state_dict(state['state_dict'])
    return (model, state['src_lang'], state['tgt_lang'],
            state['src_vocab'], state['tgt_vocab'])

def main():
    seq2seq = Seq2seq(lr=0.3, init_range=0.3)
    for i in range(1000):
        cost = seq2seq.train([2, 1], [2])
        cost += seq2seq.train([1], [1])
        cost += seq2seq.train([3, 1], [3])
        if i % 100 == 0:
            print('Epoch:', i)
            print('training cost:', cost / 3)
            print([2, 1], '->', seq2seq.predict([2, 1]))
            print([1], '->', seq2seq.predict([1]))
            print([3, 1], '->', seq2seq.predict([3, 1]))
            print()

def main(args):
    # tf.logging._logger.setLevel(logging.INFO)
    tf.logging.set_verbosity(logging.INFO)

    data = Data(FLAGS)
    model = Seq2seq(data.vocab_size, FLAGS)

    input_fn, feed_fn = data.make_input_fn()
    print_inputs = tf.train.LoggingTensorHook(
        ['source', 'target', 'predict'],
        every_n_iter=FLAGS.print_every,
        formatter=data.get_formatter(['source', 'target', 'predict']))

    estimator = tf.estimator.Estimator(
        model_fn=model.make_graph,
        model_dir=FLAGS.model_dir)  # , params=FLAGS)
    estimator.train(input_fn=input_fn,
                    hooks=[tf.train.FeedFnHook(feed_fn), print_inputs],
                    steps=FLAGS.iterations)

def __init__(self):
    self.data = Data(FLAGS)
    model = Seq2seq(self.data.vocab_size, FLAGS)
    estimator = tf.estimator.Estimator(model_fn=model.make_graph,
                                       model_dir=FLAGS.model_dir)

    def input_fn():
        inp = tf.placeholder(tf.int64, shape=[None, None], name='input')
        output = tf.placeholder(tf.int64, shape=[None, None], name='output')
        tf.identity(inp[0], 'source')
        tf.identity(output[0], 'target')
        features = {'input': inp, 'output': output}
        return tf.estimator.export.ServingInputReceiver(features, features)

    self.predictor = tf.contrib.predictor.from_estimator(estimator, input_fn)

def main():
    seq2seq = Seq2seq(lr=0.3, init_range=0.3)
    for i in range(1000):
        import random
        cost = 0
        for t in range(10):
            # a = random.randrange(9)
            # b = random.randrange(9)
            cost = seq2seq.train([1, 1], [1])
        cost += seq2seq.train([8, 1], [1])
        cost += seq2seq.train([7, 1], [1])
        cost += seq2seq.train([9, 1], [1])
        cost += seq2seq.train([4, 1], [1])
        cost += seq2seq.train([3, 1], [1])
        cost += seq2seq.train([1, 1], [1])
        cost += seq2seq.train([4, 1], [1])
        cost += seq2seq.train([0, 2], [2])
        cost += seq2seq.train([3, 2], [2])
        cost += seq2seq.train([5, 2], [2])
        cost += seq2seq.train([6, 2], [2])
        cost += seq2seq.train([1, 2], [2])
        cost += seq2seq.train([9, 2], [2])
        cost += seq2seq.train([8, 2], [2])
        cost += seq2seq.train([7, 2], [2])
        cost += seq2seq.train([6, 2], [2])
        cost += seq2seq.train([5, 2], [2])
        cost += seq2seq.train([4, 2], [2])
        cost += seq2seq.train([3, 2], [2])
        cost += seq2seq.train([2, 2], [2])
        cost += seq2seq.train([1, 2], [2])
        print('training cost:', cost / 22)
        if i % 100 == 0:
            print('Epoch:', i)
            print('training cost:', cost / 3)
            a = random.randrange(9)
            b = random.randrange(9)
            print([5, 2], '->', seq2seq.predict([5, 2]))

def main(args):
    logging.basicConfig(
        filename=0,
        level=logging.DEBUG,
        format='%(asctime)s %(filename)s[line:%(lineno)d] %(message)s',
        datefmt='%H:%M:%S')
    if args.debug:
        debug()
    logging.info(json.dumps(args, indent=2))
    cuda_init(0, args.cuda)

    volatile = Storage()

    data_class = SingleTurnDialog.load_class(args.dataset)
    wordvec_class = WordVector.load_class(args.wvclass)
    if wordvec_class is None:
        wordvec_class = Glove
    if args.cache:
        dm = try_cache(data_class, (args.datapath,), args.cache_dir)
        volatile.wordvec = try_cache(
            lambda wv, ez, vl: wordvec_class(wv).load(ez, vl),
            (args.wvpath, args.embedding_size, dm.vocab_list),
            args.cache_dir, wordvec_class.__name__)
    else:
        dm = data_class(args.datapath)
        wv = wordvec_class(args.wvpath)
        volatile.wordvec = wv.load(args.embedding_size, dm.vocab_list)
    volatile.dm = dm

    param = Storage()
    param.args = args
    param.volatile = volatile

    model = Seq2seq(param)
    if args.mode == "train":
        model.train_process()
    elif args.mode == "test":
        model.test_process()
    else:
        raise ValueError("Unknown mode")

def __init__(self, checkpoint='checkpoint', directory='coco'):
    self.data = Data(directory + '/train_source.txt',
                     directory + '/train_target.txt',
                     directory + '/train_vocab.txt')
    model = Seq2seq(self.data.vocab_size)
    estimator = tf.estimator.Estimator(model_fn=model.make_graph,
                                       model_dir=checkpoint)

    def input_fn():
        inp = tf.placeholder(tf.int64, shape=[None, None], name='input')
        output = tf.placeholder(tf.int64, shape=[None, None], name='output')
        tf.identity(inp[0], 'source')
        tf.identity(output[0], 'target')
        features = {'input': inp, 'output': output}
        return tf.estimator.export.ServingInputReceiver(features, features)

    self.predictor = tf.contrib.predictor.from_estimator(estimator, input_fn)

def build_model(encoder_vocab, decoder_vocab):
    model = Seq2seq(encoder_vocab_size=encoder_vocab.get_vocab_size(),
                    encoder_embedding_size=opt.encoder_embedding_size,
                    encoder_hidden_size=opt.encoder_hidden_size,
                    encoder_num_layers=opt.encoder_num_layers,
                    encoder_bidirectional=opt.encoder_bidirectional,
                    decoder_vocab_size=decoder_vocab.get_vocab_size(),
                    decoder_embedding_size=opt.decoder_embedding_size,
                    decoder_hidden_size=opt.decoder_hidden_size,
                    decoder_num_layers=opt.decoder_num_layers,
                    decoder_attn_type=opt.decoder_attn_type,
                    dropout_ratio=opt.dropout_ratio,
                    padding_idx=PAD_id,
                    tied=opt.tied,
                    device=device)
    print(model)
    model.to(device=device)
    return model

def vectorize_nn(word_index, embedding_matrix, sentences,
                 max_num_vectors=200, num_features=200, batch_size=32,
                 latent_dim=200, timesteps=200, epochs=5):
    print("Substituting words in descriptions by their vector representation")
    vec_sentences = []
    sec_count = 0
    for i, sentence in enumerate(sentences):
        # Print run information
        sec_count += 1
        if sec_count % 5000 == 0:
            print('{} descriptions have been preprocessed.'.format(sec_count))
        vector = seqWords2seqVec(sentence, word_index, embedding_matrix,
                                 max_num_vectors, num_features)
        # Store vector
        vector = np.asarray(vector)
        vec_sentences.append(vector)
    print("All words have been substituted by their vector representation")
    vec_sentences = np.asarray(vec_sentences)
    vec_sentences = np.reshape(vec_sentences,
                               (len(sentences), max_num_vectors, num_features))

    # Create and train the neural net
    s2s = Seq2seq(max_num_vectors, latent_dim, timesteps, batch_size,
                  word_index, embedding_matrix)
    print("Training autoencoder...")
    s2s.fit(vec_sentences, epochs)
    print("Getting vector representation of each description...")
    predictions = s2s.predict(vec_sentences)
    print("Saving neural network...")
    s2s.encoder.save('model/encoder.h5')
    return predictions

def main():
    seq2seq = Seq2seq()
    last_seq = None
    cost = 0
    for i in range(100000):
        X = [randint(1, 2) for _ in range(randint(1, 10))]
        Y = [x for x in X if x == 1]
        cost += seq2seq.train(X, Y)
        if i % 1000 == 0:
            print(i, '\t', cost / 1000)
            cost = 0
            X = [randint(1, 2) for _ in range(randint(1, 10))]
            Y = seq2seq.predict(X)
            print(X, '->', Y)
            seq2seq.lr /= 2

def __init__(self, config):
    super(Model, self).__init__()
    self.config = config
    self.embedding = Embedding(config)
    self.seq2seq = Seq2seq(config)

# Reverse input? =================================================
is_reverse = False  # True
if is_reverse:
    x_train, x_test = x_train[:, ::-1], x_test[:, ::-1]
# ================================================================

# Set hyperparameters
vocab_size = len(char_to_id)
wordvec_size = 16
hidden_size = 128
batch_size = 128
max_epoch = 25
max_grad = 5.0

# Normal or Peeky? ==============================================
model = Seq2seq(vocab_size, wordvec_size, hidden_size)
# model = PeekySeq2seq(vocab_size, wordvec_size, hidden_size)
# ================================================================

optimizer = Adam()
trainer = Trainer(model, optimizer)

acc_list = []
for epoch in range(max_epoch):
    trainer.fit(x_train, t_train, max_epoch=1,
                batch_size=batch_size, max_grad=max_grad)
    correct_num = 0
    for i in range(len(x_test)):

def main(args):
    logging.basicConfig(
        filename=0,
        level=logging.DEBUG,
        format='%(asctime)s %(filename)s[line:%(lineno)d] %(message)s',
        datefmt='%H:%M:%S')
    if args.debug:
        debug()
    logging.info(json.dumps(args, indent=2))
    cuda_init(args.cuda_num, args.cuda)

    volatile = Storage()
    volatile.load_exclude_set = args.load_exclude_set
    volatile.restoreCallback = args.restoreCallback

    if args.dataset == 'WizardOfWiki':
        data_class = WizardOfWiki
    elif args.dataset == 'HollE':
        data_class = HollE
    else:
        raise ValueError
    wordvec_class = WordVector.load_class(args.wvclass)
    if wordvec_class is None:
        wordvec_class = Glove

    if not os.path.exists(args.cache_dir):
        os.mkdir(args.cache_dir)
    args.cache_dir = os.path.join(args.cache_dir, args.dataset)
    if not os.path.exists(args.out_dir):
        os.mkdir(args.out_dir)
    args.out_dir = os.path.join(args.out_dir, args.dataset)
    if not os.path.exists(args.model_dir):
        os.mkdir(args.model_dir)
    if args.dataset not in args.model_dir:
        args.model_dir = os.path.join(args.model_dir, args.dataset)

    if args.cache:
        dm = try_cache(data_class, (args.datapath,), args.cache_dir)
        volatile.wordvec = try_cache(
            lambda wv, ez, vl: wordvec_class(wv).load_matrix(ez, vl),
            (args.wvpath, args.embedding_size, dm.vocab_list),
            args.cache_dir, wordvec_class.__name__)
    else:
        dm = data_class(args.datapath)
        wv = wordvec_class(args.wvpath)
        volatile.wordvec = wv.load_matrix(args.embedding_size, dm.vocab_list)
    volatile.dm = dm

    param = Storage()
    param.args = args
    param.volatile = volatile

    model = Seq2seq(param)
    if args.mode == "train":
        model.train_process()
    elif args.mode == "test":
        model.test_process()
    elif args.mode == 'dev':
        model.test_dev()
    else:
        raise ValueError("Unknown mode")

word2idx.update({'start_id': start_id})
word2idx.update({'end_id': end_id})
idx2word = idx2word + ['start_id', 'end_id']

src_vocab_size = tgt_vocab_size = src_vocab_size + 2
num_epochs = 10
vocabulary_size = src_vocab_size
decoder_seq_length = 25

model_ = Seq2seq(
    decoder_seq_length=decoder_seq_length,
    cell_enc=tf.keras.layers.GRUCell,
    cell_dec=tf.keras.layers.GRUCell,
    n_layer=3,
    n_units=1024,
    embedding_layer=tl.layers.Embedding(vocabulary_size=vocabulary_size,
                                        embedding_size=emb_dim),
)

# Uncomment below statements if you have already saved the model
load_weights = tl.files.load_npz(name='WinterMute_rms1024_cornell.npz')
tl.files.assign_weights(load_weights, model_)

optimizer = tf.optimizers.Adam(learning_rate=0.001)
# optimizer = tf.optimizers.RMSprop(learning_rate=0.001)
model_.train()

for epoch in range(num_epochs):

def main():
    # Load the training set
    data_set = []
    with open('./data/train_set.txt') as f:
        for line in f:
            data_set.append(json.loads(line))
    print("num_data: %s" % len(data_set))

    # Load the validation set
    validation_set = []
    with open('./data/valid_set.txt') as f:
        for line in f:
            validation_set.append(json.loads(line))
    print("num_validation: %s" % len(validation_set))

    # Load the vocabulary
    vocabulary = []
    with open('./data/vocabulary.txt') as f:
        for line in f:
            vocabulary.append(line.strip())
    vocabulary = vocabulary[:NUM_SYMBOL]
    print("num_symbol: %s" % len(vocabulary))

    # Load the pretrained word vectors
    vectors = {}
    with open('./data/glove.840B.300d.txt', encoding='utf-8') as f:
        for line in f:
            line = line.strip()
            word = line[:line.find(' ')]
            vector = line[line.find(' ') + 1:]
            vectors[word] = vector

    # Build the embedding matrix for our vocabulary
    embed = []
    for word in vocabulary:
        if word in vectors:
            embed.append(list(map(float, vectors[word].split())))
        else:
            embed.append(np.zeros((300), dtype=np.float32))
    embed = np.array(embed, dtype=np.float32)
    print("embed_shape: ", np.shape(embed))

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True  # let the GPU allocate memory on demand
    with tf.Session(config=config) as sess:
        model = Seq2seq(NUM_SYMBOL, NUM_UNITS, NUM_LAYERS, embed,
                        max_length=MAX_LENGTH,
                        learning_rate=LEARNING_RATE,
                        max_gradient_norm=MAX_GRADIENT_NORM,
                        output_alignments=OUTPUT_ALIGNMENTS)
        tf.global_variables_initializer().run()
        op_in = model.symbol2index.insert(
            tf.constant(vocabulary, dtype=tf.string),
            tf.constant(list(range(NUM_SYMBOL)), dtype=tf.int64))
        sess.run(op_in)
        op_out = model.index2symbol.insert(
            tf.constant(list(range(NUM_SYMBOL)), dtype=tf.int64),
            tf.constant(vocabulary, dtype=tf.string))
        sess.run(op_out)

        if TRAIN:
            if TRAIN_CONTINUE:
                model.saver.restore(sess, tf.train.latest_checkpoint(TRAIN_DIR))
            print(model.print_parameters())
            epoch = 0
            while True:
                random.shuffle(data_set)
                start = 0
                while start < len(data_set):
                    if start >= len(data_set):
                        break
                    end = start + BATCH_SIZE
                    if end >= len(data_set):
                        batch_data = get_data(data_set[start:])
                    else:
                        batch_data = get_data(data_set[start:end])
                    _, loss, total_loss = model.train(sess, batch_data)
                    if model.global_step.eval() % 20000 == 0:
                        print("epoch: %s" % epoch, end=" ")
                        print("global_step: %s" % model.global_step.eval(), end=" ")
                        print("start: %s" % start, end=" ")
                        print("loss = ", loss, end=" ")
                        print("ppl = ", np.exp(loss))
                        model.saver.save(sess, '%s/checkpoint' % TRAIN_DIR,
                                         global_step=model.global_step)
                    start = end
                epoch = epoch + 1
                loss_per_data = evalueate(sess, model, validation_set)
                print("mean loss of per data on validation set: ", loss_per_data)

        if TEST:
            model.saver.restore(sess, tf.train.latest_checkpoint(TRAIN_DIR))
            fr = open('./data/test_set.txt', 'r', encoding='utf8')
            fw = open('./data/result.txt', 'w', encoding='utf8')
            test_set = []
            for line in fr:
                test_set.append(json.loads(line))
            total_len = 0
            for data in test_set:
                total_len += (len(data['response']) + 1)
            loss_per_data = evalueate(sess, model, test_set)
            ppl_per_word = loss_per_data * len(test_set) / total_len
            ppl = "ppl on test set: %f" % np.exp(ppl_per_word)
            fw.write(ppl)
            # iterate over the parsed examples (fr has already been consumed above)
            for data in test_set:
                post = data['post']
                posts_len = [len(post)]
                posts_string = [post]
                words = model.inference(
                    sess, {
                        "posts_len": np.array(posts_len, dtype=np.int32),
                        "posts_string": np.array(posts_string)
                    })
                word = words[0][0]
                word = [str(item, encoding="utf-8") for item in word]
                data['result'] = word
                fw.write(json.dumps(data) + '\n')
            fw.close()
            fr.close()

        if INFERENCE:
            model.saver.restore(sess, tf.train.latest_checkpoint(TRAIN_DIR))
            while True:
                print("post: ", end=" ")
                post = input()
                post = post.split()
                posts_len = [len(post)]
                posts_string = [post]
                words = model.inference(
                    sess, {
                        "posts_len": np.array(posts_len, dtype=np.int32),
                        "posts_string": np.array(posts_string)
                    })
                word = words[0][0]
                word = [str(item, encoding="utf-8") for item in word]
                word = " ".join(word)
                word = word[:word.find('_EOS')]
                print("response: ", word)

def main():
    args = args_set('big')

    # Create save dir
    create_dirs(args.save_dir)

    # Check CUDA
    if torch.cuda.is_available():
        args.cuda = True
    args.device = torch.device("cuda" if args.cuda else "cpu")
    print("Using CUDA: {}".format(args.cuda))

    # Set seeds
    set_seeds(seed=1234, cuda=args.cuda)

    # load state
    model_spatial = SpatialModel(num_input_channels=5, out_num=1053,
                                 dropout_p=args.dropout_p)
    model_time = Seq2seq(num_features=1053,
                         hidden_size=512,
                         input_seq_len=args.input_seq_len,
                         pred_seq_len=args.pred_seq_len,
                         batch_size=1)
    # model_time = Seq2seq_attn(num_features=1053,
    #                           input_seq_len=args.input_seq_len,
    #                           pred_seq_len=args.pred_seq_len,
    #                           batch_size=1,
    #                           dropout=args.dropout_p)
    # model_time = Seq2seq_mlp(num_features=1053,
    #                          input_seq_len=args.input_seq_len,
    #                          pred_seq_len=args.pred_seq_len,
    #                          batch_size=1, device=args.device)

    resume = os.path.join(args.save_dir, 'check_point_{}'.format(40))
    print('Resuming model check point from {}\n'.format(40))
    check_point = torch.load(resume)
    model_spatial.load_state_dict(check_point['model_spatial'])
    model_spatial.to(args.device)
    model_time.load_state_dict(check_point['model_time'])
    model_time.to(args.device)

    # data = DataPrepare(save_dir=args.save_dir, data_folder=args.data_folder,
    #                    train_size=args.train_size,
    #                    val_size=args.val_size,
    #                    test_size=args.test_size,
    #                    input_seq_len=args.input_seq_len,
    #                    pred_seq_len=args.pred_seq_len, shuffle=True)
    # data.create_data()

    test_exps = np.load('exp_list.npy')
    scales = np.load('scales.npy')
    tester = Tester(test_exps=test_exps,
                    data_folder=args.data_folder,
                    scales=scales,
                    input_seq_len=args.input_seq_len,
                    pred_seq_len=args.pred_seq_len,
                    model_spatial=model_spatial,
                    model_time=model_time,
                    extract_num=4,
                    save_dir=args.save_dir,
                    save_sample_path=args.save_sample_path,
                    device='cuda')
    tester.run_test_loop()

                          embedding_size=512,
                          num_layers=1,
                          dropout=0,
                          is_training=True)
conv_decoder = ConvDecoder(len(word_to_index),
                           max_target_length + 2,
                           hidden_size=128,
                           embedding_size=512,
                           num_layers=1,
                           dropout=0,
                           is_training=True)

examples = np.array(examples)
examples_target = np.array(examples_target)

seq2seq = Seq2seq(conv_encoder, conv_decoder, len(word_to_index))
seq_output = seq2seq(examples, examples_target)
seq_output = seq_output.data.numpy()
sentences = [index_to_word_sentence(seq) for seq in seq_output]
print(sentences)

while True:
    new_text = input('type in text to predict:')
    new_text_token = np.array(
        [[word_to_index[token] for token in new_text.lower().split()]])
    new_text_token = np.concatenate([new_text_token, [[1]]], axis=1)
    outputs = seq2seq(new_text_token, is_training=False)
    outputs = outputs.data.numpy()
    sentences = [index_to_word_sentence(seq) for seq in outputs]

encoder = EncoderRNN(len(src.vocab), max_len, hidden_size,
                     bidirectional=bidirectional,
                     rnn_cell='lstm',
                     variable_lengths=True)
decoder = DecoderRNN(len(tgt.vocab), max_len, hidden_size * 2,
                     dropout_p=0.2,
                     use_attention=True,
                     bidirectional=bidirectional,
                     rnn_cell='lstm',
                     eos_id=tgt.eos_id,
                     sos_id=tgt.sos_id)
seq2seq = Seq2seq(encoder, decoder)
if torch.cuda.is_available():
    seq2seq.cuda()

for param in seq2seq.parameters():
    param.data.uniform_(-0.08, 0.08)

# train
t = SupervisedTrainer(loss=loss, batch_size=32,
                      checkpoint_every=50,
                      print_every=10, expt_dir=opt.expt_dir)
seq2seq = t.train(seq2seq, train,

mode = 'train'
rnn_size = 1024
num_layers = 1
max_encoder_steps = 30
max_decoder_steps = 30
embedding_size = 256

data_processor = DataProcessor(mode)
idx2word_dict = data_processor.get_dictionary()
vocab_size = len(idx2word_dict)

model = Seq2seq(rnn_size=rnn_size,
                num_layers=num_layers,
                batch_size=batch_size,
                vocab_size=vocab_size,
                mode=mode,
                max_encoder_steps=max_encoder_steps,
                max_decoder_steps=max_decoder_steps,
                embedding_size=embedding_size)


# TODO: sampling probability for each epoch
def func(x):
    return 2 - 2 / (1 + np.exp(0.3 * (x - 2 * epochs)))


sampling_prob = func(np.arange(epochs))

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

import chainer
from chainer.backends import cuda
import chainer.functions as F
import chainer.links as L
from chainer import training
from chainer.training import extensions
from chainer.training.triggers import MinValueTrigger
from chainer import serializers

from seq2seq import Seq2seq

UNK = 0
EOS = 1

# model = Seq2seq(3, 7334, 6829, 1024)
# model = Seq2seq(3, 5838, 6829, 1024)
model = Seq2seq(3, 21148, 6829, 1024)
serializers.load_npz("LSTM_step2title.model", model)
# serializers.load_npz("LSTM_ing2title2.model", model)
print("model LSTM_step2title loaded.")

gpu = 3
if gpu >= 0:
    chainer.backends.cuda.get_device(gpu).use()
    model.to_gpu(gpu)

f = open('test_title.txt', 'r')  # open the reference titles for reading
target = f.readlines()           # read all lines
f.close()                        # close the file

f = open('test_ing.txt', 'r')    # open the ingredient lists for reading
source = f.readlines()           # read all lines

hidden_size = 128
bidirectional = True
encoder = EncoderRNN(len(src.vocab), max_len, hidden_size,
                     bidirectional=bidirectional, variable_lengths=True)
decoder = DecoderRNN(len(tgt.vocab), max_len,
                     hidden_size * 2 if bidirectional else hidden_size,
                     dropout_p=0.2, use_attention=True,
                     bidirectional=bidirectional,
                     eos_id=tgt.eos_id, sos_id=tgt.sos_id)
seq2seq = Seq2seq(encoder, decoder)
if torch.cuda.is_available():
    seq2seq.cuda()

for param in seq2seq.parameters():
    param.data.uniform_(-0.08, 0.08)

# Optimizer and learning rate scheduler can be customized by
# explicitly constructing the objects and passing them to the trainer.
optimizer = Optimizer(torch.optim.Adam(seq2seq.parameters()), max_grad_norm=5)
scheduler = StepLR(optimizer.optimizer, 1)
optimizer.set_scheduler(scheduler)

# train

def main():
    # Arguments
    args = args_set('big')

    # Create save dir
    create_dirs(args.save_dir)

    # Check CUDA
    if torch.cuda.is_available():
        args.cuda = True
    args.device = torch.device("cuda" if args.cuda else "cpu")
    print("Using CUDA: {}".format(args.cuda))

    # Set seeds
    set_seeds(seed=1234, cuda=args.cuda)

    dataset = SpatialTimeDataset(args.save_sample_path)

    # create model
    model_spatial = SpatialModel(num_input_channels=dataset[0][0].shape[1],
                                 out_num=1053,
                                 dropout_p=args.dropout_p)
    # model_time = Seq2seq_mlp(num_features=1053,
    #                          input_seq_len=args.input_seq_len,
    #                          pred_seq_len=args.pred_seq_len,
    #                          batch_size=args.batch_size, device=args.device)
    model_time = Seq2seq(num_features=1053,
                         hidden_size=512,
                         input_seq_len=args.input_seq_len,
                         pred_seq_len=args.pred_seq_len,
                         batch_size=args.batch_size)
    # model_time = Seq2seq_attn(num_features=data.targets_time['train'].shape[2],
    #                           input_seq_len=args.input_seq_len,
    #                           pred_seq_len=args.pred_seq_len,
    #                           batch_size=args.batch_size,
    #                           dropout=args.dropout_p)

    optimizer = optim.Adam([{'params': model_spatial.parameters()},
                            {'params': model_time.parameters()}],
                           lr=args.learning_rate,
                           weight_decay=1e-4)
    # scheduler = optim.lr_scheduler.MultiStepLR(optimizer,
    #                                            milestones=[12, 25, 37],
    #                                            gamma=0.1,
    #                                            last_epoch=start_epoch - 1)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer=optimizer,
                                                     mode='min',
                                                     factor=0.5,
                                                     patience=1)

    start_epoch = args.resume
    train_state = args_train_state(
        early_stopping_criteria=args.early_stopping_criteria,
        learning_rate=args.learning_rate)
    if args.resume:
        resume = os.path.join(args.save_dir,
                              'check_point_{}'.format(args.resume))
        print('Resuming model check point from {}\n'.format(resume))
        check_point = torch.load(resume)
        start_epoch = check_point['epoch']
        model_spatial.load_state_dict(check_point['model_spatial'])
        model_spatial.to(args.device)
        model_time.load_state_dict(check_point['model_time'])
        model_time.to(args.device)
        optimizer.load_state_dict(check_point['optimizer'])
        train_state = check_point['train_state']
        scheduler.optimizer = optimizer
        scheduler.last_epoch = start_epoch - 1
        scheduler.cooldown_counter = check_point['lr']['cooldown_counter']
        scheduler.best = check_point['lr']['best']
        scheduler.num_bad_epochs = check_point['lr']['num_bad_epochs']
        scheduler.mode_worse = check_point['lr']['mode_worse']
        scheduler.is_better = check_point['lr']['is_better']

    # define train class
    trainer = Trainer(dataset=dataset,
                      model_spatial=model_spatial,
                      model_time=model_time,
                      optimizer=optimizer,
                      scheduler=scheduler,
                      device=args.device,
                      teacher_forcing_ratio=args.teacher_forcing_ratio,
                      train_state=train_state)

    # train & validation
    print('start train29 training...')
    for epoch_index in range(start_epoch, args.num_epochs):
        epoch_start = time.time()
        trainer.train_state['epoch_index'] = epoch_index + 1

        dataset.set_split('train')
        batch_generator_train = dataset.generate_batches(
            batch_size=args.batch_size,
            collate_fn=collate_fn,
            shuffle=args.shuffle,
            device=args.device)
        trainer.run_train_loop(batch_generator_train, args.alpha,
                               device=args.device)
        epoch_end = time.time()
        print('\nEntire epoch train time cost: {:.2f} min'.format(
            (epoch_end - epoch_start) / 60))

        dataset.set_split('val')
        batch_generator_val = dataset.generate_batches(
            batch_size=args.batch_size,
            collate_fn=collate_fn,
            shuffle=False,
            device=args.device)
        trainer.run_val_loop(batch_generator_val, device=args.device)

        # check point
        save_name = os.path.join(
            args.save_dir,
            'check_point_{}'.format(trainer.train_state['epoch_index']))
        check_point = {
            'epoch': trainer.train_state['epoch_index'],
            'model_spatial': trainer.model_spatial.state_dict(),
            'model_time': trainer.model_time.state_dict(),
            'optimizer': trainer.optimizer.state_dict(),
            'train_state': trainer.train_state,
            'lr': {
                'cooldown_counter': trainer.scheduler.cooldown_counter,
                'best': trainer.scheduler.best,
                'num_bad_epochs': trainer.scheduler.num_bad_epochs,
                'mode_worse': trainer.scheduler.mode_worse,
                'is_better': trainer.scheduler.is_better
            }
        }
        torch.save(check_point, save_name)
        if trainer.train_state['stop_early']:
            break

    # plot loss
    plot_performance(trainer.train_state['train_loss'],
                     trainer.train_state['val_loss'],
                     args.save_dir)

    print('start testing...')
    test_exps = np.load('exp_list.npy', allow_pickle=True)
    scales = np.load('scales.npy', allow_pickle=True)
    # test
    tester = Tester(test_exps=test_exps,
                    data_folder=args.data_folder,
                    scales=scales,
                    input_seq_len=args.input_seq_len,
                    pred_seq_len=args.pred_seq_len,
                    model_spatial=model_spatial,
                    model_time=model_time,
                    extract_num=4,
                    save_dir=args.save_dir,
                    save_sample_path=args.save_sample_path,
                    device='cuda')
    tester.run_test_loop()