def main(argv):
    argparser = build_argparser()
    args = argparser.parse_args(argv)

    with open(args.data, encoding=ENCODING) as i_:
        text = i_.read()

    text_encoder = utils.TextEncoder()
    text_encoder.build_vocab([text], VOCAB_SIZE)

    gen = build_batch_generator(text_encoder.encode(text), BATCH_SIZE, MAX_LEN)

    inputs = tf.placeholder(tf.int32, (None, None), name='inputs')
    targets = tf.placeholder(tf.int32, (None, None), name='targets')

    rnn_cell = tf.contrib.rnn.MultiRNNCell(
        [tf.contrib.rnn.GRUCell(HIDDEN_SIZE) for _ in range(N_LAYERS)])
    initial_state = rnn_cell.zero_state(BATCH_SIZE, dtype=tf.float32)

    train_outputs, inference_outputs = rnn_model(inputs, MAX_LEN, rnn_cell,
                                                 initial_state, EMB_SIZE,
                                                 VOCAB_SIZE)

    with tf.variable_scope('loss'):
        masks = tf.ones_like(targets, dtype=tf.float32)
        loss = tf.contrib.seq2seq.sequence_loss(train_outputs.rnn_output,
                                                targets, masks)

    with tf.variable_scope('opt'):
        optimizer = tf.train.AdamOptimizer()
        train_op = optimizer.minimize(loss)

    checkpoint_path = os.path.join(MODEL_DIR, 'model.ckpt')
    saver = tf.train.Saver()

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        sum_loss = 0
        for step in range(1, STEPS + 1):
            inputs_, outputs_ = next(gen)
            loss_, _ = sess.run([loss, train_op],
                                feed_dict={
                                    inputs: inputs_,
                                    targets: outputs_
                                })
            sum_loss += loss_

            if step % 100 == 0:
                saver.save(sess, checkpoint_path, global_step=step)
                print(step, sum_loss / 100)
                sum_loss = 0

                # Show sample inference results for the latest batch of inputs_
                inferences = sess.run(inference_outputs.sample_id,
                                      feed_dict={inputs: inputs_[:, :1]})
                for _ in range(3):
                    print('---- SEED ----')
                    print(text_encoder.decode(inputs_[_, :1]))
                    print('---- OUTPUT ----')
                    print(text_encoder.decode(inferences[_]))
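# A minimal sketch of the build_batch_generator helper called above; the real
# implementation is not shown in this excerpt. The assumed behaviour: slice the
# encoded text into windows of max_len token ids and yield (inputs, targets)
# batches where targets are the inputs shifted one step to the right. The random
# window sampling is an assumption, not the author's code.
import numpy as np


def build_batch_generator(encoded_text, batch_size, max_len):
    """Yield (inputs, targets) int32 arrays of shape (batch_size, max_len)."""
    data = np.asarray(encoded_text, dtype=np.int32)
    while True:
        # Pick random window starts; the original may instead walk the text sequentially.
        starts = np.random.randint(0, len(data) - max_len - 1, size=batch_size)
        inputs = np.stack([data[s:s + max_len] for s in starts])
        targets = np.stack([data[s + 1:s + max_len + 1] for s in starts])
        yield inputs, targets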
def load_data(dataset, opt, vocab, vocabulary_path):
    if dataset == "atomic":
        data_loader = load_atomic_data(opt, vocab)
    elif dataset == "conceptnet":
        data_loader = load_conceptnet_data(opt, vocab)

    # Initialize TextEncoder
    encoder_path = vocabulary_path + "encoder_bpe_40000.json"
    bpe_path = vocabulary_path + "vocab_40000.bpe"
    text_encoder = utils.TextEncoder(encoder_path, bpe_path)
    text_encoder.encoder = data_loader.vocab_encoder
    text_encoder.decoder = data_loader.vocab_decoder

    return data_loader, text_encoder
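# Usage sketch for load_data (hedged: "opt" and "vocab" come from the caller and the
# vocabulary directory below is a placeholder, not a value from this excerpt).
# Note that vocabulary_path is joined by plain string concatenation, so it is
# expected to end with a path separator. Overriding text_encoder.encoder / .decoder
# makes the BPE encoder reuse the token-to-id mappings built by the data loader.
#
#     data_loader, text_encoder = load_data("atomic", opt, vocab, "model/")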
def main(argv):
    argparser = build_argparser()
    args = argparser.parse_args(argv)

    train_file = os.path.join(args.data_dir, TRAIN_FILE_NAME)
    valid_file = os.path.join(args.data_dir, VALID_FILE_NAME)
    test_file = os.path.join(args.data_dir, TEST_FILE_NAME)
    train_df = pd.read_csv(train_file)
    valid_df = pd.read_csv(valid_file)
    test_df = pd.read_csv(test_file)

    label_names = sorted(train_df.label.unique())
    print('Labels: {}'.format(label_names))

    text_encoder = utils.TextEncoder()
    text_encoder.build_vocab(train_df.text, VOCAB_SIZE)

    train_x, train_y = build_datamart(train_df, label_names, text_encoder, MAX_LEN)
    valid_x, valid_y = build_datamart(valid_df, label_names, text_encoder, MAX_LEN)
    test_x, test_y = build_datamart(test_df, label_names, text_encoder, MAX_LEN)

    train_gen = build_generator(train_x, train_y, BATCH_SIZE)

    inputs = tf.placeholder(tf.int32, (None, MAX_LEN), name='inputs')
    labels = tf.placeholder(tf.int32, (None, ), name='labels')

    rnn_cell = tf.contrib.rnn.GRUCell(HIDDEN_SIZE)

    pred, softmax, loss, acc, train_op = rnn_model(inputs, labels, rnn_cell,
                                                   len(label_names), EMB_SIZE,
                                                   text_encoder.vocab_size,
                                                   HIDDEN_SIZE)

    checkpoint_path = os.path.join(MODEL_DIR, 'model.ckpt')
    saver = tf.train.Saver()

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        loss_sum_ = 0
        for step in range(1, STEPS + 1):
            # Fetch a training batch
            batch_x, batch_y = next(train_gen)
            loss_, _ = sess.run([loss, train_op],
                                feed_dict={
                                    inputs: batch_x,
                                    labels: batch_y
                                })
            loss_sum_ += loss_

            # Every 100 steps, check accuracy on the validation data
            if step % 100 == 0:
                pred_, valid_loss_, acc_ = sess.run([pred, loss, acc],
                                                    feed_dict={
                                                        inputs: valid_x,
                                                        labels: valid_y
                                                    })
                # The training loss reported here is the average over the last 100 steps
                avg_loss_ = loss_sum_ / 100
                loss_sum_ = 0
                print('step: {}, train loss: {}, valid loss: {}, acc: {}'.format(
                    step, avg_loss_, valid_loss_, acc_))
                saver.save(sess, checkpoint_path, global_step=step)

    # Test
    ckpt = tf.train.get_checkpoint_state(MODEL_DIR)
    with tf.Session() as sess:
        saver.restore(sess=sess, save_path=ckpt.model_checkpoint_path)
        pred_, loss_, acc_ = sess.run([pred, loss, acc],
                                      feed_dict={
                                          inputs: test_x,
                                          labels: test_y
                                      })
        print('loss: {}, acc: {}'.format(loss_, acc_))
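# A minimal sketch of the build_datamart helper used above; the actual helper lives
# elsewhere in the repository. Assumed behaviour: encode each text with the
# TextEncoder, truncate or right-pad to max_len, and map label strings to integer
# indices via label_names. The pad id of 0 is an assumption.
import numpy as np


def build_datamart(df, label_names, text_encoder, max_len, pad_id=0):
    xs, ys = [], []
    for text in df.text:
        ids = list(text_encoder.encode(text))[:max_len]
        xs.append(ids + [pad_id] * (max_len - len(ids)))  # right-pad to max_len
    for label in df.label:
        ys.append(label_names.index(label))  # label string -> class index
    return np.asarray(xs, dtype=np.int32), np.asarray(ys, dtype=np.int32)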
def main(argv):
    argparser = build_argparser()
    args = argparser.parse_args(argv)

    train_df = pd.read_csv(args.train_data)

    input_text_encoder = utils.TextEncoder()
    input_text_encoder.build_vocab(train_df.input, INPUT_VOCAB_SIZE)
    output_text_encoder = utils.TextEncoder()
    output_text_encoder.build_vocab(train_df.output, OUTPUT_VOCAB_SIZE)

    train_gen = build_batch_generator(
        train_df.input.map(input_text_encoder.encode),
        train_df.output.map(output_text_encoder.encode), INPUT_MAX_LEN,
        OUTPUT_MAX_LEN, BATCH_SIZE)

    if args.test_data:
        test_df = pd.read_csv(args.test_data)
        test_inputs_, test_len_ = build_test_data(
            test_df.input.map(input_text_encoder.encode),
            INPUT_MAX_LEN,
        )

    encoder_cell = tf.contrib.rnn.MultiRNNCell(
        [tf.contrib.rnn.GRUCell(HIDDEN_SIZE) for _ in range(N_LAYERS)])
    decoder_cell = tf.contrib.rnn.MultiRNNCell(
        [tf.contrib.rnn.GRUCell(HIDDEN_SIZE) for _ in range(N_LAYERS)])

    encoder_inputs = tf.placeholder(tf.int32, (None, None), name='encoder_inputs')
    encoder_len = tf.placeholder(tf.int32, (None, ), name='encoder_len')
    decoder_inputs = tf.placeholder(tf.int32, (None, None), name='decoder_inputs')
    decoder_len = tf.placeholder(tf.int32, (None, ), name='decoder_len')
    targets = tf.placeholder(tf.int32, (None, None), name='targets')

    train_outputs, inference_outputs = seq2seq(encoder_inputs, encoder_len,
                                               encoder_cell, decoder_inputs,
                                               decoder_len, OUTPUT_MAX_LEN,
                                               decoder_cell)

    with tf.variable_scope('loss'):
        masks = tf.sequence_mask(decoder_len, OUTPUT_MAX_LEN,
                                 dtype=tf.float32, name='masks')
        loss = tf.contrib.seq2seq.sequence_loss(
            logits=train_outputs.rnn_output, targets=targets, weights=masks)
        tf.summary.scalar('train_loss', loss)

    with tf.variable_scope('opt'):
        optimizer = tf.train.AdamOptimizer()
        train_op = optimizer.minimize(loss)

    checkpoint_path = os.path.join(MODEL_DIR, 'model.ckpt')
    saver = tf.train.Saver()

    if not args.no_train:
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())

            ckpt = tf.train.get_checkpoint_state(MODEL_DIR)
            if ckpt:
                print('Use checkpoint file: ' + ckpt.model_checkpoint_path)
                saver.restore(sess=sess, save_path=ckpt.model_checkpoint_path)

            sum_loss = 0
            for step in range(1, STEPS + 1):
                encoder_inputs_, encoder_len_, decoder_inputs_, decoder_outputs_, decoder_len_ = next(
                    train_gen)
                loss_, _ = sess.run(
                    [loss, train_op],
                    feed_dict={
                        encoder_inputs: encoder_inputs_,
                        encoder_len: encoder_len_,
                        decoder_inputs: decoder_inputs_,
                        decoder_len: decoder_len_,
                        targets: decoder_outputs_
                    })
                sum_loss += loss_

                if step % 100 == 0:
                    saver.save(sess, checkpoint_path, global_step=step)
                    print(step, sum_loss / 100)
                    sum_loss = 0

                    # Show sample inference results for the latest training batch
                    train_inferences = sess.run(
                        inference_outputs.sample_id,
                        feed_dict={
                            encoder_inputs: encoder_inputs_,
                            encoder_len: encoder_len_,
                            decoder_inputs: [[
                                utils.TextEncoder.RESERVED_TOKENS.index('<BOS>')
                            ]] * len(encoder_inputs_)
                        })
                    for _ in range(3):
                        print('---- INPUT (TRAIN) ----')
                        print(input_text_encoder.decode(encoder_inputs_[_]))
                        print('---- OUTPUT ----')
                        print(output_text_encoder.decode(train_inferences[_]))

                    # Also look at results for just the first three test examples
                    if args.test_data:
                        test_inferences = sess.run(
                            inference_outputs.sample_id,
                            feed_dict={
                                encoder_inputs: test_inputs_[:3],
                                encoder_len: test_len_[:3],
                                decoder_inputs: [[
                                    utils.TextEncoder.RESERVED_TOKENS.index('<BOS>')
                                ]] * len(test_inputs_[:3])
                            })
                        for _ in range(len(test_inferences)):
                            print('---- INPUT (TEST) ----')
                            print(input_text_encoder.decode(test_inputs_[_]))
                            print('---- OUTPUT ----')
                            print(output_text_encoder.decode(test_inferences[_]))

    if args.test_data:
        ckpt = tf.train.get_checkpoint_state(MODEL_DIR)
        print('Use checkpoint file: ' + ckpt.model_checkpoint_path)

        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            saver.restore(sess=sess, save_path=ckpt.model_checkpoint_path)

            inferences = sess.run(
                inference_outputs.sample_id,
                feed_dict={
                    encoder_inputs: test_inputs_,
                    encoder_len: test_len_,
                    decoder_inputs:
                    [[utils.TextEncoder.RESERVED_TOKENS.index('<BOS>')]]
                    * len(test_inputs_)
                })

            for _ in range(len(test_inputs_)):
                print('---- INPUT ----')
                print(input_text_encoder.decode(test_inputs_[_]))
                print('---- OUTPUT ----')
                print(output_text_encoder.decode(inferences[_]))
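# A minimal sketch of the build_test_data helper used for the seq2seq test set; the
# actual implementation is not part of this excerpt. Assumed behaviour: truncate or
# right-pad each encoded input to max_len and also return the original lengths so
# padding can be masked via encoder_len. The pad id of 0 is an assumption.
import numpy as np


def build_test_data(encoded_inputs, max_len, pad_id=0):
    inputs, lengths = [], []
    for ids in encoded_inputs:
        ids = list(ids)[:max_len]
        lengths.append(len(ids))
        inputs.append(ids + [pad_id] * (max_len - len(ids)))
    return np.asarray(inputs, dtype=np.int32), np.asarray(lengths, dtype=np.int32)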