import os
import time

import numpy as np
import tensorflow as tf

# NOTE: TextLoader and Seq2SeqNMT are this project's own data-loading and
# model classes; the module paths below are assumptions -- adjust them to
# match the repo layout.
from textloader import TextLoader
from seq2seq_nmt import Seq2SeqNMT


def translate():
    # batch_size=1: decode one test sentence at a time
    textloader = TextLoader(ORIGINAL_DATA_DIR, SOURCE_VOCAB_SIZE,
                            TARGET_VOCAB_SIZE, SOURCE_LANG, TARGET_LANG,
                            _buckets, batch_size=1)
    s_vocab_path = os.path.join(
        ORIGINAL_DATA_DIR, "vocab%d.%s" % (SOURCE_VOCAB_SIZE, SOURCE_LANG))
    t_vocab_path = os.path.join(
        ORIGINAL_DATA_DIR, "vocab%d.%s" % (TARGET_VOCAB_SIZE, TARGET_LANG))
    s_vocab, _ = textloader.init_vocabulary(s_vocab_path)     # word -> id
    _, t_id2vocab = textloader.init_vocabulary(t_vocab_path)  # id -> word

    # create seq2seq model
    model = Seq2SeqNMT(SOURCE_VOCAB_SIZE, TARGET_VOCAB_SIZE, _buckets,
                       HIDDEN_UNITS, N_LAYERS, batch_size=1, learning_rate=LR)
    model.build_graph(train=False)

    chpt = tf.train.get_checkpoint_state(TRAIN_DIR)
    if chpt:
        print("restore model parameters from %s" % chpt.model_checkpoint_path)
        model.restore(chpt.model_checkpoint_path)
    else:
        print("init a new model.")
        model.init_sess()

    TEST_SENTENCE_PATH = os.path.join(ORIGINAL_DATA_DIR,
                                      "test.%s" % SOURCE_LANG)
    step = 0
    with open(TEST_SENTENCE_PATH, 'r') as f_s:
        for sentence in f_s:
            step += 1
            word_ids = textloader.sentence_to_token_id(
                tf.compat.as_bytes(sentence), s_vocab)
            # pick the smallest bucket whose source length fits the sentence;
            # fall back to the last (largest) bucket otherwise
            bid = len(_buckets) - 1
            for i, bucket in enumerate(_buckets):
                if bucket[0] >= len(word_ids):
                    bid = i
                    break
            encoder_inputs, decoder_inputs, target_weights = textloader.get_batch(
                {bid: [(word_ids, [])]}, bid)
            _, output_logits = model.predict(encoder_inputs, decoder_inputs,
                                             target_weights, bid)
            # greedy decoder: take the highest-scoring token at each step
            outputs = [int(np.argmax(logit, axis=1)) for logit in output_logits]
            # truncate at the first end-of-sentence token
            if TextLoader.EOS_ID in outputs:
                outputs = outputs[:outputs.index(TextLoader.EOS_ID)]
            result = [tf.compat.as_str(t_id2vocab[output]) for output in outputs]
            print("source(%d): %s" % (step, sentence))
            print("inference(%d): %s" % (step, result))
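
# A quick illustration of the bucket selection used in translate() above.
# This is a standalone sketch, not part of the pipeline: pick_bucket_for is
# a hypothetical helper and the bucket sizes are made-up values. With
# _buckets = [(5, 10), (10, 15), (20, 25)], a 7-token source sentence lands
# in bucket 1, since 10 is the smallest source size that still fits it;
# anything longer than 20 tokens falls back to the last bucket.
def pick_bucket_for(word_ids, buckets):
    for i, (source_size, _) in enumerate(buckets):
        if source_size >= len(word_ids):
            return i
    return len(buckets) - 1

# e.g. pick_bucket_for(list(range(7)), [(5, 10), (10, 15), (20, 25)]) -> 1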
def train():
    textloader = TextLoader(ORIGINAL_DATA_DIR, SOURCE_VOCAB_SIZE,
                            TARGET_VOCAB_SIZE, SOURCE_LANG, TARGET_LANG,
                            _buckets, BATCH_SIZE)
    (source_train, target_train,
     source_dev, target_dev, _, _) = textloader.prepare_data()
    train_data = textloader.read_data(source_train, target_train)
    # dev_data is loaded here but not consumed in the training loop below
    dev_data = textloader.read_data(source_dev, target_dev)
    textloader.init_train_bucket(train_data)

    # create seq2seq model
    model = Seq2SeqNMT(SOURCE_VOCAB_SIZE, TARGET_VOCAB_SIZE, _buckets,
                       HIDDEN_UNITS, N_LAYERS, BATCH_SIZE, LR)
    model.build_graph()

    chpt = tf.train.get_checkpoint_state(TRAIN_DIR)
    if chpt:
        print("restore model parameters from %s" % chpt.model_checkpoint_path)
        model.restore(chpt.model_checkpoint_path)
    else:
        print("init a new model.")
        model.init_sess()

    current_step = 0
    avg_time = 0.0
    avg_loss = 0.0
    for _ in range(ITERATION):
        # sample a bucket, then draw a batch of that bucket's size
        bid = textloader.pick_bucket()
        encoder_inputs, decoder_inputs, target_weights = textloader.get_batch(
            train_data, bid)
        begin_time = time.time()
        gradient_norm, loss = model.train_batch(encoder_inputs, decoder_inputs,
                                                target_weights, bid)
        # running averages over one checkpoint interval
        avg_time += (time.time() - begin_time) / STEP_PER_CKP
        avg_loss += loss / STEP_PER_CKP
        current_step += 1
        if current_step % STEP_PER_CKP == 0:
            print("total step %d learning rate %.4f avg-time %.2f avg-loss: %6f"
                  % (model.global_step.eval(session=model.sess),
                     model.learning_rate.eval(session=model.sess),
                     avg_time, avg_loss))
            chk_path = os.path.join(TRAIN_DIR, "nmt.ckpt." + str(current_step))
            model.save(chk_path)
            avg_time = 0.0
            avg_loss = 0.0
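
# A minimal command-line entry point so the two functions above can be run
# from the shell. This is a sketch under assumptions: the original repo may
# wire this up differently, and the --mode flag name is invented here.
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(description="seq2seq NMT")
    parser.add_argument("--mode", choices=["train", "translate"],
                        default="train",
                        help="train a model or decode the test set")
    args = parser.parse_args()

    if args.mode == "train":
        train()
    else:
        translate()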