import os
import time

import numpy as np
import tensorflow as tf

# TextLoader, Seq2SeqNMT and the uppercase constants used below
# (ORIGINAL_DATA_DIR, TRAIN_DIR, SOURCE_VOCAB_SIZE, TARGET_VOCAB_SIZE,
# SOURCE_LANG, TARGET_LANG, _buckets, HIDDEN_UNITS, N_LAYERS, BATCH_SIZE,
# LR, ITERATION, STEP_PER_CKP) are assumed to be defined elsewhere in the
# project.


def translate():
    textloader = TextLoader(ORIGINAL_DATA_DIR,
                            SOURCE_VOCAB_SIZE,
                            TARGET_VOCAB_SIZE,
                            SOURCE_LANG,
                            TARGET_LANG,
                            _buckets,
                            batch_size=1)
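    # batch_size=1: inference below decodes one sentence at a time.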
    s_vocab_path = os.path.join(
        ORIGINAL_DATA_DIR, "vocab%d.%s" % (SOURCE_VOCAB_SIZE, SOURCE_LANG))
    t_vocab_path = os.path.join(
        ORIGINAL_DATA_DIR, "vocab%d.%s" % (TARGET_VOCAB_SIZE, TARGET_LANG))
    s_vocab, _ = textloader.init_vocabulary(s_vocab_path)
    _, t_id2vocab = textloader.init_vocabulary(t_vocab_path)

    # create seq2seq model
    model = Seq2SeqNMT(SOURCE_VOCAB_SIZE,
                       TARGET_VOCAB_SIZE,
                       _buckets,
                       HIDDEN_UNITS,
                       N_LAYERS,
                       batch_size=1,
                       learning_rate=LR)
    model.build_graph(train=False)

    chpt = tf.train.get_checkpoint_state(TRAIN_DIR)
    if chpt:
        print("restore model paramters from %s" % chpt.model_checkpoint_path)
        model.restore(chpt.model_checkpoint_path)
    else:
        print("init a new model.")
        model.init_sess()

    TEST_SENTENCE_PATH = os.path.join(ORIGINAL_DATA_DIR,
                                      "test.%s" % SOURCE_LANG)
    with open(TEST_SENTENCE_PATH, 'r') as f_s:
        test_sentences = f_s.readlines()
    step = 0

    for sentence in test_sentences:
        step += 1
        word_ids = textloader.sentence_to_token_id(
            tf.compat.as_bytes(sentence), s_vocab)
        # find out the buckets
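        # For example, with tutorial-style buckets such as
        # _buckets = [(5, 10), (10, 15), (20, 25), (40, 50)] (an assumed
        # configuration; the real values live elsewhere), a 7-token sentence
        # selects bucket 1, i.e. (10, 15). Sentences longer than every
        # bucket fall back to the last one.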
        bid = len(_buckets) - 1
        for i, bucket in enumerate(_buckets):
            if bucket[0] >= len(word_ids):
                bid = i
                break

        encoder_inputs, decoder_inputs, target_weights = textloader.get_batch(
            {bid: [(word_ids, [])]}, bid)
        _, output_logits = model.predict(encoder_inputs, decoder_inputs,
                                         target_weights, bid)
        # greedy decoder
        outputs = [int(np.argmax(logit, axis=1)) for logit in output_logits]
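        # truncate at the first EOS: ids emitted after it are padding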
        if TextLoader.EOS_ID in outputs:
            outputs = outputs[:outputs.index(TextLoader.EOS_ID)]
        result = [tf.compat.as_str(t_id2vocab[output]) for output in outputs]
        print "source(%d): %s" % (step, sentence)
        print "inference(%d): %s" % (step, result)


def train():
    textloader = TextLoader(ORIGINAL_DATA_DIR, SOURCE_VOCAB_SIZE,
                            TARGET_VOCAB_SIZE, SOURCE_LANG, TARGET_LANG,
                            _buckets, BATCH_SIZE)
    source_train, target_train, source_dev, target_dev, _, _ = \
        textloader.prepare_data()
    train_data = textloader.read_data(source_train, target_train)
    dev_data = textloader.read_data(source_dev, target_dev)

    textloader.init_train_bucket(train_data)
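    # init_train_bucket presumably tallies how many training pairs fall into
    # each bucket so that pick_bucket() can sample bucket ids proportionally,
    # as the classic TensorFlow seq2seq tutorial does; this is an assumption
    # about the project's TextLoader.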

    # create seq2seq model
    model = Seq2SeqNMT(SOURCE_VOCAB_SIZE, TARGET_VOCAB_SIZE, _buckets,
                       HIDDEN_UNITS, N_LAYERS, BATCH_SIZE, LR)
    model.build_graph()

    chpt = tf.train.get_checkpoint_state(TRAIN_DIR)
    if chpt:
        print("restore model paramters from %s" % chpt.model_checkpoint_path)
        model.restore(chpt.model_checkpoint_path)
    else:
        print("init a new model.")
        model.init_sess()

    current_step = 0
    avg_time = 0.0
    avg_loss = 0.0
    for _ in range(ITERATION):
        bid = textloader.pick_bucket()
        encoder_inputs, decoder_inputs, target_weights = textloader.get_batch(
            train_data, bid)
        begin_time = time.time()
        gradient_norm, loss = model.train_batch(encoder_inputs, decoder_inputs,
                                                target_weights, bid)
        avg_time += (time.time() - begin_time) / STEP_PER_CKP
        avg_loss += loss / STEP_PER_CKP
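        # avg_time and avg_loss accumulate running means over a window of
        # STEP_PER_CKP steps; both are reset after each checkpoint below.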

        current_step += 1

        if current_step % STEP_PER_CKP == 0:
            print(
                "total step %d learning rate %.4f avg-time %.2f avg-loss %.6f"
                % (model.global_step.eval(session=model.sess),
                   model.learning_rate.eval(session=model.sess), avg_time,
                   avg_loss))

            chk_path = os.path.join(TRAIN_DIR, "nmt.ckpt." + str(current_step))
            model.save(chk_path)
            avg_time = 0.0
            avg_loss = 0.0
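

# Minimal entry point; how this script is launched is an assumption (the
# original project may dispatch between train() and translate() via flags).
if __name__ == "__main__":
    train()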