Code Example #1
File: model.py Project: archmaester/RCGAN
def train_epoch(epoch, samples, labels, sess, Z, X, CG, CD, CS, accuracy, D_loss, G_loss, D_solver, G_solver,
                batch_size, use_time, D_rounds, G_rounds, seq_length, 
                latent_dim, num_generated_features, cond_dim, max_val, WGAN_clip, one_hot):
    """
    Train generator and discriminator for one epoch.
    """
    for batch_idx in range(0, int(len(samples) / batch_size) - (D_rounds + (cond_dim > 0)*G_rounds), D_rounds + (cond_dim > 0)*G_rounds):
        # update the discriminator
        for d in range(D_rounds):
            
            X_mb, Y_mb = data_utils.get_batch(samples, batch_size, batch_idx + d, labels)
            Z_mb = sample_Z(batch_size, seq_length, latent_dim, use_time)
            if cond_dim > 0:
                # CGAN
                Y_mb = Y_mb.reshape(-1, cond_dim)
#                 if one_hot:
#                     # change all of the labels to a different one
#                     offsets = np.random.choice(cond_dim-1, batch_size) + 1
#                     new_labels = (np.argmax(Y_mb, axis=1) + offsets) % cond_dim
#                     Y_wrong = np.zeros_like(Y_mb)
#                     Y_wrong[np.arange(batch_size), new_labels] = 1
#                 else:
#                     # flip all of the bits (assuming binary...)
# #                     Y_wrong = 1 - Y_mb
                
                _ = sess.run(D_solver, feed_dict={X: X_mb, Z: Z_mb, CD: Y_mb, CG: Y_mb})
            
            else:
                _ = sess.run(D_solver, feed_dict={X: X_mb, Z: Z_mb})
            if WGAN_clip:
                # clip the weights
                _ = sess.run([clip_disc_weights])
        # update the generator
        for g in range(G_rounds):
            if cond_dim > 0:
                # note we are essentially throwing these X_mb away...
                X_mb, Y_mb = data_utils.get_batch(samples, batch_size, batch_idx + D_rounds + g, labels)
                Y_mb = Y_mb.reshape(-1, cond_dim)  # match the conditional dimension expected by CG
                _ = sess.run(G_solver,
                        feed_dict={Z: sample_Z(batch_size, seq_length, latent_dim, use_time=use_time), CG: Y_mb})
            else:
                _ = sess.run(G_solver,
                        feed_dict={Z: sample_Z(batch_size, seq_length, latent_dim, use_time=use_time)})
    # at the end, get the loss
    if cond_dim > 0:
        D_loss_curr, G_loss_curr, acc = sess.run([D_loss, G_loss, accuracy], feed_dict={X: X_mb, Z: sample_Z(batch_size, seq_length, latent_dim, use_time=use_time), CG: Y_mb, CD: Y_mb})
        print("monish",acc)
        D_loss_curr = np.mean(D_loss_curr)
        G_loss_curr = np.mean(G_loss_curr)
    else:
        D_loss_curr, G_loss_curr = sess.run([D_loss, G_loss], feed_dict={X: X_mb, Z: sample_Z(batch_size, seq_length, latent_dim, use_time=use_time)})
        D_loss_curr = np.mean(D_loss_curr)
        G_loss_curr = np.mean(G_loss_curr)
    return D_loss_curr, G_loss_curr
Code Example #2
    def step(self, dataset, training=False):
        '''
        one epoch
        '''
        if (training):
            next_batch = data_utils.get_batch(dataset, self.batch_size,
                                              self.input_dim, self.output_dim)
            self.cur_batch_size = self.batch_size
        else:
            self.cur_batch_size = len(dataset)
            next_batch = data_utils.get_batch(dataset,
                                              self.cur_batch_size,
                                              self.input_dim,
                                              self.output_dim,
                                              shuffle=False)

        loss = 0
        while True:
            try:
                input_data, output_data, seq_length, seq_length2 = self.sess.run(
                    next_batch)
                if (input_data.shape[0] != self.cur_batch_size):
                    continue
                if (training):
                    pre_loss, _ = self.sess.run(
                        [self.loss, self.train_op],
                        feed_dict={
                            self.train_input: input_data,
                            self.train_output: output_data,
                            self.seq_length: seq_length
                        })
                    loss = self.sess.run(self.loss,
                                         feed_dict={
                                             self.train_input: input_data,
                                             self.train_output: output_data,
                                             self.seq_length: seq_length
                                         })
                    print('batch: %d preloss %.4f loss: %.4f' %
                          (input_data.shape[0], pre_loss, loss))
                else:
                    loss = self.sess.run(self.loss,
                                         feed_dict={
                                             self.train_input: input_data,
                                             self.train_output: output_data,
                                             self.seq_length: seq_length
                                         })
            except tf.errors.OutOfRangeError:
                #if (not training):
                #    loss = loss / ind
                break

        return loss
Code Example #3
File: app.py Project: eleleung/chatbot
def get_response(message):
    line = str.encode(message)
    if len(line) > 0 and line[-1] == '\n':
        line = line[:-1]
    if line == '':
        response = 'What did you say?'
        output_file.write('Human: ' + message + '\n' + 'Bot: ' +
                          str(response) + '\n')
        return message, response

    token_ids = data_utils.sentence2id(enc_vocab, line)
    if len(token_ids) > max_length:
        response = 'The maximum length I can handle is %d' % max_length
        output_file.write('Human: ' + message + '\n' + 'Bot: ' +
                          str(response) + '\n')
        return message, response

    bucket_id = chatbot.find_right_bucket(len(token_ids))
    encoder_inputs, decoder_inputs, decoder_masks = data_utils.get_batch(
        [(token_ids, [])], bucket_id, batch_size=1)
    _, _, output_logits = chatbot.run_step(sess, model, encoder_inputs,
                                           decoder_inputs, decoder_masks,
                                           bucket_id, True)
    response = chatbot.construct_response(output_logits, inv_dec_vocab)
    output_file.write('Human: ' + message + '\n' + 'Bot: ' + str(response) +
                      '\n')

    return message, response
Code Example #4
def train(train_set, test_set, vocabulary):
    with tf.Session() as session:
        model = create_model(session, False)

        loss = 0.0
        current_step = 0
        previous_losses = []
        while True:
            current_step += 1
            encoder_inputs, decoder_inputs, target_weights = data_utils.get_batch(
                train_set, BATCH_SIZE, MODEL_LENGTH)

            _, step_loss, _ = model.step(session, encoder_inputs,
                                         decoder_inputs, target_weights, False)
            loss += step_loss / STEPS_PER_CHECKPOINT

            if current_step % STEPS_PER_CHECKPOINT == 0:
                perplexity = math.exp(loss) if loss < 300 else float('inf')
                print "global step %d learning rate %.4f perpexity %.2f" % (
                    current_step, model.learning_rate.eval(), perplexity)
                if len(previous_losses) > 2 and loss > max(
                        previous_losses[-3:]):
                    session.run(model.learning_rate_decay_op)
                previous_losses.append(loss)
                loss = 0.0

                checkpoint_path = os.path.join(DATA_PATH,
                                               "spell_correction.ckpt")
                model.saver.save(session,
                                 checkpoint_path,
                                 global_step=model.global_step)

                test(session, model, vocabulary, test_set)
Code Example #5
File: model.py Project: yonghelu/GANs_AD
def train_epoch(epoch, samples, labels, sess, Z, X, D_loss, G_loss, D_solver, G_solver,
                batch_size, use_time, D_rounds, G_rounds, seq_length,
                latent_dim, num_signals):
    """
    Train generator and discriminator for one epoch.
    """
    # for batch_idx in range(0, int(len(samples) / batch_size) - (D_rounds + (cond_dim > 0) * G_rounds), D_rounds + (cond_dim > 0) * G_rounds):
    for batch_idx in range(0, int(len(samples) / batch_size) - (D_rounds + G_rounds), D_rounds + G_rounds):
        # update the discriminator
        X_mb, Y_mb = data_utils.get_batch(samples, batch_size, batch_idx, labels)
        Z_mb = sample_Z(batch_size, seq_length, latent_dim, use_time)
        for d in range(D_rounds):
            # run the discriminator solver
            _ = sess.run(D_solver, feed_dict={X: X_mb, Z: Z_mb})

        # update the generator
        for g in range(G_rounds):
            # run the generator solver
            _ = sess.run(G_solver, feed_dict={Z: sample_Z(batch_size, seq_length, latent_dim, use_time=use_time)})

    # at the end, get the loss
    D_loss_curr, G_loss_curr = sess.run([D_loss, G_loss], feed_dict={X: X_mb,
                                                                     Z: sample_Z(batch_size, seq_length, latent_dim,
                                                                                 use_time=use_time)})
    D_loss_curr = np.mean(D_loss_curr)
    G_loss_curr = np.mean(G_loss_curr)


    return D_loss_curr, G_loss_curr
Code Example #6
def test(sess,
         dataset,
         out_dir,
         input_dim,
         output_dim,
         apply_cmvn=False,
         param_cmvn=None):
    next_batch = data_utils.get_batch(dataset,
                                      1,
                                      input_dim,
                                      output_dim,
                                      shuffle=False)
    ind = 0
    while True:
        try:
            input, output, seq_length = sess.run(next_batch)
            output = numpy.reshape(output, (-1, output_dim))
            output = output[:seq_length[0]]
            print(seq_length[0])
            if (apply_cmvn):
                output = output * param_cmvn[1] + param_cmvn[0]

            filename = os.path.basename(dataset[ind]).split('.')[0] + '.cmp'
            numpy.savetxt(out_dir + '/' + filename, output, fmt='%f')
            print('write one %s' % filename)
            ind += 1
        except tf.errors.OutOfRangeError:
            break
Code Example #7
def train(epoch):
    model.train() # Turn on the train mode
    total_loss = 0.
    start_time = time.time()
    for batch, i in enumerate(range(0, train_data.size(0)-1, bptt)):

        inputs, targets = get_batch(train_data, i, bptt)
        inputs = inputs.to(device)
        targets = targets.to(device)
        model.zero_grad()

        optimizer.zero_grad()
        output = model(inputs)
        loss = criterion(output.view(-1, ntokens), targets)
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 0.5)
        optimizer.step()

        total_loss += loss.item()
        if batch % args.log_interval == 0 and batch > 0:
            cur_loss = total_loss / args.log_interval
            elapsed = time.time() - start_time
            print('| epoch {:3d} | {:5d}/{:5d} batches | lr {:02.2f} | ms/batch {:5.2f} | '
                    'loss {:5.2f} | ppl {:8.2f}'.format(
                epoch, batch, len(train_data) // args.bptt,  scheduler.get_lr()[0],
                elapsed * 1000 / args.log_interval, cur_loss, math.exp(cur_loss)))
            total_loss = 0
            start_time = time.time()
Code Example #8
File: neural_gpu_trainer.py Project: ALISCIFP/models
def single_test(bin_id, model, sess, nprint, batch_size, dev, p, print_out=True,
                offset=None, beam_model=None):
  """Test model on test data of length l using the given session."""
  if not dev[p][bin_id]:
    data.print_out("  bin %d (%d)\t%s\tppl NA errors NA seq-errors NA"
                   % (bin_id, data.bins[bin_id], p))
    return 1.0, 1.0, 0.0
  inpt, target = data.get_batch(
      bin_id, batch_size, dev[p], FLAGS.height, offset)
  if FLAGS.beam_size > 1 and beam_model:
    loss, res, new_tgt, scores = m_step(
        model, beam_model, sess, batch_size, inpt, target, bin_id,
        FLAGS.eval_beam_steps, p)
    score_avgs = [sum(s) / float(len(s)) for s in scores]
    score_maxs = [max(s) for s in scores]
    score_str = ["(%.2f, %.2f)" % (score_avgs[i], score_maxs[i])
                 for i in xrange(FLAGS.eval_beam_steps)]
    data.print_out("  == scores (avg, max): %s" % "; ".join(score_str))
    errors, total, seq_err = data.accuracy(inpt, res, target, batch_size,
                                           nprint, new_tgt, scores[-1])
  else:
    loss, res, _, _ = model.step(sess, inpt, target, False)
    errors, total, seq_err = data.accuracy(inpt, res, target, batch_size,
                                           nprint)
  seq_err = float(seq_err) / batch_size
  if total > 0:
    errors = float(errors) / total
  if print_out:
    data.print_out("  bin %d (%d)\t%s\tppl %.2f errors %.2f seq-errors %.2f"
                   % (bin_id, data.bins[bin_id], p, data.safe_exp(loss),
                      100 * errors, 100 * seq_err))
  return (errors, seq_err, loss)
Code Example #9
def test(sess, model, vocabulary, test_set):
    rev_vocabulary = {v: k for k, v in vocabulary.items()}
    rev_vocabulary[data_utils.PAD_ID] = ''
    rev_vocabulary[data_utils.GO_ID] = ''
    rev_vocabulary[data_utils.UNK_ID] = ''

    encoder_inputs, decoder_inputs, target_weights = data_utils.get_batch(
        test_set, BATCH_SIZE, MODEL_LENGTH)
    _, _, output_logits = model.step(sess, encoder_inputs, decoder_inputs,
                                     target_weights, True)

    output_matrix = np.empty((len(output_logits), BATCH_SIZE))
    for lenIdx in xrange(len(output_logits)):
        output_matrix[lenIdx] = np.array(
            [int(np.argmax(logit)) for logit in output_logits[lenIdx]])

    typos = visualize(rev_vocabulary, encoder_inputs, should_reverse=True)
    rewrites = visualize(rev_vocabulary, decoder_inputs)
    guesses = visualize(rev_vocabulary, output_matrix)
    total = 0
    correct = 0
    for i in xrange(len(typos)):
        total += 1
        correct += 1 if rewrites[i] == guesses[i] else 0
        print typos[i], ' - ', rewrites[i], ' - ', guesses[i]
    print 'total: ', total, ' corrected: ', correct, ' acc: ', correct / (
        total + 0.0)
Code Example #10
def train():
    """
    Train the bot.
    """
    # test_buckets, data_buckets: <type "list">:
    #     [[[[Context], [Response]], ], ]]
    #     test_buckets[0]: first bucket
    #     test_buckets[0][0]: first pair of the first bucket
    #     test_buckets[0][0][0], test_buckets[0][0][1]: Context and response
    #     test_buckets[0][0][0][0]: word index of the first words
    # train_buckets_scale: list of increasing numbers from 0 to 1 that
    #     we"ll use to select a bucket. len(train_buckets_scale) = len(BUCKETS)
    test_buckets, data_buckets, train_buckets_scale = _get_buckets()

    # in train mode, we need to create the backward path, so forward_only is False
    model = ChatBotModel(False, config.BATCH_SIZE)
    # build graph
    model.build_graph()
    saver = tf.train.Saver()

    with tf.Session() as sess:
        print("Running session...")
        sess.run(tf.global_variables_initializer())
        check_restore_parameters(sess, saver)

        iteration = model.global_step.eval()
        total_loss = 0
        logging.info("Training...")
        try:
            while True:
                skip_step = _get_skip_step(iteration)
                bucket_id = _get_random_bucket(train_buckets_scale)
                encoder_inputs, decoder_inputs, decoder_masks = data_utils.get_batch(
                    data_buckets[bucket_id],
                    bucket_id,
                    batch_size=config.BATCH_SIZE)
                start = time.time()
                _, step_loss, _ = run_step(sess, model, encoder_inputs,
                                           decoder_inputs, decoder_masks,
                                           bucket_id, False)
                total_loss += step_loss
                iteration += 1

                if iteration % skip_step == 0:
                    logging.info(
                        "Training @ iter {:d}: loss {:.4f}, time {:.4f}".
                        format(iteration, total_loss / skip_step,
                               time.time() - start))
                    total_loss = 0
                    saver.save(sess,
                               os.path.join(config.CPT_PATH, "chatbot"),
                               global_step=model.global_step)
                    if iteration % (10 * skip_step) == 0:
                        logging.info("Testing...")
                        # Run evals on development set and print their loss
                        _eval_test_set(sess, model, test_buckets)
                    sys.stdout.flush()
        except KeyboardInterrupt:
            logging.info("Training interrupted.")
Code Example #11
def chat(question):
    """
    In test mode, we don"t to create the backward path.
    """
    _, enc_vocab = data_utils.load_vocab(
        os.path.join(config.DATA_PATH, "vocab.enc"))
    # `inv_dec_vocab` <type "list">: id2word.
    inv_dec_vocab, _ = data_utils.load_vocab(
        os.path.join(config.DATA_PATH, "vocab.dec"))

    model = ChatBotModel(True, batch_size=1)
    model.build_graph()

    saver = tf.train.Saver()

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        check_restore_parameters(sess, saver)
        output_file = open(os.path.join(config.DATA_PATH,
                                        config.TERMINAL_OUTPUT),
                           "a+",
                           encoding="utf-8")
        # Decode from standard input.
        max_length = config.BUCKETS[-1][0]
        print(
            "Welcome to TensorBro. Say something. Enter to exit. Max length is",
            max_length)

        line = question
        if hasattr(line, "decode"):
            # If using Python 2
            # FIXME: UnicodeError when deleting Chinese in terminal.
            line = line.decode("utf-8")
        if len(line) > 0 and line[-1] == "\n":
            line = line[:-1]
        if not line:
            pass
        output_file.write("HUMAN ++++ " + line + "\n")
        # Get token-ids for the input sentence.
        token_ids = data_utils.sentence2id(enc_vocab, line)
        if len(token_ids) > max_length:
            print("Max length I can handle is:", max_length)
            # line = _get_user_input()
            pass
        # Which bucket does it belong to?
        bucket_id = find_right_bucket(len(token_ids))
        # Get a 1-element batch to feed the sentence to the model.
        encoder_inputs, decoder_inputs, decoder_masks = data_utils.get_batch(
            [(token_ids, [])], bucket_id, batch_size=1)
        # Get output logits for the sentence.
        _, _, output_logits = run_step(sess, model, encoder_inputs,
                                       decoder_inputs, decoder_masks,
                                       bucket_id, True)
        response = construct_response(output_logits, inv_dec_vocab)
        print(response)
        output_file.write("BOT ++++ " + response + "\n")

        output_file.write("=============================================\n")
        output_file.close()
Code Example #12
def train():
    """ Train the bot """
    test_buckets, data_buckets, train_buckets_scale = get_buckets()
    model = ChatBotModel(False, config.BATCH_SIZE)
    model.build_graph()

    saver = tf.train.Saver()

    with tf.Session() as sess:
        print('Running session')
        sess.run(tf.global_variables_initializer())
        check_restore_parameters(sess, saver)

        iteration = model.global_step.eval()
        total_loss = 0

        file_writer = tf.summary.FileWriter(
            os.path.join(config.LOG_PATH, 'tensorboard'), sess.graph)
        training_loss_summary = tf.Summary()
        testing_loss_summary = tf.Summary()
        while True:
            skip_step = get_skip_step(iteration)
            bucket_id = get_random_bucket(train_buckets_scale)
            encoder_inputs, decoder_inputs, decoder_masks = data_utils.get_batch(
                data_buckets[bucket_id],
                bucket_id,
                batch_size=config.BATCH_SIZE)
            start = time.time()
            _, step_loss, _ = run_step(sess, model, encoder_inputs,
                                       decoder_inputs, decoder_masks,
                                       bucket_id, False)
            total_loss += step_loss
            iteration += 1

            if iteration % skip_step == 0:
                print('Iter {}: loss {}, time {}'.format(
                    iteration, total_loss / skip_step,
                    time.time() - start))

                bucket_value = training_loss_summary.value.add()
                bucket_value.tag = "training_loss_bucket_%d" % bucket_id
                bucket_value.simple_value = step_loss
                file_writer.add_summary(training_loss_summary,
                                        model.global_step.eval())

                start = time.time()
                total_loss = 0
                saver.save(sess,
                           os.path.join(config.CPT_PATH, 'chatbot'),
                           global_step=model.global_step)

                if iteration % (10 * skip_step) == 0:
                    # Run evals on development set and print their loss
                    eval_test_set(sess, model, test_buckets,
                                  testing_loss_summary, file_writer)
                    start = time.time()
                sys.stdout.flush()
Code Example #13
    def __gen_training_data(for_training):
        x = []
        y = []

        for index, seq_len in enumerate(cnf.bins):
            data, labels = data_gen.get_batch(seq_len, cnf.batch_size,
                                              for_training, cnf.task)
            x += [data]
            y += [labels]

        return x, y
Code Example #14
def single_test(l, model, sess, task, nprint, batch_size, print_out=True,
                offset=None):
  """Test model on test data of length l using the given session."""
  inpt, target = data.get_batch(l, batch_size, False, task, offset)
  _, res, _, steps = model.step(sess, inpt, target, False)
  errors, total, seq_err = data.accuracy(inpt, res, target, batch_size, nprint)
  seq_err = float(seq_err) / batch_size
  if total > 0:
    errors = float(errors) / total
  if print_out:
    data.print_out("  %s len %d errors %.2f sequence-errors %.2f"
                   % (task, l, 100*errors, 100*seq_err))
  return errors, seq_err, (steps, inpt, [np.argmax(o, axis=1) for o in res])
Code Example #15
def evaluate(eval_model, data_source):
    eval_model.eval() # Turn on the evaluation mode
    total_loss = 0.
    # ntokens = len(TEXT.vocab.stoi)
    with torch.no_grad():
        for i in range(0, data_source.size(0) - 1, bptt):
            inputs, targets = get_batch(data_source, i, bptt)
            inputs = inputs.to(device)
            targets = targets.to(device)
            output = eval_model(inputs)
            output_flat = output.view(-1, ntokens)
            total_loss += len(inputs) * criterion(output_flat, targets).item()
    return total_loss / (len(data_source) - 1)
Code Example #16
def single_test(l,
                model,
                sess,
                task,
                nprint,
                batch_size,
                print_out=True,
                offset=None,
                ensemble=None,
                get_steps=False):
    """Test model on test data of length l using the given session."""
    inpt, target = data.get_batch(l, batch_size, False, task, offset)
    _, res, _, steps = model.step(sess,
                                  inpt,
                                  target,
                                  False,
                                  get_steps=get_steps)
    errors, total, seq_err = data.accuracy(inpt, res, target, batch_size,
                                           nprint)
    seq_err = float(seq_err) / batch_size
    if total > 0:
        errors = float(errors) / total
    if print_out:
        data.print_out("  %s len %d errors %.2f sequence-errors %.2f" %
                       (task, l, 100 * errors, 100 * seq_err))
    # Ensemble eval.
    if ensemble:
        results = []
        for m in ensemble:
            model.saver.restore(sess, m)
            _, result, _, _ = model.step(sess, inpt, target, False)
            m_errors, m_total, m_seq_err = data.accuracy(
                inpt, result, target, batch_size, nprint)
            m_seq_err = float(m_seq_err) / batch_size
            if total > 0:
                m_errors = float(m_errors) / m_total
            data.print_out(
                "     %s len %d m-errors %.2f m-sequence-errors %.2f" %
                (task, l, 100 * m_errors, 100 * m_seq_err))
            results.append(result)
        ens = [sum(o) for o in zip(*results)]
        errors, total, seq_err = data.accuracy(inpt, ens, target, batch_size,
                                               nprint)
        seq_err = float(seq_err) / batch_size
        if total > 0:
            errors = float(errors) / total
        if print_out:
            data.print_out(
                "  %s len %d ens-errors %.2f ens-sequence-errors %.2f" %
                (task, l, 100 * errors, 100 * seq_err))
    return errors, seq_err, (steps, inpt, [np.argmax(o, axis=1) for o in res])
Code Example #17
File: api3.py Project: zoey-wangzw/medical-chatbot
def _eval_test_set(sess, model, test_buckets):
    for bucket_id in range(len(config.BUCKETS)):
        if len(test_buckets[bucket_id]) == 0:
            print("  Test: empty bucket {:d}".format(bucket_id))
            continue
        start = time.time()
        encoder_inputs, decoder_inputs, decoder_masks = data_utils.get_batch(
            test_buckets[bucket_id],
            bucket_id,
            batch_size=config.BATCH_SIZE)
        _, step_loss, _ = run_step(sess, model, encoder_inputs, decoder_inputs,
                                   decoder_masks, bucket_id, True)
        logging.info("Test bucket {:d}: loss {:.4f}, time {:.4f}".format(
            bucket_id, step_loss, time.time() - start))
Code Example #18
def chat():
    """ in test mode, we don't to create the backward path """
    _, enc_vocab = data_utils.load_vocab(
        os.path.join(config.PROCESSED_PATH, 'vocab.enc'))
    inv_dec_vocab, _ = data_utils.load_vocab(
        os.path.join(config.PROCESSED_PATH, 'vocab.dec'))

    model = ChatBotModel(True, batch_size=1)
    model.build_graph()

    saver = tf.train.Saver()

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        check_restore_parameters(sess, saver)
        output_file = open(
            '/Users/EleanorLeung/Documents/CITS4404/chatbot/output_convo.txt',
            'a+')
        # Decode from standard input.
        max_length = config.BUCKETS[-1][0]
        print('Talk to me! Enter to exit. Max length is', max_length)
        while True:
            line = str.encode(get_user_input())
            if len(line) > 0 and line[-1] == '\n':
                line = line[:-1]
            if line == '':
                break
            output_file.write('HUMAN: ' + str(line) + '\n')
            token_ids = data_utils.sentence2id(enc_vocab, line)
            if len(token_ids) > max_length:
                print('Max length I can handle is:', max_length)
                line = get_user_input()
                continue
            bucket_id = find_right_bucket(len(token_ids))
            # Get a 1-element batch to feed the sentence to the model.
            encoder_inputs, decoder_inputs, decoder_masks = data_utils.get_batch(
                [(token_ids, [])], bucket_id, batch_size=1)
            # Get output logits for the sentence.
            _, _, output_logits = run_step(sess, model, encoder_inputs,
                                           decoder_inputs, decoder_masks,
                                           bucket_id, True)
            response = construct_response(output_logits, inv_dec_vocab)
            print(response)
            output_file.write('BOT: ' + response + '\n')
        output_file.write('=============================================\n')
        output_file.close()
Code Example #19
File: neural_gpu_trainer.py Project: 1206lyp/models
def interactive():
  """Interactively probe an existing model."""
  with tf.Session() as sess:
    model, _, _, _, _, _ = initialize(sess)
    sys.stdout.write("Input to Neural GPU, e.g., 0 1. Use -1 for PAD.\n")
    sys.stdout.write("> ")
    sys.stdout.flush()
    inpt = sys.stdin.readline()
    while inpt:
      ids = [data.to_id(s) for s in inpt.strip().split()]
      inpt, target = data.get_batch(len(ids), 1, False, "",
                                    preset=(ids, [0 for _ in ids]))
      _, res, _, _ = model.step(sess, inpt, target, False)
      res = [np.argmax(o, axis=1) for o in res]
      res = [o for o in res[:len(ids)] if o > 0]
      print "  " + " ".join([data.to_symbol(output[0]) for output in res])
      sys.stdout.write("> ")
      sys.stdout.flush()
      inpt = sys.stdin.readline()
Code Example #20
def interactive():
  """Interactively probe an existing model."""
  with tf.Session() as sess:
    model, _, _, _, _, _ = initialize(sess)
    sys.stdout.write("Input to Neural GPU, e.g., 0 1. Use -1 for PAD.\n")
    sys.stdout.write("> ")
    sys.stdout.flush()
    inpt = sys.stdin.readline()
    while inpt:
      ids = [data.to_id(s) for s in inpt.strip().split()]
      inpt, target = data.get_batch(len(ids), 1, False, "",
                                    preset=(ids, [0 for _ in ids]))
      _, res, _, _ = model.step(sess, inpt, target, False)
      res = [np.argmax(o, axis=1) for o in res]
      res = [o for o in res[:len(ids)] if o > 0]
      print("  " + " ".join([data.to_symbol(output[0]) for output in res]))
      sys.stdout.write("> ")
      sys.stdout.flush()
      inpt = sys.stdin.readline()
Code Example #21
def evaluate_CNN(X_test, Y_test, X_raw):
  
  graph = tf.Graph()
  with graph.as_default():
    session_conf = tf.ConfigProto(
        allow_soft_placement=ALLOW_SOFT_PLACEMENT,
        log_device_placement=LOG_DEVICE_PLACEMENT
        )
    sess = tf.Session(config=session_conf)
    with sess.as_default():
      # Load the saved meta graph and restore variables
      tf.saved_model.loader.load(sess, [tag_constants.SERVING], MODEL_DIR)
      
      # Get the placeholders from the graph by name
      input_x = graph.get_operation_by_name("input_x").outputs[0]
      input_y = graph.get_operation_by_name("input_y").outputs[0]
      dropout_keep_prob = graph.get_operation_by_name("dropout_keep_prob").outputs[0]
      
      # Tensors we want to evaluate
      predictions = graph.get_operation_by_name("output/predictions").outputs[0]
      
      # Generate batches for one epoch
      batches = get_batch(list(X_test), BATCH_SIZE, 1, shuffle=False)
      
      # Collect the predictions here
      all_predictions = []
      
      for x_test_batch in batches:
        batch_predictions = sess.run(predictions, {input_x: x_test_batch, dropout_keep_prob: 1.0})
        all_predictions = np.concatenate([all_predictions, batch_predictions])
        
  # Print the accuracy if Y_test is defined
  if Y_test is not None:
    correct_predictions = float(sum(all_predictions == Y_test))
    print("total number of test examples: {}".format(len(Y_test)))
    print("Accuracy: {:g}".format(correct_predictions / float(len(Y_test))))
  
  predictions_csv = np.column_stack((np.array(X_raw), all_predictions))
  out_path = os.path.join(MODEL_DIR, "..", "prediction.csv")
  print("Saving evaluation to {0}".format(out_path))
  with open(out_path, 'w', encoding="utf8", newline="") as f:
    csv.writer(f).writerows(predictions_csv)
Code Example #22
    def test(self, dataset, out_dir, apply_cmvn=False, param_cmvn=None):
        self.cur_batch_size = 2
        next_batch = data_utils.get_batch(dataset,
                                          self.cur_batch_size,
                                          self.input_dim,
                                          self.output_dim,
                                          shuffle=False)  # keep order so filenames from dataset[ind] line up
        ind = 0
        while True:
            try:
                input_data, output_data, seq_length, _ = self.sess.run(
                    next_batch)
                if (input_data.shape[0] != self.cur_batch_size):
                    continue
                output, loss = self.sess.run(
                    [self.logits, self.loss],
                    feed_dict={
                        self.train_input: input_data,
                        self.train_output: output_data,
                        self.seq_length: seq_length
                    })

                output_data = output_data[1]
                output = output[0]
                output = output[:seq_length[0]]
                output_data = output_data[:seq_length[1]]
                if (apply_cmvn):
                    output = output * param_cmvn[1] + param_cmvn[0]
                    output_data = output_data * param_cmvn[1] + param_cmvn[0]

                filename = os.path.basename(
                    dataset[ind]).split('.')[0] + '.cmp'
                tr_filename = os.path.basename(
                    dataset[ind]).split('.')[0] + '_tr.cmp'
                print('%.4f %s' % (loss, filename))
                numpy.savetxt(out_dir + '/' + filename, output, fmt='%f')
                numpy.savetxt(out_dir + '/' + tr_filename,
                              output_data,
                              fmt='%f')
                ind += 1
            except tf.errors.OutOfRangeError:
                break
Code Example #23
def eval_test_set(sess, model, test_buckets, testing_loss_summary,
                  file_writer):
    """ Evaluate on the test set. """
    for bucket_id in range(len(config.BUCKETS)):
        if len(test_buckets[bucket_id]) == 0:
            print("  Test: empty bucket %d" % (bucket_id))
            continue
        start = time.time()
        encoder_inputs, decoder_inputs, decoder_masks = data_utils.get_batch(
            test_buckets[bucket_id], bucket_id, batch_size=config.BATCH_SIZE)
        _, step_loss, _ = run_step(sess, model, encoder_inputs, decoder_inputs,
                                   decoder_masks, bucket_id, True)

        bucket_value = testing_loss_summary.value.add()
        bucket_value.tag = "testing_loss_bucket_%d" % bucket_id
        bucket_value.simple_value = step_loss
        file_writer.add_summary(testing_loss_summary, model.global_step.eval())

        print('Test bucket {}: loss {}, time {}'.format(
            bucket_id, step_loss,
            time.time() - start))
Code Example #24
File: wechat.py Project: JasonK93/Deechat
def wechat_text(msg):
    '''
    get the response to msg
    :param msg: the type of the msg is 'Text'
    :return: response to msg by using seq2seq model
    '''
    text = msg["Text"]
    token_ids = data_utils.sentence2id(enc_vocab, text)
    if len(token_ids) > max_length:
        print("Max length I can handle is:", max_length)
        # line = _get_user_input()
    # Which bucket does it belong to?
    bucket_id = find_right_bucket(len(token_ids))
    # Get a 1-element batch to feed the sentence to the model.
    encoder_inputs, decoder_inputs, decoder_masks = data_utils.get_batch(
        [(token_ids, [])], bucket_id, batch_size=1)
    # Get output logits for the sentence.
    _, _, output_logits = run_step(sess, model, encoder_inputs, decoder_inputs,
                                   decoder_masks, bucket_id, True)
    response = construct_response(output_logits, inv_dec_vocab)

    return response
Code Example #25
File: api3.py Project: zoey-wangzw/medical-chatbot
def seq_pred(question):
    _, enc_vocab = data_utils.load_vocab(os.path.join(config.DATA_PATH, "vocab.enc"))
    inv_dec_vocab, _ = data_utils.load_vocab(os.path.join(config.DATA_PATH, "vocab.dec"))
    model = ChatBotModel(True, batch_size=1)
    model.build_graph()
    saver = tf.train.Saver()

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        check_restore_parameters(sess, saver)
        max_length = config.BUCKETS[-1][0]
        line = question
        if hasattr(line, "decode"):
            # If using Python 2
            # FIXME: UnicodeError when deleting Chinese in terminal.
            line = line.decode("utf-8")
        if len(line) > 0 and line[-1] == "\n":
            line = line[:-1]
        if not line:
            pass

        token_ids = data_utils.sentence2id(enc_vocab, line)
        if len(token_ids) > max_length:
            line = question
            pass

        bucket_id = find_right_bucket(len(token_ids))
        # Get a 1-element batch to feed the sentence to the model.
        encoder_inputs, decoder_inputs, decoder_masks = data_utils.get_batch(
            [(token_ids, [])], bucket_id, batch_size=1)
        # Get output logits for the sentence.
        _, _, output_logits = run_step(sess, model, encoder_inputs,
                                       decoder_inputs, decoder_masks,
                                       bucket_id, True)
        response = construct_response(output_logits, inv_dec_vocab)
        answer = response
        return answer
Code Example #26
File: neural_gpu_trainer.py Project: 1206lyp/models
def single_test(l, model, sess, task, nprint, batch_size, print_out=True,
                offset=None, ensemble=None, get_steps=False):
  """Test model on test data of length l using the given session."""
  inpt, target = data.get_batch(l, batch_size, False, task, offset)
  _, res, _, steps = model.step(sess, inpt, target, False, get_steps=get_steps)
  errors, total, seq_err = data.accuracy(inpt, res, target, batch_size, nprint)
  seq_err = float(seq_err) / batch_size
  if total > 0:
    errors = float(errors) / total
  if print_out:
    data.print_out("  %s len %d errors %.2f sequence-errors %.2f"
                   % (task, l, 100*errors, 100*seq_err))
  # Ensemble eval.
  if ensemble:
    results = []
    for m in ensemble:
      model.saver.restore(sess, m)
      _, result, _, _ = model.step(sess, inpt, target, False)
      m_errors, m_total, m_seq_err = data.accuracy(inpt, result, target,
                                                   batch_size, nprint)
      m_seq_err = float(m_seq_err) / batch_size
      if total > 0:
        m_errors = float(m_errors) / m_total
      data.print_out("     %s len %d m-errors %.2f m-sequence-errors %.2f"
                     % (task, l, 100*m_errors, 100*m_seq_err))
      results.append(result)
    ens = [sum(o) for o in zip(*results)]
    errors, total, seq_err = data.accuracy(inpt, ens, target,
                                           batch_size, nprint)
    seq_err = float(seq_err) / batch_size
    if total > 0:
      errors = float(errors) / total
    if print_out:
      data.print_out("  %s len %d ens-errors %.2f ens-sequence-errors %.2f"
                     % (task, l, 100*errors, 100*seq_err))
  return errors, seq_err, (steps, inpt, [np.argmax(o, axis=1) for o in res])
Code Example #27
File: generation.py Project: hyunnibal/ETRI
            vis_sample = sess.run(G_sample, feed_dict={Z: vis_Z, CG: vis_C})
        else:
            vis_sample = sess.run(G_sample, feed_dict={Z: vis_Z})
        plotting.visualise_at_epoch(vis_sample, data,
                                    predict_labels, one_hot, epoch, identifier, num_epochs,
                                    resample_rate_in_min, multivariate_mnist, seq_length, labels=vis_C)

    # compute mmd2 and, if available, prob density
    if epoch % eval_freq == 0:
        t = time() - t0
        print('%d\t%.2f\t%.4f\t%.4f' % (epoch, t, D_loss_curr, G_loss_curr))
        if 'eICU' in data:
            gen_samples = []
            labels_gen_samples = []
            for batch_idx in range(int(len(train_seqs) / batch_size)):
                X_mb, Y_mb = data_utils.get_batch(train_seqs, batch_size, batch_idx, train_targets)
                z_ = model.sample_Z(batch_size, seq_length, latent_dim, use_time=use_time)
                gen_samples_mb = sess.run(G_sample, feed_dict={Z: z_, CG: Y_mb})
                gen_samples.append(gen_samples_mb)
                labels_gen_samples.append(Y_mb)

            for batch_idx in range(int(len(vali_seqs) / batch_size)):
                X_mb, Y_mb = data_utils.get_batch(vali_seqs, batch_size, batch_idx, vali_targets)
                z_ = model.sample_Z(batch_size, seq_length, latent_dim, use_time=use_time)
                gen_samples_mb = sess.run(G_sample, feed_dict={Z: z_, CG: Y_mb})
                gen_samples.append(gen_samples_mb)
                labels_gen_samples.append(Y_mb)

            gen_samples = np.vstack(gen_samples)
            labels_gen_samples = np.vstack(labels_gen_samples)
Code Example #28
def train():
    """Train the model."""
    batch_size = FLAGS.batch_size
    tasks = FLAGS.task.split("-")
    with tf.Session() as sess:
        (model, min_length, max_length, checkpoint_dir, curriculum,
         _) = initialize(sess)
        quant_op = neural_gpu.quantize_weights_op(512, 8)
        max_cur_length = min(min_length + 3, max_length)
        prev_acc_perp = [1000000 for _ in xrange(3)]
        prev_seq_err = 1.0

        # Main training loop.
        while True:
            global_step, pull, max_cur_length, learning_rate = sess.run(
                [model.global_step, model.pull, model.cur_length, model.lr])
            acc_loss, acc_total, acc_errors, acc_seq_err = 0.0, 0, 0, 0
            acc_grad_norm, step_count, step_time = 0.0, 0, 0.0
            for _ in xrange(FLAGS.steps_per_checkpoint):
                global_step += 1
                task = random.choice(tasks)

                # Select the length for curriculum learning.
                l = np.random.randint(max_cur_length - min_length +
                                      1) + min_length
                # Prefer longer stuff 60% of time.
                if np.random.randint(100) < 60:
                    l1 = np.random.randint(max_cur_length - min_length +
                                           1) + min_length
                    l = max(l, l1)
                # Mixed curriculum learning: in 25% of cases go to any larger length.
                if np.random.randint(100) < 25:
                    l1 = np.random.randint(max_length - min_length +
                                           1) + min_length
                    l = max(l, l1)

                # Run a step and time it.
                start_time = time.time()
                inp, target = data.get_batch(l, batch_size, True, task)
                noise_param = math.sqrt(
                    math.pow(global_step, -0.55) *
                    prev_seq_err) * FLAGS.grad_noise_scale
                loss, res, gnorm, _ = model.step(sess, inp, target, True,
                                                 noise_param)
                step_time += time.time() - start_time
                acc_grad_norm += float(gnorm)

                # Accumulate statistics only if we did not exceed curriculum length.
                if l < max_cur_length + 1:
                    step_count += 1
                    acc_loss += loss
                    errors, total, seq_err = data.accuracy(
                        inp, res, target, batch_size, 0)
                    acc_total += total
                    acc_errors += errors
                    acc_seq_err += seq_err

            # Normalize and print out accumulated statistics.
            acc_loss /= step_count
            step_time /= FLAGS.steps_per_checkpoint
            acc_seq_err = float(acc_seq_err) / (step_count * batch_size)
            prev_seq_err = max(0.0,
                               acc_seq_err - 0.02)  # No noise at error < 2%.
            acc_errors = float(
                acc_errors) / acc_total if acc_total > 0 else 1.0
            msg1 = "step %d step-time %.2f" % (global_step, step_time)
            msg2 = "lr %.8f pull %.3f" % (learning_rate, pull)
            msg3 = ("%s %s grad-norm %.8f" %
                    (msg1, msg2, acc_grad_norm / FLAGS.steps_per_checkpoint))
            data.print_out(
                "%s len %d ppx %.8f errors %.2f sequence-errors %.2f" %
                (msg3, max_cur_length, data.safe_exp(acc_loss),
                 100 * acc_errors, 100 * acc_seq_err))

            # If errors are below the curriculum threshold, move curriculum forward.
            if curriculum > acc_seq_err:
                if FLAGS.quantize:
                    # Quantize weights.
                    data.print_out("  Quantizing parameters.")
                    sess.run([quant_op])
                # Increase current length (until the next with training data).
                do_incr = True
                while do_incr and max_cur_length < max_length:
                    sess.run(model.cur_length_incr_op)
                    for t in tasks:
                        if data.train_set[t]: do_incr = False
                # Forget last perplexities if we're not yet at the end.
                if max_cur_length < max_length:
                    prev_acc_perp.append(1000000)
                # Either increase pull or, if it's large, average parameters.
                if pull < 0.1:
                    sess.run(model.pull_incr_op)
                else:
                    data.print_out("  Averaging parameters.")
                    sess.run(model.avg_op)
                    if acc_seq_err < (curriculum / 3.0):
                        sess.run(model.lr_decay_op)

            # Lower learning rate if we're worse than the last 3 checkpoints.
            acc_perp = data.safe_exp(acc_loss)
            if acc_perp > max(prev_acc_perp[-3:]):
                sess.run(model.lr_decay_op)
            prev_acc_perp.append(acc_perp)

            # Save checkpoint.
            checkpoint_path = os.path.join(checkpoint_dir, "neural_gpu.ckpt")
            model.saver.save(sess,
                             checkpoint_path,
                             global_step=model.global_step)

            # Run evaluation.
            bound = data.bins[-1] + 1
            for t in tasks:
                l = min_length
                while l < max_length + EXTRA_EVAL and l < bound:
                    _, seq_err, _ = single_test(l, model, sess, t,
                                                FLAGS.nprint, batch_size)
                    l += 1
                    while l < bound + 1 and not data.test_set[t][l]:
                        l += 1
                if seq_err < 0.05:  # Run larger test if we're good enough.
                    _, seq_err = multi_test(data.forward_max, model, sess, t,
                                            FLAGS.nprint, batch_size * 4)
            if seq_err < 0.01:  # Super-large test on 1-task large-forward models.
                if data.forward_max > 4000 and len(tasks) == 1:
                    multi_test(data.forward_max, model, sess, tasks[0],
                               FLAGS.nprint, batch_size * 16, 0)
Code Example #29
optimizer = tf.train.AdamOptimizer(learning_rate=LEARNING_RATE)
train_step = optimizer.minimize(loss, global_step=global_step)
session.run([tf.global_variables_initializer(), tf.tables_initializer()])
saver = tf.train.Saver()

for epoch in range(NUM_EPOCH_):  # set total epochs
    print("Epoch: ", epoch)
    for epoch2 in range(2):  # epochs per mini batch
        Image_x = []
        Image_x2 = []
        Text_x = []
        Label_y = []
        j = 0
        if (getBatch):
            ran = random.randint(0, len(df) - 200)
            mini_batch = data_utils.get_batch(ran, ran + 200, df)
        for d in mini_batch:
            Label_y_ = []
            for i in range(80):
                Label_y_.append(0)
            Image_x.append(d.img_feat)
            r = random.randint(0, 4)
            Text_x.append(d.sentences[r])
            Label_y_[j] = 1
            r2 = random.randint(0, 4)
            Text_x.append(d.sentences[r2])
            Label_y_[j + 1] = 1
            Label_y.append(Label_y_)
            j += 2
        Image_x = np.asarray(Image_x)
        Label_y = np.asarray(Label_y)
Code Example #30
 def supply_test_data(self, length, batch_size):
     data, labels = data_gen.get_batch(length, batch_size, False, cnf.task)
     return [data], [labels]
Code Example #31
    def lstm_cell():
        cell = tf.contrib.rnn.LSTMCell(params.hidden_size)
        return tf.contrib.rnn.DropoutWrapper(cell, output_keep_prob=keep_prob)
    with tf.name_scope('rnn_layer'):
        mlstm_cell = tf.contrib.rnn.MultiRNNCell([lstm_cell() for _ in range(params.layer_num)], state_is_tuple=True)
        # init_state = mlstm_cell.zero_state(tf.shape(x)[0], dtype=tf.float32)
        outputs, state = tf.nn.dynamic_rnn(mlstm_cell,
                                           inputs=x,
                                           sequence_length=None,
                                           initial_state=None,
                                           dtype=tf.float32,
                                           time_major=False)
        output = state[-1][1]
    with tf.name_scope('calculate_accuracy'):
        cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=output))
        correct_prediction = tf.equal(tf.argmax(output, 1), tf.argmax(y, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    with tf.name_scope('train'):
        train_step = tf.train.AdamOptimizer(0.001).minimize(cross_entropy)
    with tf.name_scope('calculate_loss'):
        loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels=y, logits=output))
        # y_ = tf.nn.sigmoid(output)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for i in range(params.epoch):
            for x_batch, y_batch in data_utils.get_batch(x, y, params.batch_size, params.seq_length):
                # avoid rebinding the x/y placeholders; fetch the accuracy op defined above
                l, acc, _ = sess.run([loss, accuracy, train_step], feed_dict={x: x_batch, y: y_batch})
                print("Step: {:>4}, Loss: {:.4f}, Acc: {:.4%}".format(i, l, acc))


Code Example #32
File: neural_gpu_trainer.py Project: ALISCIFP/models
def evaluate():
  """Evaluate an existing model."""
  batch_size = FLAGS.batch_size * FLAGS.num_gpus
  with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
    (model, beam_model, _, _, _,
     (_, dev_set, en_vocab_path, fr_vocab_path), _, sess) = initialize(sess)
    for p in FLAGS.problem.split("-"):
      for bin_id in xrange(len(data.bins)):
        if (FLAGS.task >= 0 and bin_id > 4) or (FLAGS.nprint == 0 and
                                                bin_id > 8 and p == "wmt"):
          break
        single_test(bin_id, model, sess, FLAGS.nprint, batch_size, dev_set, p,
                    beam_model=beam_model)
    path = FLAGS.test_file_prefix
    xid = "" if FLAGS.task < 0 else ("%.4d" % (FLAGS.task+FLAGS.decode_offset))
    en_path, fr_path = path + ".en" + xid, path + ".fr" + xid
    # Evaluate the test file if they exist.
    if path and tf.gfile.Exists(en_path) and tf.gfile.Exists(fr_path):
      data.print_out("Translating test set %s" % en_path)
      # Read lines.
      en_lines, fr_lines = [], []
      with tf.gfile.GFile(en_path, mode="r") as f:
        for line in f:
          en_lines.append(line.strip())
      with tf.gfile.GFile(fr_path, mode="r") as f:
        for line in f:
          fr_lines.append(line.strip())
      # Tokenize and convert to ids.
      en_vocab, _ = wmt.initialize_vocabulary(en_vocab_path)
      _, rev_fr_vocab = wmt.initialize_vocabulary(fr_vocab_path)
      if FLAGS.simple_tokenizer:
        en_ids = [wmt.sentence_to_token_ids(
            l, en_vocab, tokenizer=wmt.space_tokenizer,
            normalize_digits=FLAGS.normalize_digits)
                  for l in en_lines]
      else:
        en_ids = [wmt.sentence_to_token_ids(l, en_vocab) for l in en_lines]
      # Translate.
      results = []
      for idx, token_ids in enumerate(en_ids):
        if idx % 5 == 0:
          data.print_out("Translating example %d of %d." % (idx, len(en_ids)))
        # Which bucket does it belong to?
        buckets = [b for b in xrange(len(data.bins))
                   if data.bins[b] >= len(token_ids)]
        if buckets:
          result, result_cost = [], 100000000.0
          for bucket_id in buckets:
            if data.bins[bucket_id] > MAXLEN_F * len(token_ids) + EVAL_LEN_INCR:
              break
            # Get a 1-element batch to feed the sentence to the model.
            used_batch_size = 1  # batch_size
            inp, target = data.get_batch(
                bucket_id, used_batch_size, None, FLAGS.height,
                preset=([token_ids], [[]]))
            loss, output_logits, _, _ = model.step(
                sess, inp, target, None, beam_size=FLAGS.beam_size)
            outputs = [int(o[0]) for o in output_logits]
            loss = loss[0] - (data.bins[bucket_id] * FLAGS.length_norm)
            if FLAGS.simple_tokenizer:
              cur_out = outputs
              if wmt.EOS_ID in cur_out:
                cur_out = cur_out[:cur_out.index(wmt.EOS_ID)]
              res_tags = [rev_fr_vocab[o] for o in cur_out]
              bad_words, bad_brack = wmt.parse_constraints(token_ids, res_tags)
              loss += 1000.0 * bad_words + 100.0 * bad_brack
            # print (bucket_id, loss)
            if loss < result_cost:
              result = outputs
              result_cost = loss
          final = linearize(result, rev_fr_vocab)
          results.append("%s\t%s\n" % (final, fr_lines[idx]))
          # print result_cost
          sys.stderr.write(results[-1])
          sys.stderr.flush()
        else:
          sys.stderr.write("TOOO_LONG\t%s\n" % fr_lines[idx])
          sys.stderr.flush()
      if xid:
        decode_suffix = "beam%dln%dn" % (FLAGS.beam_size,
                                         int(100 * FLAGS.length_norm))
        with tf.gfile.GFile(path + ".res" + decode_suffix + xid, mode="w") as f:
          for line in results:
            f.write(line)
Code Example #33
    def fit(self, train, val=None, out_dir='log/', verbose=False):
        """
            Trains the model using the input training set.
            
            Summarizes the loss and training (and validation) accuracies.
            
            Input
            =====
            - train: a pair for the features and labels of the training set.
            - val: a pair for the features and labels of the validation set, optional.
            - out_dir: the directory location for summary files relative to root directory, optional.
            - verbose: a boolean flag. If set to true, loss values and execution time is printed to the console, optional.
        """

        X_train, y_train = train
        if (val):
            X_val, y_val = val

        # All of the built ops will be associated with the default global graph instance
        with tf.Graph().as_default():

            # Create a model instance
            features, labels = lu.input_placeholders()

            logits = self.inference(features, self.hidden_dim)

            loss = self.loss(logits, labels)

            train_step = self.training(loss)

            accuracy = self.evaluation(logits, labels)

            conf_matrix = tf.confusion_matrix(
                tf.argmax(labels, 1),
                tf.argmax(logits, 1),
                num_classes=config.NUM_ACTIVITIES)

            # Create summarizers
            loss_summary = tf.summary.scalar('loss', loss)
            weights_summary = lu.get_histogram_summary(len(self.hidden_dim))
            train_acc_summary = tf.summary.scalar('train_acc', accuracy)
            val_acc_summary = tf.summary.scalar('val_acc', accuracy)
            train_summary = tf.summary.merge(
                [loss_summary, weights_summary, train_acc_summary])

            # Create Model Saver
            saver = tf.train.Saver()

            # Create a session
            sess = tf.Session()
            summary_writer = tf.summary.FileWriter(out_dir, sess.graph)

            # train the model
            init_vars = tf.global_variables_initializer()
            sess.run(init_vars)

            for step in range(self.num_steps):
                start_time = time.time()
                X_batch, y_batch = du.get_batch(train,
                                                batch_size=self.batch_size)
                feed_dict = {features: X_batch, labels: y_batch}

                _, loss_value, summary_str = sess.run(
                    [train_step, loss, loss_summary], feed_dict=feed_dict)
                summary_writer.add_summary(summary_str, step)

                if (step % 100 == 0):
                    # evaluate model on train dataset
                    feed_dict = {features: X_train, labels: y_train}
                    _, summary_str = sess.run([accuracy, train_summary],
                                              feed_dict=feed_dict)
                    summary_writer.add_summary(summary_str, step)

                    # evaluate model on validation dataset
                    if (val):
                        feed_dict = {features: X_val, labels: y_val}
                        _, summary_str = sess.run([accuracy, val_acc_summary],
                                                  feed_dict=feed_dict)
                        summary_writer.add_summary(summary_str, step)

                    if (verbose):
                        duration = time.time() - start_time
                        print('Step %d, loss = %.3f (%.3f sec)' %
                              (step, loss_value, duration))

                        summary_writer.flush()

        self.sess = sess
        self.acc = accuracy
        self.conf_matrix = conf_matrix
        self.features = features
        self.labels = labels

        # Save the model
        save_path = out_dir + 'model/'
        if not os.path.isdir(save_path):
            os.makedirs(save_path)
        saver.save(sess, save_path + 'model.ckpt')
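
For orientation, here is a minimal, hypothetical driver for the fit() method above. The owning class name, its constructor arguments, and the feature/label shapes are not shown in this excerpt, so HARClassifier, num_features and num_classes below are placeholders rather than the project's actual API.

import numpy as np

# Placeholder dimensions; the real values come from the project's config/lu helpers.
num_features, num_classes = 64, 6

X_train = np.random.rand(1000, num_features).astype(np.float32)
y_train = np.eye(num_classes)[np.random.randint(num_classes, size=1000)]
X_val = np.random.rand(200, num_features).astype(np.float32)
y_val = np.eye(num_classes)[np.random.randint(num_classes, size=200)]

model = HARClassifier(hidden_dim=[128, 64])  # placeholder class exposing fit()
model.fit((X_train, y_train), val=(X_val, y_val), out_dir='log/', verbose=True)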
Code example #34
0
File: neural_gpu_trainer.py Project: 1206lyp/models
def train():
  """Train the model."""
  batch_size = FLAGS.batch_size
  tasks = FLAGS.task.split("-")
  with tf.Session() as sess:
    (model, min_length, max_length, checkpoint_dir,
     curriculum, _) = initialize(sess)
    quant_op = neural_gpu.quantize_weights_op(512, 8)
    max_cur_length = min(min_length + 3, max_length)
    prev_acc_perp = [1000000 for _ in xrange(3)]
    prev_seq_err = 1.0

    # Main training loop.
    while True:
      global_step, pull, max_cur_length, learning_rate = sess.run(
          [model.global_step, model.pull, model.cur_length, model.lr])
      acc_loss, acc_total, acc_errors, acc_seq_err = 0.0, 0, 0, 0
      acc_grad_norm, step_count, step_time = 0.0, 0, 0.0
      for _ in xrange(FLAGS.steps_per_checkpoint):
        global_step += 1
        task = random.choice(tasks)

        # Select the length for curriculum learning.
        l = np.random.randint(max_cur_length - min_length + 1) + min_length
        # Prefer longer stuff 60% of time.
        if np.random.randint(100) < 60:
          l1 = np.random.randint(max_cur_length - min_length+1) + min_length
          l = max(l, l1)
        # Mixed curriculum learning: in 25% of cases go to any larger length.
        if np.random.randint(100) < 25:
          l1 = np.random.randint(max_length - min_length + 1) + min_length
          l = max(l, l1)

        # Run a step and time it.
        start_time = time.time()
        inp, target = data.get_batch(l, batch_size, True, task)
        noise_param = math.sqrt(math.pow(global_step, -0.55) *
                                prev_seq_err) * FLAGS.grad_noise_scale
        loss, res, gnorm, _ = model.step(sess, inp, target, True, noise_param)
        step_time += time.time() - start_time
        acc_grad_norm += float(gnorm)

        # Accumulate statistics only if we did not exceed curriculum length.
        if l < max_cur_length + 1:
          step_count += 1
          acc_loss += loss
          errors, total, seq_err = data.accuracy(inp, res, target,
                                                 batch_size, 0)
          acc_total += total
          acc_errors += errors
          acc_seq_err += seq_err

      # Normalize and print out accumulated statistics.
      acc_loss /= step_count
      step_time /= FLAGS.steps_per_checkpoint
      acc_seq_err = float(acc_seq_err) / (step_count * batch_size)
      prev_seq_err = max(0.0, acc_seq_err - 0.02)  # No noise at error < 2%.
      acc_errors = float(acc_errors) / acc_total if acc_total > 0 else 1.0
      msg1 = "step %d step-time %.2f" % (global_step, step_time)
      msg2 = "lr %.8f pull %.3f" % (learning_rate, pull)
      msg3 = ("%s %s grad-norm %.8f"
              % (msg1, msg2, acc_grad_norm / FLAGS.steps_per_checkpoint))
      data.print_out("%s len %d ppx %.8f errors %.2f sequence-errors %.2f" %
                     (msg3, max_cur_length, data.safe_exp(acc_loss),
                      100*acc_errors, 100*acc_seq_err))

      # If errors are below the curriculum threshold, move curriculum forward.
      if curriculum > acc_seq_err:
        if FLAGS.quantize:
          # Quantize weights.
          data.print_out("  Quantizing parameters.")
          sess.run([quant_op])
        # Increase current length (until the next with training data).
        do_incr = True
        while do_incr and max_cur_length < max_length:
          sess.run(model.cur_length_incr_op)
          for t in tasks:
            if data.train_set[t]: do_incr = False
        # Forget last perplexities if we're not yet at the end.
        if max_cur_length < max_length:
          prev_acc_perp.append(1000000)
        # Either increase pull or, if it's large, average parameters.
        if pull < 0.1:
          sess.run(model.pull_incr_op)
        else:
          data.print_out("  Averaging parameters.")
          sess.run(model.avg_op)
          if acc_seq_err < (curriculum / 3.0):
            sess.run(model.lr_decay_op)

      # Lower learning rate if we're worse than the last 3 checkpoints.
      acc_perp = data.safe_exp(acc_loss)
      if acc_perp > max(prev_acc_perp[-3:]):
        sess.run(model.lr_decay_op)
      prev_acc_perp.append(acc_perp)

      # Save checkpoint.
      checkpoint_path = os.path.join(checkpoint_dir, "neural_gpu.ckpt")
      model.saver.save(sess, checkpoint_path,
                       global_step=model.global_step)

      # Run evaluation.
      bound = data.bins[-1] + 1
      for t in tasks:
        l = min_length
        while l < max_length + EXTRA_EVAL and l < bound:
          _, seq_err, _ = single_test(l, model, sess, t,
                                      FLAGS.nprint, batch_size)
          l += 1
          while l < bound + 1 and not data.test_set[t][l]:
            l += 1
        if seq_err < 0.05:  # Run larger test if we're good enough.
          _, seq_err = multi_test(data.forward_max, model, sess, t,
                                  FLAGS.nprint, batch_size * 4)
      if seq_err < 0.01:  # Super-large test on 1-task large-forward models.
        if data.forward_max > 4000 and len(tasks) == 1:
          multi_test(data.forward_max, model, sess, tasks[0], FLAGS.nprint,
                     batch_size * 16, 0)
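
The length selection inside the training loop above (a uniform draw over the current curriculum window, a 60% bias toward longer lengths, and a 25% chance of escaping to the full range) can be read as a small standalone helper. The sketch below only restates the snippet's logic for clarity; it is not code from the project.

import numpy as np

def sample_curriculum_length(min_length, max_cur_length, max_length):
    # Uniform draw within the current curriculum window.
    l = np.random.randint(max_cur_length - min_length + 1) + min_length
    # Prefer longer lengths 60% of the time by taking the max of two draws.
    if np.random.randint(100) < 60:
        l = max(l, np.random.randint(max_cur_length - min_length + 1) + min_length)
    # In 25% of cases, allow any length up to max_length.
    if np.random.randint(100) < 25:
        l = max(l, np.random.randint(max_length - min_length + 1) + min_length)
    return l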
Code example #35
0
        with tf.name_scope("eval"):
            scores = tf.nn.softmax(logits)
            predictions = tf.argmax(scores, 1, name="predictions")
            correct_predictions = tf.equal(predictions, tf.argmax(y, 1))
            accuracy = tf.reduce_mean(tf.cast(correct_predictions, "float"), name="accuracy")

            # Note: if you prefer to one_hot encode in "train":
            # probabilities = tf.nn.softmax(logits, name="probabilities")
            # classes = tf.argmax(input=probabilities, axis=1)
            # accuracy = tf.contrib.metrics.accuracy(y,classes)

        with tf.name_scope("init_and_save"):
            init = tf.global_variables_initializer()
            saver = tf.train.Saver()


    with tf.Session(graph=graph) as sess:
        init.run()
        batch_iter = get_batch(x_train, y_train)
        for epoch in range(10):
            for iteration in range(x_train.shape[0] // batch_size):
                X_batch, y_batch = batch_iter.next_batch(batch_size)
                sess.run(training_op, feed_dict={X: X_batch, y: y_batch, is_training: True})
            acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})
            acc_test = accuracy.eval(feed_dict={X: x_val, y: y_val})
            print(epoch, "Train accuracy:", acc_train, "Test accuracy:", acc_test)
            save_path = saver.save(sess, "model_nn/model")
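
The snippet above assumes a get_batch(x_train, y_train) helper returning an object with a next_batch(batch_size) method. A minimal iterator with that interface could look like the sketch below; the project's actual helper may shuffle, pad, or cycle differently.

import numpy as np

class _BatchIterator(object):
    """Minimal batch iterator matching the next_batch() interface used above."""

    def __init__(self, X, y):
        self.X, self.y = X, y
        self.pos = 0

    def next_batch(self, batch_size):
        if self.pos + batch_size > len(self.X):
            # Reshuffle and restart once the data is exhausted.
            idx = np.random.permutation(len(self.X))
            self.X, self.y = self.X[idx], self.y[idx]
            self.pos = 0
        start, self.pos = self.pos, self.pos + batch_size
        return self.X[start:self.pos], self.y[start:self.pos]

def get_batch(X, y):
    return _BatchIterator(X, y)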


Code example #36
0
def train_epoch(epoch, samples, labels, sess, Z, X, CG, CD, CS,
                D_loss, G_loss, D_solver, G_solver,
                batch_size, use_time, D_rounds, G_rounds, seq_length,
                latent_dim, num_generated_features, cond_dim, max_val,
                WGAN_clip, one_hot):
    """
    Train generator and discriminator for one epoch.
    """
    for batch_idx in range(
            0,
            int(len(samples) / batch_size) - (D_rounds +
                                              (cond_dim > 0) * G_rounds),
            D_rounds + (cond_dim > 0) * G_rounds):
        # update the discriminator
        for d in range(D_rounds):
            X_mb, Y_mb = data_utils.get_batch(samples, batch_size,
                                              batch_idx + d, labels)
            # Use the earlier timesteps as the generator input (in place of the
            # sample_Z noise used elsewhere); the last latent_dim steps are kept
            # as the real sample below.
            Z_mb = X_mb[:, :-latent_dim, :]
            X_mb = X_mb[:, -latent_dim:, :]
            X_mb = X_mb.reshape(-1, latent_dim, num_generated_features)
            if cond_dim > 0:
                # CGAN
                Y_mb = Y_mb.reshape(-1, cond_dim)
                if one_hot:
                    # change all of the labels to a different one
                    offsets = np.random.choice(cond_dim - 1, batch_size) + 1
                    new_labels = (np.argmax(Y_mb, axis=1) + offsets) % cond_dim
                    Y_wrong = np.zeros_like(Y_mb)
                    Y_wrong[np.arange(batch_size), new_labels] = 1
                else:
                    # flip all of the bits (assuming binary...)
                    Y_wrong = 1 - Y_mb
                _ = sess.run(D_solver,
                             feed_dict={
                                 X: X_mb,
                                 Z: Z_mb,
                                 CD: Y_mb,
                                 CS: Y_wrong,
                                 CG: Y_mb
                             })
            else:
                _ = sess.run(D_solver, feed_dict={X: X_mb, Z: Z_mb})
            if WGAN_clip:
                raise NotImplementedError("Not implemented WGAN")
                # clip the weights
                # _ = sess.run([clip_disc_weights])
        # update the generator
        for g in range(G_rounds):
            if cond_dim > 0:
                # note we are essentially throwing these X_mb away...
                X_mb, Y_mb = data_utils.get_batch(samples, batch_size,
                                                  batch_idx + D_rounds + g,
                                                  labels)
                _ = sess.run(G_solver,
                             feed_dict={
                                 Z: sample_Z(batch_size, seq_length, latent_dim,
                                             use_time=use_time),
                                 CG: Y_mb
                             })
            else:
                Z_mb, Y_mb = data_utils.get_batch(samples, batch_size,
                                                  batch_idx, labels)
                Z_mb = Z_mb[:, :-latent_dim, :]
                _ = sess.run(G_solver, feed_dict={Z: Z_mb})
    # at the end, get the loss
    if cond_dim > 0:
        D_loss_curr, G_loss_curr = sess.run(
            [D_loss, G_loss],
            feed_dict={
                X: X_mb,
                Z: sample_Z(batch_size,
                            seq_length,
                            latent_dim,
                            use_time=use_time),
                CG: Y_mb,
                CD: Y_mb
            })
        D_loss_curr = np.mean(D_loss_curr)
        G_loss_curr = np.mean(G_loss_curr)
    else:
        D_loss_curr, G_loss_curr = sess.run(
            [D_loss, G_loss], feed_dict={X: X_mb, Z: Z_mb})
        D_loss_curr = np.mean(D_loss_curr)
        G_loss_curr = np.mean(G_loss_curr)
    return D_loss_curr, G_loss_curr
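
The code above calls a sample_Z(batch_size, seq_length, latent_dim, use_time) helper that is not shown in this excerpt. A minimal sketch consistent with that call signature is given below; the repository's actual implementation may differ, for example in how (or whether) a time channel is encoded.

import numpy as np

def sample_Z(batch_size, seq_length, latent_dim, use_time=False):
    """Draw a [batch_size, seq_length, latent_dim] block of Gaussian latent noise."""
    Z = np.random.normal(size=(batch_size, seq_length, latent_dim)).astype(np.float32)
    if use_time:
        # Assumption: reserve the first latent channel for a normalized time index.
        Z[:, :, 0] = np.linspace(0.0, 1.0, num=seq_length)
    return Z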
Code example #37
0
File: neural_gpu_trainer.py Project: ALISCIFP/models
def train():
  """Train the model."""
  batch_size = FLAGS.batch_size * FLAGS.num_gpus
  (model, beam_model, min_length, max_length, checkpoint_dir,
   (train_set, dev_set, en_vocab_path, fr_vocab_path), sv, sess) = initialize()
  with sess.as_default():
    quant_op = model.quantize_op
    max_cur_length = min(min_length + 3, max_length)
    prev_acc_perp = [1000000 for _ in xrange(5)]
    prev_seq_err = 1.0
    is_chief = FLAGS.task < 1
    do_report = False

    # Main training loop.
    while not sv.ShouldStop():
      global_step, max_cur_length, learning_rate = sess.run(
          [model.global_step, model.cur_length, model.lr])
      acc_loss, acc_l1, acc_total, acc_errors, acc_seq_err = 0.0, 0.0, 0, 0, 0
      acc_grad_norm, step_count, step_c1, step_time = 0.0, 0, 0, 0.0

      # For words in the word vector file, set their embedding at start.
      bound1 = FLAGS.steps_per_checkpoint - 1
      if FLAGS.word_vector_file_en and global_step < bound1 and is_chief:
        assign_vectors(FLAGS.word_vector_file_en, "embedding:0",
                       en_vocab_path, sess)
        if FLAGS.max_target_vocab < 1:
          assign_vectors(FLAGS.word_vector_file_en, "target_embedding:0",
                         en_vocab_path, sess)

      if FLAGS.word_vector_file_fr and global_step < bound1 and is_chief:
        assign_vectors(FLAGS.word_vector_file_fr, "embedding:0",
                       fr_vocab_path, sess)
        if FLAGS.max_target_vocab < 1:
          assign_vectors(FLAGS.word_vector_file_fr, "target_embedding:0",
                         fr_vocab_path, sess)

      for _ in xrange(FLAGS.steps_per_checkpoint):
        step_count += 1
        step_c1 += 1
        global_step = int(model.global_step.eval())
        train_beam_anneal = global_step / float(FLAGS.train_beam_anneal)
        train_beam_freq = FLAGS.train_beam_freq * min(1.0, train_beam_anneal)
        p = random.choice(FLAGS.problem.split("-"))
        train_set = global_train_set[p][-1]
        bucket_id = get_bucket_id(train_buckets_scale[p][-1], max_cur_length,
                                  train_set)
        # Prefer longer stuff 60% of time if not wmt.
        if np.random.randint(100) < 60 and FLAGS.problem != "wmt":
          bucket1 = get_bucket_id(train_buckets_scale[p][-1], max_cur_length,
                                  train_set)
          bucket_id = max(bucket1, bucket_id)

        # Run a step and time it.
        start_time = time.time()
        inp, target = data.get_batch(bucket_id, batch_size, train_set,
                                     FLAGS.height)
        noise_param = math.sqrt(math.pow(global_step + 1, -0.55) *
                                prev_seq_err) * FLAGS.grad_noise_scale
        # In multi-step mode, we use best from beam for middle steps.
        state, new_target, scores, history = None, None, None, []
        while (FLAGS.beam_size > 1 and
               train_beam_freq > np.random.random_sample()):
          # Get the best beam (no training, just forward model).
          new_target, new_first, new_inp, scores = get_best_beam(
              beam_model, sess, inp, target,
              batch_size, FLAGS.beam_size, bucket_id, history, p)
          history.append(new_first)
          # Training step with the previous input and the best beam as target.
          _, _, _, state = model.step(sess, inp, new_target, FLAGS.do_train,
                                      noise_param, update_mem=True, state=state)
          # Change input to the new one for the next step.
          inp = new_inp
          # If all results are great, stop (todo: not to wait for all?).
          if FLAGS.nprint > 1:
            print(scores)
          if sum(scores) / float(len(scores)) >= 10.0:
            break
        # The final step with the true target.
        loss, res, gnorm, _ = model.step(
            sess, inp, target, FLAGS.do_train, noise_param,
            update_mem=True, state=state)
        step_time += time.time() - start_time
        acc_grad_norm += 0.0 if gnorm is None else float(gnorm)

        # Accumulate statistics.
        acc_loss += loss
        acc_l1 += loss
        errors, total, seq_err = data.accuracy(
            inp, res, target, batch_size, 0, new_target, scores)
        if FLAGS.nprint > 1:
          print("seq_err: ", seq_err)
        acc_total += total
        acc_errors += errors
        acc_seq_err += seq_err

        # Report summary every 10 steps.
        if step_count + 3 > FLAGS.steps_per_checkpoint:
          do_report = True  # Don't pollute the plot too early.
        if is_chief and step_count % 10 == 1 and do_report:
          cur_loss = acc_l1 / float(step_c1)
          acc_l1, step_c1 = 0.0, 0
          cur_perp = data.safe_exp(cur_loss)
          summary = tf.Summary()
          summary.value.extend(
              [tf.Summary.Value(tag="log_perplexity", simple_value=cur_loss),
               tf.Summary.Value(tag="perplexity", simple_value=cur_perp)])
          sv.SummaryComputed(sess, summary, global_step)

      # Normalize and print out accumulated statistics.
      acc_loss /= step_count
      step_time /= FLAGS.steps_per_checkpoint
      acc_seq_err = float(acc_seq_err) / (step_count * batch_size)
      prev_seq_err = max(0.0, acc_seq_err - 0.02)  # No noise at error < 2%.
      acc_errors = float(acc_errors) / acc_total if acc_total > 0 else 1.0
      t_size = float(sum([len(x) for x in train_set])) / float(1000000)
      msg = ("step %d step-time %.2f train-size %.3f lr %.6f grad-norm %.4f"
             % (global_step + 1, step_time, t_size, learning_rate,
                acc_grad_norm / FLAGS.steps_per_checkpoint))
      data.print_out("%s len %d ppl %.6f errors %.2f sequence-errors %.2f" %
                     (msg, max_cur_length, data.safe_exp(acc_loss),
                      100*acc_errors, 100*acc_seq_err))

      # If errors are below the curriculum threshold, move curriculum forward.
      is_good = FLAGS.curriculum_ppx > data.safe_exp(acc_loss)
      is_good = is_good and FLAGS.curriculum_seq > acc_seq_err
      if is_good and is_chief:
        if FLAGS.quantize:
          # Quantize weights.
          data.print_out("  Quantizing parameters.")
          sess.run([quant_op])
        # Increase current length (until the next with training data).
        sess.run(model.cur_length_incr_op)
        # Forget last perplexities if we're not yet at the end.
        if max_cur_length < max_length:
          prev_acc_perp.append(1000000)

      # Lower learning rate if we're worse than the last 5 checkpoints.
      acc_perp = data.safe_exp(acc_loss)
      if acc_perp > max(prev_acc_perp[-5:]) and is_chief:
        sess.run(model.lr_decay_op)
      prev_acc_perp.append(acc_perp)

      # Save checkpoint.
      if is_chief:
        checkpoint_path = os.path.join(checkpoint_dir, "neural_gpu.ckpt")
        model.saver.save(sess, checkpoint_path,
                         global_step=model.global_step)

        # Run evaluation.
        bin_bound = 4
        for p in FLAGS.problem.split("-"):
          total_loss, total_err, tl_counter = 0.0, 0.0, 0
          for bin_id in xrange(len(data.bins)):
            if bin_id < bin_bound or bin_id % FLAGS.eval_bin_print == 1:
              err, _, loss = single_test(bin_id, model, sess, FLAGS.nprint,
                                         batch_size * 4, dev_set, p,
                                         beam_model=beam_model)
              if loss > 0.0:
                total_loss += loss
                total_err += err
                tl_counter += 1
          test_loss = total_loss / max(1, tl_counter)
          test_err = total_err / max(1, tl_counter)
          test_perp = data.safe_exp(test_loss)
          summary = tf.Summary()
          summary.value.extend(
              [tf.Summary.Value(tag="test/%s/loss" % p, simple_value=test_loss),
               tf.Summary.Value(tag="test/%s/error" % p, simple_value=test_err),
               tf.Summary.Value(tag="test/%s/perplexity" % p,
                                simple_value=test_perp)])
          sv.SummaryComputed(sess, summary, global_step)
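
The loop above samples buckets via get_bucket_id(train_buckets_scale[p][-1], max_cur_length, train_set), which is not shown in this excerpt. A common pattern for this kind of helper, given only as a hedged sketch here rather than the project's code, is to draw a uniform number and take the first bucket whose cumulative data fraction covers it, then back off to a bucket that actually contains data:

import numpy as np

def get_bucket_id(buckets_scale, max_cur_length, data_set):
    # buckets_scale[i] is assumed to be the cumulative fraction of training
    # examples in buckets 0..i; sample a bucket proportionally to that share.
    # (The real helper presumably also caps the choice by max_cur_length.)
    r = np.random.random_sample()
    bucket_id = min(i for i in range(len(buckets_scale)) if buckets_scale[i] > r)
    # Back off to a smaller bucket if the chosen one has no data.
    while bucket_id > 0 and not data_set[bucket_id]:
        bucket_id -= 1
    return bucket_id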
Code example #38
0
File: neural_gpu_trainer.py Project: ALISCIFP/models
def interactive():
  """Interactively probe an existing model."""
  with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
    # Initialize model.
    (model, _, _, _, _, (_, _, en_path, fr_path), _, _) = initialize(sess)
    # Load vocabularies.
    en_vocab, rev_en_vocab = wmt.initialize_vocabulary(en_path)
    _, rev_fr_vocab = wmt.initialize_vocabulary(fr_path)
    # Print out vectors and variables.
    if FLAGS.nprint > 0 and FLAGS.word_vector_file_en:
      print_vectors("embedding:0", en_path, FLAGS.word_vector_file_en)
    if FLAGS.nprint > 0 and FLAGS.word_vector_file_fr:
      print_vectors("target_embedding:0", fr_path, FLAGS.word_vector_file_fr)
    total = 0
    for v in tf.trainable_variables():
      shape = v.get_shape().as_list()
      total += mul(shape)
      print(v.name, shape, mul(shape))
    print(total)
    # Start interactive loop.
    sys.stdout.write("Input to Neural GPU Translation Model.\n")
    sys.stdout.write("> ")
    sys.stdout.flush()
    inpt = sys.stdin.readline()
    while inpt:
      cures = []
      # Get token-ids for the input sentence.
      if FLAGS.simple_tokenizer:
        token_ids = wmt.sentence_to_token_ids(
            inpt, en_vocab, tokenizer=wmt.space_tokenizer,
            normalize_digits=FLAGS.normalize_digits)
      else:
        token_ids = wmt.sentence_to_token_ids(inpt, en_vocab)
      print([rev_en_vocab[t] for t in token_ids])
      # Which bucket does it belong to?
      buckets = [b for b in xrange(len(data.bins))
                 if data.bins[b] >= max(len(token_ids), len(cures))]
      if cures:
        buckets = [buckets[0]]
      if buckets:
        result, result_cost = [], 10000000.0
        for bucket_id in buckets:
          if data.bins[bucket_id] > MAXLEN_F * len(token_ids) + EVAL_LEN_INCR:
            break
          glen = 1
          for gen_idx in xrange(glen):
            # Get a 1-element batch to feed the sentence to the model.
            inp, target = data.get_batch(
                bucket_id, 1, None, FLAGS.height, preset=([token_ids], [cures]))
            loss, output_logits, _, _ = model.step(
                sess, inp, target, None, beam_size=FLAGS.beam_size,
                update_mem=False)
            # If it is a greedy decoder, outputs are argmaxes of output_logits.
            if FLAGS.beam_size > 1:
              outputs = [int(o) for o in output_logits]
            else:
              loss = loss[0] - (data.bins[bucket_id] * FLAGS.length_norm)
              outputs = [int(np.argmax(logit, axis=1))
                         for logit in output_logits]
            print([rev_fr_vocab[t] for t in outputs])
            print(loss, data.bins[bucket_id])
            print(linearize(outputs, rev_fr_vocab))
            cures.append(outputs[gen_idx])
            print(cures)
            print(linearize(cures, rev_fr_vocab))
          if FLAGS.simple_tokenizer:
            cur_out = outputs
            if wmt.EOS_ID in cur_out:
              cur_out = cur_out[:cur_out.index(wmt.EOS_ID)]
            res_tags = [rev_fr_vocab[o] for o in cur_out]
            bad_words, bad_brack = wmt.parse_constraints(token_ids, res_tags)
            loss += 1000.0 * bad_words + 100.0 * bad_brack
          if loss < result_cost:
            result = outputs
            result_cost = loss
        print("FINAL", result_cost)
        print([rev_fr_vocab[t] for t in result])
        print(linearize(result, rev_fr_vocab))
      else:
        print("TOOO_LONG")
      sys.stdout.write("> ")
      sys.stdout.flush()
      inpt = sys.stdin.readline()