def score_beams_prog(beams, target, inp, history, print_out=False,
                     test_mode=False):
  """Score beams for program synthesis."""
  tgt_prog = linearize(target, program_utils.prog_vocab, True, 1)
  hist_progs = [linearize(h, program_utils.prog_vocab, True, 1)
                for h in history]
  tgt_set = set(target)
  if print_out:
    print("target: ", tgt_prog)
  inps, tgt_outs = [], []
  for i in xrange(3):
    ilist = [inp[i + 1, l] for l in xrange(inp.shape[1])]
    clist = [program_utils.prog_vocab[x] for x in ilist if x > 0]
    olist = clist[clist.index("]") + 1:]  # outputs
    clist = clist[1:clist.index("]")]     # inputs
    inps.append([int(x) for x in clist])
    if olist[0] == "[":  # olist may be [int] or just int
      tgt_outs.append(str([int(x) for x in olist[1:-1]]))
    else:
      if len(olist) == 1:
        tgt_outs.append(olist[0])
      else:
        print([program_utils.prog_vocab[x] for x in ilist if x > 0])
        print(olist)
        print(tgt_prog)
        print(program_utils.evaluate(tgt_prog, {"a": inps[-1]}))
        print("AAAAA")
        tgt_outs.append(olist[0])
  if not test_mode:
    for _ in xrange(7):
      ilen = np.random.randint(len(target) - 3) + 1
      inps.append([random.choice(range(-15, 15)) for _ in range(ilen)])
    tgt_outs.extend([program_utils.evaluate(tgt_prog, {"a": inp})
                     for inp in inps[3:]])
  best, best_prog, best_score = None, "", -1000.0
  for beam in beams:
    b_prog = linearize(beam, program_utils.prog_vocab, True, 1)
    b_set = set(beam)
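    # Jaccard similarity between the beam's and the target's token sets.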
    jsim = len(tgt_set & b_set) / float(len(tgt_set | b_set))
    b_outs = [program_utils.evaluate(b_prog, {"a": inp}) for inp in inps]
    errs = len([x for x in b_outs if x == "ERROR"])
    imatches = len([i for i in xrange(3) if b_outs[i] == tgt_outs[i]])
    perfect = 10.0 if imatches == 3 else 0.0
    hist_score = 20.0 if b_prog in hist_progs else 0.0
    if test_mode:
      score = perfect - errs
    else:
      matches = len([i for i in xrange(10) if b_outs[i] == tgt_outs[i]])
      score = perfect + matches + jsim - errs
    if score < 10.0:
      score -= hist_score
    # print b_prog
    # print "jsim: ", jsim, " errs: ", errs, " mtchs: ", matches, " s: ", score
    if score > best_score:
      best = beam
      best_prog = b_prog
      best_score = score
  if print_out:
    print("best score: ", best_score, " best prog: ", best_prog)
  return best, best_score
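

# A minimal, self-contained sketch of the scoring heuristic above, with the
# program evaluator stubbed out: Jaccard token overlap plus per-example output
# matches, a bonus when every seen I/O example matches, minus a penalty per
# evaluation error. All names here are illustrative, not part of the codebase.
def sketch_score(beam_toks, target_toks, beam_outs, target_outs):
  tgt_set, b_set = set(target_toks), set(beam_toks)
  jsim = len(tgt_set & b_set) / float(len(tgt_set | b_set))
  errs = sum(1 for o in beam_outs if o == "ERROR")
  matches = sum(1 for bo, to in zip(beam_outs, target_outs) if bo == to)
  perfect = 10.0 if matches >= len(target_outs) else 0.0
  return perfect + matches + jsim - errs

# sketch_score([1, 2], [1, 2], ["3", "4"], ["3", "4"]) -> 10 + 2 + 1 - 0 = 13.0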


def prog_io_pair(prog, max_len, counter=0):
  """Sample a random input for prog and return a tokenized (input, output)."""
   try:
     ilen = np.random.randint(max_len - 3) + 1
    bound = max(15 - (counter // 20), 1)
     inp = [random.choice(range(-bound, bound)) for _ in range(ilen)]
     inp_toks = [program_utils.prog_rev_vocab[t]
                 for t in program_utils.tokenize(str(inp)) if t != ","]
     out = program_utils.evaluate(prog, {"a": inp})
     out_toks = [program_utils.prog_rev_vocab[t]
                 for t in program_utils.tokenize(str(out)) if t != ","]
     if counter > 400:
       out_toks = []
     if (out_toks and out_toks[0] == program_utils.prog_rev_vocab["["] and
         len(out_toks) != len([o for o in out if o == ","]) + 3):
       raise ValueError("generated list with too long ints")
     if (out_toks and out_toks[0] != program_utils.prog_rev_vocab["["] and
         len(out_toks) > 1):
       raise ValueError("generated one int but tokenized it to many")
     if len(out_toks) > max_len:
       raise ValueError("output too long")
     return (inp_toks, out_toks)
   except ValueError:
      return prog_io_pair(prog, max_len, counter + 1)
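

# Self-contained sketch of the retry pattern prog_io_pair relies on: sample,
# validate, and on failure recurse with an incremented counter, which shrinks
# the sampling bound and caps the number of attempts. The validator here is a
# stand-in for the real tokenizer checks.
import random

def sample_with_retries(validate, counter=0):
  bound = max(15 - counter // 20, 1)  # narrower range as retries accumulate
  candidate = random.choice(range(-bound, bound))
  if counter > 400 or validate(candidate):
    return candidate
  return sample_with_retries(validate, counter + 1)

# sample_with_retries(lambda x: x % 2 == 0) -> the first even sample drawn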


def get_best_beam(beam_model, sess, inp, target, batch_size, beam_size,
                  bucket, history, p, test_mode=False):
  """Run beam_model, score beams, and return the best as target and in input."""
  _, output_logits, _, _ = beam_model.step(
      sess, inp, target, None, beam_size=FLAGS.beam_size)
  new_targets, new_firsts, scores, new_inp = [], [], [], np.copy(inp)
  for b in xrange(batch_size):
    outputs = []
    history_b = [[h[b, 0, l] for l in xrange(data.bins[bucket])]
                 for h in history]
    for beam_idx in xrange(beam_size):
      outputs.append([int(o[beam_idx * batch_size + b])
                      for o in output_logits])
    target_t = [target[b, 0, l] for l in xrange(data.bins[bucket])]
    best, best_score = score_beams(
        outputs, [t for t in target_t if t > 0], inp[b, :, :],
        [[t for t in h if t > 0] for h in history_b], p, test_mode=test_mode)
    scores.append(best_score)
    if 1 in best:  # Only until _EOS.
      best = best[:best.index(1) + 1]
    best += [0 for _ in xrange(len(target_t) - len(best))]
    new_targets.append([best])
    first, _ = score_beams(
        outputs, [t for t in target_t if t > 0], inp[b, :, :],
        [[t for t in h if t > 0] for h in history_b], p, test_mode=True)
    if 1 in first:  # Only until _EOS.
      first = first[:first.index(1) + 1]
    first += [0 for _ in xrange(len(target_t) - len(first))]
    new_inp[b, 0, :] = np.array(first, dtype=np.int32)
    new_firsts.append([first])
  # Change target if we found a great answer.
  new_target = np.array(new_targets, dtype=np.int32)
  for b in xrange(batch_size):
    if scores[b] >= 10.0:
      target[b, 0, :] = new_target[b, 0, :]
  new_first = np.array(new_firsts, dtype=np.int32)
  return new_target, new_first, new_inp, scores
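

# Sketch of the truncate-at-EOS-then-pad step used twice above (EOS id 1,
# PAD id 0): keep tokens up to and including the first EOS, then pad with
# zeros back to the bucket length. A hypothetical helper, not in the codebase.
def pad_to_length(seq, length, eos_id=1, pad_id=0):
  if eos_id in seq:
    seq = seq[:seq.index(eos_id) + 1]
  return seq + [pad_id] * (length - len(seq))

# pad_to_length([5, 7, 1, 9, 9], 6) -> [5, 7, 1, 0, 0, 0]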


def train():
  """Train the model."""
  batch_size = FLAGS.batch_size * FLAGS.num_gpus
  (model, beam_model, min_length, max_length, checkpoint_dir,
   (train_set, dev_set, en_vocab_path, fr_vocab_path), sv, sess) = initialize()
  with sess.as_default():
    quant_op = model.quantize_op
    max_cur_length = min(min_length + 3, max_length)
    prev_acc_perp = [1000000 for _ in xrange(5)]
    prev_seq_err = 1.0
    is_chief = FLAGS.task < 1
    do_report = False

    # Main training loop.
    while not sv.ShouldStop():
      global_step, max_cur_length, learning_rate = sess.run(
          [model.global_step, model.cur_length, model.lr])
      acc_loss, acc_l1, acc_total, acc_errors, acc_seq_err = 0.0, 0.0, 0, 0, 0
      acc_grad_norm, step_count, step_c1, step_time = 0.0, 0, 0, 0.0

      # For words in the word vector file, set their embedding at start.
      bound1 = FLAGS.steps_per_checkpoint - 1
      if FLAGS.word_vector_file_en and global_step < bound1 and is_chief:
        assign_vectors(FLAGS.word_vector_file_en, "embedding:0",
                       en_vocab_path, sess)
        if FLAGS.max_target_vocab < 1:
          assign_vectors(FLAGS.word_vector_file_en, "target_embedding:0",
                         en_vocab_path, sess)

      if FLAGS.word_vector_file_fr and global_step < bound1 and is_chief:
        assign_vectors(FLAGS.word_vector_file_fr, "embedding:0",
                       fr_vocab_path, sess)
        if FLAGS.max_target_vocab < 1:
          assign_vectors(FLAGS.word_vector_file_fr, "target_embedding:0",
                         fr_vocab_path, sess)

      for _ in xrange(FLAGS.steps_per_checkpoint):
        step_count += 1
        step_c1 += 1
        global_step = int(model.global_step.eval())
        train_beam_anneal = global_step / float(FLAGS.train_beam_anneal)
        train_beam_freq = FLAGS.train_beam_freq * min(1.0, train_beam_anneal)
        p = random.choice(FLAGS.problem.split("-"))
        train_set = global_train_set[p][-1]
        bucket_id = get_bucket_id(train_buckets_scale[p][-1], max_cur_length,
                                  train_set)
        # Prefer longer stuff 60% of time if not wmt.
        if np.random.randint(100) < 60 and FLAGS.problem != "wmt":
          bucket1 = get_bucket_id(train_buckets_scale[p][-1], max_cur_length,
                                  train_set)
          bucket_id = max(bucket1, bucket_id)

        # Run a step and time it.
        start_time = time.time()
        inp, target = data.get_batch(bucket_id, batch_size, train_set,
                                     FLAGS.height)
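        # Anneal gradient noise as ~(step + 1)^-0.55, scaled by the recent
        # sequence error, so injected noise fades as training stabilizes.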
        noise_param = math.sqrt(math.pow(global_step + 1, -0.55) *
                                prev_seq_err) * FLAGS.grad_noise_scale
        # In multi-step mode, we use best from beam for middle steps.
        state, new_target, scores, history = None, None, None, []
        while (FLAGS.beam_size > 1 and
               train_beam_freq > np.random.random_sample()):
          # Get the best beam (no training, just forward model).
          new_target, new_first, new_inp, scores = get_best_beam(
              beam_model, sess, inp, target,
              batch_size, FLAGS.beam_size, bucket_id, history, p)
          history.append(new_first)
          # Training step with the previous input and the best beam as target.
          _, _, _, state = model.step(sess, inp, new_target, FLAGS.do_train,
                                      noise_param, update_mem=True, state=state)
          # Change input to the new one for the next step.
          inp = new_inp
          # If all results are great, stop (todo: not to wait for all?).
          if FLAGS.nprint > 1:
            print(scores)
          if sum(scores) / float(len(scores)) >= 10.0:
            break
        # The final step with the true target.
        loss, res, gnorm, _ = model.step(
            sess, inp, target, FLAGS.do_train, noise_param,
            update_mem=True, state=state)
        step_time += time.time() - start_time
        acc_grad_norm += 0.0 if gnorm is None else float(gnorm)

        # Accumulate statistics.
        acc_loss += loss
        acc_l1 += loss
        errors, total, seq_err = data.accuracy(
            inp, res, target, batch_size, 0, new_target, scores)
        if FLAGS.nprint > 1:
          print "seq_err: ", seq_err
        acc_total += total
        acc_errors += errors
        acc_seq_err += seq_err

        # Report summary every 10 steps.
        if step_count + 3 > FLAGS.steps_per_checkpoint:
          do_report = True  # Don't pollute the plot too early.
        if is_chief and step_count % 10 == 1 and do_report:
          cur_loss = acc_l1 / float(step_c1)
          acc_l1, step_c1 = 0.0, 0
          cur_perp = data.safe_exp(cur_loss)
          summary = tf.Summary()
          summary.value.extend(
              [tf.Summary.Value(tag="log_perplexity", simple_value=cur_loss),
               tf.Summary.Value(tag="perplexity", simple_value=cur_perp)])
          sv.SummaryComputed(sess, summary, global_step)

      # Normalize and print out accumulated statistics.
      acc_loss /= step_count
      step_time /= FLAGS.steps_per_checkpoint
      acc_seq_err = float(acc_seq_err) / (step_count * batch_size)
      prev_seq_err = max(0.0, acc_seq_err - 0.02)  # No noise at error < 2%.
      acc_errors = float(acc_errors) / acc_total if acc_total > 0 else 1.0
      t_size = float(sum([len(x) for x in train_set])) / float(1000000)
      msg = ("step %d step-time %.2f train-size %.3f lr %.6f grad-norm %.4f"
             % (global_step + 1, step_time, t_size, learning_rate,
                acc_grad_norm / FLAGS.steps_per_checkpoint))
      data.print_out("%s len %d ppl %.6f errors %.2f sequence-errors %.2f" %
                     (msg, max_cur_length, data.safe_exp(acc_loss),
                      100*acc_errors, 100*acc_seq_err))

      # If errors are below the curriculum threshold, move curriculum forward.
      is_good = FLAGS.curriculum_ppx > data.safe_exp(acc_loss)
      is_good = is_good and FLAGS.curriculum_seq > acc_seq_err
      if is_good and is_chief:
        if FLAGS.quantize:
          # Quantize weights.
          data.print_out("  Quantizing parameters.")
          sess.run([quant_op])
        # Increase current length (until the next with training data).
        sess.run(model.cur_length_incr_op)
        # Forget last perplexities if we're not yet at the end.
        if max_cur_length < max_length:
          prev_acc_perp.append(1000000)

      # Lower learning rate if we're worse than the last 5 checkpoints.
      acc_perp = data.safe_exp(acc_loss)
      if acc_perp > max(prev_acc_perp[-5:]) and is_chief:
        sess.run(model.lr_decay_op)
      prev_acc_perp.append(acc_perp)

      # Save checkpoint.
      if is_chief:
        checkpoint_path = os.path.join(checkpoint_dir, "neural_gpu.ckpt")
        model.saver.save(sess, checkpoint_path,
                         global_step=model.global_step)

        # Run evaluation.
        bin_bound = 4
        for p in FLAGS.problem.split("-"):
          total_loss, total_err, tl_counter = 0.0, 0.0, 0
          for bin_id in xrange(len(data.bins)):
            if bin_id < bin_bound or bin_id % FLAGS.eval_bin_print == 1:
              err, _, loss = single_test(bin_id, model, sess, FLAGS.nprint,
                                         batch_size * 4, dev_set, p,
                                         beam_model=beam_model)
              if loss > 0.0:
                total_loss += loss
                total_err += err
                tl_counter += 1
          test_loss = total_loss / max(1, tl_counter)
          test_err = total_err / max(1, tl_counter)
          test_perp = data.safe_exp(test_loss)
          summary = tf.Summary()
          summary.value.extend(
              [tf.Summary.Value(tag="test/%s/loss" % p, simple_value=test_loss),
               tf.Summary.Value(tag="test/%s/error" % p, simple_value=test_err),
               tf.Summary.Value(tag="test/%s/perplexity" % p,
                                simple_value=test_perp)])
          sv.SummaryComputed(sess, summary, global_step)
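

# Sketch of the checkpoint-level learning-rate rule train() implements: decay
# when this checkpoint's perplexity is worse than the best of the last five
# checkpoints. The decay factor is a stand-in for the model's lr_decay_op.
def schedule_lr(acc_perp, prev_acc_perp, lr, decay=0.94):
  if acc_perp > max(prev_acc_perp[-5:]):
    lr *= decay
  prev_acc_perp.append(acc_perp)
  return lr

# With history [50.0] * 5: perplexities 40.0 and 45.0 leave lr unchanged,
# while 55.0 (worse than all five recent checkpoints) triggers one decay.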


def linearize(output, rev_fr_vocab, simple_tokenizer=None, eos_id=wmt.EOS_ID):
  # If there is an EOS symbol in outputs, cut them at that point (WMT).
  if eos_id in output:
    output = output[:output.index(eos_id)]
  # Return the French sentence corresponding to outputs.
  if simple_tokenizer or FLAGS.simple_tokenizer:
    vlen = len(rev_fr_vocab)
    def vget(o):
      if o < vlen:
        return rev_fr_vocab[o]
      return "UNK"
    return " ".join([vget(o) for o in output])
  else:
    return wmt.basic_detokenizer([rev_fr_vocab[o] for o in output])
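

# Hypothetical usage of linearize with a toy vocabulary: ids 3 and 4 map to
# words, EOS (id 1) cuts the tail, and the simple space-joining branch is used.
#   toy_vocab = ["_PAD", "_EOS", "_UNK", "hello", "world"]
#   linearize([3, 4, 1, 0, 0], toy_vocab, simple_tokenizer=True, eos_id=1)
#   -> "hello world"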


def evaluate():
  """Evaluate an existing model."""
  batch_size = FLAGS.batch_size * FLAGS.num_gpus
  with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
    (model, beam_model, _, _, _,
     (_, dev_set, en_vocab_path, fr_vocab_path), _, sess) = initialize(sess)
    for p in FLAGS.problem.split("-"):
      for bin_id in xrange(len(data.bins)):
        if (FLAGS.task >= 0 and bin_id > 4) or (FLAGS.nprint == 0 and
                                                bin_id > 8 and p == "wmt"):
          break
        single_test(bin_id, model, sess, FLAGS.nprint, batch_size, dev_set, p,
                    beam_model=beam_model)
    path = FLAGS.test_file_prefix
    xid = "" if FLAGS.task < 0 else ("%.4d" % (FLAGS.task+FLAGS.decode_offset))
    en_path, fr_path = path + ".en" + xid, path + ".fr" + xid
    # Evaluate the test files if they exist.
    if path and tf.gfile.Exists(en_path) and tf.gfile.Exists(fr_path):
      data.print_out("Translating test set %s" % en_path)
      # Read lines.
      en_lines, fr_lines = [], []
      with tf.gfile.GFile(en_path, mode="r") as f:
        for line in f:
          en_lines.append(line.strip())
      with tf.gfile.GFile(fr_path, mode="r") as f:
        for line in f:
          fr_lines.append(line.strip())
      # Tokenize and convert to ids.
      en_vocab, _ = wmt.initialize_vocabulary(en_vocab_path)
      _, rev_fr_vocab = wmt.initialize_vocabulary(fr_vocab_path)
      if FLAGS.simple_tokenizer:
        en_ids = [wmt.sentence_to_token_ids(
            l, en_vocab, tokenizer=wmt.space_tokenizer,
            normalize_digits=FLAGS.normalize_digits)
                  for l in en_lines]
      else:
        en_ids = [wmt.sentence_to_token_ids(l, en_vocab) for l in en_lines]
      # Translate.
      results = []
      for idx, token_ids in enumerate(en_ids):
        if idx % 5 == 0:
          data.print_out("Translating example %d of %d." % (idx, len(en_ids)))
        # Which bucket does it belong to?
        buckets = [b for b in xrange(len(data.bins))
                   if data.bins[b] >= len(token_ids)]
        if buckets:
          result, result_cost = [], 100000000.0
          for bucket_id in buckets:
            if data.bins[bucket_id] > MAXLEN_F * len(token_ids) + EVAL_LEN_INCR:
              break
            # Get a 1-element batch to feed the sentence to the model.
            used_batch_size = 1  # batch_size
            inp, target = data.get_batch(
                bucket_id, used_batch_size, None, FLAGS.height,
                preset=([token_ids], [[]]))
            loss, output_logits, _, _ = model.step(
                sess, inp, target, None, beam_size=FLAGS.beam_size)
            outputs = [int(o[0]) for o in output_logits]
            loss = loss[0] - (data.bins[bucket_id] * FLAGS.length_norm)
            if FLAGS.simple_tokenizer:
              cur_out = outputs
              if wmt.EOS_ID in cur_out:
                cur_out = cur_out[:cur_out.index(wmt.EOS_ID)]
              res_tags = [rev_fr_vocab[o] for o in cur_out]
              bad_words, bad_brack = wmt.parse_constraints(token_ids, res_tags)
              loss += 1000.0 * bad_words + 100.0 * bad_brack
            # print (bucket_id, loss)
            if loss < result_cost:
              result = outputs
              result_cost = loss
          final = linearize(result, rev_fr_vocab)
          results.append("%s\t%s\n" % (final, fr_lines[idx]))
          # print result_cost
          sys.stderr.write(results[-1])
          sys.stderr.flush()
        else:
          sys.stderr.write("TOOO_LONG\t%s\n" % fr_lines[idx])
          sys.stderr.flush()
      if xid:
        decode_suffix = "beam%dln%dn" % (FLAGS.beam_size,
                                         int(100 * FLAGS.length_norm))
        with tf.gfile.GFile(path + ".res" + decode_suffix + xid, mode="w") as f:
          for line in results:
            f.write(line)
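

# Sketch of the rescoring evaluate() performs across buckets: each candidate's
# loss gets a length-normalization credit proportional to its bucket size and
# the lowest adjusted cost wins. length_norm stands in for FLAGS.length_norm.
def pick_rescored(candidates, length_norm=0.4):
  best, best_cost = [], 100000000.0
  for outputs, raw_loss, bucket_len in candidates:
    cost = raw_loss - bucket_len * length_norm
    if cost < best_cost:
      best, best_cost = outputs, cost
  return best, best_cost

# pick_rescored([([7, 8, 1], 3.0, 8), ([7, 8, 9, 1], 3.2, 16)])
#   -> ([7, 8, 9, 1], -3.2): the longer bucket wins after normalization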


def mul(l):
  res = 1.0
  for s in l:
    res *= s
  return res
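

# mul() is the product of a shape list, i.e. a variable's parameter count:
# mul([3, 4, 5]) -> 60.0; equivalent to functools.reduce(operator.mul, l, 1.0).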


def interactive():
  """Interactively probe an existing model."""
  with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
    # Initialize model.
    (model, _, _, _, _, (_, _, en_path, fr_path), _, _) = initialize(sess)
    # Load vocabularies.
    en_vocab, rev_en_vocab = wmt.initialize_vocabulary(en_path)
    _, rev_fr_vocab = wmt.initialize_vocabulary(fr_path)
    # Print out vectors and variables.
    if FLAGS.nprint > 0 and FLAGS.word_vector_file_en:
      print_vectors("embedding:0", en_path, FLAGS.word_vector_file_en)
    if FLAGS.nprint > 0 and FLAGS.word_vector_file_fr:
      print_vectors("target_embedding:0", fr_path, FLAGS.word_vector_file_fr)
    total = 0
    for v in tf.trainable_variables():
      shape = v.get_shape().as_list()
      total += mul(shape)
      print(v.name, shape, mul(shape))
    print(total)
    # Start interactive loop.
    sys.stdout.write("Input to Neural GPU Translation Model.\n")
    sys.stdout.write("> ")
    sys.stdout.flush()
    inpt = sys.stdin.readline()
    while inpt:
      cures = []
      # Get token-ids for the input sentence.
      if FLAGS.simple_tokenizer:
        token_ids = wmt.sentence_to_token_ids(
            inpt, en_vocab, tokenizer=wmt.space_tokenizer,
            normalize_digits=FLAGS.normalize_digits)
      else:
        token_ids = wmt.sentence_to_token_ids(inpt, en_vocab)
      print([rev_en_vocab[t] for t in token_ids])
      # Which bucket does it belong to?
      buckets = [b for b in xrange(len(data.bins))
                 if data.bins[b] >= max(len(token_ids), len(cures))]
      if cures:
        buckets = [buckets[0]]
      if buckets:
        result, result_cost = [], 10000000.0
        for bucket_id in buckets:
          if data.bins[bucket_id] > MAXLEN_F * len(token_ids) + EVAL_LEN_INCR:
            break
          glen = 1
          for gen_idx in xrange(glen):
            # Get a 1-element batch to feed the sentence to the model.
            inp, target = data.get_batch(
                bucket_id, 1, None, FLAGS.height, preset=([token_ids], [cures]))
            loss, output_logits, _, _ = model.step(
                sess, inp, target, None, beam_size=FLAGS.beam_size,
                update_mem=False)
            # If it is a greedy decoder, outputs are argmaxes of output_logits.
            if FLAGS.beam_size > 1:
              outputs = [int(o) for o in output_logits]
            else:
              loss = loss[0] - (data.bins[bucket_id] * FLAGS.length_norm)
              outputs = [int(np.argmax(logit, axis=1))
                         for logit in output_logits]
            print([rev_fr_vocab[t] for t in outputs])
            print(loss, data.bins[bucket_id])
            print(linearize(outputs, rev_fr_vocab))
            cures.append(outputs[gen_idx])
            print(cures)
            print(linearize(cures, rev_fr_vocab))
          if FLAGS.simple_tokenizer:
            cur_out = outputs
            if wmt.EOS_ID in cur_out:
              cur_out = cur_out[:cur_out.index(wmt.EOS_ID)]
            res_tags = [rev_fr_vocab[o] for o in cur_out]
            bad_words, bad_brack = wmt.parse_constraints(token_ids, res_tags)
            loss += 1000.0 * bad_words + 100.0 * bad_brack
          if loss < result_cost:
            result = outputs
            result_cost = loss
        print ("FINAL", result_cost)
        print [rev_fr_vocab[t] for t in result]