Example #1
def rouge_l(eval_sentences,
            eval_len,
            ref_sentences,
            ref_len,
            vocab,
            use_bpe=False):
    """Rouge-L.

  Args:
    eval_sentences: prediction to be evaluated.
    eval_len: lengths of the predictions.
    ref_sentences: reference sentences.
    ref_len: lengths of the references.
    vocab: vocabulary.
    use_bpe: to use BPE or not.

  Returns:
    Rouge-L
  """
    f1_scores = []
    for e, el, r, rl in zip(eval_sentences, eval_len, ref_sentences, ref_len):
        # Decode the prediction ids into tokens, truncated to the true length.
        e = id2text(e[:el], vocab=vocab, use_bpe=use_bpe).split()
        # Truncate the reference and drop special tokens (e.g., padding/EOS).
        r = r[:rl]
        r = [x for x in r if x not in SPECIAL_TOKENS]
        # Sentence-level F1 from the longest-common-subsequence length.
        lcs = _len_lcs(e, r)
        f1_scores.append(_f_lcs(lcs, len(r), len(e)))
    return np.mean(f1_scores, dtype=np.float32)
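
The helpers _len_lcs and _f_lcs are not defined in this snippet. Below is a minimal, self-contained sketch of what they are assumed to compute: the longest-common-subsequence length via dynamic programming, and a ROUGE-L style F-measure derived from it. The names match the calls above, but the exact F-measure weighting is an assumption.

def _len_lcs(x, y):
    """Length of the longest common subsequence of token lists x and y."""
    m, n = len(x), len(y)
    # table[i][j] holds the LCS length of x[:i] and y[:j].
    table = [[0] * (n + 1) for _ in range(m + 1)]
    for i in range(1, m + 1):
        for j in range(1, n + 1):
            if x[i - 1] == y[j - 1]:
                table[i][j] = table[i - 1][j - 1] + 1
            else:
                table[i][j] = max(table[i - 1][j], table[i][j - 1])
    return table[m][n]


def _f_lcs(llcs, m, n):
    """ROUGE-L style F-measure; m is the reference length, n the prediction
    length. Assumed formulation; the repo's exact weighting may differ."""
    r_lcs = float(llcs) / m if m else 0.0
    p_lcs = float(llcs) / n if n else 0.0
    beta = p_lcs / (r_lcs + 1e-12)
    denom = r_lcs + beta ** 2 * p_lcs
    return (1 + beta ** 2) * r_lcs * p_lcs / denom if denom else 0.0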
Example #2
def rouge_n(eval_sentences,
            eval_len,
            ref_sentences,
            ref_len,
            n,
            vocab,
            use_bpe=False,
            predict_mode=False):
    """Rouge N."""
    f1_scores = []
    for e, el, r, rl in zip(eval_sentences, eval_len, ref_sentences, ref_len):
        # Decode the prediction ids into tokens, truncated to the true length.
        e = id2text(e[:el], vocab=vocab, use_bpe=use_bpe).split()
        r = r[:rl]
        # Drop special tokens from both prediction and reference.
        e = [x for x in e if x not in SPECIAL_TOKENS]
        r = [x for x in r if x not in SPECIAL_TOKENS]

        if n == 1 and predict_mode:
            tf.logging.info("prediction: %s", " ".join(e))
            tf.logging.info("reference: %s", " ".join(r))

        # Distinct n-gram sets; each n-gram is counted at most once.
        eval_ngrams = _get_ngrams(n, e)
        ref_ngrams = _get_ngrams(n, r)
        ref_count = len(ref_ngrams)
        eval_count = len(eval_ngrams)

        overlapping_ngrams = eval_ngrams.intersection(ref_ngrams)
        overlapping_count = len(overlapping_ngrams)

        # F1 from n-gram precision and recall, guarded against zero division.
        precision = _safe_divide(overlapping_count, eval_count)
        recall = _safe_divide(overlapping_count, ref_count)
        f1_scores.append(
            _safe_divide(2 * precision * recall, precision + recall))
    return np.mean(f1_scores, dtype=np.float32)
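
_get_ngrams and _safe_divide are also assumed helpers. A minimal sketch consistent with the set-intersection logic above:

def _get_ngrams(n, text):
    """Returns the set of n-grams (as tuples) from a list of tokens."""
    return set(tuple(text[i:i + n]) for i in range(len(text) - n + 1))


def _safe_divide(x, y):
    """Division that returns 0 instead of raising on a zero denominator."""
    return float(x) / y if y else 0.0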
Example #3
def _do_print(n, sequence, lengths, to_txt):
  # Note: vocab, use_bpe, and predict_mode come from the enclosing scope.
  if to_txt:
    # Decode the first sequence in the batch, truncated to its true length.
    s = sequence[0][:lengths[0]]
    output = id2text(s, vocab, use_bpe=use_bpe)
  else:
    output = " ".join(sequence[0])
  if not predict_mode:
    tf.logging.info("%s: %s", n, output)
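
Because _do_print reads vocab, use_bpe, and predict_mode from its enclosing scope, it is not runnable on its own. A sketch of the kind of closure it is assumed to live in (the wrapper name make_printer is illustrative):

def make_printer(vocab, use_bpe, predict_mode):
  # Binds the free variables that _do_print reads from the enclosing scope.
  def _do_print(n, sequence, lengths, to_txt):
    if to_txt:
      s = sequence[0][:lengths[0]]
      output = id2text(s, vocab, use_bpe=use_bpe)
    else:
      output = " ".join(sequence[0])
    if not predict_mode:
      tf.logging.info("%s: %s", n, output)
  return _do_print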
Example #4
def remove_repetitive_trigram(preds, lengths, vocab, hps):
  """Select from the beam a prediction without repetitive trigrams."""
  ret_preds, ret_lengths = [], []
  for (pred, length) in zip(preds, lengths):
    flag = True
    for i in xrange(hps.beam_width):
      # Decode hypothesis i of the beam, truncated to its length.
      l = length[Ellipsis, i]
      p = pred[Ellipsis, i][:l]
      tokens = data.id2text(p, vocab=vocab, use_bpe=hps.use_bpe).split()
      # Reject the hypothesis if it repeats a trigram or has a bad token.
      flag = repetitive_ngrams(3, tokens) or bad_tok(tokens, vocab)
      if not flag:
        # Keep the first acceptable hypothesis in beam order.
        ret_preds.append(pred[Ellipsis, i])
        ret_lengths.append(length[Ellipsis, i])
        break
    if flag:
      # All hypotheses were rejected; fall back to the top beam.
      ret_preds.append(pred[Ellipsis, 0])
      ret_lengths.append(length[Ellipsis, 0])

  predictions = np.int32(np.stack(ret_preds)), np.int32(np.stack(ret_lengths))
  return predictions
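
repetitive_ngrams and bad_tok are not shown. A minimal sketch of the check repetitive_ngrams is assumed to perform, i.e. whether any n-gram occurs more than once in a token list:

def repetitive_ngrams(n, tokens):
  """True if any n-gram appears more than once in the token list."""
  seen = set()
  for i in range(len(tokens) - n + 1):
    gram = tuple(tokens[i:i + n])
    if gram in seen:
      return True
    seen.add(gram)
  return False

For example, repetitive_ngrams(3, "the cat sat the cat sat".split()) is True, so that hypothesis would be skipped in favor of the next beam entry.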
Example #5
def main(_):
    tf.logging.set_verbosity(tf.logging.INFO)

    if FLAGS.model == "seq2seq":
        assert FLAGS.rnn_cell == "lstm"
        assert FLAGS.att_type != "hyper"
    if FLAGS.model == "hypernet" and FLAGS.rank != FLAGS.decoder_dim:
        print("WARNING: recommended rank value: decoder_dim.")
    if FLAGS.att_neighbor:
        assert FLAGS.neighbor_dim == FLAGS.encoder_dim or FLAGS.att_type == "my"

    if FLAGS.use_copy or FLAGS.att_neighbor:
        assert FLAGS.att_type == "my"
    # These numbers are the target vocabulary sizes of the datasets.
    # They allow different vocabularies for sources and targets,
    # following the Open-NMT implementation.
    # I will later put these into command-line arguments.
    if FLAGS.use_bpe:
        if FLAGS.dataset == "nyt":
            output_size = 10013
        elif FLAGS.dataset == "giga":
            output_size = 24654
        elif FLAGS.dataset == "cnnd":
            output_size = 10232
        else:
            raise ValueError("Unknown dataset: %s" % FLAGS.dataset)
    else:
        if FLAGS.dataset == "nyt":
            output_size = 68885
        elif FLAGS.dataset == "giga":
            output_size = 107389
        elif FLAGS.dataset == "cnnd":
            output_size = 21000
        else:
            raise ValueError("Unknown dataset: %s" % FLAGS.dataset)

    vocab = data.Vocab(FLAGS.vocab_path, FLAGS.vocab_size, FLAGS.dataset)
    hps = tf.contrib.training.HParams(
        sample_neighbor=FLAGS.sample_neighbor,
        use_cluster=FLAGS.use_cluster,
        binary_neighbor=FLAGS.binary_neighbor,
        att_neighbor=FLAGS.att_neighbor,
        encode_neighbor=FLAGS.encode_neighbor,
        sum_neighbor=FLAGS.sum_neighbor,
        dataset=FLAGS.dataset,
        rnn_cell=FLAGS.rnn_cell,
        output_size=output_size + vocab.offset,
        train_path=FLAGS.train_path,
        dev_path=FLAGS.dev_path,
        tie_embedding=FLAGS.tie_embedding,
        use_bpe=FLAGS.use_bpe,
        use_copy=FLAGS.use_copy,
        reuse_attention=FLAGS.reuse_attention,
        use_bridge=FLAGS.use_bridge,
        use_residual=FLAGS.use_residual,
        att_type=FLAGS.att_type,
        random_neighbor=FLAGS.random_neighbor,
        num_neighbors=FLAGS.num_neighbors,
        model=FLAGS.model,
        trainer=FLAGS.trainer,
        learning_rate=FLAGS.learning_rate,
        lr_schedule=FLAGS.lr_schedule,
        total_steps=FLAGS.total_steps,
        emb_dim=FLAGS.emb_dim,
        binary_dim=FLAGS.binary_dim,
        neighbor_dim=FLAGS.neighbor_dim,
        drop=FLAGS.drop,
        emb_drop=FLAGS.emb_drop,
        out_drop=FLAGS.out_drop,
        encoder_drop=FLAGS.encoder_drop,
        decoder_drop=FLAGS.decoder_drop,
        weight_decay=FLAGS.weight_decay,
        encoder_dim=FLAGS.encoder_dim,
        num_encoder_layers=FLAGS.num_encoder_layers,
        decoder_dim=FLAGS.decoder_dim,
        num_decoder_layers=FLAGS.num_decoder_layers,
        num_mlp_layers=FLAGS.num_mlp_layers,
        rank=FLAGS.rank,
        sigma_norm=FLAGS.sigma_norm,
        batch_size=FLAGS.batch_size,
        sampling_probability=FLAGS.sampling_probability,
        beam_width=FLAGS.beam_width,
        max_enc_steps=FLAGS.max_enc_steps,
        max_dec_steps=FLAGS.max_dec_steps,
        vocab_size=FLAGS.vocab_size,
        max_grad_norm=FLAGS.max_grad_norm,
        length_norm=FLAGS.length_norm,
        cp=FLAGS.coverage_penalty,
        predict_mode=FLAGS.predict_mode)

    run_config = tf.estimator.RunConfig(model_dir=FLAGS.model_dir)

    eval_input_fn = partial(data.input_function,
                            is_train=False,
                            vocab=vocab,
                            hps=hps)

    estimator = tf.estimator.Estimator(
        model_fn=partial(model_function.model_function, vocab=vocab, hps=hps),
        config=run_config,
        model_dir=run_config.model_dir)
    results = estimator.predict(input_fn=eval_input_fn)

    # Write one decoded prediction per line.
    with tf.gfile.Open("%s/prediction" % FLAGS.model_dir, "w") as fout:
        for result in results:
            outputs = result["outputs"]  # result["lengths"] is unused here.
            prediction = data.id2text(outputs, vocab, use_bpe=FLAGS.use_bpe)
            fout.write(prediction + "\n")
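
As a TF1 estimator script, main receives the argv remainder from flag parsing; it is presumably launched with the standard entry point:

if __name__ == "__main__":
    tf.app.run(main)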