Example #1
def bleu_wrapper(ref_filename, hyp_filename, case_sensitive=False):
    """Compute BLEU for two files (reference and hypothesis translation)."""
    ref_lines = tokenizer.native_to_unicode(
        tf.io.gfile.GFile(ref_filename).read()).strip().splitlines()
    # tf.io.gfile.GFile(ref_filename).read()).splitlines()
    hyp_lines = tokenizer.native_to_unicode(
        tf.io.gfile.GFile(hyp_filename).read()).strip().splitlines()
    # tf.io.gfile.GFile(hyp_filename).read()).splitlines()
    print(
        '*******************************************************************')
    print(ref_filename, len(ref_lines))
    print(hyp_filename, len(hyp_lines))
    print(
        '*******************************************************************')
    if len(ref_lines) != len(hyp_lines):

        logging.info(
            "Reference and translation files have different number of "
            "lines (%d VS %d). If training only a few steps (100-200), the "
            "translation may be empty." % (len(ref_lines), len(hyp_lines)))
        return 0
        # raise ValueError(
        #     "Reference and translation files have different number of "
        #     "lines (%d VS %d). If training only a few steps (100-200), the "
        #     "translation may be empty." % (len(ref_lines), len(hyp_lines)))
    if not case_sensitive:
        ref_lines = [x.lower() for x in ref_lines]
        hyp_lines = [x.lower() for x in hyp_lines]
    ref_tokens = [bleu_tokenize(x) for x in ref_lines]
    hyp_tokens = [bleu_tokenize(x) for x in hyp_lines]
    return metrics.compute_bleu(ref_tokens, hyp_tokens) * 100
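The wrapper above only needs the two file paths and returns corpus BLEU scaled to 0-100. A minimal call might look like the following sketch (the file names are hypothetical placeholders, not paths from this project):

# Hypothetical usage of the wrapper above.
score = bleu_wrapper("newstest2014.de", "newstest2014.translated.de",
                     case_sensitive=False)
print("Case-insensitive BLEU: %.2f" % score)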
Example #2
def bleu_wrapper(ref_filename, hyp_filename, case_sensitive=False):
  """Compute BLEU for two files (reference and hypothesis translation)."""
  ref_lines = tf.gfile.Open(ref_filename).read().strip().splitlines()
  hyp_lines = tf.gfile.Open(hyp_filename).read().strip().splitlines()

  if len(ref_lines) != len(hyp_lines):
    raise ValueError("Reference and translation files have different number of "
                     "lines.")
  if not case_sensitive:
    ref_lines = [x.lower() for x in ref_lines]
    hyp_lines = [x.lower() for x in hyp_lines]
  ref_tokens = [bleu_tokenize(x) for x in ref_lines]
  hyp_tokens = [bleu_tokenize(x) for x in hyp_lines]
  return metrics.compute_bleu(ref_tokens, hyp_tokens) * 100
Example #3
def bleu_wrapper(ref_filename, hyp_filename, case_sensitive=False):
  """Compute BLEU for two files (reference and hypothesis translation)."""
  ref_lines = tf.gfile.Open(ref_filename).read().strip().splitlines()
  hyp_lines = tf.gfile.Open(hyp_filename).read().strip().splitlines()

  if len(ref_lines) != len(hyp_lines):
    raise ValueError("Reference and translation files have different number of "
                     "lines.")
  if not case_sensitive:
    ref_lines = [x.lower() for x in ref_lines]
    hyp_lines = [x.lower() for x in hyp_lines]
  ref_tokens = [bleu_tokenize(x) for x in ref_lines]
  hyp_tokens = [bleu_tokenize(x) for x in hyp_lines]
  return metrics.compute_bleu(ref_tokens, hyp_tokens) * 100
Example #4
def bleu_wrapper(ref_filename, hyp_filename, case_sensitive=False):
    """Compute BLEU for two files."""
    print("Compute BLEU score between two files.")
    with open(ref_filename) as f1:
        ref_lines = f1.read().strip().splitlines()
    with open(hyp_filename) as f2:
        hyp_lines = f2.read().strip().splitlines()

    if len(ref_lines) != len(hyp_lines):
        raise ValueError("Reference and translation files have diffenrent number of lines")

    if not case_sensitive:
        ref_lines = [x.lower() for x in ref_lines]
        hyp_lines = [x.lower() for x in hyp_lines]

    ref_tokens = [bleu_tokenize(x) for x in ref_lines]
    hyp_tokens = [bleu_tokenize(x) for x in hyp_lines]

    return metrics.compute_bleu(ref_tokens, hyp_tokens) * 100
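All four wrappers above call a bleu_tokenize helper that is not shown on this page; in the TensorFlow official Transformer code it mimics the mteval-v14 tokenization. A simplified regex-based sketch of that idea (the name bleu_tokenize_sketch is hypothetical, and this is not the project's exact implementation):

import re

def bleu_tokenize_sketch(string):
    """Rough approximation of mteval-style tokenization: put spaces around
    punctuation and symbols so BLEU n-grams are computed over tokens."""
    string = re.sub(r"([^\w\s])", r" \1 ", string, flags=re.UNICODE)
    return string.split()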
Example #5
def result(self):
    return metrics.compute_bleu(self.labels, self.translations) * 100
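Every example in this list ultimately calls metrics.compute_bleu on lists of token sequences. The standard computation behind such a function is clipped n-gram precision combined with a brevity penalty; the following self-contained sketch illustrates that formula (hypothetical name compute_bleu_sketch, max order 4, single reference per sentence; it is not this project's actual implementation):

import collections
import math

def compute_bleu_sketch(reference_corpus, translation_corpus, max_order=4):
    """Corpus BLEU: geometric mean of clipped n-gram precisions times a
    brevity penalty. Inputs are lists of token lists (one per sentence);
    the return value is in [0, 1], which is why the callers above multiply by 100."""
    def ngrams(tokens, order):
        counts = collections.Counter()
        for n in range(1, order + 1):
            for i in range(len(tokens) - n + 1):
                counts[tuple(tokens[i:i + n])] += 1
        return counts

    matches = [0] * max_order       # clipped n-gram matches, per order
    possible = [0] * max_order      # candidate n-grams, per order
    ref_len = hyp_len = 0
    for ref, hyp in zip(reference_corpus, translation_corpus):
        ref_len += len(ref)
        hyp_len += len(hyp)
        # Counter intersection keeps the minimum count, i.e. clipped matches.
        overlap = ngrams(hyp, max_order) & ngrams(ref, max_order)
        for ngram, count in overlap.items():
            matches[len(ngram) - 1] += count
        for n in range(1, max_order + 1):
            possible[n - 1] += max(len(hyp) - n + 1, 0)

    precisions = [float(m) / p if p > 0 else 0.0
                  for m, p in zip(matches, possible)]
    if min(precisions) > 0:
        geo_mean = math.exp(sum(math.log(p) for p in precisions) / max_order)
    else:
        geo_mean = 0.0
    bp = 1.0 if hyp_len > ref_len else math.exp(1 - float(ref_len) / max(hyp_len, 1))
    return geo_mean * bp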
Example #6
def train(params):
    with tf.Graph().as_default(), tf.device('/cpu:0'):
        global_step = tf.get_variable('global_step', [],
                                      initializer=tf.constant_initializer(0),
                                      trainable=False)

        # calculate the learning rate schedule
        learning_rate = get_learning_rate(params.learning_rate,
                                          params.hidden_size,
                                          params.learning_rate_warmup_steps,
                                          global_step)

        optimizer = tf.contrib.opt.LazyAdamOptimizer(
            learning_rate,
            beta1=params.optimizer_adam_beta1,
            beta2=params.optimizer_adam_beta2,
            epsilon=params.optimizer_adam_epsilon)

        # get src,tgt sentence for each model tower
        my_dataset = dataset.Dataset(params)
        # src, tgt = my_dataset.train_input_fn(params)
        # batch_queue = tf.contrib.slim.prefetch_queue.prefetch_queue(
        #     [src, tgt], capacity=2 * flags_obj.num_gpus
        # )
        train_iterator = my_dataset.train_input_fn(params)
        valid_iterator = my_dataset.eval_input_fn(params)

        tower_grads = []
        g_tower_grads = []
        model = transformer_5.Transformer(params, is_train=True)
        with tf.variable_scope(tf.get_variable_scope(), reuse=tf.AUTO_REUSE):
            #tf.logging.info(tf.get_variable_scope())
            for i in xrange(flags_obj.num_gpus):
                with tf.device('/gpu:%d' % i):
                    with tf.name_scope('%s_%d' % (TOWER_NAME, i)) as scope:
                        tf.logging.info("Build graph on gpu:{}".format(i))
                        loss, g_loss, rewards_mb = gan_tower_loss(
                            scope, model, train_iterator)
                        # Reuse variables for the next tower.
                        # tf.get_variable_scope().reuse_variables()
                        # Retain the summaries from the final tower.
                        summaries = tf.get_collection(tf.GraphKeys.SUMMARIES,
                                                      scope)

                        grads = optimizer.compute_gradients(loss)
                        g_grads = optimizer.compute_gradients(g_loss)
                        #for var, grad in grads:
                        #    tf.logging.info(var)
                        tf.logging.info(
                            "total trainable variables number: {}".format(
                                len(grads)))
                        tower_grads.append(grads)
                        g_tower_grads.append(g_grads)

                    if i == 0 and valid_iterator:
                        #with tf.name_scope('%s_%d' % (TOWER_NAME, i)) as scope:
                        # valid_loss_op = tower_loss(scope, valid_iterator)
                        #val_pred, val_target = evaluation(valid_iterator)
                        val_loss_op, val_logits_op, val_tgt_op = evaluation(
                            model, valid_iterator)
                        summaries.append(
                            tf.summary.scalar("val_loss", val_loss_op))

        # We must calculate the mean of each gradient. Note that this is the
        # synchronization point across all towers.
        if len(tower_grads) > 1:
            grads = average_gradients(tower_grads)
            g_grads = average_gradients(g_tower_grads)
        else:
            grads = tower_grads[0]
            g_grads = g_tower_grads[0]

        # Add a summary to track the learning rate.
        summaries.append(tf.summary.scalar('learning_rate', learning_rate))

        # Add histograms for gradients.
        for grad, var in grads:
            if grad is not None:
                summaries.append(
                    tf.summary.histogram(var.op.name + '/gradients', grad))

        # Apply the gradients to adjust the shared variables.
        apply_gradient_op = optimizer.apply_gradients(grads,
                                                      global_step=global_step)
        g_apply_gradient_op = optimizer.apply_gradients(
            g_grads, global_step=global_step)

        # Add histograms for trainable variables.
        for var in tf.trainable_variables():
            summaries.append(tf.summary.histogram(var.op.name, var))

        # Track the moving averages of all trainable variables.
        #variable_averages = tf.train.ExponentialMovingAverage(
        #    MOVING_AVERAGE_DECAY, global_step)
        #variables_averages_op = variable_averages.apply(tf.trainable_variables())

        # Group all updates into a single train op.
        # train_op = tf.group(apply_gradient_op, variables_averages_op)
        train_op = tf.group(apply_gradient_op, g_apply_gradient_op)

        # Create a saver.
        saver = tf.train.Saver(tf.global_variables(), max_to_keep=20)

        # Build the summary operation from the last tower summaries.
        summary_op = tf.summary.merge(summaries)

        # Build an initialization operation to run below.
        init = tf.global_variables_initializer()

        # Start running operations on the Graph. allow_soft_placement must be set to
        # True to build towers on GPU, as some of the ops do not have GPU
        # implementations.
        sess_config = tf.ConfigProto()
        sess_config.gpu_options.allow_growth = True
        sess_config.allow_soft_placement = True

        with tf.Session(config=sess_config) as sess:
            sess.run(init)
            sess.run(tf.local_variables_initializer())

            sess.run(train_iterator.initializer)

            #ckpt = tf.train.latest_checkpoint(flags_obj.pretrain_dir)
            ckpt = tf.train.latest_checkpoint(flags_obj.model_dir)
            tf.logging.info("ckpt {}".format(ckpt))
            if ckpt and tf.train.checkpoint_exists(ckpt):
                tf.logging.info(
                    "Reloading model parameters..from {}".format(ckpt))
                saver.restore(sess, ckpt)
            else:
                tf.logging.info("Create a new model...{}".format(
                    flags_obj.pretrain_dir))

            # Start the queue runners.
            tf.train.start_queue_runners(sess=sess)
            summary_writer = tf.summary.FileWriter(flags_obj.model_dir,
                                                   sess.graph)

            best_bleu = 0.0
            for step in xrange(flags_obj.train_steps):
                start_time = time.time()
                _, loss_value, g_loss_value, rewards_mb_value, baseline_value, total_rewards_value = sess.run(
                    [
                        train_op, loss, g_loss, rewards_mb, model.baseline,
                        model.total_rewards
                    ])
                tf.logging.info(
                    "step = {}, step_g_loss = {:.4f}, step_loss = {:.4f}".
                    format(step, g_loss_value, loss_value))
                duration = time.time() - start_time

                assert not np.isnan(
                    loss_value), 'Model diverged with loss = NaN'

                if step % 100 == 0:
                    num_examples_per_step = flags_obj.batch_size * flags_obj.num_gpus
                    examples_per_sec = num_examples_per_step / duration
                    sec_per_batch = duration / flags_obj.num_gpus

                    tf.logging.info(
                        "step = {}, step_g_loss = {:.4f}, step_loss = {:.4f}, reward_mb = {}, baseline = {}, total_rewards = {}"
                        .format(step, g_loss_value, loss_value,
                                rewards_mb_value[:5], baseline_value[:5],
                                total_rewards_value[:5]))

                if step % 100 == 0:
                    summary_str = sess.run(summary_op)
                    summary_writer.add_summary(summary_str, step)

                if step % flags_obj.steps_between_evals == 0:
                    sess.run(valid_iterator.initializer)
                    tf.logging.info(
                        "-------------------- Validation step ...{} -------------------------- ----------"
                        .format(step))
                    total_bleu = 0.0
                    total_size = 0
                    total_loss = 0.0
                    while True:
                        try:
                            val_loss, val_logit, val_tgt = sess.run(
                                [val_loss_op, val_logits_op, val_tgt_op])
                            val_pred = np.argmax(val_logit, axis=-1)
                            val_bleu = metrics.compute_bleu(val_tgt, val_pred)
                            batch_size = val_pred.shape[0]
                            total_bleu += val_bleu * batch_size
                            total_loss += val_loss * batch_size
                            total_size += batch_size
                            tf.logging.info(
                                "pairs shape {}, {}, step_bleu: {:.5f}, step_loss: {:.4f}"
                                .format(val_pred.shape, val_tgt.shape,
                                        val_bleu, val_loss))
                        except tf.errors.OutOfRangeError:
                            pred_string = array_to_string(val_pred[-1])
                            tgt_string = array_to_string(val_tgt[-1])
                            tf.logging.info(
                                "prediction:\n{}".format(pred_string))
                            tf.logging.info("target:\n{}".format(tgt_string))
                            tf.logging.info(
                                "Finished going through the valid dataset")
                            break
                    total_bleu /= total_size
                    total_loss /= total_size
                    tf.logging.info(
                        "{}, Step: {}, Valid loss: {:.6f}, Valid bleu : {:.6f}"
                        .format(datetime.now(), step, total_loss, total_bleu))
                    tf.logging.info(
                        "--------------------- Finish evaluation -----------------------------------------------------"
                    )
                    # Save the model checkpoint periodically.
                    if step == 0:
                        total_bleu = 0.0

                    if total_bleu > best_bleu:
                        best_bleu = total_bleu
                        checkpoint_path = os.path.join(flags_obj.model_dir,
                                                       'model.ckpt')
                        saver.save(sess, checkpoint_path, global_step=step)
                        tf.logging.info(
                            "Saving model at {}".format(checkpoint_path + "-" +
                                                        str(step)))
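This training loop (and the one in the last example below) builds its schedule with get_learning_rate(learning_rate, hidden_size, warmup_steps, global_step), which matches the usual Transformer schedule of linear warmup followed by inverse-square-root decay. A hedged sketch of that formula, written against the TF1 API used here and not taken from this repository:

import tensorflow as tf

def get_learning_rate_sketch(learning_rate, hidden_size,
                             learning_rate_warmup_steps, global_step):
    """Transformer-style schedule: scale by hidden_size**-0.5, warm up
    linearly, then decay with the inverse square root of the step."""
    with tf.name_scope("learning_rate"):
        warmup_steps = tf.to_float(learning_rate_warmup_steps)
        step = tf.to_float(global_step)
        lr = learning_rate * (hidden_size ** -0.5)
        lr *= tf.minimum(1.0, step / warmup_steps)       # linear warmup
        lr *= tf.rsqrt(tf.maximum(step, warmup_steps))   # rsqrt decay
        return lr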
Example #7
def train(params):
    with tf.Graph().as_default(), tf.device('/cpu:0'):
        g_model, d_model, train_return, valid_return, dataset_iter = build_graph(
            params)
        train_op, global_step, g_loss, xen_loss, rewards, learning_rate, \
            init_step, roll_mean_loss, real_mean_loss = train_return
        val_pred, val_tgt, val_src = valid_return
        train_iterator, valid_iterator = dataset_iter

        vars_to_update = tf.global_variables()
        print("total variables number is %i" % len(vars_to_update))
        update_op = train_helper.update_checkpoint(vars_to_update,
                                                   replace_from="Transformer",
                                                   replace_to="Discriminator")

        saver = tf.train.Saver(tf.global_variables(), max_to_keep=20)

        sess_config = tf.ConfigProto()
        sess_config.gpu_options.allow_growth = True
        sess_config.allow_soft_placement = True

        with tf.Session(config=sess_config) as sess:
            sess.run(tf.global_variables_initializer())
            sess.run(tf.local_variables_initializer())

            sess.run(train_iterator.initializer)

            # reload the parameters
            print(flags_obj.pretrain_dir)
            ckpt = tf.train.latest_checkpoint(flags_obj.pretrain_dir)
            tf.logging.info("ckpt {}".format(ckpt))
            if ckpt and tf.train.checkpoint_exists(ckpt):
                tf.logging.info(
                    "Reloading model parameters..from {}".format(ckpt))
                variables = tf.global_variables()
                var_keep_dic = train_helper.get_variables_in_checkpoint_file(
                    ckpt)
                variables_to_restore = []
                for v in variables:
                    if v.name.split(':')[0] in var_keep_dic:
                        variables_to_restore.append(v)
                restorer = tf.train.Saver(variables_to_restore)
                restorer.restore(sess, ckpt)
            else:
                tf.logging.info("Create a new model...{}".format(
                    flags_obj.model_dir))

            tf.train.start_queue_runners(sess=sess)
            summary_writer = tf.summary.FileWriter(flags_obj.model_dir,
                                                   sess.graph)

            best_bleu = 0.0
            sess.run(update_op)
            for step in xrange(init_step, flags_obj.train_steps):
                g_steps_per_iter = 5
                for g_step in range(g_steps_per_iter):
                    _, x_loss_value, g_loss_value, rewards_value, roll_loss, real_loss = sess.run(
                        [
                            train_op, xen_loss, g_loss, rewards,
                            roll_mean_loss, real_mean_loss
                        ],
                        feed_dict={
                            g_model.dropout_rate: 0.0,
                            d_model.dropout_rate: 0.1
                        })

                    assert not np.isnan(
                        g_loss_value), 'Model diverged with loss = NaN'
                    assert not np.isnan(
                        x_loss_value), 'Model diverged with loss = NaN'

                    if step % 50 == 0:
                        tf.logging.info(
                            "step = {}, g_loss = {:.4f}, x_loss = {:.4f}, roll_loss = {:.4f}, "
                            "real_loss = {:.4f}, reward = {}".format(
                                step, g_loss_value, x_loss_value, roll_loss,
                                real_loss, rewards_value[:5]))

                # train discriminator
                sess.run(update_op)

                if step % flags_obj.steps_between_evals == 0:
                    sess.run(valid_iterator.initializer)
                    tf.logging.info(
                        "------------- Validation step ...{} -----------".
                        format(step))
                    total_bleu = 0.0
                    total_size = 0
                    while True:
                        try:
                            val_tgt_np, val_src_np, val_pred_np = sess.run(
                                [val_tgt, val_src, val_pred],
                                feed_dict={
                                    g_model.dropout_rate: 0.0,
                                    d_model.dropout_rate: 0.0
                                })
                            val_bleu = metrics.compute_bleu(
                                val_tgt_np, val_pred_np)
                            batch_size = val_pred_np.shape[0]
                            total_bleu += val_bleu * batch_size
                            total_size += batch_size
                        except tf.errors.OutOfRangeError:
                            break
                    total_bleu /= total_size
                    tf.logging.info("{}, Step: {}, Valid bleu : {:.6f}".format(
                        datetime.now(), step, total_bleu))

                    tf.logging.info(
                        "--------------------- Finish evaluation ---------------------"
                    )
                    # Save the model checkpoint periodically.
                    if total_bleu > best_bleu:
                        best_bleu = total_bleu
                        checkpoint_path = os.path.join(flags_obj.model_dir,
                                                       'model.ckpt')
                        saver.save(sess, checkpoint_path, global_step=step)
                        tf.logging.info(
                            "Saving model at {}".format(checkpoint_path + "-" +
                                                        str(step)))
Example #8
def train(params):
    with tf.Graph().as_default():
        if tf.train.latest_checkpoint(flags_obj.model_dir):
            global_step_value = int(
                tf.train.latest_checkpoint(flags_obj.model_dir).split("-")[-1])
            global_step = tf.Variable(initial_value=global_step_value,
                                      dtype=tf.int32,
                                      trainable=False)
            print(
                "right here!",
                int(
                    tf.train.latest_checkpoint(
                        flags_obj.model_dir).split("-")[-1]))
        else:
            global_step_value = 0
            global_step = tf.get_variable(
                'global_step', [],
                initializer=tf.constant_initializer(0),
                trainable=False)
        learning_rate = get_learning_rate(params.learning_rate,
                                          params.hidden_size,
                                          params.learning_rate_warmup_steps,
                                          global_step)

        optimizer = tf.contrib.opt.LazyAdamOptimizer(
            learning_rate,
            beta1=params.optimizer_adam_beta1,
            beta2=params.optimizer_adam_beta2,
            epsilon=params.optimizer_adam_epsilon)

        my_dataset = dataset.Dataset(params)

        train_iterator = my_dataset.train_input_fn(params)
        valid_iterator = my_dataset.eval_input_fn(params)

        tower_grads = []
        g_model = transformer_9.Transformer(params,
                                            is_train=True,
                                            mode=None,
                                            scope="Transformer")
        with tf.variable_scope(tf.get_variable_scope(), reuse=tf.AUTO_REUSE):
            for i in xrange(flags_obj.num_gpus):
                with tf.device('/gpu:%d' % i):
                    with tf.name_scope('%s_%d' % (TOWER_NAME, i)) as scope:
                        tf.logging.info("Build graph on gpu:{}".format(i))
                        logits = g_model.inference(train_iterator.source,
                                                   train_iterator.target)
                        xentropy, weights = metrics.padded_cross_entropy_loss(
                            logits, train_iterator.target,
                            params.label_smoothing, params.target_vocab_size)
                        loss = tf.reduce_sum(xentropy) / tf.reduce_sum(weights)
                        summaries = tf.get_collection(tf.GraphKeys.SUMMARIES,
                                                      scope)
                        grads = optimizer.compute_gradients(loss)
                        tf.logging.info(
                            "total trainable variables number: {}".format(
                                len(grads)))
                        tower_grads.append(grads)
                    if i == 0 and valid_iterator:
                        valid_pred = g_model.inference(
                            inputs=valid_iterator.source,
                            targets=None)["outputs"]
                        valid_tgt = valid_iterator.target
                        valid_src = valid_iterator.source

        if len(tower_grads) > 1:
            grads = average_gradients(tower_grads)
        else:
            grads = tower_grads[0]
        summaries.append(tf.summary.scalar('learning_rate', learning_rate))
        for grad, var in grads:
            if grad is not None:
                summaries.append(
                    tf.summary.histogram(var.op.name + '/gradients', grad))
        apply_gradient_op = optimizer.apply_gradients(grads,
                                                      global_step=global_step)
        for var in tf.trainable_variables():
            summaries.append(tf.summary.histogram(var.op.name, var))
        train_op = apply_gradient_op

        saver = tf.train.Saver(tf.trainable_variables(), max_to_keep=20)

        init = tf.global_variables_initializer()
        sess_config = tf.ConfigProto()
        sess_config.gpu_options.allow_growth = True
        sess_config.allow_soft_placement = True

        with tf.Session(config=sess_config) as sess:
            sess.run(init)
            sess.run(tf.local_variables_initializer())

            sess.run(train_iterator.initializer)

            ckpt = tf.train.latest_checkpoint(flags_obj.model_dir)
            tf.logging.info("ckpt {}".format(ckpt))
            if ckpt and tf.train.checkpoint_exists(ckpt):
                tf.logging.info(
                    "Reloading model parameters..from {}".format(ckpt))
                saver.restore(sess, ckpt)
            else:
                tf.logging.info("create a new model...{}".format(
                    flags_obj.model_dir))
            tf.train.start_queue_runners(sess=sess)
            summary_writer = tf.summary.FileWriter(flags_obj.model_dir,
                                                   sess.graph)

            count = 0
            best_bleu = 0.0
            for step in xrange(global_step_value, flags_obj.train_steps):
                _, loss_value, lr_value = sess.run(
                    [train_op, loss, learning_rate],
                    feed_dict={g_model.dropout_rate: 0.1})
                if step % 200 == 0:
                    tf.logging.info(
                        "step: {}, loss = {:.4f}, lr = {:5f}".format(
                            step, loss_value, lr_value))

                assert not np.isnan(
                    loss_value), 'Model diverged with loss = NaN'

                if step < 10000:
                    steps_between_evals = 2000
                else:
                    steps_between_evals = 1000
                if step % steps_between_evals == 0:
                    sess.run(valid_iterator.initializer)
                    tf.logging.info(
                        "------------------ Evaluation bleu -------------------------"
                    )
                    total_bleu = 0.0
                    total_size = 0
                    while True:
                        try:
                            val_pred, val_tgt, val_src = sess.run(
                                [valid_pred, valid_tgt, valid_src],
                                feed_dict={g_model.dropout_rate: 0.0})
                            val_bleu = metrics.compute_bleu(val_tgt, val_pred)
                            batch_size = val_pred.shape[0]
                            total_bleu += val_bleu * batch_size
                            total_size += batch_size
                        except tf.errors.OutOfRangeError:
                            break
                    total_bleu /= total_size
                    tf.logging.info("{}, Step: {}, Valid bleu : {:.6f}".format(
                        datetime.now(), step, total_bleu))
                    tf.logging.info(
                        "--------------------- Finish evaluation ------------------------"
                    )
                    # Save the model checkpoint periodically.
                    if step == 0:
                        total_bleu = 0.0

                    if total_bleu > best_bleu:
                        best_bleu = total_bleu
                        checkpoint_path = os.path.join(flags_obj.model_dir,
                                                       'model.ckpt')
                        saver.save(sess, checkpoint_path, global_step=step)
                        tf.logging.info(
                            "Saving model at {}".format(checkpoint_path + "-" +
                                                        str(step)))
                    elif total_bleu + 0.003 > best_bleu:
                        checkpoint_path = os.path.join(flags_obj.model_dir,
                                                       'model.ckpt')
                        saver.save(sess, checkpoint_path, global_step=step)
                        tf.logging.info(
                            "Saving model at {}".format(checkpoint_path + "-" +
                                                        str(step)))
                    else:
                        count += 1
                        # early stop
                        if count > 5:
                            break
            tf.logging.info("Best bleu is {}".format(best_bleu))