Example #1
def main(_):
    data_path = os.path.join(FLAGS.datadir,
                             "training-monolingual.tokenized.shuffled/*")
    # Effective global batch size: per-replica batch size times the number of
    # GPUs in AutoDist's resource spec.
    distribute_batch_size = FLAGS.batch_size * autodist._resource_spec.num_gpus
    with tf.Graph().as_default(), autodist.scope():
        train_dataset = gen_lm1b_train_dataset(data_path, FLAGS.num_steps)
        train_dataset = train_dataset.batch(FLAGS.batch_size)
        train_iterator = train_dataset.make_one_shot_iterator().get_next()

        model = language_model.LM(FLAGS.num_steps)
        # TODO (Hao): need to improve this.
        train_step = autodist.function(model.train_step)

        prev_time = time.time()
        for local_step in range(FLAGS.max_steps):
            loss, _ = train_step(train_iterator)
            if local_step % FLAGS.log_frequency == 0:
                cur_time = time.time()
                elapsed_time = cur_time - prev_time
                num_words = distribute_batch_size * FLAGS.log_frequency
                wps = float(num_words) / elapsed_time
                print(
                    "Iteration %d, time = %.2fs, wps = %.0f, train loss = %.4f"
                    % (local_step, elapsed_time, wps, loss))
                prev_time = cur_time
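
All four examples reference a module-level FLAGS object whose definitions live in the original scripts and are not shown here. A minimal sketch of how they might be declared with absl.flags, assuming the flag names and types used above (defaults are placeholders, not the originals):

# Hypothetical flag definitions matching the names used in these examples;
# the real defaults live in the original training scripts.
from absl import flags

FLAGS = flags.FLAGS

flags.DEFINE_string("datadir", None, "Directory holding the LM1B training shards.")
flags.DEFINE_string("model_dir", None, "Directory containing training checkpoints.")
flags.DEFINE_integer("batch_size", 128, "Per-replica batch size.")
flags.DEFINE_integer("num_steps", 20, "Unrolled sequence length.")
flags.DEFINE_integer("max_steps", 1000000, "Total number of training iterations.")
flags.DEFINE_integer("log_frequency", 100, "Log every N iterations.")
flags.DEFINE_integer("evaluate_every_nth_ckpt", 1, "Evaluate every n-th checkpoint.")
flags.DEFINE_float("learning_rate", 0.2, "Adagrad learning rate.")
flags.DEFINE_float("max_grad_norm", 10.0, "Global-norm clipping threshold for LSTM gradients.")
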
Example #2
def build_model():
  model = language_model.LM(FLAGS.num_steps)
  global_step = tf.train.get_or_create_global_step()

  with tf.device('/gpu:0'):
    placeholder_x = tf.placeholder(tf.int32, [FLAGS.batch_size, FLAGS.num_steps])
    placeholder_y = tf.placeholder(tf.int32, [FLAGS.batch_size, FLAGS.num_steps])
    placeholder_w = tf.placeholder(tf.int32, [FLAGS.batch_size, FLAGS.num_steps])
    initial_state_c = tf.placeholder(dtype=tf.float32,
                                     shape=[FLAGS.batch_size, model.state_size],
                                     name='initial_c')
    initial_state_h = tf.placeholder(dtype=tf.float32,
                                     shape=[FLAGS.batch_size, model.projected_size],
                                     name='initial_h')
    loss, final_state_c, final_state_h = model(
        placeholder_x, placeholder_y, placeholder_w,
        initial_state_c, initial_state_h, training=True)
    scaled_loss = loss * FLAGS.num_steps

    # Group the variables so each group gets its own gradient treatment below:
    # sparse embedding gradients are rescaled, LSTM gradients are clipped by
    # global norm, and softmax gradients are left as-is.
    emb_vars = list(model.emb)
    lstm_vars = [model.W, model.B, model.W_P]
    softmax_vars = list(model.softmax_w) + [model.softmax_b]
    all_vars = emb_vars + lstm_vars + softmax_vars
    grads = tf.gradients(scaled_loss, all_vars)

    emb_grads = grads[:len(emb_vars)]
    emb_grads = [tf.IndexedSlices(grad.values * FLAGS.batch_size,
                                  grad.indices,
                                  grad.dense_shape) for grad in emb_grads]

    lstm_grads = grads[len(emb_vars):len(emb_vars) + len(lstm_vars)]
    lstm_grads, _ = tf.clip_by_global_norm(lstm_grads, FLAGS.max_grad_norm)

    softmax_grads = grads[len(emb_vars) + len(lstm_vars):]

    clipped_grads = emb_grads + lstm_grads + softmax_grads
    grads_and_vars = list(zip(clipped_grads, all_vars))

    optimizer = tf.train.AdagradOptimizer(FLAGS.learning_rate, initial_accumulator_value=1.0)
    train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step)

    # Track exponential moving averages of the LSTM weights; the control
    # dependency ensures ema.apply() runs only after the Adagrad update.
    ema = tf.train.ExponentialMovingAverage(decay=0.999)
    with tf.control_dependencies([train_op]):
      train_op = ema.apply(lstm_vars)

  model.global_step = global_step
  model.loss = loss
  model.train_op = train_op

  model.final_state_c = final_state_c
  model.final_state_h = final_state_h

  model.initial_state_c = initial_state_c
  model.initial_state_h = initial_state_h

  model.x = placeholder_x
  model.y = placeholder_y
  model.w = placeholder_w

  return model
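
A minimal sketch of how the graph returned by build_model() might be driven in a tf.compat.v1 session, feeding the placeholders and threading the recurrent state from one step to the next. The batches iterator below is a stand-in, not the original gen_lm1b_train_dataset pipeline:

# Hypothetical training driver for build_model(); batches is assumed to be an
# iterator yielding (x, y, w) arrays shaped [batch_size, num_steps].
import numpy as np
import tensorflow.compat.v1 as tf

def run_training(model, batches, num_iterations):
    # Start from a zero recurrent state; shapes match the state placeholders.
    state_c = np.zeros(model.initial_state_c.shape.as_list(), dtype=np.float32)
    state_h = np.zeros(model.initial_state_h.shape.as_list(), dtype=np.float32)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for _ in range(num_iterations):
            x, y, w = next(batches)
            # Run one optimization step and carry the final LSTM state forward.
            loss, _, state_c, state_h = sess.run(
                [model.loss, model.train_op,
                 model.final_state_c, model.final_state_h],
                feed_dict={model.x: x, model.y: y, model.w: w,
                           model.initial_state_c: state_c,
                           model.initial_state_h: state_h})
            print("loss = %.4f" % loss)
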
Example #3
def main(argv):
    data_path = os.path.join(FLAGS.datadir, "training-monolingual.tokenized.shuffled/*")

    train_dataset = gen_lm1b_train_dataset(data_path, FLAGS.num_steps)
    train_dataset = train_dataset.batch(FLAGS.batch_size)
    model = language_model.LM(FLAGS.num_steps)
    prev_time = time.time()
    for local_step, input_data in enumerate(train_dataset.take(10)):
        loss, _ = model.train_step(input_data)
        if local_step % FLAGS.log_frequency == 0:
            cur_time = time.time()
            elapsed_time = cur_time - prev_time
            num_words = FLAGS.batch_size * FLAGS.log_frequency
            wps = float(num_words) / elapsed_time
            logging.info("Iteration %d, time = %.2fs, wps = %.0f, train loss = %.4f" % (
                local_step, elapsed_time, wps, loss))
            prev_time = cur_time
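
Example #3 runs eagerly, so each call to model.train_step executes its Python body op by op. A common refinement, sketched here under the assumption that train_step only uses TensorFlow ops and is compatible with graph tracing, is to compile it once with tf.function outside the loop:

# Sketch: compile the eager train step into a graph for speed.
compiled_train_step = tf.function(model.train_step)

for local_step, input_data in enumerate(train_dataset.take(10)):
    loss, _ = compiled_train_step(input_data)
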
Example #4
def main():
    with tf.Graph().as_default() as g:
        with tf.device('/gpu:0'):
            model = lm1b_model_graph.LM(FLAGS.num_steps)
            placeholder_x = tf.placeholder(tf.int32,
                                           [FLAGS.batch_size, FLAGS.num_steps])
            placeholder_y = tf.placeholder(tf.int32,
                                           [FLAGS.batch_size, FLAGS.num_steps])
            placeholder_w = tf.placeholder(tf.int32,
                                           [FLAGS.batch_size, FLAGS.num_steps])
            initial_state_c = tf.placeholder(
                dtype=tf.float32,
                shape=[FLAGS.batch_size, model.state_size],
                name='initial_c')
            initial_state_h = tf.placeholder(
                dtype=tf.float32,
                shape=[FLAGS.batch_size, model.projected_size],
                name='initial_h')
            loss, final_state_c, final_state_h = model(placeholder_x,
                                                       placeholder_y,
                                                       placeholder_w,
                                                       initial_state_c,
                                                       initial_state_h,
                                                       training=False)

        # At evaluation time, restore the exponential moving averages of the
        # LSTM weights (the last three trainable variables in this graph)
        # instead of the raw training values.
        ema = tf.train.ExponentialMovingAverage(decay=0.999)
        lstm_vars = tf.trainable_variables()[-3:]
        avg_dict = ema.variables_to_restore(lstm_vars)
        # new_names (defined elsewhere in the original script) remaps the
        # restore names produced by variables_to_restore to the names stored
        # in the checkpoint.
        new_dict = {}
        for key, value in avg_dict.items():
            new_dict[new_names[key]] = value
        saver = tf.train.Saver(new_dict)
        ckpt = tf.train.get_checkpoint_state(FLAGS.model_dir)
        # config (a tf.ConfigProto) is defined elsewhere in the original script.
        with tf.Session(config=config) as sess:
            for i in range(len(ckpt.all_model_checkpoint_paths)):
                if i % FLAGS.evaluate_every_nth_ckpt != 0:
                    continue
                evaluate(sess, loss, final_state_c, final_state_h,
                         placeholder_x, placeholder_y, placeholder_w,
                         initial_state_c, initial_state_h, saver, ckpt, i)
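
The evaluate helper is defined elsewhere in the original script. A rough sketch of what it might do, assuming it restores the i-th checkpoint and accumulates loss over a held-out feed (eval_batches() below is a hypothetical stand-in for that data source):

# Hypothetical evaluation helper; the real implementation lives in the
# original script.
import numpy as np

def evaluate(sess, loss, final_state_c, final_state_h,
             placeholder_x, placeholder_y, placeholder_w,
             initial_state_c, initial_state_h, saver, ckpt, i):
    # Restore the i-th checkpoint (with the EMA remapping baked into saver).
    saver.restore(sess, ckpt.all_model_checkpoint_paths[i])
    state_c = np.zeros(initial_state_c.shape.as_list(), dtype=np.float32)
    state_h = np.zeros(initial_state_h.shape.as_list(), dtype=np.float32)
    total_loss, num_batches = 0.0, 0
    for x, y, w in eval_batches():
        # Forward pass only; carry the LSTM state across batches.
        batch_loss, state_c, state_h = sess.run(
            [loss, final_state_c, final_state_h],
            feed_dict={placeholder_x: x, placeholder_y: y, placeholder_w: w,
                       initial_state_c: state_c, initial_state_h: state_h})
        total_loss += batch_loss
        num_batches += 1
    print("checkpoint %d: eval perplexity = %.2f"
          % (i, np.exp(total_loss / num_batches)))
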