Example 1
def main(_):
    FLAGS.alsologtostderr = True

    # Make training dataset.
    train_data = iter(
        dataset.load(tfds.Split.TRAIN,
                     batch_size=FLAGS.train_batch_size,
                     sequence_length=FLAGS.sequence_length))

    # Make evaluation dataset(s).
    eval_data = {  # pylint: disable=g-complex-comprehension
        split: iter(
            dataset.load(split,
                         batch_size=FLAGS.eval_batch_size,
                         sequence_length=FLAGS.sequence_length))
        for split in [tfds.Split.TRAIN, tfds.Split.TEST]
    }

    # Make loss, sampler, and optimizer.
    params_init, loss_fn = hk.without_apply_rng(hk.transform(sequence_loss))
    _, sample_fn = hk.without_apply_rng(hk.transform(sample))
    opt_init, _ = make_optimizer()

    loss_fn = jax.jit(loss_fn)
    sample_fn = jax.jit(sample_fn, static_argnums=[3])

    # Initialize training state.
    rng = hk.PRNGSequence(FLAGS.seed)
    initial_params = params_init(next(rng), next(train_data))
    initial_opt_state = opt_init(initial_params)
    state = TrainingState(params=initial_params, opt_state=initial_opt_state)

    # Training loop.
    for step in tqdm(range(FLAGS.training_steps + 1)):
        # Do a batch of SGD.
        train_batch = next(train_data)
        state = update(state, train_batch)

        # Periodically generate samples.
        if step % FLAGS.sampling_interval == 0:
            # First element of the training batch.
            context = train_batch['input'][:, 0]
            assert context.ndim == 1
            rng_key = next(rng)
            samples = sample_fn(state.params, rng_key, context,
                                FLAGS.sample_length)

            prompt = dataset.decode(context)
            continuation = dataset.decode(samples)

            logging.info('Prompt: %s', prompt)
            logging.info('Continuation: %s', continuation)

        # Periodically evaluate training and test loss.
        if step % FLAGS.evaluation_interval == 0:
            for split, ds in eval_data.items():
                eval_batch = next(ds)
                loss = loss_fn(state.params, eval_batch)
                logging.info({
                    'step': step,
                    'loss': float(loss),
                    'split': split,
                })
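
Both examples call helpers that are defined elsewhere in their source files and not shown here: `sequence_loss`, `sample`, `make_optimizer`, `update`, and `TrainingState`. The sketch below is a minimal, self-contained illustration of the underlying Haiku/Optax pattern they rely on (transform a loss function into an init/apply pair, initialize parameters from a batch, and thread optimizer state through a NamedTuple). The toy MLP loss, the plain Adam optimizer, and every hyperparameter here are assumptions for illustration, not the original implementations.

# Minimal, self-contained sketch of the Haiku/Optax training pattern
# (assumed stand-ins, not the helpers from the examples above).
from typing import NamedTuple

import haiku as hk
import jax
import jax.numpy as jnp
import optax


class TrainingState(NamedTuple):
    """Carries model parameters and optimizer state between steps."""
    params: hk.Params
    opt_state: optax.OptState


def toy_loss(batch) -> jnp.ndarray:
    """Stand-in for sequence_loss: a tiny MLP regression loss."""
    net = hk.nets.MLP([32, 1])
    prediction = net(batch['x'])
    return jnp.mean((prediction - batch['y']) ** 2)


def make_optimizer() -> optax.GradientTransformation:
    """Plain Adam; the real examples may configure something different."""
    return optax.adam(1e-3)


# hk.transform yields an (init, apply) pair; without_apply_rng drops the
# RNG argument from apply because the loss is deterministic.
params_init, loss_apply = hk.without_apply_rng(hk.transform(toy_loss))
opt_init, opt_update = make_optimizer()


@jax.jit
def update(state: TrainingState, batch) -> TrainingState:
    """One SGD step: differentiate the loss and apply the optimizer update."""
    grads = jax.grad(loss_apply)(state.params, batch)
    updates, new_opt_state = opt_update(grads, state.opt_state)
    new_params = optax.apply_updates(state.params, updates)
    return TrainingState(params=new_params, opt_state=new_opt_state)


rng = hk.PRNGSequence(42)
batch = {'x': jnp.ones([8, 4]), 'y': jnp.zeros([8, 1])}
initial_params = params_init(next(rng), batch)
state = TrainingState(params=initial_params, opt_state=opt_init(initial_params))
state = update(state, batch)  # One training step on the dummy batch.
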
Example 2
def main(_):
    flags.FLAGS.alsologtostderr = True

    # Make training dataset.
    train_data = dataset.load(tfds.Split.TRAIN,
                              batch_size=TRAIN_BATCH_SIZE.value,
                              sequence_length=SEQUENCE_LENGTH.value)

    # Make evaluation dataset(s).
    eval_data = {  # pylint: disable=g-complex-comprehension
        split: dataset.load(split,
                            batch_size=EVAL_BATCH_SIZE.value,
                            sequence_length=SEQUENCE_LENGTH.value)
        for split in [tfds.Split.TRAIN, tfds.Split.TEST]
    }

    # Make loss, sampler, and optimizer.
    params_init, loss_fn = hk.without_apply_rng(hk.transform(sequence_loss))
    _, sample_fn = hk.without_apply_rng(hk.transform(sample))
    opt_init, _ = make_optimizer()

    loss_fn = jax.jit(loss_fn)
    sample_fn = jax.jit(sample_fn, static_argnums=[3])

    # Initialize training state.
    rng = hk.PRNGSequence(SEED.value)
    initial_params = params_init(next(rng), next(train_data))
    initial_opt_state = opt_init(initial_params)
    state = TrainingState(params=initial_params, opt_state=initial_opt_state)

    # Training loop.
    for step in range(TRAINING_STEPS.value + 1):
        # Do a batch of SGD.
        train_batch = next(train_data)
        state = update(state, train_batch)

        # Periodically generate samples.
        if step % SAMPLING_INTERVAL.value == 0:
            # First element of the training batch.
            context = train_batch['input'][:, 0]
            assert context.ndim == 1
            rng_key = next(rng)
            samples = sample_fn(state.params, rng_key, context,
                                SAMPLE_LENGTH.value)

            prompt = dataset.decode(context)
            continuation = dataset.decode(samples)

            logging.info('Prompt: %s', prompt)
            logging.info('Continuation: %s', continuation)

        # Periodically evaluate training and test loss.
        if step % EVALUATION_INTERVAL.value == 0:
            for split, ds in eval_data.items():
                eval_batch = next(ds)
                loss = loss_fn(state.params, eval_batch)
                logging.info({
                    'step': step,
                    'loss': float(loss),
                    'split': split,
                })
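
Example 2 reads its hyperparameters through absl FlagHolder objects (`TRAIN_BATCH_SIZE.value` and friends) rather than the global `FLAGS` namespace used in Example 1. The sketch below shows one plausible way those module-level holders might be declared and the script launched; the flag names mirror the code above, but the default values and help strings are assumptions.

# Sketch of FlagHolder-style flag declarations (defaults are assumptions).
from absl import app, flags

TRAIN_BATCH_SIZE = flags.DEFINE_integer('train_batch_size', 32,
                                        'Batch size used for training.')
EVAL_BATCH_SIZE = flags.DEFINE_integer('eval_batch_size', 1000,
                                       'Batch size used for evaluation.')
SEQUENCE_LENGTH = flags.DEFINE_integer('sequence_length', 64,
                                       'Sequence length of each example.')
TRAINING_STEPS = flags.DEFINE_integer('training_steps', 100_000,
                                      'Total number of SGD steps.')
SAMPLING_INTERVAL = flags.DEFINE_integer('sampling_interval', 100,
                                         'Steps between generated samples.')
EVALUATION_INTERVAL = flags.DEFINE_integer('evaluation_interval', 100,
                                           'Steps between loss evaluations.')
SAMPLE_LENGTH = flags.DEFINE_integer('sample_length', 128,
                                     'Length of generated samples.')
SEED = flags.DEFINE_integer('seed', 42, 'Random seed.')


def main(_):
    # The training loop from Example 2 would go here; printing one flag
    # just shows how a FlagHolder is read.
    print('train_batch_size =', TRAIN_BATCH_SIZE.value)


if __name__ == '__main__':
    app.run(main)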