Example No. 1
def evaluate_loop(dataset, FactorMuE_variational, latent_dims, latent_length,
                  latent_alphabet_size, alphabet_size, transfer_mats, bt_scale,
                  b0_scale, u_conc, r_conc, l_conc, z_distr, mc_samples, dtype,
                  writer):
    """Evaluate heldout log likelihood and perplexity."""

    data_size = sum(1 for i in dataset.batch(1))
    heldout_likelihood = 0.
    heldout_log_perplex = 0.
    for x_batch, xlen_batch in dataset.batch(1):
        x = x_batch[0]
        xlen = xlen_batch[0]

        log_likelihood = 0.
        for rep in range(mc_samples):
            # Sample from variational approximation.
            qz, qbt, qb0, qu, qr, ql = FactorMuE_variational(x)

            # Forward pass.
            with tape() as model_tape:
                # Condition on variational sample.
                with condition(z=qz, bt=qbt, b0=qb0, u=qu, r=qr, l=ql):
                    posterior_predictive = FactorMuE(latent_dims,
                                                     latent_length,
                                                     latent_alphabet_size,
                                                     alphabet_size,
                                                     xlen,
                                                     transfer_mats,
                                                     bt_scale,
                                                     b0_scale,
                                                     u_conc,
                                                     r_conc,
                                                     l_conc,
                                                     z_distr=z_distr,
                                                     dtype=dtype)

            # Compute likelihood term.
            log_likelihood += mue.hmm_log_prob(
                posterior_predictive.distribution, x, xlen) / mc_samples

        # Compute KL(vi posterior||prior) for local parameters.
        kl_local = 0.
        for rv_name, variational_rv in [("z", qz)]:
            kl_local += tf.reduce_sum(
                variational_rv.distribution.kl_divergence(
                    model_tape[rv_name].distribution))

        # Summary
        local_elbo = log_likelihood - kl_local
        heldout_likelihood += local_elbo
        heldout_log_perplex -= local_elbo / tf.cast(xlen * data_size, dtype)

    # Compute perplexity.
    heldout_perplex = tf.exp(heldout_log_perplex)

    # Record.
    with writer.as_default():
        tf.summary.scalar('perplexity', heldout_perplex, step=1)

    return heldout_perplex, heldout_likelihood
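
The heldout perplexity reported here is the exponential of the negative ELBO per residue, averaged over the dataset. A minimal, self-contained sketch of that aggregation, using made-up per-sequence ELBO values and lengths in place of the MuE model:

import tensorflow as tf

# Hypothetical per-sequence ELBO estimates and sequence lengths.
local_elbos = tf.constant([-120.3, -98.7, -143.1])
xlens = tf.constant([40., 35., 50.])
data_size = tf.cast(tf.shape(local_elbos)[0], tf.float32)

# Same accumulation as the loop above, just vectorized: subtract each
# sequence's ELBO per residue, averaged over the dataset, then exponentiate.
heldout_log_perplex = tf.reduce_sum(-local_elbos / (xlens * data_size))
heldout_perplex = tf.exp(heldout_log_perplex)
print(float(heldout_perplex))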
Example No. 2
def evaluate_loop(dataset, RegressMuE_variational,
                  latent_dims, latent_length,
                  latent_alphabet_size, alphabet_size,
                  transfer_mats, bt_scale, b0_scale, u_conc, r_conc, l_conc,
                  mc_samples, dtype, writer):
    """Evaluate heldout log likelihood and perplexity."""

    data_size = sum(1 for i in dataset.batch(1))
    heldout_likelihood = 0.
    heldout_log_perplex = 0.
    for z_batch, x_batch, xlen_batch in dataset.batch(1):
        z = z_batch[0]
        x = x_batch[0]
        xlen = xlen_batch[0]

        log_likelihood = 0.
        for rep in range(mc_samples):
            # Sample from variational approximation.
            qbt, qb0, qu, qr, ql = RegressMuE_variational()

            # Condition on variational sample.
            with condition(bt=qbt, b0=qb0, u=qu, r=qr, l=ql):
                posterior_predictive = RegressMuE(
                        z, latent_dims, latent_length,
                        latent_alphabet_size, alphabet_size,
                        xlen, transfer_mats, bt_scale, b0_scale,
                        u_conc, r_conc, l_conc, dtype=dtype)

            # Compute likelihood term.
            log_likelihood += mue.hmm_log_prob(
                    posterior_predictive.distribution, x, xlen) / mc_samples

        # Summary
        local_elbo = log_likelihood
        heldout_likelihood += local_elbo
        heldout_log_perplex -= local_elbo/tf.cast(xlen * data_size, dtype)

    # Compute perplexity.
    heldout_perplex = tf.exp(heldout_log_perplex)

    # Record.
    with writer.as_default():
        tf.summary.scalar('perplexity', heldout_perplex, step=1)

    return heldout_perplex, heldout_likelihood
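
Unlike FactorMuE, RegressMuE takes z from the dataset and has no per-sequence latent variable, so the local ELBO here is simply a Monte Carlo estimate of the predictive log likelihood averaged over mc_samples draws from the variational approximation. A toy sketch of that averaging pattern, with simple Normal distributions standing in for RegressMuE and its variational family:

import tensorflow as tf
import tensorflow_probability as tfp

tfpd = tfp.distributions

x = tf.constant(1.3)              # heldout observation
q_mu = tfpd.Normal(0.5, 0.1)      # variational posterior over one parameter
mc_samples = 16

log_likelihood = 0.
for rep in range(mc_samples):
    mu = q_mu.sample()                    # sample from the variational approximation
    predictive = tfpd.Normal(mu, 1.)      # model conditioned on that sample
    log_likelihood += predictive.log_prob(x) / mc_samples
print(float(log_likelihood))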
Example No. 3
def test_hmm_log_prob():

    a0 = np.array([0.9, 0.08, 0.02])
    a = np.array([[0.1, 0.8, 0.1], [0.5, 0.3, 0.2], [0.4, 0.4, 0.2]])
    e = np.array([[0.99, 0.01], [0.01, 0.99], [0.5, 0.5]])

    model = tfpd.HiddenMarkovModel(
        tfpd.Categorical(
            logits=tf.math.log(tf.convert_to_tensor(np.matmul(a0, a)))),
        tfpd.Categorical(logits=tf.math.log(tf.convert_to_tensor(a))),
        tfpd.Categorical(logits=tf.math.log(tf.convert_to_tensor(e))), 5)

    x = tf.convert_to_tensor(
        np.array([[0., 1.], [1., 0.], [0., 1.], [0., 1.], [1., 0.],
                  [0.5, 0.5]]))
    xlen = tf.convert_to_tensor(5)

    chk_lp = mue.hmm_log_prob(model, x, xlen)

    f = np.matmul(a0, a) * e[:, 1]
    f = np.matmul(f, a) * e[:, 0]
    f = np.matmul(f, a) * e[:, 1]
    f = np.matmul(f, a) * e[:, 1]
    f = np.matmul(f, a) * e[:, 0]
    tst_lp = np.log(np.sum(f))

    assert np.allclose(chk_lp.numpy(), tst_lp)

    # Check against (predictably incorrect) tensorflow probability
    # implementation.
    model = tfpd.HiddenMarkovModel(
        tfpd.Categorical(logits=tf.math.log(tf.convert_to_tensor(a0))),
        tfpd.Categorical(logits=tf.math.log(tf.convert_to_tensor(a))),
        tfpd.Categorical(logits=tf.math.log(tf.convert_to_tensor(e))), 5)
    xcat = tf.convert_to_tensor([1, 0, 1, 1, 0])
    tst_lp2 = model.log_prob(xcat)

    assert np.allclose(chk_lp.numpy(), tst_lp2.numpy())
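
The hand-unrolled check above is the standard HMM forward algorithm with initial state distribution a0 @ a. The same recursion written as a loop over observed symbols, which reproduces tst_lp for this example:

import numpy as np

a0 = np.array([0.9, 0.08, 0.02])
a = np.array([[0.1, 0.8, 0.1], [0.5, 0.3, 0.2], [0.4, 0.4, 0.2]])
e = np.array([[0.99, 0.01], [0.01, 0.99], [0.5, 0.5]])
obs = [1, 0, 1, 1, 0]  # the first xlen observed symbols

# Forward algorithm: propagate the state distribution through the
# transition matrix, then weight by the emission probability of the
# observed symbol.
f = a0
for sym in obs:
    f = np.matmul(f, a) * e[:, sym]
print(np.log(np.sum(f)))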
Example No. 4
def train_loop(dataset, RegressMuE_variational, trainable_variables,
               latent_dims, latent_length, latent_alphabet_size, alphabet_size,
               transfer_mats, bt_scale, b0_scale, u_conc, r_conc, l_conc,
               max_epochs, shuffle_buffer, batch_size,
               optimizer_name, learning_rate, dtype, writer, out_folder):
    """Training loop."""
    # Set up the optimizer.
    optimizer = getattr(tf.keras.optimizers, optimizer_name)(
                                    learning_rate=learning_rate)
    data_size = sum(1 for i in dataset.batch(1))

    # Training loop.
    step = 0
    t0 = datetime.datetime.now()
    for epoch in range(max_epochs):
        for z_batch, x_batch, xlen_batch in dataset.shuffle(
                                    shuffle_buffer).batch(batch_size):

            step += 1

            accum_gradients = [tf.zeros(elem.shape, dtype=dtype)
                               for elem in trainable_variables]
            accum_elbo = 0.
            ix = -1
            for z, x, xlen in zip(z_batch, x_batch, xlen_batch):
                ix += 1

                # Track gradients.
                with tf.GradientTape() as gtape:
                    # Sample from variational approximation.
                    qbt, qb0, qu, qr, ql = RegressMuE_variational()
                    # Forward pass.
                    with tape() as model_tape:
                        # Condition on variational sample.
                        with condition(bt=qbt, b0=qb0, u=qu, r=qr, l=ql):
                            posterior_predictive = RegressMuE(
                                    z, latent_dims, latent_length,
                                    latent_alphabet_size, alphabet_size,
                                    xlen, transfer_mats, bt_scale, b0_scale,
                                    u_conc, r_conc, l_conc, dtype=dtype)

                    # Compute likelihood term.
                    log_likelihood = mue.hmm_log_prob(
                            posterior_predictive.distribution, x, xlen)

                    # Compute KL(vi posterior||prior) for global parameters.
                    kl_global = 0.
                    if ix == x_batch.shape[0] - 1:
                        for rv_name, variational_rv in [
                                    ("bt", qbt), ("b0", qb0),
                                    ("u", qu), ("r", qr), ("l", ql)]:
                            kl_global += tf.reduce_sum(
                                    variational_rv.distribution.kl_divergence(
                                        model_tape[rv_name].distribution))

                    # Compute the ELBO term, correcting for subsampling.
                    elbo = ((data_size/x_batch.shape[0])
                            * log_likelihood - kl_global)

                    # Compute gradient.
                    loss = -elbo
                    gradients = gtape.gradient(loss, trainable_variables)

                # Accumulate elbo and gradients.
                accum_elbo += elbo
                for gi, grad in enumerate(gradients):
                    if grad is not None:
                        accum_gradients[gi] += grad

            # Record.
            with writer.as_default():
                tf.summary.scalar('elbo', accum_elbo, step=step)

            # Optimization step.
            optimizer.apply_gradients(
                    zip(accum_gradients, trainable_variables))

        print('epoch {} ({})'.format(epoch, datetime.datetime.now() - t0))

    return RegressMuE_variational, trainable_variables
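
train_loop accumulates per-datapoint gradients across a minibatch and applies them in a single optimizer step. A stripped-down sketch of that accumulate-then-apply pattern on a toy squared-error loss (the variable w and the data here are placeholders, not part of the MuE code):

import tensorflow as tf

w = tf.Variable(0.)
trainable_variables = [w]
optimizer = tf.keras.optimizers.Adam(learning_rate=0.1)
x_batch = tf.constant([1., 2., 3.])

accum_gradients = [tf.zeros(v.shape) for v in trainable_variables]
for x in x_batch:
    with tf.GradientTape() as gtape:
        loss = (w - x) ** 2          # per-datapoint loss
    gradients = gtape.gradient(loss, trainable_variables)
    for gi, grad in enumerate(gradients):
        if grad is not None:
            accum_gradients[gi] += grad

# One optimizer step with the summed gradients.
optimizer.apply_gradients(zip(accum_gradients, trainable_variables))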