Example 1
def evaluate_loop(dataset, FactorMuE_variational, latent_dims, latent_length,
                  latent_alphabet_size, alphabet_size, transfer_mats, bt_scale,
                  b0_scale, u_conc, r_conc, l_conc, z_distr, mc_samples, dtype,
                  writer):
    """Evaluate heldout log likelihood and perplexity."""

    data_size = sum(1 for i in dataset.batch(1))
    heldout_likelihood = 0.
    heldout_log_perplex = 0.
    for x_batch, xlen_batch in dataset.batch(1):
        x = x_batch[0]
        xlen = xlen_batch[0]

        log_likelihood = 0.
        for rep in range(mc_samples):
            # Sample from variational approximation.
            qz, qbt, qb0, qu, qr, ql = FactorMuE_variational(x)

            # Forward pass.
            with tape() as model_tape:
                # Condition on variational sample.
                with condition(z=qz, bt=qbt, b0=qb0, u=qu, r=qr, l=ql):
                    posterior_predictive = FactorMuE(latent_dims,
                                                     latent_length,
                                                     latent_alphabet_size,
                                                     alphabet_size,
                                                     xlen,
                                                     transfer_mats,
                                                     bt_scale,
                                                     b0_scale,
                                                     u_conc,
                                                     r_conc,
                                                     l_conc,
                                                     z_distr=z_distr,
                                                     dtype=dtype)

            # Compute likelihood term.
            log_likelihood += mue.hmm_log_prob(
                posterior_predictive.distribution, x, xlen) / mc_samples

        # Compute KL(vi posterior||prior) for local parameters.
        kl_local = 0.
        for rv_name, variational_rv in [("z", qz)]:
            kl_local += tf.reduce_sum(
                variational_rv.distribution.kl_divergence(
                    model_tape[rv_name].distribution))

        # Accumulate the held-out ELBO and per-residue log perplexity.
        local_elbo = log_likelihood - kl_local
        heldout_likelihood += local_elbo
        heldout_log_perplex -= local_elbo / tf.cast(xlen * data_size, dtype)

    # Compute perplexity.
    heldout_perplex = tf.exp(heldout_log_perplex)

    # Record.
    with writer.as_default():
        tf.summary.scalar('perplexity', heldout_perplex, step=1)

    return heldout_perplex, heldout_likelihood
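
A minimal usage sketch for this evaluation loop. It assumes `import tensorflow as tf`, a trained `FactorMuE_variational` program, the prior hyperparameters used during training, and held-out arrays `x_heldout`/`xlen_heldout` are already in scope; the concrete settings below (Monte Carlo samples, log directory) are placeholders, not part of the original code.

# Hypothetical driver; all names not defined here are assumed to exist.
heldout_data = tf.data.Dataset.from_tensor_slices((x_heldout, xlen_heldout))
writer = tf.summary.create_file_writer('logs/eval')
heldout_perplex, heldout_likelihood = evaluate_loop(
    heldout_data, FactorMuE_variational, latent_dims, latent_length,
    latent_alphabet_size, alphabet_size, transfer_mats, bt_scale, b0_scale,
    u_conc, r_conc, l_conc, z_distr='Normal', mc_samples=5,
    dtype=tf.float32, writer=writer)
print('held-out perplexity:', heldout_perplex.numpy())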
Example 2
def project_latent_to_sequence(zs, RegressMuE_variational, latent_dims,
                               latent_length, latent_alphabet_size,
                               alphabet_size, bt_scale, b0_scale, u_conc,
                               r_conc, l_conc, z_covar=None, x=None, xlen=None,
                               mc_samples=1, dtype=tf.float32):
    """Project from latent space to sequence space."""

    # Make transfer matrices.
    transfer_mats = mue.make_transfer(latent_length, dtype=dtype)

    if x is not None:
        # Get alignment projection matrix.
        pmode_oh = single_alignment_mode(
                      RegressMuE_variational, z_covar, x, xlen, latent_dims,
                      latent_length, latent_alphabet_size, alphabet_size,
                      transfer_mats, bt_scale, b0_scale, u_conc, r_conc,
                      l_conc, mc_samples=mc_samples, dtype=dtype)

    else:
        # Project onto conserved positions.
        pmode_oh = tf.convert_to_tensor(
                mue.mg2k(np.arange(latent_length), 0)[:, None]
                == np.arange(2*latent_length + 2)[None, :], dtype=dtype)
        xlen = latent_length

    # Store results.
    nus = [tf.zeros([xlen, alphabet_size], dtype=dtype)
           for iz, z in enumerate(zs)]

    for rep in range(mc_samples):
        # Sample from variational approximation.
        qbt, qb0, qu, qr, ql = RegressMuE_variational()
        for iz, z in enumerate(zs):
            # Condition on latent space position.
            with condition(bt=qbt, b0=qb0, u=qu, r=qr, l=ql):
                posterior_predictive = RegressMuE(
                         z, latent_dims, latent_length,
                         latent_alphabet_size, alphabet_size,
                         xlen, transfer_mats, bt_scale, b0_scale,
                         u_conc, r_conc, l_conc, dtype=dtype)

            # Compute latent-sequence space observation.
            latseq = tf.exp(
                posterior_predictive.distribution
                .observation_distribution.logits)
            # Project to sequence space.
            nus[iz] += tf.matmul(pmode_oh, latseq) / mc_samples
    return nus
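
A rough usage sketch: without a reference sequence `x`, the latent points are decoded onto the conserved positions of the latent profile; with `x`, they are projected through that sequence's MAP alignment. The latent point values and sample count below are placeholder assumptions, and the trained `RegressMuE_variational` plus prior hyperparameters are assumed to be in scope.

# Hypothetical call without a reference sequence.
zs = [tf.zeros([latent_dims], dtype=tf.float32),
      tf.ones([latent_dims], dtype=tf.float32)]
nus = project_latent_to_sequence(
    zs, RegressMuE_variational, latent_dims, latent_length,
    latent_alphabet_size, alphabet_size, bt_scale, b0_scale,
    u_conc, r_conc, l_conc, mc_samples=10)
# Each nus[i] has shape (latent_length, alphabet_size): the expected letter
# probabilities at latent point zs[i].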
Example 3
def evaluate_loop(dataset, RegressMuE_variational,
                  latent_dims, latent_length,
                  latent_alphabet_size, alphabet_size,
                  transfer_mats, bt_scale, b0_scale, u_conc, r_conc, l_conc,
                  mc_samples, dtype, writer):
    """Evaluate heldout log likelihood and perplexity."""

    data_size = sum(1 for i in dataset.batch(1))
    heldout_likelihood = 0.
    heldout_log_perplex = 0.
    for z_batch, x_batch, xlen_batch in dataset.batch(1):
        z = z_batch[0]
        x = x_batch[0]
        xlen = xlen_batch[0]

        log_likelihood = 0.
        for rep in range(mc_samples):
            # Sample from variational approximation.
            qbt, qb0, qu, qr, ql = RegressMuE_variational()

            # Condition on variational sample.
            with condition(bt=qbt, b0=qb0, u=qu, r=qr, l=ql):
                posterior_predictive = RegressMuE(
                        z, latent_dims, latent_length,
                        latent_alphabet_size, alphabet_size,
                        xlen, transfer_mats, bt_scale, b0_scale,
                        u_conc, r_conc, l_conc, dtype=dtype)

            # Compute likelihood term.
            log_likelihood += mue.hmm_log_prob(
                    posterior_predictive.distribution, x, xlen) / mc_samples

        # RegressMuE has no per-sequence latent variables, so the local ELBO
        # is just the likelihood term.
        local_elbo = log_likelihood
        heldout_likelihood += local_elbo
        heldout_log_perplex -= local_elbo/tf.cast(xlen * data_size, dtype)

    # Compute perplexity.
    heldout_perplex = tf.exp(heldout_log_perplex)

    # Record.
    with writer.as_default():
        tf.summary.scalar('perplexity', heldout_perplex, step=1)

    return heldout_perplex, heldout_likelihood
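
This variant of the evaluation loop expects the dataset to yield (covariate, one-hot sequence, length) triples. A hedged driver sketch, with all undefined names (`z_heldout`, `x_heldout`, `xlen_heldout`, the hyperparameters, and `writer`) assumed to exist already:

# Hypothetical driver for the RegressMuE evaluation loop.
heldout_data = tf.data.Dataset.from_tensor_slices(
    (z_heldout, x_heldout, xlen_heldout))
heldout_perplex, heldout_likelihood = evaluate_loop(
    heldout_data, RegressMuE_variational, latent_dims, latent_length,
    latent_alphabet_size, alphabet_size, transfer_mats, bt_scale, b0_scale,
    u_conc, r_conc, l_conc, mc_samples=5, dtype=tf.float32, writer=writer)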
Example 4
def single_alignment_mode(RegressMuE_variational, z, x, xlen, latent_dims,
                          latent_length, latent_alphabet_size, alphabet_size,
                          transfer_mats, bt_scale, b0_scale, u_conc, r_conc,
                          l_conc, mc_samples=1, z_distr='Normal',
                          dtype=tf.float32):
    """Align example sequence (eg. for plotting on structure)."""
    # Sample from variational approximation.
    qbt, qb0, qu, qr, ql = RegressMuE_variational()

    pmode_tuples = []
    for rep in range(mc_samples):
        # Condition on variational sample.
        with condition(bt=qbt.distribution.sample(),
                       b0=qb0.distribution.sample(),
                       u=qu.distribution.sample(),
                       r=qr.distribution.sample(), l=ql.distribution.sample()):
            posterior_predictive = RegressMuE(
                    z, latent_dims, latent_length,
                    latent_alphabet_size, alphabet_size,
                    xlen, transfer_mats, bt_scale, b0_scale,
                    u_conc, r_conc, l_conc, dtype=dtype)

        # Compute posterior mode.
        pmode = posterior_predictive.distribution.posterior_mode(x[:xlen, :])

        # Convert to tuple.
        pmode_tuples.append(tuple(pmode.numpy()))

    # Get mode (MAP).
    pmode_MAP = mue.get_most_common_tuple(pmode_tuples)
    print('{} unique alignment(s) out of {} Monte Carlo samples'.format(
          len(set(pmode_tuples)), mc_samples))

    # One-hot encode (sequence position x hidden state).
    num_states = (posterior_predictive.distribution
                  .observation_distribution.logits.shape[0])
    pmode_oh = (tf.convert_to_tensor(pmode_MAP, dtype=tf.int32)[:, None]
                == tf.range(num_states, dtype=tf.int32)[None, :])

    return tf.cast(pmode_oh, dtype=dtype)
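
A sketch of how the returned projection matrix might be used, mirroring the x-is-not-None branch of project_latent_to_sequence above; the sequence `x`, its length `xlen`, its covariate `z`, and the sample count are assumed placeholders.

# Hypothetical call: MAP alignment of one sequence to the latent profile.
pmode_oh = single_alignment_mode(
    RegressMuE_variational, z, x, xlen, latent_dims, latent_length,
    latent_alphabet_size, alphabet_size, transfer_mats, bt_scale, b0_scale,
    u_conc, r_conc, l_conc, mc_samples=20)
# Row i of pmode_oh is a one-hot indicator over hidden states for sequence
# position i; multiplying per-state quantities by pmode_oh (as in
# project_latent_to_sequence) maps them onto the observed sequence.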
Example 5
def train_loop(dataset, RegressMuE_variational, trainable_variables,
               latent_dims, latent_length, latent_alphabet_size, alphabet_size,
               transfer_mats, bt_scale, b0_scale, u_conc, r_conc, l_conc,
               max_epochs, shuffle_buffer, batch_size,
               optimizer_name, learning_rate, dtype, writer, out_folder):
    """Training loop."""
    # Set up the optimizer.
    optimizer = getattr(tf.keras.optimizers, optimizer_name)(
                                    learning_rate=learning_rate)
    data_size = sum(1 for i in dataset.batch(1))

    # Training loop.
    step = 0
    t0 = datetime.datetime.now()
    for epoch in range(max_epochs):
        for z_batch, x_batch, xlen_batch in dataset.shuffle(
                                    shuffle_buffer).batch(batch_size):

            step += 1

            accum_gradients = [tf.zeros(elem.shape, dtype=dtype)
                               for elem in trainable_variables]
            accum_elbo = 0.
            ix = -1
            for z, x, xlen in zip(z_batch, x_batch, xlen_batch):
                ix += 1

                # Track gradients.
                with tf.GradientTape() as gtape:
                    # Sample from variational approximation.
                    qbt, qb0, qu, qr, ql = RegressMuE_variational()
                    # Forward pass.
                    with tape() as model_tape:
                        # Condition on variational sample.
                        with condition(bt=qbt, b0=qb0, u=qu, r=qr, l=ql):
                            posterior_predictive = RegressMuE(
                                    z, latent_dims, latent_length,
                                    latent_alphabet_size, alphabet_size,
                                    xlen, transfer_mats, bt_scale, b0_scale,
                                    u_conc, r_conc, l_conc, dtype=dtype)

                    # Compute likelihood term.
                    log_likelihood = mue.hmm_log_prob(
                            posterior_predictive.distribution, x, xlen)

                    # Compute KL(vi posterior||prior) for global parameters.
                    kl_global = 0.
                    if ix == x_batch.shape[0] - 1:
                        for rv_name, variational_rv in [
                                    ("bt", qbt), ("b0", qb0),
                                    ("u", qu), ("r", qr), ("l", ql)]:
                            kl_global += tf.reduce_sum(
                                    variational_rv.distribution.kl_divergence(
                                        model_tape[rv_name].distribution))

                    # Compute the ELBO term, correcting for subsampling.
                    elbo = ((data_size/x_batch.shape[0])
                            * log_likelihood - kl_global)

                    # Negative ELBO is the loss to minimize.
                    loss = -elbo

                # Compute the gradient after exiting the recording context.
                gradients = gtape.gradient(loss, trainable_variables)

                # Accumulate elbo and gradients.
                accum_elbo += elbo
                for gi, grad in enumerate(gradients):
                    if grad is not None:
                        accum_gradients[gi] += grad

            # Record.
            with writer.as_default():
                tf.summary.scalar('elbo', accum_elbo, step=step)

            # Optimization step.
            optimizer.apply_gradients(
                    zip(accum_gradients, trainable_variables))

        print('epoch {} ({})'.format(epoch, datetime.datetime.now() - t0))

    return RegressMuE_variational, trainable_variables
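
A sketch of a complete training call. The dataset construction, optimizer name, and numeric settings below are placeholder assumptions, and the variational program, trainable variables, and prior hyperparameters are assumed to come from model setup code not shown here.

# Hypothetical training driver.
train_data = tf.data.Dataset.from_tensor_slices(
    (z_train, x_train, xlen_train))
writer = tf.summary.create_file_writer('logs/train')
RegressMuE_variational, trainable_variables = train_loop(
    train_data, RegressMuE_variational, trainable_variables,
    latent_dims, latent_length, latent_alphabet_size, alphabet_size,
    transfer_mats, bt_scale, b0_scale, u_conc, r_conc, l_conc,
    max_epochs=20, shuffle_buffer=1000, batch_size=5,
    optimizer_name='Adam', learning_rate=0.01, dtype=tf.float32,
    writer=writer, out_folder='out')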