import numpy as np
import tensorflow as tf

# `mue`, `condition`, and the builder/loop helpers referenced below
# (build_RegressMuE_variational, build_FactorMuE_variational, train_loop,
# evaluate_loop, embed, RegressMuE, single_alignment_mode, ...) are
# project-local and assumed to be imported from the surrounding package.


def train(dataset, dataset_train, dataset_test, latent_dims, latent_length,
          latent_alphabet_size, alphabet_size, bt_scale, b0_scale,
          u_conc, r_conc, l_conc, max_epochs, shuffle_buffer, batch_size,
          optimizer_name, learning_rate, mc_samples, dtype, writer,
          out_folder):
    """Main training loop."""

    # Set up the variational approximation.
    RegressMuE_variational, trainable_variables = build_RegressMuE_variational(
            latent_dims, latent_length, latent_alphabet_size, alphabet_size,
            u_conc, r_conc, l_conc, dtype=dtype)

    # Make transfer matrices.
    transfer_mats = mue.make_transfer(latent_length, dtype=dtype)

    # Run training loop.
    RegressMuE_variational, trainable_variables = train_loop(
            dataset_train, RegressMuE_variational, trainable_variables,
            latent_dims, latent_length, latent_alphabet_size, alphabet_size,
            transfer_mats, bt_scale, b0_scale, u_conc, r_conc, l_conc,
            max_epochs, shuffle_buffer, batch_size, optimizer_name,
            learning_rate, dtype, writer, out_folder)

    # Run evaluation loop.
    heldout_perplex, heldout_likelihood = evaluate_loop(
            dataset_test, RegressMuE_variational, latent_dims, latent_length,
            latent_alphabet_size, alphabet_size, transfer_mats, bt_scale,
            b0_scale, u_conc, r_conc, l_conc, mc_samples, dtype, writer)

    return (RegressMuE_variational, trainable_variables, heldout_perplex,
            heldout_likelihood)
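

# Usage sketch, added for illustration (not part of the original source): how
# the RegressMuE `train` entry point above might be invoked. The tf.data
# datasets and summary writer are assumed to be prepared elsewhere, and every
# hyperparameter value below is a placeholder assumption, loosely mirroring
# the scales used in the unit tests.
def _example_train_regressmue(dataset, dataset_train, dataset_test, writer):
    dtype = tf.float64
    (RegressMuE_variational, trainable_variables,
     heldout_perplex, heldout_likelihood) = train(
            dataset, dataset_train, dataset_test,
            latent_dims=2, latent_length=10,
            latent_alphabet_size=21, alphabet_size=21,
            bt_scale=tf.convert_to_tensor(1., dtype=dtype),
            b0_scale=tf.convert_to_tensor(1., dtype=dtype),
            u_conc=tf.convert_to_tensor([10., 1.], dtype=dtype),
            r_conc=tf.convert_to_tensor([10., 1.], dtype=dtype),
            l_conc=tf.convert_to_tensor(2., dtype=dtype),
            max_epochs=5, shuffle_buffer=1000, batch_size=5,
            optimizer_name='Adam', learning_rate=0.01, mc_samples=1,
            dtype=dtype, writer=writer, out_folder='./out')
    return heldout_perplex, heldout_likelihood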


def build_FactorMuE_variational(latent_dims, latent_length,
                                latent_alphabet_size, alphabet_size,
                                u_conc, r_conc, l_conc, padded_data_length,
                                z_distr='Normal', dtype=tf.float32):
    """Build complete variational approximation."""

    # Get individual generators.
    uc, rc, lc = get_prior_conc(latent_length, latent_alphabet_size,
                                alphabet_size, u_conc, r_conc, l_conc,
                                dtype=dtype)
    enc_transfer_mats = mue.make_transfer(padded_data_length - 1, dtype=dtype)
    QZ, qz_params = build_trainable_infnet(
            latent_dims, latent_length, latent_alphabet_size, alphabet_size,
            padded_data_length, enc_transfer_mats, z_distr=z_distr,
            name="qz", dtype=dtype)
    QW, qbt_params = build_trainable_normal(
            [2, latent_dims, latent_length + 1, latent_alphabet_size],
            name="qbt", dtype=dtype)
    QB, qb0_params = build_trainable_normal(
            [2, latent_length + 1, latent_alphabet_size],
            name="qb0", dtype=dtype)
    QU, qu_params = build_trainable_dirichlet(uc, name="qu", dtype=dtype)
    QR, qr_params = build_trainable_dirichlet(rc, name="qr", dtype=dtype)
    QL, ql_params = build_trainable_dirichlet(lc, name="ql", dtype=dtype)

    # Consolidate trainable parameters.
    parameters = (qz_params + qbt_params + qb0_params + qu_params
                  + qr_params + ql_params)

    # Construct generator for complete variational approximation.
    def FactorMuE_variational(x=None):
        if x is None:
            return QW(), QB(), QU(), QR(), QL()
        else:
            return QZ(x), QW(), QB(), QU(), QR(), QL()

    # Return variational approximation generator and parameters.
    return FactorMuE_variational, parameters
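

# Usage sketch, added for illustration (not part of the original source):
# building the FactorMuE variational approximation and drawing one sample,
# with and without conditioning on a one-hot encoded sequence x. Sizes and
# concentration values mirror test_single_alignment_mode below.
def _example_factormue_variational():
    dtype = tf.float64
    FactorMuE_variational, parameters = build_FactorMuE_variational(
            latent_dims=2, latent_length=4,
            latent_alphabet_size=3, alphabet_size=3,
            u_conc=tf.convert_to_tensor([10., 1.], dtype=dtype),
            r_conc=tf.convert_to_tensor([10., 1.], dtype=dtype),
            l_conc=tf.convert_to_tensor(2., dtype=dtype),
            padded_data_length=5, z_distr='Normal', dtype=dtype)
    # Without data: one sample of the global latent variables.
    qbt, qb0, qu, qr, ql = FactorMuE_variational()
    # With data: the per-sequence embedding z is sampled as well. The final
    # uniform row marks the padding position, as in the tests.
    x = tf.convert_to_tensor(np.array([[0, 1, 0],
                                       [1, 0, 0],
                                       [0, 0, 1],
                                       [1, 0, 0],
                                       [1/3, 1/3, 1/3]]), dtype=dtype)
    qz, qbt, qb0, qu, qr, ql = FactorMuE_variational(x)
    return parameters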


def test_single_alignment_mode():

    dtype = tf.float64
    latent_dims = 2
    latent_length = 4
    latent_alphabet_size, alphabet_size = 3, 3
    q_conc = tf.convert_to_tensor([10, 1], dtype=dtype)
    r_conc = tf.convert_to_tensor([10, 1], dtype=dtype)
    l_conc = tf.convert_to_tensor(2., dtype=dtype)
    padded_data_length = 5
    FactorMuE_variational, parameters = FactorMuE.build_FactorMuE_variational(
            latent_dims, latent_length, latent_alphabet_size, alphabet_size,
            q_conc, r_conc, l_conc, padded_data_length, z_distr='Normal',
            dtype=dtype)
    x = tf.convert_to_tensor(np.array([[0, 1, 0],
                                       [1, 0, 0],
                                       [0, 0, 1],
                                       [1, 0, 0],
                                       [1/3, 1/3, 1/3]]), dtype=dtype)
    xlen = 4
    transfer_mats = mue.make_transfer(latent_length, dtype=dtype)
    w_scale = tf.convert_to_tensor(1., dtype=dtype)
    b_scale = tf.convert_to_tensor(1., dtype=dtype)
    pmode_oh = FactorMuE.single_alignment_mode(
            FactorMuE_variational, x, xlen, latent_dims, latent_length,
            latent_alphabet_size, alphabet_size, transfer_mats,
            w_scale, b_scale, q_conc, r_conc, l_conc,
            mc_samples=5, dtype=dtype)

    assert pmode_oh.shape[0] == xlen
    assert pmode_oh.shape[1] == 2 * (latent_length + 1)


def project_latent_to_sequence(zs, RegressMuE_variational, latent_dims,
                               latent_length, latent_alphabet_size,
                               alphabet_size, bt_scale, b0_scale,
                               u_conc, r_conc, l_conc, z_covar=None,
                               x=None, xlen=None, mc_samples=1,
                               dtype=tf.float32):
    """Project from latent space to sequence space."""

    # Make transfer matrices.
    transfer_mats = mue.make_transfer(latent_length, dtype=dtype)

    if x is not None:
        # Get alignment projection matrix.
        pmode_oh = single_alignment_mode(
                RegressMuE_variational, z_covar, x, xlen, latent_dims,
                latent_length, latent_alphabet_size, alphabet_size,
                transfer_mats, bt_scale, b0_scale, u_conc, r_conc, l_conc,
                mc_samples=mc_samples, dtype=dtype)
    else:
        # Project onto conserved positions.
        pmode_oh = tf.convert_to_tensor(
                mue.mg2k(np.arange(latent_length), 0)[:, None]
                == np.arange(2 * latent_length + 2)[None, :], dtype=dtype)
        xlen = latent_length

    # Store results.
    nus = [tf.zeros([xlen, alphabet_size], dtype=dtype) for _ in zs]
    for rep in range(mc_samples):
        # Sample from variational approximation.
        qbt, qb0, qu, qr, ql = RegressMuE_variational()
        for iz, z in enumerate(zs):
            # Condition on latent space position.
            with condition(bt=qbt, b0=qb0, u=qu, r=qr, l=ql):
                posterior_predictive = RegressMuE(
                        z, latent_dims, latent_length, latent_alphabet_size,
                        alphabet_size, xlen, transfer_mats, bt_scale,
                        b0_scale, u_conc, r_conc, l_conc, dtype=dtype)
            # Compute latent-sequence space observation.
            latseq = tf.exp(
                posterior_predictive.distribution.observation_distribution.logits)
            # Project to sequence space.
            nus[iz] += tf.matmul(pmode_oh, latseq) / mc_samples

    return nus
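

# Usage sketch, added for illustration (not part of the original source):
# projecting two latent-space points onto the conserved positions of the
# latent alignment (the x=None branch above). A trained
# `RegressMuE_variational` is assumed, along with hyperparameters matching
# the ones it was trained with; the values here are placeholders.
def _example_project_latent(RegressMuE_variational):
    dtype = tf.float64
    zs = [tf.zeros([2], dtype=dtype), tf.ones([2], dtype=dtype)]
    nus = project_latent_to_sequence(
            zs, RegressMuE_variational, latent_dims=2, latent_length=4,
            latent_alphabet_size=3, alphabet_size=3,
            bt_scale=tf.convert_to_tensor(1., dtype=dtype),
            b0_scale=tf.convert_to_tensor(1., dtype=dtype),
            u_conc=tf.convert_to_tensor([10., 1.], dtype=dtype),
            r_conc=tf.convert_to_tensor([10., 1.], dtype=dtype),
            l_conc=tf.convert_to_tensor(2., dtype=dtype),
            mc_samples=10, dtype=dtype)
    # Each element of nus is an (xlen, alphabet_size) matrix of per-position
    # letter probabilities for the corresponding z.
    return nus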


def train(dataset, dataset_train, dataset_test, padded_data_length,
          latent_dims, latent_length, latent_alphabet_size, alphabet_size,
          bt_scale, b0_scale, u_conc, r_conc, l_conc, z_distr,
          anneal_epochs, max_epochs, shuffle_buffer, batch_size,
          optimizer_name, learning_rate, mc_samples, dtype, writer,
          out_folder):
    """Main training loop."""

    # Set up the variational approximation.
    FactorMuE_variational, trainable_variables = build_FactorMuE_variational(
            latent_dims, latent_length, latent_alphabet_size, alphabet_size,
            u_conc, r_conc, l_conc, padded_data_length, z_distr=z_distr,
            dtype=dtype)

    # Make transfer matrices.
    transfer_mats = mue.make_transfer(latent_length, dtype=dtype)

    # Run training loop.
    FactorMuE_variational, trainable_variables = train_loop(
            dataset_train, FactorMuE_variational, trainable_variables,
            latent_dims, latent_length, latent_alphabet_size, alphabet_size,
            transfer_mats, bt_scale, b0_scale, u_conc, r_conc, l_conc,
            z_distr, anneal_epochs, max_epochs, shuffle_buffer, batch_size,
            optimizer_name, learning_rate, dtype, writer, out_folder)

    # Run evaluation loop.
    heldout_perplex, heldout_likelihood = evaluate_loop(
            dataset_test, FactorMuE_variational, latent_dims, latent_length,
            latent_alphabet_size, alphabet_size, transfer_mats, bt_scale,
            b0_scale, u_conc, r_conc, l_conc, z_distr, mc_samples, dtype,
            writer)

    # Create embedding.
    embed_mean, embed_std = embed(dataset, FactorMuE_variational,
                                  latent_dims, dtype)

    return (FactorMuE_variational, trainable_variables, heldout_perplex,
            heldout_likelihood, embed_mean, embed_std)
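

# Usage sketch, added for illustration (not part of the original source):
# invoking the FactorMuE `train` entry point above and unpacking the
# per-sequence embedding it returns. Dataset construction is assumed to be
# handled elsewhere; all hyperparameter values are placeholders mirroring
# the unit tests.
def _example_train_factormue(dataset, dataset_train, dataset_test, writer):
    dtype = tf.float64
    (FactorMuE_variational, trainable_variables, heldout_perplex,
     heldout_likelihood, embed_mean, embed_std) = train(
            dataset, dataset_train, dataset_test, padded_data_length=5,
            latent_dims=2, latent_length=4,
            latent_alphabet_size=3, alphabet_size=3,
            bt_scale=tf.convert_to_tensor(1., dtype=dtype),
            b0_scale=tf.convert_to_tensor(1., dtype=dtype),
            u_conc=tf.convert_to_tensor([10., 1.], dtype=dtype),
            r_conc=tf.convert_to_tensor([10., 1.], dtype=dtype),
            l_conc=tf.convert_to_tensor(2., dtype=dtype),
            z_distr='Normal', anneal_epochs=2, max_epochs=5,
            shuffle_buffer=1000, batch_size=5, optimizer_name='Adam',
            learning_rate=0.01, mc_samples=1, dtype=dtype,
            writer=writer, out_folder='./out')
    # embed_mean and embed_std each have one row per sequence and
    # latent_dims columns.
    return embed_mean, embed_std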


def test_MuE_encode():

    dtype = tf.float32
    x = tf.convert_to_tensor(np.array([[0, 1, 0],
                                       [1, 0, 0],
                                       [0, 0, 1],
                                       [1, 0, 0],
                                       [1/3, 1/3, 1/3]]), dtype=dtype)
    qln0 = tf.convert_to_tensor([10, 0], dtype=dtype)
    rln0 = tf.convert_to_tensor([10, 0], dtype=dtype)
    lln0 = tf.convert_to_tensor([[10, 0, 0, 0],
                                 [0, 10, 0, 0],
                                 [0, 0, 0, 10]], dtype=dtype)
    latent_length = 4
    latent_alphabet_size = 4
    alphabet_size = 3
    transfer_mats = mue.make_transfer(latent_length, dtype=dtype)
    padded_data_length = 5
    eps = 1e-32
    chk_enc = mue.encode(x, qln0, rln0, lln0, latent_length,
                         latent_alphabet_size, alphabet_size,
                         padded_data_length, transfer_mats, dtype, eps)

    tst_enc = np.array([[0, 1, 0, 0],
                        [1, 0, 0, 0],
                        [0, 0, 0, 1],
                        [1, 0, 0, 0]])
    assert np.allclose(chk_enc.numpy(), tst_enc, atol=1e-3, rtol=1e-3)


def test_get_hmm_parameters():

    np.random.seed(10)
    dtype = tf.float64
    M = 4
    transfer_mats = mue.make_transfer(M, dtype=dtype)
    q1 = np.random.rand(M + 1)
    q = tf.convert_to_tensor(
            np.concatenate([(1 - q1)[:, None], q1[:, None]], axis=1),
            dtype=tf.float64)
    r1 = np.random.rand(M + 1)
    r = tf.convert_to_tensor(
            np.concatenate([(1 - r1)[:, None], r1[:, None]], axis=1),
            dtype=tf.float64)
    s = tf.random.uniform((M + 1, 4), dtype=dtype)
    s = s / tf.reduce_sum(s, axis=1, keepdims=True)
    c = tf.random.uniform((M + 1, 4), dtype=dtype)
    c = c / tf.reduce_sum(c, axis=1, keepdims=True)

    a0ln, aln, eln = mue.make_hmm_params(
            tf.math.log(s), tf.math.log(c), tf.math.log(q), tf.math.log(r),
            None, transfer_mats, eps=1e-32, dtype=dtype)

    # - Remake transition matrices. -
    q1[-1] = 1e-32
    K = 2 * (M + 1)
    chk_a = np.zeros((K, K))
    chk_a0 = np.zeros((K,))
    m, g = -1, 0
    for mp in range(M + 1):
        for gp in range(2):
            kp = mue.mg2k(mp, gp)
            if m + 1 - g == mp and gp == 0:
                chk_a0[kp] = (1 - r1[m + 1 - g]) * (1 - q1[m + 1 - g])
            elif m + 1 - g < mp and gp == 0:
                chk_a0[kp] = ((1 - r1[m + 1 - g]) * q1[m + 1 - g]
                              * np.prod([(1 - r1[mpp]) * q1[mpp]
                                         for mpp in range(m + 2 - g, mp)])
                              * (1 - r1[mp]) * (1 - q1[mp]))
            elif m + 1 - g == mp and gp == 1:
                chk_a0[kp] = r1[m + 1 - g]
            elif m + 1 - g < mp and gp == 1:
                chk_a0[kp] = ((1 - r1[m + 1 - g]) * q1[m + 1 - g]
                              * np.prod([(1 - r1[mpp]) * q1[mpp]
                                         for mpp in range(m + 2 - g, mp)])
                              * r1[mp])
    for m in range(M + 1):
        for g in range(2):
            k = mue.mg2k(m, g)
            for mp in range(M + 1):
                for gp in range(2):
                    kp = mue.mg2k(mp, gp)
                    if m + 1 - g == mp and gp == 0:
                        chk_a[k, kp] = ((1 - r1[m + 1 - g])
                                        * (1 - q1[m + 1 - g]))
                    elif m + 1 - g < mp and gp == 0:
                        chk_a[k, kp] = (
                            (1 - r1[m + 1 - g]) * q1[m + 1 - g]
                            * np.prod([(1 - r1[mpp]) * q1[mpp]
                                       for mpp in range(m + 2 - g, mp)])
                            * (1 - r1[mp]) * (1 - q1[mp]))
                    elif m + 1 - g == mp and gp == 1:
                        chk_a[k, kp] = r1[m + 1 - g]
                    elif m + 1 - g < mp and gp == 1:
                        chk_a[k, kp] = (
                            (1 - r1[m + 1 - g]) * q1[m + 1 - g]
                            * np.prod([(1 - r1[mpp]) * q1[mpp]
                                       for mpp in range(m + 2 - g, mp)])
                            * r1[mp])
                    elif m == M and mp == M and g == 0 and gp == 0:
                        chk_a[k, kp] = 1.

    chk_e = np.zeros((2 * (M + 1), 4), dtype=np.float64)
    for m in range(M + 1):
        for g in range(2):
            k = mue.mg2k(m, g)
            if g == 0:
                chk_e[k, :] = s[m, :].numpy()
            else:
                chk_e[k, :] = c[m, :].numpy()
    # - -

    assert np.allclose(chk_a0, tf.math.exp(a0ln).numpy())
    assert np.allclose(chk_a, tf.math.exp(aln).numpy())
    assert np.allclose(chk_e, tf.math.exp(eln).numpy())

    # Check normalization.
    assert np.allclose(tf.reduce_sum(tf.math.exp(a0ln)).numpy(), 1.,
                       atol=1e-3, rtol=1e-3)
    assert np.allclose(tf.reduce_sum(tf.math.exp(aln), axis=1).numpy()[:-1],
                       tf.ones(2 * (M + 1) - 1, dtype=dtype).numpy(),
                       atol=1e-3, rtol=1e-3)