예제 #1
0
def predict(y, phi_gmm, encoder_layers, decoder_layers, seed=0):
    """Reconstruct ``y`` and pick the most probable cluster for each point.

    Args:
        y: data to cluster and reconstruct
        phi_gmm: latent phi param, forwarded to ``e_step``
        encoder_layers: encoder NN architecture (``layerspecs`` of ``vae.make_encoder``)
        decoder_layers: decoder NN architecture (``layerspecs`` of ``vae.make_decoder``)
        seed: random seed, forwarded to ``e_step`` and ``subsample_x``

    Returns:
        ``tf.tuple`` named 'prediction' holding the decoder mean and the
        argmax over axis 1 of the log-responsibilities returned by ``e_step``.
    """
    with tf.name_scope('prediction'):
        # Encoder pass (reusing current encoder parameters).
        encoder_out = vae.make_encoder(y, layerspecs=encoder_layers)

        # E-step: cluster responsibilities plus latent samples; only one
        # sample is requested, and the last two outputs are not needed here.
        single_sample = 1
        latent_by_cluster, log_resp, _, _ = e_step(
            encoder_out, phi_gmm, single_sample, name="svae_e_step_predict", seed=seed)

        # Subsample and keep index 0 along axis 1.
        latent = subsample_x(latent_by_cluster, log_resp, seed)
        latent = latent[:, 0, :]

        # Decoder pass (reusing current decoder parameters); only the mean is used.
        reconstruction, _ = vae.make_decoder(latent, layerspecs=decoder_layers)
        cluster_ids = tf.argmax(log_resp, axis=1)

        return tf.tuple((reconstruction, cluster_ids), name='prediction')
예제 #2
0
def inference(y, phi_gmm, encoder_layers, decoder_layers, nb_samples=10, stddev_init_nn=0.01, seed=0, name='inference',
              param_device='/gpu:0'):
    """Build the encoder -> e-step -> decoder inference graph.

    Args:
        y: input data fed to the encoder
        phi_gmm: GMM parameters handed to ``e_step``; returned unchanged
        encoder_layers: ``layerspecs`` for ``vae.make_encoder``
        decoder_layers: ``layerspecs`` for ``vae.make_decoder``
        nb_samples: sample count forwarded to ``e_step``
        stddev_init_nn: ``stddev_init`` for both networks
        seed: random seed shared by encoder, e-step, decoder and subsampling
        name: name scope wrapping all ops created here
        param_device: ``param_device`` for both networks

    Returns:
        Tuple ``(y_reconstruction, x_given_y_phi, x_k_samples, x_samples,
        log_z_given_y_phi, phi_gmm, phi_tilde)``: the decoder output, the
        encoder output, the first ``e_step`` output, the subsampled latents
        (index 0 along axis 1), the second ``e_step`` output, the ``phi_gmm``
        argument and the third ``e_step`` output.
    """
    # Keyword arguments shared by the encoder and the decoder.
    nn_kwargs = dict(stddev_init=stddev_init_nn, param_device=param_device, seed=seed)

    with tf.name_scope(name):
        # VAE encoder
        encoder_out = vae.make_encoder(y, layerspecs=encoder_layers, **nn_kwargs)

        # E-step (update/sample local variables); the fourth output is unused.
        latent_by_cluster, log_resp, phi_tilde, _ = e_step(encoder_out, phi_gmm, nb_samples, seed=seed)

        # Reconstruction from the e-step samples
        reconstruction = vae.make_decoder(latent_by_cluster, layerspecs=decoder_layers, **nn_kwargs)

        subsampled = subsample_x(latent_by_cluster, log_resp, seed)
        latent = subsampled[:, 0, :]

        return reconstruction, encoder_out, latent_by_cluster, latent, log_resp, phi_gmm, phi_tilde
def visualize_svae(ax,
                   config,
                   log_path,
                   ratio_tr=0.7,
                   nb_samples=20,
                   grid_density=100,
                   window=((-20, 20), (-20, 20)),
                   param_device='/cpu:0'):
    """Draw prior samples of a trained SVAE and a few data points onto ``ax``.

    The model graph is rebuilt and its parameters are restored from the latest
    checkpoint found under ``log_path + '/' + generate_log_id(config)``.
    Samples from the expected mixture prior are decoded and rendered as one
    2-D histogram per mixture component (after a PCA projection when the data
    has more than two dimensions).  The first 300 points of the minibatch are
    scattered on top, with colour and marker chosen by their label.

    Args:
        ax: matplotlib axes-like object; ``hist2d`` and ``scatter`` are called on it.
        config: mapping read for the keys 'dataset', 'U', 'L', 'K' and 'seed';
            it is also passed to ``generate_log_id``.
        log_path: directory that contains the per-run checkpoint directory.
        ratio_tr: forwarded to ``make_minibatch`` as ``ratio_tr``.
        nb_samples: number of session runs; every run draws ``sample_size``
            (100) prior samples.
        grid_density: ``bins`` argument of ``hist2d``.
        window: ``range`` argument of ``hist2d``.
        param_device: device of the enclosing ``tf.device`` scope and of the
            ``phi_gmm`` variables.

    Returns:
        None.  The plot is drawn on ``ax``.
    """
    with tf.device(param_device):

        if config['dataset'] in ['mnist', 'fashion']:
            binarise = True
            size_minibatch = 1024
            output_type = 'bernoulli'
        else:
            binarise = False
            size_minibatch = -1
            output_type = 'standard'

        # First we build the model graph so that we can load the learned parameters from a checkpoint.
        # Initialisations don't matter, they'll be overwritten with saver.restore().
        data, lbl, _, _ = make_minibatch(config['dataset'],
                                         path_datadir='../datasets',
                                         ratio_tr=ratio_tr,
                                         seed_split=0,
                                         size_minibatch=size_minibatch,
                                         size_testbatch=-1,
                                         binarise=binarise)
        data_dim = int(data.get_shape()[1])

        # define nn-architecture
        encoder_layers = [(config['U'], tf.tanh), (config['U'], tf.tanh),
                          (config['L'], 'natparam')]
        decoder_layers = [(config['U'], tf.tanh), (config['U'], tf.tanh),
                          (data_dim, output_type)]
        sample_size = 100

        if config['dataset'] in ['mnist', 'fashion']:
            # Map entries equal to -1 to 0 and everything else to 1.
            data = tf.where(tf.equal(data, -1),
                            tf.zeros_like(data, dtype=tf.float32),
                            tf.ones_like(data, dtype=tf.float32))

        with tf.name_scope('model'):
            # NOTE(review): init_mm is pinned to '/gpu:0' instead of using
            # param_device -- confirm this is intended.
            gmm_prior, theta = svae.init_mm(config['K'],
                                            config['L'],
                                            seed=config['seed'],
                                            param_device='/gpu:0')
            theta_copied = niw.natural_to_standard(tf.identity(gmm_prior[1]),
                                                   tf.identity(gmm_prior[2]),
                                                   tf.identity(gmm_prior[3]),
                                                   tf.identity(gmm_prior[4]))
            _, sigma_k = niw.expected_values(theta_copied)
            pi_k_init = tf.nn.softmax(
                tf.random_normal(shape=(config['K'], ),
                                 mean=0.0,
                                 stddev=1.,
                                 seed=config['seed']))
            L_k = tf.cholesky(sigma_k)
            mu_k = tf.random_normal(shape=(config['K'], config['L']),
                                    stddev=1,
                                    seed=config['seed'])
            # The tensors above only serve as initialisers; the variables
            # below take over the same local names.
            with tf.variable_scope('phi_gmm'):
                mu_k = variable_on_device('mu_k',
                                          shape=None,
                                          initializer=mu_k,
                                          trainable=True,
                                          device=param_device)
                L_k = variable_on_device('L_k',
                                         shape=None,
                                         initializer=L_k,
                                         trainable=True,
                                         device=param_device)
                pi_k = variable_on_device('log_pi_k',
                                          shape=None,
                                          initializer=pi_k_init,
                                          trainable=True,
                                          device=param_device)
            phi_gmm = mu_k, L_k, pi_k
            # Encoder output is not needed here; the call is made while
            # building the 'model' scope, before variable reuse is enabled.
            _ = vae.make_encoder(data,
                                 layerspecs=encoder_layers,
                                 stddev_init=.1,
                                 seed=config['seed'])

        with tf.name_scope('random_sampling'):
            # compute expected theta_pgm
            beta_k, m_k, C_k, v_k = niw.natural_to_standard(*theta[1:])
            alpha_k = dirichlet.natural_to_standard(theta[0])
            mean, cov = niw.expected_values((beta_k, m_k, C_k, v_k))
            expected_log_pi = dirichlet.expected_log_pi(alpha_k)
            pi = tf.exp(expected_log_pi)

            # sample from prior: latent points for every component, plus
            # component indices drawn from the mixture weights
            x_k_samples = tf.contrib.distributions.MultivariateNormalFullCovariance(
                loc=mean, covariance_matrix=cov).sample(sample_size)
            z_samples = tf.multinomial(logits=tf.reshape(tf.log(pi), (1, -1)),
                                       num_samples=sample_size,
                                       name='k_samples')
            z_samples = tf.squeeze(z_samples)

            assert z_samples.get_shape() == (sample_size, )
            assert x_k_samples.get_shape() == (sample_size, config['K'],
                                               config['L'])

            # compute reconstructions
            y_k_samples, _ = vae.make_decoder(x_k_samples,
                                              layerspecs=decoder_layers,
                                              stddev_init=.1,
                                              seed=config['seed'])

            assert y_k_samples.get_shape() == (sample_size, config['K'],
                                               data_dim)

        with tf.name_scope('cluster_sample_data'):
            tf.get_variable_scope().reuse_variables()
            _, clustering = svae.predict(data,
                                         phi_gmm,
                                         encoder_layers,
                                         decoder_layers,
                                         seed=config['seed'])

        # load trained model
        saver = tf.train.Saver()
        model_path = log_path + '/' + generate_log_id(config)
        print(model_path)
        latest_ckpnt = tf.train.latest_checkpoint(model_path)

        sess_config = tf.ConfigProto(allow_soft_placement=True)
        sess = tf.Session(config=sess_config)
        coord = tf.train.Coordinator()
        threads = []
        try:
            saver.restore(sess, latest_ckpnt)
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)

            collected_y_samps = []
            collected_z_samps = []
            for _ in range(nb_samples):
                y_samps, z_samps = sess.run((y_k_samples, z_samples))
                collected_y_samps.append(y_samps)
                collected_z_samps.append(z_samps)

            # use 300 sample points from the dataset; the cluster prediction
            # is evaluated alongside but is not used by the plot below
            data_np, lbl_np, _ = sess.run(
                (data[:300], lbl[:300], clustering[:300]))
        finally:
            # Stop the queue-runner threads and release the session even if
            # restoring or sampling failed.
            coord.request_stop()
            coord.join(threads)
            sess.close()

        collected_y_samps = np.concatenate(collected_y_samps, axis=0)
        collected_z_samps = np.concatenate(collected_z_samps, axis=0)
        assert collected_y_samps.shape == (nb_samples * sample_size,
                                           config['K'], data_dim)
        assert collected_z_samps.shape == (nb_samples * sample_size, )

        # compute PCA if necessary
        if data_np.shape[1] > 2:
            pca = PCA(n_components=2).fit(data_np)
            data2d = pca.transform(data_np)
            project = pca.transform
        else:
            data2d = data_np

            def project(points):
                return points

        # For every component keep only the samples whose drawn component
        # index matches.  The component index is stored with its samples so
        # the colour stays tied to the component even when some components
        # received no samples at all.
        samples_2d = []
        for k in range(config['K']):
            chosen = (collected_z_samps == k)
            samps_k = collected_y_samps[chosen, k, :]
            if samps_k.size > 0:
                samples_2d.append((k, project(samps_k)))

        # plot 2d-histogram (one histogram for each non-empty component)
        # NOTE(review): newer matplotlib releases spell `normed` as `density`
        # -- confirm against the matplotlib version this project pins.
        from matplotlib.colors import LogNorm
        for k, samples in samples_2d:
            ax.hist2d(samples[:, 0],
                      samples[:, 1],
                      bins=grid_density,
                      range=window,
                      cmap=make_colormap(dark_colors[k % len(dark_colors)]),
                      normed=True,
                      norm=LogNorm())

        # overlay histogram with sample datapoints (colour and marker follow
        # the label; assumes lbl rows are per-class scores / one-hot -- TODO confirm)
        labels = np.argmax(lbl_np, axis=1)
        for c in np.unique(labels):
            in_class_c = (labels == c)
            color = bright_colors[int(c % len(bright_colors))]
            marker = markers[int(c % len(markers))]
            ax.scatter(data2d[in_class_c, 0],
                       data2d[in_class_c, 1],
                       c=color,
                       marker=marker,
                       s=data_dot_size,
                       linewidths=0)
def visualize_vae(ax,
                  config,
                  log_path,
                  ratio_tr=0.7,
                  nb_samples=20,
                  grid_density=100,
                  window=((-20, 20), (-20, 20)),
                  param_device='/cpu:0',
                  checkpoint_name='checkpoint-100000'):
    """Draw prior samples of a trained VAE and a few data points onto ``ax``.

    The model graph is rebuilt and its parameters are restored from a
    checkpoint in ``log_path + '/' + generate_log_id(config)``.  Latent points
    drawn from a zero-mean, unit-scale diagonal normal are decoded and shown
    as a single 2-D histogram (after a PCA projection when the data has more
    than two dimensions).  The first 300 points of the minibatch are scattered
    on top, with colour and marker chosen by their label.

    Args:
        ax: matplotlib axes-like object; ``hist2d`` and ``scatter`` are called on it.
        config: mapping read for the keys 'dataset', 'U', 'L' and 'seed';
            it is also passed to ``generate_log_id``.
        log_path: directory that contains the per-run checkpoint directory.
        ratio_tr: forwarded to ``make_minibatch`` as ``ratio_tr``.
        nb_samples: number of session runs; every run draws ``sample_size``
            (100) prior samples.  Also forwarded to
            ``vae.reparam_trick_sampling``.
        grid_density: ``bins`` argument of ``hist2d``.
        window: ``range`` argument of ``hist2d``.
        param_device: device of the enclosing ``tf.device`` scope.
        checkpoint_name: file name of the checkpoint inside the run directory.
            The default keeps the previously hard-coded 'checkpoint-100000';
            pass ``None`` to restore ``tf.train.latest_checkpoint`` instead.

    Returns:
        None.  The plot is drawn on ``ax``.
    """
    with tf.device(param_device):
        data, lbl, _, _ = make_minibatch(config['dataset'],
                                         path_datadir='../datasets',
                                         ratio_tr=ratio_tr,
                                         seed_split=0,
                                         size_minibatch=-1,
                                         size_testbatch=-1)
        data_dim = data.get_shape()[1]

        # First we build the model graph so that we can load the learned parameters from a checkpoint.
        # Initialisations don't matter, they'll be overwritten with saver.restore().
        encoder_layers = [(config['U'], tf.tanh), (config['U'], tf.tanh),
                          (config['L'], 'natparam')]
        decoder_layers = [(config['U'], tf.tanh), (config['U'], tf.tanh),
                          (int(data_dim), 'standard')]
        sample_size = 100
        with tf.name_scope('model'):
            x_mean, x_var_diag = vae.make_encoder(data,
                                                  layerspecs=encoder_layers,
                                                  stddev_init=.1,
                                                  seed=config['seed'])
            # The sampled latents are not used by the plot; the call is kept
            # so the graph is built exactly as before.
            # NOTE(review): can probably be dropped -- confirm it creates no variables.
            vae.reparam_trick_sampling(x_mean, x_var_diag, nb_samples,
                                       config['seed'])

            # generate random samples from prior N(x|0,1)
            x_samples = tf.contrib.distributions.MultivariateNormalDiag(
                loc=tf.zeros((config['L'])), scale_diag=tf.ones(
                    (config['L']))).sample(sample_size)
            y_mean, _ = vae.make_decoder(x_samples,
                                         layerspecs=decoder_layers,
                                         stddev_init=.1,
                                         seed=config['seed'])
            assert y_mean.get_shape() == (sample_size, data_dim)

        saver = tf.train.Saver()
        model_path = log_path + '/' + generate_log_id(config)
        print(model_path)
        if checkpoint_name is None:
            ckpnt_path = tf.train.latest_checkpoint(model_path)
        else:
            ckpnt_path = model_path + '/' + checkpoint_name

        sess_config = tf.ConfigProto(allow_soft_placement=True)
        sess = tf.Session(config=sess_config)
        coord = tf.train.Coordinator()
        threads = []
        try:
            saver.restore(sess, ckpnt_path)
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)

            collected_samples = [sess.run(y_mean) for _ in range(nb_samples)]

            # use 300 sample points from the dataset
            data_np, lbl_np = sess.run((data[:300], lbl[:300]))
        finally:
            # Stop the queue-runner threads and release the session even if
            # restoring or sampling failed.
            coord.request_stop()
            coord.join(threads)
            sess.close()

        collected_samples = np.concatenate(collected_samples, axis=0)
        assert collected_samples.shape == (nb_samples * sample_size,
                                           data_np.shape[1])

        # project to 2-D with a PCA fitted on the data points if necessary
        if data_np.shape[1] > 2:
            pca = PCA(n_components=2).fit(data_np)
            data2d = pca.transform(data_np)
            samples_2d = pca.transform(collected_samples)
        else:
            data2d = data_np
            samples_2d = collected_samples

        # NOTE(review): newer matplotlib releases spell `normed` as `density`
        # -- confirm against the matplotlib version this project pins.
        from matplotlib.colors import LogNorm
        ax.hist2d(samples_2d[:, 0],
                  samples_2d[:, 1],
                  bins=grid_density,
                  range=window,
                  cmap=make_colormap('black'),
                  normed=True,
                  norm=LogNorm())

        # overlay the data points (colour and marker follow the label;
        # assumes lbl rows are per-class scores / one-hot -- TODO confirm)
        labels = np.argmax(lbl_np, axis=1)
        for c in np.unique(labels):
            in_class_c = (labels == c)
            color = bright_colors[int(c % len(bright_colors))]
            marker = markers[int(c % len(markers))]
            ax.scatter(data2d[in_class_c, 0],
                       data2d[in_class_c, 1],
                       c=color,
                       marker=marker,
                       s=data_dot_size,
                       linewidths=0)