Example #1
0
def init_mm_params(nb_components, latent_dims, alpha_scale=.1, beta_scale=1e-5, v_init=10., m_scale=1., C_scale=10.,
                   seed=0, as_variables=True, trainable=False, device='/gpu:0', name='gmm'):
    """Build the initial natural parameters of a mixture model (Dirichlet + NIW).

    Standard-form initial values are created first and then converted with
    ``dirichlet.standard_to_natural`` and ``niw.standard_to_natural``:

    * Dirichlet concentration: ``alpha_scale`` times a vector of ones,
    * NIW ``beta``: ``beta_scale`` times a vector of ones,
    * NIW degrees of freedom: ``latent_dims + v_init`` for every component,
    * NIW means: uniform samples from ``[-1, 1)`` multiplied by ``m_scale``,
    * NIW covariances: ``C_scale`` times the identity, one per component.

    Args:
        nb_components: number of mixture components.
        latent_dims: dimensionality of the latent space.
        alpha_scale, beta_scale, v_init, m_scale, C_scale: scaling constants
            for the initial values listed above.
        seed: op-level seed for the random mean initialisation.
        as_variables: if true, wrap every natural parameter in a variable
            created through ``variable_on_device`` inside variable scope ``name``.
        trainable: forwarded to ``variable_on_device``.
        device: forwarded to ``variable_on_device``.
        name: variable scope used when ``as_variables`` is true.

    Returns:
        Tuple ``(alpha, A, b, beta, v_hat)`` of natural parameters, either as
        plain tensors or as variables.
    """
    per_component = (nb_components,)

    with tf.name_scope('gmm_initialization'):
        # initial values in standard parameterisation
        dirichlet_std = alpha_scale * tf.ones(per_component)
        beta_std = beta_scale * tf.ones(per_component)
        dof_std = tf.tile([float(latent_dims + v_init)], [nb_components])
        means_std = m_scale * tf.random_uniform((nb_components, latent_dims), minval=-1, maxval=1, seed=seed)
        cov_std = C_scale * tf.tile(tf.expand_dims(tf.eye(latent_dims), axis=0), [nb_components, 1, 1])

        # standard -> natural parameterisation
        A, b, beta, v_hat = niw.standard_to_natural(beta_std, means_std, cov_std, dof_std)
        alpha = dirichlet.standard_to_natural(dirichlet_std)

        natural_params = (alpha, A, b, beta, v_hat)
        if not as_variables:
            return natural_params

        # The variable names below do not mirror the natural-parameter names
        # (e.g. 'beta_k' stores A); they are kept exactly as they are because
        # they identify the variables in the graph.
        variable_names = ('alpha_k', 'beta_k', 'm_k', 'C_k', 'v_k')
        with tf.variable_scope(name):
            return tuple(
                variable_on_device(var_name, shape=None, initializer=init_value, trainable=trainable, device=device)
                for var_name, init_value in zip(variable_names, natural_params)
            )
Example #2
0
def make_nnet(input, layerspecs, stddev, name, param_device='/gpu:0', seed=0):
    """Build an MLP with a linear, resnet-like shortcut from input to output.

    All leading dimensions of ``input`` are flattened before the network is
    applied (e.g. (M, K, D) -> (M*K, D)) and restored on the output.

    Args:
        input: tensor whose last dimension is statically known.
        layerspecs: sequence of layer specifications. Every entry but the last
            is ``(hidden_units, activation)``; the last one is
            ``(output_dim, output_type)`` with ``output_type`` one of
            ``'bernoulli'``, ``'standard'`` or ``'natparam'``.
        stddev: forwarded to the layer constructors.
        name: variable scope enclosing all variables of the network.
        param_device: device on which the parameters are placed.
        seed: seed forwarded to the layer constructors and to the shortcut
            initialisation.

    Returns:
        For ``'bernoulli'`` a single tensor; otherwise a tuple of two tensors
        (MLP output plus shortcut for each of the two Gaussian outputs).

    Raises:
        NotImplementedError: if the output type is not one of the three
            supported values (raised after the Gaussian output layer has
            been constructed).
    """
    with tf.variable_scope(name):
        # ravel inputs: (M, K, D) -> (M*K, D)
        input_shape = input.get_shape()
        input_dim = int(input_shape[-1])
        flat_input = tf.reshape(input, (-1, input_dim))

        # create all layers except the output layer
        prev_layer = flat_input
        for i, (hidden_units, activation) in enumerate(layerspecs[:-1]):
            prev_layer = make_layer(prev_layer, hidden_units, stddev, activation, 'layer_%d' % i, param_device, seed)

        # create output layer
        output_dim, output_type = layerspecs[-1]
        is_bernoulli = output_type == 'bernoulli'

        if is_bernoulli:
            out_mlp = make_bernoulli_layer(prev_layer, output_dim, stddev, param_device=param_device, seed=seed)
        else:
            out_mlp = make_gaussian_layer(prev_layer, output_dim, stddev, output_type, param_device=param_device,
                                          seed=seed)

        # create resnet-like shortcut (as in Johnson's SVAE code)
        with tf.variable_scope('shortcut'):
            orthonormal_cols = tf.constant(rand_partial_isometry(input_dim, output_dim, 1., seed=seed), dtype=tf.float32)
            W = variable_on_device('W', shape=None, initializer=orthonormal_cols, trainable=True, device=param_device,
                                   dtype=tf.float32)
            b1 = variable_on_device('b1', shape=None, initializer=tf.zeros(output_dim), trainable=True,
                                    device=param_device, dtype=tf.float32)
            out_res = tf.add(tf.matmul(flat_input, W), b1, name='res_shortcut_1')

            # create shortcut for second output (in Gaussian case)
            if not is_bernoulli:
                b2 = variable_on_device('b2', shape=None, initializer=tf.zeros(output_dim), trainable=True,
                                        device=param_device, dtype=tf.float32)

                if output_type == 'standard':
                    a = tf.constant(1., dtype=tf.float32)
                elif output_type == 'natparam':
                    a = tf.constant(-0.5, dtype=tf.float32)
                else:
                    raise NotImplementedError('unsupported output type: %r' % (output_type,))

                # softplus(b2) == log(1 + exp(b2)); the built-in op avoids the
                # overflow of exp() for large b2.
                out_res = (out_res, tf.multiply(a, tf.nn.softplus(b2), name='res_shortcut_2'))

        with tf.variable_scope('resnet_out'):
            # unravel output: (M*K, D) -> (M, K, D)
            output_shape = input_shape[:-1].concatenate(output_dim)
            if is_bernoulli:
                return tf.reshape(tf.add(out_mlp, out_res), output_shape, name='output')

            return (
                tf.reshape(tf.add(out_mlp[0], out_res[0]), output_shape, name='output_0'),
                tf.reshape(tf.add(out_mlp[1], out_res[1]), output_shape, name='output_1')
            )
Example #3
0
def make_loc_scale_variables(theta, param_device='/gpu:0', name='copy_m_v'):
    """Create trainable location/scale variables for point estimates.

    Entries 1 to 4 of ``theta`` are copied with ``tf.identity`` and converted
    with ``niw.natural_to_standard``; ``niw.expected_values`` of the result
    provides the initial mean and covariance. The mean initialises ``mu_k``
    and the Cholesky factor of the covariance initialises ``L_k``.

    Args:
        theta: indexable collection; only ``theta[1]`` .. ``theta[4]`` are read.
        param_device: device on which the two variables are placed.
        name: name scope wrapping the created ops.

    Returns:
        Tuple ``(mu_k, L_k)`` of trainable variables.
    """
    with tf.name_scope(name):
        # copy the NIW natural parameters before converting them
        niw_natparams = [tf.identity(theta[idx]) for idx in (1, 2, 3, 4)]
        niw_standard = niw.natural_to_standard(*niw_natparams)

        expected_mean, expected_cov = niw.expected_values(niw_standard)
        cholesky_factor = tf.cholesky(expected_cov)

        initial_values = (('mu_k', expected_mean), ('L_k', cholesky_factor))
        mu_k, L_k = [
            variable_on_device(var_name, shape=None, initializer=init_value, trainable=True, device=param_device)
            for var_name, init_value in initial_values
        ]

        return mu_k, L_k
Example #4
0
def init_recognition_params(theta, nb_components, seed=0, param_device='/gpu:0', var_scope='phi_gmm'):
    """Create the trainable parameters of the PGM part of the recognition network.

    Args:
        theta: parameters handed to ``make_loc_scale_variables`` to initialise
            the location/scale variables.
        nb_components: number of mixture components.
        seed: op-level seed for the random mixture-weight initialisation.
        param_device: device on which the variables are placed.
        var_scope: variable scope holding the variables; the surrounding name
            scope is ``'init_' + var_scope``.

    Returns:
        Tuple ``(mu_k, L_k, pi_k)`` of trainable variables.
    """
    with tf.name_scope('init_' + var_scope):
        # random mixture weights: softmax of standard-normal draws
        random_scores = tf.random_normal(shape=(nb_components,), mean=0.0, stddev=1., seed=seed)
        pi_k_init = tf.nn.softmax(random_scores)

        with tf.variable_scope(var_scope):
            mu_k, L_k = make_loc_scale_variables(theta, param_device)
            # NOTE(review): the variable is called 'log_pi_k' but is initialised
            # with softmax output rather than its logarithm - confirm how
            # downstream code interprets it.
            pi_k = variable_on_device('log_pi_k', shape=None, initializer=pi_k_init, trainable=True,
                                      device=param_device)

    return mu_k, L_k, pi_k
                # init helper values for SMM (theta is a constant, we just need it to init the rec GMM below)
                gmm_prior, theta = svae.init_mm(config['K'],
                                                config['L'],
                                                seed=config['seed'],
                                                param_device=param_device,
                                                theta_as_variable=False)

                # create tensor for Student-t parameters
                with tf.variable_scope('theta'):
                    mu_k, L_k = svae.make_loc_scale_variables(
                        gmm_prior, param_device=param_device)
                    DoF = config['DoF'] * tf.ones(
                        (config['K'], ), dtype=tf.float32)
                    DoF = variable_on_device('DoF_k',
                                             shape=None,
                                             initializer=DoF,
                                             trainable=False,
                                             device=param_device)

                    alpha_k = variable_on_device('alpha_k',
                                                 shape=None,
                                                 initializer=theta[0],
                                                 trainable=False,
                                                 device=param_device)

                # init inference GMM parameters
                phi_gmm = svae.init_recognition_params(
                    theta,
                    config['K'],
                    seed=config['seed'],
                    param_device=param_device)
def visualize_svae(ax,
                   config,
                   log_path,
                   ratio_tr=0.7,
                   nb_samples=20,
                   grid_density=100,
                   window=((-20, 20), (-20, 20)),
                   param_device='/cpu:0'):
    """Plot samples from a trained SVAE prior together with data points.

    The model graph is rebuilt, its parameters are restored from the latest
    checkpoint found in ``log_path + '/' + generate_log_id(config)``, samples
    are drawn from the prior and decoded, and the result is drawn on ``ax``:
    one 2d-histogram per mixture component (for components that produced at
    least one sample) overlaid with a scatter plot of up to 300 data points.
    Data with more than two dimensions is projected with a 2-component PCA
    fitted on those data points.

    Args:
        ax: plotting target providing ``hist2d`` and ``scatter``.
        config: dict; the keys ``'dataset'``, ``'U'``, ``'L'``, ``'K'`` and
            ``'seed'`` are read here (``generate_log_id`` receives the whole
            dict).
        log_path: directory that contains the model's log directory.
        ratio_tr: forwarded to ``make_minibatch``.
        nb_samples: number of sampling rounds; each round draws 100 samples.
        grid_density: number of histogram bins.
        window: histogram range.
        param_device: device used for graph construction and the variables
            created in this function.
    """
    with tf.device(param_device):

        is_image_data = config['dataset'] in ['mnist', 'fashion']
        if is_image_data:
            binarise = True
            size_minibatch = 1024
            output_type = 'bernoulli'
        else:
            binarise = False
            size_minibatch = -1
            output_type = 'standard'

        # First we build the model graph so that we can load the learned parameters from a checkpoint.
        # Initialisations don't matter, they'll be overwritten with saver.restore().
        data, lbl, _, _ = make_minibatch(config['dataset'],
                                         path_datadir='../datasets',
                                         ratio_tr=ratio_tr,
                                         seed_split=0,
                                         size_minibatch=size_minibatch,
                                         size_testbatch=-1,
                                         binarise=binarise)
        data_dim = int(data.get_shape()[1])

        # define nn-architecture
        encoder_layers = [(config['U'], tf.tanh), (config['U'], tf.tanh),
                          (config['L'], 'natparam')]
        decoder_layers = [(config['U'], tf.tanh), (config['U'], tf.tanh),
                          (data_dim, output_type)]
        sample_size = 100

        if is_image_data:
            # map entries equal to -1 to 0 and everything else to 1
            data = tf.where(tf.equal(data, -1),
                            tf.zeros_like(data, dtype=tf.float32),
                            tf.ones_like(data, dtype=tf.float32))

        with tf.name_scope('model'):
            # NOTE(review): '/gpu:0' is hard-coded here although the rest of the
            # function uses param_device - confirm this is intended.
            gmm_prior, theta = svae.init_mm(config['K'],
                                            config['L'],
                                            seed=config['seed'],
                                            param_device='/gpu:0')
            theta_copied = niw.natural_to_standard(tf.identity(gmm_prior[1]),
                                                   tf.identity(gmm_prior[2]),
                                                   tf.identity(gmm_prior[3]),
                                                   tf.identity(gmm_prior[4]))
            _, sigma_k = niw.expected_values(theta_copied)
            pi_k_init = tf.nn.softmax(
                tf.random_normal(shape=(config['K'], ),
                                 mean=0.0,
                                 stddev=1.,
                                 seed=config['seed']))
            L_k_init = tf.cholesky(sigma_k)
            mu_k_init = tf.random_normal(shape=(config['K'], config['L']),
                                         stddev=1,
                                         seed=config['seed'])
            with tf.variable_scope('phi_gmm'):
                mu_k = variable_on_device('mu_k',
                                          shape=None,
                                          initializer=mu_k_init,
                                          trainable=True,
                                          device=param_device)
                L_k = variable_on_device('L_k',
                                         shape=None,
                                         initializer=L_k_init,
                                         trainable=True,
                                         device=param_device)
                pi_k = variable_on_device('log_pi_k',
                                          shape=None,
                                          initializer=pi_k_init,
                                          trainable=True,
                                          device=param_device)
            phi_gmm = mu_k, L_k, pi_k
            # the encoder is built only so that its variables exist in the graph
            _ = vae.make_encoder(data,
                                 layerspecs=encoder_layers,
                                 stddev_init=.1,
                                 seed=config['seed'])

        with tf.name_scope('random_sampling'):
            # compute expected theta_pgm
            beta_k, m_k, C_k, v_k = niw.natural_to_standard(*theta[1:])
            alpha_k = dirichlet.natural_to_standard(theta[0])
            mean, cov = niw.expected_values((beta_k, m_k, C_k, v_k))
            expected_log_pi = dirichlet.expected_log_pi(alpha_k)
            pi = tf.exp(expected_log_pi)

            # sample from the prior: one latent sample per component plus a
            # component index drawn from the mixture weights
            x_k_samples = tf.contrib.distributions.MultivariateNormalFullCovariance(
                loc=mean, covariance_matrix=cov).sample(sample_size)
            z_samples = tf.multinomial(logits=tf.reshape(tf.log(pi), (1, -1)),
                                       num_samples=sample_size,
                                       name='k_samples')
            z_samples = tf.squeeze(z_samples)

            assert z_samples.get_shape() == (sample_size, )
            assert x_k_samples.get_shape() == (sample_size, config['K'],
                                               config['L'])

            # compute reconstructions
            y_k_samples, _ = vae.make_decoder(x_k_samples,
                                              layerspecs=decoder_layers,
                                              stddev_init=.1,
                                              seed=config['seed'])

            assert y_k_samples.get_shape() == (sample_size, config['K'],
                                               data_dim)

        with tf.name_scope('cluster_sample_data'):
            tf.get_variable_scope().reuse_variables()
            _, clustering = svae.predict(data,
                                         phi_gmm,
                                         encoder_layers,
                                         decoder_layers,
                                         seed=config['seed'])

        # load trained model
        saver = tf.train.Saver()
        model_path = log_path + '/' + generate_log_id(config)
        print(model_path)
        latest_ckpnt = tf.train.latest_checkpoint(model_path)

        sess_config = tf.ConfigProto(allow_soft_placement=True)
        sess = tf.Session(config=sess_config)
        try:
            saver.restore(sess, latest_ckpnt)
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)
            try:
                collected_y_samps = []
                collected_z_samps = []
                for _ in range(nb_samples):
                    y_samps, z_samps = sess.run((y_k_samples, z_samples))
                    collected_y_samps.append(y_samps)
                    collected_z_samps.append(z_samps)
                collected_y_samps = np.concatenate(collected_y_samps, axis=0)
                collected_z_samps = np.concatenate(collected_z_samps, axis=0)
                assert collected_y_samps.shape == (nb_samples * sample_size,
                                                   config['K'], data_dim)
                assert collected_z_samps.shape == (nb_samples * sample_size, )

                # use 300 sample points from the dataset
                # (clustering is evaluated as well but not used for plotting)
                data, lbl, clustering = sess.run(
                    (data[:300], lbl[:300], clustering[:300]))
            finally:
                # stop and join the queue-runner threads before the session goes away
                coord.request_stop()
                coord.join(threads)
        finally:
            sess.close()

        # compute PCA if necessary
        if data.shape[1] > 2:
            pca = PCA(n_components=2).fit(data)
            data2d = pca.transform(data)
        else:
            pca = None
            data2d = data

        # For every component keep only the decoded samples whose sampled
        # component index selects it; remember the component index so that
        # colours stay tied to the component even if another one is empty.
        samples_2d = []
        for k in range(config['K']):
            samps_k = collected_y_samps[collected_z_samps == k, k, :]
            if samps_k.size > 0:
                if pca is not None:
                    samps_k = pca.transform(samps_k)
                samples_2d.append((k, samps_k))

        # plot 2d-histogram (one histogram for each non-empty component)
        from matplotlib.colors import LogNorm
        for k, samples in samples_2d:
            ax.hist2d(samples[:, 0],
                      samples[:, 1],
                      bins=grid_density,
                      range=window,
                      cmap=make_colormap(dark_colors[k % len(dark_colors)]),
                      normed=True,
                      norm=LogNorm())

        # overlay histogram with sample datapoints, coloured by the argmax of
        # their label vector (presumably one-hot class labels - verify against
        # make_minibatch)
        labels = np.argmax(lbl, axis=1)
        for c in np.unique(labels):
            in_class_c = (labels == c)
            color = bright_colors[int(c % len(bright_colors))]
            marker = markers[int(c % len(markers))]
            ax.scatter(data2d[in_class_c, 0],
                       data2d[in_class_c, 1],
                       c=color,
                       marker=marker,
                       s=data_dot_size,
                       linewidths=0)