Example #1
    def output_function(self, state):
        params = dense_layer(state.h3,
                             self.output_units,
                             scope='gmm',
                             reuse=tf.compat.v1.AUTO_REUSE)
        pis, mus, sigmas, rhos, es = self._parse_parameters(params)
        mu1, mu2 = tf.split(mus, 2, axis=1)
        mus = tf.stack([mu1, mu2], axis=2)
        sigma1, sigma2 = tf.split(sigmas, 2, axis=1)

        covar_matrix = [
            tf.square(sigma1), rhos * sigma1 * sigma2, rhos * sigma1 * sigma2,
            tf.square(sigma2)
        ]
        covar_matrix = tf.stack(covar_matrix, axis=2)
        covar_matrix = tf.reshape(
            covar_matrix,
            (self.batch_size, self.num_output_mixture_components, 2, 2))

        mvn = tfd.MultivariateNormalFullCovariance(
            loc=mus, covariance_matrix=covar_matrix)
        b = tfd.Bernoulli(probs=es)
        c = tfd.Categorical(probs=pis)

        sampled_e = b.sample()
        sampled_coords = mvn.sample()
        sampled_idx = c.sample()

        idx = tf.stack([tf.range(self.batch_size), sampled_idx], axis=1)
        coords = tf.gather_nd(sampled_coords, idx)
        return tf.concat([coords, tf.cast(sampled_e, tf.float32)], axis=1)
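A minimal standalone sketch (not from the repository above) of the same covariance construction: per batch element and mixture component, the 2x2 matrix [[s1^2, rho*s1*s2], [rho*s1*s2, s2^2]] is assembled and passed to MultivariateNormalFullCovariance. Shapes and values are illustrative, and it assumes a TFP build in which the long-deprecated MultivariateNormalFullCovariance class is still available.

import tensorflow as tf
import tensorflow_probability as tfp

tfd = tfp.distributions

batch, components = 4, 3
sigma1 = tf.fill([batch, components], 0.5)
sigma2 = tf.fill([batch, components], 1.5)
rho = tf.fill([batch, components], 0.3)
mu = tf.zeros([batch, components, 2])

# Flattened 2x2 covariance per (batch, component), then reshaped to matrices.
covar = tf.stack([tf.square(sigma1), rho * sigma1 * sigma2,
                  rho * sigma1 * sigma2, tf.square(sigma2)], axis=-1)
covar = tf.reshape(covar, [batch, components, 2, 2])

mvn = tfd.MultivariateNormalFullCovariance(loc=mu, covariance_matrix=covar)
print(mvn.sample().shape)  # (4, 3, 2): one 2-D draw per batch element and component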
Example #2
    def build_prior(self):
        if self.whiten:
            mvn = tfd.MultivariateNormalDiag(loc=tf.zeros_like(self.Ug))
        else:
            mvn = tfd.MultivariateNormalFullCovariance(
                loc=tf.zeros_like(self.Ug),
                covariance_matrix=self.kern.K(self.Zg, self.Zg))
        return tf.reduce_sum(mvn.log_prob(self.Ug))
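The whiten flag above switches between a standard-normal prior on whitened inducing values (MultivariateNormalDiag) and a zero-mean prior whose covariance is the kernel matrix (MultivariateNormalFullCovariance). A small numpy/scipy check (not from the repository) of the identity that makes the whitened form work: if U = L @ V with K = L @ L.T, then log N(U; 0, K) = log N(V; 0, I) - log|det L|, so the two priors differ only by a constant.

import numpy as np
from scipy.stats import multivariate_normal

rng = np.random.default_rng(0)
A = rng.normal(size=(5, 5))
K = A @ A.T + 5 * np.eye(5)          # stand-in for a kernel matrix kern.K(Z, Z)
L = np.linalg.cholesky(K)

v = rng.normal(size=5)               # whitened variable
u = L @ v                            # unwhitened variable

lp_full = multivariate_normal(mean=np.zeros(5), cov=K).logpdf(u)
lp_white = multivariate_normal(mean=np.zeros(5), cov=np.eye(5)).logpdf(v)
print(np.allclose(lp_full, lp_white - np.log(np.linalg.det(L))))  # True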
Example #3
    def compute_elbo(self, data, output_distribution,
                     posterior_mixture_distribution, latent_k_samples,
                     log_z_given_y_phi):
        nb_samples = output_distribution.batch_shape[0]
        r_nk = tf.exp(log_z_given_y_phi)

        # compute negative reconstruction error
        with tf.name_scope('compute_reconstruction_err'):
            shaped_data = tf.tile(
                tf.expand_dims(tf.expand_dims(data, axis=1), axis=0),
                [nb_samples, 1, self._nb_components, 1, 1, 1])
            neg_reconstruction_error = tf.reduce_mean(
                tf.reduce_sum(
                    output_distribution.log_prob(shaped_data) * r_nk, axis=2))

        # compute E[log q_phi(x,z=k|y)]
        with tf.name_scope('compute_regularizer'):
            with tf.name_scope('log_numerator'):
                log_N_x_given_phi = posterior_mixture_distribution.components_distribution.log_prob(
                    latent_k_samples)
                log_numerator = log_N_x_given_phi + log_z_given_y_phi

            with tf.name_scope('log_denominator'):
                with tf.name_scope('theta_expected_vals'):
                    log_pi, mu, sigma = self._theta.expected_values()
                    # mu = tf.Print(mu, [tf.norm(mu, axis=1)], summarize=10, message='gmm_mu: ')
                    # mu = tf.Print(mu, [tf.norm(sigma, axis=[1, 2])], summarize=10, message='gmm_sigma: ')
                    # mu = tf.Print(mu, [tf.nn.softmax(log_pi)], summarize=10, message='gmm_log_pi: ')
                    mu = tf.stop_gradient(mu)
                    sigma = tf.stop_gradient(sigma)
                    log_pi = tf.stop_gradient(log_pi)
                    theta_gaussian_dist = tfd.MultivariateNormalFullCovariance(
                        mu, sigma)

                log_N_x_given_theta = theta_gaussian_dist.log_prob(
                    latent_k_samples)
                log_denominator = log_N_x_given_theta + log_pi

            # log_denominator = tf.Print(log_denominator, [latent_k_samples, mu, sigma])
            # log_denominator = tf.Print(log_denominator, [log_N_x_given_theta, tf.log(pi)])

        # weighted sum using r_nk over components, then mean over samples and batch
        regularizer_term = tf.reduce_mean(
            tf.reduce_sum(r_nk * (log_numerator - log_denominator), axis=2))

        elbo = neg_reconstruction_error - regularizer_term

        details = (neg_reconstruction_error,
                   tf.reduce_mean(
                       tf.reduce_sum(
                           tf.multiply(r_nk, log_numerator), axis=-1),
                       axis=0),
                   tf.reduce_mean(
                       tf.reduce_sum(
                           tf.multiply(r_nk, log_denominator), axis=-1),
                       axis=0), regularizer_term)

        return elbo, details
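Reading the reductions off the code, the returned elbo is (up to the averaging over samples and the minibatch) an SVAE-style bound of the form

\[
\mathcal{L} = \mathbb{E}\Big[\sum_k r_{nk} \log p_\theta(y_n \mid x_n^{(k)})\Big]
            - \mathbb{E}\Big[\sum_k r_{nk} \big(\log q_\phi(x_n^{(k)}, z_n{=}k \mid y_n) - \log p(x_n^{(k)}, z_n{=}k \mid \theta)\big)\Big],
\]

where r_nk = exp(log_z_given_y_phi) are the mixture responsibilities and x_n^(k) are the per-component latent samples; the first term is neg_reconstruction_error and the second is regularizer_term.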
Example #4
    def _build(self,
               inputs,
               nb_samples=10,
               seed=0,
               encoder_param_type='natural'):
        ### vae encode
        emb = self._encoder(inputs)
        enc_eta1 = self._mu_net(emb)
        enc_eta2_diag = self._sigma_net(emb)
        if encoder_param_type == 'natural':
            enc_eta2_diag *= -1. / 2
            # enc_eta2_diag -= 1e-8
        enc_eta2 = tf.matrix_diag(enc_eta2_diag)

        ### GMM natural parameters
        gmm_pi, gmm_eta1, gmm_eta2 = self.phi_gmm()

        ### combined GMM and VAE latent parameters
        # eta1_tilde.shape = (N, K, D); eta2_tilde.shape = (N, K, D, D)
        # with tf.control_dependencies([util.matrix_is_pos_def_op(-2 * enc_eta2)]):
        eta1_tilde = tf.expand_dims(
            enc_eta1, axis=1) + tf.expand_dims(
                gmm_eta1, axis=0)
        eta2_tilde = tf.expand_dims(
            enc_eta2, axis=1) + tf.expand_dims(
                gmm_eta2, axis=0)
        log_z_given_y_phi = compute_log_z_given_y(enc_eta1, enc_eta2, gmm_eta1,
                                                  gmm_eta2, gmm_pi)
        # with tf.control_dependencies([util.matrix_is_pos_def_op(-2 * gmm_eta2)]):
        mu, cov = gaussian.natural_to_standard(eta1_tilde, eta2_tilde)
        posterior_mixture_distribution = tfd.MixtureSameFamily(
            mixture_distribution=tfd.Categorical(tf.exp(log_z_given_y_phi)),
            components_distribution=tfd.MultivariateNormalFullCovariance(
                loc=mu, covariance_matrix=cov))

        # sample x for each of the K components
        # latent_k_samples.shape == nb_samples, batch_size, nb_components, latent_dim
        latent_k_samples = posterior_mixture_distribution.components_distribution.sample(
            [nb_samples])

        ### vae decode
        output_mean = snt.BatchApply(self._decoder, n_dims=3)(latent_k_samples)
        output_variance = tf.get_variable(
            'output_variance',
            dtype=tf.float32,
            initializer=tf.zeros(output_mean.get_shape().as_list()),
            trainable=True)  # learned parameter for output distribution
        output_distribution = tfd.Independent(
            tfd.MultivariateNormalDiagWithSoftplusScale(
                loc=output_mean, scale_diag=output_variance),
            reinterpreted_batch_ndims=2)

        # subsample for each datum in minibatch (go from `nb_samples` per component to `nb_samples` total)
        latent_samples = subsample_x(
            tf.transpose(latent_k_samples, [1, 0, 2, 3]), log_z_given_y_phi,
            seed)

        return output_distribution, posterior_mixture_distribution, latent_k_samples, latent_samples, log_z_given_y_phi
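The helper gaussian.natural_to_standard is not shown in this example; below is a minimal numpy sketch of the usual conversion it presumably performs (assumption: natural parameters eta1 = Sigma^{-1} mu and eta2 = -0.5 * Sigma^{-1}, consistent with the enc_eta2_diag *= -1. / 2 line above). Summing the encoder and GMM natural parameters, as done for eta1_tilde and eta2_tilde, corresponds to multiplying the two Gaussian densities up to normalisation.

import numpy as np

def natural_to_standard(eta1, eta2):
    # Sigma = (-2 * eta2)^{-1},  mu = Sigma @ eta1
    cov = np.linalg.inv(-2.0 * eta2)
    mu = cov @ eta1
    return mu, cov

sigma = np.diag([0.5, 2.0])
mu = np.array([1.0, -1.0])
eta1 = np.linalg.inv(sigma) @ mu
eta2 = -0.5 * np.linalg.inv(sigma)
print(natural_to_standard(eta1, eta2))  # recovers mu and sigma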
Example #5
	def posterior(self, Zprev_NxDz, y_Dy, name = None, debug_mode=False):
		""" Computes Laplace Approx using an FPI for the first and second moments differentiating the log posterior """
		if name is None:
			name = self.name
		with tf.name_scope(name):
			Dz = tf.constant(self.A_DzxDz.get_shape().as_list()[0], dtype = tf.float32)
			N = Zprev_NxDz.get_shape().as_list()[0]

			Z_NxDz = tf.identity(Zprev_NxDz, name = 'Z_NxDz')
			mu_NxDz = tf.matmul(Zprev_NxDz, self.A_DzxDz, transpose_b = True, name = 'mu_NxDz')

			QBt_DzxDy = tf.matmul(self.Q_DzxDz, self.B_DyxDz, transpose_b = True)
			QBtY_Dz = tf.matmul(tf.expand_dims(y_Dy, axis = 0), QBt_DzxDy, transpose_b = True, name = 'QBtY_Dz')

			# Iterate over FPIs for first and second moments:
			for i in range(self.n_iters):
				# Compute FPI for the mean
				BZ_NxDy = tf.matmul(Z_NxDz, self.B_DyxDz, transpose_b = True, name = 'BZ_NxDy')
				expBZ_NxDy = tf.exp(BZ_NxDy, name = 'expBZ_NxDy')
				BtexpBZ_NxDz = tf.matmul(expBZ_NxDy, self.B_DyxDz, name = 'BtexpBZ_Dz')

				Z_NxDz = - tf.matmul(BtexpBZ_NxDz, self.Q_DzxDz, transpose_b = True) + QBtY_Dz + mu_NxDz
				if debug_mode:
					print("iter %i, mean:\n"%i, Z_NxDz)

			# Compute FPI for the Hessian
			expBZ_NxDyxDy = tf.matrix_diag(tf.exp(tf.matmul(Z_NxDz, self.B_DyxDz, transpose_b=True)), name = 'expBZ_NxDyxDy')
			BtexpBZ_NxDzxDy = tf.einsum('ijk,jh->ihk', expBZ_NxDyxDy, self.B_DyxDz)
			BtexpBZB_NxDzxDz = tf.einsum('ijk,kh->ijh', BtexpBZ_NxDzxDy, self.B_DyxDz)
			H_NxDzxDz = BtexpBZB_NxDzxDz + self.Q_inv_DzxDz
			if debug_mode:
				print("Tensor of Hessians:\n", H_NxDzxDz)

			# Compute the inverse normalization to approximate the integral
			SqInvDet = 1./tf.sqrt(tf.matrix_determinant(H_NxDzxDz))
			PiTerm = (2*tf.constant(np.pi))**(Dz/2)

			mvn_loc = tf.matmul(Zprev_NxDz, self.A_DzxDz, transpose_b = True, name = 'mvn_loc')
			mvn = tfd.MultivariateNormalFullCovariance(loc = mvn_loc, 
													   covariance_matrix = self.Q_DzxDz,
													   name = "mvn")
			mvn_prob = mvn.prob(Z_NxDz, name = "mvn_prob")

			log_rate = tf.matmul(Z_NxDz, self.B_DyxDz, transpose_b = True, name = 'log_rate')
			poisson = tfd.Poisson(log_rate = log_rate, name = "Poisson")
			y_NxDy = tf.tile(tf.expand_dims(y_Dy, axis = 0), [N, 1], name = 'y_NxDy')
			element_wise_prob = poisson.prob(y_NxDy, name = "element_wise_prob")
			# Joint likelihood of the independent Poisson dimensions is the product of the per-dimension probabilities.
			poisson_prob = tf.reduce_prod(element_wise_prob, axis = 1, name = "poisson_prob")

			Pstar = mvn_prob * poisson_prob

			Ztilde_Nx1 = SqInvDet * PiTerm * Pstar
			return Ztilde_Nx1
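The returned quantity is the standard Laplace approximation of the normaliser, Ztilde = P*(z*) * (2*pi)^(Dz/2) / sqrt(det H), with z* the approximate mode found by the fixed-point iterations and H the negative Hessian of log P* at z*. A 1-D numerical sanity check of that formula (not from the repository):

import numpy as np

h, m = 2.5, 0.3                              # curvature -d^2 log P*/dz^2 and mode
z = np.linspace(m - 20.0, m + 20.0, 400001)
p_star = np.exp(-0.5 * h * (z - m) ** 2)     # unnormalised density
numeric = np.sum(p_star) * (z[1] - z[0])     # brute-force integral
laplace = p_star.max() * np.sqrt(2 * np.pi / h)
print(numeric, laplace)                      # both approximately 1.5853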
Example #6
    def build_prior(self):
        if self.kern.ktype == "id" or self.kern.ktype == "kr":
            if self.whiten:
                mvn = tfd.MultivariateNormalDiag(loc=tf.zeros_like(self.U[:,
                                                                          0]))
            else:
                mvn = tfd.MultivariateNormalFullCovariance(
                    loc=tf.zeros_like(self.U[:, 0]),
                    covariance_matrix=self.kern.K(self.Z, self.Z))

            probs = tf.add_n(
                [mvn.log_prob(self.U[:, d]) for d in range(self.kern.ndims)])

        else:
            if self.whiten:
                mvn = tfd.MultivariateNormalDiag(loc=tf.zeros_like(self.U))
            else:
                mvn = tfd.MultivariateNormalFullCovariance(
                    loc=tf.zeros_like(self.U),
                    covariance_matrix=self.kern.K(self.Z, self.Z))
            probs = tf.reduce_sum(mvn.log_prob(tf.squeeze(self.U)))
        return probs
Example #7
    def __init__(self,
                 nb_components,
                 dimension,
                 mu_init=None,
                 cov_init=None,
                 trainable=False,
                 name='gmm'):
        super(GMM, self).__init__(name=name)
        with self._enter_variable_scope():
            self.pi = tf.get_variable("pi",
                                      shape=(nb_components),
                                      dtype=tf.float32,
                                      trainable=trainable)

            if mu_init is not None:
                assert mu_init.get_shape().as_list() == [
                    nb_components, dimension
                ]
                self.mu = tf.get_variable("mixture_mu",
                                          initializer=mu_init,
                                          dtype=tf.float32,
                                          trainable=trainable)
            else:
                self.mu = tf.get_variable("mixture_mu",
                                          shape=(nb_components, dimension),
                                          dtype=tf.float32,
                                          trainable=trainable)

            if cov_init is not None:
                assert cov_init.get_shape().as_list() == [
                    nb_components, dimension, dimension
                ]
                self._L_k_raw = tf.get_variable(
                    "mixture_lower_cov",
                    initializer=tf.cholesky(cov_init),
                    dtype=tf.float32,
                    trainable=trainable)
            else:
                self._L_k_raw = tf.get_variable("mixture_lower_cov",
                                                shape=(nb_components,
                                                       dimension, dimension),
                                                dtype=tf.float32,
                                                trainable=trainable)

            self.model = tfd.MixtureSameFamily(
                mixture_distribution=tfd.Categorical(logits=self.pi),
                components_distribution=tfd.MultivariateNormalFullCovariance(
                    loc=self.mu, covariance_matrix=self._L_k_raw))
Example #8
    def __init__(self,
                 environment,
                 state_size,
                 action_size,
                 hidden_size,
                 it_tloop,
                 it_dyn,
                 bs_dyn,
                 it_policy,
                 bs_policy,
                 K,
                 T,
                 action_bound_high,
                 action_bound_low,
                 discount_factor,
                 moment_matching=True,
                 scope='pai'):
        self.environment = environment
        self.state_size = state_size
        self.action_size = action_size
        self.hidden_size = hidden_size

        self.it_tloop = it_tloop
        self.it_dyn = it_dyn
        self.bs_dyn = bs_dyn
        self.it_policy = it_policy
        self.bs_policy = bs_policy

        self.K = K  #Number of particles
        assert self.bs_policy == self.K  #Does this have to be true?
        self.T = T  #Time horizon

        self.action_bound_high = action_bound_high
        self.action_bound_low = action_bound_low
        self.discount_factor = discount_factor

        self.moment_matching = moment_matching
        self.scope = scope

        self.policy_reuse_vars = None

        # Assertion
        np.testing.assert_array_equal(-self.action_bound_low,
                                      self.action_bound_high)

        # Initialize the Bayesian neural network.
        self.bnn = bayesian_dynamics_model(self.state_size + self.action_size,
                                           self.state_size)
        self.bnn.initialize_inference(n_iter=self.it_tloop * self.it_dyn * 300,
                                      n_samples=10)

        # Declare variables and assignment operators for each W_k.
        self.assign_op = []
        for k in range(K):
            self.declare_vars_and_assign_op(scope='W_' + str(k) + '_')

        # True reward model
        self.reward_model = real_env_pendulum_reward()
        rewards = []

        # Predict x_t for t = 1,...,T.
        self.particles = tf.placeholder(shape=[self.K, self.state_size],
                                        dtype=tf.float32)
        self.action = self.build_policy(self.particles)
        particles = self.particles
        for t in range(T):
            actions = self.build_policy(particles)
            rewards.append((self.discount_factor**t) *
                           self.reward_model.build(particles, actions))
            states_actions = tf.concat([particles, actions], axis=-1)
            next_states = []
            for k in range(K):
                W_k = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                        'W_' + str(k) + '_')
                next_state = self.bnn.build(
                    *([tf.expand_dims(states_actions[k, :], axis=0)] + W_k))
                next_states.append(next_state)
            next_states = tf.concat(next_states, axis=0)

            # Perform moment matching.
            mu, cov = self.mu_and_cov(next_states)
            cov = cov + 5e-5 * np.eye(
                self.state_size)  # To prevent singular matrix
            particles = tfd.MultivariateNormalFullCovariance(
                loc=mu, covariance_matrix=cov).sample(self.K)

        # Maximize cumulative rewards in horizon T.
        rewards = tf.reduce_sum(tf.stack(rewards, axis=-1), axis=-1)
        self.loss = -tf.reduce_mean(tf.reduce_sum(rewards, axis=-1))
        self.opt = tf.train.AdamOptimizer().minimize(self.loss)
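A minimal standalone sketch (not from the repository) of the moment-matching step in the loop above: fit a Gaussian to the K particle predictions, add the 5e-5 jitter, and resample K particles from MultivariateNormalFullCovariance. It assumes a TF2/TFP build where that class is still available.

import numpy as np
import tensorflow as tf
import tensorflow_probability as tfp

tfd = tfp.distributions

K, state_size = 50, 3
next_states = tf.constant(
    np.random.default_rng(0).normal(size=(K, state_size)), dtype=tf.float32)

mu = tf.reduce_mean(next_states, axis=0)
centered = next_states - mu
cov = tf.matmul(centered, centered, transpose_a=True) / (K - 1)
cov += 5e-5 * tf.eye(state_size)             # jitter to avoid a singular matrix

particles = tfd.MultivariateNormalFullCovariance(
    loc=mu, covariance_matrix=cov).sample(K)
print(particles.shape)                       # (50, 3)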