def inference_policy(self, observation_space, action_space):
    """Build the Gaussian policy network.

    Args:
        observation_space: size of the last axis of the observation
            placeholder.
        action_space: number of outputs of the mean head and length of the
            standard-deviation variable.

    Returns:
        None. The network is usable only after this method has run; it sets
        ``input_pl``, ``std``, ``mvn``, ``sample`` and ``variables``.
    """
    # Remember how many trainable variables pre-date this network so that
    # only the ones created below end up in ``self.variables``.
    n_existing = len(tf.trainable_variables())

    self.input_pl = tf.placeholder(
        tf.float32, [None, observation_space], name='Input_PL')

    # Two tanh hidden layers with 100 units each.
    hidden = self.input_pl
    for _ in range(2):
        hidden = tf.layers.dense(
            hidden,
            100,
            activation=tf.nn.tanh,
            kernel_initializer=tf.random_normal_initializer(stddev=.1))

    # Linear head producing the mean of the action distribution.
    action_mean = tf.layers.dense(
        hidden,
        action_space,
        kernel_initializer=tf.random_normal_initializer(stddev=.01))

    # Input-independent scale of the distribution, initialised to ones.
    self.std = tf.Variable(np.ones(action_space).astype(np.float32))
    self.mvn = MultivariateNormalDiag(action_mean, self.std)
    self.sample = self.mvn.sample()
    self.variables = tf.trainable_variables()[n_existing:]
def _gauss_log_pi(self, mu, log_sig):
    """Sample a squashed action and return its log-probability.

    A ``Normal(mu, exp(log_sig))`` is sampled, the sample is passed through
    ``self._squash_actions`` and ``self._squash_correction`` is subtracted
    from the Gaussian log-probability of the raw sample.

    Returns:
        Tuple ``(log_pi[:, None], actions)``.
    """
    dist = Normal(mu, tf.exp(log_sig))
    raw_sample = dist.sample()
    squashed = self._squash_actions(raw_sample)
    log_pi = dist.log_prob(raw_sample) - self._squash_correction(raw_sample)
    return log_pi[:, None], squashed
def _create_loss_optimizer(self):
    """Build the training cost, the Adam train op and the merged summaries.

    The cost is the batch mean of (reconstruction loss + latent KL loss)
    plus the batch mean of the mutual-information loss. Sets ``self.cost``,
    ``self.optimizer``, ``self.mi_loss``,
    ``self.prob_c_prime_given_x_prime`` and ``self.merged``.
    """
    # The loss is composed of three terms.
    # Small constant added inside every log to avoid evaluating log(0.0).
    epsilon = tf.constant(1e-10)

    # 1.) The reconstruction loss (the negative log probability of the
    #     input under the reconstructed Bernoulli distribution induced by
    #     the decoder in the data space). This can be interpreted as the
    #     number of "nats" required for reconstructing the input when the
    #     activation in latent space is given.
    reconstr_loss = -tf.reduce_sum(
        self.x * tf.log(epsilon + self.x_reconstr_mean)
        + (1 - self.x) * tf.log(epsilon + 1 - self.x_reconstr_mean),
        1,
        name='reconstruction_loss')

    # 2.) The latent loss, defined as the Kullback-Leibler divergence
    #     between the distribution in latent space induced by the encoder
    #     on the data and some prior. This acts as a kind of regularizer
    #     and can be interpreted as the number of "nats" required for
    #     transmitting the latent space distribution given the prior.
    latent_loss = -0.5 * tf.reduce_sum(
        1 + self.z_log_sigma_sq_concat
        - tf.square(self.z_mean_concat)
        - tf.exp(self.z_log_sigma_sq_concat),
        1,
        name='Latent_Loss')

    # 3.) Mutual information loss: log(q(c'|x')).
    #     The second positional argument of MultivariateNormalDiag is the
    #     diagonal *standard deviation*, so the log-variance is halved
    #     before exponentiating (exp(log sigma^2) would be the variance).
    q_c_given_x = MultivariateNormalDiag(
        self.z_mean_c_prime,
        tf.exp(0.5 * self.z_log_sigma_sq_c_prime),
        validate_args=True,
        name='q_c_given_x')
    prob_c_prime_given_x_prime = q_c_given_x.pdf(
        self.c_prime, name='prob_c_prime_given_x_prime')
    self.prob_c_prime_given_x_prime = prob_c_prime_given_x_prime
    mi_loss = -tf.log(tf.add(epsilon, prob_c_prime_given_x_prime),
                      name='mi_loss')
    self.mi_loss = mi_loss

    # Average over the batch; the MI term enters the cost with weight 1.
    self.cost = tf.add(tf.reduce_mean(reconstr_loss + latent_loss),
                       tf.reduce_mean(mi_loss))

    # Use ADAM optimizer
    self.optimizer = tf.train.AdamOptimizer(
        learning_rate=self.learning_rate).minimize(self.cost)

    rec_summary = tf.scalar_summary('reconstruction loss',
                                    tf.reduce_mean(reconstr_loss))
    mi_summary = tf.scalar_summary('MI loss', tf.reduce_mean(mi_loss))
    latent_summary = tf.scalar_summary('KLD q(z|x) || p(z)',
                                       tf.reduce_mean(latent_loss))
    latent_pqgivenx = tf.scalar_summary(
        'p(q_prime|x_prime)', tf.reduce_mean(prob_c_prime_given_x_prime))
    summaries = [rec_summary, mi_summary, latent_summary, latent_pqgivenx]
    self.merged = tf.merge_summary(summaries)
def gen_one_step(self, z, u):
    """Draw the next latent state from the transition prior.

    ``self.p_transition(z, u)`` supplies a mean and a variance; the sample
    comes from a diagonal Gaussian whose scale is the square root of that
    variance.
    """
    mean, variance = self.p_transition(z, u)
    return MultivariateNormalDiag(mean, tf.sqrt(variance)).sample()
def build(self):
    """Create the placeholder, the prior and the MMD-penalty tensor.

    A float32 placeholder with a leading ``None`` axis followed by
    ``self.__z_dim`` receives encoded latents; as many samples as there are
    rows in that placeholder are drawn from a zero-mean, unit-scale
    diagonal Gaussian and both are passed to ``mmd_penalty``.
    """
    encoded_shape = np.append([None], self.__z_dim)
    self.__tensor_z_encoded = tf.placeholder(shape=encoded_shape,
                                             dtype=tf.float32)

    prior_loc = tf.zeros(self.__z_dim)
    prior_scale = tf.ones(self.__z_dim)
    self.__distr = MultivariateNormalDiag(loc=prior_loc,
                                          scale_diag=prior_scale)

    n_encoded = tf.shape(self.__tensor_z_encoded)[0]
    self.__tensor_z_sampled = self.__distr.sample(n_encoded)
    self.__tensor_mmd_penalty = mmd_penalty(self.__tensor_z_encoded,
                                            self.__tensor_z_sampled)
def one_step_IAF(self, a, x):
    """Advance one step using the IAF posterior transition.

    Args:
        a: sequence whose first two entries are the previous latent state
            and the running ``log_q``.
        x: pair ``(u, enc)``.

    Returns:
        Tuple ``(z_step, log_q, log_p)`` where ``log_q`` has the summed log
        of ``q_var + 1e-5`` (axis 1) subtracted and ``log_p`` is the prior
        log-probability of ``z_step``.
    """
    prev_z, running_log_q = a[0], a[1]
    u, enc = x

    z_step, q_var = self.q_transition_IAF(prev_z, enc, u)
    prior_mean, prior_var = self.p_transition(prev_z, u)
    prior = MultivariateNormalDiag(prior_mean, tf.sqrt(prior_var))

    log_det_term = tf.reduce_sum(tf.log(q_var + 1e-5), axis=1)
    updated_log_q = running_log_q - log_det_term
    log_p = prior.log_prob(z_step)
    return z_step, updated_log_q, log_p
def one_step(self, a, x):
    """Sample the next latent state and compute KL(q || p) for this step.

    Args:
        a: sequence whose first entry is the previous latent state.
        x: pair ``(u, enc)``.

    Returns:
        Tuple ``(z_step, kl)``: a sample from the posterior transition and
        the KL divergence from the posterior to the prior transition.
    """
    prev_z = a[0]
    u, enc = x

    post_mean, post_var = self.q_transition(prev_z, enc, u)
    prior_mean, prior_var = self.p_transition(prev_z, u)

    # Both transitions return variances; the distribution takes a scale.
    posterior = MultivariateNormalDiag(post_mean, tf.sqrt(post_var))
    prior = MultivariateNormalDiag(prior_mean, tf.sqrt(prior_var))

    return posterior.sample(), kl_divergence(posterior, prior)
def get_generative_dist(self, z):
    """Return the observation distribution produced from latents ``z``.

    ``z`` is flattened to rows of ``self.n_latent`` values, decoded by
    ``self.generative_mlp`` and reshaped so the leading axis matches that
    of ``z`` and the last axis has ``self.n_obs`` entries. The variance is
    ``self.x_var**2 + 1e-8`` for every element.
    """
    flat_latents = tf.reshape(z, (-1, self.n_latent))
    decoded = self.generative_mlp(flat_latents)
    obs_mean = tf.reshape(decoded, (tf.shape(z)[0], -1, self.n_obs))

    obs_var = tf.zeros_like(obs_mean) + self.x_var**2 + 1e-8
    obs_var = tf.reshape(obs_var, (tf.shape(z)[0], -1, self.n_obs))

    return MultivariateNormalDiag(obs_mean, tf.sqrt(obs_var))
def mixture(locs, scales, pi, K):
    """Build a mixture of ``K`` diagonal Gaussians.

    Component ``i`` uses ``locs[:, i, :]`` and ``scales[:, i, :]``; the
    mixing distribution is a ``Categorical`` with probabilities ``pi``.
    """
    selector = Categorical(probs=pi)
    gaussians = []
    for idx in range(K):
        gaussians.append(
            MultivariateNormalDiag(loc=locs[:, idx, :],
                                   scale_diag=scales[:, idx, :]))
    return Mixture(cat=selector, components=gaussians)
def build_sample_standard(self):
    """Build one sampled prediction per output and stack them on axis 1.

    For every output a component index ``k`` is drawn uniformly, the
    intermediate conditionals are built for it, and for every latent
    function a sample of ``f`` and of ``w`` is drawn from
    ``MultivariateNormalTriL``. The products ``diag(w) f`` are weighted by
    ``self.q_weights[k]`` and summed over latent functions. Unless
    ``self.context.plot_posterior`` is set, diagonal Gaussian noise is
    added by sampling around that sum.
    """
    r = self.r
    per_output = []
    for i in range(self.num_outputs):
        k = tf.squeeze(
            tf.random_uniform(shape=[1], minval=0,
                              maxval=self.num_components, dtype=tf.int32))
        full_cov = True
        mu_f, sigma_f, mu_w, sigma_w = \
            self.sparsity._build_intermediate_conditionals(
                k, r, self.x_test, predict=not full_cov)
        pi_k = self.q_weights[k]

        weighted_terms = []
        for j in range(self.num_latent):
            # Both branches of the former full_cov switch sliced the
            # tensors identically; only the debug output was conditional.
            if full_cov:
                print('mu_f: ', mu_f)
                print('mu_w: ', mu_w)
                print('sigma_f: ', sigma_f)
                print('sigma_w: ', sigma_w)
            mu_fj = tf.squeeze(mu_f[j, :, :])
            mu_wij = tf.squeeze(mu_w[i, j, :, :])
            sigma_fk = sigma_f[j, :, :]
            sigma_wik = sigma_w[i, j, :, :]

            # Cholesky is taken in float64 on the jittered covariance and
            # cast back to float32.
            chol_f = tf.cast(
                tf.cholesky(
                    tf.cast(util.add_jitter(sigma_fk, 1e-4), tf.float64)),
                tf.float32)
            chol_w = tf.cast(
                tf.cholesky(
                    tf.cast(util.add_jitter(sigma_wik, 1e-4), tf.float64)),
                tf.float32)

            f_sample = MultivariateNormalTriL(
                loc=mu_fj, scale_tril=chol_f).sample()
            w_sample = MultivariateNormalTriL(
                loc=mu_wij, scale_tril=chol_w).sample()
            w_diag = tf.diag(tf.squeeze(w_sample))
            wf = tf.matmul(w_diag, tf.expand_dims(f_sample, 1))
            weighted_terms.append(pi_k * tf.squeeze(wf))

        latent_sum = tf.squeeze(
            tf.reduce_sum(tf.stack(weighted_terms), axis=0))

        if self.context.plot_posterior:
            y = latent_sum
        else:
            noise_sigma = tf.square(util.var_postive(self.sigma_y[i]))
            noise_scale = (tf.sqrt(noise_sigma)
                           * tf.ones(tf.shape(self.x_test)[0]))
            y = MultivariateNormalDiag(loc=latent_sum,
                                       scale_diag=noise_scale).sample()
        per_output.append(y)

    return tf.stack(per_output, axis=1)
def _compute_dist(self, states):
    """Return the diagonal Gaussian policy distribution for ``states``.

    The features from ``l1`` and ``l2`` feed a mean head and a log-sigma
    head; log-sigma is clipped to
    ``[LOG_SIG_CAP_MIN, LOG_SIG_CAP_MAX]`` before exponentiation.
    """
    hidden = self.l2(self.l1(states))
    mean = self.out_mean(hidden)
    clipped_log_sigma = tf.clip_by_value(self.out_sigma(hidden),
                                         self.LOG_SIG_CAP_MIN,
                                         self.LOG_SIG_CAP_MAX)
    return MultivariateNormalDiag(loc=mean,
                                  scale_diag=tf.exp(clipped_log_sigma))
def __init__(self, layer_index, kern, output_dim, n_inducing, X,
             n_sample=100, fixed_mean=True):
    """Set up one layer: inducing inputs, GAN, noise sampler, skip weights.

    Args:
        layer_index: index passed to the GAN and used in the summary print.
        kern: kernel object; its ``input_dim`` is the layer input size.
        output_dim: output size of the layer.
        n_inducing: number of inducing points (k-means centres of ``X``).
        X: data used for k-means initialisation and, when the input is
            wider than the output, for the SVD-based skip projection.
        n_sample: stored on the instance as ``self.n_sample``.
        fixed_mean: stored flag; default True for all layers before the
            last layer.
    """
    n_eps = int(n_inducing * output_dim)

    self.layer_index = layer_index
    self.kernel = kern
    self.input_dim = kern.input_dim
    self.output_dim = output_dim
    self.eps_dim = n_eps
    self.n_sample = n_sample
    self.n_inducing = n_inducing
    self.fixed_mean = fixed_mean

    print("========= Layer {} summary =========".format(layer_index))
    print("::::: LAYOUT")
    print("----- [Input dimension] : ", self.input_dim)
    print("----- [Output dimension] : ", self.output_dim)

    # The prior distribution is set to be i.i.d. Gaussian distributions.

    # ---- Inducing points: k-means centres of X, under scope 'theta'.
    with tf.variable_scope('theta'):
        centres = kmeans2(X, self.n_inducing, minit='points')[0]
        self.Z = tf.Variable(centres, dtype=tf.float64, name='Z')

    # ---- GAN and noise sampler.
    self.gan = GAN(self.n_inducing, self.output_dim, self.input_dim,
                   self.layer_index)
    _prior_mean = 0.0
    _prior_var = 1.0
    self.prior_mean = [_prior_mean] * int(n_inducing * output_dim)
    self.prior_var = [_prior_var] * int(n_inducing * output_dim)
    self.mu = [0.] * n_eps
    self.scale = [1.0] * n_eps
    # In the paper we use a single global eps while in this implementation
    # we disentangle them.
    self.eps_sampler = MultiNormal(self.mu, self.scale)
    print("----- [Prior mean] : ", _prior_mean)
    print("----- [Prior var] : ", _prior_var)

    # ---- Skip-layer connection.
    if self.input_dim > self.output_dim:
        # Project onto the leading right-singular vectors of X.
        _, _, V = np.linalg.svd(X, full_matrices=False)
        self.W_skiplayer = V[:self.output_dim, :].T
    elif self.input_dim < self.output_dim:
        # Identity padded with zero columns.
        padding = np.zeros(
            (self.input_dim, self.output_dim - self.input_dim))
        self.W_skiplayer = np.concatenate(
            [np.eye(self.input_dim), padding], axis=1)
    else:
        self.W_skiplayer = np.eye(self.input_dim)
def gmm_log_pi(self, log_weights, mu, log_std):
    """Sample an action from a Gaussian mixture and return its log-policy.

    A component index is drawn from ``log_weights`` and one sample from
    each ``Normal(mu, exp(log_std))`` component; both draws are wrapped in
    ``stop_gradient``. The sample of the selected component is squashed
    into the action.

    Returns:
        Tuple ``(log_pi[:, None], action)``; ``log_pi`` is scaled by
        ``self._temp``.
    """
    components = Normal(mu, tf.exp(log_std))

    # Sample from the GMM: pick a component, then take that component's
    # sample through a one-hot mask.
    chosen = tf.stop_gradient(
        tf.multinomial(logits=log_weights, num_samples=1))
    all_samples = tf.stop_gradient(components.sample())
    selector = tf.one_hot(chosen[:, 0], depth=self._actor.K)
    z = tf.reduce_sum(all_samples * selector[:, :, None], axis=1)
    action = self.squash_action(z)

    # Log policy: mixture log-density, normalised by the total weight and
    # corrected for the squashing.
    component_log_prob = components.log_prob(z[:, None, :])
    log_pi = tf.reduce_logsumexp(component_log_prob + log_weights, axis=-1)
    log_pi = log_pi - tf.reduce_logsumexp(log_weights, axis=-1)
    log_pi = log_pi - self.get_squash_correction(z)
    log_pi = log_pi * self._temp
    return log_pi[:, None], action
def __init__(self, state_dim, act_dim, hid_top):
    """Build a value network and a Gaussian policy network on one input.

    Args:
        state_dim: width of the input.
        act_dim: number of action dimensions.
        hid_top: iterable of hidden-layer widths, used for both networks.

    ``variables_v`` / ``variables_p`` hold the trainable variables created
    while building the value / policy part respectively.
    """
    def tanh_stack(tensor):
        # One fully connected tanh layer per entry of hid_top.
        for width in hid_top:
            tensor = tfl.fully_connected(tensor, width, activation='tanh')
        return tensor

    # ---- value function
    n_before_value = len(tf.trainable_variables())
    self.input = tfl.input_data([None, state_dim])
    value_out = tfl.fully_connected(tanh_stack(self.input), 1,
                                    activation='linear')
    self.vpred = tf.squeeze(value_out, axis=[1])
    self.variables_v = tf.trainable_variables()[n_before_value:]

    # ---- policy
    n_before_policy = len(tf.trainable_variables())
    mean = tfl.fully_connected(tanh_stack(self.input), act_dim,
                               activation='linear')
    logstd = tf.Variable(
        initial_value=np.zeros(act_dim).astype(np.float32))
    self.variables_p = tf.trainable_variables()[n_before_policy:]

    self.mvn = MultivariateNormalDiag(mean, tf.exp(logstd))
    self.sample = self.mvn.sample()
def __init__(self, state_dim, act_dim):
    """Build a value network and a Gaussian policy network on one input.

    Both networks use two tanh hidden layers of 64 units. ``v_variables``
    and ``p_variables`` hold the trainable variables created while building
    the value and policy part respectively.
    """
    hidden_widths = [64, 64]

    def tanh_stack(tensor):
        for width in hidden_widths:
            tensor = tfl.fully_connected(tensor, width, activation='tanh')
        return tensor

    self.input = tfl.input_data([None, state_dim])

    # ---- value function
    n_before_value = len(tf.trainable_variables())
    value_out = tfl.fully_connected(tanh_stack(self.input), 1,
                                    activation='linear')
    self.vpred = tf.squeeze(value_out, axis=[1])
    self.v_variables = tf.trainable_variables()[n_before_value:]

    # ---- policy
    n_before_policy = len(tf.trainable_variables())
    self.mean = tfl.fully_connected(tanh_stack(self.input), act_dim,
                                    activation='linear')
    # NOTE(review): the attribute is called ``var`` but it is passed to the
    # distribution in the scale position - confirm the intended meaning.
    log_param = tf.Variable(
        initial_value=np.zeros(act_dim).astype(np.float32))
    self.var = tf.exp(log_param)
    self.mvn = MultivariateNormalDiag(self.mean, self.var)
    self.sample = self.mvn.sample()
    self.p_variables = tf.trainable_variables()[n_before_policy:]
def build_sample_standard(self):
    """Build one noisy sampled prediction per output, stacked on axis 1.

    For every output a component index ``k`` is drawn uniformly, the
    intermediate conditionals are built and passed through
    ``get_expected_values``, and for every latent function a sample of
    ``f`` and of ``w`` is drawn. The products ``diag(w) f`` are weighted by
    ``self.q_weights[k]``, summed over latent functions and used as the
    location of a diagonal Gaussian from which the output is sampled.
    """
    per_output = []
    for i in range(self.num_outputs):
        k = tf.squeeze(
            tf.random_uniform(shape=[1], minval=0,
                              maxval=self.num_components, dtype=tf.int32))
        conditionals = self.sparsity._build_intermediate_conditionals(
            k, self.x_test)
        mu_f, sigma_f, mu_w, sigma_w = self.get_expected_values(
            *conditionals)
        pi_k = self.q_weights[k]

        weighted_terms = []
        for j in range(self.num_latent):
            mu_fj = tf.squeeze(mu_f[j, :, :])
            mu_wij = tf.squeeze(mu_w[i, j, :, :])
            chol_f = tf.cholesky(sigma_f[j, :, :])
            chol_w = tf.cholesky(sigma_w[i, j, :, :])

            f_sample = MultivariateNormalTriL(
                loc=mu_fj, scale_tril=chol_f).sample()
            w_sample = MultivariateNormalTriL(
                loc=mu_wij, scale_tril=chol_w).sample()
            w_diag = tf.diag(tf.squeeze(w_sample))
            wf = tf.matmul(w_diag, tf.expand_dims(f_sample, 1))
            weighted_terms.append(pi_k * tf.squeeze(wf))

        latent_sum = tf.squeeze(
            tf.reduce_sum(tf.stack(weighted_terms), axis=0))

        noise_scale = (tf.sqrt(util.var_postive(self.sigma_y[i]))
                       * tf.ones(tf.shape(self.x_test)[0]))
        y = MultivariateNormalDiag(loc=latent_sum,
                                   scale_diag=noise_scale).sample()
        # Debug taps: log the latent sum and the sampled output whenever
        # y is evaluated.
        y = tf.Print(y, [latent_sum], 'latent_sum')
        y = tf.Print(y, [y], 'y')
        per_output.append(y)

    return tf.stack(per_output, axis=1)
def sampling_func(y_pred):
    """Draw one sample from the mixture encoded in ``y_pred``.

    ``y_pred`` is split along axis 1 into ``num_mixes * output_dim`` means,
    ``num_mixes * output_dim`` scales and ``num_mixes`` mixture logits
    (``num_mixes`` and ``output_dim`` come from the enclosing scope).
    """
    param_sizes = [num_mixes * output_dim,
                   num_mixes * output_dim,
                   num_mixes]
    flat_mu, flat_sigma, pi_logits = tf.split(
        y_pred, num_or_size_splits=param_sizes, axis=1,
        name='mdn_coef_split')

    selector = Categorical(logits=pi_logits)
    per_component = [output_dim] * num_mixes
    locs = tf.split(flat_mu, num_or_size_splits=per_component, axis=1)
    scales = tf.split(flat_sigma, num_or_size_splits=per_component, axis=1)

    gaussians = []
    for loc, scale in zip(locs, scales):
        gaussians.append(MultivariateNormalDiag(loc=loc, scale_diag=scale))

    # Todo: temperature adjustment for sampling function.
    return Mixture(cat=selector, components=gaussians).sample()
def loss_func(y_true, y_pred):
    """Mean negative log-likelihood of ``y_true`` under the mixture.

    ``y_pred`` is split along axis 1 into ``num_mixes * output_dim`` means,
    ``num_mixes * output_dim`` scales and ``num_mixes`` mixture logits
    (``num_mixes`` and ``output_dim`` come from the enclosing scope).
    """
    param_sizes = [num_mixes * output_dim,
                   num_mixes * output_dim,
                   num_mixes]
    flat_mu, flat_sigma, pi_logits = tf.split(
        y_pred, num_or_size_splits=param_sizes, axis=1,
        name='mdn_coef_split')

    selector = Categorical(logits=pi_logits)
    per_component = [output_dim] * num_mixes
    locs = tf.split(flat_mu, num_or_size_splits=per_component, axis=1)
    scales = tf.split(flat_sigma, num_or_size_splits=per_component, axis=1)

    gaussians = []
    for loc, scale in zip(locs, scales):
        gaussians.append(MultivariateNormalDiag(loc=loc, scale_diag=scale))
    mix = Mixture(cat=selector, components=gaussians)

    log_likelihood = mix.log_prob(y_true)
    return tf.reduce_mean(tf.negative(log_likelihood))
def build_graph_monte_carlo(self):
    """Build one Monte-Carlo sample per output and stack them on axis 1.

    For every output a component index is obtained from
    ``util.sample_index_with_prob_weights``, the expected intermediate
    conditionals are computed, and for every latent function the product
    ``diag(w) f`` of two ``MultivariateNormalTriL`` samples is formed. The
    products are summed over latent functions and used as the location of
    an isotropic-scale Gaussian from which the output is sampled.
    """
    # Not used below; kept because evaluating the static shape is part of
    # the original flow.
    num_test = self.x_test.get_shape().as_list()[0]
    print('build_graph')

    per_output = []
    for i in range(self.num_outputs):
        k = util.sample_index_with_prob_weights(self.q_weights,
                                                self.num_outputs)
        conditionals = self.sparsity._build_intermediate_conditionals(
            k, self.x_test)
        mu_f, sigma_f, mu_w, sigma_w = self.model.get_expected_values(
            *conditionals)
        # Mixture weight of the chosen component; not used in this method.
        pi_k = self.q_weights[k]

        products = []
        for j in range(self.num_latent):
            mu_fj = tf.squeeze(mu_f[j, :, :])
            mu_wij = tf.squeeze(mu_w[i, j, :, :])
            chol_f = tf.cholesky(sigma_f[j, :, :])
            chol_w = tf.cholesky(sigma_w[i, j, :, :])

            f_sample = MultivariateNormalTriL(
                loc=mu_fj, scale_tril=chol_f).sample()
            w_sample = MultivariateNormalTriL(
                loc=mu_wij, scale_tril=chol_w).sample()

            f_col = tf.expand_dims(tf.squeeze(f_sample), 1)
            w_diag = tf.diag(tf.squeeze(w_sample))
            products.append(tf.matmul(w_diag, f_col))

        wf_sum = tf.squeeze(tf.reduce_sum(products, axis=0))
        y = MultivariateNormalDiag(
            loc=wf_sum,
            scale_identity_multiplier=util.var_postive(
                self.sigma_y)).sample()
        per_output.append(tf.squeeze(y))

    return tf.stack(per_output, axis=1)
class MMDEvaluator:
    """Averages the MMD penalty between given latents and prior samples."""

    def __init__(self, z_dim, repeats_count: int = 3,
                 samples_limit: int = 2000):
        """Store the configuration; no graph is built until ``build``.

        Args:
            z_dim: latent dimension(s), appended after a ``None`` batch
                axis in the placeholder shape.
            repeats_count: number of session runs averaged per evaluation.
            samples_limit: maximum number of rows of ``z`` evaluated.
        """
        self.__z_dim = z_dim
        self.__repeats_count = repeats_count
        self.__samples_limit = samples_limit

    def build(self):
        """Create the placeholder, the prior and the MMD-penalty tensor."""
        encoded_shape = np.append([None], self.__z_dim)
        self.__tensor_z_encoded = tf.placeholder(shape=encoded_shape,
                                                 dtype=tf.float32)
        self.__distr = MultivariateNormalDiag(
            loc=tf.zeros(self.__z_dim),
            scale_diag=tf.ones(self.__z_dim))
        # Draw as many prior samples as there are encoded rows.
        n_encoded = tf.shape(self.__tensor_z_encoded)[0]
        self.__tensor_z_sampled = self.__distr.sample(n_encoded)
        self.__tensor_mmd_penalty = mmd_penalty(self.__tensor_z_encoded,
                                                self.__tensor_z_sampled)

    def __compute_wae_distance(self, session, latent):
        """Run the penalty tensor ``repeats_count`` times and average."""
        feed = {self.__tensor_z_encoded: latent}
        penalties = [
            session.run(self.__tensor_mmd_penalty, feed)
            for _ in range(self.__repeats_count)
        ]
        return sum(penalties) / self.__repeats_count

    def evaluate(self, session, z):
        """Return ``[('wae_distance', value)]`` for the latents ``z``.

        When ``z`` has more rows than ``samples_limit``, a random subset of
        ``samples_limit`` rows (without replacement) is evaluated instead.
        """
        print('Computing MMD')
        n_rows = z.shape[0]
        if n_rows > self.__samples_limit:
            keep = np.random.choice(n_rows, self.__samples_limit,
                                    replace=False)
            z = z[keep]
        return [('wae_distance', self.__compute_wae_distance(session, z))]
def set_input_shape(self, input_shape, reuse):
    """Create the (stochastic) kernels and infer ``self.output_shape``.

    Args:
        input_shape: 4-element shape; its last entry is the number of
            input channels.
        reuse: forwarded to ``tf.variable_scope``.

    The kernel variable ``k`` is replaced by a random sample: a +/-1
    Bernoulli sample when ``self.binary`` is set, otherwise a sample from a
    ``MultivariateNormalDiag`` located at the kernels. Both sampling ops
    are created under a gradient override map to ``"QuantizeGrad"``. The
    output shape is found by forwarding a zero batch of size one.
    """
    # Unpacking also checks that input_shape has exactly four entries.
    _batch, _rows, _cols, n_in_channels = input_shape
    kernel_shape = tuple(self.kernel_shape) + (n_in_channels,
                                               self.output_channels)
    assert len(kernel_shape) == 4
    assert all(isinstance(e, int) for e in kernel_shape), kernel_shape

    with tf.variable_scope(self.scope_name + '_init', reuse):
        initial = tf.truncated_normal(kernel_shape, stddev=0.2,
                                      dtype=tf.float32)
        self.kernels = tf.get_variable("k", initializer=initial)
        tf.summary.histogram(name="k", values=self.kernels)

        if not self.binary:
            from tensorflow.contrib.distributions import MultivariateNormalDiag
            with self.G.gradient_override_map(
                    {"MultivariateNormalDiag": "QuantizeGrad"}):
                self.kernels = MultivariateNormalDiag(
                    loc=self.kernels).sample()
        else:
            from tensorflow.contrib.distributions import Bernoulli
            with self.G.gradient_override_map(
                    {"Bernoulli": "QuantizeGrad"}):
                bits = Bernoulli(probs=hard_sigmoid(self.kernels),
                                 dtype=tf.float32).sample()
                # Map {0, 1} samples to {-1, +1}.
                self.kernels = 2. * bits - 1.

        tf.summary.histogram(name="k_rand", values=self.kernels)

    # Forward a single zero example to discover the output shape.
    probe_shape = list(input_shape)
    probe_shape[0] = 1
    probe_output = self.fprop(tf.zeros(probe_shape), False)
    inferred = [int(dim) for dim in probe_output.get_shape()]
    inferred[0] = 1
    self.output_shape = tuple(inferred)
def build_sample_single_gp(self):
    """Build a noisy sample for output 0 from latent function 0.

    A component index ``k`` is drawn uniformly, the intermediate
    conditionals for it are passed through ``get_expected_values``, one
    ``f`` sample is drawn for latent index 0, weighted by
    ``self.q_weights[k]``, and diagonal Gaussian noise is added by sampling
    around it. Only slot 0 of the per-latent and per-output lists is
    filled; the remaining slots stay at ``0.0`` before stacking.
    """
    out_idx = 0
    latent_idx = 0
    total_sum = [0.0 for _ in range(self.num_outputs)]

    k = tf.squeeze(
        tf.random_uniform(shape=[1], minval=0, maxval=self.num_components,
                          dtype=tf.int32))
    noise_sigma = tf.square(util.var_postive(self.sigma_y[0]))

    mu_f, sigma_f, _, _ = self.sparsity._build_intermediate_conditionals(
        k, 0, self.x_test, predict=False)
    mu_f, sigma_f = self.get_expected_values(mu_f, sigma_f)
    pi_k = self.q_weights[k]

    latent_sum = [0.0 for _ in range(self.num_latent)]
    mu_fj = tf.squeeze(mu_f[latent_idx, :, :])
    sigma_fk = sigma_f[latent_idx, :, :]
    # Cholesky is taken in float64 on the jittered covariance and cast
    # back to float32.
    jittered = tf.cast(util.add_jitter(sigma_fk, 1e-4), tf.float64)
    chol_f = tf.cast(tf.cholesky(jittered), tf.float32)
    f_sample = MultivariateNormalTriL(loc=mu_fj,
                                      scale_tril=chol_f).sample()
    latent_sum[latent_idx] += pi_k * tf.squeeze(f_sample)

    latent_sum = tf.squeeze(tf.reduce_sum(tf.stack(latent_sum), axis=0))

    noise_scale = tf.sqrt(noise_sigma) * tf.ones(tf.shape(self.x_test)[0])
    y = MultivariateNormalDiag(loc=latent_sum,
                               scale_diag=noise_scale).sample()
    total_sum[out_idx] += y
    return tf.stack(total_sum, axis=1)
def __init__(self, layer_index, kern, output_dim, n_inducing, X,
             fixed_mean=True, n_sample=100, eps_dim=32):
    """Set up one layer: noise sampler, inducing inputs, GAN, skip weights.

    Args:
        layer_index: index passed to the GAN and used in the summary print.
        kern: kernel object; its ``input_dim`` is the layer input size.
        output_dim: output size of the layer.
        n_inducing: number of inducing points (k-means centres of ``X``).
        X: data used for k-means initialisation and, when the input is
            wider than the output, for the SVD-based skip projection.
        fixed_mean: stored boolean flag.
        n_sample: stored on the instance as ``self.n_sample``.
        eps_dim: ignored - it is overwritten with
            ``int(n_inducing * output_dim)`` before use.
    """
    print("=== Layer {} summary ===".format(layer_index))
    print("--- Input dimension: ", kern.input_dim)
    print("--- Output dimension: ", output_dim)

    # NOTE(review): the eps_dim argument never takes effect - confirm that
    # overriding it here is intended.
    eps_dim = int(n_inducing * output_dim)

    self.layer_index = layer_index
    self.kernel = kern
    self.input_dim = kern.input_dim
    self.output_dim = output_dim
    self.eps_dim = eps_dim
    self.n_sample = n_sample
    self.n_inducing = n_inducing
    self.fixed_mean = fixed_mean

    self.prior_mean = [0.0] * int(n_inducing * output_dim)
    self.prior_var = [1.0] * int(n_inducing * output_dim)
    self.mu = [0.] * eps_dim
    self.scale = [1.0] * eps_dim
    self.eps_sampler = MultiNormal(self.mu, self.scale)

    # ---- Inducing points: k-means centres of X, under scope 'theta'.
    with tf.variable_scope('theta'):
        centres = kmeans2(X, self.n_inducing, minit='points')[0]
        self.Z = tf.Variable(centres, dtype=tf.float64, name='Z')

    # ---- GAN
    self.gan = GAN(self.n_inducing, self.output_dim, self.layer_index,
                   self.input_dim)

    # ---- Skip-layer connection
    if self.input_dim > self.output_dim:
        # Project onto the leading right-singular vectors of X.
        _, _, V = np.linalg.svd(X, full_matrices=False)
        self.W_skiplayer = V[:self.output_dim, :].T
    elif self.input_dim < self.output_dim:
        # Identity padded with zero columns.
        padding = np.zeros(
            (self.input_dim, self.output_dim - self.input_dim))
        self.W_skiplayer = np.concatenate(
            [np.eye(self.input_dim), padding], axis=1)
    else:
        self.W_skiplayer = np.eye(self.input_dim)

    # 1. trainable=False because this is the prior, and it is not a
    #    variable to be learned by SGD;
    # 2. the reason U exists at all is not documented (left open by the
    #    original author).
    self.U = tf.Variable(np.zeros((self.n_inducing, self.output_dim)),
                         dtype=tf.float64, trainable=False, name='U')
def _build_anet(self, name, trainable):
    """Build the actor network under variable scope ``name``.

    A 200-unit ReLU layer on ``self.tfs`` feeds three pairs of single-unit
    heads (mean, sigma). The first mean head uses tanh, every other head
    uses sigmoid; the first and third pairs use a small uniform kernel
    initialiser, the second pair uses the layer default.

    Returns:
        Tuple ``(norm_dist, params)``: the 3-dimensional diagonal Gaussian
        and the global variables collected under ``name``.
    """
    with tf.variable_scope(name):
        w_init = tf.random_uniform_initializer(minval=-0.003, maxval=0.003)
        l1 = tf.layers.dense(self.tfs, 200, tf.nn.relu, trainable=trainable)

        # (activation of the mean head, kernel initialiser) per pair.
        # Heads are created pair by pair so the layer creation order is
        # mu1, sigma1, mu2, sigma2, mu3, sigma3.
        head_specs = [
            (tf.nn.tanh, w_init),
            (tf.nn.sigmoid, None),
            (tf.nn.sigmoid, w_init),
        ]
        mu_heads = []
        sigma_heads = []
        for mu_activation, initializer in head_specs:
            mu_heads.append(
                tf.layers.dense(l1, 1, mu_activation, trainable=trainable,
                                kernel_initializer=initializer))
            sigma_heads.append(
                tf.layers.dense(l1, 1, tf.nn.sigmoid, trainable=trainable,
                                kernel_initializer=initializer))

        mu = tf.concat(mu_heads, axis=1)
        sigma = tf.concat(sigma_heads, axis=1)
        norm_dist = MultivariateNormalDiag(loc=mu, scale_diag=sigma)

    params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope=name)
    return norm_dist, params
def one_step_IAF(self, a, x):
    """Advance one step: Gaussian posterior sample followed by an IAF step.

    Args:
        a: sequence whose first entry is the previous latent state.
        x: pair ``(u, enc)``.

    Returns:
        Tuple ``(z_step_iaf, log_q, log_p)``. ``log_q`` is the posterior
        log-probability of the pre-IAF sample minus the summed log of the
        IAF variance (plus 1e-5) over axis 1; ``log_p`` is the prior
        log-probability of the post-IAF sample.
    """
    prev_z = a[0]
    u, enc = x

    # The IAF context encodes the previous z, enc(x) and u together.
    iaf_context = self.q_henc(tf.concat([prev_z, enc, u], 1))

    post_mean, post_var = self.q_transition(prev_z, enc, u)
    prior_mean, prior_var = self.p_transition(prev_z, u)
    posterior = MultivariateNormalDiag(post_mean, tf.sqrt(post_var))
    prior = MultivariateNormalDiag(prior_mean, tf.sqrt(prior_var))

    # Density of the sample before the IAF step is applied.
    z_step = posterior.sample()
    log_q_base = posterior.log_prob(z_step)

    # Apply the IAF step and correct the density accordingly.
    z_step_iaf, iaf_var = self.q_transition_IAF(z_step, iaf_context)
    log_q = log_q_base - tf.reduce_sum(tf.log(iaf_var + 1e-5), axis=1)

    # TODO: check if this is correct? Should we be getting the probability
    # of z_step or z_step_iaf?
    log_p = prior.log_prob(z_step_iaf)
    return z_step_iaf, log_q, log_p
class Layer(object):
    """One layer with GAN-generated inducing values and a skip connection."""

    def __init__(self, layer_index, kern, output_dim, n_inducing, X,
                 n_sample=100, fixed_mean=True):
        """Set up inducing inputs, GAN, noise sampler and skip weights.

        Args:
            layer_index: index passed to the GAN and used in the summary.
            kern: kernel object; its ``input_dim`` is the layer input size.
            output_dim: output size of the layer.
            n_inducing: number of inducing points (k-means centres of X).
            X: data used for k-means initialisation and, when the input is
                wider than the output, for the SVD-based skip projection.
            n_sample: number of noise samples drawn in ``gan_conditional``.
            fixed_mean: stored flag; default True for all layers before
                the last layer.
        """
        n_eps = int(n_inducing * output_dim)

        self.layer_index = layer_index
        self.kernel = kern
        self.input_dim = kern.input_dim
        self.output_dim = output_dim
        self.eps_dim = n_eps
        self.n_sample = n_sample
        self.n_inducing = n_inducing
        self.fixed_mean = fixed_mean

        print("========= Layer {} summary =========".format(layer_index))
        print("::::: LAYOUT")
        print("----- [Input dimension] : ", self.input_dim)
        print("----- [Output dimension] : ", self.output_dim)

        # The prior distribution is set to be i.i.d. Gaussian.

        # ---- Inducing points: k-means centres of X, under scope 'theta'.
        with tf.variable_scope('theta'):
            centres = kmeans2(X, self.n_inducing, minit='points')[0]
            self.Z = tf.Variable(centres, dtype=tf.float64, name='Z')

        # ---- GAN and noise sampler.
        self.gan = GAN(self.n_inducing, self.output_dim, self.input_dim,
                       self.layer_index)
        _prior_mean = 0.0
        _prior_var = 1.0
        self.prior_mean = [_prior_mean] * int(n_inducing * output_dim)
        self.prior_var = [_prior_var] * int(n_inducing * output_dim)
        self.mu = [0.] * n_eps
        self.scale = [1.0] * n_eps
        # In the paper we use a single global eps while in this
        # implementation we disentangle them.
        self.eps_sampler = MultiNormal(self.mu, self.scale)
        print("----- [Prior mean] : ", _prior_mean)
        print("----- [Prior var] : ", _prior_var)

        # ---- Skip-layer connection.
        if self.input_dim > self.output_dim:
            # Project onto the leading right-singular vectors of X.
            _, _, V = np.linalg.svd(X, full_matrices=False)
            self.W_skiplayer = V[:self.output_dim, :].T
        elif self.input_dim < self.output_dim:
            # Identity padded with zero columns.
            padding = np.zeros(
                (self.input_dim, self.output_dim - self.input_dim))
            self.W_skiplayer = np.concatenate(
                [np.eye(self.input_dim), padding], axis=1)
        else:
            self.W_skiplayer = np.eye(self.input_dim)

    def gan_base_conditional(self, Kmn, Kmm, Knn, f, full_cov=False,
                             q_sqrt=None, white=False):
        """Return the mean and variance at X given inducing points/values.

        Args:
            Kmn: kernel between inducing inputs and X.
            Kmm: kernel between inducing inputs.
            Knn: diagonal of the kernel at X.
            f: inducing values; its third axis gives the number of
                functions.
            full_cov: only ``False`` is implemented; any other value just
                prints an error message and the diagonal is still computed.
            q_sqrt: optional rank-2 or rank-3 square-root term whose
                contribution is added to the variance.
            white: when False an extra back-substitution is applied.

        Returns:
            Tuple ``(fmean, fvar)``; per the original annotation the shapes
            are n_sample x N x R and N x R.

        Raises:
            ValueError: if ``q_sqrt`` is given with a rank other than 2 or 3.
        """
        if full_cov != False:
            print("ERROR! full_cov NOT IMPLEMENTED!")

        n_funcs = f.shape[2]  # R
        chol_mm = tf.cholesky(Kmm)

        # Projection matrix A.
        proj = tf.matrix_triangular_solve(chol_mm, Kmn, lower=True)

        # Covariance reduction due to the conditioning, tiled to R x N.
        diag_var = Knn - tf.reduce_sum(tf.square(proj), 0)
        fvar = tf.tile(diag_var[None, :], [n_funcs, 1])

        # Another back-substitution in the unwhitened case.
        if not white:
            proj = tf.matrix_triangular_solve(tf.transpose(chol_mm), proj,
                                              lower=False)

        fmean = tf.einsum("zx,nzo->nxo", proj, f)

        if q_sqrt is not None:
            q_rank = q_sqrt.get_shape().ndims
            if q_rank == 2:
                # R x M x N
                LTA = proj * tf.expand_dims(tf.transpose(q_sqrt), 2)
            elif q_rank == 3:
                tiled_proj = tf.tile(tf.expand_dims(proj, 0),
                                     tf.stack([n_funcs, 1, 1]))
                # R x M x N
                LTA = tf.matmul(q_sqrt, tiled_proj, transpose_a=True)
            else:  # pragma: no cover
                raise ValueError("Bad dimension for q_sqrt: %s" %
                                 str(q_sqrt.get_shape().ndims))
            fvar = fvar + tf.reduce_sum(tf.square(LTA), 1)  # R x N

        return fmean, tf.transpose(fvar)

    def gan_conditional(self, X):
        """Return the predictive mean and variance at the points ``X``.

        ``n_sample`` noise vectors are drawn, concatenated with the
        (tiled) inducing inputs and passed through the GAN generator to
        obtain samples of the inducing values. Each sample is conditioned
        on with ``gan_base_conditional`` (whitened, diagonal variance) and
        the results are combined by moment matching over the samples.

        Side effects: sets ``eps``, ``Z_repeat``, ``eps_with_z``, ``post``,
        ``Kzz`` and ``determinant`` on the instance.

        Returns:
            Tuple ``(mean, var)``.
        """
        noise = self.eps_sampler.sample(self.n_sample)
        self.eps = tf.reshape(
            noise, [self.n_sample, self.n_inducing, self.output_dim])

        z_batch = tf.reshape(self.Z,
                             [1, self.n_inducing, self.input_dim])
        self.Z_repeat = tf.cast(
            tf.tile(z_batch, [self.n_sample, 1, 1]), tf.float32)

        self.eps_with_z = tf.concat([self.eps, self.Z_repeat], axis=2)
        # n_sample * n_inducing * output_dim
        self.post = tf.cast(self.gan.generator(self.eps_with_z),
                            tf.float64)

        Kxz = self.kernel.K(X, self.Z)  # not used further below
        Kzx = self.kernel.K(self.Z, X)
        # Small diagonal jitter on the inducing kernel.
        Kzz = (self.kernel.K(self.Z)
               + tf.eye(self.n_inducing, dtype=tf.float64) * 1e-7)
        self.Kzz = Kzz
        self.determinant = tf.matrix_determinant(Kzz)
        Kxx = self.kernel.Kdiag(X)  # just the diagonal part

        sample_means, base_var = self.gan_base_conditional(
            Kzx, Kzz, Kxx, self.post,
            full_cov=False, q_sqrt=None, white=True)

        mean = tf.reduce_mean(sample_means, axis=0)  # n_X * output_dim
        # Moment matching for a mixture of Gaussians:
        # var = base_var + E[mu^2] - (E[mu])^2.
        second_moment = (tf.einsum("nxi,nxi->xi", sample_means,
                                   sample_means) / self.n_sample)
        neg_mean_sq = -tf.einsum("xi,xi->xi", mean, mean)
        var = base_var + second_moment + neg_mean_sq
        return mean, var

    def prior_sampler(self, prior_batch_size):
        """Draw ``prior_batch_size`` prior samples of the inducing values.

        Returns a tensor reshaped to
        ``[prior_batch_size, n_inducing, output_dim]``.
        """
        self.prob_prior = MultiNormal(self.prior_mean, self.prior_var)
        flat_samples = self.prob_prior.sample(prior_batch_size)
        return tf.reshape(
            flat_samples,
            [prior_batch_size, self.n_inducing, self.output_dim])
def prior_sampler(self, prior_batch_size):
    """Draw ``prior_batch_size`` prior samples of the inducing values.

    The prior distribution is rebuilt from ``self.prior_mean`` and
    ``self.prior_var`` and stored as ``self.prob_prior``; the samples are
    reshaped to ``[prior_batch_size, n_inducing, output_dim]``.
    """
    self.prob_prior = MultiNormal(self.prior_mean, self.prior_var)
    flat_samples = self.prob_prior.sample(prior_batch_size)
    target_shape = [prior_batch_size, self.n_inducing, self.output_dim]
    return tf.reshape(flat_samples, target_shape)
def network(self, inputs, pi_raw_action, q_action, phase, num_samples):
    """Build the shared trunk, the tanh-Gaussian actor and the Q branch.

    Args:
        inputs: input tensor of the shared layer.
        pi_raw_action: pre-tanh action whose log-probability is evaluated.
        q_action: action fed to the Q network for ``q_actions_prediction``.
        phase: forwarded to ``apply_norm`` and ``q_network``.
        num_samples: number of policy samples drawn per input row.

    Returns:
        Tuple ``(pi_alpha, pi_mu, pi_std, pi_raw_samples, pi_samples,
        pi_samples_logprob, pi_actions_logprob, q_samples_prediction,
        q_actions_prediction)``.
    """
    # TODO: Remove alpha (not using multimodal)

    def fan_in_uniform():
        # Fresh initializer per use, as in the layer definitions below.
        return tf.contrib.layers.variance_scaling_initializer(
            factor=1.0, mode="FAN_IN", uniform=True)

    # ---- shared net
    shared_net = tf.contrib.layers.fully_connected(
        inputs,
        self.shared_layer_dim,
        activation_fn=None,
        weights_initializer=fan_in_uniform(),
        weights_regularizer=tf.contrib.layers.l2_regularizer(0.01),
        biases_initializer=fan_in_uniform())
    shared_net = self.apply_norm(shared_net,
                                 activation_fn=tf.nn.relu,
                                 phase=phase,
                                 layer_num=1)

    # ---- action branch
    pi_net = tf.contrib.layers.fully_connected(
        shared_net,
        self.actor_layer_dim,
        activation_fn=None,
        weights_initializer=fan_in_uniform(),
        weights_regularizer=None,
        biases_initializer=fan_in_uniform())
    pi_net = self.apply_norm(pi_net,
                             activation_fn=tf.nn.relu,
                             phase=phase,
                             layer_num=2)

    # Mean head: no activation.
    pi_mu = tf.contrib.layers.fully_connected(
        pi_net,
        self.num_modal * self.action_dim,
        activation_fn=None,
        weights_initializer=fan_in_uniform(),
        weights_regularizer=None,
        biases_initializer=fan_in_uniform())

    # Log-std head: tanh output in [-1, 1], rescaled further below.
    pi_logstd = tf.contrib.layers.fully_connected(
        pi_net,
        self.num_modal * self.action_dim,
        activation_fn=tf.tanh,
        weights_initializer=tf.random_uniform_initializer(0, 1),
        weights_regularizer=None,
        biases_initializer=tf.random_uniform_initializer(-3e-3, 3e-3))

    pi_alpha = tf.contrib.layers.fully_connected(
        pi_net,
        self.num_modal,
        activation_fn=tf.tanh,
        weights_initializer=tf.random_uniform_initializer(-3e-3, 3e-3),
        weights_regularizer=None,
        biases_initializer=tf.random_uniform_initializer(-3e-3, 3e-3))

    # ---- reshape output (single mode only)
    assert (self.num_modal == 1)
    pi_mu = tf.reshape(pi_mu, [-1, self.action_dim])
    pi_logstd = tf.reshape(pi_logstd, [-1, self.action_dim])
    pi_alpha = tf.reshape(pi_alpha, [-1, 1])

    # Map the tanh output linearly onto [LOG_STD_MIN, LOG_STD_MAX] and
    # exponentiate.
    pi_std = tf.exp(self.LOG_STD_MIN + 0.5 *
                    (self.LOG_STD_MAX - self.LOG_STD_MIN) * (pi_logstd + 1))

    # construct MultivariateNormalDiag dist.
    mvn = MultivariateNormalDiag(loc=pi_mu, scale_diag=pi_std)

    if self.actor_update == "reparam":
        # Repeat mean and std num_samples times per input row, then draw
        # reparameterised samples: mu + noise * std.
        stacked_pi_mu = tf.expand_dims(pi_mu, 1)
        stacked_pi_mu = tf.tile(stacked_pi_mu, [1, num_samples, 1])
        # (batch_size * num_samples, action_dim)
        stacked_pi_mu = tf.reshape(stacked_pi_mu, (-1, self.action_dim))

        stacked_pi_std = tf.expand_dims(pi_std, 1)
        stacked_pi_std = tf.tile(stacked_pi_std, [1, num_samples, 1])
        # (batch_size * num_samples, action_dim)
        stacked_pi_std = tf.reshape(stacked_pi_std, (-1, self.action_dim))

        # (batch_size * num_samples, action_dim)
        noise = tf.random_normal(tf.shape(stacked_pi_mu))
        pi_raw_samples = stacked_pi_mu + noise * stacked_pi_std
        pi_raw_samples_logprob = self.gaussian_loglikelihood(
            pi_raw_samples, stacked_pi_mu, stacked_pi_std)

        pi_raw_samples = tf.reshape(pi_raw_samples,
                                    (-1, num_samples, self.action_dim))
        # NOTE(review): this reshape assumes gaussian_loglikelihood returns
        # one value per action dimension - confirm against that helper,
        # since the tanh correction below is reduced over the last axis.
        pi_raw_samples_logprob = tf.reshape(
            pi_raw_samples_logprob, (-1, num_samples, self.action_dim))
    else:
        # sample() puts the sample axis first:
        # (num_samples, batch_size, action_dim).
        pi_raw_samples_og = mvn.sample(num_samples)
        # -> (batch_size, num_samples, action_dim)
        pi_raw_samples = tf.transpose(pi_raw_samples_og, [1, 0, 2])

        # log_prob reduces the event axis, so the result is rank 2,
        # (num_samples, batch_size). The permutation must therefore have
        # two entries; a three-entry perm fails at graph construction.
        pi_raw_samples_logprob_og = mvn.log_prob(pi_raw_samples_og)
        pi_raw_samples_logprob = tf.transpose(pi_raw_samples_logprob_og,
                                              [1, 0])

    # ---- apply tanh squashing and its log-probability correction
    pi_mu = tf.tanh(pi_mu)
    pi_samples = tf.tanh(pi_raw_samples)
    pi_samples_logprob = pi_raw_samples_logprob - tf.reduce_sum(
        tf.log(
            self.clip_but_pass_gradient(1 - pi_samples**2, l=0, u=1)
            + 1e-6),
        axis=-1)

    pi_mu = tf.multiply(pi_mu, self.action_max)
    pi_samples = tf.multiply(pi_samples, self.action_max)

    # ---- log-probability of the supplied action
    pi_raw_actions_logprob = mvn.log_prob(pi_raw_action)
    pi_action = tf.tanh(pi_raw_action)
    pi_actions_logprob = pi_raw_actions_logprob - tf.reduce_sum(
        tf.log(
            self.clip_but_pass_gradient(1 - pi_action**2, l=0, u=1)
            + 1e-6),
        axis=-1)

    # TODO: Remove alpha
    # Softmax of alpha, with the row maximum subtracted first.
    max_alpha = tf.reduce_max(pi_alpha, axis=1, keepdims=True)
    pi_alpha = tf.subtract(pi_alpha, max_alpha)
    pi_alpha = tf.exp(pi_alpha)
    normalize_alpha = tf.reciprocal(
        tf.reduce_sum(pi_alpha, axis=1, keepdims=True))
    pi_alpha = tf.multiply(normalize_alpha, pi_alpha)

    # ---- Q branch
    with tf.variable_scope('qf'):
        q_actions_prediction = self.q_network(shared_net, q_action, phase)

    # Same Q weights, evaluated on the flattened policy samples.
    with tf.variable_scope('qf', reuse=True):
        pi_samples_reshaped = tf.reshape(
            pi_samples, (self.batch_size * num_samples, self.action_dim))
        q_samples_prediction = self.q_network(shared_net,
                                              pi_samples_reshaped, phase)

    return (pi_alpha, pi_mu, pi_std, pi_raw_samples, pi_samples,
            pi_samples_logprob, pi_actions_logprob, q_samples_prediction,
            q_actions_prediction)
def compute_prob(self, x, h):
    """Return the density of ``h`` under the Gaussian parameterised by ``x``.

    ``self.compute_params(x)`` supplies the two positional arguments of
    ``MultivariateNormalDiag``; argument validation is disabled.
    """
    loc, diag_param = self.compute_params(x)
    dist = MultivariateNormalDiag(loc, diag_param, validate_args=False)
    return dist.pdf(h)
# Encoder scale from the log-variance, with a small offset added.
std_encoder = tf.exp(0.5 * logvar_encoder)+1e-5
# Tile along axis 1 and fold back into rows of latent_dim values.
# presumably this repeats every row n_samples times - TODO confirm shapes
std_encoder_samples = tf.reshape(tf.tile(std_encoder, [1, n_samples]), [-1, latent_dim])
# Reparameterised latent sample: mean + std * epsilon.
z = mu_encoder_samples + tf.multiply(std_encoder_samples, epsilon)
W_decoder_z_hidden = weight_variable([latent_dim, hidden_decoder_dim],"W_decoder_z_hidden")
b_decoder_z_hidden = bias_variable([hidden_decoder_dim],"b_decoder_z_hidden")
# Hidden layer decoder
hidden_decoder = tf.nn.relu(tf.matmul(z, W_decoder_z_hidden) + b_decoder_z_hidden)
# Earlier (shorter) return signature of the decoder, kept for reference:
#log_pdfs_reconstruct, std_decoder, mu_decoder, bt, at, b, a,term_sup, term_inf, dawson_inf, dawson_sup, h_z1 = gaussian_Renyi_cdf_decoder(hidden_decoder, x_samples)
std_decoder, mu_decoder, bt, at, b, a,dawson_sup, h_z1,elem1, elem2, log_Id, term1, term1_2, term2, rez,log_pxz, pxz, log_cdf2_pxz = gaussian_Renyi_cdf_decoder(hidden_decoder, x_samples)
# evaluate the pdf q(z|x_samples), both in log space and as a density
log_pdf_qzx = MultivariateNormalDiag(mu_encoder_samples,std_encoder_samples).log_prob(z)
pdf_qzx = MultivariateNormalDiag(mu_encoder_samples,std_encoder_samples).prob(z)
# evaluate the pdf p(z): zero mean and unit scale, tiled the same way as
# the encoder statistics
mu_z = tf.constant(0.0, shape=[batch_size,latent_dim], dtype=tf_type)
mu_z_samples = tf.reshape(tf.tile(mu_z, [1, n_samples]), [-1, latent_dim])
std_z = tf.constant(1.0, shape=[batch_size,latent_dim], dtype=tf_type)
std_z_samples = tf.reshape(tf.tile(std_z, [1, n_samples]), [-1, latent_dim])
log_pdf_pz = MultivariateNormalDiag(mu_z_samples,std_z_samples).log_prob(z)
pdf_pz = MultivariateNormalDiag(mu_z_samples,std_z_samples).prob(z)
# Log-ratio log q(z|x) - log p(z) for every sample.
h_z2 = log_pdf_qzx-log_pdf_pz
# Sum the decoder term h_z1 over axis 1.
sum_h_z1 = tf.reduce_sum(h_z1,1)