def variational_model_fn(self):
    qw = yield JDCRoot(Independent(tfd.Normal(
        loc=self.qw_loc_var,
        scale=tf.nn.softplus(self.qw_softplus_scale_var))))

    qz_scale = yield JDCRoot(Independent(SoftplusNormal(
        loc=self.qz_scale_loc_var,
        scale=tf.nn.softplus(self.qz_scale_softplus_scale_var))))

    qF_test = yield JDCRoot(Independent(tfd.RelaxedOneHotCategorical(
        temperature=self.qF_temperature_var,
        logits=self.qF_logits_var)))

    # qz = yield JDCRoot(Independent(tfd.Normal(
    #     loc=qz_loc_var,
    #     scale=tf.nn.softplus(qz_softplus_scale_var))))

    qz = yield JDCRoot(Independent(tfd.Deterministic(loc=self.qz_loc_var)))

    qx_bias = yield JDCRoot(Independent(tfd.Normal(
        loc=self.qx_bias_loc_var,
        scale=tf.nn.softplus(self.qx_bias_softplus_scale_var))))

    qx_scale_concentration_c = yield JDCRoot(Independent(tfd.Deterministic(
        loc=tf.nn.softplus(self.qx_scale_concentration_c_loc_var))))

    qx_scale_mode_c = yield JDCRoot(Independent(tfd.Deterministic(
        loc=tf.nn.softplus(self.qx_scale_mode_c_loc_var))))

    qx_scale = yield JDCRoot(Independent(SoftplusNormal(
        loc=self.qx_scale_loc_var,
        scale=tf.nn.softplus(self.qx_scale_softplus_scale_var))))

    if self.use_point_estimates:
        qx = yield JDCRoot(Independent(tfd.Deterministic(loc=self.qx_loc_var)))
    else:
        qx = yield JDCRoot(Independent(tfd.Normal(
            loc=self.qx_loc_var,
            scale=tf.nn.softplus(self.qx_softplus_scale_var))))

    qrnaseq_reads = yield JDCRoot(Independent(
        tfd.Deterministic(tf.zeros([self.num_samples]))))

def __call__(self): """Get the distribution object from the backend""" if get_backend() == 'pytorch': raise NotImplementedError else: from tensorflow_probability import distributions as tfd return tfd.Deterministic(self.loc)
def feed_forward(
        state, data_shape, num_layers=2, activation=tf.nn.relu,
        mean_activation=None, stop_gradient=False, trainable=True, units=100,
        std=1.0, low=-1.0, high=1.0, dist='normal'):
    """Create a model returning unnormalized MSE distribution."""
    hidden = state
    if stop_gradient:
        hidden = tf.stop_gradient(hidden)
    for _ in range(num_layers):
        hidden = tf.compat.v1.layers.dense(hidden, units, activation)
    mean = tf.compat.v1.layers.dense(
        hidden, int(np.prod(data_shape)), mean_activation, trainable=trainable)
    mean = tf.reshape(mean, tools.shape(state)[:-1] + data_shape)
    if std == 'learned':
        std = tf.compat.v1.layers.dense(
            hidden, int(np.prod(data_shape)), None, trainable=trainable)
        std = tf.nn.softplus(std + 0.55) + 0.01
        std = tf.reshape(std, tools.shape(state)[:-1] + data_shape)
    if dist == 'normal':
        dist = tfd.Normal(mean, std)
    elif dist == 'truncated_normal':
        # https://www.desmos.com/calculator/3o96eyqxib
        dist = tfd.TruncatedNormal(mean, std, low, high)
    elif dist == 'tanh_normal':
        # https://www.desmos.com/calculator/sxpp7ectjv
        dist = tfd.Normal(mean, std)
        dist = tfd.TransformedDistribution(dist, tfp.bijectors.Tanh())
    elif dist == 'deterministic':
        dist = tfd.Deterministic(mean)
    else:
        raise NotImplementedError(dist)
    dist = tfd.Independent(dist, len(data_shape))
    return dist

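# A minimal sketch (toy shapes, TF2 + TFP assumed; not part of the original code)
# of what the final tfd.Independent(dist, len(data_shape)) wrapper above does:
# it reinterprets the trailing data_shape dimensions as a single event, so
# log_prob sums over them and returns one value per batch element.
import tensorflow as tf
import tensorflow_probability as tfp

tfd = tfp.distributions

mean = tf.zeros([8, 4, 4])          # batch of 8, data_shape = (4, 4)
per_pixel = tfd.Normal(mean, 1.0)   # log_prob would have shape [8, 4, 4]
joint = tfd.Independent(per_pixel, reinterpreted_batch_ndims=2)
print(joint.log_prob(tf.zeros([8, 4, 4])).shape)   # (8,)
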
def one_step_model(state, prev_action, data_shape, model_width_factor,
                   max_objective=False, dist='deterministic'):
    num_layers = 2
    activation = tf.nn.relu
    units = data_shape[0] * model_width_factor
    state = tf.stop_gradient(state)
    prev_action = tf.stop_gradient(prev_action)
    inputs = tf.concat([state, prev_action], -1)
    for _ in range(num_layers):
        hidden = tf.layers.dense(inputs, units, activation)
        inputs = tf.concat([hidden, prev_action], -1)
    mean = tf.layers.dense(inputs, int(np.prod(data_shape)), None)
    mean = tf.reshape(mean, tools.shape(state)[:-1] + data_shape)
    if max_objective:
        min_std = 1e-2
        init_std = 1.0
        std = tf.layers.dense(inputs, int(np.prod(data_shape)), None)
        init_std = np.log(np.exp(init_std) - 1)
        std = tf.nn.softplus(std + init_std) + min_std
        std = tf.reshape(std, tools.shape(state)[:-1] + data_shape)
        dist = tfd.Normal(mean, std)
        dist = tfd.Independent(dist, len(data_shape))
    else:
        dist = tfd.Deterministic(mean)
        dist = tfd.Independent(dist, len(data_shape))
    return dist

def __call__(self): """Get the distribution object from the backend""" if get_backend() == 'pytorch': TorchDeterministic = get_TorchDeterministic() return TorchDeterministic(self['loc']) else: from tensorflow_probability import distributions as tfd return tfd.Deterministic(self['loc'])
def network(data, layer_sizes=[256, 256], ncp_scale=0.1):
    '''Defines network topology'''
    # Define neural network topology (in this case, a simple MLP)
    hidden = data[0]
    labels = data[1]
    for size in layer_sizes[:-1]:
        hidden = tf.layers.dense(inputs=hidden, units=size,
                                 activation=tf.nn.leaky_relu)
    weight_std = 0.1
    init_std = np.log(np.exp(weight_std) - 1).astype(np.float32)
    kernel_posterior = tfd.Independent(
        tfd.Normal(
            tf.get_variable('kernel_mean',
                            (hidden.shape[-1].value, layer_sizes[-1]),
                            tf.float32,
                            tf.random_normal_initializer(0, weight_std)),
            tf.nn.softplus(
                tf.get_variable('kernel_std',
                                (hidden.shape[-1].value, layer_sizes[-1]),
                                tf.float32,
                                tf.constant_initializer(init_std)))), 2)
    kernel_prior = tfd.Independent(
        tfd.Normal(
            tf.zeros_like(kernel_posterior.mean()),
            tf.zeros_like(kernel_posterior.mean()) + tf.nn.softplus(init_std)), 2)
    bias_prior = None
    bias_posterior = tfd.Deterministic(
        tf.get_variable('bias_mean', (layer_sizes[-1],), tf.float32,
                        tf.constant_initializer(0.0)))
    logits = tfp.layers.DenseReparameterization(
        layer_sizes[-1],
        kernel_prior_fn=lambda *args, **kwargs: kernel_prior,
        kernel_posterior_fn=lambda *args, **kwargs: kernel_posterior,
        bias_prior_fn=lambda *args, **kwargs: bias_prior,
        bias_posterior_fn=lambda *args, **kwargs: bias_posterior)(hidden)
    standard_loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=labels))
    logits = logits - tf.reduce_mean(logits)
    class_probabilities = tf.nn.softmax(
        logits * tf.constant(ncp_scale, dtype=tf.float32))
    entropy = -class_probabilities * tf.log(
        tf.clip_by_value(class_probabilities, 1e-20, 1))
    if NORMALIZE_ENTROPY is True:
        baseK = tf.constant(layer_sizes[-1], dtype=tf.float32,
                            shape=(layer_sizes[-1],))
        entropy /= tf.log(baseK)
    mean, variance = tf.nn.moments(entropy, axes=[1])
    ncp_loss = tf.reduce_mean(mean)
    ncp_std = tf.reduce_mean(tf.math.sqrt(variance))
    return standard_loss, ncp_loss, logits, ncp_std

def _init_distribution(conditions, **kwargs):
    return tfd.Mixture(
        cat=tfd.Categorical(
            probs=[1.0 - conditions["psi"], conditions["psi"]]),
        components=[
            tfd.Deterministic(loc=tf.zeros_like(conditions["theta"])),
            tfd.Poisson(rate=conditions["theta"]),
        ],
        **kwargs,
    )

def _init_distribution(conditions, **kwargs):
    return tfd.Mixture(
        cat=tfd.Categorical(
            probs=[1 - conditions["psi"], conditions["psi"]]),
        components=[
            tfd.Deterministic(loc=tf.zeros_like(conditions["n"])),
            tfd.Binomial(total_count=conditions["n"], probs=conditions["p"]),
        ],
        **kwargs,
    )

def mix(gamma, eta, loc, scale, neg_inf):
    _gamma = gamma[..., tf.newaxis]
    # FIXME: Possible to use tfd.Blockwise?
    return tfd.Mixture(
        cat=tfd.Categorical(probs=tf.concat([_gamma, 1 - _gamma], axis=-1)),
        components=[
            tfd.Deterministic(np.float64(neg_inf)),
            tfd.MixtureSameFamily(
                mixture_distribution=tfd.Categorical(probs=eta),
                components_distribution=tfd.Normal(loc=loc, scale=scale)),
        ])

def _base_dist(self, *args, **kwargs): """ Zero-inflated Poisson base distribution. A ZeroInflatedPoisson is a mixture between a deterministic distribution and a Poisson distribution. """ mix = kwargs.pop("mix") return tfd.Mixture( cat=tfd.Categorical(probs=[mix, 1.0 - mix]), components=[tfd.Deterministic(0.0), tfd.Poisson(*args, **kwargs)], name="ZeroInflatedPoisson", )
def _init_distribution(conditions, **kwargs): return tfd.Mixture( cat=tfd.Categorical( probs=[1.0 - conditions["psi"], conditions["psi"]]), components=[ tfd.Deterministic(loc=tf.zeros_like(conditions["mu"])), tfd.NegativeBinomial( total_count=conditions["alpha"], probs=(conditions["mu"]) / (conditions["mu"] + conditions["alpha"]), ), ], **kwargs, )
def network(inputs, config):
    init_std = np.log(np.exp(config.weight_std) - 1).astype(np.float32)
    hidden = inputs
    # Define hidden layers according to config.layer_sizes
    for size in config.layer_sizes:
        hidden = tf.layers.dense(hidden, size, tf.nn.leaky_relu)
    # Define the posterior over the final-layer weights as a Normal distribution
    # with initial parameters 0 and config.weight_std
    kernel_posterior = tfd.Independent(
        tfd.Normal(
            tf.get_variable('kernel_mean', (hidden.shape[-1].value, 1),
                            tf.float32,
                            tf.random_normal_initializer(0, config.weight_std)),
            tf.nn.softplus(
                tf.get_variable('kernel_std', (hidden.shape[-1].value, 1),
                                tf.float32,
                                tf.constant_initializer(init_std)))), 2)
    # Define the prior over the final-layer weights as a Normal distribution
    kernel_prior = tfd.Independent(
        tfd.Normal(
            tf.zeros_like(kernel_posterior.mean()),
            tf.zeros_like(kernel_posterior.mean()) + tf.nn.softplus(init_std)), 2)
    bias_prior = None
    bias_posterior = tfd.Deterministic(
        tf.get_variable('bias_mean', (1,), tf.float32,
                        tf.constant_initializer(0.0)))
    tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES,
                         tfd.kl_divergence(kernel_posterior, kernel_prior))
    # Create the final Bayesian layer, which computes the mean
    mean = tfp.layers.DenseReparameterization(
        1,
        kernel_prior_fn=lambda *args, **kwargs: kernel_prior,
        kernel_posterior_fn=lambda *args, **kwargs: kernel_posterior,
        bias_prior_fn=lambda *args, **kwargs: bias_prior,
        bias_posterior_fn=lambda *args, **kwargs: bias_posterior)(hidden)
    # Compute the distribution of the mean
    mean_dist = tfd.Normal(
        tf.matmul(hidden, kernel_posterior.mean()) + bias_posterior.mean(),
        tf.sqrt(tf.matmul(hidden**2, kernel_posterior.variance())))
    # Compute the standard deviation with a final non-Bayesian dense layer
    # (in parallel with the mean layer)
    std = tf.layers.dense(hidden, 1, tf.nn.softplus) + 1e-6
    data_dist = tfd.Normal(mean, std)
    return data_dist, mean_dist

def _base_dist(self, *args, **kwargs): """ Zero-inflated negative binomial base distribution. A ZeroInflatedNegativeBinomial is a mixture between a deterministic distribution and a NegativeBinomial distribution. """ mix = kwargs.pop("mix") return tfd.Mixture( cat=tfd.Categorical(probs=[mix, 1.0 - mix]), components=[ tfd.Deterministic(0.0), tfd.NegativeBinomial(*args, **kwargs) ], name="ZeroInflatedNegativeBinomial", )
def variational_model_fn(self, surrogate_likelihood_model):
    qx_global_scale_variance = yield JDCRoot(Independent(SoftplusNormal(
        loc=self.qx_global_scale_variance_loc_var,
        scale=tf.nn.softplus(
            self.qx_global_scale_variance_softplus_scale_var))))

    qx_global_scale_noncentered = yield JDCRoot(Independent(SoftplusNormal(
        loc=self.qx_global_scale_noncentered_loc_var,
        scale=tf.nn.softplus(
            self.qx_global_scale_noncentered_softplus_scale_var))))

    qx_local1_scale_variance = yield JDCRoot(Independent(SoftplusNormal(
        loc=self.qx_local1_scale_variance_loc_var,
        scale=tf.nn.softplus(
            self.qx_local1_scale_variance_softplus_scale_var))))

    qx_local1_scale_noncentered = yield JDCRoot(Independent(SoftplusNormal(
        loc=self.qx_local1_scale_noncentered_loc_var,
        scale=tf.nn.softplus(
            self.qx_local1_scale_noncentered_softplus_scale_var))))

    qx_bias = yield JDCRoot(Independent(tfd.Normal(
        loc=self.qx_bias_loc_var,
        scale=tf.nn.softplus(self.qx_bias_softplus_scale_var))))

    if self.use_point_estimates:
        qx = yield JDCRoot(Independent(tfd.Deterministic(loc=self.qx_loc_var)))
    else:
        qx = yield JDCRoot(Independent(tfd.Normal(
            loc=self.qx_loc_var,
            scale=tf.nn.softplus(self.qx_softplus_scale_var))))

    yield from surrogate_likelihood_model(qx)

def feed_forward(
        features, data_shape, num_layers=2, activation=tf.nn.relu,
        mean_activation=None, stop_gradient=False, trainable=True, units=100,
        std=1.0, low=-1.0, high=1.0, dist='normal', min_std=1e-2,
        init_std=1.0):
    hidden = features
    if stop_gradient:
        hidden = tf.stop_gradient(hidden)
    for _ in range(num_layers):
        hidden = tf.layers.dense(hidden, units, activation, trainable=trainable)
    mean = tf.layers.dense(
        hidden, int(np.prod(data_shape)), mean_activation, trainable=trainable)
    mean = tf.reshape(mean, tools.shape(features)[:-1] + data_shape)
    if std == 'learned':
        std = tf.layers.dense(
            hidden, int(np.prod(data_shape)), None, trainable=trainable)
        init_std = np.log(np.exp(init_std) - 1)
        std = tf.nn.softplus(std + init_std) + min_std
        std = tf.reshape(std, tools.shape(features)[:-1] + data_shape)
    if dist == 'normal':
        dist = tfd.Normal(mean, std)
        dist = tfd.Independent(dist, len(data_shape))
    elif dist == 'deterministic':
        dist = tfd.Deterministic(mean)
        dist = tfd.Independent(dist, len(data_shape))
    elif dist == 'binary':
        dist = tfd.Bernoulli(mean)
        dist = tfd.Independent(dist, len(data_shape))
    elif dist == 'trunc_normal':
        # https://www.desmos.com/calculator/rnksmhtgui
        dist = tfd.TruncatedNormal(mean, std, low, high)
        dist = tfd.Independent(dist, len(data_shape))
    elif dist == 'tanh_normal':
        # https://www.desmos.com/calculator/794s8kf0es
        dist = distributions.TanhNormal(mean, std)
    elif dist == 'tanh_normal_tanh':
        # https://www.desmos.com/calculator/794s8kf0es
        mean = 5.0 * tf.tanh(mean / 5.0)
        dist = distributions.TanhNormal(mean, std)
    elif dist == 'onehot_score':
        dist = distributions.OneHot(mean, gradient='score')
    elif dist == 'onehot_straight':
        dist = distributions.OneHot(mean, gradient='straight')
    else:
        raise NotImplementedError(dist)
    return dist

def network(inputs, config):
    init_std = np.log(np.exp(config.weight_std) - 1).astype(np.float32)
    hidden = inputs
    for size in config.layer_sizes:
        hidden = tf.layers.dense(hidden, size, tf.nn.leaky_relu)
    # The final layer is the only 'Bayesian' layer: initialize the mean and
    # std of its weights.
    kernel_posterior = tfd.Independent(
        tfd.Normal(
            tf.get_variable('kernel_mean', (hidden.shape[-1].value, 1),
                            tf.float32,
                            tf.random_normal_initializer(0, config.weight_std)),
            tf.nn.softplus(
                tf.get_variable('kernel_std', (hidden.shape[-1].value, 1),
                                tf.float32,
                                tf.constant_initializer(init_std)))), 2)
    kernel_prior = tfd.Independent(
        tfd.Normal(
            tf.zeros_like(kernel_posterior.mean()),
            tf.zeros_like(kernel_posterior.mean()) + tf.nn.softplus(init_std)), 2)
    bias_prior = None
    bias_posterior = tfd.Deterministic(
        tf.get_variable('bias_mean', (1,), tf.float32,
                        tf.constant_initializer(0.0)))
    # Add the KL divergence between posterior and prior to the loss collection.
    tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES,
                         tfd.kl_divergence(kernel_posterior, kernel_prior))
    # Ensure the kernels and biases are drawn from distributions.
    mean = tfp.layers.DenseReparameterization(
        1,
        kernel_prior_fn=lambda *args, **kwargs: kernel_prior,
        kernel_posterior_fn=lambda *args, **kwargs: kernel_posterior,
        bias_prior_fn=lambda *args, **kwargs: bias_prior,
        bias_posterior_fn=lambda *args, **kwargs: bias_posterior)(hidden)
    mean_dist = tfd.Normal(
        tf.matmul(hidden, kernel_posterior.mean()) + bias_posterior.mean(),
        tf.sqrt(tf.matmul(hidden**2, kernel_posterior.variance())))
    std = tf.layers.dense(hidden, 1, tf.nn.softplus) + 1e-6
    data_dist = tfd.Normal(mean, std)
    return data_dist, mean_dist

def construct_model(self):
    with self.graph.as_default():
        self.sess.close()
        self.sess = tf.compat.v1.InteractiveSession()
        self.sess.as_default()

        self.x = tf.convert_to_tensor(self.rescaled_features, dtype=tf.float32)
        self.y = tf.convert_to_tensor(self.targets, dtype=tf.float32)

        # construct precisions
        self.tau_rescaling = np.zeros((self.num_obs, self.bnn_output_size))
        kernel_ranges = self.config.kernel_ranges
        for obs_index in range(self.num_obs):
            self.tau_rescaling[obs_index] += kernel_ranges
        self.tau_rescaling = self.tau_rescaling**2

        # construct weight and bias shapes
        activations = [tf.nn.tanh]
        weight_shapes, bias_shapes = [[self.feature_size, self._hidden_shape]], [[self._hidden_shape]]
        for _ in range(1, self._num_layers - 1):
            activations.append(tf.nn.tanh)
            weight_shapes.append([self._hidden_shape, self._hidden_shape])
            bias_shapes.append([self._hidden_shape])
        activations.append(lambda x: x)
        weight_shapes.append([self._hidden_shape, self.bnn_output_size])
        bias_shapes.append([self.bnn_output_size])

        # construct prior
        self.prior_layer_outputs = [self.x]
        self.priors = {}
        for layer_index in range(self._num_layers):
            weight_shape, bias_shape = weight_shapes[layer_index], bias_shapes[layer_index]
            activation = activations[layer_index]

            weight = tfd.Normal(loc=tf.zeros(weight_shape) + self._weight_loc,
                                scale=tf.zeros(weight_shape) + self._weight_scale)
            bias = tfd.Normal(loc=tf.zeros(bias_shape) + self._bias_loc,
                              scale=tf.zeros(bias_shape) + self._bias_scale)

            self.priors['weight_%d' % layer_index] = weight
            self.priors['bias_%d' % layer_index] = bias

            prior_layer_output = activation(
                tf.matmul(self.prior_layer_outputs[-1], weight.sample()) + bias.sample())
            self.prior_layer_outputs.append(prior_layer_output)

        self.prior_bnn_output = self.prior_layer_outputs[-1]
        self.prior_tau_normed = tfd.Gamma(
            self.num_obs**2 + tf.zeros((self.num_obs, self.bnn_output_size)),
            tf.ones((self.num_obs, self.bnn_output_size)))
        self.prior_tau = self.prior_tau_normed.sample() / self.tau_rescaling
        self.prior_scale = tfd.Deterministic(1. / tf.sqrt(self.prior_tau))

        # construct posterior
        self.post_layer_outputs = [self.x]
        self.posteriors = {}
        for layer_index in range(self._num_layers):
            weight_shape, bias_shape = weight_shapes[layer_index], bias_shapes[layer_index]
            activation = activations[layer_index]

            weight = tfd.Normal(loc=tf.Variable(tf.random.normal(weight_shape)),
                                scale=tf.nn.softplus(tf.Variable(tf.zeros(weight_shape))))
            bias = tfd.Normal(loc=tf.Variable(tf.random.normal(bias_shape)),
                              scale=tf.nn.softplus(tf.Variable(tf.zeros(bias_shape))))

            self.posteriors['weight_%d' % layer_index] = weight
            self.posteriors['bias_%d' % layer_index] = bias

            post_layer_output = activation(
                tf.matmul(self.post_layer_outputs[-1], weight.sample()) + bias.sample())
            self.post_layer_outputs.append(post_layer_output)

        self.post_bnn_output = self.post_layer_outputs[-1]
        self.post_tau_normed = tfd.Gamma(
            self.num_obs**2 + tf.Variable(tf.zeros((self.num_obs, self.bnn_output_size))),
            tf.nn.softplus(tf.Variable(tf.ones((self.num_obs, self.bnn_output_size)))))
        self.post_tau = self.post_tau_normed.sample() / self.tau_rescaling
        self.post_sqrt_tau = tf.sqrt(self.post_tau)
        self.post_scale = tfd.Deterministic(1. / self.post_sqrt_tau)

        # map bnn output to prediction
        post_kernels = {}
        targets_dict = {}
        inferences = []

        target_element_index = 0
        kernel_element_index = 0
        while kernel_element_index < len(self.config.kernel_names):
            kernel_type = self.config.kernel_types[kernel_element_index]
            kernel_size = self.config.kernel_sizes[kernel_element_index]

            feature_begin, feature_end = target_element_index, target_element_index + 1
            kernel_begin, kernel_end = kernel_element_index, kernel_element_index + kernel_size

            prior_relevant = self.prior_bnn_output[:, kernel_begin:kernel_end]
            post_relevant = self.post_bnn_output[:, kernel_begin:kernel_end]

            target = self.y[:, kernel_begin:kernel_end]
            lowers, uppers = self.config.kernel_lowers[kernel_begin:kernel_end], self.config.kernel_uppers[kernel_begin:kernel_end]

            prior_support = (uppers - lowers) * (1.2 * tf.nn.sigmoid(prior_relevant) - 0.1) + lowers
            post_support = (uppers - lowers) * (1.2 * tf.nn.sigmoid(post_relevant) - 0.1) + lowers

            prior_predict = tfd.Normal(prior_support, self.prior_scale[:, kernel_begin:kernel_end].sample())
            post_predict = tfd.Normal(post_support, self.post_scale[:, kernel_begin:kernel_end].sample())

            targets_dict[prior_predict] = target
            post_kernels['param_%d' % target_element_index] = {
                'loc': tfd.Deterministic(post_support),
                'sqrt_prec': tfd.Deterministic(self.post_sqrt_tau[:, kernel_begin:kernel_end]),
                'scale': tfd.Deterministic(self.post_scale[:, kernel_begin:kernel_end].sample())}

            inference = {'pred': post_predict, 'target': target}
            inferences.append(inference)

            target_element_index += 1
            kernel_element_index += kernel_size

        self.post_kernels = post_kernels
        self.targets_dict = targets_dict

        loss = 0.
        for inference in inferences:
            loss += -tf.reduce_sum(inference['pred'].log_prob(inference['target']))

        self.optimizer = tf.compat.v1.train.AdamOptimizer(self._learning_rate)
        self.train_op = self.optimizer.minimize(loss)

        tf.compat.v1.global_variables_initializer().run()

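# A minimal sketch (illustrative shapes, TF2 + TFP assumed; not part of the
# original code) of the precision-to-scale step used above: sample precisions
# tau from a Gamma and wrap the implied scale 1 / sqrt(tau) in a Deterministic
# so it can be sliced and .sample()'d like the other nodes.
import tensorflow as tf
import tensorflow_probability as tfp

tfd = tfp.distributions

tau = tfd.Gamma(concentration=tf.fill([4, 3], 100.0),
                rate=tf.ones([4, 3])).sample()
scale = tfd.Deterministic(1.0 / tf.sqrt(tau))
print(scale[:, 0:2].sample().shape)   # (4, 2): batch-sliceable like post_scale
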
def surrogate_latent_space_model_fn(self, qz_loc_var, qz_softplus_scale_var):
    qz = yield JDCRoot(Independent(tfd.Deterministic(loc=qz_loc_var)))

def _base_dist(self, value: TensorLike, *args, **kwargs):
    return tfd.Deterministic(loc=value, *args, **kwargs)

def __call__(self, logits, **kwargs):
    lkl = tfd.Deterministic(loc=logits, **kwargs)
    return tfd.Independent(lkl, reinterpreted_batch_ndims=1)

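# A minimal sketch (toy values, TF2 + TFP assumed; not part of the original
# code) of how the Independent-wrapped Deterministic above behaves as a
# likelihood: log_prob is 0 when every element matches `loc` exactly and -inf
# as soon as any element differs.
import tensorflow_probability as tfp

tfd = tfp.distributions

lkl = tfd.Independent(tfd.Deterministic(loc=[1.0, 2.0]),
                      reinterpreted_batch_ndims=1)
print(lkl.log_prob([1.0, 2.0]))   # 0.0
print(lkl.log_prob([1.0, 3.0]))   # -inf
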
def construct_model(self, learning_rate=None):
    if learning_rate is None:
        learning_rate = self.learning_rate

    with self.graph.as_default():
        self.sess.close()
        self.sess = tf.compat.v1.InteractiveSession()
        self.sess.as_default()

        self.x = tf.convert_to_tensor(self.rescaled_features, dtype=tf.float32)
        self.y = tf.convert_to_tensor(self.targets, dtype=tf.float32)

        # construct precisions
        self.tau_rescaling = np.zeros((self.num_obs, self.bnn_output_size))
        kernel_ranges = self.config.kernel_ranges
        for obs_index in range(self.num_obs):
            self.tau_rescaling[obs_index] += kernel_ranges
        self.tau_rescaling = self.tau_rescaling**2

        # construct weight and bias shapes
        activations = [tf.nn.tanh]
        weight_shapes, bias_shapes = [[self.feature_size, self.hidden_shape]], [[self.hidden_shape]]
        for _ in range(1, self.num_layers - 1):
            activations.append(tf.nn.tanh)
            weight_shapes.append([self.hidden_shape, self.hidden_shape])
            bias_shapes.append([self.hidden_shape])
        activations.append(lambda x: x)
        weight_shapes.append([self.hidden_shape, self.bnn_output_size])
        bias_shapes.append([self.bnn_output_size])

        # ---------------
        # construct prior
        # ---------------
        self.prior_layer_outputs = [self.x]
        self.priors = {}
        for layer_index in range(self.num_layers):
            weight_shape, bias_shape = weight_shapes[layer_index], bias_shapes[layer_index]
            activation = activations[layer_index]

            weight = tfd.Normal(loc=tf.zeros(weight_shape) + self.weight_loc,
                                scale=tf.zeros(weight_shape) + self.weight_scale)
            bias = tfd.Normal(loc=tf.zeros(bias_shape) + self.bias_loc,
                              scale=tf.zeros(bias_shape) + self.bias_scale)

            self.priors['weight_%d' % layer_index] = weight
            self.priors['bias_%d' % layer_index] = bias

            prior_layer_output = activation(
                tf.matmul(self.prior_layer_outputs[-1], weight.sample()) + bias.sample())
            self.prior_layer_outputs.append(prior_layer_output)

        self.prior_bnn_output = self.prior_layer_outputs[-1]
        # draw precisions from a gamma distribution
        self.prior_tau_normed = tfd.Gamma(
            12 * (self.num_obs / self.frac_feas)**2 + tf.zeros((self.num_obs, self.bnn_output_size)),
            tf.ones((self.num_obs, self.bnn_output_size)),
        )
        self.prior_tau = self.prior_tau_normed.sample() / self.tau_rescaling
        self.prior_scale = tfd.Deterministic(1. / tf.sqrt(self.prior_tau))

        # -------------------
        # construct posterior
        # -------------------
        self.post_layer_outputs = [self.x]
        self.posteriors = {}
        for layer_index in range(self.num_layers):
            weight_shape, bias_shape = weight_shapes[layer_index], bias_shapes[layer_index]
            activation = activations[layer_index]

            weight = tfd.Normal(loc=tf.Variable(tf.random.normal(weight_shape)),
                                scale=tf.nn.softplus(tf.Variable(tf.zeros(weight_shape))))
            bias = tfd.Normal(loc=tf.Variable(tf.random.normal(bias_shape)),
                              scale=tf.nn.softplus(tf.Variable(tf.zeros(bias_shape))))

            self.posteriors['weight_%d' % layer_index] = weight
            self.posteriors['bias_%d' % layer_index] = bias

            post_layer_output = activation(
                tf.matmul(self.post_layer_outputs[-1], weight.sample()) + bias.sample())
            self.post_layer_outputs.append(post_layer_output)

        self.post_bnn_output = self.post_layer_outputs[-1]
        self.post_tau_normed = tfd.Gamma(
            12 * (self.num_obs / self.frac_feas)**2 + tf.Variable(tf.zeros((self.num_obs, self.bnn_output_size))),
            tf.nn.softplus(tf.Variable(tf.ones((self.num_obs, self.bnn_output_size)))),
        )
        self.post_tau = self.post_tau_normed.sample() / self.tau_rescaling
        self.post_sqrt_tau = tf.sqrt(self.post_tau)
        self.post_scale = tfd.Deterministic(1. / self.post_sqrt_tau)

        # map bnn output to prediction
        post_kernels = {}
        targets_dict = {}
        inferences = []

        target_element_index = 0
        kernel_element_index = 0
        while kernel_element_index < len(self.config.kernel_names):
            kernel_type = self.config.kernel_types[kernel_element_index]
            kernel_size = self.config.kernel_sizes[kernel_element_index]

            feature_begin, feature_end = target_element_index, target_element_index + 1
            kernel_begin, kernel_end = kernel_element_index, kernel_element_index + kernel_size

            prior_relevant = self.prior_bnn_output[:, kernel_begin:kernel_end]
            post_relevant = self.post_bnn_output[:, kernel_begin:kernel_end]

            if kernel_type == 'continuous':
                target = self.y[:, kernel_begin:kernel_end]
                lowers, uppers = self.config.kernel_lowers[kernel_begin:kernel_end], self.config.kernel_uppers[kernel_begin:kernel_end]

                prior_support = (uppers - lowers) * (1.2 * tf.nn.sigmoid(prior_relevant) - 0.1) + lowers
                post_support = (uppers - lowers) * (1.2 * tf.nn.sigmoid(post_relevant) - 0.1) + lowers

                prior_predict = tfd.Normal(prior_support, self.prior_scale[:, kernel_begin:kernel_end].sample())
                post_predict = tfd.Normal(post_support, self.post_scale[:, kernel_begin:kernel_end].sample())

                targets_dict[prior_predict] = target
                post_kernels['param_%d' % target_element_index] = {
                    'loc': tfd.Deterministic(post_support),
                    'sqrt_prec': tfd.Deterministic(self.post_sqrt_tau[:, kernel_begin:kernel_end]),
                    'scale': tfd.Deterministic(self.post_scale[:, kernel_begin:kernel_end].sample())}

                inference = {'pred': post_predict, 'target': target}
                inferences.append(inference)

            elif kernel_type in ['categorical', 'discrete']:
                target = tf.cast(self.y[:, kernel_begin:kernel_end], tf.int32)

                '''
                Temperature annealing schedule:
                    - temperature of 100   yields 1e-2 deviation from uniform
                    - temperature of  10   yields 1e-1 deviation from uniform
                    - temperature of   1   yields *almost* perfect agreement with expectation
                    - temperature of   0.1 yields perfect agreement with expectation
                '''
                prior_temperature = 0.5 + 10.0 / (self.num_obs / self.frac_feas)
                # prior_temperature = 1.0
                post_temperature = prior_temperature

                prior_support = prior_relevant
                post_support = post_relevant

                prior_predict_relaxed = tfd.RelaxedOneHotCategorical(prior_temperature, prior_support)
                prior_predict = tfd.OneHotCategorical(probs=prior_predict_relaxed.sample())

                post_predict_relaxed = tfd.RelaxedOneHotCategorical(post_temperature, post_support)
                post_predict = tfd.OneHotCategorical(probs=post_predict_relaxed.sample())

                targets_dict[prior_predict] = target
                post_kernels['param_%d' % target_element_index] = {'probs': post_predict_relaxed}

                inference = {'pred': post_predict, 'target': target}
                inferences.append(inference)

            else:
                GryffinUnknownSettingsError(f'did not understand kernel type: {kernel_type}')

            target_element_index += 1
            kernel_element_index += kernel_size

        self.post_kernels = post_kernels
        self.targets_dict = targets_dict

        self.loss = 0.
        for inference in inferences:
            self.loss += -tf.reduce_sum(inference['pred'].log_prob(inference['target']))

        self.optimizer = tf.compat.v1.train.AdamOptimizer(learning_rate)
        self.train_op = self.optimizer.minimize(self.loss)

        tf.compat.v1.global_variables_initializer().run()

def surrogate_likelihood_model(qx):
    if not use_point_estimates:
        qrnaseq_reads = yield JDCRoot(Independent(
            tfd.Deterministic(tf.zeros([num_samples, 0]))))