def testAsRandomVariable(self):
  # A wrapped Normal distribution should behave identically to
  # the builtin Normal RV.
  def model_builtin():
    return ed.Normal(1., 0.1, name="x")

  def model_wrapped():
    return ed.as_random_variable(tfd.Normal(1., 0.1, name="x"))

  # Check that both models are interceptable and yield
  # identical log probs.
  log_joint_builtin = ed.make_log_joint_fn(model_builtin)
  log_joint_wrapped = ed.make_log_joint_fn(model_wrapped)
  self.assertEqual(self.evaluate(log_joint_builtin(x=7.)),
                   self.evaluate(log_joint_wrapped(x=7.)))

  # Check that our attempt to back out the variable name from the
  # Distribution name is robust to name scoping.
  with tf.name_scope("nested_scope"):
    dist = tfd.Normal(1., 0.1, name="x")

    def model_scoped():
      return ed.as_random_variable(dist)

  log_joint_scoped = ed.make_log_joint_fn(model_scoped)
  self.assertEqual(self.evaluate(log_joint_builtin(x=7.)),
                   self.evaluate(log_joint_scoped(x=7.)))
def run_parametrised_hmc(model_config,
                         interceptor,
                         num_samples=2000,
                         burnin=1000,
                         num_leapfrog_steps=4,
                         num_adaptation_steps=500,
                         num_optimization_steps=2000):
  """Given a (centred) model, this function transforms it based on the
  provided interceptor, and runs HMC on the reparameterised model.
  """

  def model_ncp(*params):
    with ed.interception(interceptor):
      return model_config.model(*params)

  log_joint_noncentered = ed.make_log_joint_fn(model_ncp)

  with ed.tape() as model_tape:
    _ = model_ncp(*model_config.model_args)

  param_shapes = collections.OrderedDict()
  target_ncp_kwargs = {}
  for param in model_tape.keys():
    if param not in model_config.observed_data.keys():
      param_shapes[param] = model_tape[param].shape
    else:
      target_ncp_kwargs[param] = model_config.observed_data[param]

  def target_ncp(*param_args):
    i = 0
    for param in model_tape.keys():
      if param not in model_config.observed_data.keys():
        target_ncp_kwargs[param] = param_args[i]
        i = i + 1
    return log_joint_noncentered(*model_config.model_args,
                                 **target_ncp_kwargs)

  stepsize_kwargs = {'num_leapfrog_steps': num_leapfrog_steps,
                     'num_optimization_steps': num_optimization_steps}
  for key in model_config.observed_data:
    stepsize_kwargs[key] = model_config.observed_data[key]
  (step_size_init_ncp, stepsize_elbo_ncp,
   vi_time) = util.approximate_mcmc_step_size(model_ncp,
                                              *model_config.model_args,
                                              **stepsize_kwargs)

  results = _run_hmc(
      target_ncp,
      param_shapes,
      step_size_init=step_size_init_ncp,
      transform=model_config.to_centered,
      num_samples=num_samples,
      burnin=burnin,
      num_adaptation_steps=num_adaptation_steps,
      num_leapfrog_steps=num_leapfrog_steps)

  results['elbo'] = stepsize_elbo_ncp
  results['vi_time'] = vi_time
  return results
def testMakeLogJointFnUnconditional(self):
  """Test `make_log_joint_fn` on unconditional Edward program."""
  def normal_with_unknown_mean():
    loc = ed.Normal(loc=0., scale=1., name="loc")
    x = ed.Normal(loc=loc, scale=0.5, sample_shape=5, name="x")
    return x

  def true_log_joint(loc, x):
    log_prob = tf.reduce_sum(tfd.Normal(loc=0., scale=1.).log_prob(loc))
    log_prob += tf.reduce_sum(tfd.Normal(loc=loc, scale=0.5).log_prob(x))
    return log_prob

  loc_value = 0.3
  x_value = tf.random_normal([5])

  log_joint = ed.make_log_joint_fn(normal_with_unknown_mean)
  actual_log_prob = true_log_joint(loc_value, x_value)
  # The extra, unused keyword argument checks that the log-joint tolerates
  # values for names not used in the program; see
  # https://github.com/tensorflow/probability/issues/160.
  expected_log_prob = log_joint(
      loc=loc_value, x=x_value,
      f="https://github.com/tensorflow/probability/issues/160")
  with self.assertRaises(LookupError):
    _ = log_joint(loc=loc_value)

  actual_log_prob_, expected_log_prob_ = self.evaluate(
      [actual_log_prob, expected_log_prob])
  self.assertEqual(actual_log_prob_, expected_log_prob_)
def testTrivialInterceptorPreservesLogJoint(self):
  def trivial_interceptor(fn, *args, **kwargs):
    # An interceptor that does nothing.
    return ed.interceptable(fn)(*args, **kwargs)

  def model():
    return ed.Normal(0., 1., name="x")

  def transformed_model():
    with ed.interception(trivial_interceptor):
      model()

  log_joint = ed.make_log_joint_fn(model)
  log_joint_transformed = ed.make_log_joint_fn(transformed_model)
  self.assertEqual(self.evaluate(log_joint(x=5.)),
                   self.evaluate(log_joint_transformed(x=5.)))
def testMakeLogJointFnUnconditional(self):
  """Test `make_log_joint_fn` on unconditional Edward program."""
  def model():
    loc = ed.Normal(loc=0., scale=1., name="loc")
    x = ed.Normal(loc=loc, scale=0.5, sample_shape=5, name="x")
    return x

  def true_log_joint(loc, x):
    log_prob = tf.reduce_sum(tfd.Normal(loc=0., scale=1.).log_prob(loc))
    log_prob += tf.reduce_sum(tfd.Normal(loc=loc, scale=0.5).log_prob(x))
    return log_prob

  loc_value = 0.3
  x_value = tf.random_normal([5])

  log_joint = ed.make_log_joint_fn(model)
  actual_log_prob = true_log_joint(loc_value, x_value)
  expected_log_prob = log_joint(loc=loc_value, x=x_value)
  with self.assertRaises(LookupError):
    _ = log_joint(loc=loc_value)

  actual_log_prob_, expected_log_prob_ = self.evaluate(
      [actual_log_prob, expected_log_prob])
  self.assertEqual(actual_log_prob_, expected_log_prob_)
def testMakeValueSetterWorksWithPartialAssignment(self):
  def normal_with_unknown_mean():
    loc = ed.Normal(loc=0., scale=1., name="loc")
    x = ed.Normal(loc=loc, scale=0.5, name="x")
    return x

  # Setting only the latents produces the posterior predictive distribution.
  loc_value = 3.
  with ed.interception(ed.make_value_setter(loc=loc_value)):
    x_predictive = normal_with_unknown_mean()
  self.assertAllEqual(self.evaluate(x_predictive.distribution.mean()),
                      loc_value)

  # Setting observed values allows calling the log joint as a fn of latents.
  x_value = 4.
  def model_with_observed_x():
    with ed.interception(ed.make_value_setter(x=x_value)):
      normal_with_unknown_mean()

  observed_log_joint_fn = ed.make_log_joint_fn(model_with_observed_x)

  expected_joint_log_prob = (
      tfd.Normal(0., 1.).log_prob(loc_value) +
      tfd.Normal(loc_value, 0.5).log_prob(x_value))
  self.assertEqual(self.evaluate(expected_joint_log_prob),
                   self.evaluate(observed_log_joint_fn(loc=loc_value)))
def testMakeLogJointFnTemplate(self):
  """Test `make_log_joint_fn` on program returned by tf.make_template."""
  def variational():
    loc = tf.compat.v1.get_variable("loc", [])
    qz = ed.Normal(loc=loc, scale=0.5, name="qz")
    return qz

  def true_log_joint(loc, qz):
    log_prob = tf.reduce_sum(
        input_tensor=tfd.Normal(loc=loc, scale=0.5).log_prob(qz))
    return log_prob

  qz_value = 1.23

  variational_template = tf.compat.v1.make_template("variational",
                                                    variational)
  log_joint = ed.make_log_joint_fn(variational_template)
  expected_log_prob = log_joint(qz=qz_value)
  loc = tf.compat.v1.trainable_variables("variational")[0]
  actual_log_prob = true_log_joint(loc, qz_value)

  with self.cached_session() as sess:
    sess.run(tf.compat.v1.initialize_all_variables())
    actual_log_prob_, expected_log_prob_ = sess.run(
        [actual_log_prob, expected_log_prob])
  self.assertEqual(actual_log_prob_, expected_log_prob_)
def make_cvip_graph(model_config,
                    parameterisation_type='exp',
                    tied_pparams=False):
  """Constructs the cVIP graph of the given model.

  Resets the default TF graph.
  """
  tf.reset_default_graph()

  results = collections.OrderedDict()

  (learnable_parameters,
   learnable_parametrisation,
   _) = ed_transforms.make_learnable_parametrisation(
       tau=1.,
       parameterisation_type=parameterisation_type,
       tied_pparams=tied_pparams)

  def model_vip(*params):
    with ed.interception(learnable_parametrisation):
      return model_config.model(*params)

  if model_config.bijectors_fn is not None:
    model_vip = ed_transforms.transform_with_bijectors(
        model_vip, model_config.bijectors_fn)

  log_joint_vip = ed.make_log_joint_fn(model_vip)  # log_joint_fn

  with ed.tape() as model_tape:
    _ = model_vip(*model_config.model_args)

  target_vip_kwargs = {}
  for param in model_tape.keys():
    if param in model_config.observed_data.keys():
      target_vip_kwargs[param] = model_config.observed_data[param]

  def target_vip(*param_args):  # latent_log_joint_fn
    i = 0
    for param in model_tape.keys():
      if param not in model_config.observed_data.keys():
        target_vip_kwargs[param] = param_args[i]
        i = i + 1
    return log_joint_vip(*model_config.model_args, **target_vip_kwargs)

  # full_kwargs = collections.OrderedDict(model_config.observed_data.items())
  # full_kwargs['parameterisation'] = collections.OrderedDict()
  # for k in learnable_parameters.keys():
  #   full_kwargs['parameterisation'][k] = learnable_parameters[k]

  elbo, variational_parameters = util.get_mean_field_elbo(
      model_vip,
      target_vip,
      num_mc_samples=FLAGS.num_mc_samples,
      model_args=model_config.model_args,
      model_obs_kwargs=model_config.observed_data,
      vi_kwargs={'parameterisation': learnable_parameters})
      # vi_kwargs=full_kwargs

  return (target_vip, model_vip, elbo, variational_parameters,
          learnable_parameters)
def testMakeLogJointFnDynamic(self):
  """Test `make_log_joint_fn` on Edward program with stochastic control flow.

  This verifies that Edward's program transformation is done by tracing the
  execution at runtime (and not purely by static analysis). In particular,
  the execution is controlled by random variable outcomes, which in turn is
  controlled by the log-joint's inputs.
  """
  if not tfe.in_eager_mode():
    # Don't run test in graph mode.
    return

  def model():
    loc = ed.Normal(loc=0., scale=1., name="loc")
    flip = ed.Bernoulli(probs=0.5, name="flip")
    if tf.equal(flip, 1):
      x = ed.Normal(loc=loc, scale=0.5, sample_shape=5, name="x")
    else:
      x = ed.Poisson(rate=tf.nn.softplus(loc), sample_shape=3, name="x")
    return x

  def true_log_joint(loc, flip, x):
    log_prob = tf.reduce_sum(tfd.Normal(loc=0., scale=1.).log_prob(loc))
    log_prob += tf.reduce_sum(tfd.Bernoulli(probs=0.5).log_prob(flip))
    if tf.equal(flip, 1):
      log_prob += tf.reduce_sum(tfd.Normal(loc=loc, scale=0.5).log_prob(x))
    else:
      log_prob += tf.reduce_sum(
          tfd.Poisson(rate=tf.nn.softplus(loc)).log_prob(x))
    return log_prob

  loc_value = 0.3
  flip_value = tf.constant(1)
  x_value = tf.random_normal([5])

  log_joint = ed.make_log_joint_fn(model)
  actual_log_prob = true_log_joint(loc_value, flip_value, x_value)
  expected_log_prob = log_joint(loc=loc_value, flip=flip_value, x=x_value)

  actual_log_prob_, expected_log_prob_ = self.evaluate(
      [actual_log_prob, expected_log_prob])
  self.assertEqual(actual_log_prob_, expected_log_prob_)

  loc_value = 1.2
  flip_value = tf.constant(0)
  x_value = tf.random_normal([3])

  actual_log_prob = true_log_joint(loc_value, flip_value, x_value)
  expected_log_prob = log_joint(loc=loc_value, flip=flip_value, x=x_value)

  actual_log_prob_, expected_log_prob_ = self.evaluate(
      [actual_log_prob, expected_log_prob])
  self.assertEqual(actual_log_prob_, expected_log_prob_)
def run_centered_hmc(model_config,
                     num_samples=2000,
                     burnin=1000,
                     num_leapfrog_steps=4,
                     num_adaptation_steps=500,
                     num_optimization_steps=2000):
  """Runs HMC on the provided (centred) model."""
  tf.compat.v1.reset_default_graph()

  log_joint_centered = ed.make_log_joint_fn(model_config.model)

  with ed.tape() as model_tape:
    _ = model_config.model(*model_config.model_args)

  param_shapes = collections.OrderedDict()
  target_cp_kwargs = {}
  for param in model_tape.keys():
    if param not in model_config.observed_data.keys():
      param_shapes[param] = model_tape[param].shape
    else:
      target_cp_kwargs[param] = model_config.observed_data[param]

  def target_cp(*param_args):
    i = 0
    for param in model_tape.keys():
      if param not in model_config.observed_data.keys():
        target_cp_kwargs[param] = param_args[i]
        i = i + 1
    return log_joint_centered(*model_config.model_args, **target_cp_kwargs)

  stepsize_kwargs = {'num_leapfrog_steps': num_leapfrog_steps,
                     'num_optimization_steps': num_optimization_steps}
  for key in model_config.observed_data:
    stepsize_kwargs[key] = model_config.observed_data[key]
  (step_size_init_cp, stepsize_elbo_cp,
   vi_time) = util.approximate_mcmc_step_size(model_config.model,
                                              *model_config.model_args,
                                              **stepsize_kwargs)

  results = _run_hmc(
      target_cp,
      param_shapes,
      step_size_init=step_size_init_cp,
      num_samples=num_samples,
      burnin=burnin,
      num_adaptation_steps=num_adaptation_steps,
      num_leapfrog_steps=num_leapfrog_steps)

  results['elbo'] = stepsize_elbo_cp
  results['vi_time'] = vi_time
  return results
def binary_bayesian_network(x_train, y_train, x_test):
    n_samples = 4000
    n_burn = 2000

    # set initial state
    mu, sigma = 0, 1.
    q_w1 = tf.random.normal([], mean=mu * np.ones([2, 16]),
                            stddev=sigma * np.ones([2, 16]), dtype=tf.float32)
    q_b1 = tf.random.normal([], mean=mu * np.ones([1, 16]),
                            stddev=sigma * np.ones([1, 16]), dtype=tf.float32)
    q_w2 = tf.random.normal([], mean=mu * np.ones([16, 1]),
                            stddev=sigma * np.ones([16, 1]), dtype=tf.float32)
    q_b2 = tf.random.normal([], mean=mu * np.ones(1),
                            stddev=sigma * np.ones(1), dtype=tf.float32)

    # convert train data to tensors
    x_train_tensor = tf.convert_to_tensor(x_train, dtype=tf.float32)
    y_train_tensor = tf.convert_to_tensor(y_train, dtype=tf.float32)

    log_joint = ed.make_log_joint_fn(binary_bayesian_network_log_likelihood)

    def target_log_prob_fn(w1, b1, w2, b2):
        return log_joint(x_train_tensor, mu, sigma,
                         w1=w1, b1=b1, w2=w2, b2=b2,
                         y=y_train_tensor)

    # set up Hamiltonian MC
    hmc_kernel = tfp.mcmc.HamiltonianMonteCarlo(
        target_log_prob_fn=target_log_prob_fn,
        step_size=0.01,
        num_leapfrog_steps=5)

    # set sampler
    states, kernel_results = tfp.mcmc.sample_chain(
        num_results=n_samples,
        current_state=[q_w1, q_b1, q_w2, q_b2],
        kernel=hmc_kernel,
        num_burnin_steps=n_burn)

    # run the session to extract the samples
    with tf.Session() as sess:
        states, is_accepted_state = sess.run(
            [states, kernel_results.is_accepted])

    # average the accepted samples of each parameter
    w1 = np.mean(states[0][np.where(is_accepted_state)], 0)
    b1 = np.mean(states[1][np.where(is_accepted_state)], 0)
    w2 = np.mean(states[2][np.where(is_accepted_state)], 0)
    b2 = np.mean(states[3][np.where(is_accepted_state)], 0)

    # forward pass through the network using the posterior-mean weights
    x_test_tensor = tf.convert_to_tensor(x_test, dtype=tf.float32)
    y1 = tf.math.tanh(tf.add(tf.matmul(x_test_tensor, w1), b1))
    y2 = tf.math.sigmoid(tf.add(tf.matmul(y1, w2), b2))

    with tf.Session() as sess:
        y_predicted = y2.eval(session=sess)

    y_predicted_classes = np.array([0 if y < .5 else 1 for y in y_predicted])
    return y_predicted_classes
def make_dvip_graph(model_config, reparam, parameterisation_type='exp'):
  """Constructs the dVIP graph of the given model, where `reparam` is a
  cVIP reparameterisation obtained previously.

  Resets the default TF graph.
  """
  tf.reset_default_graph()

  results = collections.OrderedDict()

  _, insightful_parametrisation, _ = (
      ed_transforms.make_learnable_parametrisation(
          learnable_parameters=reparam,
          parameterisation_type=parameterisation_type))

  def model_vip(*params):
    with ed.interception(insightful_parametrisation):
      return model_config.model(*params)

  if model_config.bijectors_fn is not None:
    model_vip = ed_transforms.transform_with_bijectors(
        model_vip, model_config.bijectors_fn)

  log_joint_vip = ed.make_log_joint_fn(model_vip)  # log_joint_fn

  with ed.tape() as model_tape:
    _ = model_vip(*model_config.model_args)

  target_vip_kwargs = {}
  for param in model_tape.keys():
    if param in model_config.observed_data.keys():
      target_vip_kwargs[param] = model_config.observed_data[param]

  def target_vip(*param_args):  # latent_log_joint_fn
    i = 0
    for param in model_tape.keys():
      if param not in model_config.observed_data.keys():
        target_vip_kwargs[param] = param_args[i]
        i = i + 1
    return log_joint_vip(*model_config.model_args, **target_vip_kwargs)

  elbo, variational_parameters = util.get_mean_field_elbo(
      model_vip,
      target_vip,
      num_mc_samples=FLAGS.num_mc_samples,
      model_args=model_config.model_args,
      model_obs_kwargs=model_config.observed_data,
      vi_kwargs={'parameterisation': reparam})

  return target_vip, model_vip, elbo, variational_parameters, None
def testMakeLogJointFnError(self):
  """Test `make_log_joint_fn` raises errors when `name`(s) not supplied."""
  def model():
    loc = ed.Normal(loc=0., scale=1., name="loc")
    x = ed.Normal(loc=loc, scale=0.5, sample_shape=5)
    return x

  loc_value = 0.3
  x_value = tf.random_normal([5])

  log_joint = ed.make_log_joint_fn(model)
  with self.assertRaises(KeyError):
    _ = log_joint(loc=loc_value, x=x_value)
def make_ncp_graph(model_config):
  """Constructs the NCP graph of the given model.

  Resets the default TF graph.
  """
  tf.reset_default_graph()

  interceptor = ed_transforms.ncp

  def model_ncp(*params):
    with ed.interception(interceptor):
      return model_config.model(*params)

  if model_config.bijectors_fn is not None:
    model_ncp = ed_transforms.transform_with_bijectors(
        model_ncp, model_config.bijectors_fn)

  log_joint_noncentered = ed.make_log_joint_fn(model_ncp)

  with ed.tape() as model_tape:
    _ = model_ncp(*model_config.model_args)

  target_ncp_kwargs = {}
  for param in model_tape.keys():
    if param in model_config.observed_data.keys():
      target_ncp_kwargs[param] = model_config.observed_data[param]

  def target_ncp(*param_args):
    i = 0
    for param in model_tape.keys():
      if param not in model_config.observed_data.keys():
        target_ncp_kwargs[param] = param_args[i]
        i = i + 1
    return log_joint_noncentered(*model_config.model_args,
                                 **target_ncp_kwargs)

  elbo, variational_parameters = util.get_mean_field_elbo(
      model_config.model,
      target_ncp,
      num_mc_samples=FLAGS.num_mc_samples,
      model_args=model_config.model_args,
      model_obs_kwargs=model_config.observed_data,
      vi_kwargs=None)

  return target_ncp, model_ncp, elbo, variational_parameters, None
def testMakeLogJointFnConditional(self):
  """Test `make_log_joint_fn` on conditional Edward program."""
  def model(features, prior_precision):
    w = ed.Normal(loc=0.,
                  scale=tf.rsqrt(prior_precision),
                  sample_shape=features.shape[1],
                  name="w")
    y = ed.Normal(loc=tf.tensordot(features, w, [[1], [0]]),
                  scale=1.,
                  name="y")
    return y

  features = tf.random_normal([3, 2])
  prior_precision = 0.5
  w_value = tf.random_normal([2])
  y_value = tf.random_normal([3])

  def true_log_joint(features, prior_precision, w, y):
    log_prob = tf.reduce_sum(
        tfd.Normal(loc=0., scale=tf.rsqrt(prior_precision)).log_prob(w))
    log_prob += tf.reduce_sum(
        tfd.Normal(loc=tf.tensordot(features, w, [[1], [0]]),
                   scale=1.).log_prob(y))
    return log_prob

  log_joint = ed.make_log_joint_fn(model)
  actual_log_prob = true_log_joint(features, prior_precision,
                                   w_value, y_value)
  expected_log_prob = log_joint(features, prior_precision,
                                y=y_value, w=w_value)
  with self.assertRaises(LookupError):
    _ = log_joint(features, prior_precision, w=w_value)

  actual_log_prob_, expected_log_prob_ = self.evaluate(
      [actual_log_prob, expected_log_prob])
  self.assertEqual(actual_log_prob_, expected_log_prob_)
def make_cp_graph(model_config):
  """Constructs the CP graph of the given model.

  Resets the default TF graph.
  """
  tf.reset_default_graph()

  log_joint_centered = ed.make_log_joint_fn(model_config.model)

  with ed.tape() as model_tape:
    _ = model_config.model(*model_config.model_args)

  param_shapes = collections.OrderedDict()
  target_cp_kwargs = {}
  for param in model_tape.keys():
    if param not in model_config.observed_data.keys():
      param_shapes[param] = model_tape[param].shape
    else:
      target_cp_kwargs[param] = model_config.observed_data[param]

  def target_cp(*param_args):
    i = 0
    for param in model_tape.keys():
      if param not in model_config.observed_data.keys():
        target_cp_kwargs[param] = param_args[i]
        i = i + 1
    return log_joint_centered(*model_config.model_args, **target_cp_kwargs)

  elbo, variational_parameters = util.get_mean_field_elbo(
      model_config.model,
      target_cp,
      num_mc_samples=FLAGS.num_mc_samples,
      model_args=model_config.model_args,
      model_obs_kwargs=model_config.observed_data,
      vi_kwargs=None)

  return target_cp, model_config.model, elbo, variational_parameters, None
def testMakeLogJointFnTemplate(self):
  """Test `make_log_joint_fn` on program returned by tf.make_template."""
  def variational():
    loc = tf.get_variable("loc", [])
    qz = ed.Normal(loc=loc, scale=0.5, name="qz")
    return qz

  def true_log_joint(loc, qz):
    log_prob = tf.reduce_sum(tfd.Normal(loc=loc, scale=0.5).log_prob(qz))
    return log_prob

  qz_value = 1.23

  variational_template = tf.make_template("variational", variational)
  log_joint = ed.make_log_joint_fn(variational_template)
  expected_log_prob = log_joint(qz=qz_value)
  loc = tf.trainable_variables("variational")[0]
  actual_log_prob = true_log_joint(loc, qz_value)

  with self.test_session() as sess:
    sess.run(tf.initialize_all_variables())
    actual_log_prob_, expected_log_prob_ = sess.run(
        [actual_log_prob, expected_log_prob])
  self.assertEqual(actual_log_prob_, expected_log_prob_)
def inference_vi1(num_epochs=2000, learning_rate=0.05):

    def variational_model():
        qpi = tf.nn.softplus(
            tf.Variable(np.zeros(K) * 1.0 / K, dtype=dtype), name='qpi')
        qmu = tf.Variable(np.zeros([K, D]), dtype=dtype, name='qmu')
        qsigma = tf.nn.softplus(
            tf.Variable(np.ones([K, D]), dtype=dtype, name='qsigma'))
        return qpi, qmu, qsigma

    log_q = ed.make_log_joint_fn(variational_model)

    def joint_log_variational_model(qpi, qmu, qsigma):
        return log_q(qpi=qpi, qmu=qmu, qsigma=qsigma)

    qpi, qmu, qsigma = variational_model()

    energy = joint_log_prob(qpi, qmu, qsigma)
    entropy = -joint_log_variational_model(qpi, qmu, qsigma)
    elbo = energy + entropy

    optimizer = tf.train.AdamOptimizer(learning_rate)
    training_process = optimizer.minimize(-elbo)

    init_process = tf.global_variables_initializer()

    losses = []
    with tf.Session() as sess:
        sess.run(init_process)
        for i in range(num_epochs):
            sess.run(training_process)
            if i % 10 == 0:
                losses.append(sess.run(elbo))
        posterior_mu = sess.run([qmu])
        print(posterior_mu)

    plt.plot(range(len(losses)), losses)
    plt.savefig('./plots/gmm_tfp_simples_loss_vi1.png')
def fit(self, input_fn):

    def linear_model(X):  # (unmodeled) data
        w = ed.Normal(loc=tf.zeros([X.shape[1]]),
                      scale=tf.ones([X.shape[1]]),
                      name="w")  # parameter
        b = ed.Normal(loc=tf.zeros([]), scale=tf.ones([]),
                      name="b")  # parameter
        y = ed.Normal(loc=tf.tensordot(X, w, axes=1) + b,
                      scale=1.0,
                      name="y")  # (modeled) data
        return y

    def variational_model(qw_mean, qw_stddv, qb_mean, qb_stddv):
        qw = ed.Normal(loc=qw_mean, scale=qw_stddv, name="qw")
        qb = ed.Normal(loc=qb_mean, scale=qb_stddv, name="qb")
        return qw, qb

    x_tensor = tf.convert_to_tensor(x_train, tf.float32)
    y_tensor = tf.convert_to_tensor(y_train, tf.float32)

    # make dataset
    dataset = tf.data.Dataset.from_tensor_slices((x_tensor, y_tensor))
    shuffle = dataset.shuffle(hparams.shuffle_iterations)
    batches = shuffle.repeat().batch(hparams.batch_size)
    iterator = batches.make_one_shot_iterator()
    features, labels = iterator.get_next()

    log_q = ed.make_log_joint_fn(variational_model)

    def target_q(qw, qb):
        return log_q(qw_mean=qw_mean, qw_stddv=qw_stddv,
                     qb_mean=qb_mean, qb_stddv=qb_stddv,
                     qw=qw, qb=qb)

    qw_mean = tf.Variable(tf.zeros([int(features.shape[1])]),
                          dtype=tf.float32)
    qb_mean = tf.Variable(tf.zeros([]), dtype=tf.float32)
    qw_stddv = tf.nn.softplus(
        tf.Variable(tf.ones([int(features.shape[1])]), dtype=tf.float32))
    qb_stddv = tf.nn.softplus(tf.Variable(tf.ones([]), dtype=tf.float32))

    qw, qb = variational_model(qw_mean=qw_mean, qw_stddv=qw_stddv,
                               qb_mean=qb_mean, qb_stddv=qb_stddv)

    log_joint = ed.make_log_joint_fn(linear_model)

    def target(qw, qb):
        """Unnormalized target density as a function of the parameters."""
        return log_joint(w=qw, b=qb, X=features, y=labels)

    energy = target(qw, qb)
    entropy = -target_q(qw, qb) / hparams.batch_size
    elbo = energy + entropy

    optimizer = tf.train.AdamOptimizer(learning_rate=hparams.learning_rate)
    train = optimizer.minimize(-elbo)

    init = tf.global_variables_initializer()

    t = []
    weights_dict = {}
    num_steps = int(x_train.shape[0] / hparams.batch_size)
    with tf.Session() as sess:
        sess.run(init)
        for step in range(num_steps):
            _ = sess.run([train])
            if step % 100 == 0:
                t.append(sess.run([elbo]))
        weights_dict['w'], weights_dict['b'] = sess.run(
            [qw.distribution.sample(1000), qb.distribution.sample(1000)])

    return weights_dict['w'], weights_dict['b']
def bnn(X, y, x):
    from tensorflow_probability import edward2 as ed

    X = X.numpy()
    y = y.numpy().reshape(-1, 1)
    x = x.numpy()

    def Net(features):
        W0 = ed.Normal(loc=tf.zeros([1, n_neurons]),
                       scale=10 * tf.ones([1, n_neurons]), name='W0')
        b0 = ed.Normal(loc=tf.zeros(n_neurons),
                       scale=10 * tf.ones(n_neurons), name='b0')
        W1 = ed.Normal(loc=tf.zeros([n_neurons, 1]),
                       scale=10 * tf.ones([n_neurons, 1]), name='W1')
        b1 = ed.Normal(loc=tf.zeros(1), scale=10 * tf.ones(1), name='b1')

        h = tf.sigmoid(tf.matmul(features, W0) + b0)
        mean = tf.matmul(h, W1) + b1

        noise_std = ed.HalfNormal(scale=tf.ones([1]), name="noise_std")
        return ed.Normal(loc=mean, scale=noise_std, name='predictions')

    log_joint = ed.make_log_joint_fn(Net)

    def target_log_prob_fn(W0, b0, W1, b1, noise_std):
        return log_joint(features=X, W0=W0, b0=b0, W1=W1, b1=b1,
                         noise_std=noise_std, predictions=y)

    num_results = int(20e3)  # number of HMC iterations
    n_burnin = int(5e3)      # number of burn-in steps
    step_size = 0.01
    num_leapfrog_steps = 10

    kernel = tfp.mcmc.HamiltonianMonteCarlo(
        target_log_prob_fn=target_log_prob_fn,
        step_size=step_size,
        num_leapfrog_steps=num_leapfrog_steps)

    states, kernel_results = tfp.mcmc.sample_chain(
        num_results=num_results,
        num_burnin_steps=n_burnin,
        kernel=kernel,
        current_state=[
            tf.zeros([1, n_neurons], name='init_W0'),
            tf.zeros([n_neurons], name='init_b0'),
            tf.zeros([n_neurons, 1], name='init_W1'),
            tf.zeros([1], name='init_b1'),
            tf.ones([1], name='init_noise_std'),
        ])

    W0, b0, W1, b1, noise_std = states

    with tf.Session() as sess:
        [W0_, b0_, W1_, b1_, noise_std_, accepted_] = sess.run(
            [W0, b0, W1, b1, noise_std, kernel_results.is_accepted])

    W0_samples = W0_[n_burnin:]
    b0_samples = b0_[n_burnin:]
    W1_samples = W1_[n_burnin:]
    b1_samples = b1_[n_burnin:]
    noise_std_samples = noise_std_[n_burnin:]
    accepted_samples = accepted_[n_burnin:]

    print('Acceptance rate: %0.1f%%' % (100 * np.mean(accepted_samples)))

    from scipy.special import expit as sigmoid

    def NpNet(features, W0, b0, W1, b1, noise):
        h = sigmoid(np.matmul(features, W0) + b0)
        return np.matmul(h, W1) + b1  # + noise*np.random.randn()

    out = [
        NpNet(x, W0_samples[i], b0_samples[i], W1_samples[i], b1_samples[i],
              noise_std_samples[i]) for i in range(len(W0_samples))
    ]
    out = np.array(out)
    y_pred = out.mean(axis=0)
    sigma = out.std(axis=0)
    return y_pred, sigma
# tf.reset_default_graph()
mcmc_graph = tf.Graph()
with mcmc_graph.as_default():
  # build RBF features
  N = X_train.shape[0]
  K_mat = gp.rbf(X_train, ls=ls_val) + 1e-6 * tf.eye(N)
  S, U, V = tf.svd(K_mat)
  num_feature = tf.reduce_sum(tf.cast(S > 1e-10, tf.int32))
  features = tf.tensordot(U, tf.diag(S), [[1], [0]])[:, :num_feature]

  # features = tf.random_normal([10, 2])
  # alpha = tf.random_normal(shape=[num_feature])
  # outcomes_value = tf.tensordot(features, alpha, [[1], [0]])
  outcomes_value = tf.constant(y_train)

  log_joint = ed.make_log_joint_fn(linear_regression)

  def target_log_prob_fn(coeffs_value):
    return log_joint(features, coeffs=coeffs_value, outcomes=outcomes_value)

  # set up state container
  initial_state = [
      tf.random_normal([num_feature], stddev=0.01, name='init_gp_func'),
  ]

  # set up HMC transition kernel
  step_size = tf.get_variable(
      name='step_size',
      initializer=1.,
      use_resource=True,  # assumed completion: the original snippet breaks
      trainable=False)    # off mid-call; completed to match the step-size
                          # variable pattern used elsewhere in this collection
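  # In the other GP examples in this collection, such a step-size variable
  # feeds an HMC transition kernel. A minimal sketch of that continuation
  # (the leapfrog count is an illustrative assumption, not from the source):
  hmc_kernel = tfp.mcmc.HamiltonianMonteCarlo(
      target_log_prob_fn=target_log_prob_fn,
      step_size=step_size,
      num_leapfrog_steps=5)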
    return ed.interceptable(f)(*args, **kwargs)

  return set_values


DATA_SIZE = 100
FEATURE_SIZE = 41
UNITS = [23, 7, 2]
SHAPE = 0.1

x, w2, w1, w0, z2, z1, z0 = deep_exponential_family(DATA_SIZE, FEATURE_SIZE,
                                                    UNITS, SHAPE)
num_samples, qw2, qw1, qw0, qz2, qz1, qz0 = (
    deep_exponential_family_variational(DATA_SIZE, FEATURE_SIZE, UNITS))

x_sample = tf.placeholder(tf.float32, shape=[DATA_SIZE, FEATURE_SIZE])

# Set up the MCMC sampling target: wrap the model to obtain its
# log-joint probability function.
log_joint = ed.make_log_joint_fn(deep_exponential_family)


def target_log_prob_fn(w2, w1, w0, z2, z1, z0):
  # With this wrapper, the posterior density is just the log joint as a
  # function of w2, w1, w0, z2, z1, z0 and the input x; it can be
  # customised as needed.
  return log_joint(DATA_SIZE, FEATURE_SIZE, UNITS, SHAPE,
                   w2=w2, w1=w1, w0=w0,
                   z2=z2, z1=z1, z0=z0,
                   x=x_sample)
  avg_effect = ed.Normal(loc=0., scale=10., name="avg_effect")  # `mu` above
  avg_stddev = ed.Normal(loc=5., scale=1., name="avg_stddev")  # `log(tau)` above
  school_effects_standard = ed.Normal(
      loc=tf.zeros(num_schools),
      scale=tf.ones(num_schools),
      name="school_effects_standard")  # `theta_prime` above
  school_effects = avg_effect + tf.exp(
      avg_stddev) * school_effects_standard  # `theta` above
  treatment_effects = ed.Normal(
      loc=school_effects,
      scale=treatment_stddevs,
      name="treatment_effects")  # `y` above
  return treatment_effects


log_joint = ed.make_log_joint_fn(schools_model)


def target_log_prob_fn(avg_effect, avg_stddev, school_effects_standard):
  """Unnormalized target density as a function of states."""
  return log_joint(
      num_schools=num_schools,
      treatment_stddevs=treatment_stddevs,
      avg_effect=avg_effect,
      avg_stddev=avg_stddev,
      school_effects_standard=school_effects_standard,
      treatment_effects=treatment_effects)


num_results = 5000
num_burnin_steps = 3000
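# The excerpt stops after choosing the chain lengths. A minimal sketch of the
# HMC setup that typically consumes `target_log_prob_fn` (initial states and
# step-size settings here are illustrative assumptions, not from the source):
states, kernel_results = tfp.mcmc.sample_chain(
    num_results=num_results,
    num_burnin_steps=num_burnin_steps,
    current_state=[
        tf.zeros([], name='init_avg_effect'),
        tf.zeros([], name='init_avg_stddev'),
        tf.ones([num_schools], name='init_school_effects_standard'),
    ],
    kernel=tfp.mcmc.HamiltonianMonteCarlo(
        target_log_prob_fn=target_log_prob_fn,
        step_size=0.4,
        num_leapfrog_steps=3))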
plt.ylim([-4.5, 4.5])
plt.savefig("./result/gpr/data.png")
plt.close()

"""""""""""""""""""""""""""""""""
# 2. MCMC
"""""""""""""""""""""""""""""""""

"""2.1. sampler basic config"""
num_results = 10000
num_burnin_steps = 5000

# define mcmc computation graph
mcmc_graph = tf.Graph()
with mcmc_graph.as_default():
  # build the likelihood explicitly
  log_joint = ed.make_log_joint_fn(gp_regression.model)

  def target_log_prob_fn(gp_f, sigma):
    """Unnormalized target density as a function of states."""
    return log_joint(X_train, y=y_train,
                     gp_f=gp_f,
                     log_ls=_DEFAULT_LOG_LS_SCALE,
                     sigma=sigma)

  # set up state container
  initial_state = [
      tf.random_normal([N], stddev=0.01, name='init_gp_func'),
      # tf.constant(0.1, name='init_ls'),
      tf.constant(0.1, name='init_sigma'),
  ]
def linear_gene_regression_inference(
        init_feed_dict, n, F, x_gene_init, x_isoform_init, make_likelihood,
        x_bias_mu0, x_bias_sigma0, x_scale_hinges, sample_scales,
        use_point_estimates, sess, niter=20000, elbo_add_term=0.0):
    num_samples = int(F.shape[0])
    num_factors = int(F.shape[1])
    num_features = int(x_gene_init.shape[1])

    log_joint = ed.make_log_joint_fn(lambda: linear_gene_regression_model(
        num_factors, num_features, n, F, x_bias_mu0, x_bias_sigma0,
        x_scale_hinges, sample_scales))

    qw_global_scale_variance_loc_var = tf.Variable(
        0.0, name="qw_global_scale_variance_loc_var")
    qw_global_scale_variance_scale_var = tf.nn.softplus(
        tf.Variable(-1.0, name="qw_global_scale_variance_scale_var"))

    qw_global_scale_noncentered_loc_var = tf.Variable(
        0.0, name="qw_global_scale_noncentered_loc_var")
    qw_global_scale_noncentered_scale_var = tf.nn.softplus(
        tf.Variable(-1.0, name="qw_global_scale_noncentered_scale_var"))

    qw_local_scale_variance_loc_var = tf.Variable(
        tf.fill([num_features], 0.0),
        name="qw_local_scale_variance_loc_var")
    qw_local_scale_variance_scale_var = tf.nn.softplus(
        tf.Variable(tf.fill([num_features], -1.0),
                    name="qw_local_scale_variance_scale_var"))

    qw_local_scale_noncentered_loc_var = tf.Variable(
        tf.fill([num_features], 0.0),
        name="qw_local_scale_noncentered_loc_var")
    qw_local_scale_noncentered_scale_var = tf.nn.softplus(
        tf.Variable(tf.fill([num_features], -1.0),
                    name="qw_local_scale_noncentered_scale_var"))

    qw_loc_var = tf.Variable(tf.zeros([num_features, num_factors]),
                             name="qw_loc_var")
    qw_scale_var = tf.nn.softplus(
        tf.Variable(tf.fill([num_features, num_factors], -2.0),
                    name="qw_scale_var"))

    qx_bias_loc_var = tf.Variable(tf.reduce_mean(x_gene_init, axis=0),
                                  name="qx_bias_loc_var")
    qx_bias_scale_var = tf.nn.softplus(
        tf.Variable(tf.fill([num_features], -1.0),
                    name="qx_bias_scale_var"))

    qx_scale_concentration_c_loc_var = tf.Variable(
        tf.fill([kernel_regression_degree], 1.0),
        name="qx_scale_concentration_c_loc_var")
    qx_scale_rate_c_loc_var = tf.Variable(
        tf.fill([kernel_regression_degree], -15.0),
        name="qx_scale_rate_c_loc_var")

    qx_scale_loc_var = tf.Variable(tf.fill([num_features], -0.5),
                                   name="qx_scale_loc_var")
    qx_scale_scale_var = tf.nn.softplus(
        tf.Variable(tf.fill([num_features], -1.0),
                    name="qx_scale_scale_var"))

    qw_distortion_c_loc_var = tf.Variable(
        tf.zeros([num_factors, kernel_regression_degree]),
        name="qw_distortion_c_loc_var")

    qx_gene_loc_var = tf.Variable(x_gene_init,
                                  name="qx_gene_loc_var",
                                  trainable=not use_point_estimates)
    qx_gene_scale_var = tf.nn.softplus(
        tf.Variable(tf.fill([num_samples, num_features], -1.0),
                    name="qx_gene_scale_var"))

    qx_isoform_loc_var = tf.Variable(x_isoform_init,
                                     name="qx_isoform_loc_var",
                                     trainable=not use_point_estimates)
    qx_isoform_scale_var = tf.nn.softplus(
        tf.Variable(tf.fill([num_samples, n], -1.0),
                    name="qx_isoform_scale_var"))

    qw_global_scale_variance, qw_global_scale_noncentered, \
        qw_local_scale_variance, qw_local_scale_noncentered, qw, qx_bias, \
        qx_scale_concentration_c, qx_scale_rate_c, qx_scale, \
        qw_distortion_c, qx_gene, qx_isoform = \
        linear_gene_regression_variational_model(
            qw_global_scale_variance_loc_var,
            qw_global_scale_variance_scale_var,
            qw_global_scale_noncentered_loc_var,
            qw_global_scale_noncentered_scale_var,
            qw_local_scale_variance_loc_var,
            qw_local_scale_variance_scale_var,
            qw_local_scale_noncentered_loc_var,
            qw_local_scale_noncentered_scale_var,
            qw_loc_var, qw_scale_var,
            qx_bias_loc_var, qx_bias_scale_var,
            qx_scale_concentration_c_loc_var, qx_scale_rate_c_loc_var,
            qx_scale_loc_var, qx_scale_scale_var,
            qw_distortion_c_loc_var,
            qx_gene_loc_var, qx_gene_scale_var,
            qx_isoform_loc_var, qx_isoform_scale_var,
            use_point_estimates)

    log_prior = log_joint(
        w_global_scale_variance=qw_global_scale_variance,
        w_global_scale_noncentered=qw_global_scale_noncentered,
        w_local_scale_variance=qw_local_scale_variance,
        w_local_scale_noncentered=qw_local_scale_noncentered,
        w=qw,
        x_bias=qx_bias,
        x_scale_concentration_c=qx_scale_concentration_c,
        x_scale_rate_c=qx_scale_rate_c,
        x_scale=qx_scale,
        w_distortion_c=qw_distortion_c,
        x=qx_gene,
        x_isoform=qx_isoform)

    variational_log_joint = ed.make_log_joint_fn(
        lambda: linear_gene_regression_variational_model(
            qw_global_scale_variance_loc_var,
            qw_global_scale_variance_scale_var,
            qw_global_scale_noncentered_loc_var,
            qw_global_scale_noncentered_scale_var,
            qw_local_scale_variance_loc_var,
            qw_local_scale_variance_scale_var,
            qw_local_scale_noncentered_loc_var,
            qw_local_scale_noncentered_scale_var,
            qw_loc_var, qw_scale_var,
            qx_bias_loc_var, qx_bias_scale_var,
            qx_scale_concentration_c_loc_var, qx_scale_rate_c_loc_var,
            qx_scale_loc_var, qx_scale_scale_var,
            qw_distortion_c_loc_var,
            qx_gene_loc_var, qx_gene_scale_var,
            qx_isoform_loc_var, qx_isoform_scale_var,
            use_point_estimates))

    entropy = variational_log_joint(
        qw_global_scale_variance=qw_global_scale_variance,
        qw_global_scale_noncentered=qw_global_scale_noncentered,
        qw_local_scale_variance=qw_local_scale_variance,
        qw_local_scale_noncentered=qw_local_scale_noncentered,
        qw=qw,
        qx_bias=qx_bias,
        qx_scale=qx_scale,
        qx_scale_concentration_c=qx_scale_concentration_c,
        qx_scale_rate_c=qx_scale_rate_c,
        qw_distortion_c=qw_distortion_c,
        qx=qx_gene,
        qx_isoform=qx_isoform)

    log_likelihood = make_likelihood(qx_gene, qx_isoform)

    scale_penalty = tf.reduce_sum(
        tfd.Normal(loc=0.0, scale=5e-4).log_prob(
            tf.log(tf.reduce_sum(tf.exp(qx_gene), axis=1))))

    elbo = log_prior + log_likelihood - entropy + elbo_add_term + scale_penalty
    elbo = tf.check_numerics(elbo, "Non-finite ELBO value")

    if sess is None:
        sess = tf.Session()

    train(sess, -elbo, init_feed_dict, niter, 1e-3, decay_rate=1.0)
    # train(sess, -elbo, init_feed_dict, niter, 1e-2, decay_rate=0.995)

    return (sess.run(qx_gene.distribution.mean()),
            sess.run(qw.distribution.mean()),
            sess.run(qw.distribution.stddev()),
            sess.run(qx_bias.distribution.mean()),
            sess.run(qx_scale))
def run_vip_hmc_continuous(model_config,
                           num_samples=2000,
                           burnin=1000,
                           use_iaf_posterior=False,
                           num_leapfrog_steps=4,
                           num_adaptation_steps=500,
                           num_optimization_steps=2000,
                           num_mc_samples=32,
                           tau=1.,
                           do_sample=True,
                           description='',
                           experiments_dir=''):

  tf.reset_default_graph()

  if use_iaf_posterior:
    # IAF posterior doesn't give us stddevs for step sizes for HMC (we could
    # extract them by sampling but I haven't implemented that), and we mostly
    # care about it for ELBOs anyway.
    do_sample = False

  init_val_loc = tf.placeholder('float', shape=())
  init_val_scale = tf.placeholder('float', shape=())

  (learnable_parameters,
   learnable_parametrisation,
   _) = ed_transforms.make_learnable_parametrisation(
       init_val_loc=init_val_loc, init_val_scale=init_val_scale, tau=tau)

  def model_vip(*params):
    with ed.interception(learnable_parametrisation):
      return model_config.model(*params)

  log_joint_vip = ed.make_log_joint_fn(model_vip)

  with ed.tape() as model_tape:
    _ = model_vip(*model_config.model_args)

  param_shapes = collections.OrderedDict()
  target_vip_kwargs = {}
  for param in model_tape.keys():
    if param not in model_config.observed_data.keys():
      param_shapes[param] = model_tape[param].shape
    else:
      target_vip_kwargs[param] = model_config.observed_data[param]

  def target_vip(*param_args):
    i = 0
    for param in model_tape.keys():
      if param not in model_config.observed_data.keys():
        target_vip_kwargs[param] = param_args[i]
        i = i + 1
    return log_joint_vip(*model_config.model_args, **target_vip_kwargs)

  full_kwargs = collections.OrderedDict(model_config.observed_data.items())
  full_kwargs['parameterisation'] = collections.OrderedDict()
  for k in learnable_parameters.keys():
    full_kwargs['parameterisation'][k] = learnable_parameters[k]

  if use_iaf_posterior:
    elbo = util.get_iaf_elbo(
        target_vip,
        num_mc_samples=num_mc_samples,
        param_shapes=param_shapes)
    variational_parameters = {}
  else:
    elbo, variational_parameters = util.get_mean_field_elbo(
        model_vip,
        target_vip,
        num_mc_samples=num_mc_samples,
        model_args=model_config.model_args,
        vi_kwargs=full_kwargs)
    vip_step_size_approx = util.get_approximate_step_size(
        variational_parameters, num_leapfrog_steps)

  ##############################################################################

  best_elbo = None
  model_dir = os.path.join(
      experiments_dir, str(description + '_' + model_config.model.__name__))
  if not tf.gfile.Exists(model_dir):
    tf.gfile.MakeDirs(model_dir)
  saver = tf.train.Saver()
  dir_save = os.path.join(model_dir, 'saved_params_{}'.format(gen_id()))
  if not tf.gfile.Exists(dir_save):
    tf.gfile.MakeDirs(dir_save)

  best_lr = None
  best_init_loc = None
  best_init_scale = None

  learning_rate_ph = tf.placeholder(shape=[], dtype=tf.float32)
  learning_rate = tf.Variable(learning_rate_ph, trainable=False)
  optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
  train = optimizer.minimize(-elbo)
  init = tf.global_variables_initializer()

  learning_rates = [0.003, 0.01, 0.01, 0.1, 0.003, 0.01]
  if use_iaf_posterior:
    learning_rates = [3e-5, 1e-4, 3e-4, 1e-4]

  start_time = time.time()
  for learning_rate_val in learning_rates:
    for init_loc in [0.]:  # , 10., -10.]:
      for init_scale in [init_loc]:
        timeline = []
        with tf.Session() as sess:
          init.run(feed_dict={init_val_loc: init_loc,
                              init_val_scale: init_scale,
                              learning_rate_ph: learning_rate_val})
          this_timeline = []
          for i in range(num_optimization_steps):
            _, e = sess.run([train, elbo])
            if np.isnan(e):
              util.print('got NaN in ELBO optimization, stopping...')
              break
            this_timeline.append(e)

          this_elbo = np.mean(this_timeline[-100:])
          info_str = ('finished cVIP optimization with elbo {} vs '
                      'best ELBO {}'.format(this_elbo, best_elbo))
          util.print(info_str)
          if best_elbo is None or best_elbo < this_elbo:
            best_elbo = this_elbo
            timeline = this_timeline

            vals = sess.run(list(learnable_parameters.values()))
            learned_reparam = collections.OrderedDict(
                zip(learnable_parameters.keys(), vals))
            vals = sess.run(list(variational_parameters.values()))
            learned_variational_params = collections.OrderedDict(
                zip(variational_parameters.keys(), vals))

            util.print('learned params {}'.format(learned_reparam))
            util.print('learned variational params {}'.format(
                learned_variational_params))

            _ = saver.save(sess, dir_save)
            best_lr = learning_rate
            best_init_loc = init_loc
            best_init_scale = init_scale

  vi_time = time.time() - start_time

  util.print('BEST: LR={}, init={}, {}'.format(best_lr, best_init_loc,
                                               best_init_scale))
  util.print('ELBO: {}'.format(best_elbo))

  to_centered = model_config.make_to_centered(**learned_reparam)

  results = collections.OrderedDict()
  results['elbo'] = best_elbo

  with tf.Session() as sess:
    saver.restore(sess, dir_save)
    results['vp'] = learned_variational_params

    if do_sample:
      vip_step_size_init = sess.run(vip_step_size_approx)

      vip_step_size = [tf.get_variable(
          name='step_size_vip' + str(i),
          initializer=np.array(vip_step_size_init[i], dtype=np.float32),
          use_resource=True,  # For TFE compatibility.
          trainable=False) for i in range(len(vip_step_size_init))]

      kernel_vip = mcmc.HamiltonianMonteCarlo(
          target_log_prob_fn=target_vip,
          step_size=vip_step_size,
          num_leapfrog_steps=num_leapfrog_steps,
          step_size_update_fn=mcmc.make_simple_step_size_update_policy(
              num_adaptation_steps=num_adaptation_steps, target_rate=0.85))

      states, kernel_results_vip = mcmc.sample_chain(
          num_results=num_samples,
          num_burnin_steps=burnin,
          current_state=[
              tf.zeros(param_shapes[param]) for param in param_shapes.keys()
          ],
          kernel=kernel_vip,
          num_steps_between_results=1)

      states_vip = transform_mcmc_states(states, to_centered)

      init_again = tf.global_variables_initializer()
      init_again.run(feed_dict={
          init_val_loc: best_init_loc,
          init_val_scale: best_init_scale,
          learning_rate_ph: 1.0})  # learning rate doesn't matter for HMC.

      ess_vip = tfp.mcmc.effective_sample_size(states_vip)

      start_time = time.time()
      samples, is_accepted, ess, ss_vip, log_accept_ratio = sess.run(
          (states_vip, kernel_results_vip.is_accepted, ess_vip,
           kernel_results_vip.extra.step_size_assign,
           kernel_results_vip.log_accept_ratio))
      sampling_time = time.time() - start_time

      results['samples'] = collections.OrderedDict()
      results['is_accepted'] = is_accepted
      results['acceptance_rate'] = np.sum(is_accepted) * 100. / float(
          num_samples)
      results['ess'] = ess
      results['sampling_time'] = sampling_time
      results['log_accept_ratio'] = log_accept_ratio
      results['step_size'] = [s[0] for s in ss_vip]

      i = 0
      for param in param_shapes.keys():
        results['samples'][param] = samples[i]
        i = i + 1
    # end if

    results['parameterisation'] = collections.OrderedDict()
    i = 0
    for param in param_shapes.keys():
      name_a = param[:-5] + 'a'
      name_b = param[:-5] + 'b'
      try:
        results['parameterisation'][name_a] = learned_reparam[name_a]
        results['parameterisation'][name_b] = learned_reparam[name_b]
      except KeyError:
        continue
      i = i + 1

    results['elbo_timeline'] = timeline
    results['vi_time'] = vi_time

  results['init_pos'] = best_init_loc

  return results
def confounder_ppca(X, latent_dim, holdout_portion):
    """Estimate a substitute confounder using PPCA.

    Adapted from deconfounder_tutorial.ipynb,
    https://github.com/blei-lab/deconfounder_tutorial

    Args:
        X: A numpy array or pandas dataframe of the original covariates,
            dimension (n x m).
        latent_dim: The number of latent factors to be estimated.
        holdout_portion: Fraction of the data to be used as holdout.

    Returns:
        w_mean_inferred: (latent_dim x m) matrix.
        w_std_inferred: (latent_dim x m) matrix.
        z_mean_inferred: mean of substitute confounder, dimension
            (n x latent_dim).
        z_std_inferred: std of substitute confounder, dimension
            (n x latent_dim).
        x_vad: (n x m) matrix with the heldout entries only and 0 elsewhere.
        holdout_mask: sparse (n x m) matrix with 1 on the heldout entries
            and 0 elsewhere, s.t. x_vad = X * holdout_mask.
        holdout_row: row indices of the heldout entries.
    """
    num_datapoints, data_dim = X.shape

    n_holdout = int(holdout_portion * num_datapoints * data_dim)

    holdout_row = np.random.randint(num_datapoints, size=n_holdout)
    holdout_col = np.random.randint(data_dim, size=n_holdout)
    holdout_mask = (sparse.coo_matrix(
        (np.ones(n_holdout), (holdout_row, holdout_col)),
        shape=X.shape)).toarray()

    holdout_subjects = np.unique(holdout_row)

    x_train = np.multiply(1 - holdout_mask, X)
    x_vad = np.multiply(holdout_mask, X)

    def ppca_model(data_dim, latent_dim, num_datapoints, stddv_datapoints):
        w = ed.Normal(loc=tf.zeros([latent_dim, data_dim]),
                      scale=tf.ones([latent_dim, data_dim]),
                      name="w")  # parameter
        z = ed.Normal(
            loc=tf.zeros([num_datapoints, latent_dim]),
            scale=tf.ones([num_datapoints, latent_dim]),
            name="z")  # local latent variable / substitute confounder
        x = ed.Normal(loc=tf.multiply(tf.matmul(z, w), 1 - holdout_mask),
                      scale=stddv_datapoints * tf.ones(
                          [num_datapoints, data_dim]),
                      name="x")  # (modeled) data
        return x, (w, z)

    log_joint = ed.make_log_joint_fn(ppca_model)

    stddv_datapoints = 0.1

    model = ppca_model(data_dim=data_dim,
                       latent_dim=latent_dim,
                       num_datapoints=num_datapoints,
                       stddv_datapoints=stddv_datapoints)

    def variational_model(qw_mean, qw_stddv, qz_mean, qz_stddv):
        qw = ed.Normal(loc=qw_mean, scale=qw_stddv, name="qw")
        qz = ed.Normal(loc=qz_mean, scale=qz_stddv, name="qz")
        return qw, qz

    log_q = ed.make_log_joint_fn(variational_model)

    def target(w, z):
        """Unnormalized target density as a function of the parameters."""
        return log_joint(data_dim=data_dim,
                         latent_dim=latent_dim,
                         num_datapoints=num_datapoints,
                         stddv_datapoints=stddv_datapoints,
                         w=w, z=z, x=x_train)

    def target_q(qw, qz):
        return log_q(qw_mean=qw_mean, qw_stddv=qw_stddv,
                     qz_mean=qz_mean, qz_stddv=qz_stddv,
                     qw=qw, qz=qz)

    qw_mean = tf.Variable(np.ones([latent_dim, data_dim]), dtype=tf.float32)
    qz_mean = tf.Variable(np.ones([num_datapoints, latent_dim]),
                          dtype=tf.float32)
    qw_stddv = tf.nn.softplus(
        tf.Variable(-4 * np.ones([latent_dim, data_dim]), dtype=tf.float32))
    qz_stddv = tf.nn.softplus(
        tf.Variable(-4 * np.ones([num_datapoints, latent_dim]),
                    dtype=tf.float32))

    qw, qz = variational_model(qw_mean=qw_mean, qw_stddv=qw_stddv,
                               qz_mean=qz_mean, qz_stddv=qz_stddv)

    energy = target(qw, qz)
    entropy = -target_q(qw, qz)

    elbo = energy + entropy

    optimizer = tf.train.AdamOptimizer(learning_rate=0.05)
    train = optimizer.minimize(-elbo)

    init = tf.global_variables_initializer()

    t = []
    num_epochs = 500
    with tf.Session() as sess:
        sess.run(init)
        for i in range(num_epochs):
            sess.run(train)
            if i % 5 == 0:
                t.append(sess.run([elbo]))

        w_mean_inferred = sess.run(qw_mean)
        w_stddv_inferred = sess.run(qw_stddv)
        z_mean_inferred = sess.run(qz_mean)
        z_stddv_inferred = sess.run(qz_stddv)

    print("Inferred axes:")
    print(w_mean_inferred)
    print("Standard Deviation:")
    print(w_stddv_inferred)

    plt.plot(range(1, num_epochs, 5), t)
    plt.show()

    def replace_latents(w, z):

        def interceptor(rv_constructor, *rv_args, **rv_kwargs):
            """Replaces the priors with actual values to generate samples from."""
            name = rv_kwargs.pop("name")
            if name == "w":
                rv_kwargs["value"] = w
            elif name == "z":
                rv_kwargs["value"] = z
            return rv_constructor(*rv_args, **rv_kwargs)

        return interceptor

    return [w_mean_inferred, w_stddv_inferred,
            z_mean_inferred, z_stddv_inferred], x_vad, holdout_mask, holdout_row
def probabilistic_matrix_factorization(data_dim, latent_dim, num_datapoints,
                                       stddv_datapoints):  # (unmodeled) data
    w = ed.Normal(loc=tf.zeros([data_dim, latent_dim]),
                  scale=2.0 * tf.ones([data_dim, latent_dim]),
                  name="w")  # parameter
    z = ed.Normal(loc=tf.zeros([latent_dim, num_datapoints]),
                  scale=tf.ones([latent_dim, num_datapoints]),
                  name="z")  # parameter
    x = tf.nn.softplus(
        ed.Normal(loc=tf.matmul(w, z),
                  scale=stddv_datapoints * tf.ones(
                      [data_dim, num_datapoints]),
                  name="x"))  # (modeled) data
    return x, (w, z)


log_joint = ed.make_log_joint_fn(probabilistic_matrix_factorization)


def read_data():
    data = np.zeros((N, M))
    f = open("./data/ratings.dat", "r")
    lines = f.readlines()
    for line in lines:
        user, movie, rating, *_ = list(map(int, line.split("::")))
        data[user - 1, movie - 1] = rating
    return data


# def get_indicators(N, M, prob_std=0.8):
def train_ppca(x_train, data_dim, latent_dim, num_datapoints,
               stddv_datapoints):

    def probabilistic_pca(data_dim, latent_dim, num_datapoints,
                          stddv_datapoints):
        w = ed.Normal(loc=tf.zeros([data_dim, latent_dim]),
                      scale=2.0 * tf.ones([data_dim, latent_dim]),
                      name="w")  # PRINCIPAL COMPONENTS
        z = ed.Normal(loc=tf.zeros([latent_dim, num_datapoints]),
                      scale=tf.ones([latent_dim, num_datapoints]),
                      name="z")  # LATENT VARIABLE
        x = ed.Normal(loc=tf.matmul(w, z),
                      scale=stddv_datapoints * tf.ones(
                          [data_dim, num_datapoints]),
                      name="x")
        return x, (w, z)

    log_joint = ed.make_log_joint_fn(probabilistic_pca)

    def target(w, z):
        """Unnormalized target density"""
        return log_joint(data_dim=data_dim,
                         latent_dim=latent_dim,
                         num_datapoints=num_datapoints,
                         stddv_datapoints=stddv_datapoints,
                         w=w, z=z, x=x_train)

    tf.reset_default_graph()

    def variational_model(qw_mean, qw_stddv, qz_mean, qz_stddv):
        qw = ed.Normal(loc=qw_mean, scale=qw_stddv, name="qw")
        qz = ed.Normal(loc=qz_mean, scale=qz_stddv, name="qz")
        return qw, qz

    log_q = ed.make_log_joint_fn(variational_model)

    def target_q(qw, qz):
        return log_q(qw_mean=qw_mean, qw_stddv=qw_stddv,
                     qz_mean=qz_mean, qz_stddv=qz_stddv,
                     qw=qw, qz=qz)

    qw_mean = tf.Variable(np.ones([data_dim, latent_dim]), dtype=tf.float32)
    qz_mean = tf.Variable(np.ones([latent_dim, num_datapoints]),
                          dtype=tf.float32)
    qw_stddv = tf.nn.softplus(
        tf.Variable(-4 * np.ones([data_dim, latent_dim]), dtype=tf.float32))
    qz_stddv = tf.nn.softplus(
        tf.Variable(-4 * np.ones([latent_dim, num_datapoints]),
                    dtype=tf.float32))

    qw, qz = variational_model(qw_mean=qw_mean, qw_stddv=qw_stddv,
                               qz_mean=qz_mean, qz_stddv=qz_stddv)

    energy = target(qw, qz)
    entropy = -target_q(qw, qz)
    elbo = energy + entropy

    optimizer = tf.train.AdamOptimizer(learning_rate=0.05)
    train = optimizer.minimize(-elbo)

    init = tf.global_variables_initializer()

    t = []
    num_epochs = 100
    with tf.Session() as sess:
        sess.run(init)
        for i in range(num_epochs):
            sess.run(train)
            if i % 5 == 0:
                t.append(sess.run([elbo]))

        w_mean_inferred = sess.run(qw_mean)
        w_stddv_inferred = sess.run(qw_stddv)
        z_mean_inferred = sess.run(qz_mean)
        z_stddv_inferred = sess.run(qz_stddv)

    plt.plot(range(1, num_epochs, 5), t)
    plt.show()

    def replace_latents(w=None, z=None):
        """Helper function that replaces our prior for w and z"""

        def interceptor(rv_constructor, *rv_args, **rv_kwargs):
            """Replaces the priors with actual values"""
            name = rv_kwargs.pop("name")
            if name == "w":
                rv_kwargs["value"] = w
            elif name == "z":
                rv_kwargs["value"] = z
            return rv_constructor(*rv_args, **rv_kwargs)

        return interceptor

    with ed.interception(replace_latents(w_mean_inferred, z_mean_inferred)):
        generate = probabilistic_pca(data_dim=data_dim,
                                     latent_dim=latent_dim,
                                     num_datapoints=num_datapoints,
                                     stddv_datapoints=stddv_datapoints)

    with tf.Session() as sess:
        x_generated, _ = sess.run(generate)

    return TrainingResults(qw, qz, w_mean_inferred, w_stddv_inferred,
                           z_mean_inferred, z_stddv_inferred, x_generated)
def run_interleaved_hmc(model_config,
                        num_samples=2000,
                        step_size_cp=0.1,
                        step_size_ncp=0.1,
                        burnin=1000,
                        num_leapfrog_steps=4):
  """Given a (centred) model, this function transforms it to a fully
  non-centred one, and uses both models to run interleaved HMC.
  """
  tf.reset_default_graph()

  log_joint_centered = ed.make_log_joint_fn(model_config.model)

  with ed.tape() as model_tape_cp:
    _ = model_config.model(*model_config.model_args)

  param_shapes = collections.OrderedDict()
  target_cp_kwargs = {}
  for param in model_tape_cp.keys():
    if param not in model_config.observed_data.keys():
      param_shapes[param] = model_tape_cp[param].shape
    else:
      target_cp_kwargs[param] = model_config.observed_data[param]

  def target_cp(*param_args):
    i = 0
    for param in model_tape_cp.keys():
      if param not in model_config.observed_data.keys():
        target_cp_kwargs[param] = param_args[i]
        i = i + 1
    return log_joint_centered(*model_config.model_args, **target_cp_kwargs)

  def model_noncentered(*params):
    with ed.interception(ed_transforms.ncp):
      return model_config.model(*params)

  log_joint_noncentered = ed.make_log_joint_fn(model_noncentered)

  with ed.tape() as model_tape_ncp:
    _ = model_noncentered(*model_config.model_args)

  param_shapes = collections.OrderedDict()
  target_ncp_kwargs = {}
  for param in model_tape_ncp.keys():
    if param not in model_config.observed_data.keys():
      param_shapes[param] = model_tape_ncp[param].shape
    else:
      target_ncp_kwargs[param] = model_config.observed_data[param]

  def target_ncp(*param_args):
    i = 0
    for param in model_tape_ncp.keys():
      if param not in model_config.observed_data.keys():
        target_ncp_kwargs[param] = param_args[i]
        i = i + 1
    return log_joint_noncentered(*model_config.model_args,
                                 **target_ncp_kwargs)

  return _run_hmc_interleaved(target_cp, target_ncp,
                              param_shapes,
                              to_centered=model_config.to_centered,
                              to_noncentered=model_config.to_noncentered,
                              num_samples=num_samples,
                              step_size_cp=step_size_cp,
                              step_size_ncp=step_size_ncp,
                              burnin=burnin,
                              num_leapfrog_steps=num_leapfrog_steps)
    m = ed.Normal(loc=mmu, scale=msigma, name="m")
    c = ed.Uniform(low=cmin, high=cmax, name="c")
    y = ed.Normal(loc=(m * x + c), scale=sigma, name="y")
    return y


# set initial state (drawn from prior)
qm = tf.random_normal([], mean=mmu, stddev=msigma, dtype=tf.float32)
qc = tf.random_uniform([], minval=cmin, maxval=cmax, dtype=tf.float32)

# convert x values and data to tensors
x = tf.convert_to_tensor(x, dtype=tf.float32)
data = tf.convert_to_tensor(data, dtype=tf.float32)

# make function
log_joint = ed.make_log_joint_fn(log_likelihood)


# Target log-probability as a function of states
def target_log_prob_fn(m, c):
    return log_joint(x, cmin, cmax, mmu, msigma, sigma, m=m, c=c, y=data)


Nsamples = 2000  # final number of samples
Nburn = 2000     # number of tuning samples

# set up Hamiltonian MC
hmc_kernel = tfp.mcmc.HamiltonianMonteCarlo(
    target_log_prob_fn=target_log_prob_fn,
    step_size=0.01,
    num_leapfrog_steps=5)

# The original snippet breaks off inside this call; the arguments below are
# an assumed completion, matching the binary_bayesian_network example above.
states, kernel_results = tfp.mcmc.sample_chain(
    num_results=Nsamples,
    current_state=[qm, qc],
    kernel=hmc_kernel,
    num_burnin_steps=Nburn)
def main(argv):
  del argv  # unused
  if not FLAGS.skip_plots:
    if tf.io.gfile.exists(FLAGS.model_dir):
      tf.compat.v1.logging.warning(
          "Warning: deleting old log directory at {}".format(
              FLAGS.model_dir))
      tf.io.gfile.rmtree(FLAGS.model_dir)
    tf.io.gfile.makedirs(FLAGS.model_dir)

  tf.compat.v1.enable_eager_execution()
  print("Number of available GPUs", tf.contrib.eager.num_gpus())

  if FLAGS.fake_data:
    features = tf.random.normal([20, 55])
    labels = tf.random.uniform([20], minval=0, maxval=2, dtype=tf.int32)
  else:
    features, labels = covertype()
  print("Data set size", features.shape[0])
  print("Number of features", features.shape[1])

  log_joint = ed.make_log_joint_fn(logistic_regression)

  @tf.function
  def target_log_prob_fn(coeffs):
    return log_joint(features=features, coeffs=coeffs, labels=labels)

  # Initialize using a sample from 20 steps of NUTS. This is roughly a MAP
  # estimate and is written explicitly to avoid differences in warm-starts
  # between different implementations (e.g., Stan, PyMC3).
  coeffs = tf.constant([
      +2.03420663e+00, -3.53567265e-02, -1.49223924e-01, -3.07049364e-01,
      -1.00028366e-01, -1.46827862e-01, -1.64167881e-01, -4.20344204e-01,
      +9.47479829e-02, -1.12681836e-02, +2.64442056e-01, -1.22087866e-01,
      -6.00568838e-02, -3.79419506e-01, -1.06668741e-01, -2.97053963e-01,
      -2.05253899e-01, -4.69537191e-02, -2.78072730e-02, -1.43250525e-01,
      -6.77954629e-02, -4.34899796e-03, +5.90927452e-02, +7.23133609e-02,
      +1.38526391e-02, -1.24497898e-01, -1.50733739e-02, -2.68872194e-02,
      -1.80925727e-02, +3.47936489e-02, +4.03552800e-02, -9.98773426e-03,
      +6.20188080e-02, +1.15002751e-01, +1.32145107e-01, +2.69109547e-01,
      +2.45785132e-01, +1.19035013e-01, -2.59744357e-02, +9.94279515e-04,
      +3.39266285e-02, -1.44057125e-02, -6.95222765e-02, -7.52013028e-02,
      +1.21171586e-01, +2.29205526e-02, +1.47308692e-01, -8.34354162e-02,
      -9.34122875e-02, -2.97472421e-02, -3.03937674e-01, -1.70958012e-01,
      -1.59496680e-01, -1.88516974e-01, -1.20889175e+00
  ])

  # Initialize step size via result of 50 warmup steps from Stan.
  step_size = 0.00167132

  coeffs_samples = []
  target_log_prob = None
  grads_target_log_prob = None
  for step in range(FLAGS.max_steps):
    print("Step", step)
    [
        [coeffs],
        target_log_prob,
        grads_target_log_prob,
    ] = no_u_turn_sampler.kernel(
        target_log_prob_fn=target_log_prob_fn,
        current_state=[coeffs],
        step_size=[step_size],
        seed=step,
        current_target_log_prob=target_log_prob,
        current_grads_target_log_prob=grads_target_log_prob)
    coeffs_samples.append(coeffs)

  if not FLAGS.skip_plots:
    for coeffs_sample in coeffs_samples:
      plt.plot(coeffs_sample.numpy())
    filename = os.path.join(FLAGS.model_dir, "coeffs_samples.png")
    plt.savefig(filename)
    print("Figure saved as", filename)
    plt.close()
plt.close()

"""""""""""""""""""""""""""""""""
# 2. MCMC, simple mixture
"""""""""""""""""""""""""""""""""

"""2.1. sampler basic config"""
num_results = 10000
num_burnin_steps = 10000

ls_val = 1.
n_mix_val = 20

# define mcmc computation graph
mcmc_graph = tf.Graph()
with mcmc_graph.as_default():
  # build the likelihood explicitly
  log_joint = ed.make_log_joint_fn(gp_regression.model_mixture)

  def target_log_prob_fn(mix_prob, gp_f, sigma):
    """Unnormalized target density as a function of states."""
    return log_joint(X_train, y=y_train,
                     ls=ls_val,
                     n_mix=n_mix_val,
                     mix_prob=mix_prob,
                     gp_f=gp_f,
                     sigma=sigma)

  # set up state container
  initial_state = [
      tf.constant([1 / n_mix_val] * n_mix_val, name='init_mix_prob'),
      tf.random_normal([n_mix_val, N], stddev=0.01, name='init_gp_func'),
      tf.constant([0.1] * n_mix_val, name='init_sigma'),
  ]
with open(os.path.join(_SAVE_ADDR_PREFIX, 'base/base_test_pred.pkl'),
          'rb') as file:
  base_test_pred = pk.load(file)

y_test = y_test.squeeze()

# create empty placeholder for X since it is required by model.
X_test = np.zeros((len(y_test), 2))

"""2.1. sampler basic config"""
num_results = 10000
num_burnin_steps = 5000

if _FIT_MCMC_MODELS:
  # define mcmc computation graph
  mcmc_graph = tf.Graph()
  with mcmc_graph.as_default():
    log_joint = ed.make_log_joint_fn(parametric_ensemble.model)

    ensemble_model_names = list(base_test_pred.keys())
    base_weight_names = [
        'base_weight_{}'.format(model_name)
        for model_name in ensemble_model_names
    ]
    model_specific_varnames = base_weight_names

    def target_log_prob_fn(sigma, temp, *model_specific_positional_args):
      """Unnormalized target density as a function of states."""
      # build kwargs for base model weight using positional args
      model_specific_kwargs = dict(
          zip(model_specific_varnames, model_specific_positional_args))

      return log_joint(X=X_test,