Example #1
  def build_update(self):
    """Simulate Langevin dynamics using a discretized integrator. Its
    discretization error goes to zero as the learning rate decreases.

    #### Notes

    The updates assume each Empirical random variable is directly
    parameterized by `tf.Variable`s.
    """
    old_sample = {z: tf.gather(qz.params, tf.maximum(self.t - 1, 0))
                  for z, qz in six.iteritems(self.latent_vars)}

    # Simulate Langevin dynamics.
    learning_rate = self.step_size / tf.cast(self.t + 1, tf.float32)
    grad_log_joint = tf.gradients(self._log_joint(old_sample),
                                  list(six.itervalues(old_sample)))
    sample = {}
    for z, grad_log_p in zip(six.iterkeys(old_sample), grad_log_joint):
      qz = self.latent_vars[z]
      event_shape = qz.event_shape
      normal = Normal(loc=tf.zeros(event_shape),
                      scale=learning_rate * tf.ones(event_shape))
      sample[z] = old_sample[z] + \
          0.5 * learning_rate * tf.convert_to_tensor(grad_log_p) + \
          normal.sample()

    # Update Empirical random variables.
    assign_ops = []
    for z, qz in six.iteritems(self.latent_vars):
      variable = qz.get_variables()[0]
      assign_ops.append(tf.scatter_update(variable, self.t, sample[z]))

    # Increment n_accept.
    assign_ops.append(self.n_accept.assign_add(1))
    return tf.group(*assign_ops)
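
The update built above is a stochastic-gradient Langevin step: take half a gradient step on the log joint, then add Gaussian noise whose scale follows the decaying learning rate. A minimal NumPy sketch of the same step (an illustration, not Edward's API; grad_log_joint is an assumed callable returning the gradient of the log joint):

import numpy as np

def sgld_step(theta, grad_log_joint, step_size, t, rng=np.random):
  # Mirrors build_update: the learning rate decays as step_size / (t + 1) and
  # the injected noise uses that learning rate as its standard deviation.
  learning_rate = step_size / float(t + 1)
  noise = rng.normal(0.0, learning_rate, size=np.shape(theta))
  return theta + 0.5 * learning_rate * grad_log_joint(theta) + noise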
Example #2
  def build_update(self):
    """
    Simulate Langevin dynamics using a discretized integrator. Its
    discretization error goes to zero as the learning rate decreases.
    """
    old_sample = {z: tf.gather(qz.params, tf.maximum(self.t - 1, 0))
                  for z, qz in six.iteritems(self.latent_vars)}

    # Simulate Langevin dynamics.
    learning_rate = self.step_size / tf.cast(self.t + 1, tf.float32)
    grad_log_joint = tf.gradients(self.log_joint(old_sample),
                                  list(six.itervalues(old_sample)))
    sample = {}
    for z, qz, grad_log_p in \
        zip(six.iterkeys(self.latent_vars),
            six.itervalues(self.latent_vars),
            grad_log_joint):
      event_shape = qz.get_event_shape()
      normal = Normal(mu=tf.zeros(event_shape),
                      sigma=learning_rate * tf.ones(event_shape))
      sample[z] = old_sample[z] + 0.5 * learning_rate * grad_log_p + \
          normal.sample()

    # Update Empirical random variables.
    assign_ops = []
    variables = {x.name: x for x in
                 tf.get_default_graph().get_collection(tf.GraphKeys.VARIABLES)}
    for z, qz in six.iteritems(self.latent_vars):
      variable = variables[qz.params.op.inputs[0].op.inputs[0].name]
      assign_ops.append(tf.scatter_update(variable, self.t, sample[z]))

    # Increment n_accept.
    assign_ops.append(self.n_accept.assign_add(1))
    return tf.group(*assign_ops)
Example #3
  def build_update(self):
    """
    Simulate Langevin dynamics using a discretized integrator. Its
    discretization error goes to zero as the learning rate decreases.
    """
    old_sample = {z: tf.gather(qz.params, tf.maximum(self.t - 1, 0))
                  for z, qz in six.iteritems(self.latent_vars)}

    # Simulate Langevin dynamics.
    learning_rate = self.step_size / tf.cast(self.t + 1, tf.float32)
    grad_log_joint = tf.gradients(self._log_joint(old_sample),
                                  list(six.itervalues(old_sample)))
    sample = {}
    for z, qz, grad_log_p in \
        zip(six.iterkeys(self.latent_vars),
            six.itervalues(self.latent_vars),
            grad_log_joint):
      event_shape = qz.get_event_shape()
      normal = Normal(mu=tf.zeros(event_shape),
                      sigma=learning_rate * tf.ones(event_shape))
      sample[z] = old_sample[z] + 0.5 * learning_rate * grad_log_p + \
          normal.sample()

    # Update Empirical random variables.
    assign_ops = []
    variables = {x.name: x for x in
                 tf.get_default_graph().get_collection(tf.GraphKeys.VARIABLES)}
    for z, qz in six.iteritems(self.latent_vars):
      variable = variables[qz.params.op.inputs[0].op.inputs[0].name]
      assign_ops.append(tf.scatter_update(variable, self.t, sample[z]))

    # Increment n_accept.
    assign_ops.append(self.n_accept.assign_add(1))
    return tf.group(*assign_ops)
def _test(mu, sigma, n):
    rv = Normal(mu=mu, sigma=sigma)
    rv_sample = rv.sample(n)
    x = rv_sample.eval()
    x_tf = tf.constant(x, dtype=tf.float32)
    mu = mu.eval()
    sigma = sigma.eval()
    assert np.allclose(
        rv.log_prob(x_tf).eval(), stats.norm.logpdf(x, mu, sigma))
def _test(mu, sigma, n):
  rv = Normal(mu=mu, sigma=sigma)
  rv_sample = rv.sample(n)
  x = rv_sample.eval()
  x_tf = tf.constant(x, dtype=tf.float32)
  mu = mu.eval()
  sigma = sigma.eval()
  assert np.allclose(rv.log_prob(x_tf).eval(),
                     stats.norm.logpdf(x, mu, sigma))
Example #6
  def build_update(self):
    """Simulate Hamiltonian dynamics using a numerical integrator.
    Correct for the integrator's discretization error using an
    acceptance ratio.

    #### Notes

    The updates assume each Empirical random variable is directly
    parameterized by `tf.Variable`s.
    """
    old_sample = {z: tf.gather(qz.params, tf.maximum(self.t - 1, 0))
                  for z, qz in six.iteritems(self.latent_vars)}
    old_sample = OrderedDict(old_sample)

    # Sample momentum.
    old_r_sample = OrderedDict()
    for z, qz in six.iteritems(self.latent_vars):
      event_shape = qz.event_shape
      normal = Normal(loc=tf.zeros(event_shape), scale=tf.ones(event_shape))
      old_r_sample[z] = normal.sample()

    # Simulate Hamiltonian dynamics.
    new_sample, new_r_sample = leapfrog(old_sample, old_r_sample,
                                        self.step_size, self._log_joint,
                                        self.n_steps)

    # Calculate acceptance ratio.
    ratio = tf.reduce_sum([0.5 * tf.reduce_sum(tf.square(r))
                           for r in six.itervalues(old_r_sample)])
    ratio -= tf.reduce_sum([0.5 * tf.reduce_sum(tf.square(r))
                            for r in six.itervalues(new_r_sample)])
    ratio += self._log_joint(new_sample)
    ratio -= self._log_joint(old_sample)

    # Accept or reject sample.
    u = Uniform().sample()
    accept = tf.log(u) < ratio
    sample_values = tf.cond(accept, lambda: list(six.itervalues(new_sample)),
                            lambda: list(six.itervalues(old_sample)))
    if not isinstance(sample_values, list):
      # `tf.cond` returns tf.Tensor if output is a list of size 1.
      sample_values = [sample_values]

    sample = {z: sample_value for z, sample_value in
              zip(six.iterkeys(new_sample), sample_values)}

    # Update Empirical random variables.
    assign_ops = []
    for z, qz in six.iteritems(self.latent_vars):
      variable = qz.get_variables()[0]
      assign_ops.append(tf.scatter_update(variable, self.t, sample[z]))

    # Increment n_accept (if accepted).
    assign_ops.append(self.n_accept.assign_add(tf.where(accept, 1, 0)))
    return tf.group(*assign_ops)
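
The leapfrog call above is the numerical integrator that simulates the Hamiltonian dynamics. A plain-NumPy sketch of a standard leapfrog integrator for a single latent variable (not Edward's implementation; grad_log_joint is an assumed callable returning d log p(z) / dz):

import numpy as np

def leapfrog_sketch(z, r, step_size, grad_log_joint, n_steps):
  z, r = np.copy(z), np.copy(r)
  r = r + 0.5 * step_size * grad_log_joint(z)    # half step for momentum
  for i in range(n_steps):
    z = z + step_size * r                        # full step for position
    if i < n_steps - 1:
      r = r + step_size * grad_log_joint(z)      # full step for momentum
  r = r + 0.5 * step_size * grad_log_joint(z)    # closing half step
  return z, r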
Example #8
  def build_update(self):
    """
    Simulate Hamiltonian dynamics using a numerical integrator.
    Correct for the integrator's discretization error using an
    acceptance ratio.
    """
    old_sample = {z: tf.gather(qz.params, tf.maximum(self.t - 1, 0))
                  for z, qz in six.iteritems(self.latent_vars)}

    # Sample momentum.
    old_r_sample = {}
    for z, qz in six.iteritems(self.latent_vars):
      event_shape = qz.get_event_shape()
      normal = Normal(mu=tf.zeros(event_shape), sigma=tf.ones(event_shape))
      old_r_sample[z] = normal.sample()

    # Simulate Hamiltonian dynamics.
    new_sample = old_sample
    new_r_sample = old_r_sample
    for _ in range(self.n_steps):
      new_sample, new_r_sample = leapfrog(new_sample, new_r_sample,
                                          self.step_size, self.log_joint)

    # Calculate acceptance ratio.
    ratio = tf.reduce_sum([0.5 * tf.square(r)
                           for r in six.itervalues(old_r_sample)])
    ratio -= tf.reduce_sum([0.5 * tf.square(r)
                            for r in six.itervalues(new_r_sample)])
    ratio += self.log_joint(new_sample)
    ratio -= self.log_joint(old_sample)

    # Accept or reject sample.
    u = Uniform().sample()
    accept = tf.log(u) < ratio
    sample_values = tf.cond(accept, lambda: list(six.itervalues(new_sample)),
                            lambda: list(six.itervalues(old_sample)))
    if not isinstance(sample_values, list):
      # ``tf.cond`` returns tf.Tensor if output is a list of size 1.
      sample_values = [sample_values]

    sample = {z: sample_value for z, sample_value in
              zip(six.iterkeys(new_sample), sample_values)}

    # Update Empirical random variables.
    assign_ops = []
    variables = {x.name: x for x in
                 tf.get_default_graph().get_collection(tf.GraphKeys.VARIABLES)}
    for z, qz in six.iteritems(self.latent_vars):
      variable = variables[qz.params.op.inputs[0].op.inputs[0].name]
      assign_ops.append(tf.scatter_update(variable, self.t, sample[z]))

    # Increment n_accept (if accepted).
    assign_ops.append(self.n_accept.assign_add(tf.select(accept, 1, 0)))
    return tf.group(*assign_ops)
def _test(shape, n):
    rv = Normal(shape, loc=tf.zeros(shape), scale=tf.ones(shape))
    rv_sample = rv.sample(n)
    x = rv_sample.eval()
    x_tf = tf.constant(x, dtype=tf.float32)
    loc = rv.loc.eval()
    scale = rv.scale.eval()
    for idx in range(shape[0]):
        assert np.allclose(
            rv.log_prob_idx((idx, ), x_tf).eval(),
            stats.norm.logpdf(x[:, idx], loc[idx], scale[idx]))
Example #10
  def build_update(self):
    """
    Simulate Hamiltonian dynamics using a numerical integrator.
    Correct for the integrator's discretization error using an
    acceptance ratio.
    """
    old_sample = {z: tf.gather(qz.params, tf.maximum(self.t - 1, 0))
                  for z, qz in six.iteritems(self.latent_vars)}

    # Sample momentum.
    old_r_sample = {}
    for z, qz in six.iteritems(self.latent_vars):
      event_shape = qz.get_event_shape()
      normal = Normal(mu=tf.zeros(event_shape), sigma=tf.ones(event_shape))
      old_r_sample[z] = normal.sample()

    # Simulate Hamiltonian dynamics.
    new_sample = old_sample
    new_r_sample = old_r_sample
    for _ in range(self.n_steps):
      new_sample, new_r_sample = leapfrog(new_sample, new_r_sample,
                                          self.step_size, self._log_joint)

    # Calculate acceptance ratio.
    ratio = tf.reduce_sum([0.5 * tf.square(r)
                           for r in six.itervalues(old_r_sample)])
    ratio -= tf.reduce_sum([0.5 * tf.square(r)
                            for r in six.itervalues(new_r_sample)])
    ratio += self._log_joint(new_sample)
    ratio -= self._log_joint(old_sample)

    # Accept or reject sample.
    u = Uniform().sample()
    accept = tf.log(u) < ratio
    sample_values = tf.cond(accept, lambda: list(six.itervalues(new_sample)),
                            lambda: list(six.itervalues(old_sample)))
    if not isinstance(sample_values, list):
      # ``tf.cond`` returns tf.Tensor if output is a list of size 1.
      sample_values = [sample_values]

    sample = {z: sample_value for z, sample_value in
              zip(six.iterkeys(new_sample), sample_values)}

    # Update Empirical random variables.
    assign_ops = []
    variables = {x.name: x for x in
                 tf.get_default_graph().get_collection(tf.GraphKeys.VARIABLES)}
    for z, qz in six.iteritems(self.latent_vars):
      variable = variables[qz.params.op.inputs[0].op.inputs[0].name]
      assign_ops.append(tf.scatter_update(variable, self.t, sample[z]))

    # Increment n_accept (if accepted).
    assign_ops.append(self.n_accept.assign_add(tf.select(accept, 1, 0)))
    return tf.group(*assign_ops)
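
The acceptance test above compares the change in kinetic plus potential energy against a uniform draw on the log scale. A NumPy sketch of that accept/reject step for a single variable (illustrative only; log_joint is an assumed callable):

import numpy as np

def accept_reject_sketch(log_joint, old_z, new_z, old_r, new_r, rng=np.random):
  # Log acceptance ratio: kinetic-energy difference plus log-joint difference.
  ratio = 0.5 * np.sum(np.square(old_r)) - 0.5 * np.sum(np.square(new_r))
  ratio += log_joint(new_z) - log_joint(old_z)
  accept = np.log(rng.uniform()) < ratio
  return (new_z if accept else old_z), accept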
Example #11
    def build_update(self):
        """Use Adam to optimize the model and treat each training step as
    sample from the posterior distribution. Approximate SGLD by adding noise
    to the updates based on the current adaptive learning rate. Approximate
    the sampled distribution by diagonal Gaussians which parameters are
    incrementally updated.
    Notes
    -----
    The updates assume each Empirical random variable is directly
    parameterized by ``tf.Variable``s.
    """
        old_sample = {z: qz for z, qz in six.iteritems(self.empirical_vals)}

        # Calculate Adam updates.
        opt = tf.train.AdamOptimizer(self.learning_rate)
        grads = opt.compute_gradients(-1. * self._log_joint(old_sample),
                                      list(six.itervalues(old_sample)))
        train_step = opt.apply_gradients(grads)

        # Add noise according to current adaptive learning rate
        noise_step = []
        with tf.control_dependencies([train_step]):
            for z, qz in six.iteritems(self.empirical_vals):
                lr = (opt._lr_t * tf.sqrt(1. - opt._beta2_power) /
                      (1. - opt._beta1_power))
                m = opt.get_slot(qz, "m")
                v = opt.get_slot(qz, "v")
                eff_lr = lr * m / (tf.sqrt(v) + opt._epsilon_t)
                noise_dist = Normal(mu=tf.zeros(tf.shape(qz)),
                                    sigma=2. * eff_lr * tf.ones(tf.shape(qz)))
                noise_add = old_sample[z].assign_add(noise_dist.sample())
                noise_step.append(noise_add)

        # Update Empirical random variables and check whether the Gaussian
        # approximation should be updated this step
        with tf.control_dependencies(noise_step):
            update_approximations = tf.logical_and(
                tf.greater_equal(self.t, self.burn_in),
                tf.equal(tf.mod(self.t, self.thinning), 0))
        assign_ops = []
        assign_ops.append(
            tf.cond(update_approximations,
                    lambda: self.build_approximation_update(),
                    lambda: tf.no_op()))

        # Increment n_accept.
        assign_ops.append(self.n_accept.assign_add(1))
        return tf.group(*assign_ops)
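
The noise scale in the loop above is tied to Adam's preconditioned step size. A NumPy sketch of that quantity, mirroring the expressions in the code rather than Adam's internals (m and v stand for Adam's first- and second-moment slots):

import numpy as np

def adam_noise_scale_sketch(m, v, lr, beta1_power, beta2_power, epsilon=1e-8):
  # Bias-corrected learning rate: lr * sqrt(1 - beta2^t) / (1 - beta1^t).
  lr_t = lr * np.sqrt(1. - beta2_power) / (1. - beta1_power)
  # Per-parameter effective step; the code draws noise with sigma = 2 * eff_lr.
  eff_lr = lr_t * m / (np.sqrt(v) + epsilon)
  return 2. * eff_lr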
Example #12
def sample():
    sw0 = qWs[0].sample(1).eval().tolist()[0]
    sw1 = qWs[1].sample(1).eval().tolist()[0]
    sw2 = qWs[2].sample(1).eval().tolist()[0]
    sb0 = qbs[0].sample(1).eval().tolist()[0]
    sb1 = qbs[1].sample(1).eval().tolist()[0]
    sb2 = qbs[2].sample(1).eval().tolist()[0]
    print(sw0)
    X = tf.placeholder(tf.float32, [N, D], name="X")
    X = np.array(X_train, dtype=np.float32)
    h = tf.tanh(tf.matmul(X, sw0) + sb0)
    h = tf.tanh(tf.matmul(h, sw1) + sb1)
    h = tf.matmul(h, sw2) + sb2
    h = tf.reshape(h, [-1])
    y = Normal(loc=h, scale=0.1 * tf.ones(N), name="y")
    sample = y.sample(1).eval()
    print('MMMorimori', sample)
Example #13
    def build_update(self):
        """Simulate Langevin dynamics using a discretized integrator. Its
    discretization error goes to zero as the learning rate decreases.
    Approximate the sampled distribution by diagonal Gaussians whose
    parameters are incrementally updated.
    Notes
    -----
    The updates assume each Empirical random variable is directly
    parameterized by ``tf.Variable``s.
    """
        old_sample = {z: qz for z, qz in six.iteritems(self.empirical_vals)}

        # Simulate Langevin dynamics.
        self.learning_rate = self.step_size / tf.cast(self.t + 1, tf.float32)
        grad_log_joint = tf.gradients(self._log_joint(old_sample),
                                      list(six.itervalues(old_sample)))
        train_step = []
        sample = {}

        # Build update of Empirical random variables.
        for z, grad_log_p in zip(six.iterkeys(old_sample), grad_log_joint):
            qz = self.latent_vars[z]
            event_shape = qz.get_event_shape()
            normal = Normal(mu=tf.zeros(event_shape),
                            sigma=self.learning_rate * tf.ones(event_shape))
            sample[z] = old_sample[z] + 0.5 * self.learning_rate * grad_log_p + \
                normal.sample()
            train_step.append(old_sample[z].assign(sample[z]))

        # Update Empirical random variables and check whether the Gaussian
        # approximation should be updated this step
        with tf.control_dependencies(train_step):
            update_approximations = tf.logical_and(
                tf.greater_equal(self.t, self.burn_in),
                tf.equal(tf.mod(self.t, self.thinning), 0))
        assign_ops = []
        assign_ops.append(
            tf.cond(update_approximations,
                    lambda: self.build_approximation_update(),
                    lambda: tf.no_op()))
        # Increment n_accept.
        assign_ops.append(self.n_accept.assign_add(1))
        return tf.group(*assign_ops)
Example #14
    def build_update(self):
        """
    Simulate Hamiltonian dynamics with friction using a discretized
    integrator. Its discretization error goes to zero as the learning rate
    decreases.

    Implements the update equations from (15) of Chen et al. (2014).
    """
        old_sample = {
            z: tf.gather(qz.params, tf.maximum(self.t - 1, 0))
            for z, qz in six.iteritems(self.latent_vars)
        }
        old_v_sample = {z: v for z, v in six.iteritems(self.v)}

        # Simulate Hamiltonian dynamics with friction.
        friction = tf.constant(self.friction, dtype=tf.float32)
        learning_rate = tf.constant(self.step_size * 0.01, dtype=tf.float32)
        grad_log_joint = tf.gradients(self._log_joint(old_sample),
                                      list(six.itervalues(old_sample)))

        # v_sample is so named b/c it represents a velocity rather than momentum.
        sample = {}
        v_sample = {}
        for z, grad_log_p in zip(six.iterkeys(old_sample), grad_log_joint):
            qz = self.latent_vars[z]
            event_shape = qz.get_event_shape()
            normal = Normal(mu=tf.zeros(event_shape),
                            sigma=(tf.sqrt(learning_rate * friction) *
                                   tf.ones(event_shape)))
            sample[z] = old_sample[z] + old_v_sample[z]
            v_sample[z] = ((1. - 0.5 * friction) * old_v_sample[z] +
                           learning_rate * grad_log_p + normal.sample())

        # Update Empirical random variables.
        assign_ops = []
        for z, qz in six.iteritems(self.latent_vars):
            variable = qz.get_variables()[0]
            assign_ops.append(tf.scatter_update(variable, self.t, sample[z]))
            assign_ops.append(tf.assign(self.v[z], v_sample[z]).op)

        # Increment n_accept.
        assign_ops.append(self.n_accept.assign_add(1))
        return tf.group(*assign_ops)
Example #15
  def build_update(self):
    """Simulate Hamiltonian dynamics with friction using a discretized
    integrator. Its discretization error goes to zero as the learning
    rate decreases.

    Implements the update equations from (15) of Chen et al. (2014).
    """
    old_sample = {z: tf.gather(qz.params, tf.maximum(self.t - 1, 0))
                  for z, qz in six.iteritems(self.latent_vars)}
    old_v_sample = {z: v for z, v in six.iteritems(self.v)}

    # Simulate Hamiltonian dynamics with friction.
    friction = tf.constant(self.friction, dtype=tf.float32)
    learning_rate = tf.constant(self.step_size * 0.01, dtype=tf.float32)
    grad_log_joint = tf.gradients(self._log_joint(old_sample),
                                  list(six.itervalues(old_sample)))

    # v_sample is so named b/c it represents a velocity rather than momentum.
    sample = {}
    v_sample = {}
    for z, grad_log_p in zip(six.iterkeys(old_sample), grad_log_joint):
      qz = self.latent_vars[z]
      event_shape = qz.event_shape
      normal = Normal(loc=tf.zeros(event_shape),
                      scale=(tf.sqrt(learning_rate * friction) *
                             tf.ones(event_shape)))
      sample[z] = old_sample[z] + old_v_sample[z]
      v_sample[z] = ((1. - 0.5 * friction) * old_v_sample[z] +
                     learning_rate * tf.convert_to_tensor(grad_log_p) +
                     normal.sample())

    # Update Empirical random variables.
    assign_ops = []
    for z, qz in six.iteritems(self.latent_vars):
      variable = qz.get_variables()[0]
      assign_ops.append(tf.scatter_update(variable, self.t, sample[z]))
      assign_ops.append(tf.assign(self.v[z], v_sample[z]).op)

    # Increment n_accept.
    assign_ops.append(self.n_accept.assign_add(1))
    return tf.group(*assign_ops)
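
The recursion built above can be read as a friction-damped Hamiltonian step: the position moves by the old velocity, while the velocity is damped, pushed by the gradient, and perturbed by noise. A NumPy sketch mirroring the code (grad_log_joint is an assumed callable):

import numpy as np

def sghmc_step(z, v, grad_log_joint, step_size, friction, rng=np.random):
  learning_rate = step_size * 0.01
  noise = rng.normal(0.0, np.sqrt(learning_rate * friction), size=np.shape(v))
  new_z = z + v
  new_v = ((1. - 0.5 * friction) * v +
           learning_rate * grad_log_joint(z) + noise)
  return new_z, new_v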
Example #16
    def predict(self, X, Nsample):
        Nsample = 3

        # sample from posterior
        W_post = [
            self.qW[ii].sample(Nsample).eval()
            for ii in range(len(self.weights))
        ]
        b_post = [
            self.qb[ii].sample(Nsample).eval()
            for ii in range(len(self.weights))
        ]

        aleatoric_noise = Normal(
            loc=tf.cast(tf.fill([tf.shape(X)[0], 1], 0.0), dtype=self.dtype),
            scale=tf.cast(tf.fill([tf.shape(X)[0], 1], 0.1), dtype=self.dtype))

        noise_post = aleatoric_noise.sample(Nsample).eval()

        prediction = [None for ii in range(Nsample)]
        for ii in range(Nsample):
            self.weights = [W_post[ww][ii] for ww in range(len(self.weights))]
            self.biases = [b_post[ww][ii] for ww in range(len(self.weights))]

            self.weights = [
                self.qW[ww].mean() for ww in range(len(self.weights))
            ]
            self.biases = [
                self.qb[ww].mean() for ww in range(len(self.weights))
            ]

            prediction[ii] = tf.reshape(self.neural_net(X), [-1])

        prediction_mean, prediction_var = tf.nn.moments(tf.stack(prediction),
                                                        axes=[0])

        # convert tensor -> numpy.ndarray
        prediction_mean = prediction_mean.eval()
        prediction_var = prediction_var.eval()

        return prediction_mean, np.sqrt(prediction_var)
class Distribution(object):
    def __init__(self, prior=None, name=None, **kwargs):
        self.prior = prior

    def init_posterior(self, positive=True, empirical=True, n_samples=1000):

        if empirical:
            if positive:
                self.posterior = Empirical(params=tf.nn.softplus(
                    tf.Variable(tf.random_normal([n_samples]))))
            else:
                self.posterior = Empirical(
                    params=tf.Variable(tf.random_normal([
                        n_samples,
                    ])))

        else:
            if positive:
                self.posterior = Normal(
                    loc=tf.Variable(tf.random_normal([1])),
                    scale=tf.nn.softplus(tf.Variable(tf.random_normal([1]))))

    def plot(self, n_samples=10000, show=True, bins=20):
        sns.distplot(self.prior.sample(n_samples).eval(),
                     bins=bins,
                     label="prior",
                     hist=False,
                     kde_kws={"shade": True})
        sns.distplot(self.posterior.sample(n_samples).eval(),
                     bins=bins,
                     label="posterior",
                     hist=False,
                     kde_kws={"shade": True})
        plt.legend()
        if show:
            plt.show()
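
A hypothetical usage of the wrapper class above; the variable name and prior parameters below are illustrative, not taken from the snippet:

# Hypothetical usage; Normal, Empirical, and the plotting imports are assumed
# to be available exactly as in the class definition above.
rate = Distribution(prior=Normal(loc=0.0, scale=1.0), name="rate")
rate.init_posterior(positive=False, empirical=True, n_samples=1000)
# ... run inference that fills rate.posterior.params, then:
# rate.plot(n_samples=5000)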
def _test(shape, loc, scale, n):
    x = Normal(shape, loc, scale)
    val_est = tuple(get_dims(x.sample(n)))
    val_true = (n, ) + shape
    assert val_est == val_true
qW_0 = Normal(mu=tf.Variable(tf.random_normal([D, 2])),
              sigma=tf.nn.softplus(tf.Variable(tf.random_normal([D, 2]))))
qW_1 = Normal(mu=tf.Variable(tf.random_normal([2, 1])),
              sigma=tf.nn.softplus(tf.Variable(tf.random_normal([2, 1]))))
qb_0 = Normal(mu=tf.Variable(tf.random_normal([2])),
              sigma=tf.nn.softplus(tf.Variable(tf.random_normal([2]))))
qb_1 = Normal(mu=tf.Variable(tf.random_normal([1])),
              sigma=tf.nn.softplus(tf.Variable(tf.random_normal([1]))))

# Sample functions from variational model to visualize fits.
rs = np.random.RandomState(0)
inputs = np.linspace(-5, 5, num=400, dtype=np.float32)
x = tf.expand_dims(tf.constant(inputs), 1)
mus = []
for s in range(10):
  mus += [neural_network(x, qW_0.sample(), qW_1.sample(),
                         qb_0.sample(), qb_1.sample())]

mus = tf.stack(mus)

# FIRST VISUALIZATION (prior)

sess = ed.get_session()
tf.global_variables_initializer().run()
outputs = mus.eval()

fig = plt.figure(figsize=(10, 6))
ax = fig.add_subplot(111)
ax.set_title("Iteration: 0 - (CLOSE WINDOW TO CONTINUE)")
ax.plot(x_train, y_train, 'ks', alpha=0.5, label='(x, y)')
ax.plot(inputs, outputs[0].T, 'r', lw=2, alpha=0.5, label='prior draws')
inference = ed.KLqp({w: qw, b: qb}, data={X: X_train, y: y_train})
inference.initialize(n_print=10, n_iter=600)

tf.global_variables_initializer().run()

# Set up figure.
fig = plt.figure(figsize=(8, 8), facecolor='white')
ax = fig.add_subplot(111, frameon=False)
plt.ion()
plt.show(block=False)

# Build samples from inferred posterior.
n_samples = 50
inputs = np.linspace(-5, 3, num=400, dtype=np.float32).reshape((400, 1))
probs = tf.stack([tf.sigmoid(ed.dot(inputs, qw.sample()) + qb.sample())
                  for _ in range(n_samples)])

for t in range(inference.n_iter):
  info_dict = inference.update()
  inference.print_progress(info_dict)

  if t % inference.n_print == 0:
    outputs = probs.eval()

    # Plot data and functions
    plt.cla()
    ax.plot(X_train[:], y_train, 'bx')
    for s in range(n_samples):
      ax.plot(inputs[:], outputs[s], alpha=0.2)
Example #21
sns.jointplot(qb.params.eval()[nburn:T:stride],
              qw.params.eval()[nburn:T:stride])
plt.show()

# Posterior predictive checks.
y_post = ed.copy(y, {w: qw, b: qb})
# This is equivalent to
# y_post = Normal(mu=ed.dot(X, qw) + qb, sigma=tf.ones(N))

print("Mean squared error on test data:")
print(ed.evaluate('mean_squared_error', data={X: X_test, y_post: y_test}))

print("Displaying prior predictive samples.")
n_prior_samples = 10

w_prior = w.sample(n_prior_samples).eval()
b_prior = b.sample(n_prior_samples).eval()

plt.scatter(X_train, y_train)

inputs = np.linspace(-1, 10, num=400, dtype=np.float32)
for ns in range(n_prior_samples):
    output = inputs * w_prior[ns] + b_prior[ns]
    plt.plot(inputs, output)

plt.show()

print("Displaying posterior predictive samples.")
n_posterior_samples = 10

w_post = qw.sample(n_posterior_samples).eval()
Example #22
    def __init__(self, hdims, zdim, xdim, gen_scale=1.):
        x_ph = tf.placeholder(tf.float32, [None, xdim])
        batch_size = tf.shape(x_ph)[0]
        sample_size = tf.placeholder(tf.int32, [])

        # Define the generative network (p(x | z))
        with tf.variable_scope('generative', reuse=tf.AUTO_REUSE):
            z = Normal(loc=tf.zeros([batch_size, zdim]),
                       scale=tf.ones([batch_size, zdim]))

            hidden = tf.layers.dense(z, hdims[0], activation=tf.nn.relu, name="dense1")
            loc = tf.layers.dense(hidden, xdim, name="dense2")

            x_gen = TransformedDistribution(
                distribution=tfd.Normal(loc=loc, scale=gen_scale),
                bijector=tfd.bijectors.Exp(),
                name="LogNormalTransformedDistribution"
            )
            #x_gen = Bernoulli(logits=loc)

        # Define the inference network (q(z | x))
        with tf.variable_scope('inference', reuse=tf.AUTO_REUSE):
            hidden = tf.layers.dense(x_ph, hdims[0], activation=tf.nn.relu)
            qloc = tf.layers.dense(hidden, zdim)
            qscale = tf.layers.dense(hidden, zdim, activation=tf.nn.softplus)
            qz = Normal(loc=qloc, scale=qscale)
            qz_sample = qz.sample(sample_size)

        # Define the generative network using posterior samples from q(z | x)
        with tf.variable_scope('generative'):
            qz_sample = tf.reshape(qz_sample, [-1, zdim])
            hidden = tf.layers.dense(qz_sample, hdims[0], activation=tf.nn.relu, reuse=True, name="dense1")
            loc = tf.layers.dense(hidden, xdim, reuse=True, name="dense2")

            x_gen_post = tf.exp(loc)

        self.x_ph = x_ph
        self.x_data = self.x_ph
        self.batch_size = batch_size
        self.sample_size = sample_size

        self.ops = {
            'generative': x_gen,
            'inference': qz_sample,
            'generative_post': x_gen_post
        }

        self.kl_coef = tf.placeholder(tf.float32, ())
        with tf.variable_scope('inference', reuse=tf.AUTO_REUSE):
            self.inference = ed.KLqp({z: qz}, data={x_gen: self.x_data})
            self.lr = tf.placeholder(tf.float32, shape=())

            optimizer = tf.train.RMSPropOptimizer(self.lr, epsilon=0.9)

            self.inference.initialize(
                optimizer=optimizer,
                n_samples=10,
                kl_scaling={z: self.kl_coef}
            )

            # Build elbo loss to evaluate on validation data
            self.eval_loss, _ = self.inference.build_loss_and_gradients([])
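
A hedged training-loop sketch for the class above; the class name VAE, the next_batch helper, and the hyperparameter values are assumptions, since the snippet does not show them:

# Hypothetical usage of the class defined above (its name is cut off in the
# snippet, so `VAE` is assumed here); next_batch() is an assumed data iterator.
model = VAE(hdims=[200], zdim=10, xdim=784)
sess = ed.get_session()
tf.global_variables_initializer().run()
for step in range(1000):
    x_batch = next_batch()  # assumed helper returning a [batch, 784] array
    info_dict = model.inference.update(feed_dict={
        model.x_ph: x_batch,
        model.lr: 1e-3,
        model.kl_coef: min(1.0, step / 100.0)})
    model.inference.print_progress(info_dict)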
    inference.print_progress(info_dict)

#################################Testing####################################
# Load the test images.
X_test = mnist.test.images
# The TensorFlow method gives the label data in a one-hot vector format. We convert that into a single label.
Y_test = np.argmax(mnist.test.labels, axis=1)

# Generate samples from the posterior and store them.
n_samples = 100
prob_lst = []
samples = []
w_samples = []
b_samples = []
for _ in range(n_samples):
    w_samp = qw.sample()
    b_samp = qb.sample()
    w_samples.append(w_samp)
    b_samples.append(b_samp)
    # Also compute the probability of each class for each (w, b) sample.
    prob = tf.nn.softmax(tf.matmul(X_test, w_samp) + b_samp)
    prob_lst.append(prob.eval())
    sample = tf.concat([tf.reshape(w_samp, [-1]), b_samp], 0)
    samples.append(sample.eval())

# Compute the accuracy of the model.
# For each sample we compute the predicted class and compare with the test labels.
# The predicted class is defined as the one with maximum probability.
# We perform this test for each (w, b) in the posterior, giving us a set of accuracies.
# Finally we make a histogram of accuracies for the test data.
accy_test = []
Example #24
        x: np.reshape(X_batch, (N, 32 * 32 * 3)),
        y_ph: np.reshape(Y_batch, (-1))
    })
    inference.print_progress(info_dict)

# Load the test images.
X_test = np.reshape(test_data[0], (-1, 32 * 32 * 3)).astype(np.float32)
# The TensorFlow method gives the label data in a one-hot vector format. We convert that into a single label.
Y_test = np.reshape(test_data[1], (-1))

# Generate samples from the posterior and store them.
n_samples = 10
prob_lst = []

for i in range(n_samples):
    w1_samp = qw1.sample()
    b1_samp = qb1.sample()
    w2_samp = qw2.sample()
    b2_samp = qb2.sample()
    w3_samp = qw3.sample()
    b3_samp = qb3.sample()

    # Also compute the probability of each class for each (w, b) sample.
    l1_samp = tf.nn.leaky_relu(tf.matmul(X_test, w1_samp) + b1_samp)
    l2_samp = tf.nn.leaky_relu(tf.matmul(l1_samp, w2_samp) + b2_samp)
    l3_samp = tf.matmul(l2_samp, w3_samp) + b3_samp

    prob = tf.nn.softmax(l3_samp)
    prob_lst.append(prob.eval())
    print(i + 1, "steps completed.")
Example #25
inference.run(n_iter=1000)

# Sample functions from variational model to visualize fits.
# rs = np.random.RandomState(0)
# inputs = np.linspace(-5, 5, num=400, dtype=np.float32)
# x = tf.expand_dims(inputs, 1)
# mus = tf.stack([neural_network(x, qW_0.sample(), qW_1.sample(), qW_2.sample(), qb_0.sample(),
#                 qb_1.sample(), qb_2.sample())
#                 for _ in range(100)])

rs = np.random.RandomState(0)
inputs = np.linspace(-5, 5, num=400, dtype=np.float32)
x = tf.expand_dims(inputs, 1)
mus = tf.stack([
    neural_network(x, qW_0.sample(),
                   qW_1.sample(), qW_2.sample(), qb_0.sample(), qb_1.sample(),
                   qb_2.sample()) for _ in range(100)
])

outputs = mus.eval()

fig = plt.figure(figsize=(10, 6))
ax = fig.add_subplot(111)
ax.set_title("Iteration: 1000")
ax.plot(x_train, y_train, 'ks', alpha=0.5, label='(x, y)')
ax.plot(inputs, outputs[0].T, 'r', lw=2, alpha=0.5, label='posterior draws')
ax.plot(inputs, outputs[1:].T, 'r', lw=2, alpha=0.5)
ax.set_xlim([-5, 5])
ax.set_ylim([-2, 2])
ax.legend()
qpi_alpha = tf.nn.softplus(tf.Variable(tf.random_normal([K])))
qmu_mu = tf.Variable(tf.random_normal([K * D]))
qmu_sigma = tf.nn.softplus(tf.Variable(tf.random_normal([K * D])))
qsigma_alpha = tf.nn.softplus(tf.Variable(tf.random_normal([K * D])))
qsigma_beta = tf.nn.softplus(tf.Variable(tf.random_normal([K * D])))

qpi = Dirichlet(alpha=qpi_alpha)
qmu = Normal(mu=qmu_mu, sigma=qmu_sigma)
qsigma = InverseGamma(alpha=qsigma_alpha, beta=qsigma_beta)

data = {'x': x_train}
inference = ed.KLqp({'pi': qpi, 'mu': qmu, 'sigma': qsigma}, data, model)
inference.run(n_iter=2500, n_samples=10, n_minibatch=20)

# Average per-cluster and per-data point likelihood over many posterior samples.
log_liks = []
for s in range(100):
  zrep = {'pi': qpi.sample(()),
          'mu': qmu.sample(()),
          'sigma': qsigma.sample(())}
  log_liks += [model.predict(data, zrep)]

log_liks = tf.reduce_mean(log_liks, 0)

# Choose the cluster with the highest likelihood for each data point.
clusters = tf.argmax(log_liks, 0).eval()
plt.scatter(x_train[:, 0], x_train[:, 1], c=clusters, cmap=cm.bwr)
plt.axis([-3, 3, -3, 3])
plt.title("Predicted cluster assignments")
plt.show()
Example #27
sess = ed.get_session()
init = tf.initialize_all_variables()
init.run()

for _ in range(inference.n_iter):
    info_dict = inference.update()
    inference.print_progress(info_dict)
    t = info_dict['t']
    if t % inference.n_print == 0:
        print("Inferred cluster means:")
        print(sess.run(qmu.value()))

# Average per-cluster and per-data point likelihood over many posterior samples.
log_liks = []
for _ in range(100):
    mu_sample = qmu.sample()
    sigma_sample = qsigma.sample()
    # Take per-cluster and per-data point likelihood.
    log_lik = []
    for k in range(K):
        x_post = Normal(mu=tf.ones([N, 1]) * tf.gather(mu_sample, k),
                        sigma=tf.ones([N, 1]) * tf.gather(sigma_sample, k))
        log_lik.append(tf.reduce_sum(x_post.log_prob(x_train), 1))

    log_lik = tf.pack(log_lik)  # has shape (K, N)
    log_liks.append(log_lik)

log_liks = tf.reduce_mean(log_liks, 0)

# Choose the cluster with the highest likelihood for each data point.
clusters = tf.argmax(log_liks, 0).eval()
Example #28
def fwd_infer(x):
    h = tf.nn.relu(ed.dot(x, qW_0.sample()) + qb_0.sample())
    h = tf.nn.relu(ed.dot(h, qW_1.sample()) + qb_1.sample())
    h = tf.nn.sigmoid(ed.dot(h, qW_2.sample()) + qb_2.sample())
    return h


# Build samples from inferred posterior.

print('start learning')
for t in range(inference.n_iter):
    info_dict = inference.update()
    inference.print_progress(info_dict)

    if t % inference.n_print == 0:
        predict = tf.round(
            tf.sigmoid(ed.dot(train, qw.sample()) + qb.sample()))
        # predict = tf.round(fwd_infer(train))
        correct_prediction = tf.equal(predict, label[:, 1])
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
        val_predict = tf.round(
            tf.sigmoid(ed.dot(test, qw.sample()) + qb.sample()))
        # val_predict = tf.round(fwd_infer(test))
        val_correct_prediction = tf.equal(val_predict, label[:, 1])
        val_accuracy = tf.reduce_mean(
            tf.cast(val_correct_prediction, tf.float32))
        print('\n  \n training_accuracy : {}'.format(accuracy.eval()))
        print(' validation_accuracy : {} \n \n '.format(val_accuracy.eval()))

        ax.plot(predict.eval())
class bayesian_dynamics_model:
    def __init__(self, input_size, output_size):
        self.input_size = input_size
        self.output_size = output_size
        self.hidden_size = 20

        # Declare placeholders.
        self.x = tf.placeholder(shape=[None, self.input_size], dtype=tf.float32)
        self.y_ph = tf.placeholder(shape=[None, self.output_size], dtype=tf.float32)

        # Declare weights.
        self.W_0 = Normal(loc=tf.zeros([self.input_size, self.hidden_size]), scale=tf.ones([self.input_size, self.hidden_size]))
        self.W_1 = Normal(loc=tf.zeros([self.hidden_size, self.hidden_size]), scale=tf.ones([self.hidden_size, self.hidden_size]))
        self.W_2 = Normal(loc=tf.zeros([self.hidden_size, self.output_size]), scale=tf.ones([self.hidden_size, self.output_size]))

        self.b_0 = Normal(loc=tf.zeros(self.hidden_size), scale=tf.ones(self.hidden_size))
        self.b_1 = Normal(loc=tf.zeros(self.hidden_size), scale=tf.ones(self.hidden_size))
        self.b_2 = Normal(loc=tf.zeros(self.output_size), scale=tf.ones(self.output_size))

        # Output of computational graph.
        nn_out = self.build(self.x, self.W_0, self.W_1, self.W_2, self.b_0, self.b_1, self.b_2)
        self.y = Normal(loc=nn_out, scale=tf.ones_like(nn_out) * .1)

        # Variables.
        self.qW_0 = Normal(loc=tf.get_variable('qW_0/loc', [self.input_size, self.hidden_size]),
                           scale=tf.nn.softplus(tf.get_variable('qW_0/scale', [self.input_size, self.hidden_size])))
        self.qW_1 = Normal(loc=tf.get_variable('qW_1/loc', [self.hidden_size, self.hidden_size]),
                           scale=tf.nn.softplus(tf.get_variable('qW_1/scale', [self.hidden_size, self.hidden_size])))
        self.qW_2 = Normal(loc=tf.get_variable('qW_2/loc', [self.hidden_size, self.output_size]),
                           scale=tf.nn.softplus(tf.get_variable('qW_2/scale', [self.hidden_size, self.output_size])))

        self.qb_0 = Normal(loc=tf.get_variable('qb_0/loc', [self.hidden_size]),
                           scale=tf.nn.softplus(tf.get_variable('qb_0/scale', [self.hidden_size])))
        self.qb_1 = Normal(loc=tf.get_variable('qb_1/loc', [self.hidden_size]),
                           scale=tf.nn.softplus(tf.get_variable('qb_1/scale', [self.hidden_size])))
        self.qb_2 = Normal(loc=tf.get_variable('qb_2/loc', [self.output_size]),
                           scale=tf.nn.softplus(tf.get_variable('qb_2/scale', [self.output_size])))

        # Sample of the posterior model.
        self.sample_model = [self.qW_0.sample(), self.qW_1.sample(), self.qW_2.sample(), self.qb_0.sample(),
                             self.qb_1.sample(), self.qb_2.sample()]

        # Sample functions from variational model to visualize fits.
        self.mus = self.build(self.x, self.qW_0.sample(), self.qW_1.sample(), self.qW_2.sample(), self.qb_0.sample(), self.qb_1.sample(), self.qb_2.sample())

    def initialize_inference(self, n_iter=1000*5, n_samples=5):
        self.inference = ed.KLqp({self.W_0: self.qW_0, self.b_0: self.qb_0,
                                  self.W_1: self.qW_1, self.b_1: self.qb_1,
                                  self.W_2: self.qW_2, self.b_2: self.qb_2}, data={self.y: self.y_ph})
        self.inference.initialize(n_iter=n_iter, n_samples=n_samples)

    def rbf(self, x):
        return tf.exp(-tf.square(x))

    def function(self, x):
        return np.sin(x)

    def build(self, x, W_0, W_1, W_2, b_0, b_1, b_2):
        '''Builds the computational graph.'''

        h_0 = self.rbf(tf.matmul(x, W_0) + b_0)
        h_1 = self.rbf(tf.matmul(h_0, W_1) + b_1)
        out = tf.matmul(h_1, W_2) + b_2
        return out

    def generate_toy_data(self, noise_sd=.1, size=50):
        x = np.random.uniform(-3., 3., size)
        y1 = np.cos(x) + np.random.normal(0, noise_sd, size=size)
        y2 = np.sin(x) + np.random.normal(0, noise_sd, size=size)

        y = np.stack([y1, y2], axis=-1)

        return x[..., np.newaxis], y

    def get_batch(self, noise_sd=.1, size=50):
        x = np.random.uniform(-3., 3., size)
        y = self.function(x) + np.random.normal(0, noise_sd, size=size)

        return x[..., np.newaxis], y[..., np.newaxis]

    def visualize(self, sess, xeval, animate=False):
        plt.cla()
        plt.scatter(xeval, self.function(xeval))
        for _ in range(10):
            yeval = sess.run(self.mus, feed_dict={self.x:xeval})
            plt.plot(xeval, yeval)
        plt.grid()
        if animate == False:
            plt.show()
        else:
            plt.pause(1. / 60.)
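
A hedged usage sketch for bayesian_dynamics_model; the batch size, iteration counts, and evaluation grid are illustrative assumptions:

# Hypothetical driver; ed, tf, np, and plt are assumed to be the same imports
# the class definition above relies on.
model = bayesian_dynamics_model(input_size=1, output_size=1)
model.initialize_inference(n_iter=500, n_samples=5)
sess = ed.get_session()
tf.global_variables_initializer().run()
for _ in range(model.inference.n_iter):
    x_batch, y_batch = model.get_batch(size=50)
    info_dict = model.inference.update(feed_dict={model.x: x_batch,
                                                  model.y_ph: y_batch})
    model.inference.print_progress(info_dict)
xeval = np.linspace(-3., 3., num=100)[..., np.newaxis]
model.visualize(sess, xeval)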
def _test(shape, loc, scale, size):
    x = Normal(shape, loc, scale)
    val_est = tuple(get_dims(x.sample(size=size)))
    val_true = (size, ) + shape
    assert val_est == val_true
init.run()

# Set up figure
fig = plt.figure(figsize=(8, 8), facecolor='white')
ax = fig.add_subplot(111, frameon=False)
plt.ion()
plt.show(block=False)

# draws from approximate posterior
S = 50
rs = np.random.RandomState(0)
inputs = np.linspace(-5, 3, num=400, dtype=np.float32)
x_in = tf.expand_dims(inputs, 1)
mus = []
for s in range(S):
    mus += [tf.sigmoid(ed.dot(x_in, qw.sample()) + qb.sample())]
mus = tf.stack(mus)

for t in range(inference.n_iter):
    info_dict = inference.update()
    inference.print_progress(info_dict)

    if t % inference.n_print == 0:
        outputs = mus.eval()

        # Plot data and functions
        plt.cla()
        ax.plot(x_train[:], y_train, 'bx')
        for s in range(S):
            ax.plot(inputs, outputs[s], alpha=0.2)
        ax.set_xlim([-5, 3])
Example #32
        list(np.reshape(sess.run(y_train), (-1, 10))), N)
    # The TensorFlow method gives the label data in a one-hot vector format. We convert that into a single label.
    Y_batch = np.argmax(Y_batch, axis=1)
    info_dict = inference.update(feed_dict={x: X_batch, y_ph: Y_batch})
    inference.print_progress(info_dict)

# Load the test images.
X_test = tf.convert_to_tensor(x_test, dtype=tf.float32)
# The TensorFlow method gives the label data in a one-hot vector format. We convert that into a single label.
Y_test = np.argmax(np.reshape(sess.run(y_test), (-1, 10)), axis=1)

# Generate samples from the posterior and store them.
n_samples = 10
prob_lst = []
for i in range(n_samples):
    w1_samp = tf.convert_to_tensor(qw1.sample(), dtype=tf.float32)
    b1_samp = qb1.sample()
    w2_samp = tf.convert_to_tensor(qw2.sample(), dtype=tf.float32)
    b2_samp = qb2.sample()
    w3_samp = tf.convert_to_tensor(qw3.sample(), dtype=tf.float32)
    b3_samp = qb3.sample()
    # Also compute the probability of each class for each (w, b) sample.
    prob = tf.nn.softmax(
        tf.matmul(
            tf.matmul(tf.matmul(X_test, w1_samp) + b1_samp, w2_samp) +
            b2_samp, w3_samp) + b3_samp)
    prob_lst.append(prob.eval())

    print(i + 1, "steps completed.")

# Compute the accuracy of the model.
Example #33
    qW_0 = Normal(loc=tf.get_variable("qW_0/loc", [D, 2]),
                  scale=tf.nn.softplus(tf.get_variable("qW_0/scale", [D, 2])))
    qW_1 = Normal(loc=tf.get_variable("qW_1/loc", [2, 1]),
                  scale=tf.nn.softplus(tf.get_variable("qW_1/scale", [2, 1])))
    qb_0 = Normal(loc=tf.get_variable("qb_0/loc", [2]),
                  scale=tf.nn.softplus(tf.get_variable("qb_0/scale", [2])))
    qb_1 = Normal(loc=tf.get_variable("qb_1/loc", [1]),
                  scale=tf.nn.softplus(tf.get_variable("qb_1/scale", [1])))

    # Sample functions from variational model to visualize fits.
    rs = np.random.RandomState(0)
    inputs = np.linspace(-5, 5, num=400, dtype=np.float32)
    x = tf.expand_dims(inputs, 1)
    mus = tf.stack(
        [neural_network(x, qW_0.sample(), qW_1.sample(),
                        qb_0.sample(), qb_1.sample())
         for _ in range(10)])



    # FIRST VISUALIZATION (prior)

    sess = ed.get_session()
    tf.global_variables_initializer().run()
    outputs = mus.eval()

    fig = plt.figure(figsize=(10, 6))
    ax = fig.add_subplot(111)
    ax.set_title("Iteration: 0")
    ax.plot(x_train, y_train, 'ks', alpha=0.5, label='(x, y)')
sns.jointplot(qb.params.eval()[nburn:T:stride],
              qw.params.eval()[nburn:T:stride])
plt.show()

# Posterior predictive checks.
y_post = ed.copy(y, {w: qw, b: qb})
# This is equivalent to
# y_post = Normal(loc=ed.dot(X, qw) + qb, scale=tf.ones(N))

print("Mean squared error on test data:")
print(ed.evaluate('mean_squared_error', data={X: X_test, y_post: y_test}))

print("Displaying prior predictive samples.")
n_prior_samples = 10

w_prior = w.sample(n_prior_samples).eval()
b_prior = b.sample(n_prior_samples).eval()

plt.scatter(X_train, y_train)

inputs = np.linspace(-1, 10, num=400)
for ns in range(n_prior_samples):
    output = inputs * w_prior[ns] + b_prior[ns]
    plt.plot(inputs, output)

plt.show()

print("Displaying posterior predictive samples.")
n_posterior_samples = 10

w_post = qw.sample(n_posterior_samples).eval()
Example #35
inference = ed.KLqp({w: qw, b: qb}, data={x: a, y_ph: data.T[1]})
inference.initialize()
inference.run(n_samples=2, n_iter=150)

y_post = Normal(loc=ed.dot(x, qw) + qb, scale=tf.ones(n_samples))
#y_post = ed.copy(y, {w: qw, b: qb})

#plot results
X, Y = data.T[0], data.T[1]
plt.plot(X, Y, "bo", label="Real data")
s1 = 0.0
s2 = 0.0
n_sample = 10
#print(qw.sample(n_samples)[:, 0].eval(), qb.sample(n_samples).eval())

W_, B_ = qw.sample(n_samples)[:, 0].eval(), qb.sample(n_samples).eval()

for x in qw.sample(n_samples)[:, 0].eval():
    s1 += x
for x in qb.sample(n_samples).eval():
    s2 += x

w_samples = s1 / n_samples
b_samples = s2 / n_samples

print("samples", w_samples, b_samples)
plt.plot(X, X * W_[0] + B_[0], 'r', label='Predicted data')
#plt.plot(X, X * w_samples + b_samples, 'r', label='Predicted data')
plt.legend()
plt.show()
qw = Normal(mu=tf.Variable(tf.random_normal([1], 0, 1)),
            sigma=tf.nn.softplus(tf.Variable(1*tf.random_normal([1]))))
qb = Normal(mu=tf.Variable(tf.random_normal([1], 0, 1)),
            sigma=tf.nn.softplus(tf.Variable(1*tf.random_normal([1]))))
# Set up the data and the inference method (Kullback-Leibler variational inference)
z_train = mydf.get("PassengersNorm").reshape([N,1])
x_train = mydf.get("Tempdev").reshape([N,1])
sess = ed.get_session()
data = {x: x_train[:,0], z: z_train[:,0]}
#inference = ed.KLqp({x_mu: qx_mu, z_mu: qz_mu, w: qw, b: qb}, data)
inference = ed.KLqp({w: qw, b: qb}, data)

# Set up for samples from models
mus = []
for i in range(10):
    mus += [qw.sample()]

mus = tf.stack(mus)

# Inference: Quick way - No Priors possible
# inference.run()

# Inference: a more controlled way of running inference
inference.initialize(n_print=10, n_iter=600)
init = tf.global_variables_initializer()
init.run()

# Prior samples
outputs = mus.eval()
priordf=pd.DataFrame(outputs)
priordf['Sample']=["Sample"+str(x) for x in list(range(10))]
Example #37
data = {X: X_train, y: y_train}
inference = ed.KLqp({w: qw, b: qb}, data)
inference.run()

# CRITICISM
y_post = ed.copy(y, {w: qw.mean(), b: qb.mean()})
# This is equivalent to
# y_post = Normal(mu=ed.dot(X, qw.mean()) + qb.mean(), sigma=tf.ones(N))

print("Mean squared error on test data:")
print(ed.evaluate('mean_squared_error', data={X: X_test, y_post: y_test}))

print("Displaying prior predictive samples.")
n_prior_samples = 10

w_prior = w.sample(n_prior_samples).eval()
b_prior = b.sample(n_prior_samples).eval()

plt.scatter(X_train, y_train)

inputs = np.linspace(-1, 10, num=400, dtype=np.float32)
for ns in range(n_prior_samples):
    output = inputs * w_prior[ns] + b_prior[ns]
    plt.plot(inputs, output)

plt.show()

print("Displaying posterior predictive samples.")
n_posterior_samples = 10

w_post = qw.sample(n_posterior_samples).eval()
def _test(shape, loc, scale, n):
    x = Normal(shape, loc, scale)
    val_est = tuple(get_dims(x.sample(n)))
    val_true = (n, ) + shape
    assert val_est == val_true
              sigma=tf.nn.softplus(tf.Variable(tf.random_normal([2]))))
qb_1 = Normal(mu=tf.Variable(tf.random_normal([1])),
              sigma=tf.nn.softplus(tf.Variable(tf.random_normal([1]))))

data = {y: y_train}
inference = ed.KLqp({W_0: qW_0, b_0: qb_0,
                     W_1: qW_1, b_1: qb_1}, data)


# Sample functions from variational model to visualize fits.
rs = np.random.RandomState(0)
inputs = np.linspace(-5, 5, num=400, dtype=np.float32)
x = tf.expand_dims(tf.constant(inputs), 1)
mus = []
for s in range(10):
  mus += [neural_network(x, qW_0.sample(), qW_1.sample(),
                         qb_0.sample(), qb_1.sample())]

mus = tf.pack(mus)

sess = ed.get_session()
init = tf.initialize_all_variables()
init.run()


# FIRST VISUALIZATION (prior)

outputs = mus.eval()

fig = plt.figure(figsize=(10, 6))
ax = fig.add_subplot(111)
Example #40
def getting_started_example():
    # Simulate a toy dataset of 50 observations with a cosine relationship.
    ed.set_seed(42)

    N = 50  # Number of data points.
    D = 1  # Number of features.

    x_train, y_train = build_toy_dataset(N)

    #--------------------
    # Define a two-layer Bayesian neural network.
    W_0 = Normal(loc=tf.zeros([D, 2]), scale=tf.ones([D, 2]))
    W_1 = Normal(loc=tf.zeros([2, 1]), scale=tf.ones([2, 1]))
    b_0 = Normal(loc=tf.zeros(2), scale=tf.ones(2))
    b_1 = Normal(loc=tf.zeros(1), scale=tf.ones(1))

    x = x_train
    y = Normal(loc=neural_network(x, W_0, W_1, b_0, b_1),
               scale=0.1 * tf.ones(N))

    #--------------------
    # Make inferences about the model from data.
    # We will use variational inference.
    # Specify a normal approximation over the weights and biases.
    # Using tf.get_variable allows the variational factors' parameters to vary; they are initialized randomly.
    # The standard deviation parameters are constrained to be greater than zero according to a softplus transformation.
    qW_0 = Normal(loc=tf.get_variable('qW_0/loc', [D, 2]),
                  scale=tf.nn.softplus(tf.get_variable('qW_0/scale', [D, 2])))
    qW_1 = Normal(loc=tf.get_variable('qW_1/loc', [2, 1]),
                  scale=tf.nn.softplus(tf.get_variable('qW_1/scale', [2, 1])))
    qb_0 = Normal(loc=tf.get_variable('qb_0/loc', [2]),
                  scale=tf.nn.softplus(tf.get_variable('qb_0/scale', [2])))
    qb_1 = Normal(loc=tf.get_variable('qb_1/loc', [1]),
                  scale=tf.nn.softplus(tf.get_variable('qb_1/scale', [1])))

    # Sample functions from variational model to visualize fits.
    rs = np.random.RandomState(0)
    inputs = np.linspace(-5, 5, num=400, dtype=np.float32)
    x = tf.expand_dims(inputs, 1)
    mus = tf.stack([
        neural_network(x, qW_0.sample(), qW_1.sample(), qb_0.sample(),
                       qb_1.sample()) for _ in range(10)
    ])

    # First Visualization (prior).
    sess = ed.get_session()
    tf.global_variables_initializer().run()
    outputs = mus.eval()

    fig = plt.figure(figsize=(10, 6))
    ax = fig.add_subplot(111)
    ax.set_title('Iteration: 0')
    ax.plot(x_train, y_train, 'ks', alpha=0.5, label='(x, y)')
    ax.plot(inputs, outputs[0].T, 'r', lw=2, alpha=0.5, label='prior draws')
    ax.plot(inputs, outputs[1:].T, 'r', lw=2, alpha=0.5)
    ax.set_xlim([-5, 5])
    ax.set_ylim([-2, 2])
    ax.legend()
    plt.show()

    #--------------------
    # Run variational inference with the Kullback-Leibler divergence to infer the model's latent variables given the data.
    # We specify 1000 iterations.
    inference = ed.KLqp({
        W_0: qW_0,
        b_0: qb_0,
        W_1: qW_1,
        b_1: qb_1
    },
                        data={y: y_train})
    inference.run(n_iter=1000, n_samples=5)

    #--------------------
    # Criticize the model fit.
    # Bayesian neural networks define a distribution over neural networks, so we can perform a graphical check.
    # Draw neural networks from the inferred model and visualize how well it fits the data.

    # SECOND VISUALIZATION (posterior)
    outputs = mus.eval()

    fig = plt.figure(figsize=(10, 6))
    ax = fig.add_subplot(111)
    ax.set_title('Iteration: 1000')
    ax.plot(x_train, y_train, 'ks', alpha=0.5, label='(x, y)')
    ax.plot(inputs,
            outputs[0].T,
            'r',
            lw=2,
            alpha=0.5,
            label='posterior draws')
    ax.plot(inputs, outputs[1:].T, 'r', lw=2, alpha=0.5)
    ax.set_xlim([-5, 5])
    ax.set_ylim([-2, 2])
    ax.legend()
    plt.show()
sess = ed.get_session()
init = tf.global_variables_initializer()
init.run()

for _ in range(inference.n_iter):
    info_dict = inference.update()
    inference.print_progress(info_dict)
    t = info_dict['t']
    if t % inference.n_print == 0:
        print("Inferred cluster means:")
        print(sess.run(qmu.mean()))

# Calculate likelihood for each data point and cluster assignment,
# averaged over many posterior samples. ``x_post`` has shape (N, 100, K, D).
mu_sample = qmu.sample(100)
sigma_sample = qsigma.sample(100)
x_post = Normal(mu=tf.ones([N, 1, 1, 1]) * mu_sample,
                sigma=tf.ones([N, 1, 1, 1]) * sigma_sample)
x_broadcasted = tf.tile(tf.reshape(x_train, [N, 1, 1, D]), [1, 100, K, 1])

# Sum over latent dimension, then average over posterior samples.
# ``log_liks`` ends up with shape (N, K).
log_liks = x_post.log_prob(x_broadcasted)
log_liks = tf.reduce_sum(log_liks, 3)
log_liks = tf.reduce_mean(log_liks, 1)

# Choose the cluster with the highest likelihood for each data point.
clusters = tf.argmax(log_liks, 1).eval()
plt.scatter(x_train[:, 0], x_train[:, 1], c=clusters, cmap=cm.bwr)
plt.axis([-3, 3, -3, 3])
Example #42
def _test(mu, sigma, n):
  x = Normal(mu=mu, sigma=sigma)
  val_est = get_dims(x.sample(n))
  val_true = n + get_dims(mu)
  assert val_est == val_true
inference = ed.KLqp({w: qw, b: qb}, data={X: X_train, y: y_train})
inference.run()

# CRITICISM
y_post = ed.copy(y, {w: qw, b: qb})
# This is equivalent to
# y_post = Normal(loc=ed.dot(X, qw) + qb, scale=tf.ones(N))

print("Mean squared error on test data:")
print(ed.evaluate('mean_squared_error', data={X: X_test, y_post: y_test}))

print("Displaying prior predictive samples.")
n_prior_samples = 10

w_prior = w.sample(n_prior_samples).eval()
b_prior = b.sample(n_prior_samples).eval()

plt.scatter(X_train, y_train)

inputs = np.linspace(-1, 10, num=400)
for ns in range(n_prior_samples):
    output = inputs * w_prior[ns] + b_prior[ns]
    plt.plot(inputs, output)

plt.show()

print("Displaying posterior predictive samples.")
n_posterior_samples = 10

w_post = qw.sample(n_posterior_samples).eval()
Example #44
                      tf.Variable(tf.random_normal([10]), name="scale")))
  with tf.name_scope("qb_1"):
    qb_1 = Normal(loc=tf.Variable(tf.random_normal([10]), name="loc"),
                  scale=tf.nn.softplus(
                      tf.Variable(tf.random_normal([10]), name="scale")))
  with tf.name_scope("qb_2"):
    qb_2 = Normal(loc=tf.Variable(tf.random_normal([10]), name="loc"),
                  scale=tf.nn.softplus(
                      tf.Variable(tf.random_normal([10]), name="scale")))

# hack to get score/performance
x_test = np.vstack([x_test]*6)
y_test = np.hstack([y_test]*6)

y_sample = tf.stack(
  [Categorical(logits=neural_network(x, qW_0.sample(),\
                                     qW_1.sample(), qW_2.sample(),\
                                     qb_0.sample(), qb_1.sample(),\
                                     qb_2.sample()))
               for _ in range(10)])

# build graph
inference = ed.KLqp({W_0: qW_0, b_0: qb_0,
                     W_1: qW_1, b_1: qb_1,
                     W_2: qW_2, b_2: qb_2}, data={x: x_train, y: y_train})

# fit model
start = time.time()
inference.run(n_iter=1000, n_samples=1, logdir='log')
end = time.time()
print('training wall clock time: %5.2f min' % ((end-start)/60.))
Example #45
qb_0 = Normal(mu=tf.Variable(tf.random_normal([2])),
              sigma=tf.nn.softplus(tf.Variable(tf.random_normal([2]))))
qb_1 = Normal(mu=tf.Variable(tf.random_normal([1])),
              sigma=tf.nn.softplus(tf.Variable(tf.random_normal([1]))))

data = {y: y_train}
inference = ed.MFVI({W_0: qW_0, b_0: qb_0, W_1: qW_1, b_1: qb_1}, data)

# Sample functions from variational model to visualize fits.
rs = np.random.RandomState(0)
inputs = np.linspace(-5, 5, num=400, dtype=np.float32)
x = tf.expand_dims(tf.constant(inputs), 1)
mus = []
for s in range(10):
    mus += [
        neural_network(x, qW_0.sample(), qW_1.sample(), qb_0.sample(),
                       qb_1.sample())
    ]

mus = tf.pack(mus)

sess = ed.get_session()
init = tf.initialize_all_variables()
init.run()

# FIRST VISUALIZATION (prior)

outputs = mus.eval()

fig = plt.figure(figsize=(10, 6))
ax = fig.add_subplot(111)
def main(_):
  ed.set_seed(42)

  # DATA
  X_train, y_train = build_toy_dataset(FLAGS.N)
  X_test, y_test = build_toy_dataset(FLAGS.N)

  # MODEL
  X = tf.placeholder(tf.float32, [FLAGS.N, FLAGS.D])
  w = Normal(loc=tf.zeros(FLAGS.D), scale=tf.ones(FLAGS.D))
  b = Normal(loc=tf.zeros(1), scale=tf.ones(1))
  y = Normal(loc=ed.dot(X, w) + b, scale=tf.ones(FLAGS.N))

  # INFERENCE
  qw = Empirical(params=tf.get_variable("qw/params", [FLAGS.T, FLAGS.D]))
  qb = Empirical(params=tf.get_variable("qb/params", [FLAGS.T, 1]))

  inference = ed.SGHMC({w: qw, b: qb}, data={X: X_train, y: y_train})
  inference.run(step_size=1e-3)

  # CRITICISM

  # Plot posterior samples.
  sns.jointplot(qb.params.eval()[FLAGS.nburn:FLAGS.T:FLAGS.stride],
                qw.params.eval()[FLAGS.nburn:FLAGS.T:FLAGS.stride])
  plt.show()

  # Posterior predictive checks.
  y_post = ed.copy(y, {w: qw, b: qb})
  # This is equivalent to
  # y_post = Normal(loc=ed.dot(X, qw) + qb, scale=tf.ones(FLAGS.N))

  print("Mean squared error on test data:")
  print(ed.evaluate('mean_squared_error', data={X: X_test, y_post: y_test}))

  print("Displaying prior predictive samples.")
  n_prior_samples = 10

  w_prior = w.sample(n_prior_samples).eval()
  b_prior = b.sample(n_prior_samples).eval()

  plt.scatter(X_train, y_train)

  inputs = np.linspace(-1, 10, num=400)
  for ns in range(n_prior_samples):
      output = inputs * w_prior[ns] + b_prior[ns]
      plt.plot(inputs, output)

  plt.show()

  print("Displaying posterior predictive samples.")
  n_posterior_samples = 10

  w_post = qw.sample(n_posterior_samples).eval()
  b_post = qb.sample(n_posterior_samples).eval()

  plt.scatter(X_train, y_train)

  inputs = np.linspace(-1, 10, num=400)
  for ns in range(n_posterior_samples):
      output = inputs * w_post[ns] + b_post[ns]
      plt.plot(inputs, output)

  plt.show()