Example #1
  def initialize(self, *args, **kwargs):
    # Store latent variables in a temporary attribute; MAP will
    # optimize `PointMass` random variables, which subsequently
    # optimizes mean parameters of the normal approximations.
    latent_vars_normal = self.latent_vars.copy()
    self.latent_vars = {z: PointMass(params=qz.loc)
                        for z, qz in six.iteritems(latent_vars_normal)}

    super(Laplace, self).initialize(*args, **kwargs)

    hessians = tf.hessians(self.loss, list(six.itervalues(self.latent_vars)))
    self.finalize_ops = []
    for z, hessian in zip(six.iterkeys(self.latent_vars), hessians):
      qz = latent_vars_normal[z]
      if isinstance(qz, (MultivariateNormalDiag, Normal)):
        scale_var = get_variables(qz.variance())[0]
        scale = 1.0 / tf.diag_part(hessian)
      else:  # qz is MultivariateNormalTriL
        scale_var = get_variables(qz.covariance())[0]
        scale = tf.matrix_inverse(tf.cholesky(hessian))

      self.finalize_ops.append(scale_var.assign(scale))

    self.latent_vars = latent_vars_normal.copy()
    del latent_vars_normal
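
The comments above describe the Laplace approximation being set up: MAP optimizes the `PointMass` means, and the Hessian of the loss at that mode supplies the scale of each normal approximation. Below is a minimal NumPy sketch of that idea, independent of Edward's API; the numbers are made up for illustration.

import numpy as np

# Hessian of the negative log-posterior at the MAP estimate
# (hypothetical values for a 2-D latent variable).
hessian = np.array([[2.0, 0.5],
                    [0.5, 1.0]])
mode = np.array([0.3, -1.2])  # mean of the fitted normal = MAP estimate

# Full-covariance Laplace approximation: covariance = inverse Hessian.
cov_full = np.linalg.inv(hessian)

# Diagonal (mean-field) variant, mirroring the Normal / MultivariateNormalDiag
# branch above: per-coordinate variance = 1 / diagonal of the Hessian.
var_diag = 1.0 / np.diag(hessian)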
Example #2
 def build_approximation_update(self):
     """ Calculating the weighted mean and variance of the approximating Gaussians from samples provided
 by noisy-Adam. Each sample is only seen once and the parameters are updated incrementally for
 better memory effficiency. See Welford (1962), Note on a method for calculating
 corrected sums of squares and products or https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
 for information about the incremental algorithm.
 """
     update_ops = []
     trainables = tf.trainable_variables()
     wSum = self.wSum.assign_add(self.learning_rate)
     for z, qz in six.iteritems(self.latent_vars):
         sample = self.empirical_vals[z]
         mm_var = get_variables(self.approximations[z].mu)[0]
         mv_var = get_variables(self.approximations[z].sigma)[0]
         d_op = self.deltas[z].assign(mm_var)
         with tf.control_dependencies([d_op]):
             mm_op = mm_var.assign_add(
                 (self.learning_rate / wSum) * (sample - d_op))
             with tf.control_dependencies([mm_op]):
                 mv_op = mv_var.assign(
                     tf.sqrt(
                         tf.divide((tf.square(mv_var) * wSum) +
                                   (self.learning_rate * (sample - mm_op) *
                                    (sample - d_op)), wSum)))
                 update_ops.append(mv_op)
     with tf.control_dependencies([tf.group(*update_ops)]):
         increment_iters = self.update_iters.assign_add(
             tf.constant(1, tf.int32))
     return tf.group(increment_iters)
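
The docstring references Welford's incremental algorithm; here the per-sample weight is the learning rate, so each step is a weighted Welford update. A short pure-Python sketch of that update (the names and sample values are illustrative, not Edward code):

def welford_update(state, sample, weight):
    # One weighted Welford step: the weight plays the role of the learning
    # rate above, and the variance term uses both the old and the new mean.
    w_sum, mean, m2 = state
    w_sum += weight
    delta_old = sample - mean
    mean += (weight / w_sum) * delta_old
    delta_new = sample - mean       # same cross term as (sample - mm_op) above
    m2 += weight * delta_old * delta_new
    return (w_sum, mean, m2)

state = (0.0, 0.0, 0.0)
for x in [2.0, 3.0, 1.0]:
    state = welford_update(state, x, weight=0.1)
w_sum, mean, m2 = state
variance = m2 / w_sum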
Example #3
  def initialize(self, *args, **kwargs):
    # Store latent variables in a temporary attribute; MAP will
    # optimize ``PointMass`` random variables, which subsequently
    # optimizes mean parameters of the normal approximations.
    latent_vars_normal = self.latent_vars.copy()
    self.latent_vars = {z: PointMass(params=qz.loc)
                        for z, qz in six.iteritems(latent_vars_normal)}

    super(Laplace, self).initialize(*args, **kwargs)

    hessians = tf.hessians(self.loss, list(six.itervalues(self.latent_vars)))
    self.finalize_ops = []
    for z, hessian in zip(six.iterkeys(self.latent_vars), hessians):
      qz = latent_vars_normal[z]
      if isinstance(qz, (MultivariateNormalDiag, Normal)):
        scale_var = get_variables(qz.variance())[0]
        scale = 1.0 / tf.diag_part(hessian)
      else:  # qz is MultivariateNormalTriL
        scale_var = get_variables(qz.covariance())[0]
        scale = tf.matrix_inverse(tf.cholesky(hessian))

      self.finalize_ops.append(scale_var.assign(scale))

    self.latent_vars = latent_vars_normal.copy()
    del latent_vars_normal
Example #4
 def test_chain_structure(self):
   with self.test_session():
     a = tf.Variable(0.0)
     b = tf.Variable(a)
     c = Normal(mu=b, sigma=1.0)
     self.assertEqual(get_variables(a), [])
     self.assertEqual(get_variables(b), [])
     self.assertEqual(get_variables(c), [b])
Example #5
 def test_chain_structure(self):
     with self.test_session():
         a = tf.Variable(0.0)
         b = tf.Variable(a)
         c = Normal(mu=b, sigma=1.0)
         self.assertEqual(get_variables(a), [])
         self.assertEqual(get_variables(b), [])
         self.assertEqual(get_variables(c), [b])
Example #6
 def test_control_flow(self):
   with self.test_session():
     a = Bernoulli(p=0.5)
     b = tf.Variable(0.0)
     c = tf.constant(0.0)
     d = tf.cond(tf.cast(a, tf.bool), lambda: b, lambda: c)
     e = Normal(mu=d, sigma=1.0)
     self.assertEqual(get_variables(d), [b])
     self.assertEqual(get_variables(e), [b])
Example #7
 def test_control_flow(self):
     with self.test_session():
         a = Bernoulli(p=0.5)
         b = tf.Variable(0.0)
         c = tf.constant(0.0)
         d = tf.cond(tf.cast(a, tf.bool), lambda: b, lambda: c)
         e = Normal(mu=d, sigma=1.0)
         self.assertEqual(get_variables(d), [b])
         self.assertEqual(get_variables(e), [b])
Example #8
 def test_chain_structure(self):
   """a -> b -> c -> d -> e"""
   with self.test_session():
     a = tf.Variable(0.0)
     b = tf.Variable(a)
     c = Normal(b, 1.0)
     self.assertEqual(get_variables(a), [])
     self.assertEqual(get_variables(b), [])
     self.assertEqual(get_variables(c), [b])
Example #9
 def test_chain_structure(self):
     """a -> b -> c -> d -> e"""
     with self.test_session():
         a = tf.Variable(0.0)
         b = tf.Variable(a)
         c = Normal(b, 1.0)
         self.assertEqual(get_variables(a), [])
         self.assertEqual(get_variables(b), [])
         self.assertEqual(get_variables(c), [b])
Example #10
 def test_tensor(self):
   with self.test_session():
     a = tf.Variable(0.0)
     b = tf.constant(2.0)
     c = a + b
     d = tf.Variable(a)
     self.assertEqual(get_variables(a), [])
     self.assertEqual(get_variables(b), [])
     self.assertEqual(get_variables(c), [a])
     self.assertEqual(get_variables(d), [])
Example #11
 def test_tensor(self):
     with self.test_session():
         a = tf.Variable(0.0)
         b = tf.constant(2.0)
         c = a + b
         d = tf.Variable(a)
         self.assertEqual(get_variables(a), [])
         self.assertEqual(get_variables(b), [])
         self.assertEqual(get_variables(c), [a])
         self.assertEqual(get_variables(d), [])
Example #12
 def test_a_structure(self):
     with self.test_session():
         a = tf.Variable(0.0)
         b = Normal(mu=a, sigma=1.0)
         c = Normal(mu=b, sigma=1.0)
         d = Normal(mu=a, sigma=1.0)
         e = Normal(mu=d, sigma=1.0)
         self.assertEqual(get_variables(a), [])
         self.assertEqual(get_variables(b), [a])
         self.assertEqual(get_variables(c), [a])
         self.assertEqual(get_variables(d), [a])
         self.assertEqual(get_variables(e), [a])
Example #13
 def test_v_structure(self):
     with self.test_session():
         a = tf.Variable(0.0)
         b = Normal(mu=a, sigma=1.0)
         c = tf.Variable(0.0)
         d = Normal(mu=c, sigma=1.0)
         e = Normal(mu=tf.multiply(b, d), sigma=1.0)
         self.assertEqual(get_variables(a), [])
         self.assertEqual(get_variables(b), [a])
         self.assertEqual(get_variables(c), [])
         self.assertEqual(get_variables(d), [c])
         self.assertEqual(set(get_variables(e)), set([a, c]))
Example #14
 def test_a_structure(self):
   with self.test_session():
     a = tf.Variable(0.0)
     b = Normal(mu=a, sigma=1.0)
     c = Normal(mu=b, sigma=1.0)
     d = Normal(mu=a, sigma=1.0)
     e = Normal(mu=d, sigma=1.0)
     self.assertEqual(get_variables(a), [])
     self.assertEqual(get_variables(b), [a])
     self.assertEqual(get_variables(c), [a])
     self.assertEqual(get_variables(d), [a])
     self.assertEqual(get_variables(e), [a])
Example #15
 def test_v_structure(self):
   with self.test_session():
     a = tf.Variable(0.0)
     b = Normal(mu=a, sigma=1.0)
     c = tf.Variable(0.0)
     d = Normal(mu=c, sigma=1.0)
     e = Normal(mu=tf.mul(b, d), sigma=1.0)
     self.assertEqual(get_variables(a), [])
     self.assertEqual(get_variables(b), [a])
     self.assertEqual(get_variables(c), [])
     self.assertEqual(get_variables(d), [c])
     self.assertEqual(set(get_variables(e)), set([a, c]))
Example #16
 def test_a_structure(self):
     """e <- d <- a -> b -> c"""
     with self.test_session():
         a = tf.Variable(0.0)
         b = Normal(a, 1.0)
         c = Normal(b, 1.0)
         d = Normal(a, 1.0)
         e = Normal(d, 1.0)
         self.assertEqual(get_variables(a), [])
         self.assertEqual(get_variables(b), [a])
         self.assertEqual(get_variables(c), [a])
         self.assertEqual(get_variables(d), [a])
         self.assertEqual(get_variables(e), [a])
Example #17
 def test_a_structure(self):
   """e <- d <- a -> b -> c"""
   with self.test_session():
     a = tf.Variable(0.0)
     b = Normal(a, 1.0)
     c = Normal(b, 1.0)
     d = Normal(a, 1.0)
     e = Normal(d, 1.0)
     self.assertEqual(get_variables(a), [])
     self.assertEqual(get_variables(b), [a])
     self.assertEqual(get_variables(c), [a])
     self.assertEqual(get_variables(d), [a])
     self.assertEqual(get_variables(e), [a])
Example #18
 def test_v_structure(self):
   """a -> b -> e <- d <- c"""
   with self.test_session():
     a = tf.Variable(0.0)
     b = Normal(a, 1.0)
     c = tf.Variable(0.0)
     d = Normal(c, 1.0)
     e = Normal(b * d, 1.0)
     self.assertEqual(get_variables(a), [])
     self.assertEqual(get_variables(b), [a])
     self.assertEqual(get_variables(c), [])
     self.assertEqual(get_variables(d), [c])
     self.assertEqual(set(get_variables(e)), set([a, c]))
Example #19
 def test_v_structure(self):
     """a -> b -> e <- d <- c"""
     with self.test_session():
         a = tf.Variable(0.0)
         b = Normal(a, 1.0)
         c = tf.Variable(0.0)
         d = Normal(c, 1.0)
         e = Normal(b * d, 1.0)
         self.assertEqual(get_variables(a), [])
         self.assertEqual(get_variables(b), [a])
         self.assertEqual(get_variables(c), [])
         self.assertEqual(get_variables(d), [c])
         self.assertEqual(set(get_variables(e)), set([a, c]))
Example #20
    def test_scan_with_a_structure(self):
        """copied from test_a_structure"""
        def cumsum(x):
            return tf.scan(lambda a, x: a + x, x)

        with self.test_session():
            a = tf.Variable([1.0, 1.0, 1.0])
            b = Normal(mu=cumsum(a), sigma=tf.ones([3]))
            c = Normal(mu=cumsum(b), sigma=tf.ones([3]))
            d = Normal(mu=cumsum(a), sigma=tf.ones([3]))
            e = Normal(mu=cumsum(d), sigma=tf.ones([3]))
            self.assertEqual(get_variables(a), [])
            self.assertEqual(get_variables(b), [a])
            self.assertEqual(get_variables(c), [a])
            self.assertEqual(get_variables(d), [a])
            self.assertEqual(get_variables(e), [a])
Example #21
  def test_scan_with_a_structure(self):
    """copied from test_a_structure"""
    def cumsum(x):
      return tf.scan(lambda a, x: a + x, x)

    with self.test_session():
      a = tf.Variable([1.0, 1.0, 1.0])
      b = Normal(cumsum(a), tf.ones([3]))
      c = Normal(cumsum(b), tf.ones([3]))
      d = Normal(cumsum(a), tf.ones([3]))
      e = Normal(cumsum(d), tf.ones([3]))
      self.assertEqual(get_variables(a), [])
      self.assertEqual(get_variables(b), [a])
      self.assertEqual(get_variables(c), [a])
      self.assertEqual(get_variables(d), [a])
      self.assertEqual(get_variables(e), [a])
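
For reference, the `cumsum` helper used in both scan tests is just a running sum over the vector; a quick standalone check (a sketch assuming a TF1-style session):

import tensorflow as tf

x = tf.constant([1.0, 1.0, 1.0])
running_sum = tf.scan(lambda acc, x_t: acc + x_t, x)

with tf.Session() as sess:
    print(sess.run(running_sum))  # [1. 2. 3.]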
Example #22
    def _set_log_variables(self, log_vars=None):
        """Log variables to TensorBoard.

    For each variable in ``log_vars``, forms a ``tf.summary.scalar`` if
    the variable has scalar shape; otherwise forms a ``tf.summary.histogram``.

    Parameters
    ----------
    log_vars : list, optional
      Specifies the list of variables to log after each ``n_print``
      steps. If None, will log all variables. If ``[]``, no variables
      will be logged.
    """
        summary_key = 'summaries_' + str(id(self))
        if log_vars is None:
            log_vars = []
            for key in six.iterkeys(self.data):
                log_vars += get_variables(key)

            for key, value in six.iteritems(self.latent_vars):
                log_vars += get_variables(key)
                log_vars += get_variables(value)

            log_vars = set(log_vars)

        for var in log_vars:
            # replace colons which are an invalid character
            var_name = var.name.replace(':', '/')
            # Log all scalars.
            if len(var.shape) == 0:
                tf.summary.scalar("parameter/{}".format(var_name),
                                  var,
                                  collections=[summary_key])
            elif len(var.shape) == 1 and var.shape[0] == 1:
                tf.summary.scalar("parameter/{}".format(var_name),
                                  var[0],
                                  collections=[summary_key])
            else:
                # If var is multi-dimensional, log a histogram of its values.
                tf.summary.histogram("parameter/{}".format(var_name),
                                     var,
                                     collections=[summary_key])
Example #23
  def _set_log_variables(self, log_vars=None):
    """Log variables to TensorBoard.

    For each variable in `log_vars`, forms a `tf.summary.scalar` if
    the variable has scalar shape; otherwise forms a `tf.summary.histogram`.

    Args:
      log_vars: list, optional.
        Specifies the list of variables to log after each `n_print`
        steps. If None, will log all variables. If `[]`, no variables
        will be logged.
    """
    summary_key = 'summaries_' + str(id(self))
    if log_vars is None:
      log_vars = []
      for key in six.iterkeys(self.data):
        log_vars += get_variables(key)

      for key, value in six.iteritems(self.latent_vars):
        log_vars += get_variables(key)
        log_vars += get_variables(value)

      log_vars = set(log_vars)

    for var in log_vars:
      # replace colons which are an invalid character
      var_name = var.name.replace(':', '/')
      # Log all scalars.
      if len(var.shape) == 0:
        tf.summary.scalar("parameter/{}".format(var_name),
                          var, collections=[summary_key])
      elif len(var.shape) == 1 and var.shape[0] == 1:
        tf.summary.scalar("parameter/{}".format(var_name),
                          var[0], collections=[summary_key])
      else:
        # If var is multi-dimensional, log a histogram of its values.
        tf.summary.histogram("parameter/{}".format(var_name),
                             var, collections=[summary_key])
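
A hypothetical usage sketch for the summaries this method creates: merge the per-instance collection and write it to TensorBoard. `inference` and the log directory are placeholders, not part of the example above.

summary_key = 'summaries_' + str(id(inference))  # same key convention as above
merged = tf.summary.merge_all(key=summary_key)
writer = tf.summary.FileWriter('/tmp/edward_logs', tf.get_default_graph())

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    writer.add_summary(sess.run(merged), global_step=0)
    writer.flush()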
Example #24
    def finalize(self, feed_dict=None):
        """Function to call after convergence.

    Computes the Hessian at the mode.

    Parameters
    ----------
    feed_dict : dict, optional
      Feed dictionary for a TensorFlow session run during evaluation
      of Hessian. It is used to feed placeholders that are not fed
      during initialization.
    """
        if feed_dict is None:
            feed_dict = {}

        for key, value in six.iteritems(self.data):
            if isinstance(key, tf.Tensor) and "Placeholder" in key.op.type:
                feed_dict[key] = value

        var_list = list(six.itervalues(self.latent_vars))
        hessians = tf.hessians(self.loss, var_list)

        assign_ops = []
        for z, hessian in zip(six.iterkeys(self.latent_vars), hessians):
            qz = self.latent_vars_normal[z]
            sigma_var = get_variables(qz.sigma)[0]
            if isinstance(qz, MultivariateNormalCholesky):
                sigma = tf.matrix_inverse(tf.cholesky(hessian))
            elif isinstance(qz, MultivariateNormalDiag):
                sigma = 1.0 / tf.diag_part(hessian)
            else:  # qz is MultivariateNormalFull
                sigma = tf.matrix_inverse(hessian)

            assign_ops.append(sigma_var.assign(sigma))

        sess = get_session()
        sess.run(assign_ops, feed_dict)
        self.latent_vars = self.latent_vars_normal.copy()
        del self.latent_vars_normal
        super(Laplace, self).finalize()
Example #25
    def initialize(self,
                   optimizer=None,
                   var_list=None,
                   use_prettytensor=False,
                   *args,
                   **kwargs):
        """Initialize variational inference.

    Parameters
    ----------
    optimizer : str or tf.train.Optimizer, optional
      A TensorFlow optimizer, to use for optimizing the variational
      objective. Alternatively, one can pass in the name of a
      TensorFlow optimizer, and default parameters for the optimizer
      will be used.
    var_list : list of tf.Variable, optional
      List of TensorFlow variables to optimize over. Default is all
      trainable variables that ``latent_vars`` and ``data`` depend on,
      excluding those that are only used in conditionals in ``data``.
    use_prettytensor : bool, optional
      ``True`` to use a PrettyTensor optimizer (when using PrettyTensor),
      ``False`` to use a TensorFlow optimizer. Defaults to TensorFlow.
    """
        super(VariationalInference, self).initialize(*args, **kwargs)

        if var_list is None:
            if self.model_wrapper is None:
                # Traverse random variable graphs to get default list of variables.
                var_list = set([])
                trainables = tf.trainable_variables()
                for z, qz in six.iteritems(self.latent_vars):
                    if isinstance(z, RandomVariable):
                        var_list.update(get_variables(z,
                                                      collection=trainables))

                    var_list.update(get_variables(qz, collection=trainables))

                for x, qx in six.iteritems(self.data):
                    if isinstance(x, RandomVariable) and \
                            not isinstance(qx, RandomVariable):
                        var_list.update(get_variables(x,
                                                      collection=trainables))

                var_list = list(var_list)
            else:
                # Variables may not be instantiated for model wrappers until
                # their methods are first called. For now, hard-code
                # ``var_list`` inside build_losses.
                var_list = None

        self.loss, grads_and_vars = self.build_loss_and_gradients(var_list)

        if optimizer is None:
            # Use ADAM with a decaying scale factor.
            global_step = tf.Variable(0, trainable=False)
            starter_learning_rate = 0.1
            learning_rate = tf.train.exponential_decay(starter_learning_rate,
                                                       global_step,
                                                       100,
                                                       0.9,
                                                       staircase=True)
            optimizer = tf.train.AdamOptimizer(learning_rate)
        elif isinstance(optimizer, str):
            if optimizer == 'gradientdescent':
                optimizer = tf.train.GradientDescentOptimizer(0.01)
            elif optimizer == 'adadelta':
                optimizer = tf.train.AdadeltaOptimizer()
            elif optimizer == 'adagrad':
                optimizer = tf.train.AdagradOptimizer(0.01)
            elif optimizer == 'momentum':
                optimizer = tf.train.MomentumOptimizer(0.01, 0.9)
            elif optimizer == 'adam':
                optimizer = tf.train.AdamOptimizer()
            elif optimizer == 'ftrl':
                optimizer = tf.train.FtrlOptimizer(0.01)
            elif optimizer == 'rmsprop':
                optimizer = tf.train.RMSPropOptimizer(0.01)
            else:
                raise ValueError('Optimizer class not found:', optimizer)

            global_step = None
        elif isinstance(optimizer, tf.train.Optimizer):
            # Custom optimizers have no control over global_step.
            global_step = None
        else:
            raise TypeError("Optimizer must be str, tf.train.Optimizer, or None.")

        if not use_prettytensor:
            self.train = optimizer.apply_gradients(grads_and_vars,
                                                   global_step=global_step)
        else:
            # Note PrettyTensor optimizer does not accept manual updates;
            # it autodiffs the loss directly.
            self.train = pt.apply_optimizer(optimizer,
                                            losses=[self.loss],
                                            global_step=global_step,
                                            var_list=var_list)
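
The string-to-optimizer dispatch above could also be written as a table lookup. A sketch of that alternative (not the library's code), keeping the same names and default step sizes:

_OPTIMIZERS = {
    'gradientdescent': lambda: tf.train.GradientDescentOptimizer(0.01),
    'adadelta': lambda: tf.train.AdadeltaOptimizer(),
    'adagrad': lambda: tf.train.AdagradOptimizer(0.01),
    'momentum': lambda: tf.train.MomentumOptimizer(0.01, 0.9),
    'adam': lambda: tf.train.AdamOptimizer(),
    'ftrl': lambda: tf.train.FtrlOptimizer(0.01),
    'rmsprop': lambda: tf.train.RMSPropOptimizer(0.01),
}

def resolve_optimizer(name):
    try:
        return _OPTIMIZERS[name]()
    except KeyError:
        raise ValueError('Optimizer class not found:', name)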
Example #26
    def test_scan(self):
        with self.test_session():
            b = tf.Variable(0.0)
            op = tf.scan(lambda a, x: a + b + x, tf.constant([2.0, 3.0, 1.0]))

            self.assertEqual(get_variables(op), [b])
Example #27
  def test_scan(self):
    with self.test_session():
      b = tf.Variable(0.0)
      op = tf.scan(lambda a, x: a + b + x, tf.constant([2.0, 3.0, 1.0]))

      self.assertEqual(get_variables(op), [b])
Example #28
ph = tf.placeholder(tf.float32, y_train.shape)
var = tf.Variable(ph, trainable=False, collections=[])
sess.run(var.initializer, {ph: y_train})

# n_samples is the number of samples used to build the loss function
n_samples = args.samp
t = tf.Variable(0, trainable=False)
increment_t = t.assign_add(1)

# find the list of variables
var_list = set([])
trainables = tf.trainable_variables()

for z, qz in six.iteritems(latent_vars):
    if isinstance(z, RandomVariable):
        var_list.update(get_variables(z, collection=trainables))

    var_list.update(get_variables(qz, collection=trainables))

for x, qx in six.iteritems(data):
    if isinstance(x, RandomVariable) and not isinstance(qx, RandomVariable):
        var_list.update(get_variables(x, collection=trainables))

var_list = list(var_list)

# build a loss function
p_log_lik = [0.0] * n_samples

z_sample = {}
for z, qz in six.iteritems(latent_vars):
    # Copy q(z) to obtain new set of posterior samples.
Example #29
  def initialize(self, optimizer=None, var_list=None, use_prettytensor=False,
                 *args, **kwargs):
    """Initialize variational inference.

    Parameters
    ----------
    optimizer : str or tf.train.Optimizer, optional
      A TensorFlow optimizer, to use for optimizing the variational
      objective. Alternatively, one can pass in the name of a
      TensorFlow optimizer, and default parameters for the optimizer
      will be used.
    var_list : list of tf.Variable, optional
      List of TensorFlow variables to optimize over. Default is all
      trainable variables that ``latent_vars`` and ``data`` depend on,
      excluding those that are only used in conditionals in ``data``.
    use_prettytensor : bool, optional
      ``True`` to use a PrettyTensor optimizer (when using PrettyTensor),
      ``False`` to use a TensorFlow optimizer. Defaults to TensorFlow.
    """
    super(VariationalInference, self).initialize(*args, **kwargs)

    if var_list is None:
      if self.model_wrapper is None:
        # Traverse random variable graphs to get default list of variables.
        var_list = set([])
        trainables = tf.trainable_variables()
        for z, qz in six.iteritems(self.latent_vars):
          if isinstance(z, RandomVariable):
            var_list.update(get_variables(z, collection=trainables))

          var_list.update(get_variables(qz, collection=trainables))

        for x, qx in six.iteritems(self.data):
          if isinstance(x, RandomVariable) and \
                  not isinstance(qx, RandomVariable):
            var_list.update(get_variables(x, collection=trainables))

        var_list = list(var_list)
      else:
        # Variables may not be instantiated for model wrappers until
        # their methods are first called. For now, hard-code
        # ``var_list`` inside build_losses.
        var_list = None

    self.loss, grads_and_vars = self.build_loss_and_gradients(var_list)

    if optimizer is None:
      # Use ADAM with a decaying scale factor.
      global_step = tf.Variable(0, trainable=False)
      starter_learning_rate = 0.1
      learning_rate = tf.train.exponential_decay(starter_learning_rate,
                                                 global_step,
                                                 100, 0.9, staircase=True)
      optimizer = tf.train.AdamOptimizer(learning_rate)
    elif isinstance(optimizer, str):
      if optimizer == 'gradientdescent':
        optimizer = tf.train.GradientDescentOptimizer(0.01)
      elif optimizer == 'adadelta':
        optimizer = tf.train.AdadeltaOptimizer()
      elif optimizer == 'adagrad':
        optimizer = tf.train.AdagradOptimizer(0.01)
      elif optimizer == 'momentum':
        optimizer = tf.train.MomentumOptimizer(0.01, 0.9)
      elif optimizer == 'adam':
        optimizer = tf.train.AdamOptimizer()
      elif optimizer == 'ftrl':
        optimizer = tf.train.FtrlOptimizer(0.01)
      elif optimizer == 'rmsprop':
        optimizer = tf.train.RMSPropOptimizer(0.01)
      else:
        raise ValueError('Optimizer class not found:', optimizer)

      global_step = None
    elif isinstance(optimizer, tf.train.Optimizer):
      # Custom optimizers have no control over global_step.
      global_step = None
    else:
      raise TypeError("Optimizer must be str, tf.train.Optimizer, or None.")

    if not use_prettytensor:
      self.train = optimizer.apply_gradients(grads_and_vars,
                                             global_step=global_step)
    else:
      # Note PrettyTensor optimizer does not accept manual updates;
      # it autodiffs the loss directly.
      self.train = pt.apply_optimizer(optimizer, losses=[self.loss],
                                      global_step=global_step,
                                      var_list=var_list)
Example #30
    def initialize(self,
                   optimizer=None,
                   var_list=None,
                   use_prettytensor=False,
                   global_step=None,
                   *args,
                   **kwargs):
        """Initialize variational inference.

    Parameters
    ----------
    optimizer : str or tf.train.Optimizer, optional
      A TensorFlow optimizer, to use for optimizing the variational
      objective. Alternatively, one can pass in the name of a
      TensorFlow optimizer, and default parameters for the optimizer
      will be used.
    var_list : list of tf.Variable, optional
      List of TensorFlow variables to optimize over. Default is all
      trainable variables that ``latent_vars`` and ``data`` depend on,
      excluding those that are only used in conditionals in ``data``.
    use_prettytensor : bool, optional
      ``True`` to use a PrettyTensor optimizer (when using PrettyTensor),
      ``False`` to use a TensorFlow optimizer. Defaults to TensorFlow.
    global_step : tf.Variable, optional
      A TensorFlow variable to hold the global step.
    """
        super(VariationalInference, self).initialize(*args, **kwargs)

        if var_list is None:
            # Traverse random variable graphs to get default list of variables.
            var_list = set()
            trainables = tf.trainable_variables()
            for z, qz in six.iteritems(self.latent_vars):
                if isinstance(z, RandomVariable):
                    var_list.update(get_variables(z, collection=trainables))

                var_list.update(get_variables(qz, collection=trainables))

            for x, qx in six.iteritems(self.data):
                if isinstance(x, RandomVariable) and \
                        not isinstance(qx, RandomVariable):
                    var_list.update(get_variables(x, collection=trainables))

            var_list = list(var_list)

        self.loss, grads_and_vars = self.build_loss_and_gradients(var_list)

        if self.logging:
            summary_key = 'summaries_' + str(id(self))
            tf.summary.scalar("loss", self.loss, collections=[summary_key])
            for grad, var in grads_and_vars:
                # replace colons which are an invalid character
                tf.summary.histogram("gradient/" + var.name.replace(':', '/'),
                                     grad,
                                     collections=[summary_key])
                tf.summary.scalar("gradient_norm/" +
                                  var.name.replace(':', '/'),
                                  tf.norm(grad),
                                  collections=[summary_key])

            self.summarize = tf.summary.merge_all(key=summary_key)

        if optimizer is None and global_step is None:
            # Default optimizer always uses a global step variable.
            global_step = tf.Variable(0, trainable=False, name="global_step")

        if isinstance(global_step, tf.Variable):
            starter_learning_rate = 0.1
            learning_rate = tf.train.exponential_decay(starter_learning_rate,
                                                       global_step,
                                                       100,
                                                       0.9,
                                                       staircase=True)
        else:
            learning_rate = 0.01
            global_step = None

        # Build optimizer.
        if optimizer is None:
            optimizer = tf.train.AdamOptimizer(learning_rate)
        elif isinstance(optimizer, str):
            if optimizer == 'gradientdescent':
                optimizer = tf.train.GradientDescentOptimizer(learning_rate)
            elif optimizer == 'adadelta':
                optimizer = tf.train.AdadeltaOptimizer(learning_rate)
            elif optimizer == 'adagrad':
                optimizer = tf.train.AdagradOptimizer(learning_rate)
            elif optimizer == 'momentum':
                optimizer = tf.train.MomentumOptimizer(learning_rate, 0.9)
            elif optimizer == 'adam':
                optimizer = tf.train.AdamOptimizer(learning_rate)
            elif optimizer == 'ftrl':
                optimizer = tf.train.FtrlOptimizer(learning_rate)
            elif optimizer == 'rmsprop':
                optimizer = tf.train.RMSPropOptimizer(learning_rate)
            else:
                raise ValueError('Optimizer class not found:', optimizer)
        elif not isinstance(optimizer, tf.train.Optimizer):
            raise TypeError(
                "Optimizer must be str, tf.train.Optimizer, or None.")

        scope = "optimizer_" + str(id(self))
        with tf.variable_scope(scope):
            if not use_prettytensor:
                self.train = optimizer.apply_gradients(grads_and_vars,
                                                       global_step=global_step)
            else:
                # Note PrettyTensor optimizer does not accept manual updates;
                # it autodiffs the loss directly.
                self.train = pt.apply_optimizer(optimizer,
                                                losses=[self.loss],
                                                global_step=global_step,
                                                var_list=var_list)

        self.reset.append(
            tf.variables_initializer(
                tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope=scope)))
Example #31
  def initialize(self, optimizer=None, var_list=None, use_prettytensor=False,
                 global_step=None, *args, **kwargs):
    """Initialize inference algorithm. It initializes hyperparameters
    and builds ops for the algorithm's computation graph.

    Args:
      optimizer: str or tf.train.Optimizer, optional.
        A TensorFlow optimizer, to use for optimizing the variational
        objective. Alternatively, one can pass in the name of a
        TensorFlow optimizer, and default parameters for the optimizer
        will be used.
      var_list: list of tf.Variable, optional.
        List of TensorFlow variables to optimize over. Default is all
        trainable variables that `latent_vars` and `data` depend on,
        excluding those that are only used in conditionals in `data`.
      use_prettytensor: bool, optional.
        `True` to use a PrettyTensor optimizer (when using PrettyTensor),
        `False` to use a TensorFlow optimizer. Defaults to TensorFlow.
      global_step: tf.Variable, optional.
        A TensorFlow variable to hold the global step.
    """
    super(VariationalInference, self).initialize(*args, **kwargs)

    if var_list is None:
      # Traverse random variable graphs to get default list of variables.
      var_list = set()
      trainables = tf.trainable_variables()
      for z, qz in six.iteritems(self.latent_vars):
        if isinstance(z, RandomVariable):
          var_list.update(get_variables(z, collection=trainables))

        var_list.update(get_variables(qz, collection=trainables))

      for x, qx in six.iteritems(self.data):
        if isinstance(x, RandomVariable) and \
                not isinstance(qx, RandomVariable):
          var_list.update(get_variables(x, collection=trainables))

      var_list = list(var_list)

    self.loss, grads_and_vars = self.build_loss_and_gradients(var_list)

    if self.logging:
      summary_key = 'summaries_' + str(id(self))
      tf.summary.scalar("loss", self.loss, collections=[summary_key])
      for grad, var in grads_and_vars:
        # replace colons which are an invalid character
        tf.summary.histogram("gradient/" +
                             var.name.replace(':', '/'),
                             grad, collections=[summary_key])
        tf.summary.scalar("gradient_norm/" +
                          var.name.replace(':', '/'),
                          tf.norm(grad), collections=[summary_key])

      self.summarize = tf.summary.merge_all(key=summary_key)

    if optimizer is None and global_step is None:
      # Default optimizer always uses a global step variable.
      global_step = tf.Variable(0, trainable=False, name="global_step")

    if isinstance(global_step, tf.Variable):
      starter_learning_rate = 0.1
      learning_rate = tf.train.exponential_decay(starter_learning_rate,
                                                 global_step,
                                                 100, 0.9, staircase=True)
    else:
      learning_rate = 0.01

    # Build optimizer.
    if optimizer is None:
      optimizer = tf.train.AdamOptimizer(learning_rate)
    elif isinstance(optimizer, str):
      if optimizer == 'gradientdescent':
        optimizer = tf.train.GradientDescentOptimizer(learning_rate)
      elif optimizer == 'adadelta':
        optimizer = tf.train.AdadeltaOptimizer(learning_rate)
      elif optimizer == 'adagrad':
        optimizer = tf.train.AdagradOptimizer(learning_rate)
      elif optimizer == 'momentum':
        optimizer = tf.train.MomentumOptimizer(learning_rate, 0.9)
      elif optimizer == 'adam':
        optimizer = tf.train.AdamOptimizer(learning_rate)
      elif optimizer == 'ftrl':
        optimizer = tf.train.FtrlOptimizer(learning_rate)
      elif optimizer == 'rmsprop':
        optimizer = tf.train.RMSPropOptimizer(learning_rate)
      else:
        raise ValueError('Optimizer class not found:', optimizer)
    elif not isinstance(optimizer, tf.train.Optimizer):
      raise TypeError("Optimizer must be str, tf.train.Optimizer, or None.")

    scope = "optimizer_" + str(id(self))
    with tf.variable_scope(scope):
      if not use_prettytensor:
        self.train = optimizer.apply_gradients(grads_and_vars,
                                               global_step=global_step)
      else:
        # Note PrettyTensor optimizer does not accept manual updates;
        # it autodiffs the loss directly.
        self.train = pt.apply_optimizer(optimizer, losses=[self.loss],
                                        global_step=global_step,
                                        var_list=var_list)

    self.reset.append(tf.variables_initializer(
        tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope=scope)))
Example #32
    def initialize(self,
                   n_iter=1000,
                   n_print=None,
                   scale=None,
                   auto_transform=True,
                   logdir=None,
                   log_timestamp=True,
                   log_vars=None,
                   debug=False,
                   optimizer=None,
                   var_list=None,
                   use_prettytensor=False,
                   global_step=None,
                   n_samples=1,
                   kl_scaling=None,
                   maxnorm=5.):

        if kl_scaling is None:
            kl_scaling = {}
        if n_samples <= 0:
            raise ValueError(
                "n_samples should be greater than zero: {}".format(n_samples))

        self.n_samples = n_samples
        self.kl_scaling = kl_scaling

        # from inference.py
        self.n_iter = n_iter
        if n_print is None:
            self.n_print = int(n_iter / 100)
        else:
            self.n_print = n_print

        self.progbar = Progbar(self.n_iter)
        self.t = tf.Variable(0, trainable=False, name="iteration")
        self.increment_t = self.t.assign_add(1)

        if scale is None:
            scale = {}
        elif not isinstance(scale, dict):
            raise TypeError("scale must be a dict object.")
        self.scale = scale

        self.transformations = {}
        if auto_transform:
            latent_vars = self.latent_vars.copy()
            self.latent_vars = {}
            self.latent_vars_unconstrained = {}
            for z, qz in six.iteritems(latent_vars):
                if hasattr(z, 'support') and hasattr(qz, 'support') and \
                        z.support != qz.support and qz.support != 'point':

                    z_unconstrained = transform(z)
                    self.transformations[z] = z_unconstrained

                    if qz.support == "points":
                        qz_unconstrained = qz
                    else:
                        qz_unconstrained = transform(qz)
                    self.latent_vars_unconstrained[
                        z_unconstrained] = qz_unconstrained

                    if z_unconstrained != z:
                        qz_constrained = transform(
                            qz_unconstrained,
                            bijectors.Invert(z_unconstrained.bijector))

                        try:
                            qz_constrained.params = \
                                    z_unconstrained.bijector.inverse(
                                        qz_unconstrained.params)
                        except:
                            pass
                    else:
                        qz_constrained = qz_unconstrained

                    self.latent_vars[z] = qz_constrained
                else:
                    self.latent_vars[z] = qz
                    self.latent_vars_unconstrained[z] = qz
            del latent_vars

        if logdir is not None:
            self.logging = True
            if log_timestamp:
                logdir = os.path.expanduser(logdir)
                logdir = os.path.join(
                    logdir,
                    datetime.strftime(datetime.utcnow(), "%Y%m%d_%H%M%S"))

            self._summary_key = tf.get_default_graph().unique_name("summaries")
            self._set_log_variables(log_vars)
            self.train_writer = tf.summary.FileWriter(logdir,
                                                      tf.get_default_graph())
        else:
            self.logging = False

        self.debug = debug
        if self.debug:
            self.op_check = tf.add_check_numerics_ops()

        self.reset = [tf.variables_initializer([self.t])]

        # from variational_inference.py
        if var_list is None:
            var_list = set()
            trainables = tf.trainable_variables()
            for z, qz in six.iteritems(self.latent_vars):
                var_list.update(get_variables(z, collection=trainables))
                var_list.update(get_variables(qz, collection=trainables))

            for x, qx in six.iteritems(self.data):
                if isinstance(x, RandomVariable) and \
                        not isinstance(qx, RandomVariable):
                    var_list.update(get_variables(x, collection=trainables))

        var_list = list(var_list)

        self.loss, grads_and_vars = self.build_loss_and_gradients(var_list)

        clipped_grads_and_vars = []
        for grad, var in grads_and_vars:
            if "kernel" in var.name or "bias" in var.name:
                clipped_grads_and_vars.append((tf.clip_by_norm(grad,
                                                               maxnorm,
                                                               axes=[0]), var))
            else:
                clipped_grads_and_vars.append((grad, var))
        # for grad, var in grads_and_vars:
        #     clipped_grads_and_vars.append(
        #         (tf.clip_by_value(grad, -1000., 1000.), var))
        del grads_and_vars

        if self.logging:
            tf.summary.scalar("loss",
                              self.loss,
                              collections=[self._summary_key])
        for grad, var in clipped_grads_and_vars:
            tf.summary.histogram("gradient/" + var.name.replace(':', '/'),
                                 grad,
                                 collections=[self._summary_key])
            tf.summary.scalar("gradient_norm/" + var.name.replace(':', '/'),
                              tf.norm(grad),
                              collections=[self._summary_key])

        self.summarize = tf.summary.merge_all(key=self._summary_key)

        if optimizer is None and global_step is None:
            global_step = tf.Variable(0, trainable=False, name="global_step")

        if isinstance(global_step, tf.Variable):
            starter_learning_rate = 0.1
            learning_rate = tf.train.exponential_decay(starter_learning_rate,
                                                       global_step,
                                                       100,
                                                       0.9,
                                                       staircase=True)
        else:
            learning_rate = 0.01

        # Build optimizer.
        if optimizer is None:
            optimizer = tf.train.AdamOptimizer(learning_rate)
        elif isinstance(optimizer, str):
            if optimizer == 'gradientdescent':
                optimizer = tf.train.GradientDescentOptimizer(learning_rate)
            elif optimizer == 'adadelta':
                optimizer = tf.train.AdadeltaOptimizer(learning_rate)
            elif optimizer == 'adagrad':
                optimizer = tf.train.AdagradOptimizer(learning_rate)
            elif optimizer == 'momentum':
                optimizer = tf.train.MomentumOptimizer(learning_rate, 0.9)
            elif optimizer == 'adam':
                optimizer = tf.train.AdamOptimizer(learning_rate)
            elif optimizer == 'ftrl':
                optimizer = tf.train.FtrlOptimizer(learning_rate)
            elif optimizer == 'rmsprop':
                optimizer = tf.train.RMSPropOptimizer(learning_rate)
            else:
                raise ValueError('Optimizer class not found:', optimizer)
        elif not isinstance(optimizer, tf.train.Optimizer):
            raise TypeError(
                "Optimizer must be str, tf.train.Optimizer, or None.")

        with tf.variable_scope(None, default_name="optimizer") as scope:
            if not use_prettytensor:
                self.train = optimizer.apply_gradients(clipped_grads_and_vars,
                                                       global_step=global_step)
            else:
                import prettytensor as pt
                self.train = pt.apply_optimizer(optimizer,
                                                losses=[self.loss],
                                                global_step=global_step,
                                                var_list=var_list)

        self.reset.append(
            tf.variables_initializer(
                tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                  scope=scope.name)))
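
A hypothetical driver loop for the ops this `initialize` builds, using only attributes set above (`train`, `increment_t`, `loss`, `summarize`, `train_writer`, `n_iter`, `n_print`); the session handling and print cadence are illustrative:

sess = tf.Session()
sess.run(tf.global_variables_initializer())

for _ in range(inference.n_iter):
    _, t, loss = sess.run([inference.train, inference.increment_t, inference.loss])
    if inference.logging and t % inference.n_print == 0:
        inference.train_writer.add_summary(sess.run(inference.summarize), t)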