Example #1
    def initialize(self,
                   n_iter=1000,
                   n_print=None,
                   scale=None,
                   logdir=None,
                   debug=False):
        """Initialize inference algorithm. It initializes hyperparameters
    and builds ops for the algorithm's computational graph. No ops
    should be created outside the call to ``initialize()``.

    Parameters
    ----------
    n_iter : int, optional
      Number of iterations for algorithm.
    n_print : int, optional
      Number of iterations for each print progress. To suppress print
      progress, specify 0. Default is ``int(n_iter / 10)``.
    scale : dict of RandomVariable to tf.Tensor, optional
      A tensor to scale computation for any random variable that it is
      bound to. Its shape must be broadcastable; it is multiplied
      element-wise to the random variable. For example, this is useful
      for mini-batch scaling when inferring global variables, or
      applying masks on a random variable.
    logdir : str, optional
      Directory where event file will be written. For details,
      see ``tf.summary.FileWriter``. Default is to write nothing.
    debug : bool, optional
      If True, add checks for ``NaN`` and ``Inf`` to all computations
      in the graph. May result in substantially slower execution
      times.
    """
        self.n_iter = n_iter
        if n_print is None:
            self.n_print = int(n_iter / 10)
        else:
            self.n_print = n_print

        self.progbar = Progbar(self.n_iter)
        self.t = tf.Variable(0, trainable=False, name="iteration")

        self.increment_t = self.t.assign_add(1)

        if scale is None:
            scale = {}
        elif not isinstance(scale, dict):
            raise TypeError("scale must be a dict object.")

        self.scale = scale

        if logdir is not None:
            self.logging = True
            self.train_writer = tf.summary.FileWriter(logdir,
                                                      tf.get_default_graph())
            self.summarize = tf.summary.merge_all()
        else:
            self.logging = False

        self.debug = debug
        if self.debug:
            self.op_check = tf.add_check_numerics_ops()
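The docstring above documents `scale` and `logdir` but the snippet never shows a call. A minimal usage sketch (not from the listing; it assumes a concrete subclass such as `ed.KLqp`, a latent pair `z: qz`, an observed variable `x` fed through placeholder `x_ph`, and a dataset of `N` points read in minibatches of size `M`):

import edward as ed

inference = ed.KLqp({z: qz}, data={x: x_ph})
# Scale the minibatch likelihood by N / M so updates approximate the
# full-data objective, and write TensorBoard events under 'log/'.
inference.initialize(n_iter=1000,
                     n_print=100,
                     scale={x: float(N) / M},
                     logdir='log')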
Example #2
File: vae.py Project: zueigung1419/edward
def main(_):
    ed.set_seed(42)

    # DATA. MNIST batches are fed at training time.
    (x_train, _), (x_test, _) = mnist(FLAGS.data_dir)
    x_train_generator = generator(x_train, FLAGS.M)

    # MODEL
    # Define a subgraph of the full model, corresponding to a minibatch of
    # size M.
    z = Normal(loc=tf.zeros([FLAGS.M, FLAGS.d]),
               scale=tf.ones([FLAGS.M, FLAGS.d]))
    hidden = tf.layers.dense(z, 256, activation=tf.nn.relu)
    x = Bernoulli(logits=tf.layers.dense(hidden, 28 * 28))

    # INFERENCE
    # Define a subgraph of the variational model, corresponding to a
    # minibatch of size M.
    x_ph = tf.placeholder(tf.int32, [FLAGS.M, 28 * 28])
    hidden = tf.layers.dense(tf.cast(x_ph, tf.float32),
                             256,
                             activation=tf.nn.relu)
    qz = Normal(loc=tf.layers.dense(hidden, FLAGS.d),
                scale=tf.layers.dense(hidden,
                                      FLAGS.d,
                                      activation=tf.nn.softplus))

    # Bind p(x, z) and q(z | x) to the same TensorFlow placeholder for x.
    inference = ed.KLqp({z: qz}, data={x: x_ph})
    optimizer = tf.train.RMSPropOptimizer(0.01, epsilon=1.0)
    inference.initialize(optimizer=optimizer)

    tf.global_variables_initializer().run()

    n_iter_per_epoch = x_train.shape[0] // FLAGS.M
    for epoch in range(1, FLAGS.n_epoch + 1):
        print("Epoch: {0}".format(epoch))
        avg_loss = 0.0

        pbar = Progbar(n_iter_per_epoch)
        for t in range(1, n_iter_per_epoch + 1):
            pbar.update(t)
            x_batch = next(x_train_generator)
            info_dict = inference.update(feed_dict={x_ph: x_batch})
            avg_loss += info_dict['loss']

        # Print a lower bound to the average marginal likelihood for an
        # image.
        avg_loss /= n_iter_per_epoch
        avg_loss /= FLAGS.M
        print("-log p(x) <= {:0.3f}".format(avg_loss))

        # Prior predictive check.
        images = x.eval()
        for m in range(FLAGS.M):
            imsave(
                os.path.join(FLAGS.out_dir, '%d.png') % m,
                images[m].reshape(28, 28))
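Examples #2 through #6 rely on a `generator(array, batch_size)` helper (and an `mnist(data_dir)` loader) defined elsewhere in the example files. One plausible, self-contained sketch of the batch generator, assuming it simply cycles over the first axis of a NumPy array:

import numpy as np

def generator(array, batch_size):
    """Yield successive minibatches over the first axis, wrapping around."""
    start = 0
    while True:
        stop = start + batch_size
        diff = stop - array.shape[0]
        if diff <= 0:
            batch = array[start:stop]
            start += batch_size
        else:
            # Wrap around to the beginning of the array.
            batch = np.concatenate((array[start:], array[:diff]))
            start = diff
        yield batch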
Example #3
File: vae.py Project: JoyceYa/edward
def main(_):
  ed.set_seed(42)

  # DATA. MNIST batches are fed at training time.
  (x_train, _), (x_test, _) = mnist(FLAGS.data_dir)
  x_train_generator = generator(x_train, FLAGS.M)

  # MODEL
  # Define a subgraph of the full model, corresponding to a minibatch of
  # size M.
  z = Normal(loc=tf.zeros([FLAGS.M, FLAGS.d]),
             scale=tf.ones([FLAGS.M, FLAGS.d]))
  hidden = tf.layers.dense(z, 256, activation=tf.nn.relu)
  x = Bernoulli(logits=tf.layers.dense(hidden, 28 * 28))

  # INFERENCE
  # Define a subgraph of the variational model, corresponding to a
  # minibatch of size M.
  x_ph = tf.placeholder(tf.int32, [FLAGS.M, 28 * 28])
  hidden = tf.layers.dense(tf.cast(x_ph, tf.float32), 256,
                           activation=tf.nn.relu)
  qz = Normal(loc=tf.layers.dense(hidden, FLAGS.d),
              scale=tf.layers.dense(
                  hidden, FLAGS.d, activation=tf.nn.softplus))

  # Bind p(x, z) and q(z | x) to the same TensorFlow placeholder for x.
  inference = ed.KLqp({z: qz}, data={x: x_ph})
  optimizer = tf.train.RMSPropOptimizer(0.01, epsilon=1.0)
  inference.initialize(optimizer=optimizer)

  tf.global_variables_initializer().run()

  n_iter_per_epoch = x_train.shape[0] // FLAGS.M
  for epoch in range(1, FLAGS.n_epoch + 1):
    print("Epoch: {0}".format(epoch))
    avg_loss = 0.0

    pbar = Progbar(n_iter_per_epoch)
    for t in range(1, n_iter_per_epoch + 1):
      pbar.update(t)
      x_batch = next(x_train_generator)
      info_dict = inference.update(feed_dict={x_ph: x_batch})
      avg_loss += info_dict['loss']

    # Print a lower bound to the average marginal likelihood for an
    # image.
    avg_loss /= n_iter_per_epoch
    avg_loss /= FLAGS.M
    print("-log p(x) <= {:0.3f}".format(avg_loss))

    # Prior predictive check.
    images = x.eval()
    for m in range(FLAGS.M):
      imsave(os.path.join(FLAGS.out_dir, '%d.png') % m,
             images[m].reshape(28, 28))
Example #4
def main(_):
    ed.set_seed(42)

    # DATA. MNIST batches are fed at training time.
    (x_train, _), (x_test, _) = mnist(FLAGS.data_dir)
    x_train_generator = generator(x_train, FLAGS.M)

    # MODEL
    z = Normal(loc=tf.zeros([FLAGS.M, FLAGS.d]),
               scale=tf.ones([FLAGS.M, FLAGS.d]))
    logits = generative_network(z)
    x = Bernoulli(logits=logits)

    # INFERENCE
    x_ph = tf.placeholder(tf.int32, [FLAGS.M, 28 * 28])
    loc, scale = inference_network(tf.cast(x_ph, tf.float32))
    qz = Normal(loc=loc, scale=scale)

    # Bind p(x, z) and q(z | x) to the same placeholder for x.
    inference = ed.KLqp({z: qz}, data={x: x_ph})
    optimizer = tf.train.AdamOptimizer(0.01, epsilon=1.0)
    inference.initialize(optimizer=optimizer)

    hidden_rep = tf.sigmoid(logits)

    tf.global_variables_initializer().run()

    n_iter_per_epoch = x_train.shape[0] // FLAGS.M
    for epoch in range(1, FLAGS.n_epoch + 1):
        print("Epoch: {0}".format(epoch))
        avg_loss = 0.0

        pbar = Progbar(n_iter_per_epoch)
        for t in range(1, n_iter_per_epoch + 1):
            pbar.update(t)
            x_batch = next(x_train_generator)
            info_dict = inference.update(feed_dict={x_ph: x_batch})
            avg_loss += info_dict['loss']

        # Print a lower bound to the average marginal likelihood for an
        # image.
        avg_loss /= n_iter_per_epoch
        avg_loss /= FLAGS.M
        print("-log p(x) <= {:0.3f}".format(avg_loss))

        # Visualize hidden representations.
        images = hidden_rep.eval()
        for m in range(FLAGS.M):
            imsave(
                os.path.join(FLAGS.out_dir, '%d.png') % m,
                images[m].reshape(28, 28))
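Examples #4 and #5 call `generative_network` and `inference_network`, which are defined elsewhere in the example file. A minimal dense-layer sketch of what they might look like (assumed; the project's actual networks may be convolutional, and `FLAGS.d` is the latent dimension used above):

import tensorflow as tf

def generative_network(z):
    # Hypothetical decoder: latent codes -> Bernoulli logits over 28 * 28 pixels.
    hidden = tf.layers.dense(z, 256, activation=tf.nn.relu)
    return tf.layers.dense(hidden, 28 * 28)

def inference_network(x):
    # Hypothetical encoder: flattened images -> parameters of q(z | x).
    hidden = tf.layers.dense(x, 256, activation=tf.nn.relu)
    loc = tf.layers.dense(hidden, FLAGS.d)
    scale = tf.layers.dense(hidden, FLAGS.d, activation=tf.nn.softplus)
    return loc, scale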
Example #5
def main(_):
  ed.set_seed(42)

  # DATA. MNIST batches are fed at training time.
  (x_train, _), (x_test, _) = mnist(FLAGS.data_dir)
  x_train_generator = generator(x_train, FLAGS.M)

  # MODEL
  z = Normal(loc=tf.zeros([FLAGS.M, FLAGS.d]),
             scale=tf.ones([FLAGS.M, FLAGS.d]))
  logits = generative_network(z)
  x = Bernoulli(logits=logits)

  # INFERENCE
  x_ph = tf.placeholder(tf.int32, [FLAGS.M, 28 * 28])
  loc, scale = inference_network(tf.cast(x_ph, tf.float32))
  qz = Normal(loc=loc, scale=scale)

  # Bind p(x, z) and q(z | x) to the same placeholder for x.
  inference = ed.KLqp({z: qz}, data={x: x_ph})
  optimizer = tf.train.AdamOptimizer(0.01, epsilon=1.0)
  inference.initialize(optimizer=optimizer)

  hidden_rep = tf.sigmoid(logits)

  tf.global_variables_initializer().run()

  n_iter_per_epoch = x_train.shape[0] // FLAGS.M
  for epoch in range(1, FLAGS.n_epoch + 1):
    print("Epoch: {0}".format(epoch))
    avg_loss = 0.0

    pbar = Progbar(n_iter_per_epoch)
    for t in range(1, n_iter_per_epoch + 1):
      pbar.update(t)
      x_batch = next(x_train_generator)
      info_dict = inference.update(feed_dict={x_ph: x_batch})
      avg_loss += info_dict['loss']

    # Print a lower bound to the average marginal likelihood for an
    # image.
    avg_loss /= n_iter_per_epoch
    avg_loss /= FLAGS.M
    print("-log p(x) <= {:0.3f}".format(avg_loss))

    # Visualize hidden representations.
    images = hidden_rep.eval()
    for m in range(FLAGS.M):
      imsave(os.path.join(FLAGS.out_dir, '%d.png') % m,
             images[m].reshape(28, 28))
Example #6
def main(_):
  ed.set_seed(42)

  # DATA
  (x_train, _), (x_test, _), (x_valid, _) = caltech101_silhouettes(
      FLAGS.data_dir)
  x_train_generator = generator(x_train, FLAGS.batch_size)
  x_ph = tf.placeholder(tf.int32, [None, 28 * 28])

  # MODEL
  zs = [0] * len(FLAGS.hidden_sizes)
  for l in reversed(range(len(FLAGS.hidden_sizes))):
    if l == len(FLAGS.hidden_sizes) - 1:
      logits = tf.zeros([tf.shape(x_ph)[0], FLAGS.hidden_sizes[l]])
    else:
      logits = tf.layers.dense(tf.cast(zs[l + 1], tf.float32),
                               FLAGS.hidden_sizes[l], activation=None)
    zs[l] = Bernoulli(logits=logits)

  x = Bernoulli(logits=tf.layers.dense(tf.cast(zs[0], tf.float32),
                                       28 * 28, activation=None))

  # INFERENCE
  # Define variational model with reverse ordering as probability model:
  # if p is 15-100-300 from top-down, q is 300-100-15 from bottom-up.
  qzs = [0] * len(FLAGS.hidden_sizes)
  for l in range(len(FLAGS.hidden_sizes)):
    if l == 0:
      logits = tf.layers.dense(tf.cast(x_ph, tf.float32),
                               FLAGS.hidden_sizes[l], activation=None)
    else:
      logits = tf.layers.dense(tf.cast(qzs[l - 1], tf.float32),
                               FLAGS.hidden_sizes[l], activation=None)
    qzs[l] = Bernoulli(logits=logits)

  inference = ed.KLqp({z: qz for z, qz in zip(zs, qzs)}, data={x: x_ph})
  optimizer = tf.train.AdamOptimizer(FLAGS.step_size)
  inference.initialize(optimizer=optimizer, n_samples=FLAGS.n_train_samples)

  # Build tensor for log-likelihood given one variational sample to run
  # on test data.
  x_post = ed.copy(x, {z: qz for z, qz in zip(zs, qzs)})
  x_neg_log_prob = (-tf.reduce_sum(x_post.log_prob(x_ph)) /
                    tf.cast(tf.shape(x_ph)[0], tf.float32))

  sess = ed.get_session()
  tf.global_variables_initializer().run()

  for epoch in range(FLAGS.n_epoch):
    print("Epoch {}".format(epoch))
    train_loss = 0.0

    pbar = Progbar(FLAGS.n_iter_per_epoch)
    for t in range(1, FLAGS.n_iter_per_epoch + 1):
      pbar.update(t)
      x_batch = next(x_train_generator)
      info_dict = inference.update(feed_dict={x_ph: x_batch})
      train_loss += info_dict['loss']

    # Print per-data point loss, averaged over training epoch.
    train_loss /= FLAGS.n_iter_per_epoch
    train_loss /= FLAGS.batch_size
    print("Training negative log-likelihood: {:0.3f}".format(train_loss))

    test_loss = [sess.run(x_neg_log_prob, {x_ph: x_test})
                 for _ in range(FLAGS.n_test_samples)]
    test_loss = np.mean(test_loss)
    print("Test negative log-likelihood: {:0.3f}".format(test_loss))

    # Prior predictive check.
    images = sess.run(x, {x_ph: x_batch})  # feed ph to determine sample size
    for m in range(FLAGS.batch_size):
      imsave("{}/{}.png".format(out_dir, m), images[m].reshape(28, 28))
Example #7
File: inference.py Project: ekostem/edward
class Inference(object):
  """Abstract base class for inference. All inference algorithms in
  Edward inherit from `Inference`, sharing common methods and
  properties via a class hierarchy.

  Specific algorithms typically inherit from other subclasses of
  `Inference` rather than `Inference` directly. For example, one
  might inherit from the abstract classes `MonteCarlo` or
  `VariationalInference`.

  To build an algorithm inheriting from `Inference`, one must at the
  minimum implement `initialize` and `update`: the former builds
  the computational graph for the algorithm; the latter runs the
  computational graph for the algorithm.

  To reset inference (e.g., internal variable counters incremented
  over training), fetch inference's reset ops from session with
  `sess.run(inference.reset)`.

  #### Examples

  ```python
  mu = Normal(loc=tf.constant(0.0), scale=tf.constant(1.0))
  x = Normal(loc=tf.ones(50) * mu, scale=tf.constant(1.0))

  qmu_loc = tf.Variable(tf.random_normal([]))
  qmu_scale = tf.nn.softplus(tf.Variable(tf.random_normal([])))
  qmu = Normal(loc=qmu_loc, scale=qmu_scale)

  inference = ed.Inference({mu: qmu}, data={x: tf.zeros(50)})
  ```
  """
  def __init__(self, latent_vars=None, data=None):
    """Create an inference algorithm.

    Args:
      latent_vars: dict, optional.
        Collection of latent variables (of type `RandomVariable` or
        `tf.Tensor`) to perform inference on. Each random variable is
        bound to another random variable; the latter will infer the
        former conditional on data.
      data: dict, optional.
        Data dictionary which binds observed variables (of type
        `RandomVariable` or `tf.Tensor`) to their realizations (of
        type `tf.Tensor`). It can also bind placeholders (of type
        `tf.Tensor`) used in the model to their realizations; and
        prior latent variables (of type `RandomVariable`) to posterior
        latent variables (of type `RandomVariable`).
    """
    sess = get_session()
    if latent_vars is None:
      latent_vars = {}
    if data is None:
      data = {}

    check_latent_vars(latent_vars)
    self.latent_vars = latent_vars

    check_data(data)
    self.data = {}
    for key, value in six.iteritems(data):
      if isinstance(key, tf.Tensor) and "Placeholder" in key.op.type:
        self.data[key] = value
      elif isinstance(key, (RandomVariable, tf.Tensor)):
        if isinstance(value, (RandomVariable, tf.Tensor)):
          self.data[key] = value
        elif isinstance(value, (float, list, int, np.ndarray, np.number, str)):
          # If value is a Python type, store it in the graph.
          # Assign its placeholder with the key's data type.
          with tf.variable_scope("data"):
            ph = tf.placeholder(key.dtype, np.shape(value))
            var = tf.Variable(ph, trainable=False, collections=[])
            sess.run(var.initializer, {ph: value})
            self.data[key] = var

  def run(self, variables=None, use_coordinator=True, *args, **kwargs):
    """A simple wrapper to run inference.

    1. Initialize algorithm via `initialize`.
    2. (Optional) Build a TensorFlow summary writer for TensorBoard.
    3. (Optional) Initialize TensorFlow variables.
    4. (Optional) Start queue runners.
    5. Run `update` for `self.n_iter` iterations.
    6. While running, `print_progress`.
    7. Finalize algorithm via `finalize`.
    8. (Optional) Stop queue runners.

    To customize the way inference is run, run these steps
    individually.

    Args:
      variables: list, optional.
        A list of TensorFlow variables to initialize during inference.
        Default is to initialize all variables (this includes
        reinitializing variables that were already initialized). To
        avoid initializing any variables, pass in an empty list.
      use_coordinator: bool, optional.
        Whether to start and stop queue runners during inference using a
        TensorFlow coordinator. For example, queue runners are necessary
        for batch training with file readers.
      *args:
        Passed into `initialize`.
      **kwargs:
        Passed into `initialize`.
    """
    self.initialize(*args, **kwargs)

    if variables is None:
      init = tf.global_variables_initializer()
    else:
      init = tf.variables_initializer(variables)

    # Feed placeholders in case initialization depends on them.
    feed_dict = {}
    for key, value in six.iteritems(self.data):
      if isinstance(key, tf.Tensor) and "Placeholder" in key.op.type:
        feed_dict[key] = value

    init.run(feed_dict)

    if use_coordinator:
      # Start input enqueue threads.
      self.coord = tf.train.Coordinator()
      self.threads = tf.train.start_queue_runners(coord=self.coord)

    for _ in range(self.n_iter):
      info_dict = self.update()
      self.print_progress(info_dict)

    self.finalize()

    if use_coordinator:
      # Ask threads to stop.
      self.coord.request_stop()
      self.coord.join(self.threads)

  @abc.abstractmethod
  def initialize(self, n_iter=1000, n_print=None, scale=None, logdir=None,
                 log_timestamp=True, log_vars=None, debug=False):
    """Initialize inference algorithm. It initializes hyperparameters
    and builds ops for the algorithm's computation graph.

    Any derived class of `Inference` **must** implement this method.
    No methods which build ops should be called outside `initialize()`.

    Args:
      n_iter: int, optional.
        Number of iterations for algorithm when calling `run()`.
        Alternatively if controlling inference manually, it is the
        expected number of calls to `update()`; this number determines
        tracking information during the print progress.
      n_print: int, optional.
        Number of iterations for each print progress. To suppress print
        progress, specify 0. Default is `int(n_iter / 100)`.
      scale: dict of RandomVariable to tf.Tensor, optional.
        A tensor to scale computation for any random variable that it is
        bound to. Its shape must be broadcastable; it is multiplied
        element-wise to the random variable. For example, this is useful
        for mini-batch scaling when inferring global variables, or
        applying masks on a random variable.
      logdir: str, optional.
        Directory where event file will be written. For details,
        see `tf.summary.FileWriter`. Default is to log nothing.
      log_timestamp: bool, optional.
        If True (and `logdir` is specified), create a subdirectory of
        `logdir` to save the specific run results. The subdirectory's
        name is the current UTC timestamp with format 'YYYYMMDD_HHMMSS'.
      log_vars: list, optional.
        Specifies the list of variables to log after each `n_print`
        steps. If None, will log all variables. If `[]`, no variables
        will be logged. `logdir` must be specified for variables to be
        logged.
      debug: bool, optional.
        If True, add checks for `NaN` and `Inf` to all computations
        in the graph. May result in substantially slower execution
        times.
    """
    self.n_iter = n_iter
    if n_print is None:
      self.n_print = int(n_iter / 100)
    else:
      self.n_print = n_print

    self.progbar = Progbar(self.n_iter)
    self.t = tf.Variable(0, trainable=False, name="iteration")

    self.increment_t = self.t.assign_add(1)

    if scale is None:
      scale = {}
    elif not isinstance(scale, dict):
      raise TypeError("scale must be a dict object.")

    self.scale = scale

    if logdir is not None:
      self.logging = True
      if log_timestamp:
        logdir = os.path.join(
            logdir, datetime.strftime(datetime.utcnow(), "%Y%m%d_%H%M%S"))

      self._set_log_variables(log_vars)
      self.train_writer = tf.summary.FileWriter(logdir, tf.get_default_graph())
      self.summarize = tf.summary.merge_all()
    else:
      self.logging = False

    self.debug = debug
    if self.debug:
      self.op_check = tf.add_check_numerics_ops()

    # Store reset ops which user can call. Subclasses should append
    # any ops needed to reset internal variables in inference.
    self.reset = [tf.variables_initializer([self.t])]

  @abc.abstractmethod
  def update(self, feed_dict=None):
    """Run one iteration of inference.

    Any derived class of `Inference` **must** implement this method.

    Args:
      feed_dict: dict, optional.
        Feed dictionary for a TensorFlow session run. It is used to feed
        placeholders that are not fed during initialization.

    Returns:
      dict.
        Dictionary of algorithm-specific information.
    """
    if feed_dict is None:
      feed_dict = {}

    for key, value in six.iteritems(self.data):
      if isinstance(key, tf.Tensor) and "Placeholder" in key.op.type:
        feed_dict[key] = value

    sess = get_session()
    t = sess.run(self.increment_t)

    if self.debug:
      sess.run(self.op_check, feed_dict)

    if self.logging and self.n_print != 0:
      if t == 1 or t % self.n_print == 0:
        summary = sess.run(self.summarize, feed_dict)
        self.train_writer.add_summary(summary, t)

    return {'t': t}

  def print_progress(self, info_dict):
    """Print progress to output.

    Args:
      info_dict: dict.
        Dictionary of algorithm-specific information.
    """
    if self.n_print != 0:
      t = info_dict['t']
      if t == 1 or t % self.n_print == 0:
        self.progbar.update(t)

  def finalize(self):
    """Function to call after convergence.
    """
    if self.logging:
      self.train_writer.close()

  def _set_log_variables(self, log_vars=None):
    """Log variables to TensorBoard.

    For each variable in `log_vars`, forms a `tf.summary.scalar` if
    the variable has scalar shape; otherwise forms a `tf.summary.histogram`.

    Args:
      log_vars: list, optional.
        Specifies the list of variables to log after each `n_print`
        steps. If None, will log all variables. If `[]`, no variables
        will be logged.
    """
    summary_key = 'summaries_' + str(id(self))
    if log_vars is None:
      log_vars = []
      for key in six.iterkeys(self.data):
        log_vars += get_variables(key)

      for key, value in six.iteritems(self.latent_vars):
        log_vars += get_variables(key)
        log_vars += get_variables(value)

      log_vars = set(log_vars)

    for var in log_vars:
      # replace colons which are an invalid character
      var_name = var.name.replace(':', '/')
      # Log all scalars.
      if len(var.shape) == 0:
        tf.summary.scalar("parameter/{}".format(var_name),
                          var, collections=[summary_key])
      elif len(var.shape) == 1 and var.shape[0] == 1:
        tf.summary.scalar("parameter/{}".format(var_name),
                          var[0], collections=[summary_key])
      else:
        # If var is multi-dimensional, log a histogram of its values.
        tf.summary.histogram("parameter/{}".format(var_name),
                             var, collections=[summary_key])
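The class docstring notes that inference can be reset by fetching `inference.reset`. A short sketch of a manual training loop followed by a reset, using only the methods defined on this class (the model, data, and concrete `inference` object are assumed from context):

sess = ed.get_session()
tf.global_variables_initializer().run()

for _ in range(inference.n_iter):
    info_dict = inference.update()
    inference.print_progress(info_dict)
inference.finalize()

# Reset internal state (e.g. the iteration counter) before running again.
sess.run(inference.reset)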
Example #8
File: inference.py Project: ekostem/edward
  def initialize(self, n_iter=1000, n_print=None, scale=None, logdir=None,
                 log_timestamp=True, log_vars=None, debug=False):
    """Initialize inference algorithm. It initializes hyperparameters
    and builds ops for the algorithm's computation graph.

    Any derived class of `Inference` **must** implement this method.
    No methods which build ops should be called outside `initialize()`.

    Args:
      n_iter: int, optional.
        Number of iterations for algorithm when calling `run()`.
        Alternatively if controlling inference manually, it is the
        expected number of calls to `update()`; this number determines
        tracking information during the print progress.
      n_print: int, optional.
        Number of iterations for each print progress. To suppress print
        progress, specify 0. Default is `int(n_iter / 100)`.
      scale: dict of RandomVariable to tf.Tensor, optional.
        A tensor to scale computation for any random variable that it is
        bound to. Its shape must be broadcastable; it is multiplied
        element-wise to the random variable. For example, this is useful
        for mini-batch scaling when inferring global variables, or
        applying masks on a random variable.
      logdir: str, optional.
        Directory where event file will be written. For details,
        see `tf.summary.FileWriter`. Default is to log nothing.
      log_timestamp: bool, optional.
        If True (and `logdir` is specified), create a subdirectory of
        `logdir` to save the specific run results. The subdirectory's
        name is the current UTC timestamp with format 'YYYYMMDD_HHMMSS'.
      log_vars: list, optional.
        Specifies the list of variables to log after each `n_print`
        steps. If None, will log all variables. If `[]`, no variables
        will be logged. `logdir` must be specified for variables to be
        logged.
      debug: bool, optional.
        If True, add checks for `NaN` and `Inf` to all computations
        in the graph. May result in substantially slower execution
        times.
    """
    self.n_iter = n_iter
    if n_print is None:
      self.n_print = int(n_iter / 100)
    else:
      self.n_print = n_print

    self.progbar = Progbar(self.n_iter)
    self.t = tf.Variable(0, trainable=False, name="iteration")

    self.increment_t = self.t.assign_add(1)

    if scale is None:
      scale = {}
    elif not isinstance(scale, dict):
      raise TypeError("scale must be a dict object.")

    self.scale = scale

    if logdir is not None:
      self.logging = True
      if log_timestamp:
        logdir = os.path.join(
            logdir, datetime.strftime(datetime.utcnow(), "%Y%m%d_%H%M%S"))

      self._set_log_variables(log_vars)
      self.train_writer = tf.summary.FileWriter(logdir, tf.get_default_graph())
      self.summarize = tf.summary.merge_all()
    else:
      self.logging = False

    self.debug = debug
    if self.debug:
      self.op_check = tf.add_check_numerics_ops()

    # Store reset ops which user can call. Subclasses should append
    # any ops needed to reset internal variables in inference.
    self.reset = [tf.variables_initializer([self.t])]
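A usage sketch for the logging arguments documented above (the model/approximation pair `mu: qmu`, observations `x: x_data`, and the variational parameters `qmu_loc` and `qmu_scale` are assumed; any concrete subclass such as `ed.KLqp` accepts these arguments through `initialize`):

inference = ed.KLqp({mu: qmu}, data={x: x_data})
inference.initialize(n_iter=2000,
                     n_print=100,
                     logdir='log',           # events land in log/YYYYMMDD_HHMMSS/
                     log_timestamp=True,
                     log_vars=[qmu_loc, qmu_scale])  # summarize only these variables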
Example #9
def main(_):
    ed.set_seed(42)

    # DATA
    x_train, metadata = nips(FLAGS.data_dir)
    documents = metadata['columns']
    words = metadata['rows']

    # Subset to documents in 2011 and words appearing in at least two
    # documents and have a total word count of at least 10.
    doc_idx = [
        i for i, document in enumerate(documents)
        if document.startswith('2011')
    ]
    documents = [documents[doc] for doc in doc_idx]
    x_train = x_train[:, doc_idx]
    word_idx = np.logical_and(
        np.sum(x_train != 0, 1) >= 2,
        np.sum(x_train, 1) >= 10)
    words = [word for word, idx in zip(words, word_idx) if idx]
    x_train = x_train[word_idx, :]
    x_train = x_train.T

    N = x_train.shape[0]  # number of documents
    D = x_train.shape[1]  # vocabulary size

    # MODEL
    W2 = Gamma(0.1, 0.3, sample_shape=[FLAGS.K[2], FLAGS.K[1]])
    W1 = Gamma(0.1, 0.3, sample_shape=[FLAGS.K[1], FLAGS.K[0]])
    W0 = Gamma(0.1, 0.3, sample_shape=[FLAGS.K[0], D])

    z3 = Gamma(0.1, 0.1, sample_shape=[N, FLAGS.K[2]])
    z2 = Gamma(FLAGS.shape, FLAGS.shape / tf.matmul(z3, W2))
    z1 = Gamma(FLAGS.shape, FLAGS.shape / tf.matmul(z2, W1))
    x = Poisson(tf.matmul(z1, W0))

    # INFERENCE
    qW2 = pointmass_q(W2.shape)
    qW1 = pointmass_q(W1.shape)
    qW0 = pointmass_q(W0.shape)
    if FLAGS.q == 'gamma':
        qz3 = gamma_q(z3.shape)
        qz2 = gamma_q(z2.shape)
        qz1 = gamma_q(z1.shape)
    else:
        qz3 = lognormal_q(z3.shape)
        qz2 = lognormal_q(z2.shape)
        qz1 = lognormal_q(z1.shape)

    # We apply variational EM with E-step over local variables
    # and M-step to point estimate the global weight matrices.
    inference_e = ed.KLqp({z1: qz1, z2: qz2, z3: qz3},
                          data={x: x_train, W0: qW0, W1: qW1, W2: qW2})
    inference_m = ed.MAP({W0: qW0, W1: qW1, W2: qW2},
                         data={x: x_train, z1: qz1, z2: qz2, z3: qz3})

    optimizer_e = tf.train.RMSPropOptimizer(FLAGS.lr)
    optimizer_m = tf.train.RMSPropOptimizer(FLAGS.lr)
    kwargs = {
        'optimizer': optimizer_e,
        'n_print': 100,
        'logdir': FLAGS.logdir,
        'log_timestamp': False
    }
    if FLAGS.q == 'gamma':
        kwargs['n_samples'] = 30
    inference_e.initialize(**kwargs)
    inference_m.initialize(optimizer=optimizer_m)

    sess = ed.get_session()
    tf.global_variables_initializer().run()

    n_epoch = 20
    n_iter_per_epoch = 10000
    for epoch in range(n_epoch):
        print("Epoch {}".format(epoch))
        nll = 0.0

        pbar = Progbar(n_iter_per_epoch)
        for t in range(1, n_iter_per_epoch + 1):
            pbar.update(t)
            info_dict_e = inference_e.update()
            info_dict_m = inference_m.update()
            nll += info_dict_e['loss']

        # Compute perplexity averaged over a number of training iterations.
        # The model's negative log-likelihood of data is upper bounded by
        # the variational objective.
        nll /= n_iter_per_epoch
        perplexity = np.exp(nll / np.sum(x_train))
        print("Negative log-likelihood <= {:0.3f}".format(nll))
        print("Perplexity <= {:0.3f}".format(perplexity))

        # Print top 10 words for first 10 topics.
        qW0_vals = sess.run(qW0)
        for k in range(10):
            top_words_idx = qW0_vals[k, :].argsort()[-10:][::-1]
            top_words = " ".join([words[i] for i in top_words_idx])
            print("Topic {}: {}".format(k, top_words))
Example #10
def main(_):
    ed.set_seed(42)

    # DATA
    (x_train, _), (x_test, _), (x_valid,
                                _) = caltech101_silhouettes(FLAGS.data_dir)
    x_train_generator = generator(x_train, FLAGS.batch_size)
    x_ph = tf.placeholder(tf.int32, [None, 28 * 28])

    # MODEL
    zs = [0] * len(FLAGS.hidden_sizes)
    for l in reversed(range(len(FLAGS.hidden_sizes))):
        if l == len(FLAGS.hidden_sizes) - 1:
            logits = tf.zeros([tf.shape(x_ph)[0], FLAGS.hidden_sizes[l]])
        else:
            logits = tf.layers.dense(tf.cast(zs[l + 1], tf.float32),
                                     FLAGS.hidden_sizes[l],
                                     activation=None)
        zs[l] = Bernoulli(logits=logits)

    x = Bernoulli(logits=tf.layers.dense(
        tf.cast(zs[0], tf.float32), 28 * 28, activation=None))

    # INFERENCE
    # Define variational model with reverse ordering as probability model:
    # if p is 15-100-300 from top-down, q is 300-100-15 from bottom-up.
    qzs = [0] * len(FLAGS.hidden_sizes)
    for l in range(len(FLAGS.hidden_sizes)):
        if l == 0:
            logits = tf.layers.dense(tf.cast(x_ph, tf.float32),
                                     FLAGS.hidden_sizes[l],
                                     activation=None)
        else:
            logits = tf.layers.dense(tf.cast(qzs[l - 1], tf.float32),
                                     FLAGS.hidden_sizes[l],
                                     activation=None)
        qzs[l] = Bernoulli(logits=logits)

    inference = ed.KLqp({z: qz for z, qz in zip(zs, qzs)}, data={x: x_ph})
    optimizer = tf.train.AdamOptimizer(FLAGS.step_size)
    inference.initialize(optimizer=optimizer, n_samples=FLAGS.n_train_samples)

    # Build tensor for log-likelihood given one variational sample to run
    # on test data.
    x_post = ed.copy(x, {z: qz for z, qz in zip(zs, qzs)})
    x_neg_log_prob = (-tf.reduce_sum(x_post.log_prob(x_ph)) /
                      tf.cast(tf.shape(x_ph)[0], tf.float32))

    sess = ed.get_session()
    tf.global_variables_initializer().run()

    for epoch in range(FLAGS.n_epoch):
        print("Epoch {}".format(epoch))
        train_loss = 0.0

        pbar = Progbar(FLAGS.n_iter_per_epoch)
        for t in range(1, FLAGS.n_iter_per_epoch + 1):
            pbar.update(t)
            x_batch = next(x_train_generator)
            info_dict = inference.update(feed_dict={x_ph: x_batch})
            train_loss += info_dict['loss']

        # Print per-data point loss, averaged over training epoch.
        train_loss /= FLAGS.n_iter_per_epoch
        train_loss /= FLAGS.batch_size
        print("Training negative log-likelihood: {:0.3f}".format(train_loss))

        test_loss = [
            sess.run(x_neg_log_prob, {x_ph: x_test})
            for _ in range(FLAGS.n_test_samples)
        ]
        test_loss = np.mean(test_loss)
        print("Test negative log-likelihood: {:0.3f}".format(test_loss))

        # Prior predictive check.
        images = sess.run(x,
                          {x_ph: x_batch})  # feed ph to determine sample size
        for m in range(FLAGS.batch_size):
            imsave("{}/{}.png".format(out_dir, m), images[m].reshape(28, 28))
Example #11
class Inference(object):
    """Abstract base class for inference. All inference algorithms in
  Edward inherit from `Inference`, sharing common methods and
  properties via a class hierarchy.

  Specific algorithms typically inherit from other subclasses of
  `Inference` rather than `Inference` directly. For example, one
  might inherit from the abstract classes `MonteCarlo` or
  `VariationalInference`.

  To build an algorithm inheriting from `Inference`, one must at the
  minimum implement `initialize` and `update`: the former builds
  the computational graph for the algorithm; the latter runs the
  computational graph for the algorithm.

  To reset inference (e.g., internal variable counters incremented
  over training), fetch inference's reset ops from session with
  `sess.run(inference.reset)`.

  #### Examples

  ```python
  # Set up probability model.
  mu = Normal(loc=0.0, scale=1.0)
  x = Normal(loc=mu, scale=1.0, sample_shape=50)

  # Set up posterior approximation.
  qmu_loc = tf.Variable(tf.random_normal([]))
  qmu_scale = tf.nn.softplus(tf.Variable(tf.random_normal([])))
  qmu = Normal(loc=qmu_loc, scale=qmu_scale)

  inference = ed.Inference({mu: qmu}, data={x: tf.zeros(50)})
  ```
  """
    def __init__(self, latent_vars=None, data=None):
        """Create an inference algorithm.

    Args:
      latent_vars: dict, optional.
        Collection of latent variables (of type `RandomVariable` or
        `tf.Tensor`) to perform inference on. Each random variable is
        bound to another random variable; the latter will infer the
        former conditional on data.
      data: dict, optional.
        Data dictionary which binds observed variables (of type
        `RandomVariable` or `tf.Tensor`) to their realizations (of
        type `tf.Tensor`). It can also bind placeholders (of type
        `tf.Tensor`) used in the model to their realizations; and
        prior latent variables (of type `RandomVariable`) to posterior
        latent variables (of type `RandomVariable`).
    """
        sess = get_session()
        if latent_vars is None:
            latent_vars = {}
        if data is None:
            data = {}

        check_latent_vars(latent_vars)
        self.latent_vars = latent_vars

        check_data(data)
        self.data = {}
        for key, value in six.iteritems(data):
            if isinstance(key, tf.Tensor) and "Placeholder" in key.op.type:
                self.data[key] = value
            elif isinstance(key, (RandomVariable, tf.Tensor)):
                if isinstance(value, (RandomVariable, tf.Tensor)):
                    self.data[key] = value
                elif isinstance(
                        value, (float, list, int, np.ndarray, np.number, str)):
                    # If value is a Python type, store it in the graph.
                    # Assign its placeholder with the key's data type.
                    with tf.variable_scope(None, default_name="data"):
                        ph = tf.placeholder(key.dtype, np.shape(value))
                        var = tf.Variable(ph, trainable=False, collections=[])
                        sess.run(var.initializer, {ph: value})
                        self.data[key] = var

    def run(self, variables=None, use_coordinator=True, *args, **kwargs):
        """A simple wrapper to run inference.

    1. Initialize algorithm via `initialize`.
    2. (Optional) Build a TensorFlow summary writer for TensorBoard.
    3. (Optional) Initialize TensorFlow variables.
    4. (Optional) Start queue runners.
    5. Run `update` for `self.n_iter` iterations.
    6. While running, `print_progress`.
    7. Finalize algorithm via `finalize`.
    8. (Optional) Stop queue runners.

    To customize the way inference is run, run these steps
    individually.

    Args:
      variables: list, optional.
        A list of TensorFlow variables to initialize during inference.
        Default is to initialize all variables (this includes
        reinitializing variables that were already initialized). To
        avoid initializing any variables, pass in an empty list.
      use_coordinator: bool, optional.
        Whether to start and stop queue runners during inference using a
        TensorFlow coordinator. For example, queue runners are necessary
        for batch training with file readers.
      *args, **kwargs:
        Passed into `initialize`.
    """
        self.initialize(*args, **kwargs)

        if variables is None:
            init = tf.global_variables_initializer()
        else:
            init = tf.variables_initializer(variables)

        # Feed placeholders in case initialization depends on them.
        feed_dict = {}
        for key, value in six.iteritems(self.data):
            if isinstance(key, tf.Tensor) and "Placeholder" in key.op.type:
                feed_dict[key] = value

        init.run(feed_dict)

        if use_coordinator:
            # Start input enqueue threads.
            self.coord = tf.train.Coordinator()
            self.threads = tf.train.start_queue_runners(coord=self.coord)

        for _ in range(self.n_iter):
            info_dict = self.update()
            self.print_progress(info_dict)

        self.finalize()

        if use_coordinator:
            # Ask threads to stop.
            self.coord.request_stop()
            self.coord.join(self.threads)

    @abc.abstractmethod
    def initialize(self,
                   n_iter=1000,
                   n_print=None,
                   scale=None,
                   auto_transform=True,
                   logdir=None,
                   log_timestamp=True,
                   log_vars=None,
                   debug=False):
        """Initialize inference algorithm. It initializes hyperparameters
    and builds ops for the algorithm's computation graph.

    Any derived class of `Inference` **must** implement this method.
    No methods which build ops should be called outside `initialize()`.

    Args:
      n_iter: int, optional.
        Number of iterations for algorithm when calling `run()`.
        Alternatively if controlling inference manually, it is the
        expected number of calls to `update()`; this number determines
        tracking information during the print progress.
      n_print: int, optional.
        Number of iterations for each print progress. To suppress print
        progress, specify 0. Default is `int(n_iter / 100)`.
      scale: dict of RandomVariable to tf.Tensor, optional.
        A tensor to scale computation for any random variable that it is
        bound to. Its shape must be broadcastable; it is multiplied
        element-wise to the random variable. For example, this is useful
        for mini-batch scaling when inferring global variables, or
        applying masks on a random variable.
      auto_transform: bool, optional.
        Whether to automatically transform continuous latent variables
        of unequal support to be on the unconstrained space. It is
        only applied if the argument is `True`, the latent variable
        pair are `ed.RandomVariable`s with the `support` attribute, and
        the supports are both continuous and unequal.
      logdir: str, optional.
        Directory where event file will be written. For details,
        see `tf.summary.FileWriter`. Default is to log nothing.
      log_timestamp: bool, optional.
        If True (and `logdir` is specified), create a subdirectory of
        `logdir` to save the specific run results. The subdirectory's
        name is the current UTC timestamp with format 'YYYYMMDD_HHMMSS'.
      log_vars: list, optional.
        Specifies the list of variables to log after each `n_print`
        steps. If None, will log all variables. If `[]`, no variables
        will be logged. `logdir` must be specified for variables to be
        logged.
      debug: bool, optional.
        If True, add checks for `NaN` and `Inf` to all computations
        in the graph. May result in substantially slower execution
        times.
    """
        self.n_iter = n_iter
        if n_print is None:
            self.n_print = int(n_iter / 100)
        else:
            self.n_print = n_print

        self.progbar = Progbar(self.n_iter)
        self.t = tf.Variable(0, trainable=False, name="iteration")

        self.increment_t = self.t.assign_add(1)

        if scale is None:
            scale = {}
        elif not isinstance(scale, dict):
            raise TypeError("scale must be a dict object.")

        self.scale = scale

        # map from original latent vars to unconstrained versions
        self.transformations = {}
        if auto_transform:
            latent_vars = self.latent_vars.copy()
            # latent_vars maps original latent vars to constrained Q's.
            # latent_vars_unconstrained maps unconstrained vars to unconstrained Q's.
            self.latent_vars = {}
            self.latent_vars_unconstrained = {}
            for z, qz in six.iteritems(latent_vars):
                if hasattr(z, 'support') and hasattr(qz, 'support') and \
                        z.support != qz.support and qz.support != 'point':

                    # transform z to an unconstrained space
                    z_unconstrained = transform(z)
                    self.transformations[z] = z_unconstrained

                    # make sure we also have a qz that covers the unconstrained space
                    if qz.support == "points":
                        qz_unconstrained = qz
                    else:
                        qz_unconstrained = transform(qz)
                    self.latent_vars_unconstrained[
                        z_unconstrained] = qz_unconstrained

                    # additionally construct the transformation of qz
                    # back into the original constrained space
                    if z_unconstrained != z:
                        qz_constrained = transform(
                            qz_unconstrained,
                            bijectors.Invert(z_unconstrained.bijector))

                        try:  # attempt to pushforward the params of Empirical distributions
                            qz_constrained.params = z_unconstrained.bijector.inverse(
                                qz_unconstrained.params)
                        except:  # qz_unconstrained is not an Empirical distribution
                            pass

                    else:
                        qz_constrained = qz_unconstrained

                    self.latent_vars[z] = qz_constrained
                else:
                    self.latent_vars[z] = qz
                    self.latent_vars_unconstrained[z] = qz
            del latent_vars

        if logdir is not None:
            self.logging = True
            if log_timestamp:
                logdir = os.path.expanduser(logdir)
                logdir = os.path.join(
                    logdir,
                    datetime.strftime(datetime.utcnow(), "%Y%m%d_%H%M%S"))

            self._summary_key = tf.get_default_graph().unique_name("summaries")
            self._set_log_variables(log_vars)
            self.train_writer = tf.summary.FileWriter(logdir,
                                                      tf.get_default_graph())
        else:
            self.logging = False

        self.debug = debug
        if self.debug:
            self.op_check = tf.add_check_numerics_ops()

        # Store reset ops which user can call. Subclasses should append
        # any ops needed to reset internal variables in inference.
        self.reset = [tf.variables_initializer([self.t])]

    @abc.abstractmethod
    def update(self, feed_dict=None):
        """Run one iteration of inference.

    Any derived class of `Inference` **must** implement this method.

    Args:
      feed_dict: dict, optional.
        Feed dictionary for a TensorFlow session run. It is used to feed
        placeholders that are not fed during initialization.

    Returns:
      dict.
        Dictionary of algorithm-specific information.
    """
        if feed_dict is None:
            feed_dict = {}

        for key, value in six.iteritems(self.data):
            if isinstance(key, tf.Tensor) and "Placeholder" in key.op.type:
                feed_dict[key] = value

        sess = get_session()
        t = sess.run(self.increment_t)

        if self.debug:
            sess.run(self.op_check, feed_dict)

        if self.logging and self.n_print != 0:
            if t == 1 or t % self.n_print == 0:
                summary = sess.run(self.summarize, feed_dict)
                self.train_writer.add_summary(summary, t)

        return {'t': t}

    def print_progress(self, info_dict):
        """Print progress to output.

    Args:
      info_dict: dict.
        Dictionary of algorithm-specific information.
    """
        if self.n_print != 0:
            t = info_dict['t']
            if t == 1 or t % self.n_print == 0:
                self.progbar.update(t)

    def finalize(self):
        """Function to call after convergence.
    """
        if self.logging:
            self.train_writer.close()

    def _set_log_variables(self, log_vars=None):
        """Log variables to TensorBoard.

    For each variable in `log_vars`, forms a `tf.summary.scalar` if
    the variable has scalar shape; otherwise forms a `tf.summary.histogram`.

    Args:
      log_vars: list, optional.
        Specifies the list of variables to log after each `n_print`
        steps. If None, will log all variables. If `[]`, no variables
        will be logged.
    """
        if log_vars is None:
            log_vars = []
            for key in six.iterkeys(self.data):
                log_vars += get_variables(key)

            for key, value in six.iteritems(self.latent_vars):
                log_vars += get_variables(key)
                log_vars += get_variables(value)

            log_vars = set(log_vars)

        for var in log_vars:
            # replace colons which are an invalid character
            var_name = var.name.replace(':', '/')
            # Log all scalars.
            if len(var.shape) == 0:
                tf.summary.scalar("parameter/{}".format(var_name),
                                  var,
                                  collections=[self._summary_key])
            elif len(var.shape) == 1 and var.shape[0] == 1:
                tf.summary.scalar("parameter/{}".format(var_name),
                                  var[0],
                                  collections=[self._summary_key])
            else:
                # If var is multi-dimensional, log a histogram of its values.
                tf.summary.histogram("parameter/{}".format(var_name),
                                     var,
                                     collections=[self._summary_key])
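Example #11's `initialize` adds the `auto_transform` argument. A minimal sketch of the behaviour it documents (the model, the data tensor `x_data`, and the choice of `ed.KLqp` are assumptions, not from the listing): a positive latent variable is paired with an unconstrained Normal approximation, and `initialize` transforms the pair onto the unconstrained space automatically.

import tensorflow as tf
import edward as ed
from edward.models import Gamma, Normal, Poisson

z = Gamma(2.0, 2.0)                                # support: positive reals
x = Poisson(rate=z, sample_shape=10)               # assumed observed variable
qz = Normal(loc=tf.Variable(0.0),                  # support: all reals
            scale=tf.nn.softplus(tf.Variable(0.0)))

inference = ed.KLqp({z: qz}, data={x: x_data})     # x_data: assumed observations
inference.initialize(auto_transform=True)          # default; q is transformed for z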
Example #12
data = {x: x_ph}
inference = ed.ReparameterizationKLKLqp({z: qz}, data)
optimizer = tf.train.AdamOptimizer(0.01, epsilon=1.0)
inference.initialize(optimizer=optimizer, use_prettytensor=True)

hidden_rep = tf.sigmoid(logits)

init = tf.global_variables_initializer()
init.run()

n_epoch = 100
n_iter_per_epoch = 1000
for epoch in range(n_epoch):
    avg_loss = 0.0

    pbar = Progbar(n_iter_per_epoch)
    for t in range(1, n_iter_per_epoch + 1):
        pbar.update(t)
        x_train, _ = mnist.train.next_batch(M)
        info_dict = inference.update(feed_dict={x_ph: x_train})
        avg_loss += info_dict['loss']

    # Print a lower bound to the average marginal likelihood for an
    # image.
    avg_loss = avg_loss / n_iter_per_epoch
    avg_loss = avg_loss / M
    print("log p(x) >= {:0.3f}".format(avg_loss))

    # Visualize hidden representations.
    imgs = hidden_rep.eval()
    for m in range(M):
Example #13
    def initialize(self,
                   n_iter=1000,
                   n_print=None,
                   scale=None,
                   logdir=None,
                   log_timestamp=True,
                   log_vars=None,
                   debug=False):
        """Initialize inference algorithm. It initializes hyperparameters
    and builds ops for the algorithm's computation graph.

    Any derived class of ``Inference`` **must** implement this method.
    No methods which build ops should be called outside ``initialize()``.

    Parameters
    ----------
    n_iter : int, optional
      Number of iterations for algorithm.
    n_print : int, optional
      Number of iterations for each print progress. To suppress print
      progress, specify 0. Default is ``int(n_iter / 100)``.
    scale : dict of RandomVariable to tf.Tensor, optional
      A tensor to scale computation for any random variable that it is
      bound to. Its shape must be broadcastable; it is multiplied
      element-wise to the random variable. For example, this is useful
      for mini-batch scaling when inferring global variables, or
      applying masks on a random variable.
    logdir : str, optional
      Directory where event file will be written. For details,
      see ``tf.summary.FileWriter``. Default is to log nothing.
    log_timestamp : bool, optional
      If True (and ``logdir`` is specified), create a subdirectory of
      ``logdir`` to save the specific run results. The subdirectory's
      name is the current UTC timestamp with format 'YYYYMMDD_HHMMSS'.
    log_vars : list, optional
      Specifies the list of variables to log after each ``n_print``
      steps. If None, will log all variables. If ``[]``, no variables
      will be logged. ``logdir`` must be specified for variables to be
      logged.
    debug : bool, optional
      If True, add checks for ``NaN`` and ``Inf`` to all computations
      in the graph. May result in substantially slower execution
      times.
    """
        self.n_iter = n_iter
        if n_print is None:
            self.n_print = int(n_iter / 100)
        else:
            self.n_print = n_print

        self.progbar = Progbar(self.n_iter)
        self.t = tf.Variable(0, trainable=False, name="iteration")

        self.increment_t = self.t.assign_add(1)

        if scale is None:
            scale = {}
        elif not isinstance(scale, dict):
            raise TypeError("scale must be a dict object.")

        self.scale = scale

        if logdir is not None:
            self.logging = True
            if log_timestamp:
                logdir = os.path.join(
                    logdir,
                    datetime.strftime(datetime.utcnow(), "%Y%m%d_%H%M%S"))

            self._set_log_variables(log_vars)
            self.train_writer = tf.summary.FileWriter(logdir,
                                                      tf.get_default_graph())
            self.summarize = tf.summary.merge_all()
        else:
            self.logging = False

        self.debug = debug
        if self.debug:
            self.op_check = tf.add_check_numerics_ops()

        # Store reset ops which user can call. Subclasses should append
        # any ops needed to reset internal variables in inference.
        self.reset = [tf.variables_initializer([self.t])]
Example #14
class Inference(object):
  """Base class for Edward inference methods.

  Attributes
  ----------
  latent_vars : dict
    Collection of latent variables (of type ``RandomVariable`` or
    ``tf.Tensor``) to perform inference on. Each random variable is
    bound to another random variable; the latter will infer the
    former conditional on data.
  data : dict
    Data dictionary which binds observed variables (of type
    ``RandomVariable`` or ``tf.Tensor``) to their realizations (of
    type ``tf.Tensor``).
  """
  def __init__(self, latent_vars=None, data=None):
    """Initialization.

    Parameters
    ----------
    latent_vars : dict, optional
      Collection of latent variables (of type ``RandomVariable`` or
      ``tf.Tensor``) to perform inference on. Each random variable is
      bound to another random variable; the latter will infer the
      former conditional on data.
    data : dict, optional
      Data dictionary which binds observed variables (of type
      ``RandomVariable`` or ``tf.Tensor``) to their realizations (of
      type ``tf.Tensor``). It can also bind placeholders (of type
      ``tf.Tensor``) used in the model to their realizations; and
      prior latent variables (of type ``RandomVariable``) to posterior
      latent variables (of type ``RandomVariable``).

    Notes
    -----
    If ``data`` is not passed in, the dictionary is empty.

    Three options are available for batch training:

    1. internally if user passes in data as a dictionary of NumPy
       arrays;
    2. externally if user passes in data as a dictionary of
       TensorFlow placeholders (and manually feeds them);
    3. externally if user passes in data as TensorFlow tensors
       which are the outputs of data readers.

    Examples
    --------
    >>> mu = Normal(mu=tf.constant(0.0), sigma=tf.constant(1.0))
    >>> x = Normal(mu=tf.ones(N) * mu, sigma=tf.constant(1.0))
    >>>
    >>> qmu_mu = tf.Variable(tf.random_normal([1]))
    >>> qmu_sigma = tf.nn.softplus(tf.Variable(tf.random_normal([1])))
    >>> qmu = Normal(mu=qmu_mu, sigma=qmu_sigma)
    >>>
    >>> Inference({mu: qmu}, {x: tf.constant([0.0] * N)})
    """
    sess = get_session()
    if latent_vars is None:
      latent_vars = {}
    elif not isinstance(latent_vars, dict):
      raise TypeError("latent_vars must have type dict.")

    for key, value in six.iteritems(latent_vars):
      if not isinstance(key, (RandomVariable, tf.Tensor)):
        raise TypeError("Latent variable key has an invalid type.")
      elif not isinstance(value, (RandomVariable, tf.Tensor)):
        raise TypeError("Latent variable value has an invalid type.")
      elif not key.get_shape().is_compatible_with(value.get_shape()):
        raise TypeError("Latent variable bindings do not have same shape.")

    self.latent_vars = latent_vars

    if data is None:
      data = {}
    elif not isinstance(data, dict):
      raise TypeError("data must have type dict.")

    self.data = {}
    for key, value in six.iteritems(data):
      if isinstance(key, RandomVariable) or \
         (isinstance(key, tf.Tensor) and "Placeholder" not in key.op.type):
        if isinstance(value, tf.Tensor):
          if not key.get_shape().is_compatible_with(value.get_shape()):
            raise TypeError("Observed variable bindings do not have same "
                            "shape.")

          self.data[key] = tf.cast(value, tf.float32)
        elif isinstance(value, RandomVariable):
          if not key.get_shape().is_compatible_with(value.get_shape()):
            raise TypeError("Observed variable bindings do not have same "
                            "shape.")

          self.data[key] = value
        elif isinstance(value, np.ndarray):
          if not key.get_shape().is_compatible_with(value.shape):
            raise TypeError("Observed variable bindings do not have same "
                            "shape.")

          # If value is a np.ndarray, store it in the graph. Assign its
          # placeholder to an appropriate data type.
          if np.issubdtype(value.dtype, np.float):
            ph_type = tf.float32
          elif np.issubdtype(value.dtype, np.int):
            ph_type = tf.int32
          else:
            raise TypeError("Data value has an unsupported type.")
          ph = tf.placeholder(ph_type, value.shape)
          var = tf.Variable(ph, trainable=False, collections=[])
          self.data[key] = var
          sess.run(var.initializer, {ph: value})
        elif isinstance(value, np.number):
          if np.issubdtype(value.dtype, np.float):
            ph_type = tf.float32
          elif np.issubdtype(value.dtype, np.int):
            ph_type = tf.int32
          else:
              raise TypeError("Data value as an invalid type.")
          ph = tf.placeholder(ph_type, value.shape)
          var = tf.Variable(ph, trainable=False, collections=[])
          self.data[key] = var
          sess.run(var.initializer, {ph: value})
        elif isinstance(value, float):
          ph_type = tf.float32
          ph = tf.placeholder(ph_type, ())
          var = tf.Variable(ph, trainable=False, collections=[])
          self.data[key] = var
          sess.run(var.initializer, {ph: value})
        elif isinstance(value, int):
          ph_type = tf.int32
          ph = tf.placeholder(ph_type, ())
          var = tf.Variable(ph, trainable=False, collections=[])
          self.data[key] = var
          # `bool` is a subclass of `int`, so this branch also catches it.
          sess.run(var.initializer, {ph: int(value)})
        else:
          raise TypeError("Data value has an invalid type.")
      elif isinstance(key, tf.Tensor):
        if isinstance(value, RandomVariable):
          raise TypeError("Data placeholder cannot be bound to a "
                          "RandomVariable.")

        self.data[key] = value
      else:
        raise TypeError("Data key has an invalid type.")

  def run(self, variables=None, use_coordinator=True, *args, **kwargs):
    """A simple wrapper to run inference.

    1. Initialize algorithm via ``initialize``.
    2. (Optional) Build a TensorFlow summary writer for TensorBoard.
    3. (Optional) Initialize TensorFlow variables.
    4. (Optional) Start queue runners.
    5. Run ``update`` for ``self.n_iter`` iterations.
    6. While running, ``print_progress``.
    7. Finalize algorithm via ``finalize``.
    8. (Optional) Stop queue runners.

    To customize the way inference is run, run these steps
    individually.

    Parameters
    ----------
    variables : list, optional
      A list of TensorFlow variables to initialize during inference.
      Default is to initialize all variables (this includes
      reinitializing variables that were already initialized). To
      avoid initializing any variables, pass in an empty list.
    use_coordinator : bool, optional
      Whether to start and stop queue runners during inference using a
      TensorFlow coordinator. For example, queue runners are necessary
      for batch training with file readers.
    *args
      Passed into ``initialize``.
    **kwargs
      Passed into ``initialize``.
    """
    self.initialize(*args, **kwargs)

    if variables is None:
      init = tf.global_variables_initializer()
    else:
      init = tf.variables_initializer(variables)

    # Feed placeholders in case initialization depends on them.
    feed_dict = {}
    for key, value in six.iteritems(self.data):
      if isinstance(key, tf.Tensor) and "Placeholder" in key.op.type:
        feed_dict[key] = value

    init.run(feed_dict)

    if use_coordinator:
      # Start input enqueue threads.
      self.coord = tf.train.Coordinator()
      self.threads = tf.train.start_queue_runners(coord=self.coord)

    for _ in range(self.n_iter):
      info_dict = self.update()
      self.print_progress(info_dict)

    self.finalize()

    if use_coordinator:
      # Ask threads to stop.
      self.coord.request_stop()
      self.coord.join(self.threads)

  def initialize(self, n_iter=1000, n_print=None, scale=None, logdir=None,
                 debug=False):
    """Initialize inference algorithm.

    Parameters
    ----------
    n_iter : int, optional
      Number of iterations for algorithm.
    n_print : int, optional
      Number of iterations for each print progress. To suppress print
      progress, specify 0. Default is ``int(n_iter / 10)``.
    scale : dict of RandomVariable to tf.Tensor, optional
      A tensor to scale computation for any random variable that it is
      bound to. Its shape must be broadcastable; it is multiplied
      element-wise to the random variable. For example, this is useful
      for mini-batch scaling when inferring global variables, or
      applying masks on a random variable.
    logdir : str, optional
      Directory where event file will be written. For details,
      see ``tf.summary.FileWriter``. Default is to write nothing.
    debug : bool, optional
      If True, add checks for ``NaN`` and ``Inf`` to all computations
      in the graph. May result in substantially slower execution
      times.
    """
    self.n_iter = n_iter
    if n_print is None:
      self.n_print = int(n_iter / 10)
    else:
      self.n_print = n_print

    self.progbar = Progbar(self.n_iter)
    self.t = tf.Variable(0, trainable=False)
    self.increment_t = self.t.assign_add(1)

    if scale is None:
      scale = {}
    elif not isinstance(scale, dict):
      raise TypeError("scale must be a dict object.")

    self.scale = scale

    if logdir is not None:
      self.logging = True
      self.train_writer = tf.summary.FileWriter(logdir, tf.get_default_graph())
      self.summarize = tf.summary.merge_all()
    else:
      self.logging = False

    self.debug = debug
    if self.debug:
      self.op_check = tf.add_check_numerics_ops()

  def update(self, feed_dict=None):
    """Run one iteration of inference.

    Parameters
    ----------
    feed_dict : dict, optional
      Feed dictionary for a TensorFlow session run. It is used to feed
      placeholders that are not fed during initialization.

    Returns
    -------
    dict
      Dictionary of algorithm-specific information.
    """
    if feed_dict is None:
      feed_dict = {}

    for key, value in six.iteritems(self.data):
      if isinstance(key, tf.Tensor) and "Placeholder" in key.op.type:
        feed_dict[key] = value

    sess = get_session()
    t = sess.run(self.increment_t)

    if self.debug:
      sess.run(self.op_check)

    if self.logging and self.n_print != 0:
      if t == 1 or t % self.n_print == 0:
        summary = sess.run(self.summarize, feed_dict)
        self.train_writer.add_summary(summary, t)

    return {'t': t}

  def print_progress(self, info_dict):
    """Print progress to output.

    Parameters
    ----------
    info_dict : dict
      Dictionary of algorithm-specific information.
    """
    if self.n_print != 0:
      t = info_dict['t']
      if t == 1 or t % self.n_print == 0:
        self.progbar.update(t)

  def finalize(self):
    """Function to call after convergence.
    """
    if self.logging:
      self.train_writer.close()
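
As the ``run()`` docstring above notes, the same steps can be run individually for finer control. A minimal sketch of such a manual loop, assuming ``inference`` is an instance of a concrete subclass of this ``Inference`` base class (the variable name and settings below are placeholders, not part of the code above; ``ed`` and ``tf`` are the usual imports):

inference.initialize(n_iter=500)

sess = ed.get_session()
sess.run(tf.global_variables_initializer())

for _ in range(inference.n_iter):
  # update() returns a dict carrying at least the iteration counter 't'.
  info_dict = inference.update()
  inference.print_progress(info_dict)

inference.finalize()
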
Example #15
def main(_):
  ed.set_seed(42)

  # DATA
  x_train, metadata = nips(FLAGS.data_dir)
  documents = metadata['columns']
  words = metadata['rows']

  # Subset to documents from 2011 and words that appear in at least two
  # documents and have a total word count of at least 10.
  doc_idx = [i for i, document in enumerate(documents)
             if document.startswith('2011')]
  documents = [documents[doc] for doc in doc_idx]
  x_train = x_train[:, doc_idx]
  word_idx = np.logical_and(np.sum(x_train != 0, 1) >= 2,
                            np.sum(x_train, 1) >= 10)
  words = [word for word, idx in zip(words, word_idx) if idx]
  x_train = x_train[word_idx, :]
  x_train = x_train.T

  N = x_train.shape[0]  # number of documents
  D = x_train.shape[1]  # vocabulary size

  # MODEL
  W2 = Gamma(0.1, 0.3, sample_shape=[FLAGS.K[2], FLAGS.K[1]])
  W1 = Gamma(0.1, 0.3, sample_shape=[FLAGS.K[1], FLAGS.K[0]])
  W0 = Gamma(0.1, 0.3, sample_shape=[FLAGS.K[0], D])

  z3 = Gamma(0.1, 0.1, sample_shape=[N, FLAGS.K[2]])
  z2 = Gamma(FLAGS.shape, FLAGS.shape / tf.matmul(z3, W2))
  z1 = Gamma(FLAGS.shape, FLAGS.shape / tf.matmul(z2, W1))
  x = Poisson(tf.matmul(z1, W0))

  # INFERENCE
  qW2 = pointmass_q(W2.shape)
  qW1 = pointmass_q(W1.shape)
  qW0 = pointmass_q(W0.shape)
  if FLAGS.q == 'gamma':
    qz3 = gamma_q(z3.shape)
    qz2 = gamma_q(z2.shape)
    qz1 = gamma_q(z1.shape)
  else:
    qz3 = lognormal_q(z3.shape)
    qz2 = lognormal_q(z2.shape)
    qz1 = lognormal_q(z1.shape)

  # We apply variational EM with E-step over local variables
  # and M-step to point estimate the global weight matrices.
  inference_e = ed.KLqp({z1: qz1, z2: qz2, z3: qz3},
                        data={x: x_train, W0: qW0, W1: qW1, W2: qW2})
  inference_m = ed.MAP({W0: qW0, W1: qW1, W2: qW2},
                       data={x: x_train, z1: qz1, z2: qz2, z3: qz3})

  optimizer_e = tf.train.RMSPropOptimizer(FLAGS.lr)
  optimizer_m = tf.train.RMSPropOptimizer(FLAGS.lr)
  kwargs = {'optimizer': optimizer_e,
            'n_print': 100,
            'logdir': FLAGS.logdir,
            'log_timestamp': False}
  if FLAGS.q == 'gamma':
    kwargs['n_samples'] = 30
  inference_e.initialize(**kwargs)
  inference_m.initialize(optimizer=optimizer_m)

  sess = ed.get_session()
  tf.global_variables_initializer().run()

  n_epoch = 20
  n_iter_per_epoch = 10000
  for epoch in range(n_epoch):
    print("Epoch {}".format(epoch))
    nll = 0.0

    pbar = Progbar(n_iter_per_epoch)
    for t in range(1, n_iter_per_epoch + 1):
      pbar.update(t)
      info_dict_e = inference_e.update()
      info_dict_m = inference_m.update()
      nll += info_dict_e['loss']

    # Compute perplexity averaged over a number of training iterations.
    # The model's negative log-likelihood of data is upper bounded by
    # the variational objective.
    nll /= n_iter_per_epoch
    perplexity = np.exp(nll / np.sum(x_train))
    print("Negative log-likelihood <= {:0.3f}".format(nll))
    print("Perplexity <= {:0.3f}".format(perplexity))

    # Print top 10 words for first 10 topics.
    qW0_vals = sess.run(qW0)
    for k in range(10):
      top_words_idx = qW0_vals[k, :].argsort()[-10:][::-1]
      top_words = " ".join([words[i] for i in top_words_idx])
      print("Topic {}: {}".format(k, top_words))
Example #16
File: inference.py Project: JoyceYa/edward
  def initialize(self, n_iter=1000, n_print=None, scale=None,
                 auto_transform=True, logdir=None, log_timestamp=True,
                 log_vars=None, debug=False):
    """Initialize inference algorithm. It initializes hyperparameters
    and builds ops for the algorithm's computation graph.

    Any derived class of `Inference` **must** implement this method.
    No methods which build ops should be called outside `initialize()`.

    Args:
      n_iter: int.
        Number of iterations for algorithm when calling `run()`.
        Alternatively if controlling inference manually, it is the
        expected number of calls to `update()`; this number determines
        tracking information during the print progress.
      n_print: int.
        Number of iterations for each print progress. To suppress print
        progress, specify 0. Default is `int(n_iter / 100)`.
      scale: dict of RandomVariable to tf.Tensor.
        A tensor to scale computation for any random variable that it is
        bound to. Its shape must be broadcastable; it is multiplied
        element-wise to the random variable. For example, this is useful
        for mini-batch scaling when inferring global variables, or
        applying masks on a random variable.
      auto_transform: bool.
        Whether to automatically transform continuous latent variables
        of unequal support to be on the unconstrained space. It is
        only applied if the argument is `True`, the latent variable
        pair are `ed.RandomVariable`s with the `support` attribute,
        and the supports are both continuous and unequal.
      logdir: str.
        Directory where event file will be written. For details,
        see `tf.summary.FileWriter`. Default is to log nothing.
      log_timestamp: bool.
        If True (and `logdir` is specified), create a subdirectory of
        `logdir` to save the specific run results. The subdirectory's
        name is the current UTC timestamp with format 'YYYYMMDD_HHMMSS'.
      log_vars: list.
        Specifies the list of variables to log after each `n_print`
        steps. If None, will log all variables. If `[]`, no variables
        will be logged. `logdir` must be specified for variables to be
        logged.
      debug: bool.
        If True, add checks for `NaN` and `Inf` to all computations
        in the graph. May result in substantially slower execution
        times.
    """
    self.n_iter = n_iter
    if n_print is None:
      self.n_print = int(n_iter / 100)
    else:
      self.n_print = n_print

    self.progbar = Progbar(self.n_iter)
    self.t = tf.Variable(0, trainable=False, name="iteration")

    self.increment_t = self.t.assign_add(1)

    if scale is None:
      scale = {}
    elif not isinstance(scale, dict):
      raise TypeError("scale must be a dict object.")

    self.scale = scale

    # map from original latent vars to unconstrained versions
    self.transformations = {}
    if auto_transform:
      latent_vars = self.latent_vars.copy()
      # latent_vars maps original latent vars to constrained Q's.
      # latent_vars_unconstrained maps unconstrained vars to unconstrained Q's.
      self.latent_vars = {}
      self.latent_vars_unconstrained = {}
      for z, qz in six.iteritems(latent_vars):
        if hasattr(z, 'support') and hasattr(qz, 'support') and \
                z.support != qz.support and qz.support != 'point':

          # transform z to an unconstrained space
          z_unconstrained = transform(z)
          self.transformations[z] = z_unconstrained

          # make sure we also have a qz that covers the unconstrained space
          if qz.support == "points":
            qz_unconstrained = qz
          else:
            qz_unconstrained = transform(qz)
          self.latent_vars_unconstrained[z_unconstrained] = qz_unconstrained

          # additionally construct the transformation of qz
          # back into the original constrained space
          if z_unconstrained != z:
            qz_constrained = transform(
                qz_unconstrained, bijectors.Invert(z_unconstrained.bijector))

            try:  # attempt to pushforward the params of Empirical distributions
              qz_constrained.params = z_unconstrained.bijector.inverse(
                  qz_unconstrained.params)
            except:  # qz_unconstrained is not an Empirical distribution
              pass

          else:
            qz_constrained = qz_unconstrained

          self.latent_vars[z] = qz_constrained
        else:
          self.latent_vars[z] = qz
          self.latent_vars_unconstrained[z] = qz
      del latent_vars

    if logdir is not None:
      self.logging = True
      if log_timestamp:
        logdir = os.path.expanduser(logdir)
        logdir = os.path.join(
            logdir, datetime.strftime(datetime.utcnow(), "%Y%m%d_%H%M%S"))

      self._summary_key = tf.get_default_graph().unique_name("summaries")
      self._set_log_variables(log_vars)
      self.train_writer = tf.summary.FileWriter(logdir, tf.get_default_graph())
    else:
      self.logging = False

    self.debug = debug
    if self.debug:
      self.op_check = tf.add_check_numerics_ops()

    # Store reset ops which user can call. Subclasses should append
    # any ops needed to reset internal variables in inference.
    self.reset = [tf.variables_initializer([self.t])]
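
A hedged illustration of what ``auto_transform`` handles (the variable names below are ours, not from the snippet above; ``ed``, ``Gamma``, and ``Normal`` are the same imports used by the surrounding examples): a positive-support latent variable paired with an unconstrained Normal approximation. With ``auto_transform=True`` (the default), ``initialize`` reconciles the two by mapping both onto the unconstrained space.

x = Gamma(2.0, 2.0)
qx = Normal(loc=tf.Variable(0.0), scale=tf.nn.softplus(tf.Variable(0.5)))

inference = ed.KLqp({x: qx})
inference.initialize(auto_transform=True)  # the default; shown for emphasis
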
Example #17
### predictive check

n_rep = 100  # number of replicated datasets we generate
holdout_gen = np.zeros((n_rep, x_train.shape[0], x_train.shape[1]))

for i in range(n_rep):
    x_generated = x_post.sample().eval()

    # look only at the heldout entries
    holdout_gen[i] = np.multiply(x_generated, holdout_mask)

n_eval = 10  # we draw samples from the inferred Z and W
obs_ll = []
rep_ll = []
pbar = Progbar(n_eval)
for j in range(n_eval):
    U_sample = U_post.sample().eval()
    V_sample = V_post.sample().eval()

    holdoutmean_sample = np.multiply(U_sample.dot(V_sample.T), holdout_mask)
    obs_ll.append(
        np.mean(np.ma.masked_invalid(
            stats.poisson.logpmf(np.array(x_vad, dtype=int),
                                 holdoutmean_sample)),
                axis=1))

    rep_ll.append(
        np.mean(np.ma.masked_invalid(
            stats.poisson.logpmf(holdout_gen, holdoutmean_sample)),
                axis=2))
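
A hedged continuation of the check, mirroring the usual posterior predictive recipe: average the per-draw log-likelihoods, then compare replicated against observed values to obtain one p-value per row (masks are dropped here for brevity).

# obs_ll is a list of n_eval arrays of shape (n_rows,);
# rep_ll is a list of n_eval arrays of shape (n_rep, n_rows).
obs_ll_per_zw = np.mean(np.array(obs_ll), axis=0)   # (n_rows,)
rep_ll_per_zw = np.mean(np.array(rep_ll), axis=0)   # (n_rep, n_rows)

pvals = np.mean(rep_ll_per_zw < obs_ll_per_zw, axis=0)
print("Average posterior predictive p-value: {:0.3f}".format(np.mean(pvals)))
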
Example #18
class Inference(object):
    """Abstract base class for inference. All inference algorithms in
  Edward inherit from ``Inference``, sharing common methods and
  properties via a class hierarchy.

  Specific algorithms typically inherit from other subclasses of
  ``Inference`` rather than ``Inference`` directly. For example, one
  might inherit from the abstract classes ``MonteCarlo`` or
  ``VariationalInference``.

  To build an algorithm inheriting from ``Inference``, one must at the
  minimum implement ``initialize`` and ``update``: the former builds
  the computational graph for the algorithm; the latter runs the
  computational graph for the algorithm.

  To reset inference (e.g., internal variable counters incremented
  over training), fetch inference's reset ops from session with
  ``sess.run(inference.reset)``.
  """
    def __init__(self, latent_vars=None, data=None):
        """Initialization.

    Parameters
    ----------
    latent_vars : dict, optional
      Collection of latent variables (of type ``RandomVariable`` or
      ``tf.Tensor``) to perform inference on. Each random variable is
      bound to another random variable; the latter will infer the
      former conditional on data.
    data : dict, optional
      Data dictionary which binds observed variables (of type
      ``RandomVariable`` or ``tf.Tensor``) to their realizations (of
      type ``tf.Tensor``). It can also bind placeholders (of type
      ``tf.Tensor``) used in the model to their realizations; and
      prior latent variables (of type ``RandomVariable``) to posterior
      latent variables (of type ``RandomVariable``).

    Examples
    --------
    >>> mu = Normal(loc=tf.constant(0.0), scale=tf.constant(1.0))
    >>> x = Normal(loc=tf.ones(50) * mu, scale=tf.constant(1.0))
    >>>
    >>> qmu_loc = tf.Variable(tf.random_normal([]))
    >>> qmu_scale = tf.nn.softplus(tf.Variable(tf.random_normal([])))
    >>> qmu = Normal(loc=qmu_loc, scale=qmu_scale)
    >>>
    >>> inference = ed.Inference({mu: qmu}, data={x: tf.zeros(50)})
    """
        sess = get_session()
        if latent_vars is None:
            latent_vars = {}
        if data is None:
            data = {}

        check_latent_vars(latent_vars)
        self.latent_vars = latent_vars

        check_data(data)
        self.data = {}
        for key, value in six.iteritems(data):
            if isinstance(key, tf.Tensor) and "Placeholder" in key.op.type:
                self.data[key] = value
            elif isinstance(key, (RandomVariable, tf.Tensor)):
                if isinstance(value, (RandomVariable, tf.Tensor)):
                    self.data[key] = value
                elif isinstance(
                        value, (float, list, int, np.ndarray, np.number, str)):
                    # If value is a Python type, store it in the graph.
                    # Assign its placeholder with the key's data type.
                    with tf.variable_scope("data"):
                        ph = tf.placeholder(key.dtype, np.shape(value))
                        var = tf.Variable(ph, trainable=False, collections=[])
                        sess.run(var.initializer, {ph: value})
                        self.data[key] = var

    def run(self, variables=None, use_coordinator=True, *args, **kwargs):
        """A simple wrapper to run inference.

    1. Initialize algorithm via ``initialize``.
    2. (Optional) Build a TensorFlow summary writer for TensorBoard.
    3. (Optional) Initialize TensorFlow variables.
    4. (Optional) Start queue runners.
    5. Run ``update`` for ``self.n_iter`` iterations.
    6. While running, ``print_progress``.
    7. Finalize algorithm via ``finalize``.
    8. (Optional) Stop queue runners.

    To customize the way inference is run, run these steps
    individually.

    Parameters
    ----------
    variables : list, optional
      A list of TensorFlow variables to initialize during inference.
      Default is to initialize all variables (this includes
      reinitializing variables that were already initialized). To
      avoid initializing any variables, pass in an empty list.
    use_coordinator : bool, optional
      Whether to start and stop queue runners during inference using a
      TensorFlow coordinator. For example, queue runners are necessary
      for batch training with file readers.
    *args
      Passed into ``initialize``.
    **kwargs
      Passed into ``initialize``.
    """
        self.initialize(*args, **kwargs)

        if variables is None:
            init = tf.global_variables_initializer()
        else:
            init = tf.variables_initializer(variables)

        # Feed placeholders in case initialization depends on them.
        feed_dict = {}
        for key, value in six.iteritems(self.data):
            if isinstance(key, tf.Tensor) and "Placeholder" in key.op.type:
                feed_dict[key] = value

        init.run(feed_dict)

        if use_coordinator:
            # Start input enqueue threads.
            self.coord = tf.train.Coordinator()
            self.threads = tf.train.start_queue_runners(coord=self.coord)

        for _ in range(self.n_iter):
            info_dict = self.update()
            self.print_progress(info_dict)

        self.finalize()

        if use_coordinator:
            # Ask threads to stop.
            self.coord.request_stop()
            self.coord.join(self.threads)

    @abc.abstractmethod
    def initialize(self,
                   n_iter=1000,
                   n_print=None,
                   scale=None,
                   logdir=None,
                   log_timestamp=True,
                   log_vars=None,
                   debug=False):
        """Initialize inference algorithm. It initializes hyperparameters
    and builds ops for the algorithm's computation graph.

    Any derived class of ``Inference`` **must** implement this method.
    No methods which build ops should be called outside ``initialize()``.

    Parameters
    ----------
    n_iter : int, optional
      Number of iterations for algorithm.
    n_print : int, optional
      Number of iterations for each print progress. To suppress print
      progress, specify 0. Default is ``int(n_iter / 100)``.
    scale : dict of RandomVariable to tf.Tensor, optional
      A tensor to scale computation for any random variable that it is
      bound to. Its shape must be broadcastable; it is multiplied
      element-wise to the random variable. For example, this is useful
      for mini-batch scaling when inferring global variables, or
      applying masks on a random variable.
    logdir : str, optional
      Directory where event file will be written. For details,
      see ``tf.summary.FileWriter``. Default is to log nothing.
    log_timestamp : bool, optional
      If True (and ``logdir`` is specified), create a subdirectory of
      ``logdir`` to save the specific run results. The subdirectory's
      name is the current UTC timestamp with format 'YYYYMMDD_HHMMSS'.
    log_vars : list, optional
      Specifies the list of variables to log after each ``n_print``
      steps. If None, will log all variables. If ``[]``, no variables
      will be logged. ``logdir`` must be specified for variables to be
      logged.
    debug : bool, optional
      If True, add checks for ``NaN`` and ``Inf`` to all computations
      in the graph. May result in substantially slower execution
      times.
    """
        self.n_iter = n_iter
        if n_print is None:
            self.n_print = int(n_iter / 100)
        else:
            self.n_print = n_print

        self.progbar = Progbar(self.n_iter)
        self.t = tf.Variable(0, trainable=False, name="iteration")

        self.increment_t = self.t.assign_add(1)

        if scale is None:
            scale = {}
        elif not isinstance(scale, dict):
            raise TypeError("scale must be a dict object.")

        self.scale = scale

        if logdir is not None:
            self.logging = True
            if log_timestamp:
                logdir = os.path.join(
                    logdir,
                    datetime.strftime(datetime.utcnow(), "%Y%m%d_%H%M%S"))

            self._set_log_variables(log_vars)
            self.train_writer = tf.summary.FileWriter(logdir,
                                                      tf.get_default_graph())
            self.summarize = tf.summary.merge_all()
        else:
            self.logging = False

        self.debug = debug
        if self.debug:
            self.op_check = tf.add_check_numerics_ops()

        # Store reset ops which user can call. Subclasses should append
        # any ops needed to reset internal variables in inference.
        self.reset = [tf.variables_initializer([self.t])]

    @abc.abstractmethod
    def update(self, feed_dict=None):
        """Run one iteration of inference.

    Any derived class of ``Inference`` **must** implement this method.

    Parameters
    ----------
    feed_dict : dict, optional
      Feed dictionary for a TensorFlow session run. It is used to feed
      placeholders that are not fed during initialization.

    Returns
    -------
    dict
      Dictionary of algorithm-specific information.
    """
        if feed_dict is None:
            feed_dict = {}

        for key, value in six.iteritems(self.data):
            if isinstance(key, tf.Tensor) and "Placeholder" in key.op.type:
                feed_dict[key] = value

        sess = get_session()
        t = sess.run(self.increment_t)

        if self.debug:
            sess.run(self.op_check, feed_dict)

        if self.logging and self.n_print != 0:
            if t == 1 or t % self.n_print == 0:
                summary = sess.run(self.summarize, feed_dict)
                self.train_writer.add_summary(summary, t)

        return {'t': t}

    def print_progress(self, info_dict):
        """Print progress to output.

    Parameters
    ----------
    info_dict : dict
      Dictionary of algorithm-specific information.
    """
        if self.n_print != 0:
            t = info_dict['t']
            if t == 1 or t % self.n_print == 0:
                self.progbar.update(t)

    def finalize(self):
        """Function to call after convergence.
    """
        if self.logging:
            self.train_writer.close()

    def _set_log_variables(self, log_vars=None):
        """Log variables to TensorBoard.

    For each variable in ``log_vars``, forms a ``tf.summary.scalar`` if
    the variable has scalar shape; otherwise forms a ``tf.summary.histogram``.

    Parameters
    ----------
    log_vars : list, optional
      Specifies the list of variables to log after each ``n_print``
      steps. If None, will log all variables. If ``[]``, no variables
      will be logged.
    """
        summary_key = 'summaries_' + str(id(self))
        if log_vars is None:
            log_vars = []
            for key in six.iterkeys(self.data):
                log_vars += get_variables(key)

            for key, value in six.iteritems(self.latent_vars):
                log_vars += get_variables(key)
                log_vars += get_variables(value)

            log_vars = set(log_vars)

        for var in log_vars:
            # replace colons which are an invalid character
            var_name = var.name.replace(':', '/')
            # Log all scalars.
            if len(var.shape) == 0:
                tf.summary.scalar("parameter/{}".format(var_name),
                                  var,
                                  collections=[summary_key])
            elif len(var.shape) == 1 and var.shape[0] == 1:
                tf.summary.scalar("parameter/{}".format(var_name),
                                  var[0],
                                  collections=[summary_key])
            else:
                # If var is multi-dimensional, log a histogram of its values.
                tf.summary.histogram("parameter/{}".format(var_name),
                                     var,
                                     collections=[summary_key])
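
The constructor above delegates its validation to ``check_latent_vars`` and ``check_data`` from Edward's utilities. A hedged sketch of the kind of checks ``check_latent_vars`` performs, mirroring the explicit checks in the earlier version of ``__init__`` (``six``, ``tf``, and ``RandomVariable`` as imported in the surrounding code):

def check_latent_vars(latent_vars):
  """Raise TypeError if latent_vars is not a dict of compatible bindings."""
  if not isinstance(latent_vars, dict):
    raise TypeError("latent_vars must have type dict.")
  for key, value in six.iteritems(latent_vars):
    if not isinstance(key, (RandomVariable, tf.Tensor)):
      raise TypeError("Latent variable key has an invalid type.")
    elif not isinstance(value, (RandomVariable, tf.Tensor)):
      raise TypeError("Latent variable value has an invalid type.")
    elif not key.get_shape().is_compatible_with(value.get_shape()):
      raise TypeError("Latent variable bindings do not have same shape.")
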
Example #19
File: lstm.py Project: zueigung1419/edward
def main(_):
    ed.set_seed(42)

    # DATA
    x_train, _, x_test = text8(FLAGS.data_dir)
    vocab = string.ascii_lowercase + ' '
    vocab_size = len(vocab)
    encoder = dict(zip(vocab, range(vocab_size)))
    decoder = {v: k for k, v in encoder.items()}

    data = generator(x_train, FLAGS.batch_size, FLAGS.timesteps, encoder)

    # MODEL
    x_ph = tf.placeholder(tf.int32, [None, FLAGS.timesteps])
    with tf.variable_scope("language_model"):
        # Shift input sequence to right by 1, [0, x[0], ..., x[timesteps - 2]].
        x_ph_shift = tf.pad(x_ph, [[0, 0], [1, 0]])[:, :-1]
        x = language_model(x_ph_shift, vocab_size)

    with tf.variable_scope("language_model", reuse=True):
        x_gen = language_model_gen(5, vocab_size)

    imb = range(0, len(x_test) - FLAGS.timesteps, FLAGS.timesteps)
    encoded_x_test = np.asarray(
        [[encoder[c] for c in x_test[i:(i + FLAGS.timesteps)]] for i in imb],
        dtype=np.int32)
    test_size = encoded_x_test.shape[0]
    print("Test set shape: {}".format(encoded_x_test.shape))
    test_nll = -tf.reduce_sum(x.log_prob(x_ph))

    # INFERENCE
    inference = ed.MAP({}, {x: x_ph})

    optimizer = tf.train.AdamOptimizer(learning_rate=FLAGS.lr)
    inference.initialize(optimizer=optimizer,
                         logdir=FLAGS.log_dir,
                         log_timestamp=False)

    print("Number of sets of parameters: {}".format(
        len(tf.trainable_variables())))
    print("Number of parameters: {}".format(
        np.sum([np.prod(v.shape.as_list())
                for v in tf.trainable_variables()])))
    for v in tf.trainable_variables():
        print(v)

    sess = ed.get_session()
    tf.global_variables_initializer().run()

    # Double n_epoch and print progress every half an epoch.
    n_iter_per_epoch = len(x_train) // (FLAGS.batch_size * FLAGS.timesteps * 2)
    epoch = 0.0
    for _ in range(FLAGS.n_epoch * 2):
        epoch += 0.5
        print("Epoch: {0}".format(epoch))
        avg_nll = 0.0

        pbar = Progbar(n_iter_per_epoch)
        for t in range(1, n_iter_per_epoch + 1):
            pbar.update(t)
            x_batch = next(data)
            info_dict = inference.update({x_ph: x_batch})
            avg_nll += info_dict['loss']

        # Print average bits per character over epoch.
        avg_nll /= (n_iter_per_epoch * FLAGS.batch_size * FLAGS.timesteps *
                    np.log(2))
        print("Train average bits/char: {:0.8f}".format(avg_nll))

        # Print per-data point log-likelihood on test set.
        avg_nll = 0.0
        for start in range(0, test_size, FLAGS.batch_size):
            end = min(test_size, start + FLAGS.batch_size)
            x_batch = encoded_x_test[start:end]
            avg_nll += sess.run(test_nll, {x_ph: x_batch})

        avg_nll /= test_size
        print("Test average NLL: {:0.8f}".format(avg_nll))

        # Generate samples from model.
        samples = sess.run(x_gen)
        samples = [''.join([decoder[c] for c in sample]) for sample in samples]
        print("Samples:")
        for sample in samples:
            print(sample)
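
The ``generator`` helper assumed above is not shown. A hedged sketch of a compatible implementation that yields ``[batch_size, timesteps]`` arrays of character indices drawn from the training text (the helper in the example's source may batch differently):

def generator(input_string, batch_size, timesteps, encoder):
  while True:
    x_batch = np.zeros([batch_size, timesteps], dtype=np.int32)
    for b in range(batch_size):
      start = np.random.randint(0, len(input_string) - timesteps)
      snippet = input_string[start:start + timesteps]
      x_batch[b] = [encoder[c] for c in snippet]
    yield x_batch
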
Example #20
    def initialize(self,
                   n_iter=1000,
                   n_print=None,
                   scale=None,
                   auto_transform=True,
                   logdir=None,
                   log_timestamp=True,
                   log_vars=None,
                   debug=False):
        """Initialize inference algorithm. It initializes hyperparameters
    and builds ops for the algorithm's computation graph.

    Any derived class of `Inference` **must** implement this method.
    No methods which build ops should be called outside `initialize()`.

    Args:
      n_iter: int, optional.
        Number of iterations for algorithm when calling `run()`.
        Alternatively if controlling inference manually, it is the
        expected number of calls to `update()`; this number determines
        tracking information during the print progress.
      n_print: int, optional.
        Number of iterations for each print progress. To suppress print
        progress, specify 0. Default is `int(n_iter / 100)`.
      scale: dict of RandomVariable to tf.Tensor, optional.
        A tensor to scale computation for any random variable that it is
        bound to. Its shape must be broadcastable; it is multiplied
        element-wise to the random variable. For example, this is useful
        for mini-batch scaling when inferring global variables, or
        applying masks on a random variable.
      auto_transform: bool, optional.
        Whether to automatically transform continuous latent variables
        of unequal support to be on the unconstrained space. It is
        only applied if the argument is `True`, the latent variable
        pair are `ed.RandomVariable`s with the `support` attribute,
        and the supports are both continuous and unequal.
      logdir: str, optional.
        Directory where event file will be written. For details,
        see `tf.summary.FileWriter`. Default is to log nothing.
      log_timestamp: bool, optional.
        If True (and `logdir` is specified), create a subdirectory of
        `logdir` to save the specific run results. The subdirectory's
        name is the current UTC timestamp with format 'YYYYMMDD_HHMMSS'.
      log_vars: list, optional.
        Specifies the list of variables to log after each `n_print`
        steps. If None, will log all variables. If `[]`, no variables
        will be logged. `logdir` must be specified for variables to be
        logged.
      debug: bool, optional.
        If True, add checks for `NaN` and `Inf` to all computations
        in the graph. May result in substantially slower execution
        times.
    """
        self.n_iter = n_iter
        if n_print is None:
            self.n_print = int(n_iter / 100)
        else:
            self.n_print = n_print

        self.progbar = Progbar(self.n_iter)
        self.t = tf.Variable(0, trainable=False, name="iteration")

        self.increment_t = self.t.assign_add(1)

        if scale is None:
            scale = {}
        elif not isinstance(scale, dict):
            raise TypeError("scale must be a dict object.")

        self.scale = scale

        # map from original latent vars to unconstrained versions
        self.transformations = {}
        if auto_transform:
            latent_vars = self.latent_vars.copy()
            # latent_vars maps original latent vars to constrained Q's.
            # latent_vars_unconstrained maps unconstrained vars to unconstrained Q's.
            self.latent_vars = {}
            self.latent_vars_unconstrained = {}
            for z, qz in six.iteritems(latent_vars):
                if hasattr(z, 'support') and hasattr(qz, 'support') and \
                        z.support != qz.support and qz.support != 'point':

                    # transform z to an unconstrained space
                    z_unconstrained = transform(z)
                    self.transformations[z] = z_unconstrained

                    # make sure we also have a qz that covers the unconstrained space
                    if qz.support == "points":
                        qz_unconstrained = qz
                    else:
                        qz_unconstrained = transform(qz)
                    self.latent_vars_unconstrained[
                        z_unconstrained] = qz_unconstrained

                    # additionally construct the transformation of qz
                    # back into the original constrained space
                    if z_unconstrained != z:
                        qz_constrained = transform(
                            qz_unconstrained,
                            bijectors.Invert(z_unconstrained.bijector))

                        try:  # attempt to pushforward the params of Empirical distributions
                            qz_constrained.params = z_unconstrained.bijector.inverse(
                                qz_unconstrained.params)
                        except:  # qz_unconstrained is not an Empirical distribution
                            pass

                    else:
                        qz_constrained = qz_unconstrained

                    self.latent_vars[z] = qz_constrained
                else:
                    self.latent_vars[z] = qz
                    self.latent_vars_unconstrained[z] = qz
            del latent_vars

        if logdir is not None:
            self.logging = True
            if log_timestamp:
                logdir = os.path.expanduser(logdir)
                logdir = os.path.join(
                    logdir,
                    datetime.strftime(datetime.utcnow(), "%Y%m%d_%H%M%S"))

            self._summary_key = tf.get_default_graph().unique_name("summaries")
            self._set_log_variables(log_vars)
            self.train_writer = tf.summary.FileWriter(logdir,
                                                      tf.get_default_graph())
        else:
            self.logging = False

        self.debug = debug
        if self.debug:
            self.op_check = tf.add_check_numerics_ops()

        # Store reset ops which user can call. Subclasses should append
        # any ops needed to reset internal variables in inference.
        self.reset = [tf.variables_initializer([self.t])]
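
A hedged illustration of the transform bookkeeping above (the variable names are ours; ``transform``, ``Gamma``, ``Normal``, and ``bijectors`` are the same names used in the snippet): a positive latent variable is mapped to the unconstrained space, and an unconstrained approximation is mapped back onto the original support with the inverse bijector, exactly as the loop above does.

z = Gamma(2.0, 2.0)                       # support: positive reals
z_unconstrained = transform(z)            # log-transformed, support: all reals

qz_unconstrained = Normal(loc=tf.Variable(0.0),
                          scale=tf.nn.softplus(tf.Variable(0.5)))
qz_constrained = transform(
    qz_unconstrained, bijectors.Invert(z_unconstrained.bijector))
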
Example #21
File: lstm.py Project: JoyceYa/edward
def main(_):
  ed.set_seed(42)

  # DATA
  x_train, _, x_test = text8(FLAGS.data_dir)
  vocab = string.ascii_lowercase + ' '
  vocab_size = len(vocab)
  encoder = dict(zip(vocab, range(vocab_size)))
  decoder = {v: k for k, v in encoder.items()}

  data = generator(x_train, FLAGS.batch_size, FLAGS.timesteps, encoder)

  # MODEL
  x_ph = tf.placeholder(tf.int32, [None, FLAGS.timesteps])
  with tf.variable_scope("language_model"):
    # Shift input sequence to right by 1, [0, x[0], ..., x[timesteps - 2]].
    x_ph_shift = tf.pad(x_ph, [[0, 0], [1, 0]])[:, :-1]
    x = language_model(x_ph_shift, vocab_size)

  with tf.variable_scope("language_model", reuse=True):
    x_gen = language_model_gen(5, vocab_size)

  imb = range(0, len(x_test) - FLAGS.timesteps, FLAGS.timesteps)
  encoded_x_test = np.asarray(
      [[encoder[c] for c in x_test[i:(i + FLAGS.timesteps)]] for i in imb],
      dtype=np.int32)
  test_size = encoded_x_test.shape[0]
  print("Test set shape: {}".format(encoded_x_test.shape))
  test_nll = -tf.reduce_sum(x.log_prob(x_ph))

  # INFERENCE
  inference = ed.MAP({}, {x: x_ph})

  optimizer = tf.train.AdamOptimizer(learning_rate=FLAGS.lr)
  inference.initialize(optimizer=optimizer,
                       logdir=FLAGS.log_dir,
                       log_timestamp=False)

  print("Number of sets of parameters: {}".format(
      len(tf.trainable_variables())))
  print("Number of parameters: {}".format(
      np.sum([np.prod(v.shape.as_list()) for v in tf.trainable_variables()])))
  for v in tf.trainable_variables():
    print(v)

  sess = ed.get_session()
  tf.global_variables_initializer().run()

  # Double n_epoch and print progress every half an epoch.
  n_iter_per_epoch = len(x_train) // (FLAGS.batch_size * FLAGS.timesteps * 2)
  epoch = 0.0
  for _ in range(FLAGS.n_epoch * 2):
    epoch += 0.5
    print("Epoch: {0}".format(epoch))
    avg_nll = 0.0

    pbar = Progbar(n_iter_per_epoch)
    for t in range(1, n_iter_per_epoch + 1):
      pbar.update(t)
      x_batch = next(data)
      info_dict = inference.update({x_ph: x_batch})
      avg_nll += info_dict['loss']

    # Print average bits per character over epoch.
    avg_nll /= (n_iter_per_epoch * FLAGS.batch_size * FLAGS.timesteps *
                np.log(2))
    print("Train average bits/char: {:0.8f}".format(avg_nll))

    # Print per-data point log-likelihood on test set.
    avg_nll = 0.0
    for start in range(0, test_size, FLAGS.batch_size):
      end = min(test_size, start + FLAGS.batch_size)
      x_batch = encoded_x_test[start:end]
      avg_nll += sess.run(test_nll, {x_ph: x_batch})

    avg_nll /= test_size
    print("Test average NLL: {:0.8f}".format(avg_nll))

    # Generate samples from model.
    samples = sess.run(x_gen)
    samples = [''.join([decoder[c] for c in sample]) for sample in samples]
    print("Samples:")
    for sample in samples:
      print(sample)
Example #22
# Bind p(x, z) and q(z | x) to the same placeholder for x.
data = {x: x_ph}
inference = ed.ReparameterizationKLKLqp({z: qz}, data)
optimizer = tf.train.AdamOptimizer(0.01, epsilon=1.0)
inference.initialize(optimizer=optimizer, use_prettytensor=True)

hidden_rep = tf.sigmoid(logits)

tf.global_variables_initializer().run()

n_epoch = 100
n_iter_per_epoch = 1000
for epoch in range(n_epoch):
  avg_loss = 0.0

  pbar = Progbar(n_iter_per_epoch)
  for t in range(1, n_iter_per_epoch + 1):
    pbar.update(t)
    x_train, _ = mnist.train.next_batch(M)
    x_train = np.random.binomial(1, x_train)
    info_dict = inference.update(feed_dict={x_ph: x_train})
    avg_loss += info_dict['loss']

  # Print a lower bound to the average marginal likelihood for an
  # image.
  avg_loss = avg_loss / n_iter_per_epoch
  avg_loss = avg_loss / M
  print("log p(x) >= {:0.3f}".format(avg_loss))

  # Visualize hidden representations.
  imgs = hidden_rep.eval()
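
  # Hedged continuation (not part of the original snippet): save a few of the
  # hidden representations as 28x28 grayscale images. The output directory
  # and the `os`/`matplotlib.pyplot as plt` imports are assumptions, not taken
  # from the original example.
  IMG_DIR = "img"
  if not os.path.exists(IMG_DIR):
    os.makedirs(IMG_DIR)
  for b in range(min(10, imgs.shape[0])):
    plt.imsave(os.path.join(IMG_DIR, "{}.png".format(b)),
               imgs[b].reshape(28, 28), cmap="gray")
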
Example #23
File: utils.py Project: qkuang/tf_gbds
    def initialize(self,
                   n_iter=1000,
                   n_print=None,
                   scale=None,
                   auto_transform=True,
                   logdir=None,
                   log_timestamp=True,
                   log_vars=None,
                   debug=False,
                   optimizer=None,
                   var_list=None,
                   use_prettytensor=False,
                   global_step=None,
                   n_samples=1,
                   kl_scaling=None,
                   maxnorm=5.):

        if kl_scaling is None:
            kl_scaling = {}
        if n_samples <= 0:
            raise ValueError(
                "n_samples should be greater than zero: {}".format(n_samples))

        self.n_samples = n_samples
        self.kl_scaling = kl_scaling

        # from inference.py
        self.n_iter = n_iter
        if n_print is None:
            self.n_print = int(n_iter / 100)
        else:
            self.n_print = n_print

        self.progbar = Progbar(self.n_iter)
        self.t = tf.Variable(0, trainable=False, name="iteration")
        self.increment_t = self.t.assign_add(1)

        if scale is None:
            scale = {}
        elif not isinstance(scale, dict):
            raise TypeError("scale must be a dict object.")
        self.scale = scale

        self.transformations = {}
        if auto_transform:
            latent_vars = self.latent_vars.copy()
            self.latent_vars = {}
            self.latent_vars_unconstrained = {}
            for z, qz in six.iteritems(latent_vars):
                if hasattr(z, 'support') and hasattr(qz, 'support') and \
                        z.support != qz.support and qz.support != 'point':

                    z_unconstrained = transform(z)
                    self.transformations[z] = z_unconstrained

                    if qz.support == "points":
                        qz_unconstrained = qz
                    else:
                        qz_unconstrained = transform(qz)
                    self.latent_vars_unconstrained[
                        z_unconstrained] = qz_unconstrained

                    if z_unconstrained != z:
                        qz_constrained = transform(
                            qz_unconstrained,
                            bijectors.Invert(z_unconstrained.bijector))

                        try:
                            qz_constrained.params = \
                                    z_unconstrained.bijector.inverse(
                                        qz_unconstrained.params)
                        except:
                            pass
                    else:
                        qz_constrained = qz_unconstrained

                    self.latent_vars[z] = qz_constrained
                else:
                    self.latent_vars[z] = qz
                    self.latent_vars_unconstrained[z] = qz
            del latent_vars

        if logdir is not None:
            self.logging = True
            if log_timestamp:
                logdir = os.path.expanduser(logdir)
                logdir = os.path.join(
                    logdir,
                    datetime.strftime(datetime.utcnow(), "%Y%m%d_%H%M%S"))

            self._summary_key = tf.get_default_graph().unique_name("summaries")
            self._set_log_variables(log_vars)
            self.train_writer = tf.summary.FileWriter(logdir,
                                                      tf.get_default_graph())
        else:
            self.logging = False

        self.debug = debug
        if self.debug:
            self.op_check = tf.add_check_numerics_ops()

        self.reset = [tf.variables_initializer([self.t])]

        # from variational_inference.py
        if var_list is None:
            var_list = set()
            trainables = tf.trainable_variables()
            for z, qz in six.iteritems(self.latent_vars):
                var_list.update(get_variables(z, collection=trainables))
                var_list.update(get_variables(qz, collection=trainables))

            for x, qx in six.iteritems(self.data):
                if isinstance(x, RandomVariable) and \
                        not isinstance(qx, RandomVariable):
                    var_list.update(get_variables(x, collection=trainables))

        var_list = list(var_list)

        self.loss, grads_and_vars = self.build_loss_and_gradients(var_list)

        clipped_grads_and_vars = []
        for grad, var in grads_and_vars:
            if "kernel" in var.name or "bias" in var.name:
                clipped_grads_and_vars.append((tf.clip_by_norm(grad,
                                                               maxnorm,
                                                               axes=[0]), var))
            else:
                clipped_grads_and_vars.append((grad, var))
        # for grad, var in grads_and_vars:
        #     clipped_grads_and_vars.append(
        #         (tf.clip_by_value(grad, -1000., 1000.), var))
        del grads_and_vars

        if self.logging:
            tf.summary.scalar("loss",
                              self.loss,
                              collections=[self._summary_key])
        for grad, var in clipped_grads_and_vars:
            tf.summary.histogram("gradient/" + var.name.replace(':', '/'),
                                 grad,
                                 collections=[self._summary_key])
            tf.summary.scalar("gradient_norm/" + var.name.replace(':', '/'),
                              tf.norm(grad),
                              collections=[self._summary_key])

        self.summarize = tf.summary.merge_all(key=self._summary_key)

        if optimizer is None and global_step is None:
            global_step = tf.Variable(0, trainable=False, name="global_step")

        if isinstance(global_step, tf.Variable):
            starter_learning_rate = 0.1
            learning_rate = tf.train.exponential_decay(starter_learning_rate,
                                                       global_step,
                                                       100,
                                                       0.9,
                                                       staircase=True)
        else:
            learning_rate = 0.01

        # Build optimizer.
        if optimizer is None:
            optimizer = tf.train.AdamOptimizer(learning_rate)
        elif isinstance(optimizer, str):
            if optimizer == 'gradientdescent':
                optimizer = tf.train.GradientDescentOptimizer(learning_rate)
            elif optimizer == 'adadelta':
                optimizer = tf.train.AdadeltaOptimizer(learning_rate)
            elif optimizer == 'adagrad':
                optimizer = tf.train.AdagradOptimizer(learning_rate)
            elif optimizer == 'momentum':
                optimizer = tf.train.MomentumOptimizer(learning_rate, 0.9)
            elif optimizer == 'adam':
                optimizer = tf.train.AdamOptimizer(learning_rate)
            elif optimizer == 'ftrl':
                optimizer = tf.train.FtrlOptimizer(learning_rate)
            elif optimizer == 'rmsprop':
                optimizer = tf.train.RMSPropOptimizer(learning_rate)
            else:
                raise ValueError('Optimizer class not found:', optimizer)
        elif not isinstance(optimizer, tf.train.Optimizer):
            raise TypeError(
                "Optimizer must be str, tf.train.Optimizer, or None.")

        with tf.variable_scope(None, default_name="optimizer") as scope:
            if not use_prettytensor:
                self.train = optimizer.apply_gradients(clipped_grads_and_vars,
                                                       global_step=global_step)
            else:
                import prettytensor as pt
                self.train = pt.apply_optimizer(optimizer,
                                                losses=[self.loss],
                                                global_step=global_step,
                                                var_list=var_list)

        self.reset.append(
            tf.variables_initializer(
                tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                  scope=scope.name)))
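
A hedged usage sketch: assuming the ``initialize`` above belongs to a KLqp-style subclass (call it ``KLqpClipped``; the class name and the variables ``z``, ``qz``, ``x``, ``x_obs`` are placeholders), it is driven the same way as any other Edward inference, with ``maxnorm`` controlling the per-variable gradient clipping.

inference = KLqpClipped({z: qz}, data={x: x_obs})
inference.initialize(optimizer=tf.train.AdamOptimizer(1e-3),
                     n_samples=5, maxnorm=5.)

sess = ed.get_session()
sess.run(tf.global_variables_initializer())

for _ in range(inference.n_iter):
  info_dict = inference.update()
  inference.print_progress(info_dict)
inference.finalize()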