def initialize(self, n_iter=1000, n_print=None, scale=None, logdir=None,
               debug=False):
  """Initialize inference algorithm. It initializes hyperparameters
  and builds ops for the algorithm's computational graph.

  No ops should be created outside the call to ``initialize()``.

  Parameters
  ----------
  n_iter : int, optional
    Number of iterations for algorithm.
  n_print : int, optional
    Number of iterations for each print progress. To suppress print
    progress, specify 0. Default is ``int(n_iter / 10)``.
  scale : dict of RandomVariable to tf.Tensor, optional
    A tensor to scale computation for any random variable that it is
    bound to. Its shape must be broadcastable; it is multiplied
    element-wise to the random variable. For example, this is useful
    for mini-batch scaling when inferring global variables, or
    applying masks on a random variable.
  logdir : str, optional
    Directory where event file will be written. For details, see
    ``tf.summary.FileWriter``. Default is to write nothing.
  debug : bool, optional
    If True, add checks for ``NaN`` and ``Inf`` to all computations
    in the graph. May result in substantially slower execution times.
  """
  self.n_iter = n_iter
  if n_print is None:
    self.n_print = int(n_iter / 10)
  else:
    self.n_print = n_print

  self.progbar = Progbar(self.n_iter)
  self.t = tf.Variable(0, trainable=False, name="iteration")
  self.increment_t = self.t.assign_add(1)

  if scale is None:
    scale = {}
  elif not isinstance(scale, dict):
    raise TypeError("scale must be a dict object.")

  self.scale = scale

  if logdir is not None:
    self.logging = True
    self.train_writer = tf.summary.FileWriter(logdir, tf.get_default_graph())
    self.summarize = tf.summary.merge_all()
  else:
    self.logging = False

  self.debug = debug
  if self.debug:
    self.op_check = tf.add_check_numerics_ops()
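The ``scale`` argument above is easiest to see with a concrete mini-batch example. The following is a minimal sketch, not taken from the library: the names ``N``, ``M``, and ``x_ph`` are illustrative, and depending on the Edward version the ``Normal`` constructor may take ``mu``/``sigma`` instead of ``loc``/``scale``. Scaling the mini-batch likelihood by ``N / M`` makes it stand in for the full-data term.

```python
import edward as ed
import tensorflow as tf
from edward.models import Normal

N = 10000  # assumed full data set size
M = 100    # assumed mini-batch size

mu = Normal(loc=0.0, scale=1.0)
x = Normal(loc=tf.ones(M) * mu, scale=1.0)
x_ph = tf.placeholder(tf.float32, [M])

qmu = Normal(loc=tf.Variable(0.0),
             scale=tf.nn.softplus(tf.Variable(0.0)))

inference = ed.KLqp({mu: qmu}, data={x: x_ph})
# Scale the mini-batch log-likelihood so it approximates the sum over
# all N data points.
inference.initialize(scale={x: float(N) / M})
```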
def main(_):
  ed.set_seed(42)

  # DATA. MNIST batches are fed at training time.
  (x_train, _), (x_test, _) = mnist(FLAGS.data_dir)
  x_train_generator = generator(x_train, FLAGS.M)

  # MODEL
  # Define a subgraph of the full model, corresponding to a minibatch of
  # size M.
  z = Normal(loc=tf.zeros([FLAGS.M, FLAGS.d]),
             scale=tf.ones([FLAGS.M, FLAGS.d]))
  hidden = tf.layers.dense(z, 256, activation=tf.nn.relu)
  x = Bernoulli(logits=tf.layers.dense(hidden, 28 * 28))

  # INFERENCE
  # Define a subgraph of the variational model, corresponding to a
  # minibatch of size M.
  x_ph = tf.placeholder(tf.int32, [FLAGS.M, 28 * 28])
  hidden = tf.layers.dense(tf.cast(x_ph, tf.float32), 256,
                           activation=tf.nn.relu)
  qz = Normal(loc=tf.layers.dense(hidden, FLAGS.d),
              scale=tf.layers.dense(hidden, FLAGS.d,
                                    activation=tf.nn.softplus))

  # Bind p(x, z) and q(z | x) to the same TensorFlow placeholder for x.
  inference = ed.KLqp({z: qz}, data={x: x_ph})
  optimizer = tf.train.RMSPropOptimizer(0.01, epsilon=1.0)
  inference.initialize(optimizer=optimizer)

  tf.global_variables_initializer().run()

  n_iter_per_epoch = x_train.shape[0] // FLAGS.M
  for epoch in range(1, FLAGS.n_epoch + 1):
    print("Epoch: {0}".format(epoch))
    avg_loss = 0.0

    pbar = Progbar(n_iter_per_epoch)
    for t in range(1, n_iter_per_epoch + 1):
      pbar.update(t)
      x_batch = next(x_train_generator)
      info_dict = inference.update(feed_dict={x_ph: x_batch})
      avg_loss += info_dict['loss']

    # Print a lower bound to the average marginal likelihood for an
    # image.
    avg_loss /= n_iter_per_epoch
    avg_loss /= FLAGS.M
    print("-log p(x) <= {:0.3f}".format(avg_loss))

    # Prior predictive check.
    images = x.eval()
    for m in range(FLAGS.M):
      imsave(os.path.join(FLAGS.out_dir, '%d.png') % m,
             images[m].reshape(28, 28))
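The ``generator`` helper used above is not shown in this excerpt. A hypothetical sketch of what it might look like, assuming it simply cycles through the training array's first axis forever:

```python
import numpy as np


def generator(array, batch_size):
  """Yield mini-batches over the first axis of `array`, cycling forever."""
  start = 0  # pointer into the array
  while True:
    stop = start + batch_size
    diff = stop - array.shape[0]
    if diff <= 0:
      batch = array[start:stop]
      start += batch_size
    else:
      # Wrap around to the beginning of the array.
      batch = np.concatenate((array[start:], array[:diff]))
      start = diff
    yield batch
```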
def main(_):
  ed.set_seed(42)

  # DATA. MNIST batches are fed at training time.
  (x_train, _), (x_test, _) = mnist(FLAGS.data_dir)
  x_train_generator = generator(x_train, FLAGS.M)

  # MODEL
  z = Normal(loc=tf.zeros([FLAGS.M, FLAGS.d]),
             scale=tf.ones([FLAGS.M, FLAGS.d]))
  logits = generative_network(z)
  x = Bernoulli(logits=logits)

  # INFERENCE
  x_ph = tf.placeholder(tf.int32, [FLAGS.M, 28 * 28])
  loc, scale = inference_network(tf.cast(x_ph, tf.float32))
  qz = Normal(loc=loc, scale=scale)

  # Bind p(x, z) and q(z | x) to the same placeholder for x.
  inference = ed.KLqp({z: qz}, data={x: x_ph})
  optimizer = tf.train.AdamOptimizer(0.01, epsilon=1.0)
  inference.initialize(optimizer=optimizer)

  hidden_rep = tf.sigmoid(logits)

  tf.global_variables_initializer().run()

  n_iter_per_epoch = x_train.shape[0] // FLAGS.M
  for epoch in range(1, FLAGS.n_epoch + 1):
    print("Epoch: {0}".format(epoch))
    avg_loss = 0.0

    pbar = Progbar(n_iter_per_epoch)
    for t in range(1, n_iter_per_epoch + 1):
      pbar.update(t)
      x_batch = next(x_train_generator)
      info_dict = inference.update(feed_dict={x_ph: x_batch})
      avg_loss += info_dict['loss']

    # Print a lower bound to the average marginal likelihood for an
    # image.
    avg_loss /= n_iter_per_epoch
    avg_loss /= FLAGS.M
    print("-log p(x) <= {:0.3f}".format(avg_loss))

    # Visualize hidden representations.
    images = hidden_rep.eval()
    for m in range(FLAGS.M):
      imsave(os.path.join(FLAGS.out_dir, '%d.png') % m,
             images[m].reshape(28, 28))
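``generative_network`` and ``inference_network`` are defined elsewhere in the script. Minimal stand-ins consistent with the shapes used above might look like the following; the 256-unit hidden layer is an assumption, not taken from this excerpt.

```python
def generative_network(z):
  """Map a latent code of size d to Bernoulli logits over 28 * 28 pixels."""
  hidden = tf.layers.dense(z, 256, activation=tf.nn.relu)
  return tf.layers.dense(hidden, 28 * 28)


def inference_network(x):
  """Map a flattened image to the location and scale of q(z | x)."""
  hidden = tf.layers.dense(x, 256, activation=tf.nn.relu)
  loc = tf.layers.dense(hidden, FLAGS.d)
  scale = tf.layers.dense(hidden, FLAGS.d, activation=tf.nn.softplus)
  return loc, scale
```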
def main(_):
  ed.set_seed(42)

  # DATA
  (x_train, _), (x_test, _), (x_valid, _) = caltech101_silhouettes(
      FLAGS.data_dir)
  x_train_generator = generator(x_train, FLAGS.batch_size)
  x_ph = tf.placeholder(tf.int32, [None, 28 * 28])

  # MODEL
  zs = [0] * len(FLAGS.hidden_sizes)
  for l in reversed(range(len(FLAGS.hidden_sizes))):
    if l == len(FLAGS.hidden_sizes) - 1:
      logits = tf.zeros([tf.shape(x_ph)[0], FLAGS.hidden_sizes[l]])
    else:
      logits = tf.layers.dense(tf.cast(zs[l + 1], tf.float32),
                               FLAGS.hidden_sizes[l], activation=None)

    zs[l] = Bernoulli(logits=logits)

  x = Bernoulli(logits=tf.layers.dense(tf.cast(zs[0], tf.float32),
                                       28 * 28, activation=None))

  # INFERENCE
  # Define the variational model with the reverse ordering of the
  # probability model: if p is 15-100-300 from top-down, q is
  # 300-100-15 from bottom-up.
  qzs = [0] * len(FLAGS.hidden_sizes)
  for l in range(len(FLAGS.hidden_sizes)):
    if l == 0:
      logits = tf.layers.dense(tf.cast(x_ph, tf.float32),
                               FLAGS.hidden_sizes[l], activation=None)
    else:
      logits = tf.layers.dense(tf.cast(qzs[l - 1], tf.float32),
                               FLAGS.hidden_sizes[l], activation=None)

    qzs[l] = Bernoulli(logits=logits)

  inference = ed.KLqp({z: qz for z, qz in zip(zs, qzs)}, data={x: x_ph})
  optimizer = tf.train.AdamOptimizer(FLAGS.step_size)
  inference.initialize(optimizer=optimizer, n_samples=FLAGS.n_train_samples)

  # Build tensor for log-likelihood given one variational sample to run
  # on test data.
  x_post = ed.copy(x, {z: qz for z, qz in zip(zs, qzs)})
  x_neg_log_prob = (-tf.reduce_sum(x_post.log_prob(x_ph)) /
                    tf.cast(tf.shape(x_ph)[0], tf.float32))

  sess = ed.get_session()
  tf.global_variables_initializer().run()

  for epoch in range(FLAGS.n_epoch):
    print("Epoch {}".format(epoch))
    train_loss = 0.0

    pbar = Progbar(FLAGS.n_iter_per_epoch)
    for t in range(1, FLAGS.n_iter_per_epoch + 1):
      pbar.update(t)
      x_batch = next(x_train_generator)
      info_dict = inference.update(feed_dict={x_ph: x_batch})
      train_loss += info_dict['loss']

    # Print per-data point loss, averaged over training epoch.
    train_loss /= FLAGS.n_iter_per_epoch
    train_loss /= FLAGS.batch_size
    print("Training negative log-likelihood: {:0.3f}".format(train_loss))

    test_loss = [sess.run(x_neg_log_prob, {x_ph: x_test})
                 for _ in range(FLAGS.n_test_samples)]
    test_loss = np.mean(test_loss)
    print("Test negative log-likelihood: {:0.3f}".format(test_loss))

    # Prior predictive check.
    images = sess.run(x, {x_ph: x_batch})  # feed ph to determine sample size
    for m in range(FLAGS.batch_size):
      # `out_dir` is assumed to be defined elsewhere in the script.
      imsave("{}/{}.png".format(out_dir, m), images[m].reshape(28, 28))
class Inference(object): """Abstract base class for inference. All inference algorithms in Edward inherit from `Inference`, sharing common methods and properties via a class hierarchy. Specific algorithms typically inherit from other subclasses of `Inference` rather than `Inference` directly. For example, one might inherit from the abstract classes `MonteCarlo` or `VariationalInference`. To build an algorithm inheriting from `Inference`, one must at the minimum implement `initialize` and `update`: the former builds the computational graph for the algorithm; the latter runs the computational graph for the algorithm. To reset inference (e.g., internal variable counters incremented over training), fetch inference's reset ops from session with `sess.run(inference.reset)`. #### Examples ```python mu = Normal(loc=tf.constant(0.0), scale=tf.constant(1.0)) x = Normal(loc=tf.ones(50) * mu, scale=tf.constant(1.0)) qmu_loc = tf.Variable(tf.random_normal([])) qmu_scale = tf.nn.softplus(tf.Variable(tf.random_normal([]))) qmu = Normal(loc=qmu_loc, scale=qmu_scale) inference = ed.Inference({mu: qmu}, data={x: tf.zeros(50)}) ``` """ def __init__(self, latent_vars=None, data=None): """Create an inference algorithm. Args: latent_vars: dict, optional. Collection of latent variables (of type `RandomVariable` or `tf.Tensor`) to perform inference on. Each random variable is binded to another random variable; the latter will infer the former conditional on data. data: dict, optional. Data dictionary which binds observed variables (of type `RandomVariable` or `tf.Tensor`) to their realizations (of type `tf.Tensor`). It can also bind placeholders (of type `tf.Tensor`) used in the model to their realizations; and prior latent variables (of type `RandomVariable`) to posterior latent variables (of type `RandomVariable`). """ sess = get_session() if latent_vars is None: latent_vars = {} if data is None: data = {} check_latent_vars(latent_vars) self.latent_vars = latent_vars check_data(data) self.data = {} for key, value in six.iteritems(data): if isinstance(key, tf.Tensor) and "Placeholder" in key.op.type: self.data[key] = value elif isinstance(key, (RandomVariable, tf.Tensor)): if isinstance(value, (RandomVariable, tf.Tensor)): self.data[key] = value elif isinstance(value, (float, list, int, np.ndarray, np.number, str)): # If value is a Python type, store it in the graph. # Assign its placeholder with the key's data type. with tf.variable_scope("data"): ph = tf.placeholder(key.dtype, np.shape(value)) var = tf.Variable(ph, trainable=False, collections=[]) sess.run(var.initializer, {ph: value}) self.data[key] = var def run(self, variables=None, use_coordinator=True, *args, **kwargs): """A simple wrapper to run inference. 1. Initialize algorithm via `initialize`. 2. (Optional) Build a TensorFlow summary writer for TensorBoard. 3. (Optional) Initialize TensorFlow variables. 4. (Optional) Start queue runners. 5. Run `update` for `self.n_iter` iterations. 6. While running, `print_progress`. 7. Finalize algorithm via `finalize`. 8. (Optional) Stop queue runners. To customize the way inference is run, run these steps individually. Args: variables: list, optional. A list of TensorFlow variables to initialize during inference. Default is to initialize all variables (this includes reinitializing variables that were already initialized). To avoid initializing any variables, pass in an empty list. use_coordinator: bool, optional. Whether to start and stop queue runners during inference using a TensorFlow coordinator. 
For example, queue runners are necessary for batch training with file readers. *args: Passed into `initialize`. **kwargs: Passed into `initialize`. """ self.initialize(*args, **kwargs) if variables is None: init = tf.global_variables_initializer() else: init = tf.variables_initializer(variables) # Feed placeholders in case initialization depends on them. feed_dict = {} for key, value in six.iteritems(self.data): if isinstance(key, tf.Tensor) and "Placeholder" in key.op.type: feed_dict[key] = value init.run(feed_dict) if use_coordinator: # Start input enqueue threads. self.coord = tf.train.Coordinator() self.threads = tf.train.start_queue_runners(coord=self.coord) for _ in range(self.n_iter): info_dict = self.update() self.print_progress(info_dict) self.finalize() if use_coordinator: # Ask threads to stop. self.coord.request_stop() self.coord.join(self.threads) @abc.abstractmethod def initialize(self, n_iter=1000, n_print=None, scale=None, logdir=None, log_timestamp=True, log_vars=None, debug=False): """Initialize inference algorithm. It initializes hyperparameters and builds ops for the algorithm's computation graph. Any derived class of `Inference` **must** implement this method. No methods which build ops should be called outside `initialize()`. Args: n_iter: int, optional. Number of iterations for algorithm when calling `run()`. Alternatively if controlling inference manually, it is the expected number of calls to `update()`; this number determines tracking information during the print progress. n_print: int, optional. Number of iterations for each print progress. To suppress print progress, then specify 0. Default is `int(n_iter / 100)`. scale: dict of RandomVariable to tf.Tensor, optional. A tensor to scale computation for any random variable that it is binded to. Its shape must be broadcastable; it is multiplied element-wise to the random variable. For example, this is useful for mini-batch scaling when inferring global variables, or applying masks on a random variable. logdir: str, optional. Directory where event file will be written. For details, see `tf.summary.FileWriter`. Default is to log nothing. log_timestamp: bool, optional. If True (and `logdir` is specified), create a subdirectory of `logdir` to save the specific run results. The subdirectory's name is the current UTC timestamp with format 'YYYYMMDD_HHMMSS'. log_vars: list, optional. Specifies the list of variables to log after each `n_print` steps. If None, will log all variables. If `[]`, no variables will be logged. `logdir` must be specified for variables to be logged. debug: bool, optional. If True, add checks for `NaN` and `Inf` to all computations in the graph. May result in substantially slower execution times. """ self.n_iter = n_iter if n_print is None: self.n_print = int(n_iter / 100) else: self.n_print = n_print self.progbar = Progbar(self.n_iter) self.t = tf.Variable(0, trainable=False, name="iteration") self.increment_t = self.t.assign_add(1) if scale is None: scale = {} elif not isinstance(scale, dict): raise TypeError("scale must be a dict object.") self.scale = scale if logdir is not None: self.logging = True if log_timestamp: logdir = os.path.join( logdir, datetime.strftime(datetime.utcnow(), "%Y%m%d_%H%M%S")) self._set_log_variables(log_vars) self.train_writer = tf.summary.FileWriter(logdir, tf.get_default_graph()) self.summarize = tf.summary.merge_all() else: self.logging = False self.debug = debug if self.debug: self.op_check = tf.add_check_numerics_ops() # Store reset ops which user can call. 
Subclasses should append # any ops needed to reset internal variables in inference. self.reset = [tf.variables_initializer([self.t])] @abc.abstractmethod def update(self, feed_dict=None): """Run one iteration of inference. Any derived class of `Inference` **must** implement this method. Args: feed_dict: dict, optional. Feed dictionary for a TensorFlow session run. It is used to feed placeholders that are not fed during initialization. Returns: dict. Dictionary of algorithm-specific information. """ if feed_dict is None: feed_dict = {} for key, value in six.iteritems(self.data): if isinstance(key, tf.Tensor) and "Placeholder" in key.op.type: feed_dict[key] = value sess = get_session() t = sess.run(self.increment_t) if self.debug: sess.run(self.op_check, feed_dict) if self.logging and self.n_print != 0: if t == 1 or t % self.n_print == 0: summary = sess.run(self.summarize, feed_dict) self.train_writer.add_summary(summary, t) return {'t': t} def print_progress(self, info_dict): """Print progress to output. Args: info_dict: dict. Dictionary of algorithm-specific information. """ if self.n_print != 0: t = info_dict['t'] if t == 1 or t % self.n_print == 0: self.progbar.update(t) def finalize(self): """Function to call after convergence. """ if self.logging: self.train_writer.close() def _set_log_variables(self, log_vars=None): """Log variables to TensorBoard. For each variable in `log_vars`, forms a `tf.summary.scalar` if the variable has scalar shape; otherwise forms a `tf.summary.histogram`. Args: log_vars: list, optional. Specifies the list of variables to log after each `n_print` steps. If None, will log all variables. If `[]`, no variables will be logged. """ summary_key = 'summaries_' + str(id(self)) if log_vars is None: log_vars = [] for key in six.iterkeys(self.data): log_vars += get_variables(key) for key, value in six.iteritems(self.latent_vars): log_vars += get_variables(key) log_vars += get_variables(value) log_vars = set(log_vars) for var in log_vars: # replace colons which are an invalid character var_name = var.name.replace(':', '/') # Log all scalars. if len(var.shape) == 0: tf.summary.scalar("parameter/{}".format(var_name), var, collections=[summary_key]) elif len(var.shape) == 1 and var.shape[0] == 1: tf.summary.scalar("parameter/{}".format(var_name), var[0], collections=[summary_key]) else: # If var is multi-dimensional, log a histogram of its values. tf.summary.histogram("parameter/{}".format(var_name), var, collections=[summary_key])
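The reset ops mentioned in the class docstring let you rerun inference in the same graph. A minimal sketch, reusing the toy model from the docstring above and assuming `ed.KLqp` as the concrete algorithm:

```python
inference = ed.KLqp({mu: qmu}, data={x: tf.zeros(50)})
inference.initialize(n_iter=500)

sess = ed.get_session()
tf.global_variables_initializer().run()
for _ in range(inference.n_iter):
  info_dict = inference.update()
  inference.print_progress(info_dict)
inference.finalize()

# Reset inference's internal counters (e.g., the iteration variable) so
# the same graph can be trained again from iteration 0.
sess.run(inference.reset)
```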
def main(_): ed.set_seed(42) # DATA x_train, metadata = nips(FLAGS.data_dir) documents = metadata['columns'] words = metadata['rows'] # Subset to documents in 2011 and words appearing in at least two # documents and have a total word count of at least 10. doc_idx = [ i for i, document in enumerate(documents) if document.startswith('2011') ] documents = [documents[doc] for doc in doc_idx] x_train = x_train[:, doc_idx] word_idx = np.logical_and( np.sum(x_train != 0, 1) >= 2, np.sum(x_train, 1) >= 10) words = [word for word, idx in zip(words, word_idx) if idx] x_train = x_train[word_idx, :] x_train = x_train.T N = x_train.shape[0] # number of documents D = x_train.shape[1] # vocabulary size # MODEL W2 = Gamma(0.1, 0.3, sample_shape=[FLAGS.K[2], FLAGS.K[1]]) W1 = Gamma(0.1, 0.3, sample_shape=[FLAGS.K[1], FLAGS.K[0]]) W0 = Gamma(0.1, 0.3, sample_shape=[FLAGS.K[0], D]) z3 = Gamma(0.1, 0.1, sample_shape=[N, FLAGS.K[2]]) z2 = Gamma(FLAGS.shape, FLAGS.shape / tf.matmul(z3, W2)) z1 = Gamma(FLAGS.shape, FLAGS.shape / tf.matmul(z2, W1)) x = Poisson(tf.matmul(z1, W0)) # INFERENCE qW2 = pointmass_q(W2.shape) qW1 = pointmass_q(W1.shape) qW0 = pointmass_q(W0.shape) if FLAGS.q == 'gamma': qz3 = gamma_q(z3.shape) qz2 = gamma_q(z2.shape) qz1 = gamma_q(z1.shape) else: qz3 = lognormal_q(z3.shape) qz2 = lognormal_q(z2.shape) qz1 = lognormal_q(z1.shape) # We apply variational EM with E-step over local variables # and M-step to point estimate the global weight matrices. inference_e = ed.KLqp({ z1: qz1, z2: qz2, z3: qz3 }, data={ x: x_train, W0: qW0, W1: qW1, W2: qW2 }) inference_m = ed.MAP({ W0: qW0, W1: qW1, W2: qW2 }, data={ x: x_train, z1: qz1, z2: qz2, z3: qz3 }) optimizer_e = tf.train.RMSPropOptimizer(FLAGS.lr) optimizer_m = tf.train.RMSPropOptimizer(FLAGS.lr) kwargs = { 'optimizer': optimizer_e, 'n_print': 100, 'logdir': FLAGS.logdir, 'log_timestamp': False } if FLAGS.q == 'gamma': kwargs['n_samples'] = 30 inference_e.initialize(**kwargs) inference_m.initialize(optimizer=optimizer_m) sess = ed.get_session() tf.global_variables_initializer().run() n_epoch = 20 n_iter_per_epoch = 10000 for epoch in range(n_epoch): print("Epoch {}".format(epoch)) nll = 0.0 pbar = Progbar(n_iter_per_epoch) for t in range(1, n_iter_per_epoch + 1): pbar.update(t) info_dict_e = inference_e.update() info_dict_m = inference_m.update() nll += info_dict_e['loss'] # Compute perplexity averaged over a number of training iterations. # The model's negative log-likelihood of data is upper bounded by # the variational objective. nll /= n_iter_per_epoch perplexity = np.exp(nll / np.sum(x_train)) print("Negative log-likelihood <= {:0.3f}".format(nll)) print("Perplexity <= {:0.3f}".format(perplexity)) # Print top 10 words for first 10 topics. qW0_vals = sess.run(qW0) for k in range(10): top_words_idx = qW0_vals[k, :].argsort()[-10:][::-1] top_words = " ".join([words[i] for i in top_words_idx]) print("Topic {}: {}".format(k, top_words))
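``pointmass_q``, ``gamma_q``, and ``lognormal_q`` are helper constructors defined elsewhere in the script. As an illustration only, a point-mass family over positive values (used for the MAP estimates in the M-step above) might be built roughly as follows; the softplus link and variable names are assumptions:

```python
from edward.models import PointMass


def pointmass_q(shape, name=None):
  """Point-mass variational family over positive-valued parameters."""
  with tf.variable_scope(name, default_name="pointmass_q"):
    min_mean = 1e-3
    mean = tf.get_variable("mean", shape)
    # Softplus keeps the point estimate positive; clip away from zero.
    return PointMass(tf.maximum(tf.nn.softplus(mean), min_mean))
```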
class Inference(object): """Abstract base class for inference. All inference algorithms in Edward inherit from `Inference`, sharing common methods and properties via a class hierarchy. Specific algorithms typically inherit from other subclasses of `Inference` rather than `Inference` directly. For example, one might inherit from the abstract classes `MonteCarlo` or `VariationalInference`. To build an algorithm inheriting from `Inference`, one must at the minimum implement `initialize` and `update`: the former builds the computational graph for the algorithm; the latter runs the computational graph for the algorithm. To reset inference (e.g., internal variable counters incremented over training), fetch inference's reset ops from session with `sess.run(inference.reset)`. #### Examples ```python # Set up probability model. mu = Normal(loc=0.0, scale=1.0) x = Normal(loc=mu, scale=1.0, sample_shape=50) # Set up posterior approximation. qmu_loc = tf.Variable(tf.random_normal([])) qmu_scale = tf.nn.softplus(tf.Variable(tf.random_normal([]))) qmu = Normal(loc=qmu_loc, scale=qmu_scale) inference = ed.Inference({mu: qmu}, data={x: tf.zeros(50)}) ``` """ def __init__(self, latent_vars=None, data=None): """Create an inference algorithm. Args: latent_vars: dict, optional. Collection of latent variables (of type `RandomVariable` or `tf.Tensor`) to perform inference on. Each random variable is binded to another random variable; the latter will infer the former conditional on data. data: dict, optional. Data dictionary which binds observed variables (of type `RandomVariable` or `tf.Tensor`) to their realizations (of type `tf.Tensor`). It can also bind placeholders (of type `tf.Tensor`) used in the model to their realizations; and prior latent variables (of type `RandomVariable`) to posterior latent variables (of type `RandomVariable`). """ sess = get_session() if latent_vars is None: latent_vars = {} if data is None: data = {} check_latent_vars(latent_vars) self.latent_vars = latent_vars check_data(data) self.data = {} for key, value in six.iteritems(data): if isinstance(key, tf.Tensor) and "Placeholder" in key.op.type: self.data[key] = value elif isinstance(key, (RandomVariable, tf.Tensor)): if isinstance(value, (RandomVariable, tf.Tensor)): self.data[key] = value elif isinstance( value, (float, list, int, np.ndarray, np.number, str)): # If value is a Python type, store it in the graph. # Assign its placeholder with the key's data type. with tf.variable_scope(None, default_name="data"): ph = tf.placeholder(key.dtype, np.shape(value)) var = tf.Variable(ph, trainable=False, collections=[]) sess.run(var.initializer, {ph: value}) self.data[key] = var def run(self, variables=None, use_coordinator=True, *args, **kwargs): """A simple wrapper to run inference. 1. Initialize algorithm via `initialize`. 2. (Optional) Build a TensorFlow summary writer for TensorBoard. 3. (Optional) Initialize TensorFlow variables. 4. (Optional) Start queue runners. 5. Run `update` for `self.n_iter` iterations. 6. While running, `print_progress`. 7. Finalize algorithm via `finalize`. 8. (Optional) Stop queue runners. To customize the way inference is run, run these steps individually. Args: variables: list, optional. A list of TensorFlow variables to initialize during inference. Default is to initialize all variables (this includes reinitializing variables that were already initialized). To avoid initializing any variables, pass in an empty list. use_coordinator: bool, optional. 
Whether to start and stop queue runners during inference using a TensorFlow coordinator. For example, queue runners are necessary for batch training with file readers. *args, **kwargs: Passed into `initialize`. """ self.initialize(*args, **kwargs) if variables is None: init = tf.global_variables_initializer() else: init = tf.variables_initializer(variables) # Feed placeholders in case initialization depends on them. feed_dict = {} for key, value in six.iteritems(self.data): if isinstance(key, tf.Tensor) and "Placeholder" in key.op.type: feed_dict[key] = value init.run(feed_dict) if use_coordinator: # Start input enqueue threads. self.coord = tf.train.Coordinator() self.threads = tf.train.start_queue_runners(coord=self.coord) for _ in range(self.n_iter): info_dict = self.update() self.print_progress(info_dict) self.finalize() if use_coordinator: # Ask threads to stop. self.coord.request_stop() self.coord.join(self.threads) @abc.abstractmethod def initialize(self, n_iter=1000, n_print=None, scale=None, auto_transform=True, logdir=None, log_timestamp=True, log_vars=None, debug=False): """Initialize inference algorithm. It initializes hyperparameters and builds ops for the algorithm's computation graph. Any derived class of `Inference` **must** implement this method. No methods which build ops should be called outside `initialize()`. Args: n_iter: int, optional. Number of iterations for algorithm when calling `run()`. Alternatively if controlling inference manually, it is the expected number of calls to `update()`; this number determines tracking information during the print progress. n_print: int, optional. Number of iterations for each print progress. To suppress print progress, then specify 0. Default is `int(n_iter / 100)`. scale: dict of RandomVariable to tf.Tensor, optional. A tensor to scale computation for any random variable that it is binded to. Its shape must be broadcastable; it is multiplied element-wise to the random variable. For example, this is useful for mini-batch scaling when inferring global variables, or applying masks on a random variable. auto_transform: bool, optional. Whether to automatically transform continuous latent variables of unequal support to be on the unconstrained space. It is only applied if the argument is `True`, the latent variable pair are `ed.RandomVariable`s with the `support` attribute, the supports are both continuous and unequal. logdir: str, optional. Directory where event file will be written. For details, see `tf.summary.FileWriter`. Default is to log nothing. log_timestamp: bool, optional. If True (and `logdir` is specified), create a subdirectory of `logdir` to save the specific run results. The subdirectory's name is the current UTC timestamp with format 'YYYYMMDD_HHMMSS'. log_vars: list, optional. Specifies the list of variables to log after each `n_print` steps. If None, will log all variables. If `[]`, no variables will be logged. `logdir` must be specified for variables to be logged. debug: bool, optional. If True, add checks for `NaN` and `Inf` to all computations in the graph. May result in substantially slower execution times. 
""" self.n_iter = n_iter if n_print is None: self.n_print = int(n_iter / 100) else: self.n_print = n_print self.progbar = Progbar(self.n_iter) self.t = tf.Variable(0, trainable=False, name="iteration") self.increment_t = self.t.assign_add(1) if scale is None: scale = {} elif not isinstance(scale, dict): raise TypeError("scale must be a dict object.") self.scale = scale # map from original latent vars to unconstrained versions self.transformations = {} if auto_transform: latent_vars = self.latent_vars.copy() # latent_vars maps original latent vars to constrained Q's. # latent_vars_unconstrained maps unconstrained vars to unconstrained Q's. self.latent_vars = {} self.latent_vars_unconstrained = {} for z, qz in six.iteritems(latent_vars): if hasattr(z, 'support') and hasattr(qz, 'support') and \ z.support != qz.support and qz.support != 'point': # transform z to an unconstrained space z_unconstrained = transform(z) self.transformations[z] = z_unconstrained # make sure we also have a qz that covers the unconstrained space if qz.support == "points": qz_unconstrained = qz else: qz_unconstrained = transform(qz) self.latent_vars_unconstrained[ z_unconstrained] = qz_unconstrained # additionally construct the transformation of qz # back into the original constrained space if z_unconstrained != z: qz_constrained = transform( qz_unconstrained, bijectors.Invert(z_unconstrained.bijector)) try: # attempt to pushforward the params of Empirical distributions qz_constrained.params = z_unconstrained.bijector.inverse( qz_unconstrained.params) except: # qz_unconstrained is not an Empirical distribution pass else: qz_constrained = qz_unconstrained self.latent_vars[z] = qz_constrained else: self.latent_vars[z] = qz self.latent_vars_unconstrained[z] = qz del latent_vars if logdir is not None: self.logging = True if log_timestamp: logdir = os.path.expanduser(logdir) logdir = os.path.join( logdir, datetime.strftime(datetime.utcnow(), "%Y%m%d_%H%M%S")) self._summary_key = tf.get_default_graph().unique_name("summaries") self._set_log_variables(log_vars) self.train_writer = tf.summary.FileWriter(logdir, tf.get_default_graph()) else: self.logging = False self.debug = debug if self.debug: self.op_check = tf.add_check_numerics_ops() # Store reset ops which user can call. Subclasses should append # any ops needed to reset internal variables in inference. self.reset = [tf.variables_initializer([self.t])] @abc.abstractmethod def update(self, feed_dict=None): """Run one iteration of inference. Any derived class of `Inference` **must** implement this method. Args: feed_dict: dict, optional. Feed dictionary for a TensorFlow session run. It is used to feed placeholders that are not fed during initialization. Returns: dict. Dictionary of algorithm-specific information. """ if feed_dict is None: feed_dict = {} for key, value in six.iteritems(self.data): if isinstance(key, tf.Tensor) and "Placeholder" in key.op.type: feed_dict[key] = value sess = get_session() t = sess.run(self.increment_t) if self.debug: sess.run(self.op_check, feed_dict) if self.logging and self.n_print != 0: if t == 1 or t % self.n_print == 0: summary = sess.run(self.summarize, feed_dict) self.train_writer.add_summary(summary, t) return {'t': t} def print_progress(self, info_dict): """Print progress to output. Args: info_dict: dict. Dictionary of algorithm-specific information. """ if self.n_print != 0: t = info_dict['t'] if t == 1 or t % self.n_print == 0: self.progbar.update(t) def finalize(self): """Function to call after convergence. 
""" if self.logging: self.train_writer.close() def _set_log_variables(self, log_vars=None): """Log variables to TensorBoard. For each variable in `log_vars`, forms a `tf.summary.scalar` if the variable has scalar shape; otherwise forms a `tf.summary.histogram`. Args: log_vars: list, optional. Specifies the list of variables to log after each `n_print` steps. If None, will log all variables. If `[]`, no variables will be logged. """ if log_vars is None: log_vars = [] for key in six.iterkeys(self.data): log_vars += get_variables(key) for key, value in six.iteritems(self.latent_vars): log_vars += get_variables(key) log_vars += get_variables(value) log_vars = set(log_vars) for var in log_vars: # replace colons which are an invalid character var_name = var.name.replace(':', '/') # Log all scalars. if len(var.shape) == 0: tf.summary.scalar("parameter/{}".format(var_name), var, collections=[self._summary_key]) elif len(var.shape) == 1 and var.shape[0] == 1: tf.summary.scalar("parameter/{}".format(var_name), var[0], collections=[self._summary_key]) else: # If var is multi-dimensional, log a histogram of its values. tf.summary.histogram("parameter/{}".format(var_name), var, collections=[self._summary_key])
data = {x: x_ph}
inference = ed.ReparameterizationKLKLqp({z: qz}, data)
optimizer = tf.train.AdamOptimizer(0.01, epsilon=1.0)
inference.initialize(optimizer=optimizer, use_prettytensor=True)

hidden_rep = tf.sigmoid(logits)

init = tf.global_variables_initializer()
init.run()

n_epoch = 100
n_iter_per_epoch = 1000
for epoch in range(n_epoch):
  avg_loss = 0.0

  pbar = Progbar(n_iter_per_epoch)
  for t in range(1, n_iter_per_epoch + 1):
    pbar.update(t)
    x_train, _ = mnist.train.next_batch(M)
    info_dict = inference.update(feed_dict={x_ph: x_train})
    avg_loss += info_dict['loss']

  # Print a lower bound to the average marginal likelihood for an
  # image. The loss is an upper bound on -log p(x).
  avg_loss = avg_loss / n_iter_per_epoch
  avg_loss = avg_loss / M
  print("-log p(x) <= {:0.3f}".format(avg_loss))

  # Visualize hidden representations.
  imgs = hidden_rep.eval()
  for m in range(M):
def initialize(self, n_iter=1000, n_print=None, scale=None, logdir=None, log_timestamp=True, log_vars=None, debug=False): """Initialize inference algorithm. It initializes hyperparameters and builds ops for the algorithm's computation graph. Any derived class of ``Inference`` **must** implement this method. No methods which build ops should be called outside ``initialize()``. Parameters ---------- n_iter : int, optional Number of iterations for algorithm. n_print : int, optional Number of iterations for each print progress. To suppress print progress, then specify 0. Default is ``int(n_iter / 100)``. scale : dict of RandomVariable to tf.Tensor, optional A tensor to scale computation for any random variable that it is binded to. Its shape must be broadcastable; it is multiplied element-wise to the random variable. For example, this is useful for mini-batch scaling when inferring global variables, or applying masks on a random variable. logdir : str, optional Directory where event file will be written. For details, see ``tf.summary.FileWriter``. Default is to log nothing. log_timestamp : bool, optional If True (and ``logdir`` is specified), create a subdirectory of ``logdir`` to save the specific run results. The subdirectory's name is the current UTC timestamp with format 'YYYYMMDD_HHMMSS'. log_vars : list, optional Specifies the list of variables to log after each ``n_print`` steps. If None, will log all variables. If ``[]``, no variables will be logged. ``logdir`` must be specified for variables to be logged. debug : bool, optional If True, add checks for ``NaN`` and ``Inf`` to all computations in the graph. May result in substantially slower execution times. """ self.n_iter = n_iter if n_print is None: self.n_print = int(n_iter / 100) else: self.n_print = n_print self.progbar = Progbar(self.n_iter) self.t = tf.Variable(0, trainable=False, name="iteration") self.increment_t = self.t.assign_add(1) if scale is None: scale = {} elif not isinstance(scale, dict): raise TypeError("scale must be a dict object.") self.scale = scale if logdir is not None: self.logging = True if log_timestamp: logdir = os.path.join( logdir, datetime.strftime(datetime.utcnow(), "%Y%m%d_%H%M%S")) self._set_log_variables(log_vars) self.train_writer = tf.summary.FileWriter(logdir, tf.get_default_graph()) self.summarize = tf.summary.merge_all() else: self.logging = False self.debug = debug if self.debug: self.op_check = tf.add_check_numerics_ops() # Store reset ops which user can call. Subclasses should append # any ops needed to reset internal variables in inference. self.reset = [tf.variables_initializer([self.t])]
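As a small usage note for the ``debug`` flag documented above: it can also be passed through ``run()``, since ``run()`` forwards its keyword arguments to ``initialize()``. A sketch, assuming an already-constructed ``inference`` object:

```python
# Adds tf.add_check_numerics_ops() to the graph; every update will then
# fail fast on NaN/Inf values, at the cost of slower execution.
inference.run(n_iter=200, n_print=0, debug=True)
```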
class Inference(object): """Base class for Edward inference methods. Attributes ---------- latent_vars : dict Collection of latent variables (of type ``RandomVariable`` or ``tf.Tensor``) to perform inference on. Each random variable is binded to another random variable; the latter will infer the former conditional on data. data : dict Data dictionary which binds observed variables (of type ``RandomVariable`` or ``tf.Tensor``) to their realizations (of type ``tf.Tensor``). """ def __init__(self, latent_vars=None, data=None): """Initialization. Parameters ---------- latent_vars : dict, optional Collection of latent variables (of type ``RandomVariable`` or ``tf.Tensor``) to perform inference on. Each random variable is binded to another random variable; the latter will infer the former conditional on data. data : dict, optional Data dictionary which binds observed variables (of type ``RandomVariable`` or ``tf.Tensor``) to their realizations (of type ``tf.Tensor``). It can also bind placeholders (of type ``tf.Tensor``) used in the model to their realizations; and prior latent variables (of type ``RandomVariable``) to posterior latent variables (of type ``RandomVariable``). Notes ----- If ``data`` is not passed in, the dictionary is empty. Three options are available for batch training: 1. internally if user passes in data as a dictionary of NumPy arrays; 2. externally if user passes in data as a dictionary of TensorFlow placeholders (and manually feeds them); 3. externally if user passes in data as TensorFlow tensors which are the outputs of data readers. Examples -------- >>> mu = Normal(mu=tf.constant(0.0), sigma=tf.constant(1.0)) >>> x = Normal(mu=tf.ones(N) * mu, sigma=tf.constant(1.0)) >>> >>> qmu_mu = tf.Variable(tf.random_normal([1])) >>> qmu_sigma = tf.nn.softplus(tf.Variable(tf.random_normal([1]))) >>> qmu = Normal(mu=qmu_mu, sigma=qmu_sigma) >>> >>> Inference({mu: qmu}, {x: tf.constant([0.0] * N)}) """ sess = get_session() if latent_vars is None: latent_vars = {} elif not isinstance(latent_vars, dict): raise TypeError("latent_vars must have type dict.") for key, value in six.iteritems(latent_vars): if not isinstance(key, (RandomVariable, tf.Tensor)): raise TypeError("Latent variable key has an invalid type.") elif not isinstance(value, (RandomVariable, tf.Tensor)): raise TypeError("Latent variable value has an invalid type.") elif not key.get_shape().is_compatible_with(value.get_shape()): raise TypeError("Latent variable bindings do not have same shape.") self.latent_vars = latent_vars if data is None: data = {} elif not isinstance(data, dict): raise TypeError("data must have type dict.") self.data = {} for key, value in six.iteritems(data): if isinstance(key, RandomVariable) or \ (isinstance(key, tf.Tensor) and "Placeholder" not in key.op.type): if isinstance(value, tf.Tensor): if not key.get_shape().is_compatible_with(value.get_shape()): raise TypeError("Observed variable bindings do not have same " "shape.") self.data[key] = tf.cast(value, tf.float32) elif isinstance(value, RandomVariable): if not key.get_shape().is_compatible_with(value.get_shape()): raise TypeError("Observed variable bindings do not have same " "shape.") self.data[key] = value elif isinstance(value, np.ndarray): if not key.get_shape().is_compatible_with(value.shape): raise TypeError("Observed variable bindings do not have same " "shape.") # If value is a np.ndarray, store it in the graph. Assign its # placeholder to an appropriate data type. 
if np.issubdtype(value.dtype, np.float): ph_type = tf.float32 elif np.issubdtype(value.dtype, np.int): ph_type = tf.int32 else: raise TypeError("Data value has an unsupported type.") ph = tf.placeholder(ph_type, value.shape) var = tf.Variable(ph, trainable=False, collections=[]) self.data[key] = var sess.run(var.initializer, {ph: value}) elif isinstance(value, np.number): if np.issubdtype(value.dtype, np.float): ph_type = tf.float32 elif np.issubdtype(value.dtype, np.int): ph_type = tf.int32 else: raise TypeError("Data value as an invalid type.") ph = tf.placeholder(ph_type, value.shape) var = tf.Variable(ph, trainable=False, collections=[]) self.data[key] = var sess.run(var.initializer, {ph: value}) elif isinstance(value, float): ph_type = tf.float32 ph = tf.placeholder(ph_type, ()) var = tf.Variable(ph, trainable=False, collections=[]) self.data[key] = var sess.run(var.initializer, {ph: value}) elif isinstance(value, int): ph_type = tf.int32 ph = tf.placeholder(ph_type, ()) var = tf.Variable(ph, trainable=False, collections=[]) self.data[key] = var # handle if value is `bool` which this case catches sess.run(var.initializer, {ph: int(value)}) else: raise TypeError("Data value has an invalid type.") elif isinstance(key, tf.Tensor): if isinstance(value, RandomVariable): raise TypeError("Data placeholder cannot be bound to a " "RandomVariable.") self.data[key] = value else: raise TypeError("Data key has an invalid type.") def run(self, variables=None, use_coordinator=True, *args, **kwargs): """A simple wrapper to run inference. 1. Initialize algorithm via ``initialize``. 2. (Optional) Build a TensorFlow summary writer for TensorBoard. 3. (Optional) Initialize TensorFlow variables. 4. (Optional) Start queue runners. 5. Run ``update`` for ``self.n_iter`` iterations. 6. While running, ``print_progress``. 7. Finalize algorithm via ``finalize``. 8. (Optional) Stop queue runners. To customize the way inference is run, run these steps individually. Parameters ---------- variables : list, optional A list of TensorFlow variables to initialize during inference. Default is to initialize all variables (this includes reinitializing variables that were already initialized). To avoid initializing any variables, pass in an empty list. use_coordinator : bool, optional Whether to start and stop queue runners during inference using a TensorFlow coordinator. For example, queue runners are necessary for batch training with file readers. *args Passed into ``initialize``. **kwargs Passed into ``initialize``. """ self.initialize(*args, **kwargs) if variables is None: init = tf.global_variables_initializer() else: init = tf.variables_initializer(variables) # Feed placeholders in case initialization depends on them. feed_dict = {} for key, value in six.iteritems(self.data): if isinstance(key, tf.Tensor) and "Placeholder" in key.op.type: feed_dict[key] = value init.run(feed_dict) if use_coordinator: # Start input enqueue threads. self.coord = tf.train.Coordinator() self.threads = tf.train.start_queue_runners(coord=self.coord) for _ in range(self.n_iter): info_dict = self.update() self.print_progress(info_dict) self.finalize() if use_coordinator: # Ask threads to stop. self.coord.request_stop() self.coord.join(self.threads) def initialize(self, n_iter=1000, n_print=None, scale=None, logdir=None, debug=False): """Initialize inference algorithm. Parameters ---------- n_iter : int, optional Number of iterations for algorithm. n_print : int, optional Number of iterations for each print progress. 
To suppress print progress, then specify 0. Default is ``int(n_iter / 10)``. scale : dict of RandomVariable to tf.Tensor, optional A tensor to scale computation for any random variable that it is binded to. Its shape must be broadcastable; it is multiplied element-wise to the random variable. For example, this is useful for mini-batch scaling when inferring global variables, or applying masks on a random variable. logdir : str, optional Directory where event file will be written. For details, see ``tf.summary.FileWriter``. Default is to write nothing. debug : bool, optional If True, add checks for ``NaN`` and ``Inf`` to all computations in the graph. May result in substantially slower execution times. """ self.n_iter = n_iter if n_print is None: self.n_print = int(n_iter / 10) else: self.n_print = n_print self.progbar = Progbar(self.n_iter) self.t = tf.Variable(0, trainable=False) self.increment_t = self.t.assign_add(1) if scale is None: scale = {} elif not isinstance(scale, dict): raise TypeError() self.scale = scale if logdir is not None: self.logging = True self.train_writer = tf.summary.FileWriter(logdir, tf.get_default_graph()) self.summarize = tf.summary.merge_all() else: self.logging = False self.debug = debug if self.debug: self.op_check = tf.add_check_numerics_ops() def update(self, feed_dict=None): """Run one iteration of inference. Parameters ---------- feed_dict : dict, optional Feed dictionary for a TensorFlow session run. It is used to feed placeholders that are not fed during initialization. Returns ------- dict Dictionary of algorithm-specific information. """ if feed_dict is None: feed_dict = {} for key, value in six.iteritems(self.data): if isinstance(key, tf.Tensor) and "Placeholder" in key.op.type: feed_dict[key] = value sess = get_session() t = sess.run(self.increment_t) if self.debug: sess.run(self.op_check) if self.logging and self.n_print != 0: if t == 1 or t % self.n_print == 0: summary = sess.run(self.summarize, feed_dict) self.train_writer.add_summary(summary, t) return {'t': t} def print_progress(self, info_dict): """Print progress to output. Parameters ---------- info_dict : dict Dictionary of algorithm-specific information. """ if self.n_print != 0: t = info_dict['t'] if t == 1 or t % self.n_print == 0: self.progbar.update(t) def finalize(self): """Function to call after convergence. """ if self.logging: self.train_writer.close()
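To make option 2 from the Notes above concrete, here is a small sketch of batch training where the user feeds placeholders manually, using the older ``mu``/``sigma`` argument names from the docstring example; the ``next_batch`` helper is assumed, not part of the class:

```python
M = 128  # mini-batch size
x_ph = tf.placeholder(tf.float32, [M])

mu = Normal(mu=tf.constant(0.0), sigma=tf.constant(1.0))
x = Normal(mu=tf.ones(M) * mu, sigma=tf.constant(1.0))

qmu = Normal(mu=tf.Variable(tf.random_normal([])),
             sigma=tf.nn.softplus(tf.Variable(tf.random_normal([]))))

inference = ed.KLqp({mu: qmu}, data={x: x_ph})
inference.initialize(n_iter=1000)
tf.global_variables_initializer().run()

for _ in range(inference.n_iter):
  x_batch = next_batch(M)  # assumed helper returning a NumPy array of shape [M]
  info_dict = inference.update(feed_dict={x_ph: x_batch})
  inference.print_progress(info_dict)

inference.finalize()
```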
def initialize(self, n_iter=1000, n_print=None, scale=None,
               auto_transform=True, logdir=None, log_timestamp=True,
               log_vars=None, debug=False):
  """Initialize inference algorithm. It initializes hyperparameters
  and builds ops for the algorithm's computation graph.

  Any derived class of `Inference` **must** implement this method.
  No methods which build ops should be called outside `initialize()`.

  Args:
    n_iter: int.
      Number of iterations for algorithm when calling `run()`.
      Alternatively if controlling inference manually, it is the
      expected number of calls to `update()`; this number determines
      tracking information during the print progress.
    n_print: int.
      Number of iterations for each print progress. To suppress print
      progress, specify 0. Default is `int(n_iter / 100)`.
    scale: dict of RandomVariable to tf.Tensor.
      A tensor to scale computation for any random variable that it is
      bound to. Its shape must be broadcastable; it is multiplied
      element-wise to the random variable. For example, this is useful
      for mini-batch scaling when inferring global variables, or
      applying masks on a random variable.
    auto_transform: bool.
      Whether to automatically transform continuous latent variables
      of unequal support to be on the unconstrained space. It is only
      applied if the argument is `True`, the latent variable pair are
      `ed.RandomVariable`s with the `support` attribute, and the
      supports are both continuous and unequal.
    logdir: str.
      Directory where event file will be written. For details,
      see `tf.summary.FileWriter`. Default is to log nothing.
    log_timestamp: bool.
      If True (and `logdir` is specified), create a subdirectory of
      `logdir` to save the specific run results. The subdirectory's
      name is the current UTC timestamp with format 'YYYYMMDD_HHMMSS'.
    log_vars: list.
      Specifies the list of variables to log after each `n_print`
      steps. If None, will log all variables. If `[]`, no variables
      will be logged. `logdir` must be specified for variables to be
      logged.
    debug: bool.
      If True, add checks for `NaN` and `Inf` to all computations in
      the graph. May result in substantially slower execution times.
  """
  self.n_iter = n_iter
  if n_print is None:
    self.n_print = int(n_iter / 100)
  else:
    self.n_print = n_print

  self.progbar = Progbar(self.n_iter)
  self.t = tf.Variable(0, trainable=False, name="iteration")
  self.increment_t = self.t.assign_add(1)

  if scale is None:
    scale = {}
  elif not isinstance(scale, dict):
    raise TypeError("scale must be a dict object.")

  self.scale = scale

  # map from original latent vars to unconstrained versions
  self.transformations = {}
  if auto_transform:
    latent_vars = self.latent_vars.copy()
    # latent_vars maps original latent vars to constrained Q's.
    # latent_vars_unconstrained maps unconstrained vars to
    # unconstrained Q's.
    self.latent_vars = {}
    self.latent_vars_unconstrained = {}
    for z, qz in six.iteritems(latent_vars):
      if hasattr(z, 'support') and hasattr(qz, 'support') and \
              z.support != qz.support and qz.support != 'point':

        # transform z to an unconstrained space
        z_unconstrained = transform(z)
        self.transformations[z] = z_unconstrained

        # make sure we also have a qz that covers the unconstrained space
        if qz.support == "points":
          qz_unconstrained = qz
        else:
          qz_unconstrained = transform(qz)
        self.latent_vars_unconstrained[z_unconstrained] = qz_unconstrained

        # additionally construct the transformation of qz
        # back into the original constrained space
        if z_unconstrained != z:
          qz_constrained = transform(
              qz_unconstrained, bijectors.Invert(z_unconstrained.bijector))

          try:  # attempt to pushforward the params of Empirical distributions
            qz_constrained.params = z_unconstrained.bijector.inverse(
                qz_unconstrained.params)
          except Exception:  # qz_unconstrained is not an Empirical distribution
            pass

        else:
          qz_constrained = qz_unconstrained

        self.latent_vars[z] = qz_constrained
      else:
        self.latent_vars[z] = qz
        self.latent_vars_unconstrained[z] = qz
    del latent_vars

  if logdir is not None:
    self.logging = True

    if log_timestamp:
      logdir = os.path.expanduser(logdir)
      logdir = os.path.join(
          logdir, datetime.strftime(datetime.utcnow(), "%Y%m%d_%H%M%S"))

    self._summary_key = tf.get_default_graph().unique_name("summaries")
    self._set_log_variables(log_vars)
    self.train_writer = tf.summary.FileWriter(logdir, tf.get_default_graph())
  else:
    self.logging = False

  self.debug = debug
  if self.debug:
    self.op_check = tf.add_check_numerics_ops()

  # Store reset ops which user can call. Subclasses should append
  # any ops needed to reset internal variables in inference.
  self.reset = [tf.variables_initializer([self.t])]
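A hedged usage sketch of the behavior documented above (an illustrative model, not taken from this file): pairing a positive-support latent variable with a real-valued approximating family lets `initialize()` run inference on the unconstrained (log) space, while `inference.latent_vars[z]` is mapped back to the constrained space.

# Illustrative only: Gamma has positive support while Normal is real-valued,
# so with auto_transform=True inference proceeds on the unconstrained space
# and the fitted approximation is transformed back for z.
from edward.models import Gamma, Normal

z = Gamma(2.0, 2.0)
qz = Normal(loc=tf.Variable(tf.random_normal([])),
            scale=tf.nn.softplus(tf.Variable(tf.random_normal([]))))

inference = ed.KLqp({z: qz})
inference.run(n_iter=500, auto_transform=True)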
### predictive check
n_rep = 100  # number of replicated datasets we generate
holdout_gen = np.zeros((n_rep, x_train.shape[0], x_train.shape[1]))

for i in range(n_rep):
  x_generated = x_post.sample().eval()

  # look only at the heldout entries
  holdout_gen[i] = np.multiply(x_generated, holdout_mask)

n_eval = 10  # we draw samples from the inferred U and V
obs_ll = []
rep_ll = []
pbar = Progbar(n_eval)
for j in range(n_eval):
  pbar.update(j + 1)
  U_sample = U_post.sample().eval()
  V_sample = V_post.sample().eval()

  holdoutmean_sample = np.multiply(U_sample.dot(V_sample.T), holdout_mask)
  obs_ll.append(
      np.mean(np.ma.masked_invalid(
          stats.poisson.logpmf(np.array(x_vad, dtype=int),
                               holdoutmean_sample)), axis=1))
  rep_ll.append(
      np.mean(np.ma.masked_invalid(
          stats.poisson.logpmf(holdout_gen, holdoutmean_sample)), axis=2))
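A common way to summarize such a check is a per-row posterior predictive p-value. The following is a hypothetical follow-up (not part of the snippet above) that reuses the arrays built there; shapes assume the heldout data `x_vad` is dense of shape [N, D].

# Hypothetical summary: the p-value for each row is the fraction of
# replicated datasets whose heldout log-likelihood exceeds that of the
# observed heldout data.
obs_ll_per_row = np.mean(obs_ll, axis=0)    # average over draws, shape [N]
rep_ll_per_row = np.mean(rep_ll, axis=0)    # shape [n_rep, N]
pvals = np.mean(rep_ll_per_row > obs_ll_per_row, axis=0)
print("Average posterior predictive p-value: {:0.3f}".format(np.mean(pvals)))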
class Inference(object): """Abstract base class for inference. All inference algorithms in Edward inherit from ``Inference``, sharing common methods and properties via a class hierarchy. Specific algorithms typically inherit from other subclasses of ``Inference`` rather than ``Inference`` directly. For example, one might inherit from the abstract classes ``MonteCarlo`` or ``VariationalInference``. To build an algorithm inheriting from ``Inference``, one must at the minimum implement ``initialize`` and ``update``: the former builds the computational graph for the algorithm; the latter runs the computational graph for the algorithm. To reset inference (e.g., internal variable counters incremented over training), fetch inference's reset ops from session with ``sess.run(inference.reset)``. """ def __init__(self, latent_vars=None, data=None): """Initialization. Parameters ---------- latent_vars : dict, optional Collection of latent variables (of type ``RandomVariable`` or ``tf.Tensor``) to perform inference on. Each random variable is binded to another random variable; the latter will infer the former conditional on data. data : dict, optional Data dictionary which binds observed variables (of type ``RandomVariable`` or ``tf.Tensor``) to their realizations (of type ``tf.Tensor``). It can also bind placeholders (of type ``tf.Tensor``) used in the model to their realizations; and prior latent variables (of type ``RandomVariable``) to posterior latent variables (of type ``RandomVariable``). Examples -------- >>> mu = Normal(loc=tf.constant(0.0), scale=tf.constant(1.0)) >>> x = Normal(loc=tf.ones(50) * mu, scale=tf.constant(1.0)) >>> >>> qmu_loc = tf.Variable(tf.random_normal([])) >>> qmu_scale = tf.nn.softplus(tf.Variable(tf.random_normal([]))) >>> qmu = Normal(loc=qmu_loc, scale=qmu_scale) >>> >>> inference = ed.Inference({mu: qmu}, data={x: tf.zeros(50)}) """ sess = get_session() if latent_vars is None: latent_vars = {} if data is None: data = {} check_latent_vars(latent_vars) self.latent_vars = latent_vars check_data(data) self.data = {} for key, value in six.iteritems(data): if isinstance(key, tf.Tensor) and "Placeholder" in key.op.type: self.data[key] = value elif isinstance(key, (RandomVariable, tf.Tensor)): if isinstance(value, (RandomVariable, tf.Tensor)): self.data[key] = value elif isinstance( value, (float, list, int, np.ndarray, np.number, str)): # If value is a Python type, store it in the graph. # Assign its placeholder with the key's data type. with tf.variable_scope("data"): ph = tf.placeholder(key.dtype, np.shape(value)) var = tf.Variable(ph, trainable=False, collections=[]) sess.run(var.initializer, {ph: value}) self.data[key] = var def run(self, variables=None, use_coordinator=True, *args, **kwargs): """A simple wrapper to run inference. 1. Initialize algorithm via ``initialize``. 2. (Optional) Build a TensorFlow summary writer for TensorBoard. 3. (Optional) Initialize TensorFlow variables. 4. (Optional) Start queue runners. 5. Run ``update`` for ``self.n_iter`` iterations. 6. While running, ``print_progress``. 7. Finalize algorithm via ``finalize``. 8. (Optional) Stop queue runners. To customize the way inference is run, run these steps individually. Parameters ---------- variables : list, optional A list of TensorFlow variables to initialize during inference. Default is to initialize all variables (this includes reinitializing variables that were already initialized). To avoid initializing any variables, pass in an empty list. 
use_coordinator : bool, optional Whether to start and stop queue runners during inference using a TensorFlow coordinator. For example, queue runners are necessary for batch training with file readers. *args Passed into ``initialize``. **kwargs Passed into ``initialize``. """ self.initialize(*args, **kwargs) if variables is None: init = tf.global_variables_initializer() else: init = tf.variables_initializer(variables) # Feed placeholders in case initialization depends on them. feed_dict = {} for key, value in six.iteritems(self.data): if isinstance(key, tf.Tensor) and "Placeholder" in key.op.type: feed_dict[key] = value init.run(feed_dict) if use_coordinator: # Start input enqueue threads. self.coord = tf.train.Coordinator() self.threads = tf.train.start_queue_runners(coord=self.coord) for _ in range(self.n_iter): info_dict = self.update() self.print_progress(info_dict) self.finalize() if use_coordinator: # Ask threads to stop. self.coord.request_stop() self.coord.join(self.threads) @abc.abstractmethod def initialize(self, n_iter=1000, n_print=None, scale=None, logdir=None, log_timestamp=True, log_vars=None, debug=False): """Initialize inference algorithm. It initializes hyperparameters and builds ops for the algorithm's computation graph. Any derived class of ``Inference`` **must** implement this method. No methods which build ops should be called outside ``initialize()``. Parameters ---------- n_iter : int, optional Number of iterations for algorithm. n_print : int, optional Number of iterations for each print progress. To suppress print progress, then specify 0. Default is ``int(n_iter / 100)``. scale : dict of RandomVariable to tf.Tensor, optional A tensor to scale computation for any random variable that it is binded to. Its shape must be broadcastable; it is multiplied element-wise to the random variable. For example, this is useful for mini-batch scaling when inferring global variables, or applying masks on a random variable. logdir : str, optional Directory where event file will be written. For details, see ``tf.summary.FileWriter``. Default is to log nothing. log_timestamp : bool, optional If True (and ``logdir`` is specified), create a subdirectory of ``logdir`` to save the specific run results. The subdirectory's name is the current UTC timestamp with format 'YYYYMMDD_HHMMSS'. log_vars : list, optional Specifies the list of variables to log after each ``n_print`` steps. If None, will log all variables. If ``[]``, no variables will be logged. ``logdir`` must be specified for variables to be logged. debug : bool, optional If True, add checks for ``NaN`` and ``Inf`` to all computations in the graph. May result in substantially slower execution times. """ self.n_iter = n_iter if n_print is None: self.n_print = int(n_iter / 100) else: self.n_print = n_print self.progbar = Progbar(self.n_iter) self.t = tf.Variable(0, trainable=False, name="iteration") self.increment_t = self.t.assign_add(1) if scale is None: scale = {} elif not isinstance(scale, dict): raise TypeError("scale must be a dict object.") self.scale = scale if logdir is not None: self.logging = True if log_timestamp: logdir = os.path.join( logdir, datetime.strftime(datetime.utcnow(), "%Y%m%d_%H%M%S")) self._set_log_variables(log_vars) self.train_writer = tf.summary.FileWriter(logdir, tf.get_default_graph()) self.summarize = tf.summary.merge_all() else: self.logging = False self.debug = debug if self.debug: self.op_check = tf.add_check_numerics_ops() # Store reset ops which user can call. 
    # Subclasses should append any ops needed to reset internal variables
    # in inference.
    self.reset = [tf.variables_initializer([self.t])]

  @abc.abstractmethod
  def update(self, feed_dict=None):
    """Run one iteration of inference.

    Any derived class of ``Inference`` **must** implement this method.

    Parameters
    ----------
    feed_dict : dict, optional
      Feed dictionary for a TensorFlow session run. It is used to feed
      placeholders that are not fed during initialization.

    Returns
    -------
    dict
      Dictionary of algorithm-specific information.
    """
    if feed_dict is None:
      feed_dict = {}

    for key, value in six.iteritems(self.data):
      if isinstance(key, tf.Tensor) and "Placeholder" in key.op.type:
        feed_dict[key] = value

    sess = get_session()
    t = sess.run(self.increment_t)

    if self.debug:
      sess.run(self.op_check, feed_dict)

    if self.logging and self.n_print != 0:
      if t == 1 or t % self.n_print == 0:
        summary = sess.run(self.summarize, feed_dict)
        self.train_writer.add_summary(summary, t)

    return {'t': t}

  def print_progress(self, info_dict):
    """Print progress to output.

    Parameters
    ----------
    info_dict : dict
      Dictionary of algorithm-specific information.
    """
    if self.n_print != 0:
      t = info_dict['t']
      if t == 1 or t % self.n_print == 0:
        self.progbar.update(t)

  def finalize(self):
    """Function to call after convergence.
    """
    if self.logging:
      self.train_writer.close()

  def _set_log_variables(self, log_vars=None):
    """Log variables to TensorBoard.

    For each variable in ``log_vars``, forms a ``tf.summary.scalar`` if
    the variable has scalar shape; otherwise forms a
    ``tf.summary.histogram``.

    Parameters
    ----------
    log_vars : list, optional
      Specifies the list of variables to log after each ``n_print``
      steps. If None, will log all variables. If ``[]``, no variables
      will be logged.
    """
    summary_key = 'summaries_' + str(id(self))
    if log_vars is None:
      log_vars = []
      for key in six.iterkeys(self.data):
        log_vars += get_variables(key)

      for key, value in six.iteritems(self.latent_vars):
        log_vars += get_variables(key)
        log_vars += get_variables(value)

      log_vars = set(log_vars)

    for var in log_vars:
      # replace colons which are an invalid character
      var_name = var.name.replace(':', '/')
      # Log all scalars.
      if len(var.shape) == 0:
        tf.summary.scalar("parameter/{}".format(var_name),
                          var, collections=[summary_key])
      elif len(var.shape) == 1 and var.shape[0] == 1:
        tf.summary.scalar("parameter/{}".format(var_name),
                          var[0], collections=[summary_key])
      else:
        # If var is multi-dimensional, log a histogram of its values.
        tf.summary.histogram("parameter/{}".format(var_name),
                             var, collections=[summary_key])
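As a hedged usage sketch (not library code), the manual-control counterpart of ``run()`` strings together ``initialize``, ``update``, ``print_progress``, and ``finalize``, with ``sess.run(inference.reset)`` restoring the iteration counter; the toy model below reuses ``mu``, ``qmu``, and ``x`` from the class docstring example.

# Minimal sketch of driving an Inference subclass by hand instead of run().
inference = ed.KLqp({mu: qmu}, data={x: tf.zeros(50)})
inference.initialize(n_iter=250)

tf.global_variables_initializer().run()
for _ in range(inference.n_iter):
  info_dict = inference.update()
  inference.print_progress(info_dict)

inference.finalize()

# To rerun inference from iteration 0, fetch the reset ops.
sess = ed.get_session()
sess.run(inference.reset)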
def main(_):
  ed.set_seed(42)

  # DATA
  x_train, _, x_test = text8(FLAGS.data_dir)
  vocab = string.ascii_lowercase + ' '
  vocab_size = len(vocab)
  encoder = dict(zip(vocab, range(vocab_size)))
  decoder = {v: k for k, v in encoder.items()}

  data = generator(x_train, FLAGS.batch_size, FLAGS.timesteps, encoder)

  # MODEL
  x_ph = tf.placeholder(tf.int32, [None, FLAGS.timesteps])
  with tf.variable_scope("language_model"):
    # Shift input sequence to right by 1, [0, x[0], ..., x[timesteps - 2]].
    x_ph_shift = tf.pad(x_ph, [[0, 0], [1, 0]])[:, :-1]
    x = language_model(x_ph_shift, vocab_size)

  with tf.variable_scope("language_model", reuse=True):
    x_gen = language_model_gen(5, vocab_size)

  imb = range(0, len(x_test) - FLAGS.timesteps, FLAGS.timesteps)
  encoded_x_test = np.asarray(
      [[encoder[c] for c in x_test[i:(i + FLAGS.timesteps)]] for i in imb],
      dtype=np.int32)
  test_size = encoded_x_test.shape[0]
  print("Test set shape: {}".format(encoded_x_test.shape))
  test_nll = -tf.reduce_sum(x.log_prob(x_ph))

  # INFERENCE
  inference = ed.MAP({}, {x: x_ph})

  optimizer = tf.train.AdamOptimizer(learning_rate=FLAGS.lr)
  inference.initialize(optimizer=optimizer,
                       logdir=FLAGS.log_dir,
                       log_timestamp=False)

  print("Number of sets of parameters: {}".format(
      len(tf.trainable_variables())))
  print("Number of parameters: {}".format(
      np.sum([np.prod(v.shape.as_list()) for v in tf.trainable_variables()])))
  for v in tf.trainable_variables():
    print(v)

  sess = ed.get_session()
  tf.global_variables_initializer().run()

  # Double n_epoch and print progress every half an epoch.
  n_iter_per_epoch = len(x_train) // (FLAGS.batch_size * FLAGS.timesteps * 2)
  epoch = 0.0
  for _ in range(FLAGS.n_epoch * 2):
    epoch += 0.5
    print("Epoch: {0}".format(epoch))
    avg_nll = 0.0

    pbar = Progbar(n_iter_per_epoch)
    for t in range(1, n_iter_per_epoch + 1):
      pbar.update(t)
      x_batch = next(data)
      info_dict = inference.update({x_ph: x_batch})
      avg_nll += info_dict['loss']

    # Print average bits per character over epoch.
    avg_nll /= (n_iter_per_epoch * FLAGS.batch_size * FLAGS.timesteps *
                np.log(2))
    print("Train average bits/char: {:0.8f}".format(avg_nll))

    # Print per-data point log-likelihood on test set.
    avg_nll = 0.0
    for start in range(0, test_size, FLAGS.batch_size):
      end = min(test_size, start + FLAGS.batch_size)
      x_batch = encoded_x_test[start:end]
      avg_nll += sess.run(test_nll, {x_ph: x_batch})

    avg_nll /= test_size
    print("Test average NLL: {:0.8f}".format(avg_nll))

    # Generate samples from model.
    samples = sess.run(x_gen)
    samples = [''.join([decoder[c] for c in sample]) for sample in samples]
    print("Samples:")
    for sample in samples:
      print(sample)
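The `generator` helper used above is defined elsewhere in the script; the sketch below is only an assumption about its behavior (random crops of the integer-encoded text), not its actual definition.

# Hypothetical sketch of the data generator assumed above: loops forever and
# yields arrays of shape [batch_size, timesteps] of integer character codes.
def generator(text, batch_size, timesteps, encoder):
  while True:
    starts = np.random.randint(0, len(text) - timesteps, batch_size)
    yield np.asarray(
        [[encoder[c] for c in text[i:(i + timesteps)]] for i in starts],
        dtype=np.int32)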
def initialize(self, n_iter=1000, n_print=None, scale=None, auto_transform=True, logdir=None, log_timestamp=True, log_vars=None, debug=False): """Initialize inference algorithm. It initializes hyperparameters and builds ops for the algorithm's computation graph. Any derived class of `Inference` **must** implement this method. No methods which build ops should be called outside `initialize()`. Args: n_iter: int, optional. Number of iterations for algorithm when calling `run()`. Alternatively if controlling inference manually, it is the expected number of calls to `update()`; this number determines tracking information during the print progress. n_print: int, optional. Number of iterations for each print progress. To suppress print progress, then specify 0. Default is `int(n_iter / 100)`. scale: dict of RandomVariable to tf.Tensor, optional. A tensor to scale computation for any random variable that it is binded to. Its shape must be broadcastable; it is multiplied element-wise to the random variable. For example, this is useful for mini-batch scaling when inferring global variables, or applying masks on a random variable. auto_transform: bool, optional. Whether to automatically transform continuous latent variables of unequal support to be on the unconstrained space. It is only applied if the argument is `True`, the latent variable pair are `ed.RandomVariable`s with the `support` attribute, the supports are both continuous and unequal. logdir: str, optional. Directory where event file will be written. For details, see `tf.summary.FileWriter`. Default is to log nothing. log_timestamp: bool, optional. If True (and `logdir` is specified), create a subdirectory of `logdir` to save the specific run results. The subdirectory's name is the current UTC timestamp with format 'YYYYMMDD_HHMMSS'. log_vars: list, optional. Specifies the list of variables to log after each `n_print` steps. If None, will log all variables. If `[]`, no variables will be logged. `logdir` must be specified for variables to be logged. debug: bool, optional. If True, add checks for `NaN` and `Inf` to all computations in the graph. May result in substantially slower execution times. """ self.n_iter = n_iter if n_print is None: self.n_print = int(n_iter / 100) else: self.n_print = n_print self.progbar = Progbar(self.n_iter) self.t = tf.Variable(0, trainable=False, name="iteration") self.increment_t = self.t.assign_add(1) if scale is None: scale = {} elif not isinstance(scale, dict): raise TypeError("scale must be a dict object.") self.scale = scale # map from original latent vars to unconstrained versions self.transformations = {} if auto_transform: latent_vars = self.latent_vars.copy() # latent_vars maps original latent vars to constrained Q's. # latent_vars_unconstrained maps unconstrained vars to unconstrained Q's. 
self.latent_vars = {} self.latent_vars_unconstrained = {} for z, qz in six.iteritems(latent_vars): if hasattr(z, 'support') and hasattr(qz, 'support') and \ z.support != qz.support and qz.support != 'point': # transform z to an unconstrained space z_unconstrained = transform(z) self.transformations[z] = z_unconstrained # make sure we also have a qz that covers the unconstrained space if qz.support == "points": qz_unconstrained = qz else: qz_unconstrained = transform(qz) self.latent_vars_unconstrained[ z_unconstrained] = qz_unconstrained # additionally construct the transformation of qz # back into the original constrained space if z_unconstrained != z: qz_constrained = transform( qz_unconstrained, bijectors.Invert(z_unconstrained.bijector)) try: # attempt to pushforward the params of Empirical distributions qz_constrained.params = z_unconstrained.bijector.inverse( qz_unconstrained.params) except: # qz_unconstrained is not an Empirical distribution pass else: qz_constrained = qz_unconstrained self.latent_vars[z] = qz_constrained else: self.latent_vars[z] = qz self.latent_vars_unconstrained[z] = qz del latent_vars if logdir is not None: self.logging = True if log_timestamp: logdir = os.path.expanduser(logdir) logdir = os.path.join( logdir, datetime.strftime(datetime.utcnow(), "%Y%m%d_%H%M%S")) self._summary_key = tf.get_default_graph().unique_name("summaries") self._set_log_variables(log_vars) self.train_writer = tf.summary.FileWriter(logdir, tf.get_default_graph()) else: self.logging = False self.debug = debug if self.debug: self.op_check = tf.add_check_numerics_ops() # Store reset ops which user can call. Subclasses should append # any ops needed to reset internal variables in inference. self.reset = [tf.variables_initializer([self.t])]
# Bind p(x, z) and q(z | x) to the same placeholder for x.
data = {x: x_ph}
inference = ed.ReparameterizationKLKLqp({z: qz}, data)
optimizer = tf.train.AdamOptimizer(0.01, epsilon=1.0)
inference.initialize(optimizer=optimizer, use_prettytensor=True)

hidden_rep = tf.sigmoid(logits)

tf.global_variables_initializer().run()

n_epoch = 100
n_iter_per_epoch = 1000
for epoch in range(n_epoch):
  avg_loss = 0.0

  pbar = Progbar(n_iter_per_epoch)
  for t in range(1, n_iter_per_epoch + 1):
    pbar.update(t)
    x_train, _ = mnist.train.next_batch(M)
    x_train = np.random.binomial(1, x_train)
    info_dict = inference.update(feed_dict={x_ph: x_train})
    avg_loss += info_dict['loss']

  # Print a lower bound to the average marginal likelihood for an
  # image.
  avg_loss = avg_loss / n_iter_per_epoch
  avg_loss = avg_loss / M
  print("-log p(x) <= {:0.3f}".format(avg_loss))

# Visualize hidden representations.
imgs = hidden_rep.eval()
def initialize(self, n_iter=1000, n_print=None, scale=None, auto_transform=True, logdir=None, log_timestamp=True, log_vars=None, debug=False, optimizer=None, var_list=None, use_prettytensor=False, global_step=None, n_samples=1, kl_scaling=None, maxnorm=5.): if kl_scaling is None: kl_scaling = {} if n_samples <= 0: raise ValueError( "n_samples should be greater than zero: {}".format(n_samples)) self.n_samples = n_samples self.kl_scaling = kl_scaling # from inference.py self.n_iter = n_iter if n_print is None: self.n_print = int(n_iter / 100) else: self.n_print = n_print self.progbar = Progbar(self.n_iter) self.t = tf.Variable(0, trainable=False, name="iteration") self.increment_t = self.t.assign_add(1) if scale is None: scale = {} elif not isinstance(scale, dict): raise TypeError("scale must be a dict object.") self.scale = scale self.transformations = {} if auto_transform: latent_vars = self.latent_vars.copy() self.latent_vars = {} self.latent_vars_unconstrained = {} for z, qz in six.iteritems(latent_vars): if hasattr(z, 'support') and hasattr(qz, 'support') and \ z.support != qz.support and qz.support != 'point': z_unconstrained = transform(z) self.transformations[z] = z_unconstrained if qz.support == "points": qz_unconstrained = qz else: qz_unconstrained = transform(qz) self.latent_vars_unconstrained[ z_unconstrained] = qz_unconstrained if z_unconstrained != z: qz_constrained = transform( qz_unconstrained, bijectors.Invert(z_unconstrained.bijector)) try: qz_constrained.params = \ z_unconstrained.bijector.inverse( qz_unconstrained.params) except: pass else: qz_constrained = qz_unconstrained self.latent_vars[z] = qz_constrained else: self.latent_vars[z] = qz self.latent_vars_unconstrained[z] = qz del latent_vars if logdir is not None: self.logging = True if log_timestamp: logdir = os.path.expanduser(logdir) logdir = os.path.join( logdir, datetime.strftime(datetime.utcnow(), "%Y%m%d_%H%M%S")) self._summary_key = tf.get_default_graph().unique_name("summaries") self._set_log_variables(log_vars) self.train_writer = tf.summary.FileWriter(logdir, tf.get_default_graph()) else: self.logging = False self.debug = debug if self.debug: self.op_check = tf.add_check_numerics_ops() self.reset = [tf.variables_initializer([self.t])] # from variational_inference.py if var_list is None: var_list = set() trainables = tf.trainable_variables() for z, qz in six.iteritems(self.latent_vars): var_list.update(get_variables(z, collection=trainables)) var_list.update(get_variables(qz, collection=trainables)) for x, qx in six.iteritems(self.data): if isinstance(x, RandomVariable) and \ not isinstance(qx, RandomVariable): var_list.update(get_variables(x, collection=trainables)) var_list = list(var_list) self.loss, grads_and_vars = self.build_loss_and_gradients(var_list) clipped_grads_and_vars = [] for grad, var in grads_and_vars: if "kernel" in var.name or "bias" in var.name: clipped_grads_and_vars.append((tf.clip_by_norm(grad, maxnorm, axes=[0]), var)) else: clipped_grads_and_vars.append((grad, var)) # for grad, var in grads_and_vars: # clipped_grads_and_vars.append( # (tf.clip_by_value(grad, -1000., 1000.), var)) del grads_and_vars if self.logging: tf.summary.scalar("loss", self.loss, collections=[self._summary_key]) for grad, var in clipped_grads_and_vars: tf.summary.histogram("gradient/" + var.name.replace(':', '/'), grad, collections=[self._summary_key]) tf.summary.scalar("gradient_norm/" + var.name.replace(':', '/'), tf.norm(grad), collections=[self._summary_key]) self.summarize = 
tf.summary.merge_all(key=self._summary_key) if optimizer is None and global_step is None: global_step = tf.Variable(0, trainable=False, name="global_step") if isinstance(global_step, tf.Variable): starter_learning_rate = 0.1 learning_rate = tf.train.exponential_decay(starter_learning_rate, global_step, 100, 0.9, staircase=True) else: learning_rate = 0.01 # Build optimizer. if optimizer is None: optimizer = tf.train.AdamOptimizer(learning_rate) elif isinstance(optimizer, str): if optimizer == 'gradientdescent': optimizer = tf.train.GradientDescentOptimizer(learning_rate) elif optimizer == 'adadelta': optimizer = tf.train.AdadeltaOptimizer(learning_rate) elif optimizer == 'adagrad': optimizer = tf.train.AdagradOptimizer(learning_rate) elif optimizer == 'momentum': optimizer = tf.train.MomentumOptimizer(learning_rate, 0.9) elif optimizer == 'adam': optimizer = tf.train.AdamOptimizer(learning_rate) elif optimizer == 'ftrl': optimizer = tf.train.FtrlOptimizer(learning_rate) elif optimizer == 'rmsprop': optimizer = tf.train.RMSPropOptimizer(learning_rate) else: raise ValueError('Optimizer class not found:', optimizer) elif not isinstance(optimizer, tf.train.Optimizer): raise TypeError( "Optimizer must be str, tf.train.Optimizer, or None.") with tf.variable_scope(None, default_name="optimizer") as scope: if not use_prettytensor: self.train = optimizer.apply_gradients(clipped_grads_and_vars, global_step=global_step) else: import prettytensor as pt self.train = pt.apply_optimizer(optimizer, losses=[self.loss], global_step=global_step, var_list=var_list) self.reset.append( tf.variables_initializer( tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope=scope.name)))
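A hedged usage sketch of this `initialize` variant; `MyKLqp` is a hypothetical name standing in for whatever inference class defines the method above, and the keyword values are illustrative rather than recommended settings.

# Hypothetical usage; z, qz, x, and x_train come from the surrounding model.
inference = MyKLqp({z: qz}, data={x: x_train})
inference.initialize(optimizer='rmsprop',  # dispatched via the string branch
                     n_samples=5,          # Monte Carlo samples per update
                     kl_scaling={z: 1.0},  # per-variable KL rescaling
                     maxnorm=5.)           # clip "kernel"/"bias" gradients

Passing `optimizer` as a string selects the corresponding `tf.train` optimizer with the default learning rate, while passing a `tf.train.Optimizer` instance (as in the earlier examples) uses it directly.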