def initialize(self, *args, **kwargs):
  """Build the Laplace approximation's computation graph.

  Temporarily replaces each normal approximation with a `PointMass`
  so that MAP (the parent class) optimizes the point estimates, which
  are tied to the normal approximations' mean parameters. Also builds
  `finalize_ops`, which set each approximation's scale from the
  (inverse) Hessian of the loss at the mode.
  """
  # Store latent variables in a temporary attribute; MAP will
  # optimize `PointMass` random variables, which subsequently
  # optimizes mean parameters of the normal approximations.
  latent_vars_normal = self.latent_vars.copy()
  self.latent_vars = {z: PointMass(params=qz.loc)
                      for z, qz in six.iteritems(latent_vars_normal)}

  super(Laplace, self).initialize(*args, **kwargs)

  # Second derivatives of the loss w.r.t. each point estimate.  The
  # value order matches the key order below: both iterate the same
  # dict (`self.latent_vars`) without mutation in between.
  hessians = tf.hessians(self.loss, list(six.itervalues(self.latent_vars)))
  self.finalize_ops = []
  for z, hessian in zip(six.iterkeys(self.latent_vars), hessians):
    qz = latent_vars_normal[z]
    if isinstance(qz, (MultivariateNormalDiag, Normal)):
      # Diagonal approximation: invert only the Hessian's diagonal.
      scale_var = get_variables(qz.variance())[0]
      scale = 1.0 / tf.diag_part(hessian)
    else:  # qz is MultivariateNormalTriL
      # Full covariance: scale is the inverse of the Cholesky factor.
      scale_var = get_variables(qz.covariance())[0]
      scale = tf.matrix_inverse(tf.cholesky(hessian))

    self.finalize_ops.append(scale_var.assign(scale))

  # Restore the normal approximations for downstream use.
  self.latent_vars = latent_vars_normal.copy()
  del latent_vars_normal
def build_approximation_update(self):
  """Build the op that updates the approximating Gaussians.

  Calculates the weighted mean and variance of the approximating
  Gaussians from samples provided by noisy-Adam. Each sample is only
  seen once and the parameters are updated incrementally for better
  memory efficiency.

  See Welford (1962), Note on a method for calculating corrected sums
  of squares and products, or
  https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
  for information about the incremental algorithm.

  Returns:
    A grouped op that applies all mean/stddev updates and then
    increments `update_iters`.
  """
  update_ops = []
  # Running sum of weights; each call contributes `learning_rate`.
  # (Removed an unused `tf.trainable_variables()` call that had no
  # effect on the update.)
  wSum = self.wSum.assign_add(self.learning_rate)
  for z, qz in six.iteritems(self.latent_vars):
    sample = self.empirical_vals[z]
    mm_var = get_variables(self.approximations[z].mu)[0]
    mv_var = get_variables(self.approximations[z].sigma)[0]
    # Snapshot the old mean before updating it; Welford's recurrence
    # needs both the old and the new mean.
    d_op = self.deltas[z].assign(mm_var)
    with tf.control_dependencies([d_op]):
      # New mean: old_mean + (w / W) * (x - old_mean).
      mm_op = mm_var.assign_add(
          (self.learning_rate / wSum) * (sample - d_op))
      with tf.control_dependencies([mm_op]):
        # New stddev from the weighted incremental variance update:
        # sigma'^2 = (sigma^2 * W + w * (x - new_mean)(x - old_mean)) / W.
        mv_op = mv_var.assign(
            tf.sqrt(
                tf.divide((tf.square(mv_var) * wSum) +
                          (self.learning_rate *
                           (sample - mm_op) * (sample - d_op)),
                          wSum)))
        update_ops.append(mv_op)

  # Only bump the iteration counter after every update has run.
  with tf.control_dependencies([tf.group(*update_ops)]):
    increment_iters = self.update_iters.assign_add(
        tf.constant(1, tf.int32))

  return tf.group(increment_iters)
def initialize(self, *args, **kwargs):
  """Build the Laplace approximation's computation graph.

  Temporarily replaces each normal approximation with a ``PointMass``
  so that MAP (the parent class) optimizes the point estimates, which
  are tied to the normal approximations' mean parameters. Also builds
  ``finalize_ops``, which set each approximation's scale from the
  (inverse) Hessian of the loss at the mode.
  """
  # Store latent variables in a temporary attribute; MAP will
  # optimize ``PointMass`` random variables, which subsequently
  # optimizes mean parameters of the normal approximations.
  latent_vars_normal = self.latent_vars.copy()
  self.latent_vars = {z: PointMass(params=qz.loc)
                      for z, qz in six.iteritems(latent_vars_normal)}

  super(Laplace, self).initialize(*args, **kwargs)

  # Second derivatives of the loss w.r.t. each point estimate.  The
  # value order matches the key order below: both iterate the same
  # dict (`self.latent_vars`) without mutation in between.
  hessians = tf.hessians(self.loss, list(six.itervalues(self.latent_vars)))
  self.finalize_ops = []
  for z, hessian in zip(six.iterkeys(self.latent_vars), hessians):
    qz = latent_vars_normal[z]
    if isinstance(qz, (MultivariateNormalDiag, Normal)):
      # Diagonal approximation: invert only the Hessian's diagonal.
      scale_var = get_variables(qz.variance())[0]
      scale = 1.0 / tf.diag_part(hessian)
    else:  # qz is MultivariateNormalTriL
      # Full covariance: scale is the inverse of the Cholesky factor.
      scale_var = get_variables(qz.covariance())[0]
      scale = tf.matrix_inverse(tf.cholesky(hessian))

    self.finalize_ops.append(scale_var.assign(scale))

  # Restore the normal approximations for downstream use.
  self.latent_vars = latent_vars_normal.copy()
  del latent_vars_normal
def test_chain_structure(self):
  """Chain a -> b -> c: traversal stops at the nearest Variable."""
  with self.test_session():
    root = tf.Variable(0.0)
    mid = tf.Variable(root)
    leaf = Normal(mu=mid, sigma=1.0)
    self.assertEqual(get_variables(root), [])
    self.assertEqual(get_variables(mid), [])
    self.assertEqual(get_variables(leaf), [mid])
def test_control_flow(self):
  """Variables reachable through a tf.cond branch are discovered."""
  with self.test_session():
    coin = Bernoulli(p=0.5)
    var_branch = tf.Variable(0.0)
    const_branch = tf.constant(0.0)
    picked = tf.cond(tf.cast(coin, tf.bool),
                     lambda: var_branch,
                     lambda: const_branch)
    rv = Normal(mu=picked, sigma=1.0)
    self.assertEqual(get_variables(picked), [var_branch])
    self.assertEqual(get_variables(rv), [var_branch])
def test_chain_structure(self):
  """a -> b -> c -> d -> e"""
  with self.test_session():
    first = tf.Variable(0.0)
    second = tf.Variable(first)
    third = Normal(second, 1.0)
    self.assertEqual(get_variables(first), [])
    self.assertEqual(get_variables(second), [])
    self.assertEqual(get_variables(third), [second])
def test_tensor(self):
  """Tensor ops report their Variable inputs; Variables report none."""
  with self.test_session():
    weight = tf.Variable(0.0)
    offset = tf.constant(2.0)
    shifted = weight + offset
    clone = tf.Variable(weight)
    self.assertEqual(get_variables(weight), [])
    self.assertEqual(get_variables(offset), [])
    self.assertEqual(get_variables(shifted), [weight])
    self.assertEqual(get_variables(clone), [])
def test_a_structure(self):
  """Two chains fanning out from one shared root variable."""
  with self.test_session():
    root = tf.Variable(0.0)
    left = Normal(mu=root, sigma=1.0)
    left_tail = Normal(mu=left, sigma=1.0)
    right = Normal(mu=root, sigma=1.0)
    right_tail = Normal(mu=right, sigma=1.0)
    self.assertEqual(get_variables(root), [])
    self.assertEqual(get_variables(left), [root])
    self.assertEqual(get_variables(left_tail), [root])
    self.assertEqual(get_variables(right), [root])
    self.assertEqual(get_variables(right_tail), [root])
def test_v_structure(self):
  """Two independent roots merging at a single child."""
  with self.test_session():
    lhs_root = tf.Variable(0.0)
    lhs = Normal(mu=lhs_root, sigma=1.0)
    rhs_root = tf.Variable(0.0)
    rhs = Normal(mu=rhs_root, sigma=1.0)
    merged = Normal(mu=tf.multiply(lhs, rhs), sigma=1.0)
    self.assertEqual(get_variables(lhs_root), [])
    self.assertEqual(get_variables(lhs), [lhs_root])
    self.assertEqual(get_variables(rhs_root), [])
    self.assertEqual(get_variables(rhs), [rhs_root])
    self.assertEqual(set(get_variables(merged)), set([lhs_root, rhs_root]))
def test_v_structure(self):
  """Two independent roots merging at a single child (tf.mul API)."""
  with self.test_session():
    lhs_root = tf.Variable(0.0)
    lhs = Normal(mu=lhs_root, sigma=1.0)
    rhs_root = tf.Variable(0.0)
    rhs = Normal(mu=rhs_root, sigma=1.0)
    merged = Normal(mu=tf.mul(lhs, rhs), sigma=1.0)
    self.assertEqual(get_variables(lhs_root), [])
    self.assertEqual(get_variables(lhs), [lhs_root])
    self.assertEqual(get_variables(rhs_root), [])
    self.assertEqual(get_variables(rhs), [rhs_root])
    self.assertEqual(set(get_variables(merged)), set([lhs_root, rhs_root]))
def test_a_structure(self):
  """e <- d <- a -> b -> c"""
  with self.test_session():
    shared = tf.Variable(0.0)
    chain1 = Normal(shared, 1.0)
    chain1_tail = Normal(chain1, 1.0)
    chain2 = Normal(shared, 1.0)
    chain2_tail = Normal(chain2, 1.0)
    self.assertEqual(get_variables(shared), [])
    self.assertEqual(get_variables(chain1), [shared])
    self.assertEqual(get_variables(chain1_tail), [shared])
    self.assertEqual(get_variables(chain2), [shared])
    self.assertEqual(get_variables(chain2_tail), [shared])
def test_v_structure(self):
  """a -> b -> e <- d <- c"""
  with self.test_session():
    lhs_root = tf.Variable(0.0)
    lhs = Normal(lhs_root, 1.0)
    rhs_root = tf.Variable(0.0)
    rhs = Normal(rhs_root, 1.0)
    merged = Normal(lhs * rhs, 1.0)
    self.assertEqual(get_variables(lhs_root), [])
    self.assertEqual(get_variables(lhs), [lhs_root])
    self.assertEqual(get_variables(rhs_root), [])
    self.assertEqual(get_variables(rhs), [rhs_root])
    self.assertEqual(set(get_variables(merged)), set([lhs_root, rhs_root]))
def test_scan_with_a_structure(self):
  """copied from test_a_structure"""
  def running_total(sequence):
    # Cumulative sum via tf.scan, so the dependency passes through
    # a while-loop context.
    return tf.scan(lambda acc, item: acc + item, sequence)

  with self.test_session():
    root = tf.Variable([1.0, 1.0, 1.0])
    left = Normal(mu=running_total(root), sigma=tf.ones([3]))
    left_tail = Normal(mu=running_total(left), sigma=tf.ones([3]))
    right = Normal(mu=running_total(root), sigma=tf.ones([3]))
    right_tail = Normal(mu=running_total(right), sigma=tf.ones([3]))
    self.assertEqual(get_variables(root), [])
    self.assertEqual(get_variables(left), [root])
    self.assertEqual(get_variables(left_tail), [root])
    self.assertEqual(get_variables(right), [root])
    self.assertEqual(get_variables(right_tail), [root])
def test_scan_with_a_structure(self):
  """copied from test_a_structure"""
  def running_total(sequence):
    # Cumulative sum via tf.scan, so the dependency passes through
    # a while-loop context.
    return tf.scan(lambda acc, item: acc + item, sequence)

  with self.test_session():
    root = tf.Variable([1.0, 1.0, 1.0])
    left = Normal(running_total(root), tf.ones([3]))
    left_tail = Normal(running_total(left), tf.ones([3]))
    right = Normal(running_total(root), tf.ones([3]))
    right_tail = Normal(running_total(right), tf.ones([3]))
    self.assertEqual(get_variables(root), [])
    self.assertEqual(get_variables(left), [root])
    self.assertEqual(get_variables(left_tail), [root])
    self.assertEqual(get_variables(right), [root])
    self.assertEqual(get_variables(right_tail), [root])
def _set_log_variables(self, log_vars=None):
  """Log variables to TensorBoard.

  For each variable in ``log_vars``, forms a ``tf.summary.scalar`` if
  the variable has scalar shape; otherwise forms a
  ``tf.summary.histogram``.

  Parameters
  ----------
  log_vars : list, optional
    Specifies the list of variables to log after each ``n_print``
    steps. If None, will log all variables. If ``[]``, no variables
    will be logged.
  """
  summary_key = 'summaries_' + str(id(self))
  if log_vars is None:
    # Default: every variable that the observed data and the latent
    # variables (and their approximations) depend on.
    log_vars = []
    for key in six.iterkeys(self.data):
      log_vars += get_variables(key)

    for key, value in six.iteritems(self.latent_vars):
      log_vars += get_variables(key)
      log_vars += get_variables(value)

  # Deduplicate; the same variable may back several nodes.
  log_vars = set(log_vars)
  for var in log_vars:
    # replace colons which are an invalid character
    var_name = var.name.replace(':', '/')
    # Log all scalars.
    if len(var.shape) == 0:
      tf.summary.scalar("parameter/{}".format(var_name),
                        var, collections=[summary_key])
    elif len(var.shape) == 1 and var.shape[0] == 1:
      # Length-one vectors are logged as scalars too.
      tf.summary.scalar("parameter/{}".format(var_name),
                        var[0], collections=[summary_key])
    else:
      # If var is multi-dimensional, log a histogram of its values.
      tf.summary.histogram("parameter/{}".format(var_name),
                           var, collections=[summary_key])
def _set_log_variables(self, log_vars=None):
  """Log variables to TensorBoard.

  For each variable in `log_vars`, forms a `tf.summary.scalar` if
  the variable has scalar shape; otherwise forms a
  `tf.summary.histogram`.

  Args:
    log_vars: list, optional.
      Specifies the list of variables to log after each `n_print`
      steps. If None, will log all variables. If `[]`, no variables
      will be logged.
  """
  summary_key = 'summaries_' + str(id(self))
  if log_vars is None:
    # Default: every variable the observed data and the latent
    # variables (and their approximations) depend on.
    collected = []
    for observed in six.iterkeys(self.data):
      collected.extend(get_variables(observed))
    for latent, approx in six.iteritems(self.latent_vars):
      collected.extend(get_variables(latent))
      collected.extend(get_variables(approx))
    log_vars = collected

  # Deduplicate; the same variable may back several nodes.
  for var in set(log_vars):
    # Colons are invalid in summary names; replace them.
    var_name = var.name.replace(':', '/')
    rank = len(var.shape)
    if rank == 0:
      # True scalars get a scalar summary.
      tf.summary.scalar("parameter/{}".format(var_name),
                        var, collections=[summary_key])
    elif rank == 1 and var.shape[0] == 1:
      # Length-one vectors are logged as scalars too.
      tf.summary.scalar("parameter/{}".format(var_name),
                        var[0], collections=[summary_key])
    else:
      # Multi-dimensional values get a histogram summary.
      tf.summary.histogram("parameter/{}".format(var_name),
                           var, collections=[summary_key])
def finalize(self, feed_dict=None):
  """Function to call after convergence.

  Computes the Hessian at the mode.

  Parameters
  ----------
  feed_dict : dict, optional
    Feed dictionary for a TensorFlow session run during evaluation
    of Hessian. It is used to feed placeholders that are not fed
    during initialization.
  """
  if feed_dict is None:
    feed_dict = {}

  # Feed observed data bound to placeholders so the Hessian is
  # evaluated on the same data used for inference.
  for key, value in six.iteritems(self.data):
    if isinstance(key, tf.Tensor) and "Placeholder" in key.op.type:
      feed_dict[key] = value

  var_list = list(six.itervalues(self.latent_vars))
  hessians = tf.hessians(self.loss, var_list)

  assign_ops = []
  for z, hessian in zip(six.iterkeys(self.latent_vars), hessians):
    qz = self.latent_vars_normal[z]
    sigma_var = get_variables(qz.sigma)[0]
    if isinstance(qz, MultivariateNormalCholesky):
      # Cholesky parameterization: assign the inverse Cholesky factor.
      sigma = tf.matrix_inverse(tf.cholesky(hessian))
    elif isinstance(qz, MultivariateNormalDiag):
      # Diagonal parameterization: invert only the diagonal.
      sigma = 1.0 / tf.diag_part(hessian)
    else:  # qz is MultivariateNormalFull
      # Full covariance is the inverse Hessian.
      sigma = tf.matrix_inverse(hessian)

    assign_ops.append(sigma_var.assign(sigma))

  # Evaluate and write the covariances, then restore the normal
  # approximations as the exposed latent variables.
  sess = get_session()
  sess.run(assign_ops, feed_dict)
  self.latent_vars = self.latent_vars_normal.copy()
  del self.latent_vars_normal
  super(Laplace, self).finalize()
def initialize(self, optimizer=None, var_list=None, use_prettytensor=False,
               *args, **kwargs):
  """Initialize variational inference.

  Parameters
  ----------
  optimizer : str or tf.train.Optimizer, optional
    A TensorFlow optimizer, to use for optimizing the variational
    objective. Alternatively, one can pass in the name of a
    TensorFlow optimizer, and default parameters for the optimizer
    will be used.
  var_list : list of tf.Variable, optional
    List of TensorFlow variables to optimize over. Default is all
    trainable variables that ``latent_vars`` and ``data`` depend on,
    excluding those that are only used in conditionals in ``data``.
  use_prettytensor : bool, optional
    ``True`` if aim to use PrettyTensor optimizer (when using
    PrettyTensor) or ``False`` if aim to use TensorFlow optimizer.
    Defaults to TensorFlow.

  Raises
  ------
  ValueError
    If ``optimizer`` is a string that names no known optimizer.
  TypeError
    If ``optimizer`` is neither None, a string, nor a
    ``tf.train.Optimizer``.
  """
  super(VariationalInference, self).initialize(*args, **kwargs)

  if var_list is None:
    if self.model_wrapper is None:
      # Traverse random variable graphs to get default list of variables.
      var_list = set([])
      trainables = tf.trainable_variables()
      for z, qz in six.iteritems(self.latent_vars):
        if isinstance(z, RandomVariable):
          var_list.update(get_variables(z, collection=trainables))

        var_list.update(get_variables(qz, collection=trainables))

      for x, qx in six.iteritems(self.data):
        if isinstance(x, RandomVariable) and \
                not isinstance(qx, RandomVariable):
          var_list.update(get_variables(x, collection=trainables))

      var_list = list(var_list)
    else:
      # Variables may not be instantiated for model wrappers until
      # their methods are first called. For now, hard-code
      # ``var_list`` inside build_losses.
      var_list = None

  self.loss, grads_and_vars = self.build_loss_and_gradients(var_list)

  if optimizer is None:
    # Use ADAM with a decaying scale factor.
    global_step = tf.Variable(0, trainable=False)
    starter_learning_rate = 0.1
    learning_rate = tf.train.exponential_decay(starter_learning_rate,
                                               global_step,
                                               100, 0.9, staircase=True)
    optimizer = tf.train.AdamOptimizer(learning_rate)
  elif isinstance(optimizer, str):
    if optimizer == 'gradientdescent':
      optimizer = tf.train.GradientDescentOptimizer(0.01)
    elif optimizer == 'adadelta':
      optimizer = tf.train.AdadeltaOptimizer()
    elif optimizer == 'adagrad':
      optimizer = tf.train.AdagradOptimizer(0.01)
    elif optimizer == 'momentum':
      optimizer = tf.train.MomentumOptimizer(0.01, 0.9)
    elif optimizer == 'adam':
      optimizer = tf.train.AdamOptimizer()
    elif optimizer == 'ftrl':
      optimizer = tf.train.FtrlOptimizer(0.01)
    elif optimizer == 'rmsprop':
      optimizer = tf.train.RMSPropOptimizer(0.01)
    else:
      raise ValueError('Optimizer class not found:', optimizer)

    global_step = None
  elif isinstance(optimizer, tf.train.Optimizer):
    # Custom optimizers have no control over global_step.
    global_step = None
  else:
    # Was a bare ``raise TypeError()``; give an actionable message
    # consistent with the rest of the codebase.
    raise TypeError("Optimizer must be str, tf.train.Optimizer, or None.")

  if not use_prettytensor:
    self.train = optimizer.apply_gradients(grads_and_vars,
                                           global_step=global_step)
  else:
    # Note PrettyTensor optimizer does not accept manual updates;
    # it autodiffs the loss directly.
    self.train = pt.apply_optimizer(optimizer, losses=[self.loss],
                                    global_step=global_step,
                                    var_list=var_list)
def test_scan(self):
  """Variables captured inside a tf.scan body are discovered."""
  with self.test_session():
    bias = tf.Variable(0.0)
    cumulative = tf.scan(lambda acc, x: acc + bias + x,
                         tf.constant([2.0, 3.0, 1.0]))
    self.assertEqual(get_variables(cumulative), [bias])
# Store the training targets on the graph once via a placeholder-fed,
# non-trainable variable (kept out of all collections).
ph = tf.placeholder(tf.float32, y_train.shape)
var = tf.Variable(ph, trainable=False, collections=[])
sess.run(var.initializer, {ph: y_train})

# n_samples is the number of samples in building loss function
n_samples = args.samp

# Iteration counter, incremented once per training step.
t = tf.Variable(0, trainable=False)
increment_t = t.assign_add(1)

# find the list of variables
var_list = set([])
trainables = tf.trainable_variables()
for z, qz in six.iteritems(latent_vars):
  if isinstance(z, RandomVariable):
    var_list.update(get_variables(z, collection=trainables))

  var_list.update(get_variables(qz, collection=trainables))

for x, qx in six.iteritems(data):
  if isinstance(x, RandomVariable) and not isinstance(qx, RandomVariable):
    var_list.update(get_variables(x, collection=trainables))

var_list = list(var_list)

# build a loss function
# One log-likelihood accumulator per Monte Carlo sample.
p_log_lik = [0.0] * n_samples
z_sample = {}
for z, qz in six.iteritems(latent_vars):
  # Copy q(z) to obtain new set of posterior samples.
def initialize(self, optimizer=None, var_list=None, use_prettytensor=False,
               global_step=None, *args, **kwargs):
  """Initialize variational inference.

  Parameters
  ----------
  optimizer : str or tf.train.Optimizer, optional
    A TensorFlow optimizer, to use for optimizing the variational
    objective. Alternatively, one can pass in the name of a
    TensorFlow optimizer, and default parameters for the optimizer
    will be used.
  var_list : list of tf.Variable, optional
    List of TensorFlow variables to optimize over. Default is all
    trainable variables that ``latent_vars`` and ``data`` depend on,
    excluding those that are only used in conditionals in ``data``.
  use_prettytensor : bool, optional
    ``True`` if aim to use PrettyTensor optimizer (when using
    PrettyTensor) or ``False`` if aim to use TensorFlow optimizer.
    Defaults to TensorFlow.
  global_step : tf.Variable, optional
    A TensorFlow variable to hold the global step.
  """
  super(VariationalInference, self).initialize(*args, **kwargs)

  if var_list is None:
    # Traverse random variable graphs to get default list of variables.
    var_list = set()
    trainables = tf.trainable_variables()
    for z, qz in six.iteritems(self.latent_vars):
      if isinstance(z, RandomVariable):
        var_list.update(get_variables(z, collection=trainables))

      var_list.update(get_variables(qz, collection=trainables))

    for x, qx in six.iteritems(self.data):
      if isinstance(x, RandomVariable) and \
              not isinstance(qx, RandomVariable):
        var_list.update(get_variables(x, collection=trainables))

    var_list = list(var_list)

  self.loss, grads_and_vars = self.build_loss_and_gradients(var_list)

  if self.logging:
    # Record the loss plus per-variable gradient summaries.
    summary_key = 'summaries_' + str(id(self))
    tf.summary.scalar("loss", self.loss, collections=[summary_key])
    for grad, var in grads_and_vars:
      # replace colons which are an invalid character
      tf.summary.histogram("gradient/" +
                           var.name.replace(':', '/'),
                           grad, collections=[summary_key])
      tf.summary.scalar("gradient_norm/" +
                        var.name.replace(':', '/'),
                        tf.norm(grad), collections=[summary_key])

    self.summarize = tf.summary.merge_all(key=summary_key)

  if optimizer is None and global_step is None:
    # Default optimizer always uses a global step variable.
    global_step = tf.Variable(0, trainable=False, name="global_step")

  if isinstance(global_step, tf.Variable):
    # With a step variable, decay the learning rate over time.
    starter_learning_rate = 0.1
    learning_rate = tf.train.exponential_decay(starter_learning_rate,
                                               global_step,
                                               100, 0.9, staircase=True)
  else:
    # No usable step variable: fixed learning rate, no step tracking.
    learning_rate = 0.01
    global_step = None

  # Build optimizer.
  if optimizer is None:
    optimizer = tf.train.AdamOptimizer(learning_rate)
  elif isinstance(optimizer, str):
    if optimizer == 'gradientdescent':
      optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    elif optimizer == 'adadelta':
      optimizer = tf.train.AdadeltaOptimizer(learning_rate)
    elif optimizer == 'adagrad':
      optimizer = tf.train.AdagradOptimizer(learning_rate)
    elif optimizer == 'momentum':
      optimizer = tf.train.MomentumOptimizer(learning_rate, 0.9)
    elif optimizer == 'adam':
      optimizer = tf.train.AdamOptimizer(learning_rate)
    elif optimizer == 'ftrl':
      optimizer = tf.train.FtrlOptimizer(learning_rate)
    elif optimizer == 'rmsprop':
      optimizer = tf.train.RMSPropOptimizer(learning_rate)
    else:
      raise ValueError('Optimizer class not found:', optimizer)
  elif not isinstance(optimizer, tf.train.Optimizer):
    raise TypeError(
        "Optimizer must be str, tf.train.Optimizer, or None.")

  # Scope the optimizer's slot variables so they can be reset.
  scope = "optimizer_" + str(id(self))
  with tf.variable_scope(scope):
    if not use_prettytensor:
      self.train = optimizer.apply_gradients(grads_and_vars,
                                             global_step=global_step)
    else:
      # Note PrettyTensor optimizer does not accept manual updates;
      # it autodiffs the loss directly.
      self.train = pt.apply_optimizer(optimizer, losses=[self.loss],
                                      global_step=global_step,
                                      var_list=var_list)

  self.reset.append(
      tf.variables_initializer(
          tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope=scope)))
def initialize(self, optimizer=None, var_list=None, use_prettytensor=False,
               global_step=None, *args, **kwargs):
  """Initialize inference algorithm. It initializes hyperparameters
  and builds ops for the algorithm's computation graph.

  Args:
    optimizer: str or tf.train.Optimizer, optional.
      A TensorFlow optimizer, to use for optimizing the variational
      objective. Alternatively, one can pass in the name of a
      TensorFlow optimizer, and default parameters for the optimizer
      will be used.
    var_list: list of tf.Variable, optional.
      List of TensorFlow variables to optimize over. Default is all
      trainable variables that `latent_vars` and `data` depend on,
      excluding those that are only used in conditionals in `data`.
    use_prettytensor: bool, optional.
      `True` if aim to use PrettyTensor optimizer (when using
      PrettyTensor) or `False` if aim to use TensorFlow optimizer.
      Defaults to TensorFlow.
    global_step: tf.Variable, optional.
      A TensorFlow variable to hold the global step.
  """
  super(VariationalInference, self).initialize(*args, **kwargs)

  if var_list is None:
    # Traverse random variable graphs to get default list of variables.
    var_list = set()
    trainables = tf.trainable_variables()
    for z, qz in six.iteritems(self.latent_vars):
      if isinstance(z, RandomVariable):
        var_list.update(get_variables(z, collection=trainables))

      var_list.update(get_variables(qz, collection=trainables))

    for x, qx in six.iteritems(self.data):
      if isinstance(x, RandomVariable) and \
              not isinstance(qx, RandomVariable):
        var_list.update(get_variables(x, collection=trainables))

    var_list = list(var_list)

  self.loss, grads_and_vars = self.build_loss_and_gradients(var_list)

  if self.logging:
    # Record the loss plus per-variable gradient summaries.
    summary_key = 'summaries_' + str(id(self))
    tf.summary.scalar("loss", self.loss, collections=[summary_key])
    for grad, var in grads_and_vars:
      # replace colons which are an invalid character
      tf.summary.histogram("gradient/" +
                           var.name.replace(':', '/'),
                           grad, collections=[summary_key])
      tf.summary.scalar("gradient_norm/" +
                        var.name.replace(':', '/'),
                        tf.norm(grad), collections=[summary_key])

    self.summarize = tf.summary.merge_all(key=summary_key)

  if optimizer is None and global_step is None:
    # Default optimizer always uses a global step variable.
    global_step = tf.Variable(0, trainable=False, name="global_step")

  if isinstance(global_step, tf.Variable):
    # With a step variable, decay the learning rate over time.
    starter_learning_rate = 0.1
    learning_rate = tf.train.exponential_decay(starter_learning_rate,
                                               global_step,
                                               100, 0.9, staircase=True)
  else:
    # No step variable: fall back to a fixed learning rate.
    learning_rate = 0.01

  # Build optimizer.
  if optimizer is None:
    optimizer = tf.train.AdamOptimizer(learning_rate)
  elif isinstance(optimizer, str):
    if optimizer == 'gradientdescent':
      optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    elif optimizer == 'adadelta':
      optimizer = tf.train.AdadeltaOptimizer(learning_rate)
    elif optimizer == 'adagrad':
      optimizer = tf.train.AdagradOptimizer(learning_rate)
    elif optimizer == 'momentum':
      optimizer = tf.train.MomentumOptimizer(learning_rate, 0.9)
    elif optimizer == 'adam':
      optimizer = tf.train.AdamOptimizer(learning_rate)
    elif optimizer == 'ftrl':
      optimizer = tf.train.FtrlOptimizer(learning_rate)
    elif optimizer == 'rmsprop':
      optimizer = tf.train.RMSPropOptimizer(learning_rate)
    else:
      raise ValueError('Optimizer class not found:', optimizer)
  elif not isinstance(optimizer, tf.train.Optimizer):
    raise TypeError("Optimizer must be str, tf.train.Optimizer, or None.")

  # Scope the optimizer's slot variables so they can be reset.
  scope = "optimizer_" + str(id(self))
  with tf.variable_scope(scope):
    if not use_prettytensor:
      self.train = optimizer.apply_gradients(grads_and_vars,
                                             global_step=global_step)
    else:
      # Note PrettyTensor optimizer does not accept manual updates;
      # it autodiffs the loss directly.
      self.train = pt.apply_optimizer(optimizer, losses=[self.loss],
                                      global_step=global_step,
                                      var_list=var_list)

  self.reset.append(tf.variables_initializer(
      tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope=scope)))
def initialize(self, n_iter=1000, n_print=None, scale=None,
               auto_transform=True, logdir=None, log_timestamp=True,
               log_vars=None, debug=False, optimizer=None, var_list=None,
               use_prettytensor=False, global_step=None, n_samples=1,
               kl_scaling=None, maxnorm=5.):
  """Initialize inference: hyperparameters, automatic transformations
  of constrained latent variables, TensorBoard logging, gradient
  clipping, and the training op.
  """
  if kl_scaling is None:
    kl_scaling = {}
  if n_samples <= 0:
    raise ValueError(
        "n_samples should be greater than zero: {}".format(n_samples))

  self.n_samples = n_samples
  self.kl_scaling = kl_scaling

  # from inference.py
  self.n_iter = n_iter
  if n_print is None:
    # Default: report roughly 100 times over the run.
    self.n_print = int(n_iter / 100)
  else:
    self.n_print = n_print

  self.progbar = Progbar(self.n_iter)
  # Iteration counter op, incremented once per update.
  self.t = tf.Variable(0, trainable=False, name="iteration")
  self.increment_t = self.t.assign_add(1)

  if scale is None:
    scale = {}
  elif not isinstance(scale, dict):
    raise TypeError("scale must be a dict object.")

  self.scale = scale

  # Map each constrained latent variable to its unconstrained
  # counterpart so optimization happens on an unconstrained space.
  self.transformations = {}
  if auto_transform:
    latent_vars = self.latent_vars.copy()
    self.latent_vars = {}
    self.latent_vars_unconstrained = {}
    for z, qz in six.iteritems(latent_vars):
      if hasattr(z, 'support') and hasattr(qz, 'support') and \
              z.support != qz.support and qz.support != 'point':
        # Supports disagree: transform z (and, unless it is a point
        # mass, qz) to the unconstrained space.
        z_unconstrained = transform(z)
        self.transformations[z] = z_unconstrained
        if qz.support == "points":
          qz_unconstrained = qz
        else:
          qz_unconstrained = transform(qz)
        self.latent_vars_unconstrained[z_unconstrained] = qz_unconstrained
        if z_unconstrained != z:
          # Map the approximation back to the constrained space.
          qz_constrained = transform(
              qz_unconstrained, bijectors.Invert(z_unconstrained.bijector))
          try:
            qz_constrained.params = z_unconstrained.bijector.inverse(
                qz_unconstrained.params)
          except:
            # NOTE(review): bare except silently skips approximations
            # whose params cannot be pulled back through the bijector;
            # presumably deliberate best-effort — confirm.
            pass
        else:
          qz_constrained = qz_unconstrained
        self.latent_vars[z] = qz_constrained
      else:
        # Supports already match; no transformation needed.
        self.latent_vars[z] = qz
        self.latent_vars_unconstrained[z] = qz
    del latent_vars

  if logdir is not None:
    self.logging = True
    if log_timestamp:
      # Write each run into its own timestamped subdirectory.
      logdir = os.path.expanduser(logdir)
      logdir = os.path.join(
          logdir, datetime.strftime(datetime.utcnow(), "%Y%m%d_%H%M%S"))
    self._summary_key = tf.get_default_graph().unique_name("summaries")
    self._set_log_variables(log_vars)
    self.train_writer = tf.summary.FileWriter(logdir, tf.get_default_graph())
  else:
    self.logging = False

  self.debug = debug
  if self.debug:
    self.op_check = tf.add_check_numerics_ops()

  self.reset = [tf.variables_initializer([self.t])]

  # from variational_inference.py
  if var_list is None:
    var_list = set()
    trainables = tf.trainable_variables()
    for z, qz in six.iteritems(self.latent_vars):
      var_list.update(get_variables(z, collection=trainables))
      var_list.update(get_variables(qz, collection=trainables))

    for x, qx in six.iteritems(self.data):
      if isinstance(x, RandomVariable) and \
              not isinstance(qx, RandomVariable):
        var_list.update(get_variables(x, collection=trainables))

    var_list = list(var_list)

  self.loss, grads_and_vars = self.build_loss_and_gradients(var_list)

  # Clip gradients of layer parameters (names containing "kernel" or
  # "bias") column-wise to norm `maxnorm`; pass others through.
  clipped_grads_and_vars = []
  for grad, var in grads_and_vars:
    if "kernel" in var.name or "bias" in var.name:
      clipped_grads_and_vars.append((tf.clip_by_norm(grad, maxnorm,
                                                     axes=[0]), var))
    else:
      clipped_grads_and_vars.append((grad, var))
  # for grad, var in grads_and_vars:
  #   clipped_grads_and_vars.append(
  #       (tf.clip_by_value(grad, -1000., 1000.), var))
  del grads_and_vars

  if self.logging:
    # Record the loss plus per-variable gradient summaries.
    tf.summary.scalar("loss", self.loss, collections=[self._summary_key])
    for grad, var in clipped_grads_and_vars:
      tf.summary.histogram("gradient/" + var.name.replace(':', '/'),
                           grad, collections=[self._summary_key])
      tf.summary.scalar("gradient_norm/" + var.name.replace(':', '/'),
                        tf.norm(grad), collections=[self._summary_key])

    self.summarize = tf.summary.merge_all(key=self._summary_key)

  if optimizer is None and global_step is None:
    # Default optimizer always uses a global step variable.
    global_step = tf.Variable(0, trainable=False, name="global_step")

  if isinstance(global_step, tf.Variable):
    # With a step variable, decay the learning rate over time.
    starter_learning_rate = 0.1
    learning_rate = tf.train.exponential_decay(starter_learning_rate,
                                               global_step,
                                               100, 0.9, staircase=True)
  else:
    # No step variable: fall back to a fixed learning rate.
    learning_rate = 0.01

  # Build optimizer.
  if optimizer is None:
    optimizer = tf.train.AdamOptimizer(learning_rate)
  elif isinstance(optimizer, str):
    if optimizer == 'gradientdescent':
      optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    elif optimizer == 'adadelta':
      optimizer = tf.train.AdadeltaOptimizer(learning_rate)
    elif optimizer == 'adagrad':
      optimizer = tf.train.AdagradOptimizer(learning_rate)
    elif optimizer == 'momentum':
      optimizer = tf.train.MomentumOptimizer(learning_rate, 0.9)
    elif optimizer == 'adam':
      optimizer = tf.train.AdamOptimizer(learning_rate)
    elif optimizer == 'ftrl':
      optimizer = tf.train.FtrlOptimizer(learning_rate)
    elif optimizer == 'rmsprop':
      optimizer = tf.train.RMSPropOptimizer(learning_rate)
    else:
      raise ValueError('Optimizer class not found:', optimizer)
  elif not isinstance(optimizer, tf.train.Optimizer):
    raise TypeError(
        "Optimizer must be str, tf.train.Optimizer, or None.")

  # Scope the optimizer's slot variables so they can be reset.
  with tf.variable_scope(None, default_name="optimizer") as scope:
    if not use_prettytensor:
      self.train = optimizer.apply_gradients(clipped_grads_and_vars,
                                             global_step=global_step)
    else:
      import prettytensor as pt
      self.train = pt.apply_optimizer(optimizer, losses=[self.loss],
                                      global_step=global_step,
                                      var_list=var_list)

  self.reset.append(
      tf.variables_initializer(
          tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                            scope=scope.name)))