def transform(x, *args, **kwargs):
  """Transform a continuous random variable to the unconstrained space.

  `transform` selects among a number of default transformations which
  depend on the support of the provided random variable.

  Args:
    x: RandomVariable.
      Continuous random variable to transform.
    *args, **kwargs: optional.
      Arguments to overwrite when forming the ``TransformedDistribution``.
      For example, one can manually specify the transformation by
      passing in the ``bijector`` argument.

  Returns:
    RandomVariable.
    A ``TransformedDistribution`` random variable, or the provided random
    variable if no transformation was applied.

  #### Examples

  ```python
  x = Gamma(1.0, 1.0)
  y = ed.transform(x)
  sess = tf.Session()
  sess.run(y)
  -2.2279539
  ```
  """
  if len(args) != 0 or kwargs.get('bijector', None) is not None:
    return TransformedDistribution(x, *args, **kwargs)

  try:
    support = x.support
  except AttributeError:
    msg = "'{}' object has no 'support' so cannot be transformed.".format(
        type(x).__name__)
    raise ValueError(msg)

  if support == '01':
    bij = bijectors.Invert(bijectors.Sigmoid())
  elif support == 'nonnegative':
    bij = bijectors.Invert(bijectors.Softplus())
  elif support == 'simplex':
    bij = bijectors.Invert(bijectors.SoftmaxCentered(event_ndims=1))
  elif support == 'real' or support == 'multivariate_real':
    return x
  else:
    msg = "'transform' does not handle supports of type '{}'".format(support)
    raise NotImplementedError(msg)

  return TransformedDistribution(x, bij, *args, **kwargs)
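For reference, a minimal usage sketch of the manual override mentioned in the docstring: passing a `bijector` keyword skips the support-based default and wraps the variable directly. The log transform (`Invert(Exp())`) used here is an illustrative choice, not the library default for this support:

```python
import edward as ed
import tensorflow as tf
from edward.models import Gamma
from tensorflow.contrib.distributions import bijectors

x = Gamma(1.0, 1.0)

# Default: support 'nonnegative' maps to the real line via inverse-softplus.
y = ed.transform(x)

# Manual override: any bijector can be supplied; here an (illustrative) log
# transform (inverse of Exp), which also maps (0, inf) onto the real line.
y_log = ed.transform(x, bijector=bijectors.Invert(bijectors.Exp()))

with tf.Session() as sess:
  print(sess.run([y, y_log]))
```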
def test_auto_transform_true(self):
  with self.test_session() as sess:
    # Match normal || softplus-inverse-normal distribution with
    # automated transformation on latter (assuming it is softplus).
    x = TransformedDistribution(
        distribution=Normal(0.0, 0.5),
        bijector=tf.contrib.distributions.bijectors.Softplus())
    x.support = 'nonnegative'
    qx = Normal(loc=tf.Variable(tf.random_normal([])),
                scale=tf.nn.softplus(tf.Variable(tf.random_normal([]))))

    inference = ed.KLqp({x: qx})
    inference.initialize(auto_transform=True, n_samples=5, n_iter=1000)
    tf.global_variables_initializer().run()
    for _ in range(inference.n_iter):
      info_dict = inference.update()

    # Check approximation on constrained space has same moments as
    # target distribution.
    n_samples = 10000
    x_mean, x_var = tf.nn.moments(x.sample(n_samples), 0)
    x_unconstrained = inference.transformations[x]
    qx_constrained = transform(
        qx, bijectors.Invert(x_unconstrained.bijector))
    qx_mean, qx_var = tf.nn.moments(qx_constrained.sample(n_samples), 0)
    stats = sess.run([x_mean, qx_mean, x_var, qx_var])
    self.assertAllClose(info_dict['loss'], 0.0, rtol=0.2, atol=0.2)
    self.assertAllClose(stats[0], stats[1], rtol=1e-1, atol=1e-1)
    self.assertAllClose(stats[2], stats[3], rtol=1e-1, atol=1e-1)
def initialize(self, n_iter=1000, n_print=None, scale=None,
               auto_transform=True, logdir=None, log_timestamp=True,
               log_vars=None, debug=False):
  """Initialize inference algorithm. It initializes hyperparameters
  and builds ops for the algorithm's computation graph.

  Any derived class of `Inference` **must** implement this method.
  No methods which build ops should be called outside `initialize()`.

  Args:
    n_iter: int, optional.
      Number of iterations for algorithm when calling `run()`.
      Alternatively if controlling inference manually, it is the
      expected number of calls to `update()`; this number determines
      tracking information during the print progress.
    n_print: int, optional.
      Number of iterations between each print progress. To suppress
      print progress, specify 0. Default is `int(n_iter / 100)`.
    scale: dict of RandomVariable to tf.Tensor, optional.
      A tensor to scale computation for any random variable that it is
      bound to. Its shape must be broadcastable; it is multiplied
      element-wise to the random variable. For example, this is useful
      for mini-batch scaling when inferring global variables, or
      applying masks on a random variable.
    auto_transform: bool, optional.
      Whether to automatically transform continuous latent variables
      of unequal support to be on the unconstrained space. It is only
      applied if the argument is `True`, the latent variable pair are
      `ed.RandomVariable`s with the `support` attribute, and the
      supports are both continuous and unequal.
    logdir: str, optional.
      Directory where event file will be written. For details,
      see `tf.summary.FileWriter`. Default is to log nothing.
    log_timestamp: bool, optional.
      If True (and `logdir` is specified), create a subdirectory of
      `logdir` to save the specific run results. The subdirectory's
      name is the current UTC timestamp with format 'YYYYMMDD_HHMMSS'.
    log_vars: list, optional.
      Specifies the list of variables to log after each `n_print`
      steps. If None, will log all variables. If `[]`, no variables
      will be logged. `logdir` must be specified for variables to be
      logged.
    debug: bool, optional.
      If True, add checks for `NaN` and `Inf` to all computations in
      the graph. May result in substantially slower execution times.
  """
  self.n_iter = n_iter
  if n_print is None:
    self.n_print = int(n_iter / 100)
  else:
    self.n_print = n_print

  self.progbar = Progbar(self.n_iter)
  self.t = tf.Variable(0, trainable=False, name="iteration")
  self.increment_t = self.t.assign_add(1)

  if scale is None:
    scale = {}
  elif not isinstance(scale, dict):
    raise TypeError("scale must be a dict object.")
  self.scale = scale

  # map from original latent vars to unconstrained versions
  self.transformations = {}
  if auto_transform:
    latent_vars = self.latent_vars.copy()
    # latent_vars maps original latent vars to constrained Q's.
    # latent_vars_unconstrained maps unconstrained vars to
    # unconstrained Q's.
    self.latent_vars = {}
    self.latent_vars_unconstrained = {}
    for z, qz in six.iteritems(latent_vars):
      if hasattr(z, 'support') and hasattr(qz, 'support') and \
              z.support != qz.support and qz.support != 'point':
        # transform z to an unconstrained space
        z_unconstrained = transform(z)
        self.transformations[z] = z_unconstrained

        # make sure we also have a qz that covers the unconstrained space
        if qz.support == "points":
          qz_unconstrained = qz
        else:
          qz_unconstrained = transform(qz)
        self.latent_vars_unconstrained[z_unconstrained] = qz_unconstrained

        # additionally construct the transformation of qz
        # back into the original constrained space
        if z_unconstrained != z:
          qz_constrained = transform(
              qz_unconstrained, bijectors.Invert(z_unconstrained.bijector))
          try:
            # attempt to pushforward the params of Empirical distributions
            qz_constrained.params = z_unconstrained.bijector.inverse(
                qz_unconstrained.params)
          except AttributeError:
            # qz_unconstrained is not an Empirical distribution
            pass
        else:
          qz_constrained = qz_unconstrained

        self.latent_vars[z] = qz_constrained
      else:
        self.latent_vars[z] = qz
        self.latent_vars_unconstrained[z] = qz
    del latent_vars

  if logdir is not None:
    self.logging = True

    if log_timestamp:
      logdir = os.path.expanduser(logdir)
      logdir = os.path.join(
          logdir, datetime.strftime(datetime.utcnow(), "%Y%m%d_%H%M%S"))

    self._summary_key = tf.get_default_graph().unique_name("summaries")
    self._set_log_variables(log_vars)
    self.train_writer = tf.summary.FileWriter(logdir, tf.get_default_graph())
  else:
    self.logging = False

  self.debug = debug
  if self.debug:
    self.op_check = tf.add_check_numerics_ops()

  # Store reset ops which user can call. Subclasses should append
  # any ops needed to reset internal variables in inference.
  self.reset = [tf.variables_initializer([self.t])]
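A usage sketch of `auto_transform` in action. The Gamma/Normal pairing is illustrative (mirroring the test above, not taken from this file): when the supports differ, the latent variable is transformed during `initialize()`, and the bookkeeping dictionaries expose both the unconstrained variable and the approximation mapped back to the constrained space.

```python
import edward as ed
import tensorflow as tf
from edward.models import Gamma, Normal

# Illustrative pairing: nonnegative latent variable, real-valued approximation.
z = Gamma(2.0, 2.0)
qz = Normal(loc=tf.Variable(0.0),
            scale=tf.nn.softplus(tf.Variable(0.0)))

inference = ed.KLqp({z: qz})
# Supports differ ('nonnegative' vs 'real'), so z is transformed to the
# unconstrained space and qz is matched against it there.
inference.initialize(auto_transform=True, n_samples=5, n_iter=1000)

z_unconstrained = inference.transformations[z]  # unconstrained version of z
qz_constrained = inference.latent_vars[z]       # qz mapped back to (0, inf)
```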
def initialize(self, n_iter=1000, n_print=None, scale=None,
               auto_transform=True, logdir=None, log_timestamp=True,
               log_vars=None, debug=False, optimizer=None, var_list=None,
               use_prettytensor=False, global_step=None, n_samples=1,
               kl_scaling=None, maxnorm=5.):
  """Initialize inference, combining the base `Inference.initialize`
  and `VariationalInference.initialize` logic, and additionally clipping
  gradients of "kernel"/"bias" variables to norm `maxnorm`."""
  if kl_scaling is None:
    kl_scaling = {}
  if n_samples <= 0:
    raise ValueError(
        "n_samples should be greater than zero: {}".format(n_samples))

  self.n_samples = n_samples
  self.kl_scaling = kl_scaling

  # from inference.py
  self.n_iter = n_iter
  if n_print is None:
    self.n_print = int(n_iter / 100)
  else:
    self.n_print = n_print

  self.progbar = Progbar(self.n_iter)
  self.t = tf.Variable(0, trainable=False, name="iteration")
  self.increment_t = self.t.assign_add(1)

  if scale is None:
    scale = {}
  elif not isinstance(scale, dict):
    raise TypeError("scale must be a dict object.")
  self.scale = scale

  # Map from original latent vars to unconstrained versions.
  self.transformations = {}
  if auto_transform:
    latent_vars = self.latent_vars.copy()
    self.latent_vars = {}
    self.latent_vars_unconstrained = {}
    for z, qz in six.iteritems(latent_vars):
      if hasattr(z, 'support') and hasattr(qz, 'support') and \
              z.support != qz.support and qz.support != 'point':
        z_unconstrained = transform(z)
        self.transformations[z] = z_unconstrained

        if qz.support == "points":
          qz_unconstrained = qz
        else:
          qz_unconstrained = transform(qz)
        self.latent_vars_unconstrained[z_unconstrained] = qz_unconstrained

        if z_unconstrained != z:
          qz_constrained = transform(
              qz_unconstrained, bijectors.Invert(z_unconstrained.bijector))
          try:
            qz_constrained.params = z_unconstrained.bijector.inverse(
                qz_unconstrained.params)
          except AttributeError:
            # qz_unconstrained is not an Empirical distribution.
            pass
        else:
          qz_constrained = qz_unconstrained

        self.latent_vars[z] = qz_constrained
      else:
        self.latent_vars[z] = qz
        self.latent_vars_unconstrained[z] = qz
    del latent_vars

  if logdir is not None:
    self.logging = True
    if log_timestamp:
      logdir = os.path.expanduser(logdir)
      logdir = os.path.join(
          logdir, datetime.strftime(datetime.utcnow(), "%Y%m%d_%H%M%S"))
    self._summary_key = tf.get_default_graph().unique_name("summaries")
    self._set_log_variables(log_vars)
    self.train_writer = tf.summary.FileWriter(logdir, tf.get_default_graph())
  else:
    self.logging = False

  self.debug = debug
  if self.debug:
    self.op_check = tf.add_check_numerics_ops()

  self.reset = [tf.variables_initializer([self.t])]

  # from variational_inference.py
  if var_list is None:
    # Traverse random variable graphs to get default list of variables.
    var_list = set()
    trainables = tf.trainable_variables()
    for z, qz in six.iteritems(self.latent_vars):
      var_list.update(get_variables(z, collection=trainables))
      var_list.update(get_variables(qz, collection=trainables))

    for x, qx in six.iteritems(self.data):
      if isinstance(x, RandomVariable) and \
              not isinstance(qx, RandomVariable):
        var_list.update(get_variables(x, collection=trainables))

    var_list = list(var_list)

  self.loss, grads_and_vars = self.build_loss_and_gradients(var_list)

  # Clip gradients of neural-network weights ("kernel"/"bias" variables)
  # to a maximum norm of `maxnorm`; leave other gradients untouched.
  clipped_grads_and_vars = []
  for grad, var in grads_and_vars:
    if "kernel" in var.name or "bias" in var.name:
      clipped_grads_and_vars.append(
          (tf.clip_by_norm(grad, maxnorm, axes=[0]), var))
    else:
      clipped_grads_and_vars.append((grad, var))
  # for grad, var in grads_and_vars:
  #   clipped_grads_and_vars.append(
  #       (tf.clip_by_value(grad, -1000., 1000.), var))
  del grads_and_vars

  if self.logging:
    tf.summary.scalar("loss", self.loss, collections=[self._summary_key])
    for grad, var in clipped_grads_and_vars:
      tf.summary.histogram("gradient/" + var.name.replace(':', '/'),
                           grad, collections=[self._summary_key])
      tf.summary.scalar("gradient_norm/" + var.name.replace(':', '/'),
                        tf.norm(grad), collections=[self._summary_key])

    self.summarize = tf.summary.merge_all(key=self._summary_key)

  if optimizer is None and global_step is None:
    global_step = tf.Variable(0, trainable=False, name="global_step")

  if isinstance(global_step, tf.Variable):
    starter_learning_rate = 0.1
    learning_rate = tf.train.exponential_decay(starter_learning_rate,
                                               global_step,
                                               100, 0.9, staircase=True)
  else:
    learning_rate = 0.01

  # Build optimizer.
  if optimizer is None:
    optimizer = tf.train.AdamOptimizer(learning_rate)
  elif isinstance(optimizer, str):
    if optimizer == 'gradientdescent':
      optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    elif optimizer == 'adadelta':
      optimizer = tf.train.AdadeltaOptimizer(learning_rate)
    elif optimizer == 'adagrad':
      optimizer = tf.train.AdagradOptimizer(learning_rate)
    elif optimizer == 'momentum':
      optimizer = tf.train.MomentumOptimizer(learning_rate, 0.9)
    elif optimizer == 'adam':
      optimizer = tf.train.AdamOptimizer(learning_rate)
    elif optimizer == 'ftrl':
      optimizer = tf.train.FtrlOptimizer(learning_rate)
    elif optimizer == 'rmsprop':
      optimizer = tf.train.RMSPropOptimizer(learning_rate)
    else:
      raise ValueError('Optimizer class not found:', optimizer)
  elif not isinstance(optimizer, tf.train.Optimizer):
    raise TypeError("Optimizer must be str, tf.train.Optimizer, or None.")

  with tf.variable_scope(None, default_name="optimizer") as scope:
    if not use_prettytensor:
      self.train = optimizer.apply_gradients(clipped_grads_and_vars,
                                             global_step=global_step)
    else:
      import prettytensor as pt
      self.train = pt.apply_optimizer(optimizer, losses=[self.loss],
                                      global_step=global_step,
                                      var_list=var_list)

  self.reset.append(
      tf.variables_initializer(
          tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                            scope=scope.name)))
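A hedged usage sketch of the extra knobs this variant adds on top of the base `initialize` (optimizer selected by name, `n_samples`, and `maxnorm` clipping of "kernel"/"bias" gradients). It assumes the method above is attached to a `KLqp`-style inference class as elsewhere in this section (stock Edward's `KLqp.initialize` does not take `maxnorm`); the toy Normal-Normal model is illustrative.

```python
import numpy as np
import edward as ed
import tensorflow as tf
from edward.models import Normal

# Toy model: 50 observations x ~ Normal(z, 1) with latent mean z.
z = Normal(loc=0.0, scale=1.0)
x = Normal(loc=z, scale=1.0, sample_shape=50)
qz = Normal(loc=tf.Variable(0.0),
            scale=tf.nn.softplus(tf.Variable(0.0)))
x_data = np.random.randn(50).astype(np.float32)

inference = ed.KLqp({z: qz}, data={x: x_data})
# `run()` forwards keyword arguments to `initialize()`: pick the optimizer by
# name, use 5 samples per gradient estimate, and clip gradients of any
# "kernel"/"bias" variables to norm 5. Note: `maxnorm` assumes the customized
# initialize above is in place; it is not an argument of stock KLqp.
inference.run(optimizer='rmsprop', n_samples=5, maxnorm=5.)
```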
    probs=p,
    concentrations=conc_param * tf.ones(n_samples, dtype=np.float32),
    sample_shape=n_samples,
    value=tf.zeros(n_samples, dtype="float32"))

# INFERENCE
# qp = edward.models.BetaWithSoftplusConcentration(tf.Variable(1.),
#                                                  tf.Variable(1.))
qp = ed.models.Normal(loc=tf.get_variable("qp/loc", []),
                      scale=tf.nn.softplus(tf.get_variable("qp/scale", [])))
qconc = ed.models.Normal(loc=tf.get_variable("qconc/loc", []),
                         scale=tf.nn.softplus(
                             tf.get_variable("qconc/scale", [])))

inference = ed.KLqp({p: qp, conc_param: qconc}, data={x: x_data})
inference.run()

# PRINT RESULTS
qp_samples = ed.transform(
    qp,
    bijectors.Invert(
        inference.transformations[p].bijector)).sample(100).eval()
print("True prob success: {:.2f}, inferred {:.3f} +- {:.2f}".format(
    p_true, qp_samples.mean(), np.sqrt(qp_samples.var())))

qconc_samples = ed.transform(
    qconc,
    bijectors.Invert(
        inference.transformations[conc_param].bijector)).sample(100).eval()
print("True concentration: {:.2f}, Inferred: {:.3f} +- {:.2f}".format(
    true_conc, qconc_samples.mean(), np.sqrt(qconc_samples.var())))