Example #1
	def init_bijectors(self, n_layers, hidden_layers):
		with tf.variable_scope(self.name):
			bijectors = []
			for i in range(n_layers):
				if self.flow_type == "MAF":
					bijectors.append(tfb.MaskedAutoregressiveFlow(
						shift_and_log_scale_fn=tfb.masked_autoregressive_default_template(
							hidden_layers=hidden_layers,
							name = "MAF_template_{}".format(i)),
						name = "MAF_{}".format(i)))
				elif self.flow_type == "IAF":
					bijectors.append(
						tfb.Invert(
							tfb.MaskedAutoregressiveFlow(
								shift_and_log_scale_fn=tfb.masked_autoregressive_default_template(
									hidden_layers=hidden_layers,
									name = "MAF_template_{}".format(i))
								),
							name = "IAF_{}".format(i)
						)
					)
				bijectors.append(tfb.Permute(permutation=self.init_once(
					np.random.permutation(self.event_size).astype("int32"),
					name="permutation_{}".format(i))))

			flow_bijector = tfb.Chain(list(reversed(bijectors[:-1])))

			return flow_bijector
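Note that bijectors[:-1] drops the trailing Permute so the chain does not end with a permutation. A minimal, hedged usage sketch of such a flow bijector (not part of the original example; it assumes a TFP version that still ships masked_autoregressive_default_template and wraps a single MAF layer around a diagonal-normal base):

import tensorflow as tf
import tensorflow_probability as tfp

tfd = tfp.distributions
tfb = tfp.bijectors

event_size = 4
flow = tfb.MaskedAutoregressiveFlow(
    shift_and_log_scale_fn=tfb.masked_autoregressive_default_template(
        hidden_layers=[32, 32]))

flow_dist = tfd.TransformedDistribution(
    distribution=tfd.MultivariateNormalDiag(loc=tf.zeros(event_size)),
    bijector=flow)

samples = flow_dist.sample(10)          # shape [10, event_size]
log_prob = flow_dist.log_prob(samples)  # shape [10]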
Example #2
    def init_bijectors(self, n_layers, hidden_layers):
        with tf.variable_scope(self.name):
            bijectors = []
            for i in range(n_layers):
                if self.flow_type == "MAF":
                    bijectors.append(
                        tfb.MaskedAutoregressiveFlow(
                            shift_and_log_scale_fn=tfb.masked_autoregressive_default_template(
                                hidden_layers=hidden_layers,
                                activation=tf.nn.relu,
                                log_scale_min_clip=self.log_scale_min_clip,
                                log_scale_max_clip=self.log_scale_max_clip,
                                shift_only=self.shift_only,
                                log_scale_clip_gradient=self.log_scale_clip_gradient,
                                name="MAF_template_{}".format(i)),
                            name="MAF_{}".format(i)))
                elif self.flow_type == "IAF":
                    bijectors.append(
                        tfb.Invert(
                            tfb.MaskedAutoregressiveFlow(
                                shift_and_log_scale_fn=tfb.masked_autoregressive_default_template(
                                    hidden_layers=hidden_layers,
                                    activation=tf.nn.relu,
                                    log_scale_min_clip=self.log_scale_min_clip,
                                    log_scale_max_clip=self.log_scale_max_clip,
                                    shift_only=self.shift_only,
                                    log_scale_clip_gradient=self.log_scale_clip_gradient,
                                    name="MAF_template_{}".format(i))),
                            name="IAF_{}".format(i)))
                elif self.flow_type == "RealNVP":
                    bijectors.append(
                        tfb.RealNVP(num_masked=self.event_size - 1,
                                    shift_and_log_scale_fn=tfb.real_nvp_default_template(
                                        hidden_layers=hidden_layers,
                                        activation=tf.nn.relu,
                                        shift_only=self.shift_only,
                                        name="RealNVP_template_{}".format(i)),
                                    name="RealNVP_{}".format(i)))
                else:
                    raise ValueError("Unknown flow type {}".format(
                        self.flow_type))
                bijectors.append(
                    tfb.Permute(permutation=list(range(1, self.event_size)) +
                                [0]))
                # bijectors.append(
                #     tfb.Permute(
                #         self.init_once(np.random.permutation(self.event_size).astype("int32"),
                #                        name="permutation_{}".format(i))
                #     )
                # )

            flow_bijector = tfb.Chain(list(reversed(bijectors[:-1])),
                                      validate_args=True,
                                      name="NF_chain")

            return flow_bijector
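Between flow layers the example inserts a fixed cyclic permutation so that consecutive autoregressive layers see the dimensions in a different order (the commented-out variant uses a random permutation instead). A small, hedged illustration of what that Permute bijector does on its own:

import tensorflow as tf
import tensorflow_probability as tfp

tfb = tfp.bijectors

event_size = 4
perm = tfb.Permute(permutation=list(range(1, event_size)) + [0])  # [1, 2, 3, 0]
perm.forward(tf.constant([[0., 1., 2., 3.]]))  # -> [[1., 2., 3., 0.]]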
Example #3
 def __init__(self,
              n_layers,
              event_size,
              flow_to_reverse=None,
              hidden_layers=[32],
              sample_num=100,
              flow_type="IAF",
              shift_only=False,
              log_scale_min_clip=-0.1,
              log_scale_max_clip=0.1,
              log_scale_clip_gradient=False,
              name="NF"):
     if flow_to_reverse is None:
         self.event_size = event_size
         self.sample_num = sample_num
         self.flow_type = flow_type
         self.shift_only = shift_only
         self.log_scale_min_clip = log_scale_min_clip
         self.log_scale_max_clip = log_scale_max_clip
         self.log_scale_clip_gradient = log_scale_clip_gradient
         self.name = name
         self.bijector = self.init_bijectors(n_layers, hidden_layers)
     else:
         self.event_size = flow_to_reverse.event_size
         self.sample_num = flow_to_reverse.sample_num
         self.flow_type = flow_to_reverse.flow_type + "_reversed"
         self.name = flow_to_reverse.name + "_reversed"
         self.bijector = tfb.Invert(flow_to_reverse.bijector)
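The flow_to_reverse branch simply wraps an existing flow's bijector in tfb.Invert. A standalone, hedged sketch of that pattern with a single layer (variable names are illustrative, not from the example):

import tensorflow_probability as tfp

tfb = tfp.bijectors

maf = tfb.MaskedAutoregressiveFlow(
    shift_and_log_scale_fn=tfb.masked_autoregressive_default_template(
        hidden_layers=[32]))

# Inverting a MAF yields an IAF-style bijector: sampling now uses the MAF's
# cheap single-pass inverse, while log-density evaluation becomes the
# sequential (per-dimension) pass.
iaf = tfb.Invert(maf, name="MAF_reversed")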
Example #4
    def init_bijectors(self,
                       a1: tf.Tensor,
                       b1: tf.Tensor,
                       theta: tf.Tensor,
                       a2: tf.Tensor,
                       b2: tf.Tensor,
                       name: str = 'bernstein_flow') -> tfb.Bijector:
        """
        Builds a normalizing flow using a Bernstein polynomial as Bijector.

        :param      a1:     The scale of f1.
        :type       a1:     Tensor
        :param      b1:     The shift of f1.
        :type       b1:     Tensor
        :param      theta:  The Bernstein coefficients.
        :type       theta:  Tensor
        :param      a2:     The scale of f3.
        :type       a2:     Tensor
        :param      b2:     The shift of f3.
        :type       b2:     Tensor
        :param      name:   The name to give Ops created by the initializer.
        :type       name:   string

        :returns:   The Bernstein flow.
        :rtype:     Bijector
        """
        bijectors = []

        # f1: ŷ = sigma(a1(x)*y - b1(x))
        f1_scale = tfb.Scale(a1, name='f1_scale')
        bijectors.append(f1_scale)
        f1_shift = tfb.Shift(b1, name='f1_shift')
        bijectors.append(f1_shift)

        # clip to range [0, 1]
        bijectors.append(tfb.SoftClip(low=0, high=1, hinge_softness=1.5))

        # f2: ẑ = Bernstein Polynomial
        f2 = BernsteinBijector(theta=theta, name='f2')
        bijectors.append(f2)

        # clip to range [min(theta), max(theta)]
        # bijectors.append(
        #     tfb.Invert(
        #         tfb.SoftClip(
        #             high=tf.math.reduce_max(theta, axis=-1),
        #             low=tf.math.reduce_min(theta, axis=-1),
        #             hinge_softness=0.5
        #         )
        #     )
        # )
        # f3: z = a2(x)*ẑ - b2(x)
        f3_scale = tfb.Scale(a2, name='f3_scale')
        bijectors.append(f3_scale)
        f3_shift = tfb.Shift(b2, name='f3_shift')
        bijectors.append(f3_shift)

        bijectors = list(reversed(bijectors))

        return tfb.Invert(tfb.Chain(bijectors))
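As in the other examples, the returned bijector is meant to be wrapped around a simple base distribution; because the chain above is built in the data-to-base ("normalizing") direction, it is inverted before use. BernsteinBijector is project-specific, so the hedged sketch below illustrates only the surrounding pattern with standard TFP bijectors:

import tensorflow_probability as tfp

tfd = tfp.distributions
tfb = tfp.bijectors

# Chain applies right-to-left, so this maps data y -> 2 * (y - 1) (the
# normalizing direction); Invert turns it back into base -> data for sampling.
normalizer = tfb.Chain([tfb.Scale(2.), tfb.Shift(-1.)])
dist = tfd.TransformedDistribution(
    distribution=tfd.Normal(loc=0., scale=1.),
    bijector=tfb.Invert(normalizer))

y = dist.sample(5)
lp = dist.log_prob(y)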
Example #5
    def __init__(self,
                 a,
                 theta,
                 alpha,
                 beta,
                 validate_args=False,
                 allow_nan_stats=True,
                 name='Amoroso'):

        parameters = dict(locals())
        with tf.name_scope(name) as name:
            self._a = tensor_util.convert_nonref_to_tensor(a)
            self._theta = tensor_util.convert_nonref_to_tensor(theta)
            self._alpha = tensor_util.convert_nonref_to_tensor(alpha)
            self._beta = tensor_util.convert_nonref_to_tensor(beta)
            gamma = tfd.Gamma(alpha, 1.)

            chain = tfb.Invert(
                tfb.Chain([
                    tfb.Exp(),
                    tfb.Scale(beta),
                    tfb.Shift(-tf.math.log(theta)),
                    tfb.Log(),
                    tfb.Shift(-a),
                ]))

            super().__init__(distribution=gamma,
                             bijector=chain,
                             validate_args=validate_args,
                             parameters=parameters,
                             name=name)
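For reference, the inner chain's forward map sends y to ((y - a) / theta) ** beta, so the inverted chain turns a Gamma(alpha, 1) draw G into y = a + theta * G ** (1 / beta), the usual Amoroso parameterization. A hedged standalone sketch of the same construction with plain TFP objects and illustrative parameter values:

import tensorflow as tf
import tensorflow_probability as tfp

tfd = tfp.distributions
tfb = tfp.bijectors

a, theta, alpha, beta = 0., 2., 3., 0.5

chain = tfb.Invert(
    tfb.Chain([
        tfb.Exp(),
        tfb.Scale(beta),
        tfb.Shift(-tf.math.log(theta)),
        tfb.Log(),
        tfb.Shift(-a),
    ]))

amoroso = tfd.TransformedDistribution(distribution=tfd.Gamma(alpha, 1.),
                                      bijector=chain)
y = amoroso.sample(4)
lp = amoroso.log_prob(y)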
Example #6
    def __init__(self, *args, **kwargs):
        self._parents = []
        # Override default bijector if provided
        self._bijector = kwargs.pop("bijector", self._bijector)

        self._untransformed_distribution = self._base_dist(*args, **kwargs)
        self._sample_shape = ()
        self._dim_names = ()
        ctx = contexts.get_context()
        self.name = kwargs.get("name", None)
        if isinstance(ctx, contexts.InferenceContext) and self.name is None:
            # Unfortunately autograph does not allow changing the AST,
            # so we instead retrieve the name from when it was set in the
            # ForwardContext, where AST parsing is possible.
            order_id = len(ctx.vars)  # where am I in the order of RV creation?
            self.name = ctx._names[order_id]

        if not isinstance(ctx, contexts.FreeForwardContext) and self.name is None:
            # We only require names for book keeping during inference
            raise ValueError("No name was set. Supply one via the name kwarg.")

        self._creation_context_id = id(ctx)
        self._backend_tensor = None

        self._distribution = tfd.TransformedDistribution(
            distribution=self._untransformed_distribution, bijector=bijectors.Invert(self._bijector)
        )
        ctx.add_variable(self)
Example #7
    def transformed_interceptor(rv_ctor, *rv_args, **rv_kwargs):
        global bijectors
        try:
            bijector = bijectors.pop(0)
        except IndexError:
            bijector = None

        if bijector is None:
            return edward2.interceptable(rv_ctor)(*rv_args, **rv_kwargs)

        distribution = rv_ctor(*rv_args, **rv_kwargs).distribution
        if invert:
            bijector = tfb.Invert(bijector)

        name = rv_kwargs.pop('name', None)
        value = rv_kwargs.pop('value', None)
        transformed_value = value
        if value is not None:
            transformed_value = bijector.forward(value)

        rv = edward2.TransformedDistribution(distribution,
                                             bijector,
                                             value=transformed_value,
                                             name=name)
        return bijector.inverse(rv)
Example #8
    def __init__(self, *args, **kwargs):
        """Initialize UnitContinuousRV.

        Developer Note
        --------------
            The inverse of the sigmoid bijector is the logodds bijector.
        """
        super().__init__(*args, **kwargs)
        self._transformed_distribution = tfd.TransformedDistribution(
            distribution=self._distribution,
            bijector=bijectors.Invert(bijectors.Sigmoid()))
Example #9
    def __init__(self, *args, **kwargs):
        """Initialize PositiveContinuousRV.

        Developer Note
        --------------
            The inverse of the exponential bijector is the log bijector.
        """
        super().__init__(*args, **kwargs)
        self._transformed_distribution = tfd.TransformedDistribution(
            distribution=self._distribution,
            bijector=bijectors.Invert(bijectors.Exp()))
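The two developer notes above state the identities behind these transforms: inverting the sigmoid bijector gives the log-odds (logit) map, and inverting the exponential bijector gives the natural log. A tiny hedged check, standalone from the classes above:

import tensorflow as tf
import tensorflow_probability as tfp

tfb = tfp.bijectors

x = tf.constant([0.1, 0.5, 0.9])
tfb.Invert(tfb.Sigmoid()).forward(x)  # == tf.math.log(x / (1. - x))
tfb.Invert(tfb.Exp()).forward(x)      # == tf.math.log(x)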
Example #10
def test_transformed_executor_logp_tensorflow(transformed_model):
    norm_log = tfd.TransformedDistribution(tfd.HalfNormal(1), bij.Invert(bij.Exp()))

    _, state = pm.evaluate_model_transformed(transformed_model(), values=dict(__log_n=-math.pi))
    np.testing.assert_allclose(
        state.collect_log_prob(), norm_log.log_prob(-math.pi), equal_nan=False
    )

    _, state = pm.evaluate_model_transformed(transformed_model(), values=dict(n=math.exp(-math.pi)))
    np.testing.assert_allclose(
        state.collect_log_prob(), norm_log.log_prob(-math.pi), equal_nan=False
    )
Example #11
    def _init_distribution(conditions):

        concentration, scale = conditions["concentration"], conditions["scale"]

        scale_tensor, concentration_tensor = (
            tf.convert_to_tensor(scale),
            tf.convert_to_tensor(concentration),
        )
        broadcast_shape = dist_util.prefer_static_broadcast_shape(
            scale_tensor.shape, concentration_tensor.shape
        )

        return tfd.TransformedDistribution(
            distribution=tfd.Uniform(low=tf.zeros(broadcast_shape), high=tf.ones(broadcast_shape)),
            bijector=bij.Invert(bij.WeibullCDF(scale=scale, concentration=concentration)),
            name="Weibull",
        )
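This is inverse-CDF sampling written with bijectors: the forward map of tfb.WeibullCDF is the Weibull CDF, so inverting it pushes the Uniform(0, 1) base through the quantile function. A minimal hedged check that the construction matches tfd.Weibull (assuming a TFP version that provides both):

import tensorflow as tf
import tensorflow_probability as tfp

tfd = tfp.distributions
tfb = tfp.bijectors

scale, concentration = 2.0, 1.5
weibull_via_bijector = tfd.TransformedDistribution(
    distribution=tfd.Uniform(low=0., high=1.),
    bijector=tfb.Invert(
        tfb.WeibullCDF(scale=scale, concentration=concentration)))

x = tf.constant([0.5, 1.0, 2.0])
tf.debugging.assert_near(
    weibull_via_bijector.log_prob(x),
    tfd.Weibull(concentration=concentration, scale=scale).log_prob(x))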
Example #12
def get_iaf_elbo(target, num_mc_samples, param_shapes):
    shape_sizes = [
        _tensorshape_size(pshape) for pshape in param_shapes.values()
    ]
    overall_shape = [sum(shape_sizes)]

    def unmarshal(variational_sample):
        results = []
        n_dimensions_used = 0
        for (n_to_add, result_shape) in zip(shape_sizes,
                                            param_shapes.values()):
            result = variational_sample[Ellipsis,
                                        n_dimensions_used:n_dimensions_used +
                                        n_to_add]
            results.append(tf.reshape(result, result_shape))
            n_dimensions_used += n_to_add
        return tuple(results)

    variational_dist = tfd.TransformedDistribution(
        distribution=tfd.Normal(loc=0., scale=1.),
        bijector=tfb.Invert(
            tfb.MaskedAutoregressiveFlow(
                shift_and_log_scale_fn=tfb.masked_autoregressive_default_template(
                    hidden_layers=[256, 256]))),
        event_shape=overall_shape,
        name='q_iaf')

    variational_samples = variational_dist.sample(num_mc_samples)
    target_q_sum = tf.reduce_sum(
        variational_dist.log_prob(variational_samples))
    target_sum = 0.
    for s in range(num_mc_samples):
        params = unmarshal(variational_samples[s, Ellipsis])
        target_sum = target_sum + target(*params)

    energy = target_sum / float(num_mc_samples)
    entropy = -target_q_sum / float(num_mc_samples)
    elbo = energy + entropy

    tf.summary.scalar('energy', energy)
    tf.summary.scalar('entropy', entropy)
    tf.summary.scalar('elbo', elbo)

    return elbo
Example #13
    def german_credit_model():
        x_numeric = tf.constant(numericals.astype(np.float32))
        x_categorical = [tf.one_hot(c, c.max() + 1) for c in categoricals]
        all_x = tf.concat([x_numeric] + x_categorical, 1)
        num_features = int(all_x.shape[1])

        overall_log_scale = ed.Normal(loc=0.,
                                      scale=10.,
                                      name='overall_log_scale')
        beta_log_scales = ed.TransformedDistribution(
            tfd.Gamma(0.5 * tf.ones([num_features]), 0.5),
            bijector=tfb.Invert(tfb.Exp()),
            name='beta_log_scales')
        beta = ed.Normal(loc=tf.zeros([num_features]),
                         scale=tf.exp(overall_log_scale + beta_log_scales),
                         name='beta')
        logits = tf.einsum('nd,md->mn', all_x, beta[tf.newaxis, :])
        return ed.Bernoulli(logits=logits, name='y')
Example #14
    def test_noiseless_is_consistent_with_cumsum_bijector(self):
        num_timesteps = 10
        ssm = AutoregressiveMovingAverageStateSpaceModel(
            num_timesteps=num_timesteps,
            ar_coefficients=[0.7, -0.2, 0.1],
            ma_coefficients=[0.6],
            level_scale=0.6,
            level_drift=-0.3,
            observation_noise_scale=0.,
            initial_state_prior=tfd.MultivariateNormalDiag(loc=tf.zeros([3]),
                                                           scale_diag=tf.ones(
                                                               [3])))
        cumsum_ssm = IntegratedStateSpaceModel(ssm)
        x, lp = cumsum_ssm.experimental_sample_and_log_prob(
            [2], seed=test_util.test_seed())

        flatten_event = tfb.Reshape([num_timesteps],
                                    event_shape_in=[num_timesteps, 1])
        cumsum_dist = tfb.Chain(
            [tfb.Invert(flatten_event),
             tfb.Cumsum(), flatten_event])(ssm)
        self.assertAllClose(lp, cumsum_dist.log_prob(x), atol=1e-5)
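As used above, calling a bijector (or a Chain) on a distribution, as in tfb.Chain([...])(ssm), is TFP shorthand for building a tfd.TransformedDistribution. A minimal hedged illustration with a standard distribution:

import tensorflow_probability as tfp

tfd = tfp.distributions
tfb = tfp.bijectors

log_normal = tfb.Exp()(tfd.Normal(loc=0., scale=1.))
# Equivalent to:
# tfd.TransformedDistribution(distribution=tfd.Normal(0., 1.), bijector=tfb.Exp())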
    def recenter(rv_constructor, *rv_args, **rv_kwargs):

        rv_name = rv_kwargs.get('name')
        rv_value = rv_kwargs.pop('value', None)

        base_bijector = None
        if rv_constructor.__name__ == 'TransformedDistribution':
            if (rv_args[1].__class__.__name__ == 'Invert'
                    and rv_args[1].bijector.__class__.__name__ == 'SoftClip'):
                distribution = rv_args[0]
                base_bijector = rv_args[1].bijector
                rv_constructor = distribution.__class__
                rv_kwargs = distribution.parameters
                rv_args = rv_args[2:]
                # We were given a value for the transformed RV. Let's pretend it was
                # for the original.
                if rv_value is not None:
                    rv_value = base_bijector.forward(rv_value)

        if (rv_constructor.__name__ == 'Normal'
                and not rv_name.startswith('y')):

            # NB: assume everything is kwargs for now.
            x_loc = rv_kwargs['loc']
            x_scale = rv_kwargs['scale']

            name = rv_kwargs['name']
            a, b, _ = get_or_init(name,
                                  loc_shape=tf.shape(x_loc),
                                  scale_shape=tf.shape(x_scale),
                                  parameterisation_type='scalar')

            kwargs_std = {}
            kwargs_std['loc'] = tf.multiply(x_loc, a)
            kwargs_std['scale'] = tf.pow(
                x_scale, b)  # tf.multiply(x_scale - 1., b) + 1.
            kwargs_std['name'] = name

            scale = x_scale / kwargs_std['scale']  # tf.pow(x_scale, 1. - b)
            shift = x_loc - tf.multiply(scale, kwargs_std['loc'])
            b = tfb.AffineScalar(scale=scale, shift=shift)
            if rv_value is not None:
                rv_value = b.inverse(rv_value)
            learnable_parameters[name +
                                 '_prior_mean'] = tf.convert_to_tensor(x_loc)
            learnable_parameters[name + '_prior_scale'] = tf.convert_to_tensor(
                x_scale)

            # If original RV was constrained, transform the constraint to the new
            # standardized RV. For now we assume a double-sided constraint.
            if base_bijector is not None:
                constraint_std = tfb.SoftClip(
                    low=b.inverse(base_bijector.low),
                    high=b.inverse(base_bijector.high),
                    hinge_softness=base_bijector.hinge_softness / scale
                    if base_bijector.hinge_softness is not None else None)
                rv_std = edward2.TransformedDistribution(
                    rv_constructor(**kwargs_std),
                    tfb.Invert(constraint_std),
                    value=constraint_std.inverse(rv_value)
                    if rv_value is not None else None)
                b = b(constraint_std)
            else:
                kwargs_std['value'] = rv_value
                rv_std = interceptable(rv_constructor)(*rv_args, **kwargs_std)
            bijectors[name] = b
            return b.forward(rv_std)

        elif ((rv_constructor.__name__.startswith('MultivariateNormal')
               or rv_constructor.__name__.startswith('GaussianProcess'))
              and not rv_kwargs['name'].startswith('y')):

            name = rv_kwargs['name']

            if rv_constructor.__name__.startswith('GaussianProcess'):
                gp_dist = rv_constructor(*rv_args, **rv_kwargs).distribution
                X = gp_dist._get_index_points()
                x_loc = gp_dist.mean_fn(X)
                x_cov = gp_dist._compute_covariance(index_points=X)
            else:
                x_loc = rv_kwargs['loc']
                x_cov = rv_kwargs['covariance_matrix']

            a, b, c = get_or_init(name,
                                  loc_shape=tf.shape(x_loc),
                                  scale_shape=tf.shape(x_cov)[:-1],
                                  parameterisation_type=parameterisation_type)
            ndims = tf.shape(x_cov)[-1]
            x_loc = tf.broadcast_to(x_loc, tf.shape(x_cov)[:-1])
            cov_dtype = tf.float64 if FLAGS.float64 else x_cov.dtype
            x_cov = tf.cast(x_cov, cov_dtype)
            if parameterisation_type == 'eig':
                """Extra cost of the eigendecomposition?

        we do the eig to get Lambda, Q.
        We rescale Lambda and create the prior dist linop
           - point one: the prior is an MVN (albeit an efficient one), where
              in NCP it's just Normal
        Then we construct the remaining scale matrix. (an n**3 matmul)
        And unlike a cholesky factor these matrices aren't triangular, so
        multiplication or division

        - can we
        """

                Lambda, Q = eigh_with_safe_gradient(x_cov)
                Lambda = tf.abs(Lambda)
                Lambda = tf.cast(Lambda, tf.float32)
                Q = tf.cast(Q, tf.float32)
                Lambda_hat_b = tf.pow(Lambda, b)
                if tied_pparams:
                    # If the scale parameterization is in the eigenbasis,
                    # apply it to the mean in the same basis.
                    loc_in_eigenbasis = tf.linalg.matvec(Q,
                                                         x_loc,
                                                         adjoint_a=True)
                    reparam_loc = tf.linalg.matvec(
                        Q, tf.multiply(loc_in_eigenbasis, a))
                else:
                    reparam_loc = tf.multiply(x_loc, a)

                kwargs_std = {}
                kwargs_std['loc'] = reparam_loc
                kwargs_std['scale'] = LinearOperatorEigenScale(
                    Q, d=tf.sqrt(Lambda_hat_b))
                kwargs_std['name'] = name

                Q_linop = LinearOperatorOrthogonal(Q, det_is_positive=True)
                scale = tf.linalg.LinearOperatorComposition([
                    Q_linop,
                    tf.linalg.LinearOperatorDiag(tf.sqrt(Lambda + 1e-10)),
                    tf.linalg.LinearOperatorDiag(
                        1. / tf.sqrt(Lambda_hat_b + 1e-10)),
                    Q_linop.adjoint(),
                ])
                shift = x_loc - scale.matvec(reparam_loc)
                b = tfb.AffineLinearOperator(scale=scale, shift=shift)

                if 'value' in rv_kwargs:
                    kwargs_std['value'] = b.inverse(rv_kwargs['value'])

            elif parameterisation_type == 'chol':
                L = tf.linalg.cholesky(x_cov +
                                       1e-6 * tf.eye(ndims, dtype=x_cov.dtype))
                L = tf.cast(L, tf.float32)

                reparam_loc = x_loc * a
                reparam_scale = tf.linalg.LinearOperatorLowerTriangular(
                    tf.linalg.diag(1 - b) + b[..., tf.newaxis] * L)
                kwargs_std = {}
                kwargs_std['loc'] = reparam_loc
                kwargs_std['scale'] = reparam_scale
                kwargs_std['name'] = name

                Dinv = tf.linalg.triangular_solve(
                    tf.cast(reparam_scale.to_dense(), cov_dtype),
                    tf.eye(ndims, dtype=cov_dtype))
                Dinv = tf.cast(Dinv, tf.float32)
                scale = tf.matmul(L, Dinv)
                shift = x_loc - tf.linalg.matvec(scale, reparam_loc)
                b = tfb.AffineLinearOperator(
                    scale=tf.linalg.LinearOperatorFullMatrix(scale),
                    shift=shift)
                if 'value' in rv_kwargs:
                    kwargs_std['value'] = b.inverse(rv_kwargs['value'])

            elif parameterisation_type == 'indep':
                # Assumes `C^-1 = diag(c)` is a learned diagonal matrix of 'evidence
                # precisions'. This approximates the true posterior under an iid
                # Gaussian observation model:
                prior_chol = tf.linalg.cholesky(x_cov)
                prior_inv = tf.linalg.cholesky_solve(
                    prior_chol, tf.eye(ndims, dtype=prior_chol.dtype))
                approx_posterior_prec = prior_inv + tf.cast(
                    tf.linalg.diag(c), prior_inv.dtype)
                approx_posterior_prec_chol = tf.linalg.cholesky(
                    approx_posterior_prec)
                approx_posterior_cov = tf.linalg.cholesky_solve(
                    approx_posterior_prec_chol,
                    tf.eye(ndims, dtype=approx_posterior_prec_chol.dtype))
                cov_chol = tf.linalg.cholesky(approx_posterior_cov)

                cov_chol = tf.cast(cov_chol, tf.float32)
                prior_chol = tf.cast(prior_chol, tf.float32)
                scale_linop = tf.linalg.LinearOperatorLowerTriangular(cov_chol)

                reparam_loc = x_loc * a
                reparam_scale = tf.linalg.LinearOperatorComposition([
                    tf.linalg.LinearOperatorInversion(scale_linop),
                    tf.linalg.LinearOperatorLowerTriangular(prior_chol)
                ])
                kwargs_std = {}
                kwargs_std['loc'] = reparam_loc
                kwargs_std['scale'] = reparam_scale
                kwargs_std['name'] = name

                shift = x_loc - scale_linop.matvec(reparam_loc)
                b = tfb.AffineLinearOperator(scale=scale_linop, shift=shift)
                if 'value' in rv_kwargs:
                    kwargs_std['value'] = b.inverse(rv_kwargs['value'])

            elif parameterisation_type == 'eigindep':
                # Combines 'eig' and 'indep' parameterizations, modeling the posterior
                # as
                # (V D**(-b) V' + diag(c))^-1
                # where VDV' is the eigendecomposition of the prior cov, and b and c
                # are learned vectors.
                b, c = [tf.cast(x, cov_dtype) for x in (b, c)]
                Lambda, Q = eigh_with_safe_gradient(x_cov)
                Lambda = tf.abs(Lambda)
                Lambda_hat_b = 1e-6 + tf.pow(Lambda, b)
                prior = tf.matmul(
                    Q,
                    tf.matmul(tf.linalg.diag(Lambda_hat_b), Q, adjoint_b=True))
                prior_chol = tf.linalg.cholesky(
                    prior + 1e-6 * tf.eye(ndims, dtype=prior.dtype))
                prior_prec = tf.linalg.cholesky_solve(
                    prior_chol + 1e-6 * tf.eye(ndims, dtype=prior_chol.dtype),
                    tf.eye(ndims, dtype=prior_chol.dtype))

                approx_posterior_prec = prior_prec + tf.linalg.diag(c)
                approx_posterior_prec_chol = tf.linalg.cholesky(
                    approx_posterior_prec)
                approx_posterior_cov = tf.linalg.cholesky_solve(
                    approx_posterior_prec_chol + 1e-6 *
                    tf.eye(ndims, dtype=approx_posterior_prec_chol.dtype),
                    tf.eye(ndims, dtype=approx_posterior_prec_chol.dtype))
                cov_chol = tf.linalg.cholesky(
                    approx_posterior_cov +
                    1e-6 * tf.eye(ndims, dtype=approx_posterior_cov.dtype))
                cov_chol = tf.cast(cov_chol, tf.float32)
                prior_chol = tf.cast(prior_chol, tf.float32)
                scale_linop = tf.linalg.LinearOperatorLowerTriangular(cov_chol)

                reparam_loc = tf.multiply(x_loc, a)

                reparam_scale = tf.linalg.LinearOperatorComposition([
                    tf.linalg.LinearOperatorInversion(scale_linop),
                    tf.linalg.LinearOperatorLowerTriangular(prior_chol)
                ])
                kwargs_std = {}
                kwargs_std['loc'] = reparam_loc
                kwargs_std['scale'] = reparam_scale
                kwargs_std['name'] = name

                shift = x_loc - scale_linop.matvec(reparam_loc)
                b = tfb.AffineLinearOperator(scale=scale_linop, shift=shift)
                if 'value' in rv_kwargs:
                    kwargs_std['value'] = b.inverse(rv_kwargs['value'])
            else:
                raise Exception('unrecognized reparameterization strategy!')

            if rv_constructor.__name__.startswith('GaussianProcess'):
                rv_std = edward2.MultivariateNormalLinearOperator(
                    *rv_args, **kwargs_std)
            else:
                rv_std = interceptable(rv_constructor)(*rv_args, **kwargs_std)

            bijectors[name] = b
            return b.forward(rv_std)
        else:
            return interceptable(rv_constructor)(*rv_args, **rv_kwargs)