Example #1
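Note on imports: the snippets on this page come from different repositories, so their import preambles are not shown. The following is a hedged reconstruction of the aliases the examples appear to assume (project-specific helpers such as interceptable, get_or_init, bijectors, learnable_parameters, edward2 and the custom linear operators are defined in the source projects and are not reconstructed here):

# Hedged reconstruction of the imports these examples appear to assume.
import numpy as np
import tensorflow as tf
import tensorflow_probability as tfp

tfb = tfp.bijectors          # AffineScalar, Sigmoid, Chain, Shift, Scale, ...
tfd = tfp.distributions

import gpflow
import gpflow as gpf
from gpflow import Parameter
from gpflow.utilities import positive

# Note: tfb.AffineScalar was deprecated and later removed from TensorFlow
# Probability; on recent versions the equivalent composition is
# tfb.Shift(shift)(tfb.Scale(scale)), as some of the later examples already use.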
    def __init__(self, data, Y_var):
        super().__init__(active_dims=[0])
        self.Y_var = Y_var
        self.num_genes = data.m_obs.shape[1]
        #         l_affine = tfb.AffineScalar(shift=tf.cast(1., tf.float64),
        #                             scale=tf.cast(4-1., tf.float64))
        #         l_sigmoid = tfb.Sigmoid()
        #         l_logistic = tfb.Chain([l_affine, l_sigmoid])

        self.lengthscale = gpflow.Parameter(1.414, transform=positive())

        D_affine = tfb.AffineScalar(shift=tf.cast(0.1, tf.float64),
                                    scale=tf.cast(1.5 - 0.1, tf.float64))
        D_sigmoid = tfb.Sigmoid()
        D_logistic = tfb.Chain([D_affine, D_sigmoid])
        S_affine = tfb.AffineScalar(shift=tf.cast(0.1, tf.float64),
                                    scale=tf.cast(4. - 0.1, tf.float64))
        S_sigmoid = tfb.Sigmoid()
        S_logistic = tfb.Chain([S_affine, S_sigmoid])

        self.D = gpflow.Parameter(np.random.uniform(0.9, 1, self.num_genes),
                                  transform=positive(),
                                  dtype=tf.float64)
        #         self.D[3].trainable = False
        #         self.D[3].assign(0.8)
        self.S = gpflow.Parameter(np.random.uniform(1, 1, self.num_genes),
                                  transform=positive(),
                                  dtype=tf.float64)
        #         self.S[3].trainable = False
        #         self.S[3].assign(1)
        self.kervar = gpflow.Parameter(np.float64(1), transform=positive())
        self.noise_term = gpflow.Parameter(
            0.1353 * tf.ones(self.num_genes, dtype='float64'),
            transform=positive())
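A quick, hedged illustration of what the Chain([AffineScalar, Sigmoid]) pattern above does (assuming an older TFP that still ships tfb.AffineScalar): tfb.Chain applies its bijectors right to left, so Sigmoid first squashes the unconstrained value into (0, 1), and the affine then maps it into (shift, shift + scale), here (0.1, 1.5) for the decay parameter D.

import tensorflow as tf
import tensorflow_probability as tfp

tfb = tfp.bijectors

D_logistic = tfb.Chain([
    tfb.AffineScalar(shift=tf.cast(0.1, tf.float64),
                     scale=tf.cast(1.5 - 0.1, tf.float64)),
    tfb.Sigmoid()
])
unconstrained = tf.constant([-10., 0., 10.], dtype=tf.float64)
print(D_logistic.forward(unconstrained).numpy())  # all values lie strictly in (0.1, 1.5)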
Example #2
    def __init__(self,
                 active_dims=[0],
                 gap_decay=0.1,
                 match_decay=0.9,
                 max_subsequence_length=3,
                 max_occurence_length=10,
                 alphabet=[],
                 maxlen=0,
                 normalize=True,
                 batch_size=1000):
        super().__init__(active_dims=active_dims)
        # constrain kernel params to between 0 and 1
        self.logistic_gap = tfb.Chain([
            tfb.AffineScalar(shift=tf.cast(0, tf.float64),
                             scale=tf.cast(1, tf.float64)),
            tfb.Sigmoid()
        ])
        self.logisitc_match = tfb.Chain([
            tfb.AffineScalar(shift=tf.cast(0, tf.float64),
                             scale=tf.cast(1, tf.float64)),
            tfb.Sigmoid()
        ])
        self.gap_decay_param = Parameter(gap_decay,
                                         transform=self.logistic_gap,
                                         name="gap_decay")
        self.match_decay_param = Parameter(match_decay,
                                           transform=self.logisitc_match,
                                           name="match_decay")
        self.max_subsequence_length = max_subsequence_length
        self.max_occurence_length = max_occurence_length
        self.alphabet = alphabet
        self.maxlen = maxlen
        self.normalize = normalize
        self.batch_size = batch_size
        self.symmetric = False

        # We use copies of the kernel params here to avoid building an expensive computation graph;
        # gradients are instead calculated efficiently via dynamic programming.
        # These copies are refreshed at every call to K and K_diag (to check whether the parameters have been updated).
        self.match_decay = self.match_decay_param.numpy()
        self.gap_decay = self.gap_decay_param.numpy()
        self.match_decay_unconstrained = self.match_decay_param.unconstrained_variable.numpy(
        )
        self.gap_decay_unconstrained = self.gap_decay_param.unconstrained_variable.numpy(
        )

        # initialize helpful construction matrices, to be lazily computed once needed
        self.D = None
        self.dD_dgap = None

        # build a lookup table of the alphabet to encode input strings
        self.table = tf.lookup.StaticHashTable(
            initializer=tf.lookup.KeyValueTensorInitializer(
                keys=tf.constant(["PAD"] + alphabet),
                values=tf.constant(range(0,
                                         len(alphabet) + 1)),
            ),
            default_value=0)
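For context, a small self-contained sketch of the StaticHashTable encoding built above, with a hypothetical four-letter alphabet (the real alphabet is whatever the kernel is constructed with): "PAD" maps to 0, each symbol maps to its 1-based index, and unknown symbols fall back to the default value 0.

import tensorflow as tf

alphabet = ["A", "C", "G", "T"]  # hypothetical alphabet for illustration
table = tf.lookup.StaticHashTable(
    initializer=tf.lookup.KeyValueTensorInitializer(
        keys=tf.constant(["PAD"] + alphabet),
        values=tf.constant(list(range(0, len(alphabet) + 1)))),
    default_value=0)

tokens = tf.constant([["A", "C", "T", "PAD"],
                      ["G", "G", "A", "PAD"]])
print(table.lookup(tokens).numpy())  # [[1 2 4 0]
                                     #  [3 3 1 0]]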
    def recenter(rv_constructor, *rv_args, **rv_kwargs):
        if (rv_constructor.__name__ == 'Normal'
                and not rv_kwargs['name'].startswith('y')):

            # NB: assume everything is kwargs for now.
            x_loc = rv_kwargs['loc']
            x_scale = rv_kwargs['scale']

            name = rv_kwargs['name']
            shape = rv_constructor(*rv_args, **rv_kwargs).shape

            a, b = get_or_init(name, shape)  # w

            kwargs_std = {}
            kwargs_std['loc'] = tf.multiply(x_loc, a)
            kwargs_std['scale'] = tf.pow(x_scale, b)
            kwargs_std['name'] = name + '_param'

            scale = tf.pow(x_scale, 1. - b)
            b = tfb.AffineScalar(scale=scale,
                                 shift=x_loc +
                                 tf.multiply(scale, -kwargs_std['loc']))
            if 'value' in rv_kwargs:
                kwargs_std['value'] = b.inverse(rv_kwargs['value'])

            rv_std = interceptable(rv_constructor)(*rv_args, **kwargs_std)
            bijectors[name] = b
            return b.forward(rv_std)

        else:
            return interceptable(rv_constructor)(*rv_args, **rv_kwargs)
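The Normal branch above implements a partially non-centered ("recentered") parameterization: the standardized variable is drawn from Normal(a * loc, scale ** b) and mapped back through an affine bijector. A minimal numeric sanity check of that identity (not part of the original code):

import tensorflow as tf
import tensorflow_probability as tfp

tfd = tfp.distributions

# If z ~ Normal(a * loc, scale ** b), then
#   loc + scale**(1 - b) * (z - a * loc)  ~  Normal(loc, scale)
# for any interpolation parameters a, b.
loc, scale, a, b = 2.0, 3.0, 0.25, 0.7
z = tfd.Normal(loc * a, scale ** b).sample(200000, seed=1)
x = loc + scale ** (1.0 - b) * (z - a * loc)
print(tf.reduce_mean(x).numpy(), tf.math.reduce_std(x).numpy())  # approx. 2.0 and 3.0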
Example #4
    def __init__(self, rank=1, active_dims=[0], gap_decay=0.1, match_decay=0.9,
                 max_subsequence_length=3, alphabet=[], maxlen=0):
        super().__init__(active_dims=active_dims)
        # constrain decay kernel params to between 0 and 1
        logistic_gap = tfb.Chain([
            tfb.Shift(tf.cast(0, tf.float64))(tfb.Scale(tf.cast(1, tf.float64))),
            tfb.Sigmoid()
        ])
        logistic_match = tfb.Chain([
            tfb.AffineScalar(shift=tf.cast(0, tf.float64),
                             scale=tf.cast(1, tf.float64)),
            tfb.Sigmoid()
        ])
        self.gap_decay = Parameter(gap_decay, transform=logistic_gap, name="gap_decay")
        self.match_decay = Parameter(match_decay, transform=logistic_match, name="match_decay")

        # prepare similarity matrix parameters
        self.rank = rank
        W = 0.1 * tf.ones((len(alphabet), self.rank))
        kappa = tf.ones(len(alphabet))

        self.W = Parameter(W, name="W")
        self.kappa = Parameter(kappa, transform=positive(), name="kappa")

        # store additional kernel parameters
        self.max_subsequence_length = tf.constant(max_subsequence_length)
        self.alphabet = tf.constant(alphabet)
        self.alphabet_size = tf.shape(self.alphabet)[0]
        self.maxlen = tf.constant(maxlen)

        # build a lookup table of the alphabet to encode input strings
        self.table = tf.lookup.StaticHashTable(
            initializer=tf.lookup.KeyValueTensorInitializer(
                keys=tf.constant(["PAD"] + alphabet),
                values=tf.constant(range(0, len(alphabet) + 1))),
            default_value=0)
def bounded_parameter(low, high, param):
    """Make a gpflow Parameter with optimization bounds (low, high)."""
    affine = tfb.AffineScalar(shift=tf.cast(low, tf.float64),
                              scale=tf.cast(high - low, tf.float64))
    sigmoid = tfb.Sigmoid()
    logistic = tfb.Chain([affine, sigmoid])
    parameter = gpf.Parameter(param, transform=logistic, dtype=tf.float64)
    return parameter
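A hedged usage sketch of the helper above, assuming `gpf` is `import gpflow as gpf` and an older TFP that still provides `tfb.AffineScalar` (see the import note at the top of this page):

# Constrain a value to lie strictly inside (0.1, 10.0); optimizers act on the
# unconstrained variable, so they can never step outside the bounds.
lengthscale = bounded_parameter(0.1, 10.0, 1.0)
print(lengthscale.numpy())                          # 1.0 (constrained value)
print(lengthscale.unconstrained_variable.numpy())   # its unconstrained image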
Example #6
    def __init__(self,
                 m=1,
                 active_dims=[0],
                 gap_decay=0.1,
                 match_decay=0.9,
                 max_subsequence_length=3,
                 alphabet=[],
                 maxlen=0):
        super().__init__(active_dims=active_dims)
        # constrain decay kernel params to between 0 and 1
        logistic_gap = tfb.Chain([
            tfb.Shift(tf.cast(0, tf.float64))(tfb.Scale(tf.cast(1,
                                                                tf.float64))),
            tfb.Sigmoid()
        ])
        logistic_match = tfb.Chain([
            tfb.AffineScalar(shift=tf.cast(0, tf.float64),
                             scale=tf.cast(1, tf.float64)),
            tfb.Sigmoid()
        ])
        self.gap_decay = Parameter(gap_decay,
                                   transform=logistic_gap,
                                   name="gap_decay")
        self.match_decay = Parameter(match_decay,
                                     transform=logistic_match,
                                     name="match_decay")

        # prepare order coefs params
        order_coefs = tf.ones(max_subsequence_length)
        self.order_coefs = Parameter(order_coefs,
                                     transform=positive(),
                                     name="order_coefs")

        # get split weights
        self.m = m
        split_weights = tf.ones(2 * self.m - 1)
        self.split_weights = Parameter(split_weights,
                                       transform=positive(),
                                       name="split_weights")

        # store additional kernel parameters
        self.max_subsequence_length = tf.constant(max_subsequence_length)
        self.alphabet = tf.constant(alphabet)
        self.alphabet_size = tf.shape(self.alphabet)[0]
        self.maxlen = tf.cast(tf.math.ceil(maxlen / self.m), dtype=tf.int32)
        self.full_maxlen = tf.constant(maxlen)
        # build a lookup table of the alphabet to encode input strings
        self.table = tf.lookup.StaticHashTable(
            initializer=tf.lookup.KeyValueTensorInitializer(
                keys=tf.constant(["PAD"] + alphabet),
                values=tf.constant(range(0,
                                         len(alphabet) + 1)),
            ),
            default_value=0)
def ncp(rv_constructor, *rv_args, **rv_kwargs):
    if (rv_constructor.__name__ == 'Normal'
            and not rv_kwargs['name'].startswith('y')):
        loc = rv_kwargs['loc']
        scale = rv_kwargs['scale']
        name = rv_kwargs['name']

        shape = rv_constructor(*rv_args, **rv_kwargs).shape

        kwargs_std = {}
        kwargs_std['loc'] = tf.zeros(shape)
        kwargs_std['scale'] = tf.ones(shape)
        kwargs_std['name'] = name + '_std'

        b = tfb.AffineScalar(scale=scale, shift=loc)
        if 'value' in rv_kwargs:
            kwargs_std['value'] = b.inverse(rv_kwargs['value'])

        rv_std = interceptable(rv_constructor)(*rv_args, **kwargs_std)

        return b.forward(rv_std)

    else:
        return interceptable(rv_constructor)(*rv_args, **rv_kwargs)
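Example #7 is the textbook non-centered parameterization: draw a standard Normal and push it through shift + scale * z. A small check that the transformed standard Normal matches the original distribution, written with tfb.Shift/tfb.Scale (the modern replacement for the deprecated tfb.AffineScalar used above):

import tensorflow as tf
import tensorflow_probability as tfp

tfd, tfb = tfp.distributions, tfp.bijectors

loc, scale = -1.0, 4.0
std = tfd.Normal(0.0, 1.0)
recentered = tfd.TransformedDistribution(std, tfb.Shift(loc)(tfb.Scale(scale)))
print(recentered.log_prob(0.5).numpy())             # identical log-densities
print(tfd.Normal(loc, scale).log_prob(0.5).numpy())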
Example #8
    def __init__(self,
                 rank=1,
                 active_dims=[0],
                 gap_decay=0.1,
                 match_decay=0.9,
                 max_subsequence_length=3,
                 alphabet=[],
                 maxlen=0,
                 batch_size=100):
        super().__init__(active_dims=active_dims)
        # constrain decay kernel params to between 0 and 1
        self.logistic_gap = tfb.Chain([
            tfb.Shift(tf.cast(0, tf.float64))(tfb.Scale(tf.cast(1,
                                                                tf.float64))),
            tfb.Sigmoid()
        ])
        self.logisitc_match = tfb.Chain([
            tfb.AffineScalar(shift=tf.cast(0, tf.float64),
                             scale=tf.cast(1, tf.float64)),
            tfb.Sigmoid()
        ])

        self.gap_decay_param = Parameter(gap_decay,
                                         transform=self.logistic_gap,
                                         name="gap_decay")
        self.match_decay_param = Parameter(match_decay,
                                           transform=self.logisitc_match,
                                           name="match_decay")

        self.W_param = Parameter(0.1 * tf.ones((len(alphabet), rank)),
                                 name="W")

        self.kappa_param = Parameter(tf.ones(len(alphabet)),
                                     transform=positive(),
                                     name="kappa")

        # We use copies of the kernel params here to avoid building an expensive computation graph;
        # gradients are instead calculated efficiently via dynamic programming.
        # These copies are refreshed at every call to K and K_diag (to check whether the parameters have been updated).
        self.match_decay = self.match_decay_param.numpy()
        self.gap_decay = self.gap_decay_param.numpy()
        self.kappa = self.kappa_param.numpy()
        self.W = self.W_param.numpy()
        self.match_decay_unconstrained = self.match_decay_param.unconstrained_variable.numpy(
        )
        self.gap_decay_unconstrained = self.gap_decay_param.unconstrained_variable.numpy(
        )
        self.kappa_unconstrained = self.kappa_param.unconstrained_variable.numpy(
        )
        self.W_unconstrained = self.W_param.unconstrained_variable.numpy()

        # store additional kernel parameters
        self.max_subsequence_length = tf.constant(max_subsequence_length)
        self.alphabet = tf.constant(alphabet)
        self.alphabet_size = tf.shape(self.alphabet)[0]
        self.maxlen = tf.constant(maxlen)
        self.batch_size = tf.constant(batch_size)
        self.rank = tf.constant(rank)

        # build a lookup table of the alphabet to encode input strings
        self.table = tf.lookup.StaticHashTable(
            initializer=tf.lookup.KeyValueTensorInitializer(
                keys=tf.constant(["PAD"] + alphabet),
                values=tf.constant(range(0,
                                         len(alphabet) + 1)),
            ),
            default_value=0)

        # initialize helpful construction matrices, to be lazily computed once needed
        self.D = None
        self.dD_dgap = None
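The cached `.numpy()` copies above exist so that K / K_diag can cheaply detect whether the optimizer has moved a parameter and only then rebuild the dynamic-programming helper matrices. A minimal, hypothetical sketch of that pattern outside the kernel class:

import tensorflow_probability as tfp
from gpflow import Parameter

tfb = tfp.bijectors

p = Parameter(0.9, transform=tfb.Sigmoid(), name="match_decay")
cached_unconstrained = p.unconstrained_variable.numpy()

p.unconstrained_variable.assign_add(0.1)  # stand-in for one optimizer step

needs_refresh = cached_unconstrained != p.unconstrained_variable.numpy()
print(needs_refresh)  # True -> recompute D and dD_dgap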
    def recenter(rv_constructor, *rv_args, **rv_kwargs):

        rv_name = rv_kwargs.get('name')
        rv_value = rv_kwargs.pop('value', None)

        base_bijector = None
        if rv_constructor.__name__ == 'TransformedDistribution':
            if (rv_args[1].__class__.__name__ == 'Invert'
                    and rv_args[1].bijector.__class__.__name__ == 'SoftClip'):
                distribution = rv_args[0]
                base_bijector = rv_args[1].bijector
                rv_constructor = distribution.__class__
                rv_kwargs = distribution.parameters
                rv_args = rv_args[2:]
                # We were given a value for the transformed RV. Let's pretend it was
                # for the original.
                if rv_value is not None:
                    rv_value = base_bijector.forward(rv_value)

        if (rv_constructor.__name__ == 'Normal'
                and not rv_name.startswith('y')):

            # NB: assume everything is kwargs for now.
            x_loc = rv_kwargs['loc']
            x_scale = rv_kwargs['scale']

            name = rv_kwargs['name']
            a, b, _ = get_or_init(name,
                                  loc_shape=tf.shape(x_loc),
                                  scale_shape=tf.shape(x_scale),
                                  parameterisation_type='scalar')

            kwargs_std = {}
            kwargs_std['loc'] = tf.multiply(x_loc, a)
            kwargs_std['scale'] = tf.pow(
                x_scale, b)  # tf.multiply(x_scale - 1., b) + 1.
            kwargs_std['name'] = name

            scale = x_scale / kwargs_std['scale']  # tf.pow(x_scale, 1. - b)
            shift = x_loc - tf.multiply(scale, kwargs_std['loc'])
            b = tfb.AffineScalar(scale=scale, shift=shift)
            if rv_value is not None:
                rv_value = b.inverse(rv_value)
            learnable_parameters[name +
                                 '_prior_mean'] = tf.convert_to_tensor(x_loc)
            learnable_parameters[name + '_prior_scale'] = tf.convert_to_tensor(
                x_scale)

            # If original RV was constrained, transform the constraint to the new
            # standardized RV. For now we assume a double-sided constraint.
            if base_bijector is not None:
                constraint_std = tfb.SoftClip(
                    low=b.inverse(base_bijector.low),
                    high=b.inverse(base_bijector.high),
                    hinge_softness=base_bijector.hinge_softness / scale
                    if base_bijector.hinge_softness is not None else None)
                rv_std = edward2.TransformedDistribution(
                    rv_constructor(**kwargs_std),
                    tfb.Invert(constraint_std),
                    value=constraint_std.inverse(rv_value)
                    if rv_value is not None else None)
                b = b(constraint_std)
            else:
                kwargs_std['value'] = rv_value
                rv_std = interceptable(rv_constructor)(*rv_args, **kwargs_std)
            bijectors[name] = b
            return b.forward(rv_std)

        elif ((rv_constructor.__name__.startswith('MultivariateNormal')
               or rv_constructor.__name__.startswith('GaussianProcess'))
              and not rv_kwargs['name'].startswith('y')):

            name = rv_kwargs['name']

            if rv_constructor.__name__.startswith('GaussianProcess'):
                gp_dist = rv_constructor(*rv_args, **rv_kwargs).distribution
                X = gp_dist._get_index_points()
                x_loc = gp_dist.mean_fn(X)
                x_cov = gp_dist._compute_covariance(index_points=X)
            else:
                x_loc = rv_kwargs['loc']
                x_cov = rv_kwargs['covariance_matrix']

            a, b, c = get_or_init(name,
                                  loc_shape=tf.shape(x_loc),
                                  scale_shape=tf.shape(x_cov)[:-1],
                                  parameterisation_type=parameterisation_type)
            ndims = tf.shape(x_cov)[-1]
            x_loc = tf.broadcast_to(x_loc, tf.shape(x_cov)[:-1])
            cov_dtype = tf.float64 if FLAGS.float64 else x_cov.dtype
            x_cov = tf.cast(x_cov, cov_dtype)
            if parameterisation_type == 'eig':
                """Extra cost of the eigendecomposition?

        we do the eig to get Lambda, Q.
        We rescale Lambda and create the prior dist linop
           - point one: the prior is an MVN (albeit an efficient one), where
              in NCP it's just Normal
        Then we construct the remaining scale matrix. (an n**3 matmul)
        And unlike a cholesky factor these matrices aren't triangular, so
        multiplication or division

        - can we
        """

                Lambda, Q = eigh_with_safe_gradient(x_cov)
                Lambda = tf.abs(Lambda)
                Lambda = tf.cast(Lambda, tf.float32)
                Q = tf.cast(Q, tf.float32)
                Lambda_hat_b = tf.pow(Lambda, b)
                if tied_pparams:
                    # If the scale parameterization is in the eigenbasis,
                    # apply it to the mean in the same basis.
                    loc_in_eigenbasis = tf.linalg.matvec(Q,
                                                         x_loc,
                                                         adjoint_a=True)
                    reparam_loc = tf.linalg.matvec(
                        Q, tf.multiply(loc_in_eigenbasis, a))
                else:
                    reparam_loc = tf.multiply(x_loc, a)

                kwargs_std = {}
                kwargs_std['loc'] = reparam_loc
                kwargs_std['scale'] = LinearOperatorEigenScale(
                    Q, d=tf.sqrt(Lambda_hat_b))
                kwargs_std['name'] = name

                Q_linop = LinearOperatorOrthogonal(Q, det_is_positive=True)
                scale = tf.linalg.LinearOperatorComposition([
                    Q_linop,
                    tf.linalg.LinearOperatorDiag(tf.sqrt(Lambda + 1e-10)),
                    tf.linalg.LinearOperatorDiag(
                        1. / tf.sqrt(Lambda_hat_b + 1e-10)),
                    Q_linop.adjoint(),
                ])
                shift = x_loc - scale.matvec(reparam_loc)
                b = tfb.AffineLinearOperator(scale=scale, shift=shift)

                if 'value' in rv_kwargs:
                    kwargs_std['value'] = b.inverse(rv_kwargs['value'])

            elif parameterisation_type == 'chol':
                L = tf.linalg.cholesky(x_cov +
                                       1e-6 * tf.eye(ndims, dtype=x_cov.dtype))
                L = tf.cast(L, tf.float32)

                reparam_loc = x_loc * a
                reparam_scale = tf.linalg.LinearOperatorLowerTriangular(
                    tf.linalg.diag(1 - b) + b[..., tf.newaxis] * L)
                kwargs_std = {}
                kwargs_std['loc'] = reparam_loc
                kwargs_std['scale'] = reparam_scale
                kwargs_std['name'] = name

                Dinv = tf.linalg.triangular_solve(
                    tf.cast(reparam_scale.to_dense(), cov_dtype),
                    tf.eye(ndims, dtype=cov_dtype))
                Dinv = tf.cast(Dinv, tf.float32)
                scale = tf.matmul(L, Dinv)
                shift = x_loc - tf.linalg.matvec(scale, reparam_loc)
                b = tfb.AffineLinearOperator(
                    scale=tf.linalg.LinearOperatorFullMatrix(scale),
                    shift=shift)
                if 'value' in rv_kwargs:
                    kwargs_std['value'] = b.inverse(rv_kwargs['value'])

            elif parameterisation_type == 'indep':
                # Assumes `C^-1 = diag(c)` is a learned diagonal matrix of 'evidence
                # precisions'. This approximates the true posterior under an iid
                # Gaussian observation model:
                prior_chol = tf.linalg.cholesky(x_cov)
                prior_inv = tf.linalg.cholesky_solve(
                    prior_chol, tf.eye(ndims, dtype=prior_chol.dtype))
                approx_posterior_prec = prior_inv + tf.cast(
                    tf.linalg.diag(c), prior_inv.dtype)
                approx_posterior_prec_chol = tf.linalg.cholesky(
                    approx_posterior_prec)
                approx_posterior_cov = tf.linalg.cholesky_solve(
                    approx_posterior_prec_chol,
                    tf.eye(ndims, dtype=approx_posterior_prec_chol.dtype))
                cov_chol = tf.linalg.cholesky(approx_posterior_cov)

                cov_chol = tf.cast(cov_chol, tf.float32)
                prior_chol = tf.cast(prior_chol, tf.float32)
                scale_linop = tf.linalg.LinearOperatorLowerTriangular(cov_chol)

                reparam_loc = x_loc * a
                reparam_scale = tf.linalg.LinearOperatorComposition([
                    tf.linalg.LinearOperatorInversion(scale_linop),
                    tf.linalg.LinearOperatorLowerTriangular(prior_chol)
                ])
                kwargs_std = {}
                kwargs_std['loc'] = reparam_loc
                kwargs_std['scale'] = reparam_scale
                kwargs_std['name'] = name

                shift = x_loc - scale_linop.matvec(reparam_loc)
                b = tfb.AffineLinearOperator(scale=scale_linop, shift=shift)
                if 'value' in rv_kwargs:
                    kwargs_std['value'] = b.inverse(rv_kwargs['value'])

            elif parameterisation_type == 'eigindep':
                # Combines 'eig' and 'indep' parameterizations, modeling the posterior
                # as
                # (V D**(-b) V' + diag(c))^-1
                # where VDV' is the eigendecomposition of the prior cov, and b and c
                # are learned vectors.
                b, c = [tf.cast(x, cov_dtype) for x in (b, c)]
                Lambda, Q = eigh_with_safe_gradient(x_cov)
                Lambda = tf.abs(Lambda)
                Lambda_hat_b = 1e-6 + tf.pow(Lambda, b)
                prior = tf.matmul(
                    Q,
                    tf.matmul(tf.linalg.diag(Lambda_hat_b), Q, adjoint_b=True))
                prior_chol = tf.linalg.cholesky(
                    prior + 1e-6 * tf.eye(ndims, dtype=prior.dtype))
                prior_prec = tf.linalg.cholesky_solve(
                    prior_chol + 1e-6 * tf.eye(ndims, dtype=prior_chol.dtype),
                    tf.eye(ndims, dtype=prior_chol.dtype))

                approx_posterior_prec = prior_prec + tf.linalg.diag(c)
                approx_posterior_prec_chol = tf.linalg.cholesky(
                    approx_posterior_prec)
                approx_posterior_cov = tf.linalg.cholesky_solve(
                    approx_posterior_prec_chol + 1e-6 *
                    tf.eye(ndims, dtype=approx_posterior_prec_chol.dtype),
                    tf.eye(ndims, dtype=approx_posterior_prec_chol.dtype))
                cov_chol = tf.linalg.cholesky(
                    approx_posterior_cov +
                    1e-6 * tf.eye(ndims, dtype=approx_posterior_cov.dtype))
                cov_chol = tf.cast(cov_chol, tf.float32)
                prior_chol = tf.cast(prior_chol, tf.float32)
                scale_linop = tf.linalg.LinearOperatorLowerTriangular(cov_chol)

                reparam_loc = tf.multiply(x_loc, a)

                reparam_scale = tf.linalg.LinearOperatorComposition([
                    tf.linalg.LinearOperatorInversion(scale_linop),
                    tf.linalg.LinearOperatorLowerTriangular(prior_chol)
                ])
                kwargs_std = {}
                kwargs_std['loc'] = reparam_loc
                kwargs_std['scale'] = reparam_scale
                kwargs_std['name'] = name

                shift = x_loc - scale_linop.matvec(reparam_loc)
                b = tfb.AffineLinearOperator(scale=scale_linop, shift=shift)
                if 'value' in rv_kwargs:
                    kwargs_std['value'] = b.inverse(rv_kwargs['value'])
            else:
                raise Exception('unrecognized reparameterization strategy!')

            if rv_constructor.__name__.startswith('GaussianProcess'):
                rv_std = edward2.MultivariateNormalLinearOperator(
                    *rv_args, **kwargs_std)
            else:
                rv_std = interceptable(rv_constructor)(*rv_args, **kwargs_std)

            bijectors[name] = b
            return b.forward(rv_std)
        else:
            return interceptable(rv_constructor)(*rv_args, **rv_kwargs)
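The TransformedDistribution / Invert(SoftClip) handling above carries a double-sided constraint through the reparameterization. For reference, tfb.SoftClip itself just smoothly squashes the real line into (low, high); a tiny sketch:

import tensorflow as tf
import tensorflow_probability as tfp

tfb = tfp.bijectors

clip = tfb.SoftClip(low=0.0, high=1.0, hinge_softness=0.5)
x = tf.constant([-2.0, 0.5, 2.0])
y = clip.forward(x)
print(y.numpy())                  # all values strictly inside (0, 1)
print(clip.inverse(y).numpy())    # recovers x up to floating-point error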
def ncp(rv_constructor, *rv_args, **rv_kwargs):
    base_bijector = None
    rv_value = rv_kwargs.pop('value', None)
    if rv_constructor.__name__ == 'TransformedDistribution':
        if (rv_args[1].__class__.__name__ == 'Invert'
                and rv_args[1].bijector.__class__.__name__ == 'SoftClip'):
            distribution = rv_args[0]
            base_bijector = rv_args[1].bijector
            rv_constructor = distribution.__class__
            rv_kwargs = distribution.parameters
            rv_args = rv_args[2:]
            # We were given a value for the transformed RV. Let's pretend it was
            # for the original.
            if rv_value is not None:
                rv_value = base_bijector.forward(rv_value)

    if (rv_constructor.__name__ == 'Normal'
            and not rv_kwargs['name'].startswith('y')):
        loc = rv_kwargs['loc']
        scale = rv_kwargs['scale']
        name = rv_kwargs['name']

        kwargs_std = {}
        kwargs_std['loc'] = tf.zeros_like(loc)
        kwargs_std['scale'] = tf.ones_like(scale)
        kwargs_std['name'] = name + '_std'

        b = tfb.AffineScalar(scale=scale, shift=loc)
        if rv_value is not None:
            rv_value = b.inverse(rv_value)

        kwargs_std['value'] = rv_value
        rv_std = interceptable(rv_constructor)(*rv_args, **kwargs_std)
        return b.forward(rv_std)

    elif ((rv_constructor.__name__.startswith('MultivariateNormal')
           or rv_constructor.__name__.startswith('GaussianProcess'))
          and not rv_kwargs['name'].startswith('y')):

        name = rv_kwargs['name']

        if rv_constructor.__name__.startswith('GaussianProcess'):
            gp_dist = rv_constructor(*rv_args, **rv_kwargs).distribution
            X = gp_dist._get_index_points()
            x_loc = gp_dist.mean_fn(X)
            x_cov = gp_dist._compute_covariance(index_points=X)
            shape = tfd.MultivariateNormalFullCovariance(x_loc,
                                                         x_cov).event_shape

        else:
            x_loc = rv_kwargs['loc']
            x_cov = rv_kwargs['covariance_matrix']
            shape = rv_constructor(*rv_args, **rv_kwargs).shape

        kwargs_std = {}

        kwargs_std['loc'] = tf.zeros(shape)
        kwargs_std['scale_diag'] = tf.ones(shape[0])
        kwargs_std['name'] = name + '_std'

        scale = tf.linalg.cholesky(x_cov +
                                   1e-6 * tf.eye(tf.shape(x_cov)[-1], dtype=x_cov.dtype))
        b = tfb.AffineLinearOperator(
            scale=tf.linalg.LinearOperatorLowerTriangular(scale), shift=x_loc)

        if 'value' in rv_kwargs:
            kwargs_std['value'] = b.inverse(rv_kwargs['value'])
        rv_std = edward2.MultivariateNormalDiag(*rv_args, **kwargs_std)
        return b.forward(rv_std)

    else:
        return interceptable(rv_constructor)(*rv_args, **rv_kwargs)
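The multivariate branch above whitens the MVN with a Cholesky factor: if L = cholesky(cov), then loc + L @ z with z ~ N(0, I) is distributed MVN(loc, cov), and the densities differ by the |det L| Jacobian. A short numeric check of that change-of-variables identity:

import tensorflow as tf
import tensorflow_probability as tfp

tfd = tfp.distributions

loc = tf.constant([1.0, -2.0])
cov = tf.constant([[2.0, 0.5],
                   [0.5, 1.0]])
L = tf.linalg.cholesky(cov)

target = tfd.MultivariateNormalTriL(loc=loc, scale_tril=L)
std = tfd.MultivariateNormalDiag(loc=tf.zeros(2), scale_diag=tf.ones(2))

x = tf.constant([0.3, -1.2])
z = tf.linalg.triangular_solve(L, (x - loc)[:, tf.newaxis])[:, 0]  # z = L^{-1} (x - loc)
log_det_L = tf.reduce_sum(tf.math.log(tf.linalg.diag_part(L)))
print(target.log_prob(x).numpy())
print((std.log_prob(z) - log_det_L).numpy())  # matches: change-of-variables Jacobian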