Example No. 1
 def log_jac_det(self, value, *inputs):
     return at.sum(value[..., self.diag_idxs], axis=-1)
Example No. 2
 def logdet(self):
     return at.repeat(at.sum(at.log(self.scale)), self.z0.shape[0])
Example No. 3
 def log_jac_det(self, value, *inputs):
     y = at.zeros(value.shape)
     return at.sum(y, axis=-1)
Example No. 4
def MvNormalLogp():
    """Compute the log pdf of a multivariate normal distribution.

    This should be used in MvNormal.logp once Theano#5908 is released.

    Parameters
    ----------
    cov: aet.matrix
        The covariance matrix.
    delta: aet.matrix
        Array of deviations from the mean.
    """
    cov = aet.matrix("cov")
    cov.tag.test_value = floatX(np.eye(3))
    delta = aet.matrix("delta")
    delta.tag.test_value = floatX(np.zeros((2, 3)))

    solve_lower = Solve(A_structure="lower_triangular")
    solve_upper = Solve(A_structure="upper_triangular")
    cholesky = Cholesky(lower=True, on_error="nan")

    n, k = delta.shape
    n, k = f(n), f(k)
    chol_cov = cholesky(cov)
    diag = aet.diag(chol_cov)
    ok = aet.all(diag > 0)

    chol_cov = aet.switch(ok, chol_cov, aet.fill(chol_cov, 1))
    delta_trans = solve_lower(chol_cov, delta.T).T

    result = n * k * aet.log(f(2) * np.pi)
    result += f(2) * n * aet.sum(aet.log(diag))
    result += (delta_trans ** f(2)).sum()
    result = f(-0.5) * result
    logp = aet.switch(ok, result, -np.inf)

    def dlogp(inputs, gradients):
        (g_logp,) = gradients
        cov, delta = inputs

        g_logp.tag.test_value = floatX(1.0)
        n, k = delta.shape

        chol_cov = cholesky(cov)
        diag = aet.diag(chol_cov)
        ok = aet.all(diag > 0)

        chol_cov = aet.switch(ok, chol_cov, aet.fill(chol_cov, 1))
        delta_trans = solve_lower(chol_cov, delta.T).T

        inner = n * aet.eye(k) - aet.dot(delta_trans.T, delta_trans)
        g_cov = solve_upper(chol_cov.T, inner)
        g_cov = solve_upper(chol_cov.T, g_cov.T)

        tau_delta = solve_upper(chol_cov.T, delta_trans.T)
        g_delta = tau_delta.T

        g_cov = aet.switch(ok, g_cov, -np.nan)
        g_delta = aet.switch(ok, g_delta, -np.nan)

        return [-0.5 * g_cov * g_logp, -g_delta * g_logp]

    return OpFromGraph([cov, delta], [logp], grad_overrides=dlogp, inline=True)
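As a quick, hedged illustration of how the Op returned above might be used, the sketch below compiles it with concrete inputs; it is not part of the original source and assumes the example's own helpers (the aet/np aliases and floatX) are in scope.

# Minimal usage sketch (illustrative only): compile and evaluate the returned Op.
import aesara

logp_op = MvNormalLogp()
cov_in = aet.matrix("cov_in")
delta_in = aet.matrix("delta_in")
logp_fn = aesara.function([cov_in, delta_in], logp_op(cov_in, delta_in))
# Identity covariance and zero deviations give -0.5 * n * k * log(2 * pi).
print(logp_fn(floatX(np.eye(3)), floatX(np.zeros((2, 3)))))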
Example No. 5
def logsumexp(x, axis=None, keepdims=True):
    # Adapted from https://github.com/Theano/Theano/issues/1563
    x_max = at.max(x, axis=axis, keepdims=True)
    x_max = at.switch(at.isinf(x_max), 0, x_max)
    res = at.log(at.sum(at.exp(x - x_max), axis=axis, keepdims=True)) + x_max
    return res if keepdims else res.squeeze()
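A small numerical check of the helper above (illustrative only; `at` is assumed to be `aesara.tensor`, as in the snippet).

# Illustrative check, not part of the original snippet.
import numpy as np
import aesara
import aesara.tensor as at

x = at.matrix("x")
fn = aesara.function([x], logsumexp(x, axis=1))
vals = np.array([[0.0, 1.0], [1000.0, 1000.0]], dtype=aesara.config.floatX)
# Subtracting x_max keeps the second row from overflowing in exp().
print(fn(vals))  # roughly [[1.3133], [1000.6931]]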
Example No. 6
def total_norm_constraint(tensor_vars,
                          max_norm,
                          epsilon=1e-7,
                          return_norm=False):
    """Rescales a list of tensors based on their combined norm

    If the combined norm of the input tensors exceeds the threshold then all
    tensors are rescaled such that the combined norm is equal to the threshold.

    Scaling the norms of the gradients is often used when training recurrent
    neural networks [1]_.

    Parameters
    ----------
    tensor_vars: List of TensorVariables.
        Tensors to be rescaled.
    max_norm: float
        Threshold value for total norm.
    epsilon: scalar, optional
        Value used to prevent numerical instability when dividing by
        very small or zero norms.
    return_norm: bool
        If True, the total norm is also returned.

    Returns
    -------
    tensor_vars_scaled: list of TensorVariables
        The scaled tensor variables.
    norm: Aesara scalar
        The combined norm of the input variables prior to rescaling,
        only returned if ``return_norm=True``.

    Examples
    --------
    >>> from lasagne.layers import InputLayer, DenseLayer
    >>> import lasagne
    >>> from lasagne.updates import sgd, total_norm_constraint
    >>> x = at.matrix()
    >>> y = at.ivector()
    >>> l_in = InputLayer((5, 10))
    >>> l1 = DenseLayer(l_in, num_units=7, nonlinearity=at.nnet.softmax)
    >>> output = lasagne.layers.get_output(l1, x)
    >>> cost = at.mean(at.nnet.categorical_crossentropy(output, y))
    >>> all_params = lasagne.layers.get_all_params(l1)
    >>> all_grads = at.grad(cost, all_params)
    >>> scaled_grads = total_norm_constraint(all_grads, 5)
    >>> updates = sgd(scaled_grads, all_params, learning_rate=0.1)

    Notes
    -----
    The total norm can be used to monitor training.

    References
    ----------
    .. [1] Sutskever, I., Vinyals, O., & Le, Q. V. (2014): Sequence to sequence
       learning with neural networks. In Advances in Neural Information
       Processing Systems (pp. 3104-3112).
    """
    norm = at.sqrt(sum(at.sum(tensor**2) for tensor in tensor_vars))
    dtype = np.dtype(aesara.config.floatX).type
    target_norm = at.clip(norm, 0, dtype(max_norm))
    multiplier = target_norm / (dtype(epsilon) + norm)
    tensor_vars_scaled = [step * multiplier for step in tensor_vars]

    if return_norm:
        return tensor_vars_scaled, norm
    else:
        return tensor_vars_scaled
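A framework-free sketch of the clipping behaviour (illustrative only; the lasagne-based snippet in the docstring is the original example).

# Illustrative check, not part of the original function.
import numpy as np
import aesara
import aesara.tensor as at

g1, g2 = at.vector("g1"), at.vector("g2")
scaled, norm = total_norm_constraint([g1, g2], max_norm=1.0, return_norm=True)
fn = aesara.function([g1, g2], scaled + [norm])
a = np.array([3.0, 0.0], dtype=aesara.config.floatX)
b = np.array([0.0, 4.0], dtype=aesara.config.floatX)
# The combined norm is 5 > max_norm, so both tensors are scaled by about 1/5.
print(fn(a, b))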
Example No. 7
def norm_constraint(tensor_var, max_norm, norm_axes=None, epsilon=1e-7):
    """Max weight norm constraints and gradient clipping

    This takes a TensorVariable and rescales it so that incoming weight
    norms are below a specified constraint value. Vectors violating the
    constraint are rescaled so that they are within the allowed range.

    Parameters
    ----------
    tensor_var: TensorVariable
        Aesara expression for update, gradient, or other quantity.
    max_norm: scalar
        This value sets the maximum allowed value of any norm in
        `tensor_var`.
    norm_axes: sequence (list or tuple)
        The axes over which to compute the norm.  This overrides the
        default norm axes defined for the number of dimensions
        in `tensor_var`. When this is not specified and `tensor_var` is a
        matrix (2D), this is set to `(0,)`. If `tensor_var` is a 3D, 4D or
        5D tensor, it is set to a tuple listing all axes but axis 0. The
        former default is useful for working with dense layers, the latter
        is useful for 1D, 2D and 3D convolutional layers.
        (Optional)
    epsilon: scalar, optional
        Value used to prevent numerical instability when dividing by
        very small or zero norms.

    Returns
    -------
    TensorVariable
        Input `tensor_var` with rescaling applied to weight vectors
        that violate the specified constraints.

    Examples
    --------
    >>> param = aesara.shared(
    ...     np.random.randn(100, 200).astype(aesara.config.floatX))
    >>> update = param + 100
    >>> update = norm_constraint(update, 10)
    >>> func = aesara.function([], [], updates=[(param, update)])
    >>> # Apply constrained update
    >>> _ = func()
    >>> from lasagne.utils import compute_norms
    >>> norms = compute_norms(param.get_value())
    >>> np.isclose(np.max(norms), 10)
    True

    Notes
    -----
    When `norm_axes` is not specified, the axes over which the norm is
    computed depend on the dimensionality of the input variable. If it is
    2D, it is assumed to come from a dense layer, and the norm is computed
    over axis 0. If it is 3D, 4D or 5D, it is assumed to come from a
    convolutional layer and the norm is computed over all trailing axes
    beyond axis 0. For other uses, you should explicitly specify the axes
    over which to compute the norm using `norm_axes`.
    """
    ndim = tensor_var.ndim

    if norm_axes is not None:
        sum_over = tuple(norm_axes)
    elif ndim == 2:  # DenseLayer
        sum_over = (0, )
    elif ndim in [3, 4, 5]:  # Conv{1,2,3}DLayer
        sum_over = tuple(range(1, ndim))
    else:
        raise ValueError("Unsupported tensor dimensionality {}. "
                         "Must specify `norm_axes`".format(ndim))

    dtype = np.dtype(aesara.config.floatX).type
    norms = at.sqrt(at.sum(at.sqr(tensor_var), axis=sum_over, keepdims=True))
    target_norms = at.clip(norms, 0, dtype(max_norm))
    constrained_output = tensor_var * (target_norms / (dtype(epsilon) + norms))

    return constrained_output
Example No. 8
 def log_jac_det(self, value, *inputs):
     return at.sum(value[..., 1:], axis=-1)
Example No. 9
 def backward(self, value, *inputs):
     remaining = 1 - at.sum(value[..., :], axis=-1, keepdims=True)
     return at.concatenate([value[..., :], remaining], axis=-1)
Example No. 10
 def diag(self, X):
     X, Xc, _ = self._common(X, None)
     return at.sum(at.square(Xc), 1)
Example No. 11
 def log_jac_det(self, value, *inputs):
     diag_idxs = self.param_extract_fn(inputs)
     return at.sum(value[diag_idxs])
Example No. 12
    ]
    p2['f'] = 100 * (x_dev[1] - x_dev[0]**2)**2 + (1 - x_dev[0])**2
    p2['ce'] = None
    p2['neq'] = 0
    p2['ci'] = None
    p2['nineq'] = 0
    p2['init'] = np.random.randn(2).astype(float_dtype)
    p2['ground_truth'] = [np.array([1.0, 1.0], dtype=float_dtype)]

    #test_problems.append(p2)

    p3 = dict()
    p3['text_statements'] = [
        'maximize f(x, y) = x + y subject to x**2 + y**2 = 1'
    ]
    p3['f'] = -T.sum(x_dev)
    p3['ce'] = T.sum(x_dev**2) - 1.0
    p3['neq'] = 1
    p3['ci'] = None
    p3['nineq'] = 0
    p3['init'] = np.random.randn(2).astype(float_dtype)
    p3['ground_truth'] = [
        np.array([np.sqrt(2.0) / 2.0, np.sqrt(2.0) / 2.0], dtype=float_dtype)
    ]

    #test_problems.append(p3)

    p4 = dict()
    p4['text_statements'] = [
        'maximize f(x, y) = (x**2)*y subject to x**2 + y**2 = 3'
    ]
Example No. 13
def logpt(
    var: Union[TensorVariable, List[TensorVariable]],
    rv_values: Optional[Union[TensorVariable, Dict[TensorVariable,
                                                   TensorVariable]]] = None,
    *,
    jacobian: bool = True,
    scaling: bool = True,
    transformed: bool = True,
    sum: bool = True,
    **kwargs,
) -> Union[TensorVariable, List[TensorVariable]]:
    """Create a measure-space (i.e. log-likelihood) graph for a random variable
    or a list of random variables at a given point.

    The input `var` determines which log-likelihood graph is used and
    `rv_value` is that graph's input parameter.  For example, if `var` is
    the output of a ``NormalRV`` ``Op``, then the output is a graph of the
    density function for `var` set to the value `rv_value`.

    Parameters
    ==========
    var
        The `RandomVariable` output that determines the log-likelihood graph.
        Can also be a list of variables. The final log-likelihood graph will
        be the sum total of all individual log-likelihood graphs of variables
        in the list.
    rv_values
        A variable, or ``dict`` of variables, that represents the value of
        `var` in its log-likelihood.  If no `rv_value` is provided,
        ``var.tag.value_var`` will be checked and, when available, used.
    jacobian
        Whether or not to include the Jacobian term.
    scaling
        A scaling term to apply to the generated log-likelihood graph.
    transformed
        Apply transforms.
    sum
        Sum the log-likelihood or return each term as a separate list item.

    """
    # TODO: In future when we drop support for tag.value_var most of the following
    # logic can be removed and logpt can just be a wrapper function that calls aeppl's
    # joint_logprob directly.

    # If var is not a list make it one.
    if not isinstance(var, (list, tuple)):
        var = [var]

    # If no values are provided to logpt, it is assumed that the tagged value var
    # or observation is the value variable for that particular RV.
    if rv_values is None:
        rv_values = {}
        for rv in var:
            value_var = getattr(rv.tag, "observations",
                                getattr(rv.tag, "value_var", None))
            if value_var is None:
                raise ValueError(f"No value variable found for var {rv}")
            rv_values[rv] = value_var
    # Else we assume we were given a single rv and respective value
    elif not isinstance(rv_values, Mapping):
        if len(var) == 1:
            rv_values = {
                var[0]: at.as_tensor_variable(rv_values).astype(var[0].type)
            }
        else:
            raise ValueError(
                "rv_values must be a dict if more than one var is requested")

    if scaling:
        rv_scalings = {}
        for rv, value_var in rv_values.items():
            rv_scalings[value_var] = _get_scaling(
                getattr(rv.tag, "total_size", None), value_var.shape,
                value_var.ndim)

    # Aeppl needs all rv-value pairs, not just those of the requested vars.
    # Hence we iterate through the graph to collect them.
    tmp_rvs_to_values = rv_values.copy()
    for node in io_toposort(graph_inputs(var), var):
        try:
            curr_vars = [node.default_output()]
        except ValueError:
            curr_vars = node.outputs
        for curr_var in curr_vars:
            if curr_var in tmp_rvs_to_values:
                continue
            # Check if variable has a value variable
            value_var = getattr(curr_var.tag, "observations",
                                getattr(curr_var.tag, "value_var", None))
            if value_var is not None:
                tmp_rvs_to_values[curr_var] = value_var

    # After collecting all necessary rvs and values, we check for any value transforms
    transform_map = {}
    if transformed:
        for rv, value_var in tmp_rvs_to_values.items():
            if hasattr(value_var.tag, "transform"):
                transform_map[value_var] = value_var.tag.transform
            # If the provided value_variable does not have transform information, we
            # check if the original `rv.tag.value_var` does.
            # TODO: This logic should be replaced by an explicit dict of
            #  `{value_var: transform}` similar to `rv_values`.
            else:
                original_value_var = getattr(rv.tag, "value_var", None)
                if original_value_var is not None and hasattr(
                        original_value_var.tag, "transform"):
                    transform_map[value_var] = original_value_var.tag.transform

    transform_opt = TransformValuesOpt(transform_map)
    temp_logp_var_dict = factorized_joint_logprob(tmp_rvs_to_values,
                                                  extra_rewrites=transform_opt,
                                                  use_jacobian=jacobian,
                                                  **kwargs)

    # aeppl returns the logpt for every single value term we provided to it. This includes
    # the extra values we plugged in above, so we keep only the ones we actually requested,
    # in the same order they were given.
    logp_var_dict = {}
    for value_var in rv_values.values():
        logp_var_dict[value_var] = temp_logp_var_dict[value_var]

    if scaling:
        for value_var in logp_var_dict.keys():
            if value_var in rv_scalings:
                logp_var_dict[value_var] *= rv_scalings[value_var]

    if sum:
        logp_var = at.sum(
            [at.sum(factor) for factor in logp_var_dict.values()])
    else:
        logp_var = list(logp_var_dict.values())
        # TODO: deprecate special behavior when only one variable is requested and
        #  always return a list. This is here for backwards compatibility as logpt
        #  started as a replacement to factor.logpt, but it should now be considered an
        #  internal function reached only via model.logp* methods.
        if len(logp_var) == 1:
            logp_var = logp_var[0]

    return logp_var
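The function above is normally reached through model methods, but a direct call might look like the hypothetical sketch below; it assumes a PyMC development version in which this logpt and ``pm.Normal`` coexist, and all names are illustrative.

# Hypothetical usage sketch, not from the original source.
import aesara
import aesara.tensor as at
import pymc as pm

with pm.Model():
    x = pm.Normal("x", 0.0, 1.0)

x_value = at.scalar("x_value")
x_logp = logpt(x, {x: x_value})  # log-density graph of x evaluated at x_value
x_logp_fn = aesara.function([x_value], x_logp)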
Example No. 14
def logcdfpt(
    var: TensorVariable,
    rv_values: Optional[Union[TensorVariable, Dict[TensorVariable, TensorVariable]]] = None,
    *,
    scaling: bool = True,
    sum: bool = True,
    **kwargs,
) -> TensorVariable:
    """Create a measure-space (i.e. log-cdf) graph for a random variable at a given point.

    Parameters
    ==========
    var
        The `RandomVariable` output that determines the log-CDF graph.
    rv_values
        A variable, or ``dict`` of variables, that represents the value of
        `var` in its log-CDF.  If no `rv_value` is provided,
        ``var.tag.value_var`` will be checked and, when available, used.
    scaling
        A scaling term to apply to the generated log-CDF graph.
    sum
        Sum the log-CDF.

    """
    if not isinstance(rv_values, Mapping):
        rv_values = {var: rv_values} if rv_values is not None else {}

    rv_var, rv_value_var = extract_rv_and_value_vars(var)

    rv_value = rv_values.get(rv_var, rv_value_var)

    if rv_var is not None and rv_value is None:
        raise ValueError(f"No value variable specified or associated with {rv_var}")

    if rv_value is not None:
        rv_value = at.as_tensor(rv_value)

        if rv_var is not None:
            # Make sure that the value is compatible with the random variable
            rv_value = rv_var.type.filter_variable(rv_value.astype(rv_var.dtype))

        if rv_value_var is None:
            rv_value_var = rv_value

    rv_node = rv_var.owner

    rng, size, dtype, *dist_params = rv_node.inputs

    # Here, we plug the actual random variable into the log-likelihood graph,
    # because we want a log-likelihood graph that only contains
    # random variables.  This is important, because a random variable's
    # parameters can contain random variables themselves.
    # Ultimately, with a graph containing only random variables and
    # "deterministics", we can simply replace all the random variables with
    # their value variables and be done.
    tmp_rv_values = rv_values.copy()
    tmp_rv_values[rv_var] = rv_var

    logp_var = _logcdf(rv_node.op, rv_var, tmp_rv_values, *dist_params, **kwargs)

    transform = getattr(rv_value_var.tag, "transform", None) if rv_value_var else None

    # Replace random variables with their value variables
    replacements = rv_values.copy()
    replacements.update({rv_var: rv_value, rv_value_var: rv_value})

    (logp_var,), _ = rvs_to_value_vars(
        (logp_var,),
        apply_transforms=False,
        initial_replacements=replacements,
    )

    if sum:
        logp_var = at.sum(logp_var)

    if scaling:
        logp_var *= _get_scaling(
            getattr(rv_var.tag, "total_size", None), rv_value.shape, rv_value.ndim
        )

    # Recompute test values for the changes introduced by the replacements
    # above.
    if config.compute_test_value != "off":
        for node in io_toposort(graph_inputs((logp_var,)), (logp_var,)):
            compute_test_value(node)

    if rv_var.name is not None:
        logp_var.name = f"__logp_{rv_var.name}"

    return logp_var
Example No. 15
def logpt(
    var: TensorVariable,
    rv_values: Optional[Union[TensorVariable, Dict[TensorVariable, TensorVariable]]] = None,
    *,
    jacobian: bool = True,
    scaling: bool = True,
    transformed: bool = True,
    sum: bool = True,
    **kwargs,
) -> TensorVariable:
    """Create a measure-space (i.e. log-likelihood) graph for a random variable
    or a list of random variables at a given point.

    The input `var` determines which log-likelihood graph is used and
    `rv_value` is that graph's input parameter.  For example, if `var` is
    the output of a ``NormalRV`` ``Op``, then the output is a graph of the
    density function for `var` set to the value `rv_value`.

    Parameters
    ==========
    var
        The `RandomVariable` output that determines the log-likelihood graph.
        Can also be a list of variables. The final log-likelihood graph will
        be the sum total of all individual log-likelihood graphs of variables
        in the list.
    rv_values
        A variable, or ``dict`` of variables, that represents the value of
        `var` in its log-likelihood.  If no `rv_value` is provided,
        ``var.tag.value_var`` will be checked and, when available, used.
    jacobian
        Whether or not to include the Jacobian term.
    scaling
        A scaling term to apply to the generated log-likelihood graph.
    transformed
        Apply transforms.
    sum
        Sum the log-likelihood.

    """
    # TODO: In future when we drop support for tag.value_var most of the following
    # logic can be removed and logpt can just be a wrapper function that calls aeppl's
    # joint_logprob directly.

    # If var is not a list make it one.
    if not isinstance(var, list):
        var = [var]

    # If no values are provided to logpt and the variable (provided in var)
    # is an RV, it is assumed that the tagged value var or observation is
    # the value variable for that particular RV.
    if rv_values is None:
        rv_values = {}
        for _var in var:
            if isinstance(_var.owner.op, RandomVariable):
                rv_value_var = getattr(
                    _var.tag, "observations", getattr(_var.tag, "value_var", _var)
                )
                rv_values[_var] = rv_value_var
    elif not isinstance(rv_values, Mapping):
        # Else if we're given a single value and a single variable we assume a mapping among them.
        rv_values = (
            {var[0]: at.as_tensor_variable(rv_values).astype(var[0].type)} if len(var) == 1 else {}
        )

    # The filtering of the logp graph is based on the value variables
    # provided to this function, so warn if none were found.
    if not rv_values:
        warnings.warn("No value variables provided the logp will be an empty graph")

    if scaling:
        rv_scalings = {}
        for _var in var:
            rv_value_var = getattr(_var.tag, "observations", getattr(_var.tag, "value_var", _var))
            rv_scalings[rv_value_var] = _get_scaling(
                getattr(_var.tag, "total_size", None), rv_value_var.shape, rv_value_var.ndim
            )

    # Unlike aeppl, PyMC's logpt is expected to plug in the value variables to corresponding
    # RVs automatically unless the values are explicitly set to None. Hence we iterate through
    # the graph to find RVs and construct a new RVs to values dictionary.
    tmp_rvs_to_values = rv_values.copy()
    transform_map = {}
    for node in io_toposort(graph_inputs(var), var):
        if isinstance(node.op, RandomVariable):
            curr_var = node.out
            rv_value_var = getattr(
                curr_var.tag, "observations", getattr(curr_var.tag, "value_var", curr_var)
            )
            rv_value = rv_values.get(curr_var, rv_value_var)
            tmp_rvs_to_values[curr_var] = rv_value
            # Along with value variables we also check for transforms if any.
            if hasattr(rv_value_var.tag, "transform") and transformed:
                transform_map[rv_value] = rv_value_var.tag.transform
        # The condition below is a hackish way of excluding the value variable for the
        # RV being indexed in case of Advanced Indexing of RVs. It gets added by the
        # logic above but aeppl does not expect us to include it in the dictionary of
        # {RV:values} given to it.
        if isinstance(node.op, subtensor_types):
            curr_var = node.out
            if (
                curr_var in tmp_rvs_to_values.keys()
                and curr_var.owner.inputs[0] in tmp_rvs_to_values.keys()
            ):
                tmp_rvs_to_values.pop(curr_var.owner.inputs[0])

    transform_opt = TransformValuesOpt(transform_map)
    temp_logp_var_dict = factorized_joint_logprob(
        tmp_rvs_to_values, extra_rewrites=transform_opt, use_jacobian=jacobian, **kwargs
    )

    # aeppl returns the logpt for every single value term we provided to it. This includes
    # the extra values we plugged in above so we need to filter those out.
    logp_var_dict = {}
    for value_var, _logp in temp_logp_var_dict.items():
        if value_var in rv_values.values():
            logp_var_dict[value_var] = _logp

    # If it's an empty dictionary the logp is None
    if not logp_var_dict:
        logp_var = None
    else:
        # Otherwise apply appropriate scalings and at.add and/or at.sum the
        # graphs accordingly.
        if scaling:
            for _value in logp_var_dict.keys():
                if _value in rv_scalings:
                    logp_var_dict[_value] *= rv_scalings[_value]

        if len(logp_var_dict) == 1:
            logp_var_dict = tuple(logp_var_dict.values())[0]
            if sum:
                logp_var = at.sum(logp_var_dict)
            else:
                logp_var = logp_var_dict
        else:
            if sum:
                logp_var = at.sum([at.sum(factor) for factor in logp_var_dict.values()])
            else:
                logp_var = at.add(*logp_var_dict.values())

        # Recompute test values for the changes introduced by the replacements
        # above.
        if config.compute_test_value != "off":
            for node in io_toposort(graph_inputs((logp_var,)), (logp_var,)):
                compute_test_value(node)

    return logp_var
Example No. 16
def logpt(
    var: TensorVariable,
    rv_values: Optional[Union[TensorVariable, Dict[TensorVariable,
                                                   TensorVariable]]] = None,
    *,
    jacobian: bool = True,
    scaling: bool = True,
    transformed: bool = True,
    cdf: bool = False,
    sum: bool = False,
    **kwargs,
) -> TensorVariable:
    """Create a measure-space (i.e. log-likelihood) graph for a random variable at a given point.

    The input `var` determines which log-likelihood graph is used and
    `rv_value` is that graph's input parameter.  For example, if `var` is
    the output of a ``NormalRV`` ``Op``, then the output is a graph of the
    density function for `var` set to the value `rv_value`.

    Parameters
    ==========
    var
        The `RandomVariable` output that determines the log-likelihood graph.
    rv_values
        A variable, or ``dict`` of variables, that represents the value of
        `var` in its log-likelihood.  If no `rv_value` is provided,
        ``var.tag.value_var`` will be checked and, when available, used.
    jacobian
        Whether or not to include the Jacobian term.
    scaling
        A scaling term to apply to the generated log-likelihood graph.
    transformed
        Apply transforms.
    cdf
        Return the log cumulative distribution.
    sum
        Sum the log-likelihood.

    """
    if not isinstance(rv_values, Mapping):
        rv_values = {var: rv_values} if rv_values is not None else {}

    rv_var, rv_value_var = extract_rv_and_value_vars(var)

    rv_value = rv_values.get(rv_var, rv_value_var)

    if rv_var is not None and rv_value is None:
        raise ValueError(
            f"No value variable specified or associated with {rv_var}")

    if rv_value is not None:
        rv_value = at.as_tensor(rv_value)

        if rv_var is not None:
            # Make sure that the value is compatible with the random variable
            rv_value = rv_var.type.filter_variable(
                rv_value.astype(rv_var.dtype))

        if rv_value_var is None:
            rv_value_var = rv_value

    if rv_var is None:
        if var.owner is not None:
            return _logp(
                var.owner.op,
                var,
                rv_values,
                *var.owner.inputs,
                jacobian=jacobian,
                scaling=scaling,
                transformed=transformed,
                cdf=cdf,
                sum=sum,
            )

        return at.zeros_like(var)

    rv_node = rv_var.owner

    rng, size, dtype, *dist_params = rv_node.inputs

    # Here, we plug the actual random variable into the log-likelihood graph,
    # because we want a log-likelihood graph that only contains
    # random variables.  This is important, because a random variable's
    # parameters can contain random variables themselves.
    # Ultimately, with a graph containing only random variables and
    # "deterministics", we can simply replace all the random variables with
    # their value variables and be done.
    tmp_rv_values = rv_values.copy()
    tmp_rv_values[rv_var] = rv_var

    if not cdf:
        logp_var = _logp(rv_node.op, rv_var, tmp_rv_values, *dist_params,
                         **kwargs)
    else:
        logp_var = _logcdf(rv_node.op, rv_var, tmp_rv_values, *dist_params,
                           **kwargs)

    transform = getattr(rv_value_var.tag, "transform",
                        None) if rv_value_var else None

    if transform and transformed and not cdf and jacobian:
        transformed_jacobian = transform.jacobian_det(rv_var, rv_value)
        if transformed_jacobian:
            if logp_var.ndim > transformed_jacobian.ndim:
                logp_var = logp_var.sum(axis=-1)
            logp_var += transformed_jacobian

    # Replace random variables with their value variables
    replacements = rv_values.copy()
    replacements.update({rv_var: rv_value, rv_value_var: rv_value})

    (logp_var, ), _ = rvs_to_value_vars(
        (logp_var, ),
        apply_transforms=transformed and not cdf,
        initial_replacements=replacements,
    )

    if sum:
        logp_var = at.sum(logp_var)

    if scaling:
        logp_var *= _get_scaling(getattr(rv_var.tag, "total_size", None),
                                 rv_value.shape, rv_value.ndim)

    # Recompute test values for the changes introduced by the replacements
    # above.
    if config.compute_test_value != "off":
        for node in io_toposort(graph_inputs((logp_var, )), (logp_var, )):
            compute_test_value(node)

    if rv_var.name is not None:
        logp_var.name = "__logp_%s" % rv_var.name

    return logp_var