Exemple #1
0
def resize_from_observed(
        observed, ndim_implied: int
) -> Tuple[int, StrongSize, Union[np.ndarray, Variable]]:
    """Derive the extra leading dimensions that observations would add.

    Parameters
    ----------
    observed : scalar, array-like
        The value given as the `observed` kwarg when the RV is created.
        Anything lacking a ``shape`` attribute is first converted with
        ``pandas_to_array``.
    ndim_implied : int
        How many RV dimensions are already implied by its inputs.

    Returns
    -------
    ndim_resize : int
        ``observed.ndim - ndim_implied``, i.e. the number of dimensions
        to add through resizing.
    resize_shape : tuple
        Lengths of the first ``ndim_resize`` axes of the observations
        (empty when ``ndim_resize`` is not positive).
    observed : scalar, array-like
        The observations, converted if they had no ``shape`` attribute.
    """
    has_shape = hasattr(observed, "shape")
    data = observed if has_shape else pandas_to_array(observed)

    extra_ndim = data.ndim - ndim_implied

    # Index axis by axis (instead of slicing) so that a non-positive
    # `extra_ndim` simply yields an empty shape.
    leading_lengths = []
    for axis in range(extra_ndim):
        leading_lengths.append(data.shape[axis])

    return extra_ndim, tuple(leading_lengths), data
Exemple #2
0
    def __new__(self, name, value, *, dims=None, export_index_as_coords=False):
        """Create a shared data variable and register it with the active model.

        Note that the first argument is named ``self`` but, as with any
        ``__new__``, Python passes the class itself here.

        Parameters
        ----------
        name
            Name for the variable; it is passed through ``model.name_for``.
        value
            The data. A ``list`` is converted with ``np.array``; the result is
            then run through ``pandas_to_array`` before being wrapped by
            ``aesara.shared``.
        dims : str or sequence of str, optional
            Dimension names. A single string is treated as a one-element tuple.
            When given, its length must equal the number of dimensions of the
            shared variable.
        export_index_as_coords : bool, default False
            If True, the coordinates returned by ``set_coords`` are added to
            the model. Otherwise, when ``dims`` is given, every dimension name
            not yet known to the model is registered with the matching length
            of the shared variable and no coordinate values.

        Returns
        -------
        The object created by ``aesara.shared`` (not an instance of this class).

        Raises
        ------
        TypeError
            If there is no model on the context stack.
        pm.exceptions.ShapeError
            If the length of ``dims`` differs from the number of dimensions of
            the data.
        """
        if isinstance(value, list):
            value = np.array(value)

        # Add data container to the named variables of the model.
        try:
            model = pm.Model.get_context()
        except TypeError as err:
            # Chain explicitly so the original lookup failure is reported as
            # the direct cause of the friendlier message.
            raise TypeError(
                "No model on context stack, which is needed to instantiate a data container. "
                "Add variable inside a 'with model:' block.") from err
        name = model.name_for(name)

        # `pandas_to_array` takes care of parameter `value` and
        # transforms it to something digestible for pymc
        shared_object = aesara.shared(pandas_to_array(value), name)

        if isinstance(dims, str):
            dims = (dims, )
        if not (dims is None or len(dims) == shared_object.ndim):
            raise pm.exceptions.ShapeError(
                "Length of `dims` must match the dimensions of the dataset.",
                actual=len(dims),
                expected=shared_object.ndim,
            )

        # Computed unconditionally (even if not exported below), exactly as
        # before, so any checks done inside `set_coords` still run.
        coords = self.set_coords(model, value, dims)

        if export_index_as_coords:
            model.add_coords(coords)
        elif dims:
            # Register new dimension lengths
            for d, dname in enumerate(dims):
                if dname not in model.dim_lengths:
                    model.add_coord(dname,
                                    values=None,
                                    length=shared_object.shape[d])

        # To draw the node for this variable in the graphviz Digraph we need
        # its shape.
        # XXX: This needs to be refactored
        # shared_object.dshape = tuple(shared_object.shape.eval())
        # if dims is not None:
        #     shape_dims = model.shape_from_dims(dims)
        #     if shared_object.dshape != shape_dims:
        #         raise pm.exceptions.ShapeError(
        #             "Data shape does not match with specified `dims`.",
        #             actual=shared_object.dshape,
        #             expected=shape_dims,
        #         )

        model.add_random_variable(shared_object, dims=dims)

        return shared_object
Exemple #3
0
def test_pandas_to_array_pandas_index():
    """A ``pd.Index`` must convert to an array holding the same values."""
    index = pd.Index([1, 2, 3])
    converted = pandas_to_array(index)
    np.testing.assert_array_equal(converted, np.array([1, 2, 3]))
Exemple #4
0
def Data(
    name: str,
    value,
    *,
    dims: Optional[Sequence[str]] = None,
    export_index_as_coords=False,
    mutable: Optional[bool] = None,
    **kwargs,
) -> Union[SharedVariable, TensorConstant]:
    """Data container that registers a data variable with the model.

    Depending on the ``mutable`` setting, the variable is registered either
    as a ``SharedVariable`` (``mutable=True``), enabling it to be altered
    in value and shape, but NOT in dimensionality using ``pm.set_data()``,
    or as a ``TensorConstant`` (``mutable=False``).

    Parameters
    ----------
    name: str
        The name for this variable
    value: {List, np.ndarray, pd.Series, pd.DataFrame}
        A value to associate with this variable
    dims: {str, tuple of str}, optional, default=None
        Dimension names of the random variables (as opposed to the shapes of these
        random variables). Use this when `value` is a pandas Series or DataFrame. The
        `dims` will then be the name of the Series / DataFrame's columns. See ArviZ
        documentation for more information about dimensions and coordinates:
        :ref:`arviz:quickstart`.
    export_index_as_coords: bool, optional, default=False
        If True, the `Data` container will try to infer what the coordinates should be
        if there is an index in `value`.
    mutable : bool, optional
        Switches between creating a ``SharedVariable`` (``mutable=True``)
        vs. creating a ``TensorConstant`` (``mutable=False``).
        When left as ``None`` the setting is derived from the installed PyMC
        version: ``True`` (with a ``FutureWarning``) on 4.0.x, ``False`` otherwise.
        Consider using ``pm.ConstantData`` or ``pm.MutableData`` as less verbose
        alternatives to ``pm.Data(..., mutable=...)``.
    **kwargs: dict, optional
        Extra arguments passed to :func:`aesara.shared` (``mutable=True``) or
        to ``at.as_tensor_variable`` (``mutable=False``).

    Raises
    ------
    TypeError
        If there is no model on the context stack.
    pm.exceptions.ShapeError
        If the length of ``dims`` differs from the number of dimensions of the data.

    Examples
    --------

    >>> import pymc as pm
    >>> import numpy as np
    >>> # We generate 10 datasets
    >>> true_mu = [np.random.randn() for _ in range(10)]
    >>> observed_data = [mu + np.random.randn(20) for mu in true_mu]

    >>> with pm.Model() as model:
    ...     data = pm.MutableData('data', observed_data[0])
    ...     mu = pm.Normal('mu', 0, 10)
    ...     pm.Normal('y', mu=mu, sigma=1, observed=data)

    >>> # Generate one trace for each dataset
    >>> idatas = []
    >>> for data_vals in observed_data:
    ...     with model:
    ...         # Switch out the observed dataset
    ...         model.set_data('data', data_vals)
    ...         idatas.append(pm.sample())

    To set the value of the data container variable, check out
    :func:`pymc.model.set_data()`.

    For more information, take a look at this example notebook
    https://docs.pymc.io/notebooks/data_container.html
    """
    if isinstance(value, list):
        value = np.array(value)

    # Add data container to the named variables of the model.
    try:
        model = pm.Model.get_context()
    except TypeError as err:
        # Chain explicitly so the original lookup failure is reported as the
        # direct cause of the friendlier message.
        raise TypeError(
            "No model on context stack, which is needed to instantiate a data container. "
            "Add variable inside a 'with model:' block.") from err
    name = model.name_for(name)

    # `pandas_to_array` takes care of parameter `value` and
    # transforms it to something digestible for Aesara.
    arr = pandas_to_array(value)

    if mutable is None:
        # No explicit choice: fall back to the version-dependent default.
        current = version.Version(pm.__version__)
        mutable = current.major == 4 and current.minor < 1
        if mutable:
            warnings.warn(
                "The `mutable` kwarg was not specified. Currently it defaults to `pm.Data(mutable=True)`,"
                " which is equivalent to using `pm.MutableData()`."
                " In v4.1.0 the default will change to `pm.Data(mutable=False)`, equivalent to `pm.ConstantData`."
                " Set `pm.Data(..., mutable=False/True)`, or use `pm.ConstantData`/`pm.MutableData`.",
                FutureWarning,
                # Attribute the warning to the caller of `Data`, not to this line.
                stacklevel=2,
            )
    if mutable:
        x = aesara.shared(arr, name, **kwargs)
    else:
        x = at.as_tensor_variable(arr, name, **kwargs)

    if isinstance(dims, str):
        dims = (dims, )
    if not (dims is None or len(dims) == x.ndim):
        raise pm.exceptions.ShapeError(
            "Length of `dims` must match the dimensions of the dataset.",
            actual=len(dims),
            expected=x.ndim,
        )

    # Computed unconditionally (even if not exported below), exactly as before,
    # so any checks done inside `determine_coords` still run.
    coords = determine_coords(model, value, dims)

    if export_index_as_coords:
        model.add_coords(coords)
    elif dims:
        # Register new dimension lengths
        for d, dname in enumerate(dims):
            if dname not in model.dim_lengths:
                model.add_coord(dname, values=None, length=x.shape[d])

    model.add_random_variable(x, dims=dims)

    return x