def get_steps(
    steps: Optional[Union[int, np.ndarray, TensorVariable]],
    *,
    shape: Optional[Shape] = None,
    dims: Optional[Dims] = None,
    observed: Optional[Any] = None,
    step_shape_offset: int = 0,
):
    """Extract number of steps from shape / dims / observed information

    Parameters
    ----------
    steps:
        User specified steps for timeseries distribution
    shape:
        User specified shape for timeseries distribution
    dims:
        User specified dims for timeseries distribution
    observed:
        User specified observed data from timeseries distribution
    step_shape_offset:
        Difference between last shape dimension and number of steps in timeseries
        distribution, defaults to 0

    Returns
    -------
    steps
        Steps, if specified directly by user, or inferred from the last dimension of
        shape / dims / observed. When two sources of step information are provided,
        a symbolic Assert is added to ensure they are consistent.
    """
    inferred_steps = None
    if shape is not None:
        shape = to_tuple(shape)
        if shape[-1] is not ...:
            inferred_steps = shape[-1] - step_shape_offset

    if inferred_steps is None and dims is not None:
        dims = convert_dims(dims)
        if dims[-1] is not ...:
            model = modelcontext(None)
            inferred_steps = model.dim_lengths[dims[-1]] - step_shape_offset

    if inferred_steps is None and observed is not None:
        observed = convert_observed_data(observed)
        inferred_steps = observed.shape[-1] - step_shape_offset

    if inferred_steps is None:
        inferred_steps = steps
    # If there are two sources of information for the steps, assert they are consistent
    elif steps is not None:
        inferred_steps = Assert(msg="Steps do not match last shape dimension")(
            inferred_steps, at.eq(inferred_steps, steps)
        )
    return inferred_steps
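# --- Illustrative sketch, not part of the original module ---
# A minimal demo of how `get_steps` resolves the number of steps. The import
# path and the demo function name are assumptions; `get_steps` has lived in
# different modules across PyMC 4.x releases.
def _demo_get_steps():
    import aesara
    from pymc.distributions.shape_utils import get_steps  # import path is an assumption

    # With only `shape` given, steps come from the last dimension minus the
    # offset (e.g. the slot taken by the initial value of a random-walk series).
    assert get_steps(steps=None, shape=(10, 100), step_shape_offset=1) == 99

    # With both `steps` and `shape`, the result is a symbolic value wrapped in
    # an Assert that fails at runtime if the two sources disagree.
    checked = get_steps(steps=99, shape=(10, 100), step_shape_offset=1)
    assert aesara.function([], checked)() == 99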
def resize_from_observed(
    observed, ndim_implied: int
) -> Tuple[StrongSize, Union[np.ndarray, Variable]]:
    """Determines a potential resize shape from observations.

    Parameters
    ----------
    observed : scalar, array-like
        The value of the `observed` kwarg to the RV creation.
    ndim_implied : int
        Number of RV dimensions that were implied from its inputs alone.

    Returns
    -------
    resize_shape : array-like
        Shape of new dimensions that should be prepended.
    observed : scalar, array-like
        Observations as numpy array or `Variable`.
    """
    if not hasattr(observed, "shape"):
        observed = convert_observed_data(observed)
    ndim_resize = observed.ndim - ndim_implied
    resize_shape = tuple(observed.shape[d] for d in range(ndim_resize))
    return resize_shape, observed
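# --- Illustrative sketch, not part of the original module ---
# Shows the intended behaviour of `resize_from_observed`: extra leading
# dimensions of the observations become the resize shape to prepend. The
# import path and the demo function name are assumptions.
def _demo_resize_from_observed():
    import numpy as np
    from pymc.distributions.shape_utils import resize_from_observed  # path is an assumption

    # A (3, 5) observation for an RV whose inputs alone imply one dimension:
    # a single leading dimension of length 3 must be prepended.
    observed = np.zeros((3, 5))
    resize_shape, obs = resize_from_observed(observed, ndim_implied=1)
    assert resize_shape == (3,)
    assert obs is observed  # already has a `.shape`, so it is passed through unchanged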
def test_pandas_to_array_pandas_index():
    pd = pytest.importorskip("pandas")
    data = pd.Index([1, 2, 3])
    result = convert_observed_data(data)
    expected = np.array([1, 2, 3])
    np.testing.assert_array_equal(result, expected)
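# A hedged companion sketch (not in the original suite): `convert_observed_data`
# is expected to handle a pandas Series the same way, returning the underlying
# numpy values. The test name is hypothetical and reuses the module's imports.
def test_pandas_to_array_pandas_series():
    pd = pytest.importorskip("pandas")
    data = pd.Series([1, 2, 3])
    result = convert_observed_data(data)
    np.testing.assert_array_equal(result, np.array([1, 2, 3]))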
def Data(
    name: str,
    value,
    *,
    dims: Optional[Sequence[str]] = None,
    coords: Optional[Dict[str, Sequence]] = None,
    export_index_as_coords=False,
    mutable: Optional[bool] = None,
    **kwargs,
) -> Union[SharedVariable, TensorConstant]:
    """Data container that registers a data variable with the model.

    Depending on the ``mutable`` setting (default: ``True`` before ``v4.1.0``,
    ``False`` afterwards), the variable is registered as a
    :class:`~aesara.compile.sharedvalue.SharedVariable`, enabling it to be
    altered in value and shape, but NOT in dimensionality using
    :func:`pymc.set_data`.

    To set the value of the data container variable, check out
    :func:`pymc.Model.set_data`.

    For more information, read the notebook :ref:`nb:data_container`.

    Parameters
    ----------
    name : str
        The name for this variable.
    value : array_like or pandas.Series, pandas.Dataframe
        A value to associate with this variable.
    dims : str or tuple of str, optional
        Dimension names of the random variables (as opposed to the shapes of these
        random variables). Use this when ``value`` is a pandas Series or DataFrame. The
        ``dims`` will then be the name of the Series / DataFrame's columns. See ArviZ
        documentation for more information about dimensions and coordinates:
        :ref:`arviz:quickstart`. If this parameter is not specified, the random
        variables will not have dimension names.
    coords : dict, optional
        Coordinate values to set for new dimensions introduced by this ``Data`` variable.
    export_index_as_coords : bool, default=False
        If True, the ``Data`` container will try to infer what the coordinates
        and dimension names should be if there is an index in ``value``.
    mutable : bool, optional
        Switches between creating a :class:`~aesara.compile.sharedvalue.SharedVariable`
        (``mutable=True``) vs. creating a :class:`~aesara.tensor.TensorConstant`
        (``mutable=False``).
        Consider using :class:`pymc.ConstantData` or :class:`pymc.MutableData` as less
        verbose alternatives to ``pm.Data(..., mutable=...)``.
        If this parameter is not specified, the value it takes will depend on the
        version of the package. Since ``v4.1.0`` the default value is ``mutable=False``,
        with previous versions having ``mutable=True``.
    **kwargs : dict, optional
        Extra arguments passed to :func:`aesara.shared`.

    Examples
    --------
    >>> import pymc as pm
    >>> import numpy as np
    >>> # We generate 10 datasets
    >>> true_mu = [np.random.randn() for _ in range(10)]
    >>> observed_data = [mu + np.random.randn(20) for mu in true_mu]

    >>> with pm.Model() as model:
    ...     data = pm.MutableData('data', observed_data[0])
    ...     mu = pm.Normal('mu', 0, 10)
    ...     pm.Normal('y', mu=mu, sigma=1, observed=data)

    >>> # Generate one trace for each dataset
    >>> idatas = []
    >>> for data_vals in observed_data:
    ...     with model:
    ...         # Switch out the observed dataset
    ...         model.set_data('data', data_vals)
    ...         idatas.append(pm.sample())
    """
    if coords is None:
        coords = {}

    if isinstance(value, list):
        value = np.array(value)

    # Add data container to the named variables of the model.
    model = pm.Model.get_context(error_if_none=False)
    if model is None:
        raise TypeError(
            "No model on context stack, which is needed to instantiate a data container. "
            "Add variable inside a 'with model:' block."
        )
    name = model.name_for(name)

    # `convert_observed_data` takes care of parameter `value` and
    # transforms it to something digestible for Aesara.
    arr = convert_observed_data(value)

    if mutable is None:
        major, minor = (int(v) for v in pm.__version__.split(".")[:2])
        mutable = major == 4 and minor < 1
        if mutable:
            warnings.warn(
                "The `mutable` kwarg was not specified. Currently it defaults to `pm.Data(mutable=True)`,"
                " which is equivalent to using `pm.MutableData()`."
                " In v4.1.0 the default will change to `pm.Data(mutable=False)`, equivalent to `pm.ConstantData`."
                " Set `pm.Data(..., mutable=False/True)`, or use `pm.ConstantData`/`pm.MutableData`.",
                FutureWarning,
            )
    if mutable:
        x = aesara.shared(arr, name, **kwargs)
    else:
        x = at.as_tensor_variable(arr, name, **kwargs)

    if isinstance(dims, str):
        dims = (dims,)
    if not (dims is None or len(dims) == x.ndim):
        raise pm.exceptions.ShapeError(
            "Length of `dims` must match the dimensions of the dataset.",
            actual=len(dims),
            expected=x.ndim,
        )

    # Optionally infer coords and dims from the input value.
    if export_index_as_coords:
        coords, dims = determine_coords(model, value, dims)

    if dims:
        if not mutable:
            # Use the dimension lengths from the value before it was tensorified.
            # These can still be tensors, but in many cases they are numeric.
            xshape = np.shape(arr)
        else:
            xshape = x.shape
        # Register new dimension lengths
        for d, dname in enumerate(dims):
            if dname not in model.dim_lengths:
                model.add_coord(
                    name=dname,
                    # Note: Coordinate values can't be taken from
                    # the value, because it could be N-dimensional.
                    values=coords.get(dname, None),
                    mutable=mutable,
                    length=xshape[d],
                )

    model.add_random_variable(x, dims=dims)

    return x
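# --- Illustrative usage sketch, not part of the original module ---
# Demonstrates registering an immutable dataset with named dimensions via
# `pm.ConstantData` (the shorthand for `pm.Data(..., mutable=False)` described
# above). The dimension name "obs_id" and the demo function name are
# hypothetical choices for this example.
def _demo_constant_data_with_dims():
    import numpy as np
    import pymc as pm

    rng = np.random.default_rng(42)
    obs = rng.normal(size=(3, 20))  # 3 groups, 20 observations each

    with pm.Model(coords={"group": ["a", "b", "c"]}) as model:
        # "group" takes its coordinates from the model; "obs_id" is not in
        # `coords`, so only its length is registered, taken from the data's shape.
        data = pm.ConstantData("data", obs, dims=("group", "obs_id"))
        mu = pm.Normal("mu", 0, 10, dims="group")
        pm.Normal("y", mu=mu[:, None], sigma=1, observed=data, dims=("group", "obs_id"))
    return model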