Ejemplo n.º 1
0
    def __init__(self,
                 df,
                 col_t,
                 col_obs,
                 col_covs,
                 col_group,
                 param_names,
                 link_fun,
                 var_link_fun,
                 fun,
                 col_obs_se=None,
                 loss_fun=None,
                 scale_obs_se=True):
        """Build the model state from a data frame and a curve specification.

        The data frame is copied and the copy is sorted by group and then by
        the independent variable. Every per-observation array stored on the
        instance (observations, standard errors, independent variable, group
        labels and covariates) is extracted from that sorted copy so that
        their rows stay aligned with each other.

        Args:
            df (pandas.DataFrame):
                Data frame that contains all the information. It is copied;
                the caller's frame is not modified.
            col_t (str):
                The column name in the data frame that contains the
                independent variable.
            col_obs (str):
                The column name in the data frame that contains the dependent
                variable.
            col_covs (list{list{str}}):
                List of lists of column names in the data frame used as
                covariates. The length of the outer list should be the number
                of parameters.
            col_group (str):
                The column name in the data frame that contains the grouping
                information.
            param_names (list{str}):
                Names of the parameters in the specific functional form.
            link_fun (list{function}):
                List of link functions, one for each parameter.
            var_link_fun (list{function}):
                List of link functions for the variables including fixed
                effects and random effects.
            fun (callable):
                Specific functional form that the curve will be fit to.
            col_obs_se (str | None, optional):
                Column name of the observation standard error. When `None`,
                all observation standard errors are taken to be one.
            loss_fun (callable | None, optional):
                Loss function. When `None`, `normal_loss` is used.
            scale_obs_se (bool, optional):
                If true, rescale the observation standard errors so that
                their mean equals the mean absolute value of the observations.
        """
        # input data
        self.df = df.copy()
        self.col_t = col_t
        self.col_obs = col_obs
        self.col_covs = col_covs
        self.col_group = col_group
        self.param_names = np.array(param_names)
        self.link_fun = link_fun
        self.var_link_fun = var_link_fun
        self.fun = fun
        self.loss_fun = normal_loss if loss_fun is None else loss_fun
        self.col_obs_se = col_obs_se

        self.group_names = np.sort(self.df[self.col_group].unique())

        # dimensions
        self.num_obs = self.df.shape[0]
        self.num_params = self.param_names.size
        self.num_groups = self.group_names.size

        # sort the dataframe by group, then by the independent variable
        self.df.sort_values([self.col_group, self.col_t], inplace=True)

        # extracting information (always from the sorted copy)
        self.obs = self.df[self.col_obs].values
        if self.col_obs_se is None:
            self.obs_se = np.ones(self.num_obs)
        else:
            self.obs_se = self.df[col_obs_se].values

        self.scale_obs_se = scale_obs_se
        if self.scale_obs_se:
            # Create a new array instead of scaling in place: the array
            # returned by `.values` may share memory with `self.df`, may be
            # read-only, and may have an integer dtype that cannot hold the
            # scaled result.
            self.obs_se = self.obs_se * (
                np.abs(self.obs).mean() / self.obs_se.mean())

        self.t = self.df[self.col_t].values
        self.group = self.df[self.col_group].values
        # Use the sorted copy here as well; reading from the caller's
        # (possibly unsorted) frame would misalign covariates with obs/t/group.
        self.covs = [self.df[name].values for name in self.col_covs]
        self.fe_sizes = np.array([cov.shape[1] for cov in self.covs])
        self.fe_idx = utils.sizes_to_indices(self.fe_sizes)
        self.num_fe = self.fe_sizes.sum()
        self.num_re = self.num_groups * self.num_fe

        # parameter information: name -> position in param_names
        self.param_idx = {name: i for i, name in enumerate(self.param_names)}

        # group information
        self.group_sizes = {
            name: np.sum(self.group == name)
            for name in self.group_names
        }
        # group sizes in the order of group_names (the sorted row order)
        self.order_group_sizes = np.array(
            [self.group_sizes[name] for name in self.group_names])
        # position of the last row of each group in the sorted data
        self.order_group_idx = np.cumsum(self.order_group_sizes) - 1
        group_idx = utils.sizes_to_indices(self.order_group_sizes)
        self.group_idx = {
            name: group_idx[i]
            for i, name in enumerate(self.group_names)
        }

        # place holders, filled in later by other methods
        self.param_shared = []
        self.result = None
        self.params = None
        # one [0.0, inf] row per fixed effect
        self.fe_gprior = np.array([[0.0, np.inf]] * self.num_fe)
        self.re_gprior = np.array([[0.0, np.inf]] * self.num_fe)
        self.fun_gprior = None
Ejemplo n.º 2
0
#! /usr/bin/env python3
"""
{begin_markdown sizes_to_indices_xam}
{spell_markdown utils}

# Example and Test of sizes_to_indices

## Function Documentation
[sizes_to_indices](sizes_to_indices.md)

## Example Source Code
```python"""
import numpy

from curvefit.core.utils import sizes_to_indices

# Three consecutive index ranges with lengths 2, 4 and 3.
sizes = [2, 4, 3]
indices = sizes_to_indices(sizes)
# array_equal checks the shape as well as the values, so a result of the
# wrong length fails the assertion instead of breaking the comparison itself.
assert numpy.array_equal(indices[0], numpy.array([0, 1]))
assert numpy.array_equal(indices[1], numpy.array([2, 3, 4, 5]))
assert numpy.array_equal(indices[2], numpy.array([6, 7, 8]))

print('sizes_to_indices.py: OK')
"""```
{end_markdown sizes_to_indices_xam}
"""
Ejemplo n.º 3
0
def test_sizes_to_indices(sizes, indices):
    """Check `utils.sizes_to_indices(sizes)` against the expected `indices`.

    One comparison is made per entry of `sizes` (which must expose `.size`),
    using `np.allclose` on the expected and computed index arrays.
    """
    computed = utils.sizes_to_indices(sizes)
    print(computed)
    # Evaluate every comparison before asserting, one per entry of `sizes`.
    matches = []
    for pos in range(sizes.size):
        matches.append(np.allclose(indices[pos], computed[pos]))
    assert all(matches)
Ejemplo n.º 4
0
def effects2params(x, group_sizes, covs, link_fun, var_link_fun, expand=True):
    r"""
    {begin_markdown effects2params}
    {spell_markdown params covs}

    # `curvefit.core.effects2params`
    ## Map Vector of Fixed and Random Effects to Parameter Matrix

    Extracts fixed and random effects and converts them to parameters.
    Needs to use [`unzip_x`](unzip_x.md).

    ## Syntax
    ```python
    params = curvefit.core.effects2params.effects2params(
        x, group_sizes, covs, link_fun, var_link_fun, expand=True
    )
    ```

    ## Arguments

    - `x (np.array)`:
        This is a one dimensional numpy array containing a value for the
        fixed effects followed by the random effects.
        The random effects are divided into
        sub-vectors with length equal to the number of fixed effects.
        The i-th sub-vector corresponds to the i-th group of observations.
    - `group_sizes (array-like)`: A vector of positive integers.
        The first `group_sizes[0]` observations correspond to the first group,
        the next `group_sizes[1]` corresponds to the second group, and so on.
        The total number of observations is the sum of the group sizes.
    - `covs (List[np.ndarray])`: Is a `list` with length equal to the number
        of parameters and `covs[k]` is a two dimensional numpy array with the
        following contents:
        -- `covs[k].shape[0]` is the number of observations
        -- `covs[k].shape[1]` is the number of fixed effects corresponding to the
        k-th parameter.
        -- `covs[k][i, ell]` is the covariate value corresponding to the
        i-th observation and ell-th covariate for the k-th parameter.
    - `link_fun (List[Callable])`: The value `len(link_fun)` is equal to the
        number of parameters and `link_fun[k]` is a function with one
        numpy array argument and result that acts element by element and
        transforms the k-th parameter.
    - `var_link_fun (List[Callable])`:
        The value `len(var_link_fun)` is equal to the number of fixed effects and
        `var_link_fun[j]` is a function with one numpy array argument and
        result that acts element by element and transforms the j-th fixed effect.
        The first `covs[0].shape[1]` fixed effects correspond to the first
        parameter, the next `covs[1].shape[1]` fixed effects correspond to the
        second parameter and so on.
    - `expand (bool)`:  If *expand* is `True` (`False`), create
        parameters for each observation (for each group of observations).

    ## Returns

    - `params (array-like)`:
        Let \( f_j \) be the vector of fixed effects and
        \( r_{i,j} \) the matrix of random effects corresponding to *x*.
        We define the matrix, with row dimension equal the number of groups
        and column dimension equal the number of fixed effects
        \[
            v_{i,j} = V_j \left( f_j + r_{i,j} \right)
        \]
        where \( V_j \) is the function `var_link_fun[j]`.
        If *expand* is true (false) \( i \) indexes observations (groups).
        (If *expand* is true the random effect for a group gets repeated
        for all the observations in the group.)
        The return value `params` is a two dimensional numpy array with
        `params.shape[0]` equal to the number of parameters and
        `params.shape[1]` equal to the number of observations, if *expand* is true,
        number of groups, if *expand* is false.
        The value `params[k][i]` is
        \[
            P_k \left( \sum_{j(k)} v_{i,j} c_{i,j} \right)
        \]
        where \( P_k \) is the function `link_fun[k]`,
        \( j(k) \) is the set of fixed effects indices
        corresponding to the k-th parameter,
        \( c_{i,j} \) is the covariate value corresponding to the
        j-th fixed effect and the i-th observation, if *expand* is true,
        or i-th group, if *expand* is false.
        (If *expand* is false the covariate values for a group are those of
        the last observation in the group.)

    ## Example
    See [effects2params_xam](effects2params_xam.md).

    {end_markdown effects2params}
    """
    num_obs = numpy.sum(group_sizes)
    num_groups = len(group_sizes)
    num_params = len(covs)
    fe_sizes = numpy.array([cov.shape[1] for cov in covs])
    num_fe = fe_sizes.sum()
    fe_idx = sizes_to_indices(fe_sizes)

    # asserts
    for cov in covs:
        assert cov.shape[0] == num_obs
    assert len(link_fun) == num_params

    # unpack fe and re from x
    fe, re = unzip_x(x, num_groups, num_fe)
    if expand:
        # expand random effects: one row per observation
        re = numpy.repeat(re, group_sizes, axis=0)
    else:
        # subsample covariates: keep the row of the last observation
        # in each group
        group_idx = numpy.cumsum(group_sizes) - 1
        covs = [cov[group_idx, :] for cov in covs]

    # var[:, j] = var_link_fun[j]( fe[j] + re[:, j] )
    var = fe + re
    for j in range(num_fe):
        var[:, j] = var_link_fun[j](var[:, j])

    # params[k][i] = link_fun[k]( sum over j in fe_idx[k] of
    #                             covariate[i, j] * var[i, j] )
    shape = (num_params, num_obs) if expand else (num_params, num_groups)
    # The result takes the element type of x; presumably so that element
    # types other than float pass through unchanged -- confirm with callers.
    params = numpy.empty(shape, dtype=type(x[0]))
    for k in range(num_params):
        # covariate times variable for k-th parameter
        prod = covs[k] * var[:, fe_idx[k]]
        # sum of products for k-th parameter
        params[k] = numpy.sum(prod, axis=1)
        # transform the sum
        params[k] = link_fun[k](params[k])

    return params