예제 #1
0
 def __init__(self,
              param_set_processed: ParameterSet = None,
              eta_prior: Prior = None):
     """Store the eta prior, then initialize the parent class.

     Parameters
     ----------
     param_set_processed : ParameterSet, optional
         forwarded unchanged to the parent ``__init__``.
     eta_prior : Prior, optional
         prior stored on ``self.eta_prior``. When omitted, a prior with
         lower bound ``0.0`` and upper bound ``inf`` is used instead.
     """
     # Fall back to a non-negative, unbounded-above prior when none is given.
     self.eta_prior = (
         Prior(lower_bound=[0.0], upper_bound=[np.inf])
         if eta_prior is None
         else eta_prior
     )
     super().__init__(param_set_processed)
예제 #2
0
 def __post_init__(self):
     """Validate ``knots_type`` and install spline-specific defaults.

     Raises
     ------
     VariableError
         If ``knots_type`` is neither ``'frequency'`` nor ``'domain'``.
     """
     allowed_knots_types = ('frequency', 'domain')
     if self.knots_type not in allowed_knots_types:
         raise VariableError(
             f"Unknown knots_type for Spline {self.knots_type}.")

     self.spline = None
     self.add_re = False
     # NOTE(review): fe_prior is cleared before the base-class
     # initialization, presumably so its dimension check is skipped
     # -- TODO confirm.
     self.fe_prior = None
     Variable.__post_init__(self)

     if self.fe_prior is not None:
         return

     # No prior was installed: use unbounded limits, one entry per fixed
     # effect.
     lower = [-np.inf] * self._count_num_fe()
     upper = [np.inf] * self._count_num_fe()
     self.set_fe_prior(Prior(lower_bound=lower, upper_bound=upper))
예제 #3
0
def variable():
    """Build a ``Variable`` on ``cov1`` with random effects grouped by ``group``.

    Returns
    -------
    Variable
        configured with an identity link function, a bounded Gaussian prior
        on the fixed effect, a Gaussian prior (mean 1.0, std 2.0) on the
        random effect variance, and a bounded prior on the random effects.
    """
    fixed_effect_prior = GaussianPrior(lower_bound=[-2.0], upper_bound=[3.0])
    random_effect_var_prior = GaussianPrior(
        lower_bound=[-1.0],
        upper_bound=[1.0],
        mean=[1.0],
        std=[2.0],
    )
    random_effect_prior = Prior(lower_bound=[-10.0], upper_bound=[15.0])

    return Variable(
        covariate='cov1',
        var_link_fun=lambda x: x,
        fe_prior=fixed_effect_prior,
        add_re=True,
        col_group='group',
        re_var_prior=random_effect_var_prior,
        re_prior=random_effect_prior,
    )
예제 #4
0
class ParameterFunction:
    """A class for a function on parameters.

    Parameters
    ----------
    param_function_name: str
        name of the parameter function
    param_function: callable
        parameter function
    param_function_fe_prior: Prior, optional
        prior on the function; defaults to a default-constructed ``Prior``.
    """

    param_function_name: str
    param_function: Callable
    # The default ``Prior()`` is evaluated once, when the class body runs.
    param_function_fe_prior: Prior = Prior()

    def __post_init__(self) -> None:
        """Check that ``param_function_name`` is a string."""
        # NOTE: this is an ``assert``, so the check is skipped under ``-O``.
        assert isinstance(self.param_function_name, str)
예제 #5
0
def test_prior():
    """A default-constructed ``Prior`` is unbounded in both directions."""
    default_prior = Prior()
    expected_lower, expected_upper = [-np.inf], [np.inf]
    assert default_prior.lower_bound == expected_lower
    assert default_prior.upper_bound == expected_upper
예제 #6
0
def test_priors(lower, upper):
    """Bounds passed to ``Prior`` are stored unchanged on the instance."""
    expected_lower, expected_upper = [lower], [upper]
    bounded_prior = Prior(lower_bound=[lower], upper_bound=[upper])
    assert bounded_prior.lower_bound == expected_lower
    assert bounded_prior.upper_bound == expected_upper
예제 #7
0
 def test_set_fe_prior(self, spline_variable):
     """Setting a default ``Prior`` on the spline variable raises ``ValueError``.

     NOTE(review): presumably the default prior's dimension does not match
     the spline's number of fixed effects -- confirm against
     ``set_fe_prior``.
     """
     expect_value_error = pytest.raises(ValueError)
     with expect_value_error:
         spline_variable.set_fe_prior(Prior())
예제 #8
0
class Variable:
    """A class that stores information about a variable.

    Parameters
    ----------
    covariate: str
        name of the covariate for this variable.
    var_link_fun: callable
        link function for this variable.
    fe_prior: Prior, optional
        a prior of class :class:`~anml.parameter.prior.Prior`
        for fixed effects coefficient
    add_re: bool, optional
        whether to add random effects to this variable
    col_group: str, optional
        name for group column
    re_var_prior: Prior, optional
        a prior of class :class:`~anml.parameter.prior.Prior`
        for random effect variance
    re_prior: Prior, optional
        a prior of class :class:`~anml.parameter.prior.Prior`
        for random effects.

    Attributes
    ----------
    num_fe: int
        number of fixed effects, as returned by ``_count_num_fe``.
    num_re_var: int
        ``num_fe`` when ``add_re`` is set, otherwise 0.
    num_re: int
        ``n_groups * num_fe`` once groups are encoded; 0 after ``reset``.
    group_lookup, n_groups
        group encoding and its size; ``None`` until ``encode_groups`` runs.
    """
    covariate: str = None
    var_link_fun: Callable = lambda x: x

    # The default Prior instances below are evaluated once, when the class
    # body runs.
    fe_prior: Prior = Prior()

    add_re: bool = False
    col_group: str = None
    re_var_prior: Prior = Prior()
    # NOTE(review): a default lower bound of 0.0 on the random effects
    # themselves (rather than on their variance) looks unusual -- confirm
    # this is intended.
    re_prior: Prior = Prior(lower_bound=[0.0])

    def __post_init__(self):
        """Validate the configuration and derive the effect counts.

        Raises
        ------
        VariableError
            If ``covariate`` is set and is one of ``PROTECTED_NAMES``.
        ValueError
            If ``add_re`` is set without ``col_group``, or if the ``x_dim``
            of ``fe_prior`` / ``re_var_prior`` does not match ``num_fe`` /
            ``num_re_var``.
        """
        # An unset covariate is allowed here; it is only rejected later,
        # when a data frame is validated.
        if self.covariate is not None:
            self._check_protected_names()

        if self.add_re and self.col_group is None:
            raise ValueError(
                'When add_re is True, a group column must be provided.')

        self.num_fe = self._count_num_fe()
        self.num_re_var = self.num_fe if self.add_re else 0

        # A falsy prior (e.g. None) skips the dimension check.
        if self.fe_prior and self.fe_prior.x_dim != self.num_fe:
            raise ValueError(
                f'Dimension of fe_prior = {self.fe_prior.x_dim} should match num_fe = {self.num_fe}.'
            )
        if self.add_re and self.re_var_prior and self.re_var_prior.x_dim != self.num_re_var:
            raise ValueError(
                f'Dimension of re_var_prior = {self.re_var_prior.x_dim} should match num_re_var = {self.num_re_var}.'
            )

        self.reset()

    def reset(self):
        """Clear the state derived from an input data frame."""
        # erase everything related to input df
        # (i.e. not intrinsic to variable)
        self.group_lookup = None
        self.n_groups = None
        self.num_re = 0

    def _check_protected_names(self):
        """Reject a covariate whose name is in ``PROTECTED_NAMES``.

        Raises
        ------
        VariableError
            If ``covariate`` is one of ``PROTECTED_NAMES``.
        """
        if self.covariate in PROTECTED_NAMES:
            # The trailing space keeps the two adjacent literals from
            # running together ("that isnot in ...").
            raise VariableError("Choose a different covariate name that is "
                                f"not in {PROTECTED_NAMES}.")

    def _count_num_fe(self):
        """Return the number of fixed effects for this variable (always 1)."""
        return 1

    def _validate_df(self, df: pd.DataFrame):
        """Check that the columns this variable needs are present in ``df``.

        Parameters
        ----------
        df : pd.DataFrame
            input dataframe

        Raises
        ------
        VariableError
            If no covariate is set, the covariate column is missing, or
            random effects are enabled and the group column is missing.
        """
        if self.covariate is None:
            raise VariableError("No covariate has been set.")
        if self.covariate not in df.columns:
            raise VariableError(
                f"Covariate {self.covariate} is missing from the data frame.")
        if self.add_re and self.col_group not in df:
            raise VariableError(
                f"Group {self.col_group} is missing from the data frame.")

    def encode_groups(self, df: pd.DataFrame):
        """Convert a categorical column into ordinal numbers.

        Stores the encoding on ``group_lookup`` and updates ``n_groups``
        and ``num_re`` as a side effect.

        Parameters
        ----------
        df : pd.DataFrame
            input dataframe

        Returns
        -------
        list
            for each row, the value that ``group_lookup`` maps that row's
            group to.

        Raises
        ------
        ValueError
            Fewer than two groups in the group column.
        """
        group_assign_cat = df[self.col_group].to_numpy()
        # Inside the method body this name resolves to the module-level
        # ``encode_groups`` helper, not to this method.
        self.group_lookup = encode_groups(group_assign_cat)
        self.n_groups = len(self.group_lookup)
        if self.n_groups < 2:
            raise ValueError(f'Only one group in {self.col_group}.')
        self.num_re = self.n_groups * self.num_fe
        return [self.group_lookup[g] for g in group_assign_cat]

    def _design_matrix(self, df: pd.DataFrame) -> np.ndarray:
        """Returns the design matrix based on a covariate x.

        Parameters
        ----------
        df
            pandas DataFrame containing the covariate column

        Returns
        -------
        np.ndarray
            the covariate values reshaped to ``(len(x), 1)``.

        """
        x = df[self.covariate].values
        return np.asarray(x).reshape((len(x), 1))

    def build_design_matrix_fe(self, df: pd.DataFrame):
        """Build design matrix corresponding to fixed effects.

        The result is stored on ``design_matrix_fe``.

        Parameters
        ----------
        df : pd.DataFrame
            input dataframe

        Raises
        ------
        VariableError
            If ``df`` fails ``_validate_df``.
        """
        self._validate_df(df)
        self.design_matrix_fe = self._design_matrix(df)

    def build_design_matrix_re(self, df: pd.DataFrame):
        """Build design matrix corresponding to random effects.

        The result is stored on ``design_matrix_re``. The fixed-effects
        design matrix is built first if it does not exist yet.

        Parameters
        ----------
        df : pd.DataFrame
            input dataframe
        """
        assert self.add_re, 'No random effects for this variable.'
        # ``design_matrix_fe`` is only created by ``build_design_matrix_fe``;
        # getattr avoids an AttributeError when this method is called first.
        if getattr(self, 'design_matrix_fe', None) is None:
            self.build_design_matrix_fe(df)
        group_assign = self.encode_groups(df)
        self.design_matrix_re = build_re_matrix(self.design_matrix_fe,
                                                group_assign, self.n_groups)

    def build_bounds_fe(self):
        """Build bounds for fixed effects from ``fe_prior``."""
        self.lb_fe = self.fe_prior.lower_bound
        self.ub_fe = self.fe_prior.upper_bound

    def build_constraint_matrix_fe(self):
        """Build a placeholder (all-zero) constraint for fixed effects."""
        # if using None or [], need to have extra control flow or dimension
        # matching when combining variables
        self.constr_matrix_fe = np.zeros((1, self.num_fe))
        self.constr_lb_fe = [0.0]
        self.constr_ub_fe = [0.0]

    def build_bounds_re_var(self):
        """Build bounds for random effects variance from ``re_var_prior``."""
        assert self.add_re, 'No random effects for this variable'
        # Clamp the lower bound at zero.
        self.lb_re_var = np.maximum(0.0, self.re_var_prior.lower_bound)
        self.ub_re_var = self.re_var_prior.upper_bound

    def build_constraint_matrix_re_var(self):
        """Build a placeholder (all-zero) constraint for random effects variance."""
        assert self.add_re, 'No random effects for this variable'
        self.constr_matrix_re_var = np.zeros((1, self.num_re_var))
        self.constr_lb_re_var = [0.0]
        self.constr_ub_re_var = [0.0]

    def build_bounds_re(self):
        """Build bounds for random effects from ``re_prior``.

        Requires ``encode_groups`` to have run so that ``num_re`` is positive.
        """
        assert self.add_re and self.num_re > 0, 'No random effects for this variable or grouping is not defined yet.'
        # NOTE(review): assumes the prior bounds are Python lists, so ``*``
        # repeats them ``num_re`` times -- confirm against ``Prior``.
        self.lb_re = self.re_prior.lower_bound * self.num_re
        self.ub_re = self.re_prior.upper_bound * self.num_re

    def build_constraint_matrix_re(self):
        """Build a placeholder (all-zero) constraint for random effects.

        Requires ``encode_groups`` to have run so that ``num_re`` is positive.
        """
        assert self.add_re and self.num_re > 0, 'No random effects for this variable or grouping is not defined yet.'
        self.constr_matrix_re = np.zeros((1, self.num_re))
        self.constr_lb_re = [0.0]
        self.constr_ub_re = [0.0]