Beispiel #1
0
def test_loss_fun(test_data, param_names, fun, link_fun, var_link_fun,
                  loss_fun):
    """Check that ``CurveModel.objective`` matches a hand-computed loss.

    The expected value is ``loss_fun`` applied to the residual
    ``(obs - fun(t, params)) / obs_se``, where ``params`` is the first
    column of the un-expanded ``effects2params`` result for the same ``x``.
    All arguments are pytest fixtures.
    """
    covariate_names = [['intercept']] * 3
    model = CurveModel(test_data, 't', 'obs', covariate_names, 'group',
                       param_names, link_fun, var_link_fun, fun,
                       loss_fun=loss_fun)

    # Effects vector: three ones followed by three zeros.
    x = np.hstack([np.ones(3), np.zeros(3)])

    # expand=False and only column 0 of the result is used below.
    group_params = effects2params(x,
                                  model.order_group_sizes,
                                  model.covs,
                                  model.link_fun,
                                  model.var_link_fun,
                                  expand=False)
    first_column = group_params[:, 0]

    scaled_residual = (model.obs - fun(model.t, first_column)) / model.obs_se

    actual = model.objective(x)
    expected = loss_fun(scaled_residual)
    assert np.abs(actual - expected) < 1e-10
Beispiel #2
0
def test_compute_rmse(test_data, param_names, fun, link_fun, var_link_fun,
                      loss_fun):
    """Check ``CurveModel.compute_rmse`` without observation weighting.

    With ``use_obs_se=False`` the result must equal
    ``sqrt(mean((obs - fun(t, params))**2))`` computed directly from the
    model's data. All arguments are pytest fixtures.
    """
    covariate_names = [['intercept']] * 3
    model = CurveModel(test_data, 't', 'obs', covariate_names, 'group',
                       param_names, link_fun, var_link_fun, fun,
                       loss_fun=loss_fun)

    # Effects vector: three ones followed by three zeros.
    x = np.hstack([np.ones(3), np.zeros(3)])

    # Default `expand` is used here, as in the call under test.
    params = effects2params(x, model.order_group_sizes, model.covs,
                            model.link_fun, model.var_link_fun)
    raw_residual = model.obs - model.fun(model.t, params)
    expected = np.sqrt(np.mean(raw_residual**2))

    rmse = model.compute_rmse(x=x, use_obs_se=False)

    assert np.abs(rmse - expected) < 1e-10
Beispiel #3
0
 def get_params(self, x, expand=False):
     """Convert the effects vector ``x`` into model parameters.

     Forwards ``x`` to ``effects2params`` together with the group sizes,
     covariate matrices and variable link functions held in
     ``self.data_inputs`` and the parameter link functions held in
     ``self.param_set``; ``expand`` is passed through unchanged.
     """
     inputs = self.data_inputs
     params = effects2params(
         x,
         inputs.group_sizes,
         inputs.covariates_matrices,
         self.param_set.link_fun,
         inputs.var_link_fun,
         expand=expand,
     )
     return params
    def compute_rmse(self, x=None, use_obs_se=True):
        """Compute the Root Mean Square Error of the model fit.

        Args:
            x (numpy.ndarray | None, optional):
                Solution array to evaluate. If None, ``self.result.x`` is
                used, which requires that ``self.result`` is not None.
            use_obs_se (bool, optional):
                If True, each squared residual is divided by the squared
                observation standard error and the sum is normalized by the
                sum of ``1 / obs_se**2``. If False, the plain mean of the
                squared residuals is used.

        Returns:
            float: root mean square error.
        """
        if x is None:
            assert self.result is not None
            x = self.result.x

        params = effects2params(x, self.order_group_sizes, self.covs,
                                self.link_fun, self.var_link_fun)
        squared_residual = (self.obs - self.fun(self.t, params))**2

        if not use_obs_se:
            return np.sqrt(np.mean(squared_residual))

        # Weighted mean of squared residuals with weights 1 / obs_se**2.
        obs_var = self.obs_se**2
        weighted_sum = np.sum(squared_residual / obs_var)
        weight_total = np.sum(1.0 / obs_var)
        return np.sqrt(weighted_sum / weight_total)
Beispiel #5
0
 def fit(self, data, x_init=None, options=None):
     """Fit the wrapped solver, then refit ``self.gm_model`` on translated data.

     Does nothing unless ``self.assert_solver_defined()`` returns ``True``.
     Otherwise:

     1. ``self.solver`` is fitted on ``data`` with ``x_init`` and ``options``.
     2. The fitted effects ``self.solver.x_opt`` are converted to parameters
        with ``effects2params(..., expand=False)`` and the first column is
        set on ``self.gm_model``.
     3. A new ``DataInputs`` is built from the model's ``t``, ``obs`` and
        ``obs_se``; its ``obs`` is replaced by the output of
        ``data_translator(obs, model.curve_fun, gaussian_pdf)``.
     4. A ``ScipyOpt`` solver on ``self.gm_model`` is fitted on that data and
        its ``x_opt`` / ``fun_val_opt`` are copied onto ``self``.

     Returns:
         None
     """
     if self.assert_solver_defined() is not True:
         return

     self.solver.fit(data, x_init, options)
     model = self.get_model_instance()
     self.input_curve_fun = model.curve_fun

     source_inputs = model.data_inputs
     params = effects2params(
         self.solver.x_opt,
         source_inputs.group_sizes,
         source_inputs.covariates_matrices,
         model.param_set.link_fun,
         source_inputs.var_link_fun,
         expand=False,
     )
     # Only the first column of the un-expanded parameters is used.
     self.gm_model.set_params(params[:, 0])
     gm_solver = ScipyOpt(self.gm_model)

     data_inputs_gm = DataInputs(
         t=model.data_inputs.t,
         obs=model.data_inputs.obs,
         obs_se=model.data_inputs.obs_se,
     )
     # Replace the observations with their translated counterpart before
     # fitting the second-stage solver.
     data_inputs_gm.obs = data_translator(data_inputs_gm.obs,
                                          model.curve_fun,
                                          gaussian_pdf)
     gm_solver.fit(data_inputs_gm)

     self.x_opt = gm_solver.x_opt
     self.fun_val_opt = gm_solver.fun_val_opt
    def fit_params(self,
                   fe_init,
                   re_init=None,
                   fe_bounds=None,
                   re_bounds=None,
                   fe_gprior=None,
                   re_gprior=None,
                   fun_gprior=None,
                   fixed_params=None,
                   smart_initialize=False,
                   fixed_params_initialize=None,
                   options=None,
                   smart_init_options=None):
        """Fit the parameters.

        Minimizes ``self.objective`` (gradient ``self.gradient``) with
        L-BFGS-B starting from ``[fe_init, re_init]``. On return,
        ``self.result`` holds the optimizer result and ``self.params`` the
        un-expanded parameters computed from ``self.result.x``.
        ``self.fun_gprior`` is always overwritten; ``self.fe_gprior`` and
        ``self.re_gprior`` are overwritten only when the corresponding
        argument is not None.

        Args:
            fe_init (numpy.ndarray):
                Initial value for the fixed effects.
            re_init (numpy.ndarray, optional):
                Initial value for the random effects. Defaults to zeros of
                length ``self.num_re``.
            fe_bounds (list of lists, optional):
                Bounds for fixed effects. Defaults to unbounded.
            re_bounds (list of lists, optional):
                Bounds for random effects, one pair per fixed effect; the
                same bounds are repeated for every group. Defaults to
                unbounded.
            fe_gprior (list of lists, optional):
                Gaussian prior for fixed effects.
            re_gprior (list of lists, optional):
                Gaussian prior for random effects.
            fun_gprior (list of lists, optional):
                Functional Gaussian prior.
            fixed_params (list{str}, optional):
                A list of parameter names that will be fixed at initial value.
            smart_initialize (bool, optional):
                Whether or not to initialize a model's fixed effects based
                on the average fixed effects across many individual models
                fit with the same settings and the random effects
                based on the fixed effects deviation from the average
                in the individual models
            fixed_params_initialize (list{str}, optional):
                A list of parameter names that will be fixed at initial value
                during the smart initialization. Passing it while
                ``smart_initialize`` is False raises ``Warning`` (see Raises).
            options (dict, optional):
                Options for the optimizer.
            smart_init_options (dict, optional):
                Options for the inner model; merged on top of a copy of
                ``options`` for the smart initialization fits.

        Raises:
            AssertionError: If the length of ``fe_init``, ``fe_bounds``,
                ``re_bounds``, ``fe_gprior`` or ``re_gprior`` differs from
                ``self.num_fe``, or if ``fun_gprior`` does not have two
                elements with ``fun_gprior[1][1] > 0``.
            Warning: If ``fixed_params_initialize`` is given but
                ``smart_initialize`` is False.
            RuntimeError: If ``smart_init_options`` is given without
                ``options``, or if ``smart_initialize`` is requested for a
                model with a single group.
        """
        assert len(fe_init) == self.num_fe
        if fe_bounds is None:
            fe_bounds = [[-np.inf, np.inf]] * self.num_fe
        if re_bounds is None:
            re_bounds = [[-np.inf, np.inf]] * self.num_fe
        assert len(fe_bounds) == self.num_fe
        assert len(re_bounds) == self.num_fe

        if fe_gprior is not None:
            assert len(fe_gprior) == self.num_fe
            self.fe_gprior = np.array(fe_gprior)
        if re_gprior is not None:
            assert len(re_gprior) == self.num_fe
            self.re_gprior = np.array(re_gprior)
        if re_init is None:
            re_init = np.zeros(self.num_re)

        if fun_gprior is not None:
            assert len(fun_gprior) == 2
            assert fun_gprior[1][1] > 0.0

        self.fun_gprior = fun_gprior

        if fixed_params_initialize is not None:
            if not smart_initialize:
                # NOTE(review): this raises, so the fit is aborted even though
                # the message says the argument will be ignored. Kept as-is
                # because callers may rely on the exception; switching to
                # warnings.warn would match the message.
                raise Warning(
                    f"You passed in an initialization parameter "
                    f"fixed_params_initialize {fixed_params_initialize} "
                    f"but set smart_initialize=False. Will ignore fixed_params_initialize."
                )

        if smart_init_options is not None:
            if options is None:
                raise RuntimeError(
                    "Need to pass in options if you pass in smart init options."
                )

        if smart_initialize:
            # Work on a copy so the caller's `options` is not modified by the
            # smart-initialization overrides.
            smart_initialize_options = deepcopy(options)
            if smart_init_options is not None:
                smart_initialize_options.update(smart_init_options)
            if self.num_groups == 1:
                raise RuntimeError(
                    "Don't do initialization for models with only one group.")

            fe_dict = get_initial_params(
                groups=self.group_names,
                model=self,
                fit_arg_dict=dict(
                    fe_init=fe_init,
                    fe_bounds=fe_bounds,
                    fe_gprior=fe_gprior,
                    fixed_params=fixed_params_initialize,
                    options=smart_initialize_options,
                ))
            fe_init, re_init = compute_starting_params(fe_dict)
            print(f"Overriding fe_init with {fe_init}.")
            print(f"Overriding re_init with {re_init}.")

        x0 = np.hstack([fe_init, re_init])

        # Both bounds were already defaulted at the top of the method, so
        # they are never None here.
        fe_bounds = np.array(fe_bounds)
        re_bounds = np.array(re_bounds)

        if fixed_params is not None:
            for param in fixed_params:
                param_id = self.param_idx[param]
                # Pin the fixed effect to its starting value (lower == upper)
                # and force the matching random effects to zero.
                fe_bounds[param_id] = x0[param_id, None]
                re_bounds[param_id] = 0.0

        # Repeat the per-fixed-effect random-effect bounds for every group and
        # flatten them to one (lower, upper) row per random effect.
        re_bounds = np.repeat(re_bounds[None, :, :], self.num_groups, axis=0)
        bounds = np.vstack([fe_bounds, re_bounds.reshape(self.num_re, 2)])

        result = minimize(fun=self.objective,
                          x0=x0,
                          jac=self.gradient,
                          method='L-BFGS-B',
                          bounds=bounds,
                          options=options)

        self.result = result
        self.params = effects2params(self.result.x,
                                     self.order_group_sizes,
                                     self.covs,
                                     self.link_fun,
                                     self.var_link_fun,
                                     expand=False)
Beispiel #7
0
def objective_fun(x, t, obs, obs_se, covs, group_sizes, model_fun, loss_fun,
                  link_fun, var_link_fun, fe_gprior, re_gprior, param_gprior):
    """
    {begin_markdown objective_fun}
    {spell_markdown covs gprior param params}

    # Curve Fitting Objective Function of Fixed and Random Effects

    ## Syntax
    ```python
        val = objective_fun(
            x, t, obs, obs_se, covs, group_sizes,
            model_fun, loss_fun, link_fun, var_link_fun,
            fe_gprior, re_gprior, param_gprior,
        )
    ```

    ## Notation

    1. *num_obs* = `len(obs)` is the number of observations (measurements)

    2. *num_param* = `len(covs)` is the number of parameters in the model.

    3. *num_fe* = `len(fe_gprior)` is the number of fixed effects.

    4. *num_group* = `len(group_sizes)` is the number of groups

    5. *params* = `effects2params(x, group_sizes, covs, link_fun, var_link_fun)`
    is a *num_param* by *num_obs* matrix containing the parameters
    corresponding to each observation; see [effects2params](effects2params.md).

    6. A vector is either a `list` or a one dimension `numpy.array`.

    ## x
    is a one dimensional numpy array containing a value for the fixed effects
    followed by the random effects. The random effects are divided into
    sub-vectors with length equal to the number of fixed effects.
    The j-th sub-vector corresponds to the j-th group of observations.

    ## t
    is a vector with length *num_obs* containing the value
    of the independent variable corresponding to each observation.

    ## obs
    is a vector with length *num_obs* containing the observations
    (i.e. measurements).

    ## obs_se
    is a vector with length *num_obs* containing the standard deviation
    for the corresponding observation.

    ## covs
    For *k* = 0, ... , *num_param*-1,
    the value `len(covs[k])` is the number of fixed effects
    corresponding to the k-th parameter.
    The vector `covs[k][j]` has length *num_obs* and is the
    j-th covariate vector corresponding to the k-th parameter.

    ## group_sizes
    The observations are divided into groups.
    The first `group_sizes[0]` observations correspond to the first group,
    the next `group_sizes[1]` correspond to the second group, and so on.
    The sum of the group sizes is equal to *num_obs*.

    ## model_fun
    This vector valued function maps parameter values,
    [params](effects2params.md) returned by `effects2params`,
    to the model for the corresponding noiseless observations.
    The residual vector has length *num_obs* and is given by
    ```python
        residual = (obs - model_fun(t, params)) / obs_se
    ```

    ## loss_fun
    This scalar valued function maps the residual vector to the corresponding
    contribution to the objective function. For example, a Gaussian likelihood
    corresponds to
    ```python
        loss_fun(residual) = 0.5 * sum( residual * residual )
    ```

    ## link_fun, var_link_fun
    are used to compute *params*; see [Notation](#notation)

    ## fe_gprior
    is a *num_fe* by two numpy array. The value `fe_gprior[i][0]`
    is the prior mean for the i-th fixed effect and
    `fe_gprior[i][1]` is its standard deviation.

    ## re_gprior
    is a *num_fe* by *num_groups* by two numpy array, `re_gprior[i,j,0]`
    ( `re_gprior[i,j,1]` ) is the mean (standard deviation) for the
    random effect corresponding to the i-th fixed effect and the j-th group.

    ## param_gprior
    is either `None`, in which case there is no prior on the parameters,
    or a list. Its first element is a function
    of the *params* and its result is a numpy array. We use the notation
    ```python
        range_gprior = param_gprior[0](params)
    ```
    For this prior, *params* is computed with `expand=False`.
    The value `param_gprior[1][0]` ( `param_gprior[1][1]` ) is a numpy array
    corresponding to the mean (standard deviation) for *range_gprior*.

    ## val
    The return value is `loss_fun(residual)` plus one half the sum of the
    squared, standard deviation scaled, differences from the prior mean for
    *fe_gprior*, *re_gprior* and (when it is not `None`) *param_gprior*.

    {end_markdown objective_fun}
    """
    fe, re = effects2params.unzip_x(x, len(group_sizes), len(fe_gprior))

    # Data term: loss of the residual scaled by the observation
    # standard deviation.
    obs_params = effects2params.effects2params(
        x, group_sizes, covs, link_fun, var_link_fun)
    val = loss_fun((obs - model_fun(t, obs_params)) / obs_se)

    # Gaussian prior on the fixed effects.
    fe_mean = fe_gprior.T[0]
    fe_std = fe_gprior.T[1]
    val += 0.5 * numpy.sum((fe - fe_mean)**2 / fe_std**2)

    # Gaussian prior on the random effects.
    re_mean = re_gprior.T[0]
    re_std = re_gprior.T[1]
    val += 0.5 * numpy.sum((re - re_mean)**2 / re_std**2)

    if param_gprior is None:
        return val

    # Gaussian prior on a function of the un-expanded parameters.
    prior_params = effects2params.effects2params(
        x, group_sizes, covs, link_fun, var_link_fun, expand=False)
    range_gprior = param_gprior[0](prior_params)
    val += 0.5 * numpy.sum(
        (range_gprior - param_gprior[1][0])**2 / param_gprior[1][1]**2)
    return val
Beispiel #8
0
def identity_fun(x):
    """Return ``x`` unchanged."""
    return x


# ----------------------------------------------------------------------
# inputs for effects2params
# (num_param and num_group are defined earlier in the file)
num_fe = num_param
num_x = (num_group + 1) * num_fe
x = numpy.arange(num_x, dtype=float) / num_x
# group sizes are the odd numbers 1, 3, 5, ...
group_sizes = 2 * numpy.arange(num_group) + 1
num_obs = sum(group_sizes)
# one covariate per parameter, identically one for every observation
covs = [numpy.ones((num_obs, 1), dtype=float) for _ in range(num_param)]
link_fun = [numpy.exp, identity_fun, numpy.exp]
var_link_fun = [identity_fun] * num_param
expand = False
param = effects2params(x, group_sizes, covs, link_fun, var_link_fun, expand)
# ----------------------------------------------------------------------
# check result
eps99 = 99.0 * numpy.finfo(float).eps
# x holds the fixed effects first, then one row of random effects per group
fe = x[:num_fe]
re = x[num_fe:].reshape(num_group, num_fe)
fe_re = fe + re
var = numpy.empty((num_group, num_fe), dtype=float)
for j in range(num_fe):
    var[:, j] = var_link_fun[j](fe_re[:, j])
check = numpy.empty((num_param, num_group), dtype=float)
for k in range(num_param):
    # covs[k][0] is the first row of the k-th covariate matrix
    check[k, :] = link_fun[k](var[:, k] * covs[k][0])
#
# relative error between effects2params and the direct computation
rel_error = param / check - 1.0
assert (abs(rel_error) < eps99).all()
def objective_fun(
    x,
    t,
    obs,
    obs_se,
    covs,
    group_sizes,
    model_fun,
    loss_fun,
    link_fun,
    var_link_fun,
    fe_gprior,
    re_gprior,
    param_gprior,
    re_zero_sum_std,
):
    """
    {begin_markdown objective_fun}
    {spell_markdown covs gprior param params obj}

    # `curvefit.core.objective_fun.objective_fun`
    # Curve Fitting Objective Function of Fixed and Random Effects

    ## Syntax
    ```python
    obj_val = curvefit.core.objective_fun.objective_fun(
        x, t, obs, obs_se, covs, group_sizes, model_fun, loss_fun,
        link_fun, var_link_fun, fe_gprior, re_gprior, param_gprior,
        re_zero_sum_std
    )
    ```

    ## Notation

    1. *num_obs* = `len(obs)` is the number of observations (measurements)
    2. *num_param* = `len(covs)` is the number of parameters in the model.
    3. *num_fe* = `fe_gprior.shape[0]` is the number of fixed effects.
    4. *num_group* = `len(group_sizes)` is the number of groups
    5. *params* = `effects2params(x, group_sizes, covs, link_fun, var_link_fun)`
    is a *num_param* by *num_obs* numpy array containing the parameters
    corresponding to each observation; see [effects2params](effects2params.md).
    6. A vector is either a `list` or a one dimension `numpy.array`.

    ## Arguments

    - `x (np.array)`: is a one dimension numpy array containing a value for the fixed effects
        followed by the random effects. The random effects are divided into
        sub-vectors with length equal to the number of fixed effects.
        The i-th sub-vector corresponds to the i-th group of observations.
    - `t (np.array)`: is a one dimension numpy array with length *num_obs* containing the value
        of the independent variable corresponding to each observation.
    - `obs (np.array)`: is a one dimension numpy array with length *num_obs* containing the
        observations (i.e. measurements).
    - `obs_se (np.array)`: is a one dimension numpy array with length *num_obs* containing the
        standard deviation for the corresponding observation.
    - `covs (List[np.ndarray])`: is a `list` with length equal to the number of parameters and `covs[k]`
        is a two dimension numpy array with the following contents:
        -- `covs[k].shape[0]` is the number of observations
        -- `covs[k].shape[1]` is the number of fixed effects corresponding to the
            k-th parameter.
        -- `covs[k][i, ell]` is the covariate value corresponding to the
        i-th observation and ell-th covariate for the k-th parameter.
    - `group_sizes (List[int])`: the observations are divided into groups.
        The first `group_sizes[0]` observations correspond to the first group,
        the next `group_sizes[1]` correspond to the second group, and so on.
        The sum of the group sizes is equal to *num_obs*.
    - `model_fun (Callable)`: this vector valued function maps parameter values,
        [params](effects2params.md) returned by `effects2params`,
        to the model for the corresponding noiseless observations.
        The observation residual vector has length *num_obs* and is given by
        ```python
            obs_res = (obs - model_fun(t, params)) / obs_se
        ```
    - `loss_fun (Callable)`: this scalar valued function maps the observation residual vector to the
        corresponding contribution to the objective function.
        For example, if *loss_fun* corresponds to a Gaussian likelihood,
        it is equal to
        ```python
            gaussian_loss(obs_res) = 0.5 * sum( obs_res * obs_res )
        ```
    - `link_fun (List[callable])`: the parameter link functions, see [Parameter](Parameter.md)
    - `var_link_fun (List[callable])`: the variable link functions, see [Variable](Variable.md)
    - `fe_gprior (np.array)`: is a *num_fe* by two numpy array. The value `fe_gprior[j][0]`
        is the prior mean for the j-th fixed effect and
        `fe_gprior[j][1]` is its standard deviation.
        If `fe` is the fixed effect sub-vector of `x`, the prior residual
        for the fixed effects is
        ```python
            fe_res = ( fe.T - fe_gprior[:,0] ) / fe_gprior[:,1]
        ```
        where `fe.T` denotes the transpose of `fe`.
    - `re_gprior (np.array)`: is a *num_fe* by *num_groups* by two numpy array, `re_gprior[j,i,0]`
        ( `re_gprior[j,i,1]` ) is the mean (standard deviation) for the
        random effect corresponding to the j-th fixed effect and the i-th group.
        If `re` is the matrix of random effects corresponding to `x`,
        the prior residual for the random effects is
        ```python
            re_res = ( re.T - re_gprior[:,:,0] ) / re_gprior[:,:,1]
        ```
    - `param_gprior (Tuple[Callable, Tuple[np.array, np.array]] | None)`: is `None`,
        in which case there is no prior on the parameters,
        or a list with two elements.
        The first element is a function
        of the *params* and its result is a numpy array. We use the notation
        ```python
            range_gprior = param_gprior[0](params)
        ```
        There is a subtlety here, the *params* above are computed with
        `expand=False`; their column dimension is *num_groups* (not *num_obs*).
        The value `param_gprior[1][0]` ( `param_gprior[1][1]` ) is a numpy array
        corresponding to the mean (standard deviation) for *range_gprior*.
        The prior residual for the parameters is
        ```python
            param_res = (range_gprior - param_gprior[1][0]) / param_gprior[1][1]
        ```
    -   `re_zero_sum_std (np.array)`: is a vector with length *num_fe*,
        `re_zero_sum_std[j]` is the standard deviation for the sum of the
        random effect corresponding to the j-th fixed effect. Note that a
        standard deviation of `np.inf` corresponds
        to no prior on the sum of the corresponding random effects.
        The residual for the j-th sum is
        ```python
            zero_sum_res[j] = sum( re[:,j] ) / re_zero_sum_std[j]
        ```

    ## Returns
    ### `obj_val`
    The return *val* is a `float` equal to the objective function
    ```python
        obj_val = loss_fun(obs_res) + gaussian_loss(fe_res)
            + gaussian_loss(re_res) + gaussian_loss(zero_sum_res)
            + gaussian_loss(param_res)
    ```
    where the last term is omitted when *param_gprior* is `None`.

    ## Example
    [objective_fun_xam](objective_fun_xam.md)

    {end_markdown objective_fun}
    """
    num_fe = len(fe_gprior)
    fe, re = unzip_x(x, len(group_sizes), num_fe)

    # Data term: loss of the residual scaled by the observation
    # standard deviation, using parameters expanded to one column per
    # observation.
    obs_params = effects2params(
        x, group_sizes, covs, link_fun, var_link_fun, expand=True)
    obj_val = loss_fun((obs - model_fun(t, obs_params)) / obs_se)

    # Gaussian prior on the fixed effects.
    fe_mean = fe_gprior.T[0]
    fe_std = fe_gprior.T[1]
    obj_val += 0.5 * numpy.sum((fe - fe_mean)**2 / fe_std**2)

    # Gaussian prior on the random effects.
    re_mean = re_gprior.T[0]
    re_std = re_gprior.T[1]
    obj_val += 0.5 * numpy.sum((re - re_mean)**2 / re_std**2)

    # Prior on the sum over groups of each fixed effect's random effects.
    for j in range(num_fe):
        zero_sum_res = numpy.sum(re[:, j]) / re_zero_sum_std[j]
        obj_val += 0.5 * zero_sum_res * zero_sum_res

    if param_gprior is None:
        return obj_val

    # Gaussian prior on a function of the un-expanded parameters.
    prior_params = effects2params(
        x, group_sizes, covs, link_fun, var_link_fun, expand=False)
    range_gprior = param_gprior[0](prior_params)
    obj_val += 0.5 * numpy.sum(
        (range_gprior - param_gprior[1][0])**2 / param_gprior[1][1]**2)
    return obj_val