def sample_from_covariance_matrix(
    model,
    modelfit_results=None,
    parameters=None,
    force_posdef_samples=None,
    force_posdef_covmatrix=False,
    n=1,
    seed=None,
):
    """Draw parameter vectors from the estimated uncertainty covariance matrix

    The parameter estimates are used as the mean and the (sub)matrix of the
    covariance matrix for the selected parameters as the covariance of the
    sampling function handed to ``sample_from_function``.

    Parameters
    ----------
    model
        Model whose ``modelfit_results`` are used when none are given. It is
        also forwarded to ``sample_from_function``.
    modelfit_results
        Results to take parameter estimates and covariance matrix from.
        None means ``model.modelfit_results``.
    parameters
        Use to only sample a subset of the parameters. None means all
        parameters in the parameter estimates.
    force_posdef_samples
        Set to how many iterations to do before forcing all samples to be
        positive definite. None is default and means never and 0 means always.
        Forwarded unchanged to ``sample_from_function``.
    force_posdef_covmatrix
        If true, a covariance matrix that is not positive definite is
        replaced by the result of ``nearest_posdef`` and a warning is issued.
        If false (default) such a matrix raises an error.
    n
        Forwarded to ``sample_from_function`` as ``n``.
    seed
        None or an int is turned into a generator with
        ``np.random.default_rng``. Any other object is passed on as is.

    Returns
    -------
    A dataframe with one sample per row

    Raises
    ------
    ValueError
        If the covariance matrix is not positive definite and
        ``force_posdef_covmatrix`` is false.
    """
    if modelfit_results is None:
        modelfit_results = model.modelfit_results

    if parameters is None:
        parameters = list(modelfit_results.parameter_estimates.index)

    # Only None and plain ints are converted; anything else is trusted to
    # already be usable by the sampling function.
    rng = seed
    if seed is None or isinstance(seed, int):
        rng = np.random.default_rng(seed)

    estimates = modelfit_results.parameter_estimates[parameters]
    mean = estimates.to_numpy()
    cov = modelfit_results.covariance_matrix[parameters].loc[parameters].to_numpy()

    if not is_posdef(cov):
        if not force_posdef_covmatrix:
            raise ValueError("Uncertainty covariance matrix not positive-definite")

        original_cov = cov
        cov = nearest_posdef(original_cov)
        # Report how much the matrix changed: differences between the norms
        # of the original and the replacement matrix.
        delta_frobenius = np.linalg.norm(original_cov) - np.linalg.norm(cov)
        delta_max = np.abs(original_cov).max() - np.abs(cov).max()
        warnings.warn(
            f'Covariance matrix was forced to become positive definite.\n'
            f'    Difference in the frobenius norm: {delta_frobenius:.3e}\n'
            f'    Difference in the max norm: {delta_max:.3e}\n'
        )

    sampler = partial(sample_truncated_joint_normal, mean, cov, seed=rng)
    return sample_from_function(
        model,
        sampler,
        parameters=estimates.index,
        force_posdef_samples=force_posdef_samples,
        n=n,
    )
Exemple #2
0
def calculate_results_using_bipp(
    frem_model, continuous, categorical, rescale=True, samples=2000, seed=None
):
    """Estimate a covariance matrix for the frem model using the BIPP method

    Bootstrap on the individual parameter posteriors.
    Only the individual estimates, individual uncertainties and the parameter
    estimates are needed.

    A pool of individual estimate samples is resampled with replacement,
    each resample is scaled by a shrinkage correction factor and its
    covariance matrix is computed. Resamples whose covariance matrix is not
    positive definite are discarded and redrawn, so the loop runs until
    ``samples`` matrices have been accepted.

    Parameters
    ----------
    frem_model
        The FREM model. Its last IIV distribution and its
        ``modelfit_results.individual_shrinkage`` are used.
    continuous
        Forwarded to ``calculate_results_from_samples``.
    categorical
        Forwarded to ``calculate_results_from_samples``.
    rescale
        Forwarded to ``calculate_results_from_samples``.
    samples
        Number of accepted bootstrap covariance matrices to collect.
    seed
        None or an int is turned into a generator with
        ``np.random.default_rng``. Any other object is used as is and must
        provide a ``bit_generator`` attribute.

    Returns
    -------
    The result of ``calculate_results_from_samples``
    """
    rng = seed
    if seed is None or isinstance(seed, int):
        rng = np.random.default_rng(seed)

    rvs, dist = frem_model.random_variables.iiv.distributions()[-1]
    eta_names = [rv.name for rv in rvs]
    pool = sample_individual_estimates(frem_model, parameters=eta_names, seed=rng)
    n_individuals = len(pool.index.unique())
    shrinkage = frem_model.modelfit_results.individual_shrinkage

    # The lower triangle of the distribution's sigma gives the column names,
    # and the same positions are read from every bootstrap covariance matrix.
    lower_triangle = np.tril_indices(len(eta_names))
    column_names = np.array(dist.sigma).astype(str)[lower_triangle]

    accepted = np.empty((samples, len(column_names)))
    n_accepted = 0
    while n_accepted < samples:
        resampled = pool.sample(
            n=n_individuals, replace=True, random_state=rng.bit_generator
        )
        mean_shrinkage = shrinkage.loc[resampled.index].mean()
        correction = (1 / (1 - mean_shrinkage)) ** (1 / 2)
        resampled_cov = (resampled * correction).cov()
        # Rejected resamples do not advance the counter; a new one is drawn.
        if is_posdef(resampled_cov.to_numpy()):
            accepted[n_accepted, :] = resampled_cov.values[lower_triangle]
            n_accepted += 1

    parameter_frame = pd.DataFrame(accepted, columns=column_names)
    return calculate_results_from_samples(
        frem_model, continuous, categorical, parameter_frame, rescale=rescale
    )
Exemple #3
0
def sample_from_covariance_matrix(
    model,
    modelfit_results=None,
    parameters=None,
    force_posdef_samples=None,
    force_posdef_covmatrix=False,
    n=1,
):
    """Draw parameter vectors from the estimated uncertainty covariance matrix

       Samples come from ``sample_truncated_joint_normal`` using the parameter
       estimates, the covariance matrix and the lower and upper bounds of the
       non-fixed parameters. Sampling is repeated until ``n`` rows have been
       kept.

       :param model: model supplying bounds, random variables and, when
                     modelfit_results is not given, the results to use
       :param modelfit_results: results to take estimates and covariance matrix
                                from. None means the results from the model
       :param parameters: use to only sample a subset of the parameters. None means all
       :param force_posdef_samples: Set to how many iterations to do before forcing all
                                    samples to be positive definite. None is default and means
                                    never and 0 means always
       :param force_posdef_covmatrix: if true, replace a covariance matrix that is not
                                      positive definite with the result of nearest_posdef
                                      instead of raising
       :param n: number of samples (rows) to return
       :return: a dataframe with one sample per row
       :raises ValueError: if the covariance matrix is not positive definite and
                           force_posdef_covmatrix is false
    """
    if modelfit_results is None:
        modelfit_results = model.modelfit_results

    if parameters is None:
        parameters = list(modelfit_results.parameter_estimates.index)

    estimates = modelfit_results.parameter_estimates[parameters]
    columns = estimates.index
    mean = estimates.to_numpy()
    cov = modelfit_results.covariance_matrix[parameters].loc[parameters].to_numpy()
    if not is_posdef(cov):
        if not force_posdef_covmatrix:
            raise ValueError("Uncertainty covariance matrix not positive-definite")
        cov = nearest_posdef(cov)

    # Truncation bounds come from the non-fixed parameters only.
    summary = model.parameters.summary().loc[parameters]
    summary = summary[~summary['fix']]
    lower = summary.lower.astype('float64').to_numpy()
    upper = summary.upper.astype('float64').to_numpy()

    # 0 means samples are forced to be valid from the very first iteration.
    force_posdef = force_posdef_samples == 0

    accepted = pd.DataFrame()
    missing = n
    iteration = 0
    while missing > 0:
        draws = sample_truncated_joint_normal(mean, cov, lower, upper, n=missing)
        candidates = pd.DataFrame(draws, columns=columns)
        if force_posdef:
            # Every row is kept after being moved to the nearest valid point.
            chosen = candidates.transform(
                model.random_variables.nearest_valid_parameters, axis=1
            )
        else:
            # Rejection step: keep only rows that validate.
            is_valid = candidates.apply(
                model.random_variables.validate_parameters, axis=1, use_cache=True
            )
            chosen = candidates[is_valid]
        accepted = pd.concat((accepted, chosen))
        missing = n - len(accepted)
        iteration += 1
        # Switch from rejecting to forcing once the iteration budget is used.
        if (
            not force_posdef
            and force_posdef_samples is not None
            and iteration >= force_posdef_samples
        ):
            force_posdef = True

    return accepted.reset_index(drop=True)