Beispiel #1
0
def sample_individual_estimates(model,
                                parameters=None,
                                samples_per_id=100,
                                seed=None):
    """Sample individual estimates given their covariance.

    For each individual a multivariate normal is sampled, centred on that
    individual's estimates and using that individual's covariance matrix
    after it has been passed through ``nearest_posdef``.

    Parameters
    ----------
    model
        Model whose ``modelfit_results`` provides ``individual_estimates``
        and ``individual_estimates_covariance``.
    parameters
        A list of a subset of parameters to sample. Default is None, which means all.
    samples_per_id
        Number of samples to draw for each individual.
    seed
        None or an int is used to create a generator with
        ``np.random.default_rng``. Any other object is used directly as the
        random generator.

    Returns
    -------
    Pool of samples in a DataFrame. Each row is indexed by the row label of
    the individual it was sampled for. An empty DataFrame is returned when
    there are no individuals to sample.
    """
    if seed is None or isinstance(seed, int):
        rng = np.random.default_rng(seed)
    else:
        rng = seed

    ests = model.modelfit_results.individual_estimates
    covs = model.modelfit_results.individual_estimates_covariance
    if parameters is None:
        parameters = ests.columns
    ests = ests[parameters]

    # Collect the per-individual frames and concatenate once at the end.
    # Growing a DataFrame with pd.concat inside the loop recopies the whole
    # pool on every iteration.
    frames = []
    # NOTE(review): estimates and covariances are paired by position, so both
    # are assumed to list the individuals in the same order - confirm.
    for (idx, mu), sigma in zip(ests.iterrows(), covs):
        sigma = sigma[parameters].loc[parameters]
        sigma = nearest_posdef(sigma)
        id_samples = rng.multivariate_normal(mu.values,
                                             sigma.values,
                                             size=samples_per_id)
        frames.append(
            pd.DataFrame(id_samples,
                         columns=ests.columns,
                         index=[idx] * len(id_samples)))  # ID as index

    if not frames:
        # pd.concat raises on an empty sequence; keep returning an empty pool.
        return pd.DataFrame()
    return pd.concat(frames)
Beispiel #2
0
def sample_from_covariance_matrix(
    model,
    modelfit_results=None,
    parameters=None,
    force_posdef_samples=None,
    force_posdef_covmatrix=False,
    n=1,
    seed=None,
):
    """Draw parameter vectors based on the uncertainty covariance matrix

    The parameter estimates are used as the mean and the (sub)matrix of the
    covariance matrix as the covariance. When modelfit_results is left out,
    the results attached to the model are used instead.

    Parameters
    ----------
    model
        Model passed on to ``sample_from_function``. Also the source of the
        results when modelfit_results is None
    modelfit_results
        Results holding ``parameter_estimates`` and ``covariance_matrix``
    parameters
        Restrict sampling to these parameters. None selects every estimated
        parameter
    force_posdef_samples
        Number of iterations to run before all samples are forced to be
        positive definite. None (default) means never, 0 means always
    force_posdef_covmatrix
        If true, a covariance matrix that is not positive definite is replaced
        by the result of ``nearest_posdef`` and a warning is issued. Otherwise
        such a matrix is an error
    n
        Passed on to ``sample_from_function`` as the number of samples
    seed
        None or an int is turned into a generator with
        ``np.random.default_rng``. Anything else is handed on unchanged

    Returns
    -------
    A dataframe with one sample per row

    Raises
    ------
    ValueError
        If the covariance matrix is not positive definite and
        force_posdef_covmatrix is false
    """
    results = model.modelfit_results if modelfit_results is None else modelfit_results

    if parameters is None:
        parameters = list(results.parameter_estimates.index)

    rng = seed
    if seed is None or isinstance(seed, int):
        rng = np.random.default_rng(seed)

    estimates = results.parameter_estimates[parameters]
    names = estimates.index
    mean = estimates.to_numpy()
    cov_frame = results.covariance_matrix[parameters].loc[parameters]
    cov = cov_frame.to_numpy()

    if not is_posdef(cov):
        if not force_posdef_covmatrix:
            raise ValueError(
                "Uncertainty covariance matrix not positive-definite")
        repaired = nearest_posdef(cov)
        # Report how much the matrix moved, as differences of the norms
        # taken before and after the repair.
        frobenius_change = np.linalg.norm(cov) - np.linalg.norm(repaired)
        max_change = np.abs(cov).max() - np.abs(repaired).max()
        message = (
            f'Covariance matrix was forced to become positive definite.\n'
            f'    Difference in the frobenius norm: {frobenius_change:.3e}\n'
            f'    Difference in the max norm: {max_change:.3e}\n')
        warnings.warn(message)
        cov = repaired

    sampler = partial(sample_truncated_joint_normal, mean, cov, seed=rng)
    return sample_from_function(model,
                                sampler,
                                parameters=names,
                                force_posdef_samples=force_posdef_samples,
                                n=n)
Beispiel #3
0
def _choose_param_init(model, rvs, params):
    res = model.modelfit_results
    rvs_names = [rv.name for rv in rvs]

    etas = []
    for i in range(len(rvs)):
        elem = rvs.covariance_matrix.row(i).col(i)[0]
        if str(elem) in [p.name for p in params]:
            etas.append(rvs_names[i])

    sd = np.array([np.sqrt(params[0].init), np.sqrt(params[1].init)])
    init_default = round(0.1 * sd[0] * sd[1], 7)

    if res is not None:
        try:
            ie = res.individual_estimates
            if not all(eta in ie.columns for eta in etas):
                return init_default
        except KeyError:
            return init_default
        eta_corr = ie[etas].corr()
        cov = math.corr2cov(eta_corr.to_numpy(), sd)
        cov[cov == 0] = 0.0001
        cov = math.nearest_posdef(cov)
        return round(cov[1][0], 7)
    else:
        return init_default
Beispiel #4
0
def sample_from_covariance_matrix(model,
                                  modelfit_results=None,
                                  parameters=None,
                                  force_posdef_samples=None,
                                  force_posdef_covmatrix=False,
                                  n=1):
    """Draw parameter vectors based on the uncertainty covariance matrix

       Samples come from ``sample_truncated_joint_normal`` with the parameter
       estimates as mean and the lower and upper bounds of the non-fixed
       parameters as limits. Sampling is repeated until n rows have been kept.

       When modelfit_results is left out, the results attached to the model
       are used.

       :param model: model supplying parameter bounds and the random variables
                     used to validate samples
       :param modelfit_results: results holding parameter_estimates and
                                covariance_matrix
       :param parameters: restrict sampling to these parameters. None selects all
       :param force_posdef_samples: number of iterations to run before all samples
                                    are forced to be positive definite. None
                                    (default) means never, 0 means always
       :param force_posdef_covmatrix: if true, replace a covariance matrix that is
                                      not positive definite with the result of
                                      nearest_posdef instead of raising
       :param n: number of samples to return
       :return: a dataframe with one sample per row
       :raises ValueError: if the covariance matrix is not positive definite and
                           force_posdef_covmatrix is false
    """
    results = model.modelfit_results if modelfit_results is None else modelfit_results

    if parameters is None:
        parameters = list(results.parameter_estimates.index)

    estimates = results.parameter_estimates[parameters]
    columns = estimates.index
    mean = estimates.to_numpy()
    cov = results.covariance_matrix[parameters].loc[parameters].to_numpy()
    if not is_posdef(cov):
        if not force_posdef_covmatrix:
            raise ValueError(
                "Uncertainty covariance matrix not positive-definite")
        cov = nearest_posdef(cov)

    # Truncation limits are taken from the non-fixed parameters only.
    summary = model.parameters.summary().loc[parameters]
    unfixed = summary[~summary['fix']]
    lower = unfixed.lower.astype('float64').to_numpy()
    upper = unfixed.upper.astype('float64').to_numpy()

    accepted = pd.DataFrame()
    forcing = force_posdef_samples == 0
    rounds = 0
    missing = n
    while missing > 0:
        draws = sample_truncated_joint_normal(mean, cov, lower, upper, n=missing)
        candidates = pd.DataFrame(draws, columns=columns)
        if forcing:
            # Keep every row, mapped by nearest_valid_parameters.
            chosen = candidates.transform(
                model.random_variables.nearest_valid_parameters, axis=1)
        else:
            # Keep only the rows accepted by validate_parameters.
            is_valid = candidates.apply(
                model.random_variables.validate_parameters,
                axis=1,
                use_cache=True)
            chosen = candidates[is_valid]
        accepted = pd.concat((accepted, chosen))
        missing = n - len(accepted)
        rounds += 1
        limit_reached = (force_posdef_samples is not None
                         and rounds >= force_posdef_samples)
        if limit_reached:
            forcing = True

    return accepted.reset_index(drop=True)