def sample_from_covariance_matrix(
    model,
    modelfit_results=None,
    parameters=None,
    force_posdef_samples=None,
    force_posdef_covmatrix=False,
    n=1,
    seed=None,
):
    """Sample parameter vectors using the covariance matrix

    The parameter estimates are used as the mean and the covariance matrix
    of the estimates as the covariance of a truncated joint normal
    distribution. If modelfit_results is not provided the results from the
    model will be used.

    Parameters
    ----------
    model
        Model whose modelfit_results are used when none are given. It is
        also handed on to sample_from_function.
    modelfit_results
        Results holding parameter_estimates and covariance_matrix. None
        means model.modelfit_results.
    parameters
        Use to only sample a subset of the parameters. None means all
    force_posdef_samples
        Set to how many iterations to do before forcing all samples to be
        positive definite. None is default and means never and 0 means
        always
    force_posdef_covmatrix
        If the covariance matrix is not positive definite: True replaces it
        with the result of nearest_posdef and issues a warning, False
        raises ValueError.
    n
        Number of samples, passed on to sample_from_function.
    seed
        None or an int is turned into a generator with
        np.random.default_rng. Any other object is passed on unchanged.

    Returns
    -------
    A dataframe with one sample per row

    Raises
    ------
    ValueError
        If the covariance matrix is not positive definite and
        force_posdef_covmatrix is False.
    """
    if modelfit_results is None:
        modelfit_results = model.modelfit_results
    if parameters is None:
        parameters = list(modelfit_results.parameter_estimates.index)
    if seed is None or isinstance(seed, int):
        seed = np.random.default_rng(seed)

    estimates = modelfit_results.parameter_estimates[parameters]
    mu = estimates.to_numpy()
    # Select the columns first and then the rows so the matrix follows the
    # same parameter order as mu.
    cov_columns = modelfit_results.covariance_matrix[parameters]
    sigma = cov_columns.loc[parameters].to_numpy()

    if not is_posdef(sigma):
        if not force_posdef_covmatrix:
            raise ValueError("Uncertainty covariance matrix not positive-definite")
        old_sigma = sigma
        sigma = nearest_posdef(old_sigma)
        # Report how far the replacement matrix is from the original one.
        delta_frobenius = np.linalg.norm(old_sigma) - np.linalg.norm(sigma)
        delta_max = np.abs(old_sigma).max() - np.abs(sigma).max()
        message = (
            f'Covariance matrix was forced to become positive definite.\n'
            f' Difference in the frobenius norm: {delta_frobenius:.3e}\n'
            f' Difference in the max norm: {delta_max:.3e}\n'
        )
        warnings.warn(message)

    sampler = partial(sample_truncated_joint_normal, mu, sigma, seed=seed)
    return sample_from_function(
        model,
        sampler,
        parameters=estimates.index,
        force_posdef_samples=force_posdef_samples,
        n=n,
    )
def calculate_results_using_bipp(
    frem_model, continuous, categorical, rescale=True, samples=2000, seed=None
):
    """Estimate a covariance matrix for the frem model using the BIPP method

    BIPP: bootstrap on the individual parameter posteriors. A pool of
    sampled individual estimates is resampled with replacement, corrected
    for shrinkage and turned into a covariance matrix. This is repeated
    until the requested number of positive definite matrices is collected.

    Only the individual estimates, individual uncertainties and the
    parameter estimates are needed.

    Parameters
    ----------
    frem_model
        The FREM model. Its random variables and modelfit_results are read.
    continuous
        Passed on unchanged to calculate_results_from_samples.
    categorical
        Passed on unchanged to calculate_results_from_samples.
    rescale
        Passed on unchanged to calculate_results_from_samples.
    samples
        Number of accepted bootstrap covariance matrices to collect.
    seed
        None or an int is turned into a generator with
        np.random.default_rng. Any other object is used as given.

    Returns
    -------
    The return value of calculate_results_from_samples.
    """
    if seed is None or isinstance(seed, int):
        seed = np.random.default_rng(seed)

    # Only the last of the IIV distributions is used.
    rvs, dist = frem_model.random_variables.iiv.distributions()[-1]
    etas = [rv.name for rv in rvs]
    pool = sample_individual_estimates(frem_model, parameters=etas, seed=seed)
    n_individuals = len(pool.index.unique())
    shrinkage = frem_model.modelfit_results.individual_shrinkage

    # The lower triangle of the covariance matrix holds the population
    # parameters. Their string form is used as column names.
    tril = np.tril_indices(len(etas))
    pop_params = np.array(dist.sigma).astype(str)[tril]
    parameter_samples = np.empty((samples, len(pop_params)))

    accepted = 0
    while accepted < samples:
        resampled = pool.sample(
            n=n_individuals, replace=True, random_state=seed.bit_generator
        )
        # Scale the draws by (1 / (1 - mean shrinkage)) ** 0.5, where the
        # mean is taken over the shrinkage rows matching the resampled index.
        resampled_shrinkage = shrinkage.loc[resampled.index]
        correction = (1 / (1 - resampled_shrinkage.mean())) ** (1 / 2)
        bootstrap_cov = (resampled * correction).cov()
        # Matrices that are not positive definite are discarded and redrawn.
        if is_posdef(bootstrap_cov.to_numpy()):
            parameter_samples[accepted, :] = bootstrap_cov.values[tril]
            accepted += 1

    frame = pd.DataFrame(parameter_samples, columns=pop_params)
    return calculate_results_from_samples(
        frem_model, continuous, categorical, frame, rescale=rescale
    )
def sample_from_covariance_matrix(model, modelfit_results=None, parameters=None,
                                  force_posdef_samples=None, force_posdef_covmatrix=False, n=1):
    """Sample parameter vectors using the covariance matrix

    Samples are drawn from a truncated joint normal distribution with the
    parameter estimates as mean, the covariance matrix of the estimates as
    covariance and the lower and upper parameter bounds as truncation
    limits. If modelfit_results is not provided the results from the model
    will be used.

    :param model: model providing parameters, random variables and, when
                  modelfit_results is None, the modelfit results
    :param modelfit_results: results holding parameter_estimates and
                             covariance_matrix. None means
                             model.modelfit_results
    :param parameters: use to only sample a subset of the parameters. None means all
    :param force_posdef_samples: Set to how many iterations to do before forcing all samples
                                 to be positive definite. None is default and means never
                                 and 0 means always
    :param force_posdef_covmatrix: if the covariance matrix is not positive definite, True
                                   replaces it with the result of nearest_posdef and False
                                   raises ValueError
    :param n: number of samples to return
    :return: a dataframe with one sample per row
    :raises ValueError: if the covariance matrix is not positive definite and
                        force_posdef_covmatrix is False
    """
    if modelfit_results is None:
        modelfit_results = model.modelfit_results
    if parameters is None:
        parameters = list(modelfit_results.parameter_estimates.index)

    estimates = modelfit_results.parameter_estimates[parameters]
    columns = estimates.index
    mu = estimates.to_numpy()
    # Select the columns first and then the rows so the matrix follows the
    # same parameter order as mu.
    cov_columns = modelfit_results.covariance_matrix[parameters]
    sigma = cov_columns.loc[parameters].to_numpy()

    if not is_posdef(sigma):
        if not force_posdef_covmatrix:
            raise ValueError("Uncertainty covariance matrix not positive-definite")
        sigma = nearest_posdef(sigma)

    # Truncation limits are taken from the parameters not flagged as fixed.
    summary = model.parameters.summary().loc[parameters]
    summary = summary[~summary['fix']]
    lower = summary.lower.astype('float64').to_numpy()
    upper = summary.upper.astype('float64').to_numpy()

    # With force_posdef_samples == 0 every draw is repaired from the start.
    force_posdef = force_posdef_samples == 0

    kept_samples = pd.DataFrame()
    missing = n
    iteration = 0
    while missing > 0:
        draws = sample_truncated_joint_normal(mu, sigma, lower, upper, n=missing)
        candidates = pd.DataFrame(draws, columns=columns)
        if force_posdef:
            # Replace each row by the result of nearest_valid_parameters.
            accepted = candidates.transform(
                model.random_variables.nearest_valid_parameters, axis=1)
        else:
            # Keep only the rows for which validate_parameters is true.
            is_valid = candidates.apply(
                model.random_variables.validate_parameters, axis=1, use_cache=True)
            accepted = candidates[is_valid]
        kept_samples = pd.concat((kept_samples, accepted))
        missing = n - len(kept_samples)
        iteration += 1
        # Switch from rejecting to repairing once the iteration budget is used.
        if not force_posdef and force_posdef_samples is not None:
            if iteration >= force_posdef_samples:
                force_posdef = True

    return kept_samples.reset_index(drop=True)