def test_override_priors():
    """Overriding a rate's smoothing-grid priors from draws should set each
    cell's prior mean to the mean of that cell's draws."""
    np.random.seed(0)
    settings = load_settings(BASE_CASE)
    alchemy = Alchemy(settings)
    prior = alchemy.get_smoothing_grid(rate=settings.rate[0])
    draws = np.abs(np.random.normal(
        loc=1e-3, scale=1e-7,
        size=(len(prior.ages), len(prior.times), 100)
    ))
    alchemy.override_priors(
        rate_grid=prior,
        update_dict={
            'value': draws,
            'dage': draws,
            'dtime': draws,
            'ages': prior.ages,
            'times': prior.times
        },
        new_prior_distribution=None
    )
    # Check every (age, time) cell. The previous zip(enumerate(...),
    # enumerate(...)) form only visited the diagonal of the grid.
    for a, age in enumerate(prior.ages):
        for t, time in enumerate(prior.times):
            assert prior.value[age, time].mean == draws[a, t, :].mean()
    # dage priors exist for all but the last age point.
    for a, age in enumerate(prior.ages[:-1]):
        for t, time in enumerate(prior.times):
            assert prior.dage[age, time].mean == draws[a, t, :].mean()
    # dtime priors exist for all but the last time point.
    for a, age in enumerate(prior.ages):
        for t, time in enumerate(prior.times[:-1]):
            assert prior.dtime[age, time].mean == draws[a, t, :].mean()
def integrand_grids(alchemy: Alchemy, integrands: List[str]) -> Dict[str, Dict[str, np.ndarray]]:
    """
    Get the age-time grids associated with a list of integrands.

    Should be used for converting priors to posteriors. Every integrand starts
    from the model's default age-time grid; integrands that have their own
    rate smoothing grid get that grid's ages and times instead.

    Parameters:
    ----------
    alchemy
        An alchemy object for the model
    integrands
        A list of integrands

    Returns
    -------
    Dict, a dictionary of grids with keys for each integrand, which are
    dictionaries for "age" and "time".
    """
    default_grid = alchemy.construct_age_time_grid()
    # Deep-copy so per-integrand overrides never mutate the shared default.
    grids = {integrand: deepcopy(default_grid) for integrand in integrands}
    for rate, rate_grid in alchemy.get_all_rates_grids().items():
        if rate not in integrands:
            continue
        grids[rate]['age'] = rate_grid.ages
        grids[rate]['time'] = rate_grid.times
    return grids
def test_apply_min_cv_to_value():
    """A huge min coefficient of variation should force every cell's
    standard deviation to mean * min_cv."""
    settings = load_settings(BASE_CASE)
    alchemy = Alchemy(settings)
    prior = alchemy.get_smoothing_grid(rate=settings.rate[0]).value
    # Apply a ridiculously large coefficient of variation
    alchemy.apply_min_cv_to_prior_grid(prior_grid=prior, min_cv=1e6)
    # Check every (age, time) cell. The previous zip(enumerate(...),
    # enumerate(...)) form only visited the diagonal of the grid.
    for age in prior.ages:
        for time in prior.times:
            assert prior[age, time].standard_deviation == prior[age, time].mean * 1e6
def test_estimate_prior_grid():
    """Estimating a prior grid from draws should set each cell's prior mean
    to the mean of that cell's draws."""
    np.random.seed(0)
    settings = load_settings(BASE_CASE)
    alchemy = Alchemy(settings)
    prior = alchemy.get_smoothing_grid(rate=settings.rate[0])
    draws = np.abs(np.random.normal(
        loc=1e-3, scale=1e-7,
        size=(len(prior.ages), len(prior.times), 100)
    ))
    estimate_grid_from_draws(
        ages=prior.ages, times=prior.times, draws=draws, grid_priors=prior.value
    )
    # Check every (age, time) cell. The previous zip(enumerate(...),
    # enumerate(...)) form only visited the diagonal of the grid.
    for a, age in enumerate(prior.ages):
        for t, time in enumerate(prior.times):
            assert prior.value[age, time].mean == draws[a, t, :].mean()
def test_predict_sample_pools(mi, settings, dismod):
    """With n_sim=2 samples split across a 2-worker pool, the pooled
    predictions should cover every avgint row twice."""
    _ = Alchemy(settings)
    pooled = predict_sample_pool(
        main_db=NAME,
        index_file_pattern='sample_{index}.db',
        n_pool=2,
        n_sim=2,
    )
    di = DismodIO(NAME)
    expected_rows = len(di.avgint) * 2
    assert len(pooled) == expected_rows
def get_prior_avgint_grid(settings=None, integrands=None, sexes=None, locations=None,
                          midpoint=False, grids=None):
    """
    Get a data frame to use for setting up posterior predictions on a grid.
    Will still need to have covariates added to it, and prep data from
    dismod.api.data_tables.prep_data_avgint to convert nodes and covariate
    names before it can be input into the avgint table in a database.

    Args:
        settings: (cascade_at.settings.settings_configuration.SettingsConfiguration)
            used (with ``integrands``) to build the grids when ``grids`` is
            not supplied
        integrands: (list of str) rates/integrands to build grids for when
            ``grids`` is not supplied
        sexes: (list of int)
        locations: (list of int)
        midpoint: (bool) use the midpoints of the age/time interval vectors
        grids: (dict) optional pre-computed age-time grids keyed by rate, as
            returned by ``integrand_grids``; when given, ``settings`` and
            ``integrands`` are ignored

    Returns:
        (pd.DataFrame) with columns "integrand_id", "location_id",
        "weight_id", "subgroup_id", "age_lower", "age_upper", "time_lower",
        "time_upper", "sex_id"
    """
    if grids is None:
        # Backwards-compatible path: derive the grids from the settings.
        alchemy = Alchemy(settings)
        grids = integrand_grids(alchemy=alchemy, integrands=integrands)

    posterior_dfs = []
    for rate, grid in grids.items():
        if midpoint:
            time = vec_to_midpoint(grid['time'])
            age = vec_to_midpoint(grid['age'])
        else:
            time = grid['time']
            age = grid['age']
        posterior_df = expand_grid({
            'age_lower': age,
            'time_lower': time,
            'location_id': locations,
            'sex_id': sexes
        })
        # Point predictions: upper bounds equal lower bounds.
        posterior_df['time_upper'] = posterior_df['time_lower']
        posterior_df['age_upper'] = posterior_df['age_lower']
        posterior_df['rate'] = rate
        posterior_df['integrand'] = posterior_df['rate'].map(RateToIntegrand)
        posterior_df['integrand_id'] = posterior_df['integrand'].apply(
            lambda x: IntegrandEnum[x].value)
        posterior_df['weight_id'] = posterior_df["integrand"].apply(
            lambda x: INTEGRAND_TO_WEIGHT[x].value)
        posterior_df['subgroup_id'] = 0
        posterior_dfs.append(posterior_df)

    # DataFrame.append was removed in pandas 2.0; concatenate once instead.
    return pd.concat(posterior_dfs, ignore_index=True)[[
        "integrand_id", "location_id", "weight_id", "subgroup_id",
        "age_lower", "age_upper", "time_lower", "time_upper", "sex_id"
    ]]
def test_predict_pool(mi, settings, dismod):
    """A single Predict worker, run for sample index 1, should produce one
    prediction per avgint row, all tagged with sample_index == 1."""
    alchemy = Alchemy(settings)
    predict = Predict(main_db=NAME, index_file_pattern='sample_{index}.db')
    result = predict(1)
    di = DismodIO(NAME)
    assert len(result) == len(di.avgint)
    # BUG FIX: original asserted `all(result.sample_index) == 1`, which
    # compares the boolean result of all() to 1 (True for any all-nonzero
    # column) instead of checking each value is exactly 1.
    assert (result.sample_index == 1).all()
    assert list(result.columns) == ['predict_id', 'sample_index', 'avgint_id', 'avg_integrand']
def df(mi, settings):
    """Fixture: a DismodFiller built on temp.db and filled for the parent
    location (70) and its children, sex_id 2."""
    filler = DismodFiller(
        path=Path('temp.db'),
        settings_configuration=settings,
        measurement_inputs=mi,
        grid_alchemy=Alchemy(settings),
        parent_location_id=70,
        sex_id=2,
    )
    filler.fill_for_parent_child()
    return filler
def test_predict_sample(mi, settings, dismod):
    """After filling the avgint table from the priors grid and running
    'predict sample', there should be two predictions per avgint row."""
    alchemy = Alchemy(settings)
    fill_avgint_with_priors_grid(
        inputs=mi,
        alchemy=alchemy,
        settings=settings,
        source_db_path=NAME,
        child_locations=[72],
        child_sexes=[2],
    )
    run_dismod_commands(dm_file=NAME, commands=['predict sample'])
    di = DismodIO(NAME)
    expected_rows = len(di.avgint) * 2
    assert len(di.predict) == expected_rows
def test_get_prior_avgint_grid():
    """The prior avgint grid should contain only the requested integrands
    (pini=0, iota=2, chi=7) and locations."""
    settings = load_settings(BASE_CASE)
    alchemy = Alchemy(settings)
    rates = ['pini', 'iota', 'chi']
    grids = integrand_grids(alchemy=alchemy, integrands=rates)
    df = get_prior_avgint_grid(
        grids=grids,
        sexes=[1, 2],
        locations=[1]
    )
    # isinstance rather than exact-type comparison (`type(df) == ...`).
    assert isinstance(df, pd.DataFrame)
    assert sorted(df['integrand_id'].unique()) == [0, 2, 7]
    assert all(df.location_id == 1)
def test_format_rate_grid_for_ihme(mi):
    """Formatting a rate grid for the IHME databases should produce the
    standard upload columns in the standard order."""
    settings = load_settings(BASE_CASE)
    alchemy = Alchemy(settings)
    filler = DismodFiller(
        path='none',
        settings_configuration=settings,
        measurement_inputs=mi,
        grid_alchemy=alchemy,
        parent_location_id=70,
        sex_id=2,
    )
    grid = format_rate_grid_for_ihme(
        rates=filler.parent_child_model['rate'],
        gbd_round_id=6,
        location_id=70,
        sex_id=2,
    )
    expected_columns = ['location_id', 'year_id', 'age_group_id', 'sex_id',
                        'measure_id', 'mean', 'upper', 'lower']
    assert all(grid.columns == expected_columns)
def read_inputs(self) -> (MeasurementInputs, Alchemy, SettingsConfig):
    """
    Read the inputs from disk.

    Loads the dill-pickled inputs object and the settings JSON, rebuilds the
    Alchemy from the settings, and returns all three.
    """
    with open(self.settings_file) as sf:
        settings_json = json.load(sf)
    settings = load_settings(settings_json=settings_json)
    alchemy = Alchemy(settings=settings)

    LOG.info(f"Reading input obj from {self.inputs_file}.")
    with open(self.inputs_file, "rb") as inp:
        inputs = dill.load(inp)

    # For some reason the pickling process makes it so that there is a
    # key error in FormList when trying to access CovariateSpecs.
    # Rebuild the covariate specs from the settings as a workaround until
    # the root cause of the pickling error is found.
    inputs.covariate_specs = CovariateSpecs(
        country_covariates=settings.country_covariate,
        study_covariates=settings.study_covariate)
    return inputs, alchemy, settings
def alchemy(modified_settings):
    """Fixture: an Alchemy built from the modified settings."""
    grid_alchemy = Alchemy(modified_settings)
    return grid_alchemy
def alchemy():
    """Fixture: an Alchemy built from the base-case settings."""
    base_settings = load_settings(BASE_CASE)
    return Alchemy(base_settings)