def test_Mortality(config, base_plugins):
    """Run a one-year simulation with Mortality and expect some non-alive simulants.

    Mortality rates are read from ``config.path_to_mortality_file``, restricted
    to a single LAD code and written to the simulation's data layer before
    setup.
    """
    simulation = InteractiveContext(
        components=[TestPopulation(), Mortality()],
        configuration=config,
        plugin_configuration=base_plugins,
        setup=False,
    )

    # To save time, only look at the location existing in the test dataset.
    all_rates = pd.read_csv(config.path_to_mortality_file)
    lad_rates = all_rates[all_rates['LAD.code'] == 'E08000032']
    rate_table = transform_rate_table(
        lad_rates,
        2011,
        2012,
        config.population.age_start,
        config.population.age_end,
    )
    simulation._data.write(
        "cause.all_causes.cause_specific_mortality_rate", rate_table)

    simulation.setup()
    simulation.run_for(duration=pd.Timedelta(days=365))

    pop = simulation.get_population()
    is_alive = pop['alive'] == 'alive'
    print('alive', len(pop[is_alive]))
    print('dead', len(pop[~is_alive]))

    # At least one simulant must have left the 'alive' state.
    assert not np.all(pop.alive == 'alive')
def test_incidence(base_config, base_plugins, disease):
    """Check that the 'sick.incidence_rate' value matches the rate written as data.

    A two-state disease model (healthy -> sick) is built with a RateTransition
    whose incidence rate is loaded from the data layer. After one step the
    pipeline value must equal the yearly rate converted with ``from_yearly``
    for the configured step size.
    """
    base_rate = 0.7
    key = "sequela.acute_myocardial_infarction_first_2_days.incidence_rate"
    time_step = pd.Timedelta(days=base_config.time.step_size)

    healthy = BaseDiseaseState('healthy')
    sick = BaseDiseaseState('sick')
    transition = RateTransition(
        input_state=healthy,
        output_state=sick,
        get_data_functions={
            'incidence_rate': lambda _, builder: builder.data.load(key),
        },
    )
    healthy.transition_set.append(transition)
    model = DiseaseModel(disease, initial_state=healthy, states=[healthy, sick])

    simulation = InteractiveContext(
        components=[TestPopulation(), model],
        configuration=base_config,
        plugin_configuration=base_plugins,
        setup=False,
    )
    # The data must be in place before setup so the transition can load it.
    simulation._data.write(key, base_rate)
    simulation.setup()

    incidence_rate = simulation._values.get_value('sick.incidence_rate')
    simulation.step()

    assert np.allclose(
        from_yearly(base_rate, time_step),
        incidence_rate(simulation.get_population().index),
        atol=0.00001,
    )
def build_sample_history_single_scenario(output_path: Union[str, Path], location: str, scenario: int):
    """Run one scenario for ``location`` with a SampleHistoryObserver attached.

    The model specification is ``paths.MODEL_SPEC_DIR / '<location>.yaml'``.
    Every listed metrics observer has ``by_age``, ``by_sex`` and ``by_year``
    set to False, and the sample history observer is pointed at
    ``<output_path>/<scenario>_sample_history.hdf``.

    Parameters
    ----------
    output_path
        Directory that the sample history file path is built from.
    location
        Location name; surrounding double quotes are stripped.
    scenario
        Value written to ``ldlc_treatment_algorithm.scenario``.
    """
    # NOTE: This is 100% necessary or the qsub will fail
    location = location.strip('"')
    output_path = Path(output_path)

    unstratified_observers = (
        'disability',
        'mortality',
        'ischemic_heart_disease_observer',
        'ischemic_stroke_observer',
        'diabetes_mellitus_observer',
        'chronic_kidney_disease_observer',
        'miscellaneous_observer',
    )
    # One fresh dict per observer so no two config entries share an object.
    metrics = {
        observer: {'by_age': False, 'by_sex': False, 'by_year': False}
        for observer in unstratified_observers
    }
    metrics['sample_history_observer'] = {
        'path': f'{output_path}/{scenario}_sample_history.hdf',
    }

    sim = InteractiveContext(paths.MODEL_SPEC_DIR / f'{location}.yaml', setup=False)
    sim.add_components([SampleHistoryObserver()])
    sim.configuration.update({
        'ldlc_treatment_algorithm': {'scenario': scenario},
        'metrics': metrics,
    })

    sim.setup()
    sim.run()
    sim.finalize()
    logger.info('**DONE**')
def test_risk_deletion(base_config, base_plugins, disease):
    """Check that a PAF modifier scales the 'sick.incidence_rate' value.

    A component registers a lookup table holding ``paf`` as a modifier on
    'sick.incidence_rate.paf'. After one step the incidence rate must equal
    ``base_rate * (1 - paf)`` converted with ``from_yearly`` for the
    configured step size.
    """
    base_rate = 0.7
    paf = 0.1
    key = "sequela.acute_myocardial_infarction_first_2_days.incidence_rate"

    time_step = pd.Timedelta(days=base_config.time.step_size)
    year_start = base_config.time.start.year
    year_end = base_config.time.end.year

    healthy = BaseDiseaseState('healthy')
    sick = BaseDiseaseState('sick')
    transition = RateTransition(
        input_state=healthy,
        output_state=sick,
        get_data_functions={
            'incidence_rate': lambda _, builder: builder.data.load(key),
        },
    )
    healthy.transition_set.append(transition)
    model = DiseaseModel(disease, initial_state=healthy, states=[healthy, sick])

    class PafModifier:
        """Minimal component that registers the PAF table as a value modifier."""

        @property
        def name(self):
            return 'paf_modifier'

        def setup(self, builder):
            # `simulation` is bound further down in the enclosing test; it is
            # only looked up when setup() runs, i.e. during simulation.setup().
            paf_table = simulation._tables.build_table(
                build_table(paf, year_start, year_end),
                key_columns=('sex', ),
                parameter_columns=['age', 'year'],
                value_columns=None,
            )
            builder.value.register_value_modifier(
                'sick.incidence_rate.paf', modifier=paf_table)

    simulation = InteractiveContext(
        components=[TestPopulation(), model, PafModifier()],
        configuration=base_config,
        plugin_configuration=base_plugins,
        setup=False,
    )
    simulation._data.write(key, base_rate)
    simulation.setup()

    incidence_rate = simulation._values.get_value('sick.incidence_rate')
    simulation.step()

    expected_rate = base_rate * (1 - paf)
    assert np.allclose(
        from_yearly(expected_rate, time_step),
        incidence_rate(simulation.get_population().index),
        atol=0.00001,
    )
def get_relative_risks(config: Path, input_draw: int, random_seed: int, age_group_id: int) -> pd.DataFrame:
    """Compute per-simulant LBWSG relative risks for one age group.

    A simulation is set up for the age range of ``age_group_id`` (looked up in
    the artifact's age bins). The 'short_gestation.exposure' and
    'low_birth_weight.exposure' values of the initial population are fed
    through the sex-specific interpolator stored in the artifact, and the
    result is exponentiated.

    Parameters
    ----------
    config
        Model specification passed to ``InteractiveContext``.
    input_draw
        Written to ``input_data.input_draw_number``; also selects the
        ``draw_<input_draw>`` column of the interpolator table.
    random_seed
        Written to ``randomness.random_seed``.
    age_group_id
        Row label of the age-bin table from which ``age_start`` and
        ``age_end`` are read.

    Returns
    -------
    pd.DataFrame
        Indexed like the population, with columns 'relative_risk' and 'sex'.
        Rows whose sex is neither 'Female' nor 'Male' keep the default 1.0.
    """
    sim = InteractiveContext(config, setup=False)
    artifact = Artifact(sim.configuration.input_data.artifact_path)

    age_bins = (
        artifact.load(data_keys.POPULATION.AGE_BINS)
        .reset_index()
        .set_index('age_group_id')
    )
    age_start = age_bins.loc[age_group_id, 'age_start']
    age_end = age_bins.loc[age_group_id, 'age_end']
    year_start, year_end = 2019, 2020

    sim.configuration.update({
        'input_data': {'input_draw_number': input_draw},
        'randomness': {'random_seed': random_seed},
        'population': {'age_start': age_start, 'age_end': age_end},
    })
    sim.setup()

    pop = sim.get_population()
    gestational_ages = sim.get_value('short_gestation.exposure')(pop.index)
    birth_weights = sim.get_value('low_birth_weight.exposure')(pop.index)
    interpolators = artifact.load(data_keys.LBWSG.RELATIVE_RISK_INTERPOLATOR)
    draw_column = f'draw_{input_draw}'

    lbwsg_rrs = pd.DataFrame({'relative_risk': 1.0}, index=pop.index)
    lbwsg_rrs['sex'] = pop['sex']

    for sex in ('Female', 'Male'):
        row_index = (sex, age_start, age_end, year_start, year_end,
                     'diarrheal_diseases', 'excess_mortality_rate')
        # NOTE(review): unpickling executes arbitrary code; this is only safe
        # while the artifact is a trusted, project-generated file.
        interpolator = pickle.loads(
            bytes.fromhex(interpolators.loc[row_index, draw_column]))

        in_sex = pop['sex'] == sex
        # NOTE(review): presumably the interpolator returns log relative
        # risks, hence the exp — confirm against the artifact builder.
        rrs = np.exp(interpolator(gestational_ages[in_sex],
                                  birth_weights[in_sex],
                                  grid=False))
        lbwsg_rrs.loc[lbwsg_rrs['sex'] == sex, 'relative_risk'] = rrs

    return lbwsg_rrs
def test_FertilityCrudeBirthRate(config, base_plugins):
    """Run 100 days with FertilityCrudeBirthRate and expect the population to grow.

    Everyone must still be alive afterwards and the population must be larger
    than the configured initial size.
    """
    pop_size = config.population.population_size

    simulation = InteractiveContext(
        components=[TestPopulation(), FertilityCrudeBirthRate()],
        configuration=config,
        plugin_configuration=base_plugins,
        setup=False,
    )
    simulation._data.write(
        "covariate.live_births_by_sex.estimate", crude_birth_rate_data())

    simulation.setup()
    simulation.run_for(duration=pd.Timedelta(days=100))

    pop = simulation.get_population()
    assert np.all(pop.alive == 'alive')
    assert len(pop.age) > pop_size
def test_fertility_module(config, base_plugins):
    """Run three years with FertilityAgeSpecificRates and check the newborns.

    After setup the state table must contain 'last_birth_time' and
    'parent_id'. After the run everyone is alive, the population has grown,
    and every simulant past the initial population has a parent whose
    'last_birth_time' is not before the simulation start.
    """
    start_population_size = config.population.population_size
    run_length = pd.Timedelta(days=365 * 3)

    simulation = InteractiveContext(
        components=[TestPopulation(), FertilityAgeSpecificRates()],
        configuration=config,
        plugin_configuration=base_plugins,
        setup=False,
    )

    # To save time, only look at the location existing in the test dataset.
    all_rates = pd.read_csv(config.path_to_fertility_file)
    fertility_rate_df = all_rates[all_rates['LAD.code'] == 'E08000032']
    asfr_data = transform_rate_table(fertility_rate_df, 2011, 2012, 10, 50, [2])

    # Mock fertility data
    simulation._data.write(
        "covariate.age_specific_fertility_rate.estimate", asfr_data)

    simulation.setup()
    time_start = simulation._clock.time

    for column in ('last_birth_time', 'parent_id'):
        assert column in simulation.get_population().columns, \
            'expect Fertility module to update state table.'

    simulation.run_for(duration=run_length)
    pop = simulation.get_population()
    print(pop)

    # No death in this model.
    assert np.all(pop.alive == 'alive'), 'expect all simulants to be alive'

    # TODO: Write a more rigorous test.
    assert len(pop.age) > start_population_size, 'expect new simulants'

    # Rows past the initial population size are the simulants added during
    # the run; each one's parent must have given birth after the start.
    newborn_parent_ids = pop['parent_id'].iloc[start_population_size:]
    for parent_id in newborn_parent_ids:
        assert pop.loc[parent_id].last_birth_time >= time_start, \
            'expect all children to have mothers who gave birth after the simulation starts.'
def test_FertilityCrudeBirthRate_extrapolate_fail(config, base_plugins):
    """Expect setup to raise ValueError when extrapolation is disabled.

    The simulation period is set to 2016-2025 with
    ``interpolation.extrapolate`` switched off before the crude birth rate
    data is written.
    """
    overrides = {
        'interpolation': {'extrapolate': False},
        'time': {
            'start': {'year': 2016},
            'end': {'year': 2025},
        },
    }
    config.update(overrides)

    simulation = InteractiveContext(
        components=[TestPopulation(), FertilityCrudeBirthRate()],
        configuration=config,
        plugin_configuration=base_plugins,
        setup=False,
    )
    simulation._data.write(
        "covariate.live_births_by_sex.estimate", crude_birth_rate_data())

    with pytest.raises(ValueError):
        simulation.setup()
def test_emigration(config, base_plugins):
    """Run ten years with Emigration and expect at least one emigrant.

    Emigration rates are computed from the emigration and total population
    files, both restricted to one LAD code, and written to the data layer
    before setup.
    """
    lad_code = 'E08000032'
    num_days = 365 * 10

    simulation = InteractiveContext(
        components=[TestPopulation(), Emigration()],
        configuration=config,
        plugin_configuration=base_plugins,
        setup=False,
    )

    # setup emigration rates
    df_emigration = pd.read_csv(config.path_to_emigration_file)
    df_total_population = pd.read_csv(config.path_to_total_population_file)
    # The original filters OR-ed the same condition with itself; a single
    # comparison selects the same rows.
    df_emigration = df_emigration[df_emigration['LAD.code'] == lad_code]
    df_total_population = df_total_population[
        df_total_population['LAD'] == lad_code]

    asfr_data_emigration = compute_migration_rates(
        df_emigration,
        df_total_population,
        2011,
        2012,
        config.population.age_start,
        config.population.age_end,
        aggregate_over=75,
    )

    # Mock emigration data
    simulation._data.write(
        "covariate.age_specific_migration_rate.estimate", asfr_data_emigration)

    simulation.setup()
    simulation.run_for(duration=pd.Timedelta(days=num_days))
    pop = simulation.get_population()

    print('emigrated', len(pop[pop['alive'] == 'emigrated']))
    # NOTE(review): the assertion below uses 'Yes' for this column, so
    # 'no_emigrated' may never match — confirm the values Emigration writes.
    print('remaining population', len(pop[pop['emigrated'] == 'no_emigrated']))

    assert not np.all(pop.alive == 'alive')
    assert len(pop[pop['emigrated'] == 'Yes']) > 0, 'expect migration'
def test_Immigration(config, base_plugins):
    """Run ten days with Immigration and expect more than one entrance time.

    The immigration rate table, the yearly total of immigrants and the
    immigration-to-MSOA table are written to the data layer before setup.
    """
    lad_code = 'E08000032'
    run_length = pd.Timedelta(days=10)

    simulation = InteractiveContext(
        components=[TestPopulation(), Immigration()],
        configuration=config,
        plugin_configuration=base_plugins,
        setup=False,
    )

    total_population = pd.read_csv(config.path_to_total_population_file)
    total_population = total_population[total_population['LAD'] == lad_code]

    # setup immigration rates
    immigration = pd.read_csv(config.path_to_immigration_file)
    immigration = immigration[immigration['LAD.code'] == lad_code]
    immigration_rates = compute_migration_rates(
        immigration,
        total_population,
        2011,
        2012,
        config.population.age_start,
        config.population.age_end,
        normalize=False,
    )

    # read the immigration-to-MSOA table
    immigration_msoa = pd.read_csv(config.path_to_immigration_MSOA)

    # read total immigrants from the file: sum over every column from the
    # fifth one onwards
    count_columns = immigration.columns[4:]
    total_immigrants = int(immigration[count_columns].sum().sum())

    simulation._data.write(
        "cause.all_causes.cause_specific_immigration_rate", immigration_rates)
    simulation._data.write(
        "cause.all_causes.cause_specific_total_immigrants_per_year",
        total_immigrants)
    simulation._data.write(
        "cause.all_causes.immigration_to_MSOA", immigration_msoa)

    simulation.setup()
    simulation.run_for(duration=run_length)
    pop = simulation.get_population()

    # More than one distinct entrance time means simulants were added after
    # the initial population.
    assert len(pop["entrance_time"].value_counts()) > 1
    print(pop)
def test_internal_outmigration(config, base_plugins):
    """Run five years with InternalMigration and check that moves are recorded.

    Out-migration rates for a handful of LADs plus the MSOA/LAD index tables
    of the origin-destination matrix are written to the data layer before
    setup. Afterwards not everyone may have migrated, and at least one
    simulant must have an out-migration time and a previous MSOA location.
    """
    num_days = 365 * 5

    simulation = InteractiveContext(
        components=[TestPopulation(), InternalMigration()],
        configuration=config,
        plugin_configuration=base_plugins,
        setup=False,
    )

    df = pd.read_csv(config.path_to_internal_outmigration_file)
    # to save time, only look at locations existing on the test dataset.
    df_internal_outmigration = df[df['LAD.code'].isin([
        'E08000032', 'E08000033', 'E08000034',
        'E06000024', 'E08000035', 'E07000163',
    ])]
    asfr_data = transform_rate_table(
        df_internal_outmigration,
        2011,
        2012,
        config.population.age_start,
        config.population.age_end,
    )
    simulation._data.write(
        "cause.age_specific_internal_outmigration_rate", asfr_data)

    # Read MSOA ---> LAD
    msoa_lad_df = pd.read_csv(config.path_msoa_to_lad)

    # Read OD matrix, only destinations
    OD_matrix_dest = pd.read_csv(config.path_to_OD_matrix_index_file,
                                 index_col=0)
    OD_matrix_with_LAD = OD_matrix_dest.merge(
        msoa_lad_df[["MSOA11CD", "LAD16CD"]],
        left_index=True,
        right_on="MSOA11CD")
    OD_matrix_with_LAD.index = OD_matrix_with_LAD["indices"]

    # Create indices for MSOA and LAD
    MSOA_location_index = OD_matrix_with_LAD["MSOA11CD"].to_dict()
    LAD_location_index = OD_matrix_with_LAD["LAD16CD"].to_dict()

    # Only the index tables and the path to the matrices are written here;
    # the matrices themselves are not loaded by this test.
    simulation._data.write("internal_migration.MSOA_index",
                           MSOA_location_index)
    simulation._data.write("internal_migration.LAD_index",
                           LAD_location_index)
    simulation._data.write("internal_migration.MSOA_LAD_indices",
                           OD_matrix_with_LAD)
    simulation._data.write("internal_migration.path_to_OD_matrices",
                           config.path_to_OD_matrices)

    simulation.setup()
    simulation.run_for(duration=pd.Timedelta(days=num_days))
    pop = simulation.get_population()

    print('internal outmigration',
          len(pop[pop['internal_outmigration'] == 'Yes']))
    print('remaining population',
          len(pop[pop['internal_outmigration'] == 'No']))

    assert not np.all(pop.internal_outmigration == 'Yes')

    # Comparing the column itself against the string 'NaT' is true for every
    # row when it holds datetimes, so the check could never fail. Comparing
    # the string form catches both real NaT values and a literal 'NaT'.
    has_outmigration_time = pop['last_outmigration_time'].astype(str) != 'NaT'
    assert has_outmigration_time.any(), 'time of out migration gets saved.'

    has_previous_location = pop['previous_MSOA_locations'] != ''
    assert has_previous_location.any(), \
        'previous location of the migrant gets saved.'
def test_pipeline(config, base_plugins):
    """Run two years with fertility, mortality, emigration and immigration together.

    All four rate inputs are derived from the configured CSV files for one LAD
    code and written to the data layer before setup. After the run some
    simulants must no longer be alive, some must have emigrated, the
    population must have grown, and every non-immigrant simulant past the
    initial population must have a parent that gave birth after the start.
    """
    start_population_size = config.population.population_size
    lad_code = 'E08000032'
    run_length = pd.Timedelta(days=365 * 2)

    simulation = InteractiveContext(
        components=[
            TestPopulation(),
            FertilityAgeSpecificRates(),
            Mortality(),
            Emigration(),
            Immigration(),
        ],
        configuration=config,
        plugin_configuration=base_plugins,
        setup=False,
    )
    age_start = config.population.age_start
    age_end = config.population.age_end

    # setup mortality rates
    mortality = pd.read_csv(config.path_to_mortality_file)
    mortality = mortality[mortality['LAD.code'] == lad_code]
    mortality_rates = transform_rate_table(
        mortality, 2011, 2012, age_start, age_end)
    simulation._data.write(
        "cause.all_causes.cause_specific_mortality_rate", mortality_rates)

    # setup fertility rates
    fertility = pd.read_csv(config.path_to_fertility_file)
    fertility = fertility[fertility['LAD.code'] == lad_code]
    fertility_rates = transform_rate_table(fertility, 2011, 2012, 10, 50, [2])
    simulation._data.write(
        "covariate.age_specific_fertility_rate.estimate", fertility_rates)

    # setup emigration rates
    emigration = pd.read_csv(config.path_to_emigration_file)
    total_population = pd.read_csv(config.path_to_total_population_file)
    emigration = emigration[emigration['LAD.code'] == lad_code]
    total_population = total_population[total_population['LAD'] == lad_code]
    emigration_rates = compute_migration_rates(
        emigration, total_population, 2011, 2012, age_start, age_end)
    simulation._data.write(
        "covariate.age_specific_migration_rate.estimate", emigration_rates)

    # setup immigration rates
    immigration = pd.read_csv(config.path_to_immigration_file)
    immigration = immigration[immigration['LAD.code'] == lad_code]
    immigration_rates = compute_migration_rates(
        immigration, total_population, 2011, 2012, age_start, age_end,
        normalize=False)

    # read total immigrants from the file: sum over every column from the
    # fifth one onwards
    count_columns = immigration.columns[4:]
    total_immigrants = int(immigration[count_columns].sum().sum())

    simulation._data.write(
        "cause.all_causes.cause_specific_immigration_rate", immigration_rates)
    simulation._data.write(
        "cause.all_causes.cause_specific_total_immigrants_per_year",
        total_immigrants)

    immigration_msoa = pd.read_csv(config.path_to_immigration_MSOA)
    simulation._data.write(
        "cause.all_causes.immigration_to_MSOA", immigration_msoa)

    simulation.setup()
    time_start = simulation._clock.time

    for column in ('last_birth_time', 'parent_id'):
        assert column in simulation.get_population().columns, \
            'expect Fertility module to update state table.'

    simulation.run_for(duration=run_length)
    pop = simulation.get_population()

    for status in ('alive', 'dead', 'emigrated'):
        print(status, len(pop[pop['alive'] == status]))

    assert not np.all(pop.alive == 'alive')
    assert len(pop[pop['emigrated'] == 'Yes']) > 0, 'expect migration'
    assert len(pop.age) > start_population_size, 'expect new simulants'

    for label in range(start_population_size, len(pop)):
        simulant = pop.loc[label]
        # Immigrants are skipped; only the remaining new simulants are
        # checked against their parent's last birth time.
        if simulant.immigrated != 'Yes':
            assert pop.loc[simulant.parent_id].last_birth_time >= time_start, \
                'expect all children to have mothers who gave birth after the simulation starts.'
def RunPipeline(config, start_population_size):
    """Run the daedalus microsimulation pipeline.

    The components named in ``config.components`` are instantiated, the input
    tables each of them needs are written to the simulation's data layer, and
    the simulation is advanced one year (365.25 days) at a time. After every
    year the population is given age buckets, written to
    ``<output_dir>/<location>/year_<n>/`` as CSV, and summarised on stdout.

    Parameters
    ----------
    config : ConfigTree
        Configuration used to run the pipeline. Its population size is
        overwritten with ``start_population_size``.
    start_population_size : int
        Size of the starting population.

    Returns
    -------
    pandas.DataFrame
        The population, with age buckets, after the last simulated year. If
        ``config.time.num_years`` is less than 1 no year is simulated and the
        initial population (with age buckets) is returned instead of failing
        on an unbound variable.
    """
    timestamp_format = "%Y-%m-%d %H:%M:%S"

    def print_timestamp():
        print(datetime.datetime.now().strftime(timestamp_format))

    # Set up the components using the config.
    config.update({'population': {
        'population_size': start_population_size,
    }}, source=str(Path(__file__).resolve()))

    num_years = config.time.num_years

    # SECURITY NOTE: each entry of config.components (e.g. 'Mortality()') is
    # evaluated as Python code. Only run this with trusted configuration.
    components = [eval(x) for x in config.components]

    simulation = InteractiveContext(components=components,
                                    configuration=config,
                                    plugin_configuration=utils.base_plugins(),
                                    setup=False)

    if 'InternalMigration()' in config.components:
        # setup internal migration matrices
        OD_matrices = InternalMigrationMatrix(configuration=config)
        OD_matrices.set_matrix_tables()
        simulation._data.write("internal_migration.MSOA_index",
                               OD_matrices.MSOA_location_index)
        simulation._data.write("internal_migration.LAD_index",
                               OD_matrices.LAD_location_index)
        simulation._data.write("internal_migration.MSOA_LAD_indices",
                               OD_matrices.df_OD_matrix_with_LAD)
        simulation._data.write("internal_migration.path_to_OD_matrices",
                               config.path_to_OD_matrices)

        # setup internal migration rates
        asfr_int_migration = InternalMigrationRateTable(configuration=config)
        asfr_int_migration.set_rate_table()
        simulation._data.write("cause.age_specific_internal_outmigration_rate",
                               asfr_int_migration.rate_table)

    if 'Mortality()' in config.components:
        # setup mortality rates
        asfr_mortality = MortalityRateTable(configuration=config)
        asfr_mortality.set_rate_table()
        simulation._data.write(
            "cause.all_causes.cause_specific_mortality_rate",
            asfr_mortality.rate_table)

    if 'FertilityAgeSpecificRates()' in config.components:
        # setup fertility rates
        asfr_fertility = FertilityRateTable(configuration=config)
        asfr_fertility.set_rate_table()
        simulation._data.write(
            "covariate.age_specific_fertility_rate.estimate",
            asfr_fertility.rate_table)

    if 'Emigration()' in config.components:
        # setup emigration rates
        asfr_emigration = EmigrationRateTable(configuration=config)
        asfr_emigration.set_rate_table()
        simulation._data.write(
            "covariate.age_specific_migration_rate.estimate",
            asfr_emigration.rate_table)

    if 'Immigration()' in config.components:
        # setup immigration rates
        asfr_immigration = ImmigrationRateTable(configuration=config)
        asfr_immigration.set_rate_table()
        asfr_immigration.set_total_immigrants()
        simulation._data.write("cause.all_causes.immigration_to_MSOA",
                               pd.read_csv(config.path_to_immigration_MSOA))
        simulation._data.write(
            "cause.all_causes.cause_specific_immigration_rate",
            asfr_immigration.rate_table)
        simulation._data.write(
            "cause.all_causes.cause_specific_total_immigrants_per_year",
            asfr_immigration.total_immigrants)

    print('Start simulation setup')
    print_timestamp()
    simulation.setup()

    print('Start running simulation')
    print_timestamp()

    pop = None
    for year in range(1, num_years + 1):
        simulation.run_for(duration=pd.Timedelta(days=365.25))

        print('Finished running simulation for year:', year)
        print_timestamp()

        pop = simulation.get_population()

        # assign age brackets to the individuals
        pop = utils.get_age_bucket(pop)

        # save the output file to csv
        year_output_dir = os.path.join(config.output_dir,
                                       config.location,
                                       'year_' + str(year))
        os.makedirs(year_output_dir, exist_ok=True)

        output_data_filename = ('ssm_' + config.location
                                + '_MSOA11_ppp_2011_simulation_year_'
                                + str(year) + '.csv')
        pop.to_csv(os.path.join(year_output_dir, output_data_filename))

        print()
        print('In year: ', config.time.start.year + year)
        # print some summary stats on the simulation
        print('alive', len(pop[pop['alive'] == 'alive']))

        if 'Mortality()' in config.components:
            print('dead', len(pop[pop['alive'] == 'dead']))
        if 'Emigration()' in config.components:
            print('emigrated', len(pop[pop['alive'] == 'emigrated']))
        if 'InternalMigration()' in config.components:
            print('internal migration',
                  len(pop[pop['internal_outmigration'] != '']))
        if 'FertilityAgeSpecificRates()' in config.components:
            print('New children', len(pop[pop['parent_id'] != -1]))
        if 'Immigration()' in config.components:
            print('Immigrants',
                  len(pop[pop['immigrated'].astype(str) == 'Yes']))

    if pop is None:
        # No year was simulated (num_years < 1): fall back to the initial
        # population so the function still returns a dataframe.
        pop = utils.get_age_bucket(simulation.get_population())

    return pop