def save_y_star(eps_version, arima_version, years, measure, draws, decay,
                gbd_round_id):
    """Combine migration predictions with forecast residuals and save them.

    Residual forecasts are read from ``eps_star.nc`` under ``mig_dir`` when
    that file can be opened; otherwise they are generated with
    ``arima_migration`` and written to that file. Forecast-year residuals
    are clipped to [-10, 10], added to the predictions stored in
    ``mig_hat.nc``, and the sum is saved to ``ystar_out``.

    NOTE(review): ``eps_path``, ``mig_dir`` and ``ystar_out`` are not
    defined inside this function, so they must come from an enclosing
    scope -- confirm. ``eps_version``, ``arima_version``, ``measure`` and
    ``gbd_round_id`` are not referenced in the body.

    Args:
        eps_version: Not referenced in the body.
        arima_version: Not referenced in the body.
        years: Object providing ``past_years``, ``forecast_years`` and
            ``years``.
        measure: Not referenced in the body.
        draws (int): Number of draws; forwarded to ``arima_migration`` and
            used to expand the predictions along ``draw``.
        decay: Forwarded to ``arima_migration``.
        gbd_round_id: Not referenced in the body.
    """
    past_residuals = open_xr(eps_path).data
    try:
        residuals = open_xr(f"{mig_dir}/eps_star.nc").data
    except Exception:
        # No readable cached residual forecast: build one and cache it.
        residuals = arima_migration(past_residuals, years, draws, decay)
        save_xr(residuals, mig_dir / "eps_star.nc", metric="rate",
                space="identity")

    # Cap forecast residuals between -10 and 10. The population forecasts
    # to 2100 decrease to 0 with current migration forecasts for Syria,
    # Latvia and Jamaica; capping makes things more reasonable.
    observed_part = residuals.sel(year_id=years.past_years)
    capped_part = residuals.sel(year_id=years.forecast_years).clip(
        min=-10, max=10)
    residuals = xr.concat([observed_part, capped_part], dim="year_id")

    predictions = open_xr(mig_dir / "mig_hat.nc").data
    predictions = expand_dimensions(
        predictions.sel(year_id=years.years), draw=range(0, draws))

    save_xr(predictions + residuals, ystar_out, metric="rate",
            space="identity")
def get_mortality(forecast_mortality_version, gbd_round_id, draws, years,
                  past_mortality_version):
    """Load past and forecast mortality and join them into one array.

    Args:
        forecast_mortality_version (str): Version name of the future
            mortality file used in FBDPath.
        gbd_round_id (int): GBD round used in both FBDPath locations.
        draws (int): Number of desired draws; forwarded to
            ``concat_past_future``.
        years: Forwarded to ``concat_past_future``.
        past_mortality_version (str): Version name of the past mortality
            file used in FBDPath.

    Returns:
        (xarray.DataArray): Result of ``concat_past_future`` applied to the
            past data (with ``acause`` dropped) and the forecast data.
    """
    future_dir = FBDPath(
        f"{gbd_round_id}/future/death/{forecast_mortality_version}")
    future_rates = open_xr(future_dir / "_all.nc").data

    past_dir = FBDPath(
        f"{gbd_round_id}/past/death/{past_mortality_version}")
    # The past file carries an "acause" label that is removed before joining.
    past_rates = open_xr(past_dir / "_all.nc").data.drop("acause")

    return concat_past_future(past_rates, future_rates, draws, years)
def get_asfr(forecast_asfr_version, gbd_round_id, draws, years,
             past_asfr_version):
    """Load past and forecast ASFR, join them, and strip ``sex_id``.

    Args:
        forecast_asfr_version (str): Future version name of the ASFR file
            used in FBDPath.
        gbd_round_id (int): GBD round used in both FBDPath locations.
        draws (int): Number of desired draws; forwarded to
            ``concat_past_future``.
        years: Forwarded to ``concat_past_future``.
        past_asfr_version (str): Past version name of the ASFR file used in
            FBDPath.

    Returns:
        (xarray.DataArray): Age-specific fertility rate. When ``sex_id`` is
            a dimension only ``sex_id=2`` is kept (and the dimension
            dropped); when it is only a coordinate it is dropped; otherwise
            the array is returned untouched.
    """
    future_dir = FBDPath(
        f"{gbd_round_id}/future/asfr/{forecast_asfr_version}")
    future_asfr = open_xr(future_dir / "asfr.nc").data

    past_dir = FBDPath(f"{gbd_round_id}/past/asfr/{past_asfr_version}")
    past_asfr = open_xr(past_dir / "asfr.nc").data

    asfr = concat_past_future(past_asfr, future_asfr, draws, years)

    if 'sex_id' in asfr.dims:
        # sex_id is a dimension: keep sex_id 2 only.
        return asfr.sel(sex_id=2, drop=True)
    if 'sex_id' in asfr.coords:
        # sex_id is a point coordinate: just remove it.
        return asfr.drop('sex_id')
    # sex_id does not exist at all; nothing to do.
    return asfr
def load_forecast_pop(gbd_round_id, version, years, draws):
    """Load forecast population data, aggregating it first if necessary.

    The pre-aggregated file ``population_agg.nc`` is preferred. If it cannot
    be opened, ``population.nc`` is read, aggregated with
    ``Aggregator.aggregate_everything`` and the result is written back as
    ``population_agg.nc``.

    Args:
        gbd_round_id (int): The GBD round ID the forecast population
            belongs to.
        version (str): The version of forecast population to read from.
        years (YearRange): The Forecasting format years to use; only
            ``forecast_years`` is read here.
        draws (int): Number of draws passed to ``resample``.

    Returns:
        xarray.DataArray: The forecast population, restricted to
            ``years.forecast_years`` and passed through ``resample``.
    """
    forecast_pop_dir = FBDPath(f"/{gbd_round_id}/future/population/{version}")
    agg_path = forecast_pop_dir / "population_agg.nc"

    try:
        forecast_pop_da = open_xr(agg_path).data
    except Exception:
        # Was a bare ``except:``, which also intercepted KeyboardInterrupt
        # and SystemExit and then started an expensive re-aggregation.
        # Any ordinary failure still falls back to building the file.
        forecast_pop_da = open_xr(forecast_pop_dir / "population.nc").data
        forecast_pop_da = Aggregator.aggregate_everything(
            forecast_pop_da, gbd_round_id).pop
        save_xr(forecast_pop_da, agg_path, metric="number",
                space="identity")

    # Slice to the forecast years and the requested number of draws.
    forecast_pop_da = forecast_pop_da.sel(year_id=years.forecast_years)
    return resample(forecast_pop_da, draws)
def wpp_fhs_diff():
    """Print the 2100 WPP-vs-FHS population difference for three regions.

    Loads the WPP aggregate population and the FHS reference-scenario mean
    population for 2100, passes both to ``_helper_wpp_fhs_diff`` for
    location IDs 166, 158 and 4, rounds each result with
    ``_helper_round(..., 1e6)`` and prints a sentence containing them.
    """
    wpp_file = FBDPath(
        f"/{settings.WPP_VERSIONS['population_aggs'].gbd_round_id}/"
        f"future/population/"
        f"{settings.WPP_VERSIONS['population_aggs'].version}"
    ) / "2019_fhs_agg_allage_bothsex_only.nc"
    wpp_2100 = open_xr(wpp_file).data.sel(year_id=2100)

    fhs_file = FBDPath(
        f"/{settings.BASELINE_VERSIONS['population_mean_ui'].gbd_round_id}/"
        f"future/population/"
        f"{settings.BASELINE_VERSIONS['population_mean_ui'].version}"
    ) / "population_combined.nc"
    fhs_2100 = open_xr(fhs_file).data.sel(
        sex_id=BOTH_SEX_ID, age_group_id=ALL_AGE_ID, scenario=0,
        quantile="mean", year_id=2100)

    # 166: Sub-Saharan Africa; 158: South Asia;
    # 4: Southeast Asia, East Asia, and Oceania.
    raw_diffs = [
        _helper_wpp_fhs_diff(wpp_2100, fhs_2100, region_id)
        for region_id in (166, 158, 4)
    ]
    sub_saharan_diff, south_asia_diff, se_e_oceania_diff = [
        _helper_round(raw, 1e6) for raw in raw_diffs
    ]

    print(f"The difference with UNPD in 2100 is due to {sub_saharan_diff} "
          f"million fewer people in the reference scenario in sub-Saharan "
          f"Africa, {south_asia_diff} million fewer in South Asia, and "
          f"{se_e_oceania_diff} million fewer in Southeast Asia, East Asia, "
          f"and Oceania, primarily due to the level of fertility achieved in "
          f"below replacement populations. \n")
def pop_declines():
    """Print how many countries lose over half their population by 2100.

    Mean-level data give the count and names of locations whose 2100
    reference-scenario population is more than 50% below the 2017 value.
    Draw-level data for location 6 (named China in the printed sentence)
    give the mean and the 2.5th/97.5th percentiles of the percent decline.
    """
    mean_forecast_file = FBDPath(
        f"/{settings.BASELINE_VERSIONS['population_mean_ui'].gbd_round_id}/"
        f"future/population/"
        f"{settings.BASELINE_VERSIONS['population_mean_ui'].version}"
    ) / "population_combined.nc"
    mean_forecast = open_xr(mean_forecast_file).data.sel(
        sex_id=BOTH_SEX_ID, age_group_id=ALL_AGE_ID, scenario=0,
        quantile="mean", location_id=COUNTRIES)

    mean_past_file = FBDPath(
        f"/{settings.PAST_VERSIONS['population'].gbd_round_id}/"
        f"past/population/"
        f"{settings.PAST_VERSIONS['population'].version}"
    ) / "population.nc"
    mean_past = open_xr(mean_past_file).data.sel(
        sex_id=BOTH_SEX_ID, age_group_id=ALL_AGE_ID, location_id=COUNTRIES)

    # Fractional decline between 2017 and 2100 at the mean level.
    frac_decline = 1 - (
        mean_forecast.sel(year_id=2100) / mean_past.sel(year_id=2017))
    decline_over_50_val = (frac_decline > .5).sum().values.item(0)
    over_half_ids = list(
        frac_decline.where(frac_decline > .5, drop=True).location_id.values)
    decline_over_50_locs = _location_id_to_name(over_half_ids)

    # Compute % decline at draw level for uncertainty.
    draw_forecast_file = FBDPath(
        f"/{settings.BASELINE_VERSIONS['population'].gbd_round_id}/"
        f"future/population/"
        f"{settings.BASELINE_VERSIONS['population'].version}"
    ) / "population_agg.nc"
    draw_forecast = open_xr(draw_forecast_file).data.sel(
        sex_id=BOTH_SEX_ID, age_group_id=ALL_AGE_ID, scenario=0,
        location_id=COUNTRIES)

    draw_past_file = FBDPath(
        f"/{settings.PAST_VERSIONS['population_draw2017'].gbd_round_id}/"
        f"past/population/"
        f"{settings.PAST_VERSIONS['population_draw2017'].version}"
    ) / "population.nc"
    draw_past = open_xr(draw_past_file).data.sel(
        sex_id=BOTH_SEX_ID, age_group_id=ALL_AGE_ID, location_id=COUNTRIES)

    china_2100 = draw_forecast.sel(year_id=2100, location_id=6)
    china_2017 = draw_past.sel(year_id=2017, location_id=6)
    decline_china = (1 - (china_2100 / china_2017)) * 100

    decline_china_mean = _helper_round(
        decline_china.mean("draw").values.item(0), 1)
    decline_china_lower = _helper_round(
        decline_china.quantile(dim="draw", q=[0.025]).values.item(0), 1)
    decline_china_upper = _helper_round(
        decline_china.quantile(dim="draw", q=[0.975]).values.item(0), 1)

    # NOTE(review): the trailing ").%" looks like a misplaced percent
    # sign; left untouched because it is emitted text -- confirm.
    print(f"{decline_over_50_val} countries including {decline_over_50_locs} "
          f"in the reference scenario will have declines of greater than "
          f"50% from 2017 by 2100; China will decline by {decline_china_mean} "
          f"({decline_china_lower} to {decline_china_upper}).%\n")
def return_pop_differences(fbd_pop_version, unpd_pop_version):
    """Print the 2100 UNPD-minus-FHS population gap for three regions.

    Despite its name this function returns ``None``; it only prints a
    sentence with the differences (in millions, rounded to two decimals)
    for location IDs 166, 158 and 4.

    Args:
        fbd_pop_version (str): Version of the FHS ``population_combined.nc``
            file under GBD round 5.
        unpd_pop_version (str): Version of the UNPD
            ``population_all_age.nc`` file under ``wpp``.
    """
    loc_metadata = get_location_metadata(gbd_round_id=5, location_set_id=39)
    gbd_locs_df = loc_metadata.query("level < 4")

    fbdpoppath = FBDPath(f"/5/future/population/{fbd_pop_version}/"
                         "population_combined.nc")
    unpdpoppath = FBDPath(f"/wpp/future/population/{unpd_pop_version}/"
                          "population_all_age.nc")

    unpd_pop_xr = open_xr(unpdpoppath).data
    unpd_pop_100 = unpd_pop_xr.sel(year_id=2100, sex_id=3).drop([
        "year_id", "sex_id", "age_group_id"
    ]).squeeze().rename("unpd_pop").to_dataframe().reset_index()

    ihme_pop_100 = open_xr(fbdpoppath).data.sel(
        age_group_id=22, sex_id=3, scenario=0, year_id=2100,
        quantile="mean").drop([
            "age_group_id", "sex_id", "scenario", "year_id", "quantile"
        ]).squeeze().rename("ihme_pop").to_dataframe().reset_index()

    # Integer copies are not used below; they are kept so the resulting
    # frames (and any cast failure on bad data) stay as before.
    ihme_pop_100["ihme_pop_int"] = ihme_pop_100["ihme_pop"].astype(int)
    unpd_pop_100["unpd_pop_int"] = unpd_pop_100["unpd_pop"].astype(int)

    pop_2100_df = ihme_pop_100.merge(unpd_pop_100, how="left")
    final_pop_df = pop_2100_df.merge(
        gbd_locs_df[["location_id", "lancet_label"]])

    location_ids = [166, 158, 4]
    locs_of_interest = final_pop_df[final_pop_df.location_id.isin(
        location_ids)]

    new_pop_diff = {}
    for loc_id in location_ids:
        # Filter once per location instead of once per column; the unused
        # ``location_name`` lookup was removed.
        loc_rows = locs_of_interest[locs_of_interest["location_id"] == loc_id]
        ihme_num = loc_rows["ihme_pop"].values[0]
        unpd_num = loc_rows["unpd_pop"].values[0]
        new_pop_diff[loc_id] = ((unpd_num - ihme_num) / 1e6).round(2)

    # NOTE(review): "south Asia and ; southeast Asia" reads like a typo in
    # the emitted text; left unchanged -- confirm.
    print(f"Figure 9 shows a global comparison of the TFR, life expectancy at"
          f" birth, and population between our reference scenario, the UNPD "
          f"median variant, and the Wittgenstein SSP2 scenario. Differences"
          f" between 2100 population forecasts from the UNPD and our reference"
          f" scenario are largely explained by differences in population"
          f" levels in sub-Saharan Africa; south Asia and ; southeast Asia,"
          f" east Asia, and Oceania ({new_pop_diff[location_ids[0]]} million, "
          f"{new_pop_diff[location_ids[1]]} million, and"
          f" {new_pop_diff[location_ids[2]]} million fewer people in our"
          f" reference scenario, respectively)")
def pull_pop(past_pop_version, future_pop_version, scenarios = 0,
             age_groups=ALL_AGE_ID):
    """Load past and forecast population and combine them.

    Args:
        past_pop_version (str): Version of the past ``population.nc``.
        future_pop_version (str): Version of the forecast
            ``population_combined.nc``.
        scenarios: Scenario selection applied to the combined data.
        age_groups: Age-group selection applied to the combined data.

    Returns:
        Past data (restricted to the forecast's locations) merged with the
        forecast via ``combine_first``, then subset to ``age_groups``,
        ``BOTH_SEX_ID`` and ``scenarios``.
    """
    past_dir = FBDPath("{gbd_round_id}/past/population/{version}".format(
        gbd_round_id=GBD_ROUND, version=past_pop_version))
    future_dir = FBDPath("{gbd_round_id}/future/population/{version}".format(
        gbd_round_id=GBD_ROUND, version=future_pop_version))

    forecast = open_xr(future_dir / "population_combined.nc").data
    history = open_xr(past_dir / "population.nc").data
    # Only keep past locations that also exist in the forecast.
    history = history.sel(location_id=forecast.location_id)

    merged = history.combine_first(forecast)
    return merged.sel(
        age_group_id=age_groups, sex_id=BOTH_SEX_ID, scenario=scenarios)
def load_data(version):
    """Load scenario-0 forecast GDP as a flat dataframe.

    Args:
        version (str): Version of the forecast ``gdp.nc`` file.

    Returns:
        Dataframe produced from the data array (named ``"gdp"``) with its
        index reset.
    """
    source = FBDPath(f"{GBD_ROUND_ID}/future/gdp/{version}/gdp.nc")
    reference_gdp = open_xr(source).data.sel(scenario=0)
    # The array name becomes the value column name in the dataframe.
    reference_gdp.name = "gdp"
    return reference_gdp.to_dataframe().reset_index()
def sing_tai_tfr():
    """Print 2017 TFR (mean and 2.5/97.5 percentiles) for two locations.

    Location 69 is reported as Singapore and location 8 as Taiwan in the
    printed sentence. All values are passed through
    ``_helper_round(..., 1)``.
    """
    tfr_file = FBDPath(f"/{settings.PAST_VERSIONS['tfr'].gbd_round_id}/"
                       f"past/tfr/"
                       f"{settings.PAST_VERSIONS['tfr'].version}") / "tfr.nc"
    tfr_2017 = open_xr(tfr_file).data.sel(year_id=2017)

    def _summarize(location_id):
        """Return rounded (mean, lower, upper) over draws for a location."""
        loc_da = tfr_2017.sel(location_id=location_id)
        mean = loc_da.mean('draw').values.item(0)
        upper = loc_da.quantile(.975, dim='draw').values.item(0)
        lower = loc_da.quantile(.025, dim='draw').values.item(0)
        return (_helper_round(mean, 1), _helper_round(lower, 1),
                _helper_round(upper, 1))

    singapore_tfr, singapore_lower, singapore_upper = _summarize(69)
    taiwan_tfr, taiwan_lower, taiwan_upper = _summarize(8)

    print(f"Discussion: In contrast, positive incentives have had little "
          f"effect in Singapore and Taiwan, where 2017 TFR levels were "
          f"{singapore_tfr} ({singapore_lower}–{singapore_upper}) and "
          f"{taiwan_tfr} ({taiwan_lower}–{taiwan_upper})\n")
def pull_reshape_tfr(gbd_round_id, tfr_version, location_ids):
    """Build a table of last-past-year and final-forecast-year TFR.

    Pulls the GBD TFR covariate (ID 149) for ``YEARS.past_end``, converts it
    with ``melt_to_xarray``, concatenates it with the forecast TFR along
    ``year_id``, pivots quantiles to columns, and reshapes by scenario.

    Args:
        gbd_round_id (int): GBD round.
        tfr_version (str): Forecast TFR version.
        location_ids (list): Location IDs to pull from both past and
            future data.

    Returns:
        tfr_final_df (pandas dataframe): Merge of the scenario-pivoted
            tables for ``YEARS.past_end`` and ``YEARS.forecast_end``.
    """
    p_end = YEARS.past_end
    f_end = YEARS.forecast_end

    # Past: GBD covariate estimates for the final past year.
    covariate_df = get_covariate_estimates(
        covariate_id=149,
        gbd_round_id=gbd_round_id,
        location_id=location_ids,
        year_id=p_end,
        status="best")
    keep_columns = [
        "year_id", "location_id", "mean_value", "lower_value", "upper_value"
    ]
    new_names = {
        "mean_value": "mean", "lower_value": "lower", "upper_value": "upper"
    }
    past_da = melt_to_xarray(
        covariate_df[keep_columns].rename(columns=new_names))

    # Future: forecast TFR limited to the requested slice.
    future_da = open_xr(f"{gbd_round_id}/future/tfr/"
                        f"{tfr_version}/tfr_combined.nc").data
    future_da = future_da.sel(location_id=location_ids, scenario=SCENARIOS,
                              year_id=YEARS.forecast_years)

    # Concatenate and spread the quantile values into columns.
    long_df = xr.concat(
        [past_da, future_da], dim="year_id").to_dataframe().reset_index()
    wide_df = long_df.pivot_table(
        values="value",
        index=["location_id", "year_id", "scenario"],
        columns="quantile").reset_index()

    # Combine value and UI into one column.
    wide_df = combine_mean_ui(wide_df, df_type="tfr")

    # Keep the last past year (reference scenario) and the final year.
    start_df = pivot_scenarios(
        wide_df.query(f"year_id == {p_end} and scenario==0"),
        f"{p_end}", SCENARIO_MAP, df_type="tfr")
    end_df = pivot_scenarios(
        wide_df.query(f"year_id == {f_end}"),
        f"{f_end}", SCENARIO_MAP, df_type="tfr")

    return start_df.merge(end_df)
def read_paf(acause, risk, gbd_round_id, past_or_future, version):
    """Read the PAF file for one cause-risk pair.

    The file is ``<acause>_<risk>.nc`` inside the ``risk_acause_specific``
    folder of the ``paf`` stage.

    Args:
        acause (str): cause name.
        risk (str): risk name.
        gbd_round_id (int): gbd round id.
        past_or_future (str): forwarded to ``FBDPath`` as
            ``past_or_future``.
        version (str): version folder the data comes from.

    Returns:
        paf (xr.DataArray): the ``.data`` attribute of the opened file.
    """
    stage_dir = FBDPath(gbd_round_id=gbd_round_id,
                        past_or_future=past_or_future,
                        stage="paf",
                        version=version)
    file_name = "{}_{}.nc".format(acause, risk)
    return open_xr(stage_dir / "risk_acause_specific" / file_name).data
def largest_gdp():
    """Print when locations 6 and 102 hold the largest GDP.

    The printed sentence names location 6 China and location 102 the USA.
    An assertion checks that every year's maximum belongs to one of those
    two locations.
    """
    gdp_file = FBDPath(
        f"/{settings.BASELINE_VERSIONS['gdp'].gbd_round_id}/"
        f"future/gdp/"
        f"{settings.BASELINE_VERSIONS['gdp'].version}") / "gdp.nc"
    gdp_da = open_xr(gdp_file).data.sel(scenario=0)

    # Keep only the entries equal to the maximum across locations.
    leaders = gdp_da.where(gdp_da==gdp_da.max('location_id'), drop=True)

    def _years_on_top(location_id):
        """Years in which the given location holds the maximum."""
        return leaders.sel(
            location_id=location_id).dropna(dim='year_id').year_id.values

    china_years = _years_on_top(6)
    usa_years = _years_on_top(102)

    # Check no other location is ever on top.
    uncovered = np.setdiff1d(
        gdp_da.coords["year_id"].values,
        np.concatenate([china_years, usa_years]))
    assert uncovered.size == 0

    # First year China leads, then first later year the USA leads again.
    china_year = china_years.min()
    usa_year = usa_years[usa_years > china_year].min()

    print(f"China is expected to become the largest economy by {china_year} "
          f"but in the reference scenario the USA would once again become in "
          f"the largest economy in {usa_year}.\n")
def working_age():
    """Print the four locations with the largest 2100 working-age population.

    Working age is built by summing age group IDs 9 through 17 of the
    reference-scenario mean population for ``COUNTRIES``.
    """
    pop_file = FBDPath(
        f"/{settings.BASELINE_VERSIONS['population_mean_ui'].gbd_round_id}/"
        f"future/population/"
        f"{settings.BASELINE_VERSIONS['population_mean_ui'].version}"
    ) / "population_combined.nc"

    # Working age is 20-64 (age id 163), obtained by combining ids 9-17.
    age_ids = list(range(9, 18))
    by_age = open_xr(pop_file).data.sel(
        sex_id=BOTH_SEX_ID, scenario=0, quantile="mean",
        age_group_id=age_ids, year_id=2100, location_id=COUNTRIES)

    ranked = _add_location_name(by_age.sum('age_group_id').to_dataframe())
    ranked = ranked.sort_values(
        by='population', ascending=False).reset_index()

    first, second, third, fourth = [
        ranked.location_name[position] for position in range(4)
    ]

    print(f"By 2100, {first} will still have the largest working age "
          f"population followed by {second} and {third}. Coming in fourth in "
          f"the world will be {fourth}.\n")
def pop_peak():
    """Print the peak and the 2100 value of the location-1 population.

    Uses the reference-scenario all-age, both-sex series for location 1.
    The peak year is the first year whose mean equals the series maximum;
    mean, lower and upper values are passed through
    ``_helper_round(..., 1e9)``.
    """
    pop_file = FBDPath(
        f"/{settings.BASELINE_VERSIONS['population_mean_ui'].gbd_round_id}/"
        f"future/population/"
        f"{settings.BASELINE_VERSIONS['population_mean_ui'].version}"
    ) / "population_combined.nc"
    series = open_xr(pop_file).data.sel(
        location_id=1, age_group_id=ALL_AGE_ID, sex_id=BOTH_SEX_ID,
        scenario=0)

    def _value(quantile, year_id):
        """Scalar value for one quantile in one year."""
        return series.sel(quantile=quantile, year_id=year_id).values.item(0)

    # Peak of the mean series and the year it occurs.
    mean_series = series.sel(quantile="mean")
    peak_raw = mean_series.max().values.item(0)
    max_year = mean_series.where(
        mean_series==peak_raw, drop=True).year_id.values[0]
    peak_upper_raw = _value("upper", max_year)
    peak_lower_raw = _value("lower", max_year)

    # Values in 2100.
    end_raw = _value("mean", 2100)
    end_upper_raw = _value("upper", 2100)
    end_lower_raw = _value("lower", 2100)

    max_val = _helper_round(peak_raw, 1e9)
    max_upper = _helper_round(peak_upper_raw, 1e9)
    max_lower = _helper_round(peak_lower_raw, 1e9)
    end_val = _helper_round(end_raw, 1e9)
    end_upper = _helper_round(end_upper_raw, 1e9)
    end_lower = _helper_round(end_lower_raw, 1e9)

    print(f"In the reference forecast, the global population is projected to "
          f"peak in {max_year} at {max_val} "
          f"({max_lower}-{max_upper}) billion people, and decline to "
          f"{end_val} ({end_lower}-{end_upper}) in 2100."
          f"\n")
def age_pops():
    """Print the 2100 location-1 population above 65 and below 20.

    The above-65 figure sums age group IDs 18-21; the under-20 figure reads
    age group ID 158. Both are passed through ``_helper_round(..., 1e9)``.
    """
    pop_file = FBDPath(
        f"/{settings.BASELINE_VERSIONS['population_mean_ui'].gbd_round_id}/"
        f"future/population/"
        f"{settings.BASELINE_VERSIONS['population_mean_ui'].version}"
    ) / "population_combined.nc"
    global_2100 = open_xr(pop_file).data.sel(
        sex_id=BOTH_SEX_ID, scenario=0, year_id=2100, quantile="mean",
        location_id=1)

    # 65 to 69, 70 to 74, 75 to 79, 80 plus.
    older_ids = [18, 19, 20, 21]
    older_total = global_2100.sel(
        age_group_id=older_ids).sum('age_group_id').values.item(0)

    # <20 years.
    younger_total = global_2100.sel(age_group_id=158).values.item(0)

    above_65_val = _helper_round(older_total, 1e9)
    under_20_val = _helper_round(younger_total, 1e9)

    print(f"Findings also suggest a shifting age structure in many parts of "
          f"the world, with {above_65_val} billion individuals above the age "
          f"of 65, and {under_20_val} billion individuals below the age of 20, "
          f"globally in 2100.\n")
def load_pop(gbd_round_id, past_version, forecast_version):
    """Load past and forecast population and concatenate along ``year_id``.

    Args:
        gbd_round_id (int): GBD round used in both file locations.
        past_version (str): Version of the past ``population.nc``.
        forecast_version (str): Version of the forecast
            ``population_combined.nc``.

    Returns:
        Past data expanded to the forecast's ``scenario`` and ``quantile``
        coordinates, concatenated with the forecast.
    """
    forecast_file = FBDPath(
        f"/{gbd_round_id}/future/population/"
        f"{forecast_version}/population_combined.nc")
    past_file = FBDPath(
        f"/{gbd_round_id}/past/population/{past_version}/population.nc")

    forecast = open_xr(forecast_file).data
    history = open_xr(past_file).data
    # Give the past the same scenario/quantile coordinates as the forecast
    # before concatenating.
    history = expand_dimensions(
        history,
        scenario=forecast.scenario,
        quantile=forecast["quantile"])

    return xr.concat([history, forecast], "year_id")
def prep_pop_da(past_version, forecast_version, gbd_round_id, years):
    """Prepare population-weighted average age and population datasets.

    Past and forecast (mean) population are concatenated along ``year_id``.
    For three scenario groups -- ``[-1, 0, 1]``, ``[3]`` and ``[2]`` -- the
    population-weighted mean age is computed and the population is wrapped
    in a dataset with a ``population`` variable.

    Args:
        past_version (str): Version of the past ``population.nc``.
        forecast_version (str): Version of the forecast
            ``population_combined.nc``.
        gbd_round_id (int): GBD round used in both file locations.
        years: Object providing ``past_years``.

    Returns:
        tuple: ``(avg_age_fhs, avg_age_sdg, avg_age_99, ds, ds_sdg, ds_99)``
            for scenario groups ``[-1, 0, 1]``, ``[3]`` and ``[2]``
            respectively.
    """
    forecast_pop_file = FBDPath(
        f"/{gbd_round_id}/future/population/{forecast_version}/"
        f"population_combined.nc")
    forecast_fhs = open_xr(forecast_pop_file).data.sel(
        quantile='mean', drop=True)

    past_fhs_file = FBDPath(
        f"/{gbd_round_id}/past/population/{past_version}/population.nc")
    past_fhs = expand_dimensions(
        open_xr(past_fhs_file).data.sel(
            year_id=years.past_years,
            sex_id=forecast_fhs["sex_id"],
            age_group_id=forecast_fhs["age_group_id"],
            location_id=forecast_fhs["location_id"]),
        scenario=forecast_fhs.scenario.values)

    fhs_all_scenarios = xr.concat([past_fhs, forecast_fhs], dim="year_id")

    ages = db.get_ages().query("age_group_id in @ALL_AGE_GROUP_IDS")
    # Explicit copy: the new column used to be assigned onto a slice of
    # ``ages``, which triggers pandas' SettingWithCopyWarning.
    days = ages[["age_group_id", "age_group_days_start",
                 "age_group_days_end"]].copy()
    # Midpoint of each age group, converted from days to years.
    days["mean_age"] = (
        days["age_group_days_end"]
        - (days["age_group_days_end"] - days["age_group_days_start"]) / 2
    ) / 365.25
    mean_age = days.set_index("age_group_id")["mean_age"].to_xarray()

    def _summarize(scenario_ids):
        """Return (weighted mean age, population dataset) for scenarios."""
        pop = fhs_all_scenarios.sel(scenario=scenario_ids).sel(
            age_group_id=mean_age["age_group_id"], sex_id=SEX_IDS)
        avg_age = (
            (pop * mean_age).sum("age_group_id") / pop.sum("age_group_id"))
        return avg_age, pop.rename("population").to_dataset()

    avg_age_fhs, ds = _summarize([-1, 0, 1])
    avg_age_sdg, ds_sdg = _summarize([3])
    avg_age_99, ds_99 = _summarize([2])

    return avg_age_fhs, avg_age_sdg, avg_age_99, ds, ds_sdg, ds_99
def all_weights_main(reference_scenario, diff_over_mean, truncate,
                     truncate_quantiles, replace_with_mean,
                     use_past_uncertainty, transform, max_weight,
                     weight_step_size, past_version, pv_version, years,
                     gbd_round_id, test_mode, **kwargs):
    """Predictive validity over the whole range of candidate weights.

    For each weight in ``np.arange(0, max_weight, weight_step_size)`` the
    held-out years are forecast with ``arc_forecast_education`` and compared
    with the observed years via ``calc_rmse``. The RMSE values are
    concatenated along ``weight`` and written to
    ``education_arc_weight_rmse.nc``.

    NOTE(review): ``past_version`` and ``pv_version`` are not referenced;
    both paths are built from an empty string -- presumably redacted.
    """
    LOGGER.debug("diff_over_mean:{}".format(diff_over_mean))
    LOGGER.debug("truncate:{}".format(truncate))
    LOGGER.debug("truncate_quantiles:{}".format(truncate_quantiles))
    LOGGER.debug("replace_with_mean:{}".format(replace_with_mean))
    LOGGER.debug("reference_scenario:{}".format(reference_scenario))
    LOGGER.debug("use_past_uncertainty:{}".format(use_past_uncertainty))

    LOGGER.debug("Reading in the past")
    input_dir = FBDPath("".format())
    past = open_xr(input_dir / "education.nc").data
    past = past.transpose(*list(past.coords))

    if use_past_uncertainty:
        LOGGER.debug("Using past draws for PV")
    else:
        LOGGER.debug("Using past means for PV")
        past = past.mean("draw")

    if test_mode:
        # Shrink to a small corner of the data; otherwise use the full set.
        past = past.sel(
            age_group_id=past["age_group_id"].values[:5],
            draw=past["draw"].values[:5],
            location_id=past["location_id"].values[:5])

    holdouts = past.sel(year_id=years.past_years)
    observed = past.sel(year_id=years.forecast_years)

    def _rmse_for(weight_exp):
        """RMSE of the reference-scenario forecast for one weight."""
        predicted = arc_forecast_education(
            holdouts, gbd_round_id, transform, weight_exp, years,
            reference_scenario, diff_over_mean, truncate,
            truncate_quantiles, replace_with_mean)
        rmse = calc_rmse(
            predicted.sel(scenario=REFERENCE_SCENARIO, drop=True),
            observed, years)
        return xr.DataArray(
            [rmse.values], [[weight_exp]], dims=["weight"])

    LOGGER.debug("Calculating RMSE for all weights")
    candidate_weights = np.arange(0, max_weight, weight_step_size)
    per_weight = [_rmse_for(weight_exp) for weight_exp in candidate_weights]
    rmse_results = xr.concat(per_weight, dim="weight")

    output_dir = FBDPath("".format())
    output_dir.mkdir(parents=True, exist_ok=True)
    rmse_results.to_netcdf(str(output_dir / "education_arc_weight_rmse.nc"))
    LOGGER.info("RMSE is saved")
def _load_migration_rate():
    """Return forecast migration divided by reference mean population.

    Migration is averaged over ``draw`` and summed over ``sex_id`` and
    ``age_group_id``; population is the scenario-0 mean for ``COUNTRIES``,
    both sexes, all ages.
    """
    pop_file = FBDPath(
        f"/{settings.BASELINE_VERSIONS['population_mean_ui'].gbd_round_id}/"
        f"future/population/"
        f"{settings.BASELINE_VERSIONS['population_mean_ui'].version}"
    ) / "population_combined.nc"
    population = open_xr(pop_file).data.sel(
        scenario=0, quantile="mean", location_id=COUNTRIES,
        sex_id=BOTH_SEX_ID, age_group_id=ALL_AGE_ID)

    mig_file = FBDPath(
        f"/{settings.BASELINE_VERSIONS['migration'].gbd_round_id}/"
        f"future/migration/{settings.BASELINE_VERSIONS['migration'].version}"
    ) / "migration.nc"
    migration = open_xr(mig_file).data
    # Collapse draws to their mean, then total across sex and age.
    migration = migration.mean('draw').sum(['sex_id', 'age_group_id'])

    return migration / population
def one_weight_main(reference_scenario, transform, diff_over_mean, truncate,
                    truncate_quantiles, replace_with_mean,
                    use_past_uncertainty, weight_exp, past_version,
                    pv_version, years, gbd_round_id, test_mode, **kwargs):
    """Predictive validity for a single weight of the range of weights.

    Forecasts the held-out years with ``arc_forecast_education`` using
    ``weight_exp``, computes the RMSE against the observed years with
    ``calc_rmse`` and writes it to ``each_weight/<weight_exp>_rmse.nc``.

    NOTE(review): ``past_version`` and ``pv_version`` are not referenced
    because the path structure was removed.
    """
    LOGGER.debug("diff_over_mean:{}".format(diff_over_mean))
    LOGGER.debug("truncate:{}".format(truncate))
    LOGGER.debug("truncate_quantiles:{}".format(truncate_quantiles))
    LOGGER.debug("replace_with_mean:{}".format(replace_with_mean))
    LOGGER.debug("reference_scenario:{}".format(reference_scenario))
    LOGGER.debug("use_past_uncertainty:{}".format(use_past_uncertainty))

    LOGGER.debug("Reading in the past")
    input_dir = FBDPath("".format())  # Path structure removed for security
    past = open_xr(input_dir / "education.nc").data
    past = past.transpose(*list(past.coords))

    if use_past_uncertainty:
        LOGGER.debug("Using past draws for PV")
    else:
        LOGGER.debug("Using past means for PV")
        past = past.mean("draw")

    if test_mode:
        # Shrink to a small corner of the data; otherwise use the full set.
        past = past.sel(
            age_group_id=past["age_group_id"].values[:5],
            draw=past["draw"].values[:5],
            location_id=past["location_id"].values[:5])

    holdouts = past.sel(year_id=years.past_years)
    observed = past.sel(year_id=years.forecast_years)

    LOGGER.debug("Calculating RMSE for {}".format(weight_exp))
    forecast = arc_forecast_education(
        holdouts, gbd_round_id, transform, weight_exp, years,
        reference_scenario, diff_over_mean, truncate, truncate_quantiles,
        replace_with_mean)
    reference_forecast = forecast.sel(scenario=REFERENCE_SCENARIO, drop=True)
    rmse = calc_rmse(reference_forecast, observed, years)
    rmse_da = xr.DataArray(
        [rmse.values], [[weight_exp]], dims=["weight"])

    output_dir = FBDPath("".format())  # Path structure removed for security
    per_weight_dir = output_dir / "each_weight"
    per_weight_dir.mkdir(parents=True, exist_ok=True)

    rmse_da.to_netcdf(
        str(per_weight_dir / "{}_rmse.nc".format(weight_exp)))
    LOGGER.info("Saving RMSE for {}".format(weight_exp))
def wpp_witt_pops():
    """Print the 2100 location-1 population from WPP and Wittgenstein.

    Both values are passed through ``_helper_round(..., 1e9)`` before being
    inserted into the printed sentence.
    """
    wpp_file = FBDPath(
        f"/{settings.WPP_VERSIONS['population_aggs'].gbd_round_id}/"
        f"future/population/"
        f"{settings.WPP_VERSIONS['population_aggs'].version}"
    ) / "2019_fhs_agg_allage_bothsex_only.nc"
    wpp_global = open_xr(wpp_file).data.sel(location_id=1, year_id=2100)

    witt_file = FBDPath(
        f"/{settings.WITT_VERSIONS['population'].gbd_round_id}/"
        f"future/population/"
        f"{settings.WITT_VERSIONS['population'].version}"
    ) / "population_ssp2.nc"
    witt_global = open_xr(witt_file).data.sel(
        location_id=1, year_id=2100, sex_id=3, age_group_id=22)

    wpp_pop_val = _helper_round(wpp_global.values.item(0), 1e9)
    witt_pop_val = _helper_round(witt_global.values.item(0), 1e9)

    print(f"Forecasts from this study differ from the UNPD and the "
          f"Wittgenstein Centre, which project {wpp_pop_val} billion and "
          f"{witt_pop_val} billion people globally in 2100, respectively.\n")
def get_pop(forecast_pop_version, gbd_round_id, measure, draws, years,
            past_pop_version):
    """Load past and forecast population, joined and subset by measure.

    Args:
        forecast_pop_version (str): Version name of the forecast
            populations file used in FBDPath.
        gbd_round_id (int): GBD round used in both FBDPath locations.
        measure (str): When ``"live_births"``, the result is limited to
            ``FERTILE_AGE_GROUP_IDS`` and ``sex_id=2`` (with ``sex_id``
            dropped); otherwise sex IDs 1 and 2 are kept.
        draws (int): Number of desired draws. Used to expand the past data
            along ``draw`` and forwarded to ``concat_past_future``.
        years: Forwarded to ``concat_past_future``.
        past_pop_version (str): Version name of the past populations file
            used in FBDPath.

    Returns:
        (xarray.DataArray): Combined past and forecast population, subset
            as described under ``measure``.
    """
    future_dir = FBDPath(
        f"{gbd_round_id}/future/population/{forecast_pop_version}")
    future_pop = open_xr(future_dir / "population.nc").data

    past_dir = FBDPath(
        f"{gbd_round_id}/past/population/{past_pop_version}")
    history = open_xr(past_dir / "population.nc").data
    # Align sexes with the forecast and add a draw dimension to the past.
    history = history.sel(sex_id=future_pop.sex_id.values)
    history = expand_dimensions(history, draw=range(draws))

    combined = concat_past_future(history, future_pop, draws, years)

    if measure == "live_births":
        fertile_females = combined.sel(
            age_group_id=list(FERTILE_AGE_GROUP_IDS), sex_id=2)
        return fertile_females.drop(["sex_id"])
    return combined.sel(sex_id=[1, 2])
def taiwan_tfr():
    """Print the 2017 reference-scenario mean TFR for location 8.

    The printed sentence names the location Taiwan.
    """
    tfr_file = FBDPath(
        f"/{settings.BASELINE_VERSIONS['tfr_mean_ui'].gbd_round_id}/"
        f"future/tfr/"
        f"{settings.BASELINE_VERSIONS['tfr_mean_ui'].version}"
    ) / "tfr_combined.nc"
    selected = open_xr(tfr_file).data.sel(
        scenario=0, location_id=8, quantile="mean", year_id=2017)
    taiwan_tfr = selected.values.item(0)

    print(f"country like Taiwan, with a current TFR of {taiwan_tfr}\n")
def main(migration_version, gbd_round_id):
    """Split migration counts by an age-sex pattern and save the result.

    The pattern is read from ``combined_age_sex_pattern.nc``; when that
    raises ``FileNotFoundError`` it is built with
    ``create_age_sex_xarray``. The squeezed migration counts are multiplied
    by the pattern and saved as ``migration_split.nc`` next to the input.

    Args:
        migration_version (str): Version folder holding ``mig_counts.nc``.
        gbd_round_id (int): GBD round used in the file locations.
    """
    # Load age-sex pattern (loc, draw, age, sex).
    LOGGER.debug("Loading age-sex migration pattern")
    try:
        pattern_file = FBDPath(
            f'/{gbd_round_id}/future/migration/'
            f'{PATTERN_VERSION}') / "combined_age_sex_pattern.nc"
        age_sex_pattern = open_xr(pattern_file).data
    except FileNotFoundError:
        # Data doesn't yet exist.
        age_sex_pattern = create_age_sex_xarray()

    # Load migration counts (loc, draw, year).
    LOGGER.debug("Loading migration data")
    mig_dir = FBDPath(f"/{gbd_round_id}/future/migration/{migration_version}/")
    counts = open_xr(mig_dir / "mig_counts.nc").data.squeeze(drop=True)

    # Migration counts with age and sex (loc, draw, year, age, sex).
    split_counts = counts * age_sex_pattern

    LOGGER.debug("Saving age-sex split migration data")
    save_xr(split_counts, mig_dir / "migration_split.nc", metric="number",
            space="identity")
def tfr_below_replacement():
    """Print how many locations have mean TFR below 2.1 in 2050 and 2100.

    Uses the reference-scenario mean TFR for ``COUNTRIES``.
    """
    tfr_file = FBDPath(
        f"/{settings.BASELINE_VERSIONS['tfr_mean_ui'].gbd_round_id}/"
        f"future/tfr/"
        f"{settings.BASELINE_VERSIONS['tfr_mean_ui'].version}"
    ) / "tfr_combined.nc"
    mean_tfr = open_xr(tfr_file).data.sel(
        scenario=0, location_id=COUNTRIES, quantile="mean")

    def _count_below(year_id):
        """Number of entries below the replacement TFR of 2.1 in a year."""
        return (mean_tfr.sel(year_id=year_id) < 2.1).sum().values.item(0)

    below_2050 = _count_below(2050)
    below_2100 = _count_below(2100)

    print(f"By 2050, {below_2050} countries will have a TFR below replacement "
          f"and {below_2100} below replacement by 2100.\n")
def read_sev(rei, sev, vaccine_sev, gbd_round_id, years, draws):
    """Read the SEV for a risk or a vaccine intervention.

    Vaccine REIs (those returned by ``get_vaccine_reis``) are read from the
    ``vaccine`` stage and converted with ``1.0 - value``; every other REI is
    read from the ``sev`` stage unchanged.

    Args:
        rei (str): risk, could also be vaccine intervention.
        sev (str): upstream sev version.
        vaccine_sev (str): upstream vaccine sev version.
        gbd_round_id (int): gbd round id.
        years (YearRange): forwarded to
            ``conditionally_triggered_transformations``.
        draws (int): number of draws for the output; the data is passed to
            ``resample`` when its ``draw`` length differs.

    Returns:
        (xr.DataArray): SEV in dataarray form.
    """
    if rei in get_vaccine_reis(gbd_round_id):
        # Vaccine coverage is treated as an anti-risk.
        vaccine_file = FBDPath(
            gbd_round_id=gbd_round_id,
            past_or_future="future",
            stage="vaccine",
            version=vaccine_sev) / (rei + "_new_ref.nc")
        result = 1.0 - open_xr(vaccine_file).data
    else:
        sev_file = FBDPath(
            gbd_round_id=gbd_round_id,
            past_or_future="future",
            stage="sev",
            version=sev) / (rei + ".nc")
        result = open_xr(sev_file).data

    result = conditionally_triggered_transformations(
        result, gbd_round_id, years)

    if len(result["draw"]) == draws:
        return result
    return resample(result, draws)
def alt_scenario_pops():
    """Print the 2100 location-1 population under scenarios 3 and 2.

    Scenario 3 is the SDG scenario and scenario 2 the 99th-percentile
    scenario. Both values are mean, all-age (22), both-sex (3) populations
    passed through ``_helper_round(..., 1e9)``.
    """
    pop_dir = FBDPath(
        f"/{settings.BASELINE_VERSIONS['population_mean_ui'].gbd_round_id}/"
        f"future/population/"
        f"{settings.BASELINE_VERSIONS['population_mean_ui'].version}")
    pop_path = pop_dir / "population_combined.nc"

    # Read the file once; it used to be opened separately per scenario.
    global_2100 = open_xr(pop_path).data.sel(
        location_id=1, age_group_id=22, sex_id=3, quantile="mean",
        year_id=2100)

    # Scenario 3 is SDG.
    sdg_2100_pop = global_2100.sel(scenario=3).values.item(0)
    # Scenario 2 is the 99.
    fastest_2100_pop = global_2100.sel(scenario=2).values.item(0)

    sdg_2100_pop = _helper_round(sdg_2100_pop, 1e9)
    fastest_2100_pop = _helper_round(fastest_2100_pop, 1e9)

    print(f"Alternative scenarios suggest meeting the SDG targets for "
          f"education and contraceptive met need will result in a global "
          f"population of {sdg_2100_pop} billion in 2100, and "
          f"{fastest_2100_pop} billion assuming 99th percentile rates of "
          f"change in educational attainment and met need for contraception.\n")
def japan_econ():
    """Print Japan's rank by scenario-0 GDP in 2100.

    Locations are sorted by ``value`` in descending order; the rank is the
    position of the row whose ``location_name`` is ``"Japan"``, counted
    from one.
    """
    gdp_file = FBDPath(
        f"/{settings.BASELINE_VERSIONS['gdp'].gbd_round_id}/"
        f"future/gdp/"
        f"{settings.BASELINE_VERSIONS['gdp'].version}") / "gdp.nc"
    gdp_2100 = open_xr(gdp_file).data.sel(scenario=0, year_id=2100)

    ranking = _add_location_name(gdp_2100.to_dataframe())
    ranking = ranking.sort_values(by='value', ascending=False).reset_index()

    # After reset_index the row label is the zero-based position.
    japan_position = ranking[ranking['location_name']=="Japan"].index[0]
    japan_rank = japan_position + 1

    print(f"Despite huge declines in population expected this century, Japan "
          f"remains the {japan_rank} largest economy in 2100.\n")
def tfr_2100():
    """Print the 2100 reference-scenario TFR for location 1 with its UI.

    Mean, lower and upper quantiles are passed through
    ``_helper_round(..., 1)``.
    """
    tfr_file = FBDPath(
        f"/{settings.BASELINE_VERSIONS['tfr_mean_ui'].gbd_round_id}/"
        f"future/tfr/"
        f"{settings.BASELINE_VERSIONS['tfr_mean_ui'].version}"
    ) / "tfr_combined.nc"
    global_2100 = open_xr(tfr_file).data.sel(
        scenario=0, location_id=1, year_id=2100)

    raw_mean = global_2100.sel(quantile="mean").values.item(0)
    raw_upper = global_2100.sel(quantile="upper").values.item(0)
    raw_lower = global_2100.sel(quantile="lower").values.item(0)

    tfr_val = _helper_round(raw_mean, 1)
    tfr_upper = _helper_round(raw_upper, 1)
    tfr_lower = _helper_round(raw_lower, 1)

    print(f"The global total fertility rate (TFR) in 2100 is forecasted to be "
          f"{tfr_val} (95% UI {tfr_lower}–{tfr_upper}).\n")