def get_asfr(forecast_asfr_version, gbd_round_id, draws, years,
             past_asfr_version):
    """Load past and forecast ASFR, join them, and remove any ``sex_id``.

    Both inputs are read from ``asfr.nc`` under their respective FBDPath
    directories and combined with ``concat_past_future``.

    Args:
        forecast_asfr_version (str):
            Version name of the future ASFR data used in the FBDPath.
        gbd_round_id (int):
            GBD round used as the leading component of both FBDPaths.
        draws (int):
            The number of desired draws; passed to ``concat_past_future``.
        years:
            Passed unchanged to ``concat_past_future``.
        past_asfr_version (str):
            Version name of the past ASFR data used in the FBDPath.
    Returns:
        (xarray.DataArray):
            Age-specific fertility rate. If ``sex_id`` was a dimension, only
            ``sex_id=2`` is kept and the coordinate is dropped; if it was a
            point coordinate it is simply dropped.
    """
    future_file = FBDPath(
        f"{gbd_round_id}/future/asfr/{forecast_asfr_version}") / "asfr.nc"
    future_da = open_xr(future_file).data
    past_file = FBDPath(
        f"{gbd_round_id}/past/asfr/{past_asfr_version}") / "asfr.nc"
    past_da = open_xr(past_file).data
    asfr = concat_past_future(past_da, future_da, draws, years)

    # sex_id is a real dimension: keep sex_id=2 only
    if 'sex_id' in asfr.dims:
        return asfr.sel(sex_id=2, drop=True)
    # sex_id is only a point coordinate: discard it
    if 'sex_id' in asfr.coords:
        return asfr.drop('sex_id')
    # sex_id is absent altogether: nothing to do
    return asfr
Esempio n. 2
0
def wpp_fhs_diff():
    """Print the 2100 population gap between WPP and FHS for three regions.

    Reads the WPP aggregate population and the FHS combined population
    (both sexes, all ages, scenario 0, mean), computes the per-region
    difference with ``_helper_wpp_fhs_diff`` and rounds each with
    ``_helper_round(..., 1e6)`` before printing a summary sentence.
    """
    wpp_meta = settings.WPP_VERSIONS['population_aggs']
    wpp_file = FBDPath(
        f"/{wpp_meta.gbd_round_id}/future/population/{wpp_meta.version}"
    ) / "2019_fhs_agg_allage_bothsex_only.nc"
    wpp_2100 = open_xr(wpp_file).data.sel(year_id=2100)

    fhs_meta = settings.BASELINE_VERSIONS['population_mean_ui']
    fhs_file = FBDPath(
        f"/{fhs_meta.gbd_round_id}/future/population/{fhs_meta.version}"
    ) / "population_combined.nc"
    fhs_2100 = open_xr(fhs_file).data.sel(
        sex_id=BOTH_SEX_ID, age_group_id=ALL_AGE_ID, scenario=0,
        quantile="mean", year_id=2100)

    # Location ids, in print order:
    #   166 Sub-Saharan Africa, 158 South Asia,
    #   4 Southeast Asia, East Asia, and Oceania
    raw_gaps = [_helper_wpp_fhs_diff(wpp_2100, fhs_2100, region_id)
                for region_id in (166, 158, 4)]
    sub_saharan_diff, south_asia_diff, se_e_oceania_diff = [
        _helper_round(gap, 1e6) for gap in raw_gaps]

    print(f"The difference with UNPD in 2100 is due to {sub_saharan_diff} "
          f"million fewer people in the reference scenario in sub-Saharan "
          f"Africa, {south_asia_diff} million fewer in South Asia, and "
          f"{se_e_oceania_diff} million fewer in Southeast Asia, East Asia, "
          f"and Oceania, primarily due to the level of fertility achieved in "
          f"below replacement populations. \n")
def get_mortality(forecast_mortality_version, gbd_round_id, draws, years,
                  past_mortality_version):
    """Load past and forecast all-cause mortality and join them.

    Both inputs are read from ``_all.nc`` under their FBDPath ``death``
    directories. The ``acause`` entry is dropped from the past data before
    the two are combined with ``concat_past_future``.

    Args:
        forecast_mortality_version (str):
            Version name of the future mortality data used in the FBDPath.
        gbd_round_id (int):
            GBD round used as the leading component of both FBDPaths.
        draws (int):
            The number of desired draws; passed to ``concat_past_future``.
        years:
            Passed unchanged to ``concat_past_future``.
        past_mortality_version (str):
            Version name of the past mortality data used in the FBDPath.
    Returns:
        (xarray.DataArray):
            Mortality rate.
    """
    future_file = FBDPath(
        f"{gbd_round_id}/future/death/{forecast_mortality_version}"
    ) / "_all.nc"
    future_da = open_xr(future_file).data

    past_file = FBDPath(
        f"{gbd_round_id}/past/death/{past_mortality_version}") / "_all.nc"
    past_da = open_xr(past_file).data.drop("acause")

    return concat_past_future(past_da, future_da, draws, years)
Esempio n. 4
0
def main(past_version, forecast_version, gbd_round_id, years):
    """Write one population-pyramid page per location into a single PDF.

    The plotting inputs come from ``prep_pop_da``; one figure is produced by
    ``pop_plot`` for every ``location_id`` returned by
    ``db.get_locations_by_max_level(3)`` and appended to
    ``figure_7_population_pyramids.pdf`` under the "plot" root of the
    forecast population version.

    Args:
        past_version (str): past population version, passed to
            ``prep_pop_da``.
        forecast_version (str): forecast population version; also names the
            output directory.
        gbd_round_id (int): gbd round id.
        years: passed to ``prep_pop_da`` and ``pop_plot``.
    """
    avg_age_fhs, avg_age_sdg, avg_age_99, ds, ds_sdg, ds_99 = prep_pop_da(
        past_version, forecast_version, gbd_round_id, years)
    plot_dir = FBDPath(
        f"/{gbd_round_id}/future/population/{forecast_version}",
        root_dir="plot")
    # parents=True so a brand-new version directory can be created even when
    # its parent directories do not exist yet.
    plot_dir.mkdir(parents=True, exist_ok=True)
    pdf_file = plot_dir / "figure_7_population_pyramids.pdf"

    location_metadata = db.get_locations_by_max_level(3)

    location_hierarchy = location_metadata.set_index(
        "location_id").to_xarray()["parent_id"]

    # NOTE(review): figures are not closed after being saved; if memory use
    # grows with the number of locations, consider closing each one.
    with PdfPages(pdf_file) as pdf:
        for location_id in location_hierarchy["location_id"]:
            fig = pop_plot(avg_age_fhs,
                           avg_age_sdg,
                           avg_age_99,
                           ds,
                           ds_sdg,
                           ds_99,
                           years,
                           location_id=location_id)
            pdf.savefig(fig)
Esempio n. 5
0
def pop_declines():
    """Print how many countries lose over half their population by 2100.

    Two computations are made for the reference scenario (scenario 0),
    both sexes, all ages, restricted to ``COUNTRIES``:

    * mean level: ``1 - pop(2100) / pop(2017)`` per country, counting and
      naming those with a decline greater than 0.5;
    * draw level for location id 6 (China): the same ratio per draw, times
      100, summarized as mean and 2.5th / 97.5th percentiles.

    The result is printed as a single sentence; nothing is returned.
    """
    baseline_ui = settings.BASELINE_VERSIONS['population_mean_ui']
    forecast_pop_dir = FBDPath(
        f"/{baseline_ui.gbd_round_id}/future/population/"
        f"{baseline_ui.version}")
    forecast_pop_path = forecast_pop_dir / "population_combined.nc"
    forecast_pop_da = open_xr(forecast_pop_path).data.sel(
        sex_id=BOTH_SEX_ID, age_group_id=ALL_AGE_ID, scenario=0,
        quantile="mean", location_id=COUNTRIES)

    past_version = settings.PAST_VERSIONS['population']
    past_pop_dir = FBDPath(
        f"/{past_version.gbd_round_id}/past/population/"
        f"{past_version.version}")
    past_pop_path = past_pop_dir / "population.nc"
    past_pop_da = open_xr(past_pop_path).data.sel(
        sex_id=BOTH_SEX_ID, age_group_id=ALL_AGE_ID, location_id=COUNTRIES)

    # fraction of the 2017 population lost by 2100, per country
    pct_decline_da = 1 - (forecast_pop_da.sel(year_id=2100)
                          / past_pop_da.sel(year_id=2017))
    over_half = pct_decline_da > .5
    decline_over_50_val = over_half.sum().values.item(0)
    decline_over_50_locs = list(pct_decline_da.where(
        over_half, drop=True).location_id.values)
    decline_over_50_locs = _location_id_to_name(decline_over_50_locs)

    # compute % decline at draw level for uncertainty
    baseline_draws = settings.BASELINE_VERSIONS['population']
    forecast_draw_pop_dir = FBDPath(
        f"/{baseline_draws.gbd_round_id}/future/population/"
        f"{baseline_draws.version}")
    forecast_draw_pop_path = forecast_draw_pop_dir / "population_agg.nc"
    forecast_draw_pop_da = open_xr(forecast_draw_pop_path).data.sel(
        sex_id=BOTH_SEX_ID, age_group_id=ALL_AGE_ID, scenario=0,
        location_id=COUNTRIES)

    past_draws = settings.PAST_VERSIONS['population_draw2017']
    past_draw_pop_dir = FBDPath(
        f"/{past_draws.gbd_round_id}/past/population/"
        f"{past_draws.version}")
    past_draw_pop_path = past_draw_pop_dir / "population.nc"
    past_draw_pop_da = open_xr(past_draw_pop_path).data.sel(
        sex_id=BOTH_SEX_ID, age_group_id=ALL_AGE_ID, location_id=COUNTRIES)

    # location id 6 is China (see the printed sentence below)
    da_china_2100 = forecast_draw_pop_da.sel(year_id=2100, location_id=6)
    da_china_2017 = past_draw_pop_da.sel(year_id=2017, location_id=6)
    decline_china = (1 - (da_china_2100 / da_china_2017)) * 100

    decline_china_mean = _helper_round(
        decline_china.mean("draw").values.item(0), 1)
    decline_china_lower = _helper_round(
        decline_china.quantile(dim="draw", q=[0.025]).values.item(0), 1)
    decline_china_upper = _helper_round(
        decline_china.quantile(dim="draw", q=[0.975]).values.item(0), 1)

    # The percent sign belongs to the reported value, not after the final
    # full stop (the original sentence ended in ").%").
    print(f"{decline_over_50_val} countries including {decline_over_50_locs} "
          f"in the reference scenario will have declines of greater than "
          f"50% from 2017 by 2100; China will decline by "
          f"{decline_china_mean}% "
          f"({decline_china_lower} to {decline_china_upper}).\n")
def all_weights_main(reference_scenario, diff_over_mean, truncate,
                     truncate_quantiles, replace_with_mean,
                     use_past_uncertainty, transform, max_weight,
                     weight_step_size, past_version, pv_version, years,
                     gbd_round_id, test_mode, **kwargs):
    """Compute and save the predictive-validity RMSE for a range of weights.

    Past education data is split into holdouts (``years.past_years``) and
    observed values (``years.forecast_years``). For every weight in
    ``np.arange(0, max_weight, weight_step_size)`` an ARC forecast is made
    from the holdouts and its RMSE against the observed data is computed.
    The RMSEs are concatenated along a ``weight`` dimension and written to
    ``education_arc_weight_rmse.nc``.

    NOTE(review): the input and output FBDPaths are built from empty strings
    in this copy of the code, and ``past_version`` / ``pv_version`` are
    unused -- presumably the paths were originally derived from them.
    """
    LOGGER.debug("diff_over_mean:{}".format(diff_over_mean))
    LOGGER.debug("truncate:{}".format(truncate))
    LOGGER.debug("truncate_quantiles:{}".format(truncate_quantiles))
    LOGGER.debug("replace_with_mean:{}".format(replace_with_mean))
    LOGGER.debug("reference_scenario:{}".format(reference_scenario))
    LOGGER.debug("use_past_uncertainty:{}".format(use_past_uncertainty))

    LOGGER.debug("Reading in the past")
    past_path = FBDPath("".format())
    past = open_xr(past_path / "education.nc").data
    past = past.transpose(*past.coords)

    if use_past_uncertainty:
        LOGGER.debug("Using past draws for PV")
    else:
        LOGGER.debug("Using past means for PV")
        past = past.mean("draw")

    if test_mode:
        # Shrink the data to the first five ages, draws and locations.
        past = past.sel(
            age_group_id=past["age_group_id"].values[:5],
            draw=past["draw"].values[:5],
            location_id=past["location_id"].values[:5])

    holdouts = past.sel(year_id=years.past_years)
    observed = past.sel(year_id=years.forecast_years)

    def _rmse_for(weight):
        """Forecast with one weight and wrap its RMSE in a DataArray."""
        forecast = arc_forecast_education(
            holdouts, gbd_round_id, transform, weight, years,
            reference_scenario,
            diff_over_mean, truncate, truncate_quantiles, replace_with_mean)
        error = calc_rmse(
            forecast.sel(scenario=REFERENCE_SCENARIO, drop=True),
            observed,
            years)
        return xr.DataArray([error.values], [[weight]], dims=["weight"])

    LOGGER.debug("Calculating RMSE for all weights")
    weights_to_test = np.arange(0, max_weight, weight_step_size)
    rmse_results = xr.concat(
        [_rmse_for(weight_exp) for weight_exp in weights_to_test],
        dim="weight")

    pv_path = FBDPath("".format())
    pv_path.mkdir(parents=True, exist_ok=True)
    rmse_results.to_netcdf(str(pv_path / "education_arc_weight_rmse.nc"))
    LOGGER.info("RMSE is saved")
Esempio n. 7
0
def return_pop_differences(fbd_pop_version, unpd_pop_version):
    """Print the 2100 UNPD-minus-reference population gap for three regions.

    Loads the UNPD (WPP) and reference-scenario populations for 2100, both
    sexes, merges them on their shared columns, keeps locations with
    ``level < 4`` in the GBD location metadata, and reports the difference
    in millions (rounded to two decimals) for location ids 166, 158 and 4.

    Args:
        fbd_pop_version (str): version of the reference population forecast.
        unpd_pop_version (str): version of the UNPD population forecast.
    """
    GBD_LOC_DF = get_location_metadata(gbd_round_id=5, location_set_id=39)
    gbd_locs_df = GBD_LOC_DF.query("level < 4")

    fbdpoppath = FBDPath(f"/5/future/population/{fbd_pop_version}/"
                         "population_combined.nc")

    unpdpoppath = FBDPath(f"/wpp/future/population/{unpd_pop_version}/"
                          "population_all_age.nc")

    unpd_pop_xr = open_xr(unpdpoppath).data

    unpd_pop_100 = unpd_pop_xr.sel(year_id=2100, sex_id=3).drop([
        "year_id", "sex_id", "age_group_id"
    ]).squeeze().rename("unpd_pop").to_dataframe().reset_index()

    ihme_pop_100 = open_xr(fbdpoppath).data.sel(
        age_group_id=22, sex_id=3, scenario=0, year_id=2100,
        quantile="mean").drop([
            "age_group_id", "sex_id", "scenario", "year_id", "quantile"
        ]).squeeze().rename("ihme_pop").to_dataframe().reset_index()

    # The integer copies of the population columns were never read, and
    # astype(int) raises on missing values, so they are no longer created.
    pop_2100_df = ihme_pop_100.merge(unpd_pop_100, how="left")

    # Inner merge: restricts the table to locations with level < 4.
    final_pop_df = pop_2100_df.merge(
        gbd_locs_df[["location_id", "lancet_label"]])

    # In print order: sub-Saharan Africa; south Asia; southeast Asia,
    # east Asia, and Oceania.
    location_ids = [166, 158, 4]
    locs_of_interest = final_pop_df[final_pop_df.location_id.isin(
        location_ids)]
    new_pop_diff = {}
    for loc_id in location_ids:
        # Filter once per location instead of once per column.
        loc_rows = locs_of_interest[locs_of_interest["location_id"] == loc_id]
        ihme_num = loc_rows["ihme_pop"].values[0]
        unpd_num = loc_rows["unpd_pop"].values[0]
        new_pop_diff[loc_id] = ((unpd_num - ihme_num) / 1e6).round(2)

    print(f"Figure 9 shows a global comparison of the TFR, life expectancy at"
          f" birth, and population between our reference scenario, the UNPD "
          f"median variant, and the Wittgenstein SSP2 scenario. Differences"
          f" between 2100 population forecasts from the UNPD and our reference"
          f" scenario are largely explained by differences in population"
          f" levels in sub-Saharan Africa; south Asia; and southeast Asia,"
          f" east Asia, and Oceania ({new_pop_diff[location_ids[0]]} million, "
          f"{new_pop_diff[location_ids[1]]} million, and"
          f" {new_pop_diff[location_ids[2]]} million fewer people in our"
          f" reference scenario, respectively)")
def one_weight_main(reference_scenario, transform, diff_over_mean, truncate,
                    truncate_quantiles, replace_with_mean,
                    use_past_uncertainty, weight_exp, past_version, pv_version,
                    years, gbd_round_id, test_mode, **kwargs):
    """Compute and save the predictive-validity RMSE for a single weight.

    Past education data is split into holdouts (``years.past_years``) and
    observed values (``years.forecast_years``). One ARC forecast is made
    from the holdouts using ``weight_exp`` and its RMSE against the observed
    data is written to ``each_weight/<weight_exp>_rmse.nc``.

    NOTE(review): ``past_version`` and ``pv_version`` are unused here; the
    paths were blanked out (see the inline comments).
    """
    LOGGER.debug("diff_over_mean:{}".format(diff_over_mean))
    LOGGER.debug("truncate:{}".format(truncate))
    LOGGER.debug("truncate_quantiles:{}".format(truncate_quantiles))
    LOGGER.debug("replace_with_mean:{}".format(replace_with_mean))
    LOGGER.debug("reference_scenario:{}".format(reference_scenario))
    LOGGER.debug("use_past_uncertainty:{}".format(use_past_uncertainty))

    LOGGER.debug("Reading in the past")
    past_path = FBDPath("".format())  # Path structure removed for security
    past = open_xr(past_path / "education.nc").data
    past = past.transpose(*past.coords)

    if use_past_uncertainty:
        LOGGER.debug("Using past draws for PV")
    else:
        LOGGER.debug("Using past means for PV")
        past = past.mean("draw")

    if test_mode:
        # Shrink the data to the first five ages, draws and locations.
        first_five = {
            dim: past[dim].values[:5]
            for dim in ("age_group_id", "draw", "location_id")}
        past = past.sel(**first_five)

    holdouts = past.sel(year_id=years.past_years)
    observed = past.sel(year_id=years.forecast_years)

    LOGGER.debug("Calculating RMSE for {}".format(weight_exp))
    forecast = arc_forecast_education(
        holdouts, gbd_round_id, transform, weight_exp, years,
        reference_scenario,
        diff_over_mean, truncate, truncate_quantiles, replace_with_mean)
    reference_forecast = forecast.sel(scenario=REFERENCE_SCENARIO, drop=True)
    rmse = calc_rmse(reference_forecast, observed, years)

    rmse_da = xr.DataArray(
        [rmse.values], [[weight_exp]], dims=["weight"])

    pv_path = FBDPath("".format())  # Path structure removed for security
    separate_weights_path = pv_path / "each_weight"
    separate_weights_path.mkdir(parents=True, exist_ok=True)
    out_file = separate_weights_path / "{}_rmse.nc".format(weight_exp)
    rmse_da.to_netcdf(str(out_file))
    LOGGER.info("Saving RMSE for {}".format(weight_exp))
Esempio n. 9
0
def pull_pop(past_pop_version, future_pop_version,
             scenarios=0, age_groups=ALL_AGE_ID):
    """Return past and future population merged into one array.

    The past population is limited to the locations present in the future
    data, merged with the future data via ``combine_first`` (past values
    take precedence where both exist), and then subset to the requested
    age groups, both sexes, and the requested scenario(s).

    Args:
        past_pop_version (str): version of the past population.
        future_pop_version (str): version of the future population.
        scenarios: scenario selector passed to ``sel``; defaults to 0.
        age_groups: age group selector passed to ``sel``; defaults to
            ``ALL_AGE_ID``.

    Returns:
        The combined, subset population.
    """
    past_dir = FBDPath(f"{GBD_ROUND}/past/population/{past_pop_version}")
    future_dir = FBDPath(f"{GBD_ROUND}/future/population/{future_pop_version}")

    future_pop = open_xr(future_dir / "population_combined.nc").data
    past_all_locs = open_xr(past_dir / "population.nc").data
    past_pop = past_all_locs.sel(location_id=future_pop.location_id)

    merged = past_pop.combine_first(future_pop)
    return merged.sel(
        age_group_id=age_groups, sex_id=BOTH_SEX_ID, scenario=scenarios)
Esempio n. 10
0
def save_past_and_future(da, gbd_round_id, stage, version, file_name,
                         year_args, sub_dir=None):
    """
    Given dataarray, and relevant FBDPath metadata, saves file as .nc in both
    "past" and "future".

    The output directory (including ``sub_dir``, when given) is created if it
    does not exist yet.

    Args:
        da (xr.DataArray): dataarray to be split and saved separately.
        gbd_round_id (int): gbd round id.
        stage (str): FBDPath stage.
        version (str): FBDPath version.
        file_name (str): name of file, without extension.
        year_args (iterable): [past_start, forecast_start, forecast_end] years.
        sub_dir (str, optional): sub-directory after FBDPath
    """
    for i, past_or_future in enumerate(["past", "future"]):
        out_fbd_path = FBDPath(gbd_round_id=gbd_round_id,
                               past_or_future=past_or_future,
                               stage=stage,
                               version=version)
        out_root = str(out_fbd_path / sub_dir) if sub_dir else str(out_fbd_path)

        # Create the directory the file is written INTO. The previous code
        # created only its parent, so to_netcdf failed on a fresh version.
        # exist_ok avoids the check-then-create race.
        os.makedirs(out_root, exist_ok=True)

        outpath = os.path.join(out_root, file_name + ".nc")
        # NOTE(review): range() excludes its upper bound, so the "future"
        # slice stops at year_args[2] - 1 -- confirm callers pass an
        # exclusive forecast_end.
        out = da.loc[{YEAR_DIM: range(year_args[i], year_args[i + 1])}]
        out.to_netcdf(outpath)
def load_data(version):
    """Return scenario-0 GDP for ``version`` as a flat dataframe.

    Args:
        version (str): version of the future GDP data to read.

    Returns:
        The data converted with ``to_dataframe().reset_index()``; the value
        column is named "gdp".
    """
    gdp_file = FBDPath(f"{GBD_ROUND_ID}/future/gdp/{version}/gdp.nc")
    reference_gdp = open_xr(gdp_file).data.sel(scenario=0)
    # The array name becomes the value column's name in the dataframe.
    reference_gdp.name = "gdp"
    return reference_gdp.to_dataframe().reset_index()
Esempio n. 12
0
def read_lifetable(gbd_round_id, lifetable_version, draws):
    """
    Read the life table dataset for a version and resample its draws.

    Args:
        gbd_round_id (int): gbd round id used in the FBDPath.
        lifetable_version (str): Of the form "past/versionname"
           or "future/versionname". A bare "versionname" is read from
           "future".
        draws (int): desired number of draws

    Returns:
        The life table, resampled to ``draws`` draws when it has a "draw"
        dimension, after ``_drop_point_coordinates``.

    Raises:
        SystemExit: with status 1 if the file cannot be opened (the failure
            is logged first).
    """
    if "/" in lifetable_version:
        past_or_future, version = lifetable_version.split("/")
    else:
        past_or_future = "future"
        version = lifetable_version

    # lifetable from the future includes last year of the past.
    lifetable_file = FBDPath("/{}/{}/life_expectancy/{}".format(
        gbd_round_id, past_or_future, version)) / "lifetable_ds.nc"
    LOGGER.info("Reading {}".format(lifetable_file))
    try:
        lifetable = xr.open_dataset(str(lifetable_file))
    except OSError as ose:
        LOGGER.error("Cannot open lifetable {}: {}".format(
            lifetable_file, ose))
        # Still terminates via SystemExit as before, but with a non-zero
        # status (bare exit() reported success) and without relying on the
        # site-provided exit() builtin.
        raise SystemExit(1) from ose

    if "draw" in lifetable.dims:
        lifetable = resample(lifetable.sortby("draw"), draws)

    return _drop_point_coordinates(lifetable)
Esempio n. 13
0
def sing_tai_tfr():
    """Print the 2017 TFR (mean and 95% interval) for Singapore and Taiwan.

    Reads the past TFR draws, and for location ids 69 (Singapore) and
    8 (Taiwan) computes the mean over draws plus the 2.5th and 97.5th
    percentiles, each rounded with ``_helper_round(..., 1)``.
    """
    tfr_meta = settings.PAST_VERSIONS['tfr']
    tfr_file = FBDPath(
        f"/{tfr_meta.gbd_round_id}/past/tfr/{tfr_meta.version}") / "tfr.nc"
    tfr_da = open_xr(tfr_file).data.sel(year_id=2017)

    def _summarize(location_id):
        """Return rounded (mean, lower, upper) TFR for one location."""
        loc_da = tfr_da.sel(location_id=location_id)
        mean = loc_da.mean('draw').values.item(0)
        upper = loc_da.quantile(.975, dim='draw').values.item(0)
        lower = loc_da.quantile(.025, dim='draw').values.item(0)
        return tuple(_helper_round(value, 1) for value in (mean, lower, upper))

    singapore_tfr, singapore_lower, singapore_upper = _summarize(69)
    taiwan_tfr, taiwan_lower, taiwan_upper = _summarize(8)

    print(f"Discussion: In contrast, positive incentives have had little "
          f"effect in Singapore and Taiwan, where 2017 TFR levels were "
          f"{singapore_tfr} ({singapore_lower}–{singapore_upper}) and "
          f"{taiwan_tfr} ({taiwan_lower}–{taiwan_upper})\n")
Esempio n. 14
0
def load_forecast_pop(gbd_round_id, version, years, draws):
    """
    Load forecast population data. Aggregates if necessary.

    ``population_agg.nc`` is read when it can be opened. Otherwise
    ``population.nc`` is read, aggregated with
    ``Aggregator.aggregate_everything``, and the result is saved as
    ``population_agg.nc`` for later calls.

    Args:
        gbd_round_id (int):
            The gbd round ID that the forecast population is from
        version (str):
            The version of forecast population to read from
        years (YearRange):
            The Forecasting format years to use.
        draws (int):
            The number of draws to resample the data to.

    Returns:
        xarray.DataArray: The forecast population, limited to
            ``years.forecast_years`` and resampled to ``draws`` draws.
    """
    forecast_pop_dir = FBDPath(f"/{gbd_round_id}/future/population/{version}")
    agg_path = forecast_pop_dir / "population_agg.nc"
    try:
        forecast_pop_da = open_xr(agg_path).data
    # Exception rather than a bare except, so KeyboardInterrupt / SystemExit
    # are not mistaken for "aggregate file missing".
    except Exception:  # Need to make agg version
        forecast_pop_path = forecast_pop_dir / "population.nc"
        forecast_pop_da = open_xr(forecast_pop_path).data
        forecast_pop_da = Aggregator.aggregate_everything(
            forecast_pop_da, gbd_round_id).pop
        save_xr(forecast_pop_da,
                agg_path,
                metric="number",
                space="identity")

    # slice to correct years and number of draws
    forecast_pop_da = forecast_pop_da.sel(year_id=years.forecast_years)
    forecast_pop_da = resample(forecast_pop_da, draws)

    return forecast_pop_da
Esempio n. 15
0
def read_paf(acause, risk, gbd_round_id, past_or_future, version):
    """
    Read the PAF for one cause-risk pair.

    The file read is ``risk_acause_specific/<acause>_<risk>.nc`` under the
    "paf" stage of the given FBDPath.

    Args:
        acause (str): cause name.
        risk (str): risk name.
        gbd_round_id (int): gbd round id.
        past_or_future (str): passed to FBDPath as ``past_or_future``.
        version (str): str indicating folder where data comes from.

    Returns
        paf (xr.DataArray): the ``.data`` of the opened file.
    """
    paf_stage_dir = FBDPath(gbd_round_id=gbd_round_id,
                            past_or_future=past_or_future,
                            stage="paf",
                            version=version)
    paf_file = paf_stage_dir / "risk_acause_specific" / f"{acause}_{risk}.nc"
    return open_xr(paf_file).data
Esempio n. 16
0
def pop_peak():
    """Print the peak year and size of global population, and its 2100 level.

    Uses the reference scenario (scenario 0), all ages, both sexes, for
    location id 1. The peak is the maximum of the "mean" quantile; the
    "lower" / "upper" quantiles in the peak year and in 2100 are reported
    alongside. All values are rounded with ``_helper_round(..., 1e9)``.
    """
    pop_meta = settings.BASELINE_VERSIONS['population_mean_ui']
    pop_file = FBDPath(
        f"/{pop_meta.gbd_round_id}/future/population/{pop_meta.version}"
    ) / "population_combined.nc"
    global_pop = open_xr(pop_file).data.sel(
        location_id=1, age_group_id=ALL_AGE_ID, sex_id=BOTH_SEX_ID, scenario=0)

    # Locate the peak on the unrounded mean series.
    mean_series = global_pop.sel(quantile="mean")
    peak_raw = mean_series.max().values.item(0)
    max_year = mean_series.where(
        mean_series == peak_raw, drop=True).year_id.values[0]

    def _rounded(quantile, year_id):
        """Population for one quantile and year, rounded."""
        raw = global_pop.sel(quantile=quantile, year_id=year_id).values.item(0)
        return _helper_round(raw, 1e9)

    # Values at the peak
    max_val = _helper_round(peak_raw, 1e9)
    max_upper = _rounded("upper", max_year)
    max_lower = _rounded("lower", max_year)

    # Values in 2100
    end_val = _rounded("mean", 2100)
    end_upper = _rounded("upper", 2100)
    end_lower = _rounded("lower", 2100)

    print(f"In the reference forecast, the global population is projected to "
          f"peak in {max_year} at {max_val} "
          f"({max_lower}-{max_upper}) billion people, and decline to "
          f"{end_val} ({end_lower}-{end_upper}) in 2100."
          f"\n")
Esempio n. 17
0
def working_age():
    """Print the four countries with the largest working-age population in 2100.

    Uses the reference scenario (scenario 0) mean for both sexes, summed
    over age group ids 9 through 17, for the locations in ``COUNTRIES``.
    """
    pop_meta = settings.BASELINE_VERSIONS['population_mean_ui']
    pop_file = FBDPath(
        f"/{pop_meta.gbd_round_id}/future/population/{pop_meta.version}"
    ) / "population_combined.nc"

    # working age is 20-64 age id 163, but have to combine 9-17 to get 163
    working_age_ids = list(range(9, 18))
    by_age = open_xr(pop_file).data.sel(
        sex_id=BOTH_SEX_ID, scenario=0, quantile="mean",
        age_group_id=working_age_ids, year_id=2100, location_id=COUNTRIES)
    working_pop = by_age.sum('age_group_id')

    ranked = _add_location_name(working_pop.to_dataframe())
    ranked = ranked.sort_values(by='population', ascending=False).reset_index()

    # After reset_index the labels 0..3 are the four largest populations.
    first, second, third, fourth = [
        ranked.location_name[rank] for rank in range(4)]

    print(f"By 2100, {first} will still have the largest working age "
          f"population followed by {second} and {third}. Coming in fourth in "
          f"the world will be {fourth}.\n")
Esempio n. 18
0
def largest_gdp():
    """Print when China leads GDP and when the USA regains the lead.

    For scenario 0, finds per year the location(s) whose GDP equals the
    maximum over locations, asserts that every year is led by either
    location id 6 (China) or 102 (USA), then reports the earliest China-led
    year and the earliest USA-led year after it.
    """
    gdp_meta = settings.BASELINE_VERSIONS['gdp']
    gdp_file = FBDPath(
        f"/{gdp_meta.gbd_round_id}/future/gdp/{gdp_meta.version}") / "gdp.nc"
    gdp_da = open_xr(gdp_file).data.sel(scenario=0)

    # Keep values only where a location holds the yearly maximum.
    leaders = gdp_da.where(gdp_da == gdp_da.max('location_id'), drop=True)

    def _years_on_top(location_id):
        """Years in which the given location has the largest GDP."""
        led = leaders.sel(location_id=location_id).dropna(dim='year_id')
        return led.year_id.values

    china_years = _years_on_top(6)
    usa_years = _years_on_top(102)

    # check no other location is ever top
    all_years = gdp_da.coords["year_id"].values
    unaccounted = np.setdiff1d(
        all_years, np.concatenate([china_years, usa_years]))
    assert unaccounted.size == 0

    # earliest year with China on top
    china_year = china_years.min()
    # earliest later year with the USA back on top
    usa_year = usa_years[usa_years > china_year].min()

    print(f"China is expected to become the largest economy by {china_year} "
          f"but in the reference scenario the USA would once again become in "
          f"the largest economy in {usa_year}.\n")
Esempio n. 19
0
def age_pops():
    """Print the global 2100 population above age 65 and below age 20.

    Uses the reference scenario (scenario 0) mean, both sexes, for location
    id 1. Both totals are rounded with ``_helper_round(..., 1e9)``.
    """
    pop_meta = settings.BASELINE_VERSIONS['population_mean_ui']
    pop_file = FBDPath(
        f"/{pop_meta.gbd_round_id}/future/population/{pop_meta.version}"
    ) / "population_combined.nc"
    global_2100 = open_xr(pop_file).data.sel(
        sex_id=BOTH_SEX_ID, scenario=0, year_id=2100, quantile="mean",
        location_id=1)

    # 65 to 69, 70 to 74, 75 to 79, 80 plus
    above_65_ages = [18, 19, 20, 21]
    above_65_raw = global_2100.sel(
        age_group_id=above_65_ages).sum('age_group_id').values.item(0)

    # <20 years is age group id 158
    under_20_raw = global_2100.sel(age_group_id=158).values.item(0)

    above_65_val = _helper_round(above_65_raw, 1e9)
    under_20_val = _helper_round(under_20_raw, 1e9)

    print(f"Findings also suggest a shifting age structure in many parts of "
          f"the world, with {above_65_val} billion individuals above the age "
          f"of 65, and {under_20_val} billion individuals below the age of 20, "
          f"globally in 2100.\n")
def create_age_sex_xarray():
    """Build, save and return the migration age-sex pattern for all locations.

    Each location in ``WPP_LOCATION_IDS`` is assigned the Qatar pattern if it
    is in ``QATAR_LOCS`` and the Eurostat pattern otherwise. The per-location
    arrays are concatenated along ``location_id`` and saved as
    ``combined_age_sex_pattern.nc``.

    Returns:
        The concatenated pattern array (the same object that is saved).
    """
    LOGGER.debug("Creating xarray of age-sex patterns for migration")
    # load patterns and convert to xarrays
    qatar = df_to_xr(pd.read_csv(QATAR_PATTERN), dims=PATTERN_ID_VARS)
    eurostat = df_to_xr(pd.read_csv(EUROSTAT_PATTERN), dims=PATTERN_ID_VARS)
    # One array per location, each tagged with its own location_id
    all_locs_xr_list = [
        expand_dimensions(qatar if loc in QATAR_LOCS else eurostat,
                          location_id=[loc])
        for loc in WPP_LOCATION_IDS
    ]
    # Concat all locations together
    result = xr.concat(all_locs_xr_list, dim='location_id')
    # Save all locs pattern
    LOGGER.debug("Saving age-sex pattern xarray")
    # NOTE(review): gbd_round_id is neither a parameter nor assigned in this
    # function; it must exist at module level or this line raises NameError.
    pattern_dir = FBDPath(f'/{gbd_round_id}/future/migration/'
                          f'{PATTERN_VERSION}')
    pattern_path = pattern_dir / "combined_age_sex_pattern.nc"
    # Save the concatenated result; the previous code passed the undefined
    # name `pattern` here.
    save_xr(result, pattern_path, metric="percent", space="identity")
    LOGGER.debug("Saved age-sex pattern xarray")
    return result
Esempio n. 21
0
def save_paf(paf,
             gbd_round_id,
             past_or_future,
             version,
             acause,
             cluster_risk=None):
    """
    Save a PAF for one cause, optionally for one cluster risk.

    With ``cluster_risk`` the file is written to
    ``risk_acause_specific/<acause>_<cluster_risk>.nc``; without it, to
    ``<acause>.nc`` directly under the "paf" stage directory.

    Args:
        paf: PAF data handed to ``save_xr``.
        gbd_round_id (int): gbd round id.
        past_or_future (str): 'past' or 'future'.
        version (str): version, dated.
        acause (str): analytical cause.
        cluster_risk (str, optional): risk to include in the file name and
            the saved attributes; if None, the file is cause-only.
    """
    paf_stage_dir = FBDPath(gbd_round_id=gbd_round_id,
                            past_or_future=past_or_future,
                            stage="paf",
                            version=version)

    # Cause-only PAF: no risk in the file name or in the saved attributes.
    if cluster_risk is None:
        out_fbd_path = paf_stage_dir / "{}.nc".format(acause)
        LOGGER.info("Saving cause-only paf: {}".format(out_fbd_path))
        save_xr(paf,
                out_fbd_path,
                metric="percent",
                space="identity",
                acause=acause,
                version=version,
                gbd_round_id=gbd_round_id)
        return

    # Cause-risk PAF.
    risk_file_name = "{}_{}.nc".format(acause, cluster_risk)
    out_fbd_path = paf_stage_dir / "risk_acause_specific" / risk_file_name
    LOGGER.info("Saving cause-agg risk paf: {}".format(out_fbd_path))
    save_xr(paf,
            out_fbd_path,
            metric="percent",
            space="identity",
            acause=acause,
            risk=cluster_risk,
            version=version,
            gbd_round_id=gbd_round_id)
Esempio n. 22
0
def load_pop(gbd_round_id, past_version, forecast_version):
    """Return past and forecast population concatenated along ``year_id``.

    The past data is first expanded with the forecast's ``scenario`` and
    ``quantile`` coordinates so that both arrays can be concatenated.

    Args:
        gbd_round_id (int): gbd round id used in both FBDPaths.
        past_version (str): version of the past population.
        forecast_version (str): version of the forecast population.

    Returns:
        The concatenated population.
    """
    forecast_file = FBDPath(
        f"/{gbd_round_id}/future/population/{forecast_version}"
        f"/population_combined.nc")
    past_file = FBDPath(
        f"/{gbd_round_id}/past/population/{past_version}/population.nc")

    future_pop = open_xr(forecast_file).data
    past_raw = open_xr(past_file).data
    past_pop = expand_dimensions(past_raw,
                                 scenario=future_pop.scenario,
                                 quantile=future_pop["quantile"])

    return xr.concat([past_pop, future_pop], "year_id")
Esempio n. 23
0
def _population_weighted_mean_age(pop, mean_age):
    """Return ``mean_age`` weighted by ``pop``, reduced over age_group_id."""
    return (pop * mean_age).sum("age_group_id") / pop.sum("age_group_id")


def prep_pop_da(past_version, forecast_version, gbd_round_id, years):
    """Build population and population-weighted mean age by scenario group.

    Loads the ``mean`` quantile of the forecast population, prepends the past
    population (restricted to the forecast's sexes, age groups and locations
    and expanded to the forecast's scenarios), and splits the result into
    three scenario groups: ``[-1, 0, 1]``, ``[3]`` and ``[2]``.

    Args:
        past_version (str):
            Version name of the past population (``population.nc``).
        forecast_version (str):
            Version name of the forecast population
            (``population_combined.nc``).
        gbd_round_id (int):
            The GBD round used to build both population paths.
        years:
            Object whose ``past_years`` attribute selects the past years.

    Returns:
        tuple:
            ``(avg_age_fhs, avg_age_sdg, avg_age_99, ds, ds_sdg, ds_99)``:
            the population-weighted mean age for scenarios ``[-1, 0, 1]``,
            ``[3]`` and ``[2]``, followed by the matching populations as
            datasets with a single ``population`` variable.
    """
    forecast_pop_file = FBDPath(
        f"/{gbd_round_id}/future/population/{forecast_version}/"
        f"population_combined.nc")
    forecast_fhs = open_xr(forecast_pop_file).data.sel(quantile='mean',
                                                       drop=True)

    past_fhs_file = FBDPath(
        f"/{gbd_round_id}/past/population/{past_version}/population.nc")
    past_subset = open_xr(past_fhs_file).data.sel(
        year_id=years.past_years,
        sex_id=forecast_fhs["sex_id"],
        age_group_id=forecast_fhs["age_group_id"],
        location_id=forecast_fhs["location_id"])
    past_fhs = expand_dimensions(past_subset,
                                 scenario=forecast_fhs.scenario.values)

    fhs_all_scenarios = xr.concat([past_fhs, forecast_fhs], dim="year_id")

    fhs = fhs_all_scenarios.sel(scenario=[-1, 0, 1])
    alt_sdg = fhs_all_scenarios.sel(scenario=[3])
    alt_99 = fhs_all_scenarios.sel(scenario=[2])

    ages = db.get_ages().query("age_group_id in @ALL_AGE_GROUP_IDS")
    # Take an explicit copy: adding a column to a column-subset of ``ages``
    # otherwise triggers pandas' SettingWithCopyWarning.
    days = ages[["age_group_id", "age_group_days_start",
                 "age_group_days_end"]].copy()
    # Midpoint of each age group's day range, divided by 365.25.
    days["mean_age"] = (days["age_group_days_end"] -
                        (days["age_group_days_end"] -
                         days["age_group_days_start"]) / 2) / 365.25
    mean_age = days.set_index("age_group_id")["mean_age"].to_xarray()

    # Restrict every scenario group to the age groups that have a mean age
    # and to SEX_IDS.
    age_ids = mean_age["age_group_id"]
    data_fhs = fhs.sel(age_group_id=age_ids, sex_id=SEX_IDS)
    data_sdg = alt_sdg.sel(age_group_id=age_ids, sex_id=SEX_IDS)
    data_99 = alt_99.sel(age_group_id=age_ids, sex_id=SEX_IDS)

    avg_age_fhs = _population_weighted_mean_age(data_fhs, mean_age)
    avg_age_sdg = _population_weighted_mean_age(data_sdg, mean_age)
    avg_age_99 = _population_weighted_mean_age(data_99, mean_age)

    ds = data_fhs.rename("population").to_dataset()
    ds_sdg = data_sdg.rename("population").to_dataset()
    ds_99 = data_99.rename("population").to_dataset()

    return avg_age_fhs, avg_age_sdg, avg_age_99, ds, ds_sdg, ds_99
Esempio n. 24
0
def symlink_directly_modeled_paf_file(
        acause, rei, calculated_paf_version, directly_modeled_paf, gbd_round_id
        ):
    """Symlink directly-modeled PAF files into the calculated-PAF version.

    For both the past and the future, the ``{acause}_{rei}.nc`` file under
    ``risk_acause_specific`` of the directly-modeled PAF version is symlinked
    into the ``risk_acause_specific`` directory of the calculated PAF
    version. That directory is created first if it does not exist.

    Args:
        acause (str):
            The cause of the cause-risk pair.
        rei (str):
            The risk of the cause-risk pair.
        calculated_paf_version (str):
            Output version into which the directly-modeled PAFs are
            symlinked (and where calculated PAFs are saved).
        directly_modeled_paf (str):
            The PAF version that holds the directly-modeled file to link.
        gbd_round_id (int):
            The numeric ID representing the GBD round.

    Raises:
        RuntimeError:
            If the symlink sub-process fails.
    """
    sub_dir = "risk_acause_specific"
    file_name = f"{acause}_{rei}.nc"

    for time_frame in ("past", "future"):
        # Everything except the version is shared by source and destination.
        shared = dict(
            gbd_round_id=gbd_round_id,
            past_or_future=time_frame,
            stage="paf")

        target_dir = FBDPath(version=calculated_paf_version, **shared) / sub_dir
        target_dir.mkdir(parents=True, exist_ok=True)

        source_file = (
            FBDPath(version=directly_modeled_paf, **shared)
            / sub_dir / file_name)

        symlink_file_to_directory(source_file, target_dir)
def make_run_log_file(version):
    """Copy the settings file into the run's output directory as a run log.

    Right now this just copies the settings file to the output directory so
    people can see which versions and whatnot were used.

    Args:
        version (str): version name where the current mortality run is to be
            saved.
    """
    # Make the run directory so this log can be saved. ``parents=True`` lets
    # a first run succeed when the intermediate directories do not exist yet.
    run_dir = FBDPath(f"/{settings.GBD_ROUND_ID}/future/death/{version}/")
    run_dir.mkdir(parents=True, exist_ok=True)

    # The source path is relative to the current working directory.
    source_path = os.path.join(os.pardir, "fbd_cod/settings.py")
    dest_path = os.path.join(str(run_dir), "versions.py")

    copyfile(source_path, dest_path)
def merge_main(output_version, gbd_round_id):
    """Merge the per-draw education files into a single ``education.nc``.

    Every ``corrected_edu_draw*.nc`` file in the input directory is opened
    as one multi-file dataset concatenated along ``draw``. Its first data
    variable is renamed to ``value`` and written to ``education.nc`` in the
    same directory.

    Args:
        output_version (str):
            Cohort corrected version.
        gbd_round_id (int):
            The gbd round id.
    """
    # NOTE(review): the real path expression was redacted, so neither
    # argument is currently used to build the directory.
    draw_dir = FBDPath("".format())  # Path removed for security reasons
    merged_path = draw_dir / "education.nc"

    draw_files = list(draw_dir.glob('corrected_edu_draw*.nc'))
    merged_ds = xr.open_mfdataset(draw_files, concat_dim="draw")
    # Only the first data variable of the merged dataset is kept.
    corrected_edu = list(merged_ds.data_vars.values())[0]

    LOGGER.info("Saving corrected education.")
    corrected_edu.name = "value"
    corrected_edu.to_netcdf(str(merged_path))
Esempio n. 27
0
def output_to_xarray(gbd_round, out, version_out):
    """Convert the adjusted ASFR table to xarray and save it as ``asfr.nc``.

    Args:
        gbd_round (int):
            The GBD round used to build the output path.
        out:
            Tabular data passed to ``df_to_xr``; it must provide the
            ``location_id``, ``year_id``, ``scenario``, ``age_group_id``,
            ``sex_id`` and ``draw`` dimensions.
        version_out (str):
            Version name of the future ASFR directory to save into.
    """
    asfr_path = FBDPath(f"/{gbd_round}/future/asfr/{version_out}")
    dims = ['location_id', 'year_id', 'scenario', 'age_group_id', 'sex_id',
            'draw']
    out_xr = df_to_xr(out, dims=dims)
    # Pass the actual output version; the literal string "version" was
    # passed here before.
    save_xr(out_xr,
            fbdpath=asfr_path / "asfr.nc",
            metric="rate",
            space="identity",
            version=version_out,
            model="asfr_adjusted_to_tfr_plus_point1_if_below2")
def _load_migration_rate():
    """Return baseline migration divided by baseline population.

    The population is the ``population_mean_ui`` baseline version restricted
    to scenario 0, the ``mean`` quantile, ``COUNTRIES``, ``BOTH_SEX_ID`` and
    ``ALL_AGE_ID``. The migration is the ``migration`` baseline version
    averaged over ``draw`` and summed over ``sex_id`` and ``age_group_id``.

    Returns:
        The migration array divided by the population array.
    """
    pop_version = settings.BASELINE_VERSIONS['population_mean_ui']
    pop_dir = FBDPath(
        f"/{pop_version.gbd_round_id}/future/population/"
        f"{pop_version.version}")
    population = open_xr(pop_dir / "population_combined.nc").data.sel(
        scenario=0,
        quantile="mean",
        location_id=COUNTRIES,
        sex_id=BOTH_SEX_ID,
        age_group_id=ALL_AGE_ID)

    mig_version = settings.BASELINE_VERSIONS['migration']
    mig_dir = FBDPath(
        f"/{mig_version.gbd_round_id}/future/migration/"
        f"{mig_version.version}")
    migration = open_xr(mig_dir / "migration.nc").data
    # Collapse draws to their mean, then total over sex and age.
    migration = migration.mean('draw').sum(['sex_id', 'age_group_id'])

    return migration / population
Esempio n. 29
0
def wpp_witt_pops():
    """Print the 2100 populations for location 1 from the WPP and Witt data.

    Reads the WPP ``population_aggs`` file and the Wittgenstein
    ``population`` file at ``location_id=1`` and ``year_id=2100`` (the latter
    also at ``sex_id=3`` and ``age_group_id=22``), passes both values through
    ``_helper_round`` with ``1e9``, and prints them in a sentence.
    """
    wpp_version = settings.WPP_VERSIONS['population_aggs']
    wpp_dir = FBDPath(
        f"/{wpp_version.gbd_round_id}/future/population/"
        f"{wpp_version.version}")
    wpp_da = open_xr(wpp_dir / "2019_fhs_agg_allage_bothsex_only.nc").data
    wpp_da = wpp_da.sel(location_id=1, year_id=2100)

    witt_version = settings.WITT_VERSIONS['population']
    witt_dir = FBDPath(
        f"/{witt_version.gbd_round_id}/future/population/"
        f"{witt_version.version}")
    witt_da = open_xr(witt_dir / "population_ssp2.nc").data
    witt_da = witt_da.sel(
        location_id=1, year_id=2100, sex_id=3, age_group_id=22)

    wpp_pop_val = _helper_round(wpp_da.values.item(0), 1e9)
    witt_pop_val = _helper_round(witt_da.values.item(0), 1e9)

    message = (
        f"Forecasts from this study differ from the UNPD and the "
        f"Wittgenstein Centre, which project {wpp_pop_val} billion and "
        f"{witt_pop_val} billion people globally in 2100, respectively.\n")
    print(message)
def get_pop(forecast_pop_version, gbd_round_id, measure, draws, years,
            past_pop_version):
    """Pull past and forecast populations and subset them by measure.

    If ``measure`` is ``"live_births"`` the result is restricted to the
    fertile age groups and to females, and the ``sex_id`` coordinate is
    dropped. Otherwise the result is restricted to ``sex_id`` 1 and 2.

    Args:
        forecast_pop_version (str):
            The version name of the future populations file used in FBDPath.
        gbd_round_id (int):
            The GBD round fed into FBDPath to pull the correct version of
            pops.
        measure (str):
            The measure being computed; only ``"live_births"`` changes the
            subsetting.
        draws (int):
            The number of desired draws. Used to expand the past population
            and passed on to ``concat_past_future``.
        years:
            Passed through to ``concat_past_future``.
        past_pop_version (str):
            The version name of the past populations file used in FBDPath.

    Returns:
        (xarray.DataArray):
            Past and forecast population combined. For ``"live_births"`` the
            ``age_group_id`` dimension holds the fertile age groups only.
    """
    future_dir = FBDPath(
        f"{gbd_round_id}/future/population/{forecast_pop_version}")
    future_da = open_xr(future_dir / "population.nc").data

    past_dir = FBDPath(
        f"{gbd_round_id}/past/population/{past_pop_version}")
    past_da = open_xr(past_dir / "population.nc").data
    # Align the past data with the forecast's sexes and give it a draw
    # dimension before joining the two.
    past_da = past_da.sel(sex_id=future_da.sex_id.values)
    past_da = expand_dimensions(past_da, draw=range(draws))

    combined = concat_past_future(past_da, future_da, draws, years)

    if measure != "live_births":
        return combined.sel(sex_id=[1, 2])

    fertile_females = combined.sel(
        age_group_id=list(FERTILE_AGE_GROUP_IDS),
        sex_id=2)
    return fertile_females.drop(["sex_id"])