def save_y_star(eps_version, arima_version, years, measure, draws, decay,
                gbd_round_id):
    """Add forecast residuals to the migration predictions and save them.

    The residual forecast is read from ``eps_star.nc`` when that file can be
    opened; otherwise it is produced by ``arima_migration`` and written to
    ``eps_star.nc`` for later runs.

    NOTE(review): ``eps_path``, ``mig_dir`` and ``ystar_out`` are not defined
    inside this function; they must come from an enclosing/module scope.
    ``eps_version``, ``arima_version``, ``measure`` and ``gbd_round_id`` are
    not referenced in the body.

    Args:
        years (YearRange):
            Supplies ``past_years``, ``forecast_years`` and ``years``.
        draws (int):
            Number of draws; forwarded to ``arima_migration`` and used to
            expand the predictions along a ``draw`` dimension.
        decay:
            Forwarded unchanged to ``arima_migration``.
    """
    residual_input = open_xr(eps_path).data
    try:
        residuals = open_xr(f"{mig_dir}/eps_star.nc").data
    except Exception:
        # No readable cached residual forecast: compute it and cache it.
        residuals = arima_migration(residual_input, years, draws, decay)
        save_xr(residuals, mig_dir / "eps_star.nc", metric="rate",
                space="identity")

    # Only the forecast years are capped to [-10, 10]; past years are kept
    # untouched.  The population forecasts to 2100 is decreasing to 0 with
    # current forecasts from migration for Syria, Latvia and Jamaica, and the
    # capping helps to make things more reasonable.
    past_residuals = residuals.sel(year_id=years.past_years)
    capped_forecast = residuals.sel(year_id=years.forecast_years).clip(
        min=-10, max=10)
    residuals = xr.concat([past_residuals, capped_forecast], dim="year_id")

    predictions = open_xr(mig_dir / "mig_hat.nc").data
    predictions = expand_dimensions(
        predictions.sel(year_id=years.years), draw=range(draws))

    save_xr(predictions + residuals, ystar_out, metric="rate",
            space="identity")
def get_mortality(forecast_mortality_version, gbd_round_id, draws, years,
                  past_mortality_version):
    """Load past and forecast mortality and join them into one array.

    Args:
        forecast_mortality_version (str):
            Version name of the future mortality data used in FBDPath.
        gbd_round_id (int):
            GBD round used as the first component of both FBDPath paths.
        draws (int):
            The number of desired draws; forwarded to ``concat_past_future``.
        years (YearRange):
            Forecasting years object; forwarded to ``concat_past_future``.
        past_mortality_version (str):
            Version name of the past mortality data used in FBDPath.
    Returns:
        (xarray.DataArray):
            Mortality rate, as returned by ``concat_past_future``.
    """
    future_dir = FBDPath(
        f"{gbd_round_id}/future/death/{forecast_mortality_version}")
    future_da = open_xr(future_dir / "_all.nc").data

    past_dir = FBDPath(f"{gbd_round_id}/past/death/{past_mortality_version}")
    # The past file carries an "acause" coordinate that is removed before
    # combining with the forecast.
    past_da = open_xr(past_dir / "_all.nc").data.drop("acause")

    return concat_past_future(past_da, future_da, draws, years)
def get_asfr(forecast_asfr_version, gbd_round_id, draws, years,
             past_asfr_version):
    """Load past and forecast ASFR, joined, without a ``sex_id`` dimension.

    If ``sex_id`` is a dimension only ``sex_id=2`` (females) is kept and the
    dimension is dropped; if it is merely a point coordinate it is removed.

    Args:
        forecast_asfr_version (str):
            The future version name of the ASFR file used in FBDPath.
        gbd_round_id (int):
            GBD round used as the first component of both FBDPath paths.
        draws (int):
            The number of desired draws; forwarded to ``concat_past_future``.
        years (YearRange):
            Forecasting years object; forwarded to ``concat_past_future``.
        past_asfr_version (str):
            The past version name of the ASFR file used in FBDPath.
    Returns:
        (xarray.DataArray):
            Age-specific fertility rate.
    """
    future_dir = FBDPath(
        f"{gbd_round_id}/future/asfr/{forecast_asfr_version}")
    future_da = open_xr(future_dir / "asfr.nc").data

    past_dir = FBDPath(f"{gbd_round_id}/past/asfr/{past_asfr_version}")
    past_da = open_xr(past_dir / "asfr.nc").data

    asfr = concat_past_future(past_da, future_da, draws, years)

    if "sex_id" in asfr.dims:
        # sex_id is a real dimension: keep females only.
        return asfr.sel(sex_id=2, drop=True)
    if "sex_id" in asfr.coords:
        # sex_id is only a point coordinate: strip it.
        return asfr.drop("sex_id")
    # sex_id is not present at all.
    return asfr
Exemple #4
0
def load_forecast_pop(gbd_round_id, version, years, draws):
    """
    Load forecast population data. Aggregates if necessary.

    The aggregated file ``population_agg.nc`` is used when it can be opened.
    Otherwise ``population.nc`` is read, aggregated with
    ``Aggregator.aggregate_everything`` and written back as
    ``population_agg.nc``.

    Args:
        gbd_round_id (int):
            The gbd round ID that the forecast population is from
        version (str):
            The version of forecast population to read from
        years (YearRange):
            The Forecasting format years to use.
        draws (int):
            The number of draws to resample the population to.

    Returns:
        xarray.DataArray: The forecast population, restricted to
        ``years.forecast_years`` and resampled to ``draws`` draws.
    """
    forecast_pop_dir = FBDPath(f"/{gbd_round_id}/future/population/{version}")
    agg_path = forecast_pop_dir / "population_agg.nc"
    try:
        forecast_pop_da = open_xr(agg_path).data
    except Exception:  # Need to make agg version
        # ``except Exception`` rather than a bare ``except`` so that
        # KeyboardInterrupt / SystemExit are not mistaken for a missing file
        # and do not trigger a long aggregation run.
        unaggregated_da = open_xr(forecast_pop_dir / "population.nc").data
        forecast_pop_da = Aggregator.aggregate_everything(
            unaggregated_da, gbd_round_id).pop
        save_xr(forecast_pop_da,
                agg_path,
                metric="number",
                space="identity")

    # slice to correct years and number of draws
    forecast_pop_da = forecast_pop_da.sel(year_id=years.forecast_years)
    return resample(forecast_pop_da, draws)
Exemple #5
0
def wpp_fhs_diff():
    """Print the 2100 WPP-vs-FHS population difference for three regions.

    Reads the WPP all-age/both-sex aggregates and the FHS reference scenario
    (scenario 0, mean) for year 2100, delegates the per-location comparison
    to ``_helper_wpp_fhs_diff`` and the formatting to ``_helper_round`` (with
    1e6 as second argument), then prints a sentence with the three results.
    """
    wpp_version = settings.WPP_VERSIONS['population_aggs']
    wpp_file = FBDPath(
        f"/{wpp_version.gbd_round_id}/future/population/"
        f"{wpp_version.version}") / "2019_fhs_agg_allage_bothsex_only.nc"
    wpp_2100 = open_xr(wpp_file).data.sel(year_id=2100)

    fhs_version = settings.BASELINE_VERSIONS['population_mean_ui']
    fhs_file = FBDPath(
        f"/{fhs_version.gbd_round_id}/future/population/"
        f"{fhs_version.version}") / "population_combined.nc"
    fhs_2100 = open_xr(fhs_file).data.sel(
        sex_id=BOTH_SEX_ID, age_group_id=ALL_AGE_ID, scenario=0,
        quantile="mean", year_id=2100)

    # Location ids: 166 Sub-Saharan Africa, 158 South Asia,
    # 4 Southeast Asia, East Asia, and Oceania.
    region_ids = (166, 158, 4)
    raw_diffs = [
        _helper_wpp_fhs_diff(wpp_2100, fhs_2100, region_id)
        for region_id in region_ids
    ]
    ssa_millions, south_asia_millions, seao_millions = [
        _helper_round(raw_diff, 1e6) for raw_diff in raw_diffs
    ]

    print(f"The difference with UNPD in 2100 is due to {ssa_millions} "
          f"million fewer people in the reference scenario in sub-Saharan "
          f"Africa, {south_asia_millions} million fewer in South Asia, and "
          f"{seao_millions} million fewer in Southeast Asia, East Asia, "
          f"and Oceania, primarily due to the level of fertility achieved in "
          f"below replacement populations. \n")
Exemple #6
0
def pop_declines():
    """Print how many countries lose over half their population by 2100.

    Compares the reference-scenario mean population in 2100 with the past
    population in 2017 for every location in ``COUNTRIES``, then repeats the
    comparison at draw level for location 6 (named "china" in the code) to
    report a mean and a 2.5%-97.5% interval of the percentage decline.
    """
    # --- mean-level comparison across all countries -----------------------
    mean_version = settings.BASELINE_VERSIONS['population_mean_ui']
    mean_file = FBDPath(
        f"/{mean_version.gbd_round_id}/future/population/"
        f"{mean_version.version}") / "population_combined.nc"
    future_mean_da = open_xr(mean_file).data.sel(
        sex_id=BOTH_SEX_ID, age_group_id=ALL_AGE_ID, scenario=0,
        quantile="mean", location_id=COUNTRIES)

    past_version = settings.PAST_VERSIONS['population']
    past_file = FBDPath(
        f"/{past_version.gbd_round_id}/past/population/"
        f"{past_version.version}") / "population.nc"
    past_mean_da = open_xr(past_file).data.sel(
        sex_id=BOTH_SEX_ID, age_group_id=ALL_AGE_ID, location_id=COUNTRIES)

    remaining_share = (
        future_mean_da.sel(year_id=2100) / past_mean_da.sel(year_id=2017))
    pct_decline_da = 1 - remaining_share
    more_than_half = pct_decline_da > .5
    decline_over_50_val = more_than_half.sum().values.item(0)
    halved_location_ids = list(
        pct_decline_da.where(more_than_half, drop=True).location_id.values)
    decline_over_50_locs = _location_id_to_name(halved_location_ids)

    # --- draw-level comparison for uncertainty ----------------------------
    draw_version = settings.BASELINE_VERSIONS['population']
    draw_file = FBDPath(
        f"/{draw_version.gbd_round_id}/future/population/"
        f"{draw_version.version}") / "population_agg.nc"
    future_draw_da = open_xr(draw_file).data.sel(
        sex_id=BOTH_SEX_ID, age_group_id=ALL_AGE_ID, scenario=0,
        location_id=COUNTRIES)

    past_draw_version = settings.PAST_VERSIONS['population_draw2017']
    past_draw_file = FBDPath(
        f"/{past_draw_version.gbd_round_id}/past/population/"
        f"{past_draw_version.version}") / "population.nc"
    past_draw_da = open_xr(past_draw_file).data.sel(
        sex_id=BOTH_SEX_ID, age_group_id=ALL_AGE_ID, location_id=COUNTRIES)

    china_2100 = future_draw_da.sel(year_id=2100, location_id=6)
    china_2017 = past_draw_da.sel(year_id=2017, location_id=6)
    china_pct_decline = (1 - (china_2100 / china_2017)) * 100

    decline_china_mean = _helper_round(
        china_pct_decline.mean("draw").values.item(0), 1)
    decline_china_lower, decline_china_upper = [
        _helper_round(
            china_pct_decline.quantile(dim="draw", q=[q]).values.item(0), 1)
        for q in (0.025, 0.975)
    ]

    print(f"{decline_over_50_val} countries including {decline_over_50_locs} "
          f"in the reference scenario will have declines of greater than "
          f"50% from 2017 by 2100; China will decline by {decline_china_mean} "
          f"({decline_china_lower} to {decline_china_upper}).%\n")
Exemple #7
0
def return_pop_differences(fbd_pop_version, unpd_pop_version):
    """Print the 2100 UNPD-minus-FHS population gap for three regions.

    Loads the FHS reference scenario (scenario 0, mean, all ages, both
    sexes) and the UNPD all-age population for 2100, merges them by location
    and prints, in millions rounded to two decimals, how many fewer people
    the FHS forecast has in locations 166, 158 and 4.

    Args:
        fbd_pop_version (str):
            FHS forecast population version (read from GBD round 5).
        unpd_pop_version (str):
            UNPD (WPP) forecast population version.

    Returns:
        None. The result is printed.
    """
    GBD_LOC_DF = get_location_metadata(gbd_round_id=5, location_set_id=39)
    gbd_locs_df = GBD_LOC_DF.query("level < 4")

    fbdpoppath = FBDPath(f"/5/future/population/{fbd_pop_version}/"
                         "population_combined.nc")

    unpdpoppath = FBDPath(f"/wpp/future/population/{unpd_pop_version}/"
                          "population_all_age.nc")

    unpd_pop_xr = open_xr(unpdpoppath).data

    unpd_pop_100 = unpd_pop_xr.sel(year_id=2100, sex_id=3).drop([
        "year_id", "sex_id", "age_group_id"
    ]).squeeze().rename("unpd_pop").to_dataframe().reset_index()

    ihme_pop_100 = open_xr(fbdpoppath).data.sel(
        age_group_id=22, sex_id=3, scenario=0, year_id=2100,
        quantile="mean").drop([
            "age_group_id", "sex_id", "scenario", "year_id", "quantile"
        ]).squeeze().rename("ihme_pop").to_dataframe().reset_index()

    ihme_pop_100["ihme_pop_int"] = ihme_pop_100["ihme_pop"].astype(int)
    unpd_pop_100["unpd_pop_int"] = unpd_pop_100["unpd_pop"].astype(int)

    pop_2100_df = ihme_pop_100.merge(unpd_pop_100, how="left")

    # Inner merge: only locations present in the (level < 4) hierarchy stay.
    final_pop_df = pop_2100_df.merge(
        gbd_locs_df[["location_id", "lancet_label"]])

    location_ids = [166, 158, 4]
    locs_of_interest = final_pop_df[final_pop_df.location_id.isin(
        location_ids)]
    new_pop_diff = {}
    for loc_id in location_ids:
        # Filter the row once per location instead of once per column.
        loc_rows = locs_of_interest[locs_of_interest["location_id"] == loc_id]
        ihme_num = loc_rows["ihme_pop"].values[0]
        unpd_num = loc_rows["unpd_pop"].values[0]
        new_pop_diff[loc_id] = ((unpd_num - ihme_num) / 1e6).round(2)

    print(f"Figure 9 shows a global comparison of the TFR, life expectancy at"
          f" birth, and population between our reference scenario, the UNPD "
          f"median variant, and the Wittgenstein SSP2 scenario. Differences"
          f" between 2100 population forecasts from the UNPD and our reference"
          f" scenario are largely explained by differences in population"
          f" levels in sub-Saharan Africa; south Asia and ; southeast Asia,"
          f" east Asia, and Oceania ({new_pop_diff[location_ids[0]]} million, "
          f"{new_pop_diff[location_ids[1]]}  million, and"
          f" {new_pop_diff[location_ids[2]]} million fewer people in our"
          f" reference scenario, respectively)")
Exemple #8
0
def pull_pop(past_pop_version, future_pop_version,
             scenarios=0, age_groups=ALL_AGE_ID):
    """Return past and forecast population combined, both sexes only.

    Args:
        past_pop_version (str):
            Past population version under ``GBD_ROUND``.
        future_pop_version (str):
            Forecast population version under ``GBD_ROUND``.
        scenarios:
            Value(s) passed to ``sel(scenario=...)``; defaults to 0.
        age_groups:
            Value(s) passed to ``sel(age_group_id=...)``; defaults to
            ``ALL_AGE_ID``.

    Returns:
        xarray.DataArray: ``past.combine_first(future)`` restricted to the
        requested age groups and scenarios and to ``BOTH_SEX_ID``.
    """
    past_dir = FBDPath(f"{GBD_ROUND}/past/population/{past_pop_version}")
    future_dir = FBDPath(
        f"{GBD_ROUND}/future/population/{future_pop_version}")

    future_pop = open_xr(future_dir / "population_combined.nc").data
    # Restrict the past data to the locations present in the forecast.
    past_pop = open_xr(past_dir / "population.nc").data.sel(
        location_id=future_pop.location_id)

    combined = past_pop.combine_first(future_pop)
    return combined.sel(
        age_group_id=age_groups, sex_id=BOTH_SEX_ID, scenario=scenarios)
def load_data(version):
    """Load scenario-0 forecast GDP as a flat dataframe.

    Args:
        version (str): Forecast GDP version under ``GBD_ROUND_ID``.

    Returns:
        pandas.DataFrame: The data array named "gdp", converted with
        ``to_dataframe()`` and with its index reset to columns.
    """
    gdp_file = FBDPath(f"{GBD_ROUND_ID}/future/gdp/{version}/gdp.nc")
    reference_gdp = open_xr(gdp_file).data.sel(scenario=0)
    # to_dataframe() needs a named array; the name becomes the value column.
    reference_gdp.name = "gdp"
    return reference_gdp.to_dataframe().reset_index()
Exemple #10
0
def sing_tai_tfr():
    """Print the 2017 TFR mean and 2.5%-97.5% interval for two locations.

    Location 69 is reported as Singapore and location 8 as Taiwan. The mean
    and quantiles are taken over the ``draw`` dimension and formatted with
    ``_helper_round(value, 1)``.
    """
    tfr_version = settings.PAST_VERSIONS['tfr']
    tfr_file = FBDPath(
        f"/{tfr_version.gbd_round_id}/past/tfr/{tfr_version.version}"
    ) / "tfr.nc"
    tfr_2017 = open_xr(tfr_file).data.sel(year_id=2017)

    def _draw_summary(location_id):
        """Return (mean, lower, upper) over draws for one location."""
        loc_da = tfr_2017.sel(location_id=location_id)
        mean = loc_da.mean('draw').values.item(0)
        upper = loc_da.quantile(.975, dim='draw').values.item(0)
        lower = loc_da.quantile(.025, dim='draw').values.item(0)
        return mean, lower, upper

    sg_mean, sg_lower, sg_upper = _draw_summary(69)
    tw_mean, tw_lower, tw_upper = _draw_summary(8)

    (singapore_tfr, taiwan_tfr, singapore_upper, singapore_lower,
     taiwan_upper, taiwan_lower) = [
        _helper_round(value, 1)
        for value in (sg_mean, tw_mean, sg_upper, sg_lower,
                      tw_upper, tw_lower)
    ]

    print(f"Discussion: In contrast, positive incentives have had little "
          f"effect in Singapore and Taiwan, where 2017 TFR levels were "
          f"{singapore_tfr} ({singapore_lower}–{singapore_upper}) and "
          f"{taiwan_tfr} ({taiwan_lower}–{taiwan_upper})\n")
Exemple #11
0
def pull_reshape_tfr(gbd_round_id, tfr_version, location_ids):
    """Build the TFR table input for the last past year and the last
    forecast year.

    Past TFR (covariate 149, "best" status) is pulled for ``YEARS.past_end``
    and converted to an xarray dataarray, forecast TFR is read from
    ``tfr_combined.nc``, and the two are concatenated along ``year_id``. The
    result is turned into a dataframe with one column per quantile, mean and
    UI are combined into one column, and the first- and last-year tables are
    pivoted by scenario and merged.

    Args:
        gbd_round_id (int):
            GBD round.
        tfr_version (str):
            Forecast TFR version.
        location_ids (list):
            List of location IDs to pull from both past and future data.
    Returns:
        tfr_final_df (pandas dataframe):
            Dataframe with all required TFR data, reshaped for downstream table
            production.
    """
    past_end = YEARS.past_end
    forecast_end = YEARS.forecast_end

    # Past TFR from the covariate database, renamed to the quantile labels
    # used by the forecast file.
    covariate_df = get_covariate_estimates(
        covariate_id=149, gbd_round_id=gbd_round_id,
        location_id=location_ids, year_id=past_end, status="best")
    wanted_columns = ["year_id", "location_id", "mean_value", "lower_value",
                      "upper_value"]
    past_tfr_df = covariate_df[wanted_columns].rename(columns={
        "mean_value": "mean",
        "lower_value": "lower",
        "upper_value": "upper",
    })
    past_tfr_da = melt_to_xarray(past_tfr_df)

    # Forecast TFR, limited to the requested locations, scenarios and years.
    future_tfr_da = open_xr(
        f"{gbd_round_id}/future/tfr/{tfr_version}/tfr_combined.nc").data
    future_tfr_da = future_tfr_da.sel(
        location_id=location_ids, scenario=SCENARIOS,
        year_id=YEARS.forecast_years)

    # Stack past and future, then spread quantiles into columns.
    all_years_da = xr.concat([past_tfr_da, future_tfr_da], dim="year_id")
    long_df = all_years_da.to_dataframe().reset_index()
    wide_df = long_df.pivot_table(
        values="value",
        index=["location_id", "year_id", "scenario"],
        columns="quantile").reset_index()

    # Combine value and UI into one column
    wide_df = combine_mean_ui(wide_df, df_type="tfr")

    # First year keeps the reference scenario only; last year keeps all.
    first_year_df = wide_df.query(f"year_id == {past_end} and scenario==0")
    last_year_df = wide_df.query(f"year_id == {forecast_end}")
    first_year_df = pivot_scenarios(
        first_year_df, f"{past_end}", SCENARIO_MAP, df_type="tfr")
    last_year_df = pivot_scenarios(
        last_year_df, f"{forecast_end}", SCENARIO_MAP, df_type="tfr")

    return first_year_df.merge(last_year_df)
Exemple #12
0
def read_paf(acause, risk, gbd_round_id, past_or_future, version):
    """
    Read the PAF file for one cause-risk pair.

    The file is ``<acause>_<risk>.nc`` inside the ``risk_acause_specific``
    folder of the "paf" stage.

    Args:
        acause (str): cause name.
        risk (str): risk name.
        gbd_round_id (int): gbd round id.
        past_or_future (str): forwarded to FBDPath as ``past_or_future``.
        version (str): str indicating folder where data comes from.

    Returns:
        paf (xr.DataArray): the ``.data`` attribute of the opened file.

    Raises:
        Whatever ``open_xr`` raises when the file cannot be read
        (NOTE(review): the earlier docstring named ValueError -- confirm).
    """
    paf_dir = FBDPath(gbd_round_id=gbd_round_id,
                      past_or_future=past_or_future,
                      stage="paf",
                      version=version)
    paf_file = paf_dir / "risk_acause_specific" / f"{acause}_{risk}.nc"
    return open_xr(paf_file).data
Exemple #13
0
def largest_gdp():
    """Print when locations 6 and 102 hold the largest GDP.

    Uses scenario 0 of the baseline GDP forecast. Location 6 is reported as
    China and location 102 as the USA. Asserts that, for every year in the
    data, one of those two locations has the maximum GDP.
    """
    gdp_version = settings.BASELINE_VERSIONS['gdp']
    gdp_file = FBDPath(
        f"/{gdp_version.gbd_round_id}/future/gdp/{gdp_version.version}"
    ) / "gdp.nc"
    gdp_da = open_xr(gdp_file).data.sel(scenario=0)

    # Keep only the entries that equal the maximum over locations; all
    # other entries become NaN (and all-NaN labels are dropped).
    is_largest = gdp_da == gdp_da.max('location_id')
    max_da = gdp_da.where(is_largest, drop=True)

    def _years_on_top(location_id):
        """Years in which the given location has the maximum GDP."""
        return max_da.sel(
            location_id=location_id).dropna(dim='year_id').year_id.values

    china_years = _years_on_top(6)
    usa_years = _years_on_top(102)

    # check no other location is ever top
    years_accounted_for = np.concatenate([china_years, usa_years])
    missing_years = np.setdiff1d(
        gdp_da.coords["year_id"].values, years_accounted_for)
    assert missing_years.size == 0

    # first year where china takes the lead, then the first later year in
    # which the US is on top again
    china_year = china_years.min()
    later_usa_years = usa_years[usa_years > china_year]
    usa_year = later_usa_years.min()

    print(f"China is expected to become the largest economy by {china_year} "
          f"but in the reference scenario the USA would once again become in "
          f"the largest economy in {usa_year}.\n")
Exemple #14
0
def working_age():
    """Print the four countries with the largest working-age population
    in 2100.

    Uses the reference scenario (scenario 0, mean, both sexes) for the
    locations in ``COUNTRIES`` and sums age groups 9 through 17.
    """
    pop_version = settings.BASELINE_VERSIONS['population_mean_ui']
    pop_file = FBDPath(
        f"/{pop_version.gbd_round_id}/future/population/"
        f"{pop_version.version}") / "population_combined.nc"

    # working age is 20-64 (age id 163), which has to be assembled from age
    # group ids 9-17
    working_age_ids = list(range(9, 18))
    working_da = open_xr(pop_file).data.sel(
        sex_id=BOTH_SEX_ID, scenario=0, quantile="mean",
        age_group_id=working_age_ids, year_id=2100, location_id=COUNTRIES)
    working_da = working_da.sum('age_group_id')

    ranked_df = _add_location_name(working_da.to_dataframe())
    ranked_df = ranked_df.sort_values(
        by='population', ascending=False).reset_index()

    # After reset_index the labels 0..3 are the four largest populations.
    first, second, third, fourth = [
        ranked_df.location_name[rank] for rank in range(4)
    ]

    print(f"By 2100, {first} will still have the largest working age "
          f"population followed by {second} and {third}. Coming in fourth in "
          f"the world will be {fourth}.\n")
Exemple #15
0
def pop_peak():
    """Print the peak year/level of the population and its 2100 level.

    Uses location 1 (reported as global), all ages, both sexes, scenario 0.
    The peak is the maximum of the "mean" quantile; the "lower" and "upper"
    quantiles are read at the peak year and at 2100. All values are
    formatted with ``_helper_round(value, 1e9)``.
    """
    pop_version = settings.BASELINE_VERSIONS['population_mean_ui']
    pop_file = FBDPath(
        f"/{pop_version.gbd_round_id}/future/population/"
        f"{pop_version.version}") / "population_combined.nc"
    pop_da = open_xr(pop_file).data.sel(
        location_id=1, age_group_id=ALL_AGE_ID, sex_id=BOTH_SEX_ID, scenario=0)

    def _value_at(quantile, year_id):
        """Scalar population for one quantile and year."""
        return pop_da.sel(quantile=quantile, year_id=year_id).values.item(0)

    # Peak of the mean and the year in which it occurs
    mean_da = pop_da.sel(quantile="mean")
    peak_val = mean_da.max().values.item(0)
    max_year = mean_da.where(mean_da == peak_val, drop=True).year_id.values[0]
    peak_upper = _value_at("upper", max_year)
    peak_lower = _value_at("lower", max_year)

    # Values in 2100
    final_val = _value_at("mean", 2100)
    final_upper = _value_at("upper", 2100)
    final_lower = _value_at("lower", 2100)

    max_val, max_upper, max_lower, end_val, end_upper, end_lower = [
        _helper_round(value, 1e9)
        for value in (peak_val, peak_upper, peak_lower,
                      final_val, final_upper, final_lower)
    ]

    print(f"In the reference forecast, the global population is projected to "
          f"peak in {max_year} at {max_val} "
          f"({max_lower}-{max_upper}) billion people, and decline to "
          f"{end_val} ({end_lower}-{end_upper}) in 2100."
          f"\n")
Exemple #16
0
def age_pops():
    """Print the 2100 population above age 65 and below age 20.

    Uses location 1 (reported as global), both sexes, scenario 0, mean.
    The 65+ figure is the sum of age groups 18-21; the under-20 figure is
    age group 158. Both are formatted with ``_helper_round(value, 1e9)``.
    """
    pop_version = settings.BASELINE_VERSIONS['population_mean_ui']
    pop_file = FBDPath(
        f"/{pop_version.gbd_round_id}/future/population/"
        f"{pop_version.version}") / "population_combined.nc"
    global_2100 = open_xr(pop_file).data.sel(
        sex_id=BOTH_SEX_ID, scenario=0, year_id=2100, quantile="mean",
        location_id=1)

    # 65 to 69, 70 to 74, 75 to 79, 80 plus
    older_ids = [18, 19, 20, 21]
    older_total = global_2100.sel(
        age_group_id=older_ids).sum('age_group_id').values.item(0)

    # <20 years is available as a single aggregate age group
    younger_total = global_2100.sel(age_group_id=158).values.item(0)

    above_65_val = _helper_round(older_total, 1e9)
    under_20_val = _helper_round(younger_total, 1e9)

    print(f"Findings also suggest a shifting age structure in many parts of "
          f"the world, with {above_65_val} billion individuals above the age "
          f"of 65, and {under_20_val} billion individuals below the age of 20, "
          f"globally in 2100.\n")
Exemple #17
0
def load_pop(gbd_round_id, past_version, forecast_version):
    """Return past and forecast population concatenated along ``year_id``.

    The past data is expanded to carry the forecast's ``scenario`` and
    ``quantile`` coordinates before the two are concatenated.

    Args:
        gbd_round_id (int): GBD round used in both file paths.
        past_version (str): Past population version.
        forecast_version (str): Forecast population version.

    Returns:
        xarray.DataArray: Past followed by forecast population.
    """
    future_path = FBDPath(
        f"/{gbd_round_id}/future/population/"
        f"{forecast_version}/population_combined.nc")
    past_path = FBDPath(
        f"/{gbd_round_id}/past/population/{past_version}/population.nc")

    forecast_da = open_xr(future_path).data
    past_da = open_xr(past_path).data
    past_da = expand_dimensions(
        past_da,
        scenario=forecast_da.scenario,
        quantile=forecast_da["quantile"])

    return xr.concat([past_da, forecast_da], "year_id")
Exemple #18
0
def prep_pop_da(past_version, forecast_version, gbd_round_id, years):
    """Prepare population datasets and population-weighted mean ages.

    Past population (expanded over the forecast scenarios) is concatenated
    with the mean forecast population along ``year_id``. The result is split
    into three scenario groups -- ``[-1, 0, 1]``, ``[3]`` (named "sdg" in the
    code) and ``[2]`` (named "99") -- and for each group the
    population-weighted mean age is computed using the mid-point age, in
    years, of every age group in ``ALL_AGE_GROUP_IDS``.

    Args:
        past_version (str): Past population version.
        forecast_version (str): Forecast population version.
        gbd_round_id (int): GBD round used in both file paths.
        years (YearRange): Supplies ``past_years`` for the past data.

    Returns:
        tuple: ``(avg_age_fhs, avg_age_sdg, avg_age_99, ds, ds_sdg, ds_99)``
        where the first three are xarray.DataArray mean ages and the last
        three are xarray.Dataset objects holding a "population" variable.
    """
    forecast_pop_file = FBDPath(
        f"/{gbd_round_id}/future/population/{forecast_version}/"
        f"population_combined.nc")
    forecast_fhs = open_xr(forecast_pop_file).data.sel(quantile='mean',
                                                       drop=True)

    past_fhs_file = FBDPath(
        f"/{gbd_round_id}/past/population/{past_version}/population.nc")
    past_fhs = expand_dimensions(open_xr(past_fhs_file).data.sel(
        year_id=years.past_years,
        sex_id=forecast_fhs["sex_id"],
        age_group_id=forecast_fhs["age_group_id"],
        location_id=forecast_fhs["location_id"]),
                                 scenario=forecast_fhs.scenario.values)

    fhs_all_scenarios = xr.concat([past_fhs, forecast_fhs], dim="year_id")

    fhs = fhs_all_scenarios.sel(scenario=[-1, 0, 1])
    alt_sdg = fhs_all_scenarios.sel(scenario=[3])
    alt_99 = fhs_all_scenarios.sel(scenario=[2])

    ages = db.get_ages().query("age_group_id in @ALL_AGE_GROUP_IDS")
    # Explicit copy: adding a column to a column-subset of another frame
    # otherwise raises pandas' SettingWithCopyWarning.
    days = ages[["age_group_id", "age_group_days_start",
                 "age_group_days_end"]].copy()
    # Mid-point of each age group, converted from days to years.
    days["mean_age"] = (days["age_group_days_end"] -
                        (days["age_group_days_end"] -
                         days["age_group_days_start"]) / 2) / 365.25
    mean_age = days.set_index("age_group_id")["mean_age"].to_xarray()

    def _select_detailed(pop_da):
        """Restrict to the age groups with a mean age and to SEX_IDS."""
        return pop_da.sel(age_group_id=mean_age["age_group_id"],
                          sex_id=SEX_IDS)

    def _average_age(pop_da):
        """Population-weighted mean age over the age_group_id dimension."""
        return ((pop_da * mean_age).sum("age_group_id") /
                pop_da.sum("age_group_id"))

    data_fhs = _select_detailed(fhs)
    data_sdg = _select_detailed(alt_sdg)
    data_99 = _select_detailed(alt_99)

    avg_age_fhs = _average_age(data_fhs)
    avg_age_sdg = _average_age(data_sdg)
    avg_age_99 = _average_age(data_99)

    ds = data_fhs.rename("population").to_dataset()
    ds_sdg = data_sdg.rename("population").to_dataset()
    ds_99 = data_99.rename("population").to_dataset()

    return avg_age_fhs, avg_age_sdg, avg_age_99, ds, ds_sdg, ds_99
def all_weights_main(reference_scenario, diff_over_mean, truncate,
                     truncate_quantiles, replace_with_mean,
                     use_past_uncertainty, transform, max_weight,
                     weight_step_size, past_version, pv_version, years,
                     gbd_round_id, test_mode, **kwargs):
    """Predictive validity over the whole range of weights.

    For every weight in ``np.arange(0, max_weight, weight_step_size)`` the
    past years are used as hold-outs to forecast education, the RMSE against
    the observed "forecast" years is computed, and all RMSEs are saved
    together in ``education_arc_weight_rmse.nc`` along a ``weight``
    dimension.

    NOTE(review): both FBDPath arguments are empty strings in this copy of
    the code, so ``past_version`` and ``pv_version`` are not referenced.
    """
    logged_options = (
        ("diff_over_mean", diff_over_mean),
        ("truncate", truncate),
        ("truncate_quantiles", truncate_quantiles),
        ("replace_with_mean", replace_with_mean),
        ("reference_scenario", reference_scenario),
        ("use_past_uncertainty", use_past_uncertainty),
    )
    for option_name, option_value in logged_options:
        LOGGER.debug(f"{option_name}:{option_value}")

    LOGGER.debug("Reading in the past")
    past_dir = FBDPath("")
    past = open_xr(past_dir / "education.nc").data
    past = past.transpose(*list(past.coords))

    if use_past_uncertainty:
        LOGGER.debug("Using past draws for PV")
    else:
        LOGGER.debug("Using past means for PV")
        past = past.mean("draw")

    if test_mode:
        # Shrink the data to the first five age groups, draws and locations.
        past = past.sel(
            age_group_id=past["age_group_id"].values[:5],
            draw=past["draw"].values[:5],
            location_id=past["location_id"].values[:5])

    holdouts = past.sel(year_id=years.past_years)
    observed = past.sel(year_id=years.forecast_years)

    def _rmse_for_weight(weight_exp):
        """Forecast with one weight and wrap its RMSE in a 1-item array."""
        predicted = arc_forecast_education(
            holdouts, gbd_round_id, transform, weight_exp, years,
            reference_scenario,
            diff_over_mean, truncate, truncate_quantiles, replace_with_mean)
        reference_prediction = predicted.sel(
            scenario=REFERENCE_SCENARIO, drop=True)
        rmse = calc_rmse(reference_prediction, observed, years)
        return xr.DataArray([rmse.values], [[weight_exp]], dims=["weight"])

    LOGGER.debug("Calculating RMSE for all weights")
    weights_to_test = np.arange(0, max_weight, weight_step_size)
    per_weight_rmse = [
        _rmse_for_weight(weight_exp) for weight_exp in weights_to_test
    ]
    rmse_results = xr.concat(per_weight_rmse, dim="weight")

    out_dir = FBDPath("")
    out_dir.mkdir(parents=True, exist_ok=True)
    rmse_results.to_netcdf(str(out_dir / "education_arc_weight_rmse.nc"))
    LOGGER.info("RMSE is saved")
def _load_migration_rate():
    """Return forecast migration divided by the reference population.

    Migration is averaged over draws and summed over ``sex_id`` and
    ``age_group_id``; the population is the scenario-0 mean for all ages and
    both sexes in ``COUNTRIES``.

    Returns:
        xarray.DataArray: migration / population.
    """
    pop_version = settings.BASELINE_VERSIONS['population_mean_ui']
    pop_file = FBDPath(
        f"/{pop_version.gbd_round_id}/future/population/"
        f"{pop_version.version}") / "population_combined.nc"
    population = open_xr(pop_file).data.sel(
        scenario=0,
        quantile="mean",
        location_id=COUNTRIES,
        sex_id=BOTH_SEX_ID,
        age_group_id=ALL_AGE_ID)

    mig_version = settings.BASELINE_VERSIONS['migration']
    mig_file = FBDPath(
        f"/{mig_version.gbd_round_id}/"
        f"future/migration/{mig_version.version}") / "migration.nc"
    migration = open_xr(mig_file).data
    # Collapse draws to their mean, then total over sex and age.
    migration = migration.mean('draw').sum(['sex_id', 'age_group_id'])

    return migration / population
def one_weight_main(reference_scenario, transform, diff_over_mean, truncate,
                    truncate_quantiles, replace_with_mean,
                    use_past_uncertainty, weight_exp, past_version, pv_version,
                    years, gbd_round_id, test_mode, **kwargs):
    """Predictive validity for a single weight of the range of weights.

    The past years are used as hold-outs to forecast education with
    ``weight_exp``; the RMSE against the observed "forecast" years is saved
    as ``each_weight/<weight_exp>_rmse.nc``.

    NOTE(review): both FBDPath arguments are empty strings in this copy of
    the code (path structure removed for security), so ``past_version`` and
    ``pv_version`` are not referenced.
    """
    logged_options = (
        ("diff_over_mean", diff_over_mean),
        ("truncate", truncate),
        ("truncate_quantiles", truncate_quantiles),
        ("replace_with_mean", replace_with_mean),
        ("reference_scenario", reference_scenario),
        ("use_past_uncertainty", use_past_uncertainty),
    )
    for option_name, option_value in logged_options:
        LOGGER.debug(f"{option_name}:{option_value}")

    LOGGER.debug("Reading in the past")
    past_dir = FBDPath("")  # Path structure removed for security
    past = open_xr(past_dir / "education.nc").data
    past = past.transpose(*list(past.coords))

    if use_past_uncertainty:
        LOGGER.debug("Using past draws for PV")
    else:
        LOGGER.debug("Using past means for PV")
        past = past.mean("draw")

    if test_mode:
        # Shrink the data to the first five age groups, draws and locations.
        past = past.sel(
            age_group_id=past["age_group_id"].values[:5],
            draw=past["draw"].values[:5],
            location_id=past["location_id"].values[:5])

    holdouts = past.sel(year_id=years.past_years)
    observed = past.sel(year_id=years.forecast_years)

    LOGGER.debug(f"Calculating RMSE for {weight_exp}")
    predicted = arc_forecast_education(
        holdouts, gbd_round_id, transform, weight_exp, years,
        reference_scenario,
        diff_over_mean, truncate, truncate_quantiles, replace_with_mean)
    reference_prediction = predicted.sel(
        scenario=REFERENCE_SCENARIO, drop=True)
    rmse = calc_rmse(reference_prediction, observed, years)
    rmse_da = xr.DataArray([rmse.values], [[weight_exp]], dims=["weight"])

    out_dir = FBDPath("") / "each_weight"  # Path structure removed for security
    out_dir.mkdir(parents=True, exist_ok=True)
    rmse_da.to_netcdf(str(out_dir / f"{weight_exp}_rmse.nc"))
    LOGGER.info(f"Saving RMSE for {weight_exp}")
Exemple #22
0
def wpp_witt_pops():
    """Print the 2100 population projected by the WPP and Wittgenstein files.

    Reads the all-age, both-sex WPP aggregate and the Wittgenstein SSP2
    population file from the versions configured in ``settings``, selects
    ``location_id=1`` (presumably the global aggregate -- the printed sentence
    says "globally") for ``year_id=2100`` and prints both numbers in one
    sentence.

    Returns:
        None. The sentence is written to stdout.
    """
    wpp_version = settings.WPP_VERSIONS["population_aggs"]
    wpp_file = FBDPath(
        f"/{wpp_version.gbd_round_id}/future/population/{wpp_version.version}"
    ) / "2019_fhs_agg_allage_bothsex_only.nc"
    wpp_2100 = open_xr(wpp_file).data.sel(location_id=1, year_id=2100)

    witt_version = settings.WITT_VERSIONS["population"]
    witt_file = FBDPath(
        f"/{witt_version.gbd_round_id}/future/population/"
        f"{witt_version.version}"
    ) / "population_ssp2.nc"
    # The Wittgenstein file still carries sex and age dimensions, so they are
    # pinned here (sex_id=3 / age_group_id=22, matching the "allage_bothsex"
    # WPP file -- NOTE(review): confirm these ids mean both sexes / all ages).
    witt_2100 = open_xr(witt_file).data.sel(
        location_id=1, year_id=2100, sex_id=3, age_group_id=22)

    # _helper_round(..., 1e9): presumably expresses the count in billions.
    wpp_pop_val = _helper_round(wpp_2100.values.item(0), 1e9)
    witt_pop_val = _helper_round(witt_2100.values.item(0), 1e9)

    message = (
        f"Forecasts from this study differ from the UNPD and the "
        f"Wittgenstein Centre, which project {wpp_pop_val} billion and "
        f"{witt_pop_val} billion people globally in 2100, respectively.\n"
    )
    print(message)
def get_pop(forecast_pop_version, gbd_round_id, measure, draws, years,
            past_pop_version):
    """Load past and forecast populations and join them into one array.

    The past population is restricted to the sexes present in the forecast,
    given a ``draw`` dimension of length ``draws``, and then combined with the
    forecast through ``concat_past_future``.

    Args:
        forecast_pop_version (str):
            The version name of the forecast populations file used in FBDPath.
        gbd_round_id (int):
            The GBD round fed into FBDPath to pull the correct version of pops.
        measure (str):
            When equal to ``"live_births"`` the result is subset to the
            fertile age groups and ``sex_id=2`` (females); any other value
            keeps all ages for ``sex_id`` 1 and 2.
        draws (int):
            The number of desired draws. Used to expand the past population
            and forwarded to ``concat_past_future``.
        years:
            Year specification forwarded unchanged to ``concat_past_future``.
        past_pop_version (str):
            The version name of the past populations file used in FBDPath.

    Returns:
        (xarray.DataArray):
            Combined past and forecast population. For ``live_births`` the
            ``age_group_id`` dimension holds only ``FERTILE_AGE_GROUP_IDS``
            and the ``sex_id`` coordinate is dropped; otherwise ``sex_id``
            holds 1 and 2.
    """
    future_dir = FBDPath(
        f"{gbd_round_id}/future/population/{forecast_pop_version}")
    future_pop = open_xr(future_dir / "population.nc").data

    past_dir = FBDPath(f"{gbd_round_id}/past/population/{past_pop_version}")
    historical_pop = open_xr(past_dir / "population.nc").data
    # Align the past with whichever sexes the forecast actually contains.
    historical_pop = historical_pop.sel(sex_id=future_pop.sex_id.values)
    historical_pop = expand_dimensions(historical_pop, draw=range(draws))

    combined_pop = concat_past_future(
        historical_pop, future_pop, draws, years)

    if measure != "live_births":
        return combined_pop.sel(sex_id=[1, 2])

    fertile_females = combined_pop.sel(
        age_group_id=list(FERTILE_AGE_GROUP_IDS), sex_id=2)
    # sex_id is a scalar coordinate after the selection above; remove it.
    return fertile_females.drop(["sex_id"])
def taiwan_tfr():
    """Print the 2017 mean TFR for ``location_id=8``.

    The value comes from ``tfr_combined.nc`` of the baseline ``tfr_mean_ui``
    version configured in ``settings`` (``scenario=0``, ``quantile="mean"``).
    The printed sentence attributes the value to Taiwan, so location 8 is
    presumably Taiwan -- verify against the location hierarchy.

    Returns:
        None. The sentence fragment is written to stdout.
    """
    tfr_version = settings.BASELINE_VERSIONS["tfr_mean_ui"]
    tfr_file = FBDPath(
        f"/{tfr_version.gbd_round_id}/future/tfr/{tfr_version.version}"
    ) / "tfr_combined.nc"

    selectors = dict(scenario=0, location_id=8, quantile="mean", year_id=2017)
    current_tfr = open_xr(tfr_file).data.sel(**selectors).values.item(0)

    print(f"country like Taiwan, with a current TFR of {current_tfr}\n")
def main(migration_version, gbd_round_id):
    """Split migration counts by an age-sex pattern and save the result.

    Loads ``combined_age_sex_pattern.nc`` from the ``PATTERN_VERSION``
    migration directory (falling back to ``create_age_sex_xarray()`` when the
    file is not found), multiplies ``mig_counts.nc`` of ``migration_version``
    by that pattern, and writes the product to ``migration_split.nc`` in the
    same version directory.

    Args:
        migration_version (str):
            Version name of the migration directory that holds
            ``mig_counts.nc`` and receives ``migration_split.nc``.
        gbd_round_id (int):
            The GBD round used to build both FBDPaths.

    Returns:
        None. Output is saved with ``save_xr``.
    """
    # Age-sex pattern (loc, draw, age, sex).
    LOGGER.debug("Loading age-sex migration pattern")
    try:
        pattern_file = FBDPath(
            f'/{gbd_round_id}/future/migration/{PATTERN_VERSION}'
        ) / "combined_age_sex_pattern.nc"
        age_sex_pattern = open_xr(pattern_file).data
    except FileNotFoundError:
        # The pattern file has not been produced yet; build it now.
        age_sex_pattern = create_age_sex_xarray()

    # Migration counts (loc, draw, year).
    LOGGER.debug("Loading migration data")
    version_dir = FBDPath(
        f"/{gbd_round_id}/future/migration/{migration_version}/")
    counts = open_xr(version_dir / "mig_counts.nc").data.squeeze(drop=True)

    # Broadcasting yields counts by (loc, draw, year, age, sex).
    counts_by_age_sex = counts * age_sex_pattern

    LOGGER.debug("Saving age-sex split migration data")
    save_xr(
        counts_by_age_sex,
        version_dir / "migration_split.nc",
        metric="number",
        space="identity",
    )
Exemple #26
0
def tfr_below_replacement():
    """Print how many locations have a mean TFR below 2.1 in 2050 and 2100.

    Uses ``tfr_combined.nc`` of the baseline ``tfr_mean_ui`` version
    configured in ``settings``, restricted to ``scenario=0``, the locations in
    ``COUNTRIES`` and ``quantile="mean"``.

    Returns:
        None. The sentence is written to stdout.
    """
    tfr_version = settings.BASELINE_VERSIONS["tfr_mean_ui"]
    tfr_file = FBDPath(
        f"/{tfr_version.gbd_round_id}/future/tfr/{tfr_version.version}"
    ) / "tfr_combined.nc"
    mean_tfr = open_xr(tfr_file).data.sel(
        scenario=0, location_id=COUNTRIES, quantile="mean")

    # Replacement TFR is 2.1; count the locations strictly below it per year.
    n_below = {
        year: (mean_tfr.sel(year_id=year) < 2.1).sum().values.item(0)
        for year in (2050, 2100)
    }

    print(
        f"By 2050, {n_below[2050]} countries will have a TFR below replacement "
        f"and {n_below[2100]} below replacement by 2100.\n"
    )
Exemple #27
0
def read_sev(rei, sev, vaccine_sev, gbd_round_id, years, draws):
    """
    Reads in the future SEV for a risk or vaccine intervention.

    Vaccine reis (those returned by ``get_vaccine_reis``) are read from the
    ``vaccine`` stage file ``<rei>_new_ref.nc`` and converted to an anti-risk
    as ``1 - coverage``; every other rei is read from the ``sev`` stage file
    ``<rei>.nc`` unchanged.

    Args:
        rei (str): risk, could also be vaccine intervention.
        sev (str): upstream sev version.
        vaccine_sev (str): upstream vaccine sev version.
        gbd_round_id (int): gbd round id.
        years (YearRange): [past_start, forecast_start, forecast_end] years.
        draws (int): number of draws for output file.  This means input files
            will be up/down-sampled to meet this criterion.

    Returns:
        (xr.DataArray): SEV in dataarray form.
    """
    is_vaccine = rei in get_vaccine_reis(gbd_round_id)
    if is_vaccine:
        stage, version, suffix = "vaccine", vaccine_sev, "_new_ref.nc"
    else:
        stage, version, suffix = "sev", sev, ".nc"

    sev_file = FBDPath(
        gbd_round_id=gbd_round_id,
        past_or_future="future",
        stage=stage,
        version=version,
    ) / (rei + suffix)
    sev_da = open_xr(sev_file).data
    if is_vaccine:
        # Vaccine is treated as an anti-risk.
        sev_da = 1.0 - sev_da

    sev_da = conditionally_triggered_transformations(
        sev_da, gbd_round_id, years)

    if len(sev_da["draw"]) == draws:
        return sev_da
    return resample(sev_da, draws)
Exemple #28
0
def alt_scenario_pops():
    """Print the 2100 mean population under the two alternative scenarios.

    Reads ``population_combined.nc`` of the baseline ``population_mean_ui``
    version configured in ``settings`` once, restricts it to
    ``location_id=1``, ``age_group_id=22``, ``sex_id=3``, ``quantile="mean"``
    and ``year_id=2100`` (presumably global, all ages, both sexes -- the
    printed sentence says "global population"), and then picks scenario 3
    (SDG) and scenario 2 (99th percentile rates of change).

    Returns:
        None. The sentence is written to stdout.
    """
    pop_version = settings.BASELINE_VERSIONS["population_mean_ui"]
    pop_path = FBDPath(
        f"/{pop_version.gbd_round_id}/future/population/{pop_version.version}"
    ) / "population_combined.nc"

    # Load the file a single time; both scenarios share every other selector.
    mean_2100_pop = open_xr(pop_path).data.sel(
        location_id=1, age_group_id=22, sex_id=3, quantile="mean",
        year_id=2100)

    # Scenario 3 is SDG
    sdg_2100_pop = mean_2100_pop.sel(scenario=3).values.item(0)
    # Scenario 2 is the 99th percentile rates of change
    fastest_2100_pop = mean_2100_pop.sel(scenario=2).values.item(0)

    # _helper_round(..., 1e9): presumably expresses the count in billions.
    sdg_2100_pop = _helper_round(sdg_2100_pop, 1e9)
    fastest_2100_pop = _helper_round(fastest_2100_pop, 1e9)

    print(f"Alternative scenarios suggest meeting the SDG targets for "
          f"education and contraceptive met need will result in a global "
          f"population of {sdg_2100_pop} billion in 2100, and "
          f"{fastest_2100_pop} billion assuming 99th percentile rates of "
          f"change in educational attainment and met need for contraception.\n")
Exemple #29
0
def japan_econ():
    """Print Japan's rank among locations by 2100 GDP.

    Reads ``gdp.nc`` of the baseline ``gdp`` version configured in
    ``settings`` for ``scenario=0`` and ``year_id=2100``, attaches location
    names, sorts by the ``value`` column in descending order and reports the
    1-based position of the first row named "Japan".

    Returns:
        None. The sentence is written to stdout.

    Raises:
        IndexError: if no row has ``location_name == "Japan"``.
    """
    gdp_version = settings.BASELINE_VERSIONS["gdp"]
    gdp_file = FBDPath(
        f"/{gdp_version.gbd_round_id}/future/gdp/{gdp_version.version}"
    ) / "gdp.nc"
    gdp_2100 = open_xr(gdp_file).data.sel(scenario=0, year_id=2100)

    ranked = _add_location_name(gdp_2100.to_dataframe())
    # After reset_index the positional index follows the descending GDP order.
    ranked = ranked.sort_values(by='value', ascending=False).reset_index()

    is_japan = ranked['location_name'] == "Japan"
    japan_rank = ranked[is_japan].index[0] + 1  # zero-based index -> rank

    print(f"Despite huge declines in population expected this century, Japan "
          f"remains the {japan_rank} largest economy in 2100.\n")
Exemple #30
0
def tfr_2100():
    """Print the 2100 TFR for ``location_id=1`` with its uncertainty interval.

    Reads ``tfr_combined.nc`` of the baseline ``tfr_mean_ui`` version
    configured in ``settings`` for ``scenario=0``, ``location_id=1``
    (presumably the global aggregate -- the printed sentence says "global")
    and ``year_id=2100``, then reports the ``mean``, ``lower`` and ``upper``
    quantiles after passing each through ``_helper_round(value, 1)``.

    Returns:
        None. The sentence is written to stdout.
    """
    tfr_version = settings.BASELINE_VERSIONS["tfr_mean_ui"]
    tfr_file = FBDPath(
        f"/{tfr_version.gbd_round_id}/future/tfr/{tfr_version.version}"
    ) / "tfr_combined.nc"
    global_tfr = open_xr(tfr_file).data.sel(
        scenario=0, location_id=1, year_id=2100)

    # Pull every quantile first, then round, in mean/upper/lower order.
    raw = {
        name: global_tfr.sel(quantile=name).values.item(0)
        for name in ("mean", "upper", "lower")
    }
    rounded = {name: _helper_round(value, 1) for name, value in raw.items()}

    print(f"The global total fertility rate (TFR) in 2100 is forecasted to be "
          f"{rounded['mean']} (95% UI {rounded['lower']}–{rounded['upper']}).\n")