Example #1
0
def main(past_version, forecast_version, gbd_round_id, years):
    """Write one population-pyramid page per location into a single PDF.

    The plotting inputs come from ``prep_pop_da``; one figure is produced by
    ``pop_plot`` for every ``location_id`` in the table returned by
    ``db.get_locations_by_max_level(3)`` and appended to
    ``figure_7_population_pyramids.pdf`` under the plot directory of
    ``forecast_version``.

    Args:
        past_version (str): Version forwarded to ``prep_pop_da``.
        forecast_version (str): Version forwarded to ``prep_pop_da``; also
            names the output plot directory.
        gbd_round_id (int): GBD round ID; forwarded to ``prep_pop_da`` and
            used in the output path.
        years: Year specification forwarded to ``prep_pop_da`` and
            ``pop_plot``.
    """
    # Local import: pyplot is only needed to release figures once they have
    # been written. PdfPages already makes matplotlib a dependency here.
    import matplotlib.pyplot as plt

    avg_age_fhs, avg_age_sdg, avg_age_99, ds, ds_sdg, ds_99 = prep_pop_da(
        past_version, forecast_version, gbd_round_id, years)

    plot_dir = FBDPath(
        f"/{gbd_round_id}/future/population/{forecast_version}",
        root_dir="plot")
    # parents=True so a brand-new version directory can be created even when
    # its parent directories do not exist yet.
    plot_dir.mkdir(parents=True, exist_ok=True)
    pdf_file = plot_dir / "figure_7_population_pyramids.pdf"

    location_metadata = db.get_locations_by_max_level(3)
    location_hierarchy = location_metadata.set_index(
        "location_id").to_xarray()["parent_id"]

    with PdfPages(pdf_file) as pdf:
        for location_id in location_hierarchy["location_id"]:
            fig = pop_plot(avg_age_fhs,
                           avg_age_sdg,
                           avg_age_99,
                           ds,
                           ds_sdg,
                           ds_99,
                           years,
                           location_id=location_id)
            pdf.savefig(fig)
            # Release the figure right away; otherwise every page stays open
            # in memory until the whole loop has finished.
            plt.close(fig)
def all_weights_main(reference_scenario, diff_over_mean, truncate,
                     truncate_quantiles, replace_with_mean,
                     use_past_uncertainty, transform, max_weight,
                     weight_step_size, past_version, pv_version, years,
                     gbd_round_id, test_mode, **kwargs):
    """Predictive validity for one weight of the range of weights at a time.

    Reads past education data, forecasts ``years.forecast_years`` from
    ``years.past_years`` with ``arc_forecast_education`` once per candidate
    weight in ``np.arange(0, max_weight, weight_step_size)``, computes the
    RMSE of each forecast against the held-back observations and saves the
    RMSEs, indexed by ``weight``, to ``education_arc_weight_rmse.nc``.

    Args:
        reference_scenario: Forwarded to ``arc_forecast_education``.
        diff_over_mean: Forwarded to ``arc_forecast_education``.
        truncate: Forwarded to ``arc_forecast_education``.
        truncate_quantiles: Forwarded to ``arc_forecast_education``.
        replace_with_mean: Forwarded to ``arc_forecast_education``.
        use_past_uncertainty (bool): If falsy, the past is collapsed to its
            mean over ``draw`` before forecasting.
        transform: Forwarded to ``arc_forecast_education``.
        max_weight: Exclusive upper bound of the weights to test.
        weight_step_size: Step between consecutive weights.
        past_version: Not used by the current body (see NOTE below).
        pv_version: Not used by the current body (see NOTE below).
        years: Object exposing ``past_years`` and ``forecast_years``.
        gbd_round_id (int): Forwarded to ``arc_forecast_education``.
        test_mode (bool): If truthy, only the first five values of
            ``age_group_id``, ``draw`` and ``location_id`` are kept.
        **kwargs: Accepted and ignored.
    """
    LOGGER.debug("diff_over_mean:{}".format(diff_over_mean))
    LOGGER.debug("truncate:{}".format(truncate))
    LOGGER.debug("truncate_quantiles:{}".format(truncate_quantiles))
    LOGGER.debug("replace_with_mean:{}".format(replace_with_mean))
    LOGGER.debug("reference_scenario:{}".format(reference_scenario))
    LOGGER.debug("use_past_uncertainty:{}".format(use_past_uncertainty))

    LOGGER.debug("Reading in the past")
    # NOTE(review): the path template is empty, and past_version is never
    # used -- presumably the template was stripped and should be built from
    # gbd_round_id / past_version. Confirm and restore.
    past_path = FBDPath("".format())
    past = open_xr(past_path / "education.nc").data
    past = past.transpose(*list(past.coords))

    if not use_past_uncertainty:
        LOGGER.debug("Using past means for PV")
        past = past.mean("draw")
    else:
        LOGGER.debug("Using past draws for PV")

    if test_mode:
        # "draw" no longer exists when the past was averaged over draws
        # above, so only subset the dimensions that are still present.
        # Selecting on a missing dimension would raise.
        subset = {
            dim: past[dim].values[:5]
            for dim in ("age_group_id", "draw", "location_id")
            if dim in past.dims
        }
        past = past.sel(**subset)

    holdouts = past.sel(year_id=years.past_years)
    observed = past.sel(year_id=years.forecast_years)

    LOGGER.debug("Calculating RMSE for all weights")
    weights_to_test = np.arange(0, max_weight, weight_step_size)
    rmse_results = []
    for weight_exp in weights_to_test:
        predicted = arc_forecast_education(
            holdouts, gbd_round_id, transform, weight_exp, years,
            reference_scenario,
            diff_over_mean, truncate, truncate_quantiles, replace_with_mean)
        rmse = calc_rmse(predicted.sel(scenario=REFERENCE_SCENARIO, drop=True),
                         observed,
                         years)

        # Wrap the RMSE in a length-1 array labelled with its weight so the
        # per-weight results can be concatenated along "weight" below.
        rmse_da = xr.DataArray(
            [rmse.values], [[weight_exp]], dims=["weight"])
        rmse_results.append(rmse_da)
    rmse_results = xr.concat(rmse_results, dim="weight")

    # NOTE(review): same empty template as above; pv_version is never used --
    # presumably it belongs in this output path. Confirm and restore.
    pv_path = FBDPath("".format())
    pv_path.mkdir(parents=True, exist_ok=True)
    rmse_results.to_netcdf(str(pv_path / "education_arc_weight_rmse.nc"))
    LOGGER.info("RMSE is saved")
Example #3
0
def symlink_directly_modeled_paf_file(
        acause, rei, calculated_paf_version, directly_modeled_paf, gbd_round_id
        ):
    """Symlink a directly-modeled PAF file into the calculated-PAF version.

    For both the past and the future, the ``{acause}_{rei}.nc`` file of the
    directly-modeled PAF version is linked into the ``risk_acause_specific``
    directory of the calculated PAF version. That directory is created first
    if it does not exist.

    Args:
        acause (str):
            Cause of the cause-risk pair.
        rei (str):
            Risk of the cause-risk pair.
        calculated_paf_version (str):
            Version whose ``risk_acause_specific`` directory receives the
            symlinks.
        directly_modeled_paf (str):
            Version in which the directly-modeled PAF file to be symlinked
            resides.
        gbd_round_id (int):
            The numeric ID representing the GBD round.

    Raises:
        RuntimeError:
            If symlink sub-process fails.
    """
    def _risk_acause_dir(past_or_future, version):
        # Both versions share the same layout; only the version differs.
        version_root = FBDPath(
            gbd_round_id=gbd_round_id,
            past_or_future=past_or_future,
            stage="paf",
            version=version)
        return version_root / "risk_acause_specific"

    for epoch in ("past", "future"):
        link_dir = _risk_acause_dir(epoch, calculated_paf_version)
        link_dir.mkdir(parents=True, exist_ok=True)

        source_file = (
            _risk_acause_dir(epoch, directly_modeled_paf)
            / f"{acause}_{rei}.nc")

        symlink_file_to_directory(source_file, link_dir)
def make_run_log_file(version):
    """
    Copy the settings file into the run's output directory as ``versions.py``
    so people can see which versions and whatnot were used.

    Args:
        version (str): version name where the current mortality run is to be
            saved
    """
    # Make the run directory so this log can be saved. parents=True lets a
    # first-time run create the directory even if its parents are missing.
    run_dir = FBDPath("/{gri}/future/death/{v}/".format(
        gri=settings.GBD_ROUND_ID, v=version))
    run_dir.mkdir(parents=True, exist_ok=True)

    # NOTE(review): the source path is relative to the current working
    # directory (../fbd_cod/settings.py), so this only works when launched
    # from the expected location -- confirm with the callers.
    source_path = os.path.join(os.pardir, "fbd_cod/settings.py")
    dest_path = os.path.join(str(run_dir), "versions.py")

    copyfile(source_path, dest_path)
def main(gdp_version):
    """Draw the 'Top 25 Nations by Total GDP' arrow diagram and save it as PDF.

    The page has four columns (2017, 2030, 2050, 2100). For each of the first
    25 rows of ``ranked_data`` a box is drawn in every column, filled with the
    ``FILL`` colour of the label's super-region, and labelled with the rank
    and the label. Lines between adjacent columns connect the rank a location
    has in one year to its rank in the next; locations that enter or leave
    the top 25 are listed below the table (from row 26 on). A colour legend
    for the super-regions is drawn at the bottom, and the canvas is saved to
    ``table_1_2017_arrow_diagram.pdf`` in the plot directory of
    ``gdp_version``.

    Args:
        gdp_version (str): GDP version passed to ``load_data``; also names
            the output plot directory.
    """
    gdp = load_data(gdp_version)
    data, ranked_data = prep_data(gdp, YEAR_LIST)

    plot_dir = FBDPath(f"/{GBD_ROUND_ID}/future/gdp/{gdp_version}/",
                       root_dir='plot')
    plot_dir.mkdir(parents=True, exist_ok=True)

    location_metadata = db.get_locations_by_max_level(3)

    # location_name -> super_region_name, used to pick each box's fill colour
    region_dict = location_metadata.set_index(
        "location_name")["super_region_name"].to_dict()

    title = 'Top 25 Nations by Total GDP'
    plot_file = plot_dir / "table_1_2017_arrow_diagram.pdf"

    c = canvas.Canvas(str(plot_file), pagesize=(792.0, 612.0))

    # text size and style
    titletextsize = 12
    headertextsize = 10
    textsize = 8
    # vertical distance between consecutive table rows
    textgap = textsize * 2.0

    # write title
    # NOTE(review): titley (625) is larger than the second pagesize value
    # (612.0); if that is the page height the title lands off-page -- confirm.
    titley = 625

    # y coordinates of the header rows; table rows are offset from row2
    row1 = titley - (2.0 * textgap)
    row1_and_ahalf = titley - (3.0 * textgap)
    row2 = row1 - (2.0 * textgap)

    c.setFont("Helvetica-Bold", titletextsize)
    c.drawString(315, titley, "{title}".format(title=title))

    # write column headers
    c.setFont("Helvetica-Bold", textsize)
    year2017_columnwidth = 100
    # NOTE(review): `gap` is not referenced again in this function.
    gap = 500
    year2030_columnwidth = 100
    year2050_columnwidth = 100
    year2100_columnwidth = 100

    # set column x positions (counting from left to right); each column
    # starts 80 units after the previous column's right edge
    year2017_column = 70 + 3

    year2030_column = (year2017_column + year2017_columnwidth + 80)
    year2050_column = (year2030_column + year2030_columnwidth + 80)
    year2100_column = (year2050_column + year2050_columnwidth + 80)

    c.setFont("Helvetica-Bold", headertextsize)

    # name columns (an empty first line, then the year)
    textobject_year2017 = c.beginText(year2017_column + 40, row1_and_ahalf)
    for line in ["", f"{2017}"]:
        textobject_year2017.textLine(line)
    c.drawText(textobject_year2017)

    textobject_year2030 = c.beginText(year2030_column + 40, row1_and_ahalf)
    for line in ["", f"{2030}"]:
        textobject_year2030.textLine(line)
    c.drawText(textobject_year2030)

    textobject_year2050 = c.beginText(year2050_column + 40, row1_and_ahalf)
    for line in ["", f"{2050}"]:
        textobject_year2050.textLine(line)
    c.drawText(textobject_year2050)

    textobject_year2100 = c.beginText(year2100_column + 40, row1_and_ahalf)
    for line in ["", f"{2100}"]:
        textobject_year2100.textLine(line)
    c.drawText(textobject_year2100)

    # unknown territory

    # 1-based counter of loop iterations; rows 1..25 form the boxed table
    total_iter = 1

    # country position (after top 25): next free row below the table in each
    # column, for names of locations outside the top 25 in that year
    countryposition_2017 = 26
    countryposition_2030 = 26
    countryposition_2050 = 26
    countryposition_2100 = 26

    # same idea for the line end points below the table
    lineposition_2017 = 26
    lineposition_2030 = 26
    lineposition_2050 = 26
    lineposition_2100 = 26

    for index in ranked_data['rank_2017'].unique():
        # first row of ranked_data whose rank_2017 entry equals this value;
        # after reset_index() the former index is available as 'index'
        row_data = ranked_data.query(
            'rank_2017 == @index').reset_index().iloc[0]

        # The rank number shown in every column is the row position + 1; the
        # label differs per year.
        rank2017 = row_data['index'] + 1
        label2017 = row_data['rank_2017']

        rank2030 = row_data['index'] + 1
        label2030 = row_data['rank_2030']

        rank2050 = row_data['index'] + 1
        label2050 = row_data['rank_2050']

        rank2100 = row_data['index'] + 1
        label2100 = row_data['rank_2100']

        c.setFont("Helvetica", textsize)

        # determine rank change
        # NOTE(review): the 2017 label is matched against data['location_id']
        # and is also used as a location_name key into region_dict --
        # presumably that column holds names; confirm in prep_data.
        this_rank = label2017

        line_start_2017 = data.query(
            "location_id == @this_rank")['rank_2017'].values[0]
        line_end_2030 = data.query(
            "location_id == @this_rank")['rank_2030'].values[0]
        line_end_2050 = data.query(
            "location_id == @this_rank")['rank_2050'].values[0]
        line_end_2100 = data.query(
            "location_id == @this_rank")['rank_2100'].values[0]

        # draw rectangles (only for the first 25 rows)
        if total_iter < 26:
            # line style
            c.setDash(1, 0)
            # stroke colour
            c.setStrokeColorRGB(0, 0, 0)

            # 2017
            region = region_dict[label2017]
            c.setFillColorRGB(FILL[region][0], FILL[region][1],
                              FILL[region][2])

            c.rect(year2017_column,
                   row2 - (total_iter * textgap) - 2.5,
                   year2017_columnwidth,
                   textsize * 2.0,
                   stroke=1,
                   fill=1)
            # 2030
            region = region_dict[label2030]
            c.setFillColorRGB(FILL[region][0], FILL[region][1],
                              FILL[region][2])

            c.rect(year2030_column,
                   row2 - (total_iter * textgap) - 2.5,
                   year2030_columnwidth,
                   textsize * 2.0,
                   stroke=1,
                   fill=1)
            # 2050
            region = region_dict[label2050]
            c.setFillColorRGB(FILL[region][0], FILL[region][1],
                              FILL[region][2])

            c.rect(year2050_column,
                   row2 - (total_iter * textgap) - 2.5,
                   year2050_columnwidth,
                   textsize * 2.0,
                   stroke=1,
                   fill=1)
            # 2100
            region = region_dict[label2100]
            c.setFillColorRGB(FILL[region][0], FILL[region][1],
                              FILL[region][2])

            c.rect(year2100_column,
                   row2 - (total_iter * textgap) - 2.5,
                   year2100_columnwidth,
                   textsize * 2.0,
                   stroke=1,
                   fill=1)

        # draw country names

        # reset to opaque black so text is not drawn in the last fill colour
        c.setStrokeColorRGB(0, 0, 0)
        c.setFillColorRGB(0, 0, 0)
        c.setStrokeAlpha(1)
        c.setFillAlpha(1)

        # outside the top 25 in 2017, inside in 2030: list below 2017 column
        if (line_start_2017 > 25 and line_end_2030 < 26):
            c.drawString(year2017_column + 20,
                         row2 - (countryposition_2017 * textgap) + 2.5,
                         f"{int(rank2017)} {label2017}")
            countryposition_2017 += 1

        # inside the top 25 in 2017, outside in 2030: list below 2030 column
        if ((line_start_2017 < 26) and (line_end_2030 > 25)):
            c.drawString(year2030_column + 20,
                         row2 - (countryposition_2030 * textgap) + 2.5,
                         f"{int(line_end_2030)} {label2017}")
            countryposition_2030 += 1

        # outside in 2030, inside in 2050: list below 2030 column
        if ((line_end_2030 > 25) and (line_end_2050 < 26)):
            label = data.query(
                "rank_2050 == @line_end_2050")['location_id'].values[0]
            c.drawString(year2030_column + 20,
                         row2 - (countryposition_2030 * textgap) + 2.5,
                         f"{int(line_end_2030)} {label}")
            countryposition_2030 += 1

        # inside in 2030, outside in 2050: list below 2050 column
        if ((line_end_2030 < 26) and (line_end_2050 > 25)):
            label = data.query(
                "rank_2030 == @line_end_2030")['location_id'].values[0]
            c.drawString(year2050_column + 20,
                         row2 - (countryposition_2050 * textgap) + 2.5,
                         f"{int(line_end_2050)} {label}")
            countryposition_2050 += 1

        # outside in 2050, inside in 2100: list below 2050 column
        if ((line_end_2050 > 25) and (line_end_2100 < 26)):
            label = data.query(
                "rank_2100 == @line_end_2100")['location_id'].values[0]
            c.drawString(year2050_column + 20,
                         row2 - (countryposition_2050 * textgap) + 2.5,
                         f"{int(line_end_2050)} {label}")
            countryposition_2050 += 1

        # inside in 2050, outside in 2100: list below 2100 column
        if ((line_end_2050 < 26) and (line_end_2100 > 25)):
            label = data.query(
                "rank_2050 == @line_end_2050")['location_id'].values[0]
            c.drawString(year2100_column + 20,
                         row2 - (countryposition_2100 * textgap) + 2.5,
                         f"{int(line_end_2100)} {label}")
            countryposition_2100 += 1

        # rank + label inside each of the four boxes of this table row
        if total_iter < 26:
            c.drawString(year2017_column + 20,
                         row2 - (total_iter * textgap) + 2.5,
                         f"{rank2017} {label2017}")
            c.drawString(year2030_column + 20,
                         row2 - (total_iter * textgap) + 2.5,
                         f"{rank2030} {label2030}")
            c.drawString(year2050_column + 20,
                         row2 - (total_iter * textgap) + 2.5,
                         f"{rank2050} {label2050}")
            c.drawString(year2100_column + 20,
                         row2 - (total_iter * textgap) + 2.5,
                         f"{rank2100} {label2100}")

        # determine line type and draw

        c.setStrokeColorRGB(0, 0, 0)

        # col1-2
        # Dash (1, 0) when the rank number gets smaller from 2017 to 2030,
        # (3, 1) otherwise (including no change). Presumably (1, 0) renders
        # as a solid line and (3, 1) as dashed -- confirm against the canvas
        # library's setDash documentation.
        if line_start_2017 > line_end_2030:
            c.setDash(1, 0)
        else:
            c.setDash(3, 1)
        # An end point outside the top 25 is placed on the next free row
        # below the table instead of on its true rank row.
        if (line_start_2017 > 25) and (line_end_2030 < 26):
            c.line(year2017_column + year2017_columnwidth,
                   row2 - (lineposition_2017 * textgap) + (0.33 * textsize),
                   year2030_column,
                   row2 - (line_end_2030 * textgap) + (0.33 * textsize))
            lineposition_2017 += 1
        elif (line_start_2017 < 26) and (line_end_2030 > 25):
            c.line(year2017_column + year2017_columnwidth,
                   row2 - (line_start_2017 * textgap) + (0.33 * textsize),
                   year2030_column,
                   row2 - (lineposition_2030 * textgap) + (0.33 * textsize))
            lineposition_2030 += 1
        elif (line_start_2017 < 26) or (line_end_2030 < 26):
            # only reachable when both ranks are inside the top 25
            c.line(year2017_column + year2017_columnwidth,
                   row2 - (line_start_2017 * textgap) + (0.33 * textsize),
                   year2030_column,
                   row2 - (line_end_2030 * textgap) + (0.33 * textsize))

        # col2-3

        if line_end_2030 > line_end_2050:
            c.setDash(1, 0)
        else:
            c.setDash(3, 1)
        if (line_end_2030 > 25) and (line_end_2050 < 26):
            c.line(year2030_column + year2030_columnwidth,
                   row2 - (lineposition_2030 * textgap) + (0.33 * textsize),
                   year2050_column,
                   row2 - (line_end_2050 * textgap) + (0.33 * textsize))
            lineposition_2030 += 1
        elif ((line_end_2030 < 26) and (line_end_2050 > 25)):
            c.line(year2030_column + year2030_columnwidth,
                   row2 - (line_end_2030 * textgap) + (0.33 * textsize),
                   year2050_column,
                   row2 - (lineposition_2050 * textgap) + (0.33 * textsize))
            lineposition_2050 += 1
        elif (line_end_2030 < 26) or (line_end_2050 < 26):
            c.line(year2030_column + year2030_columnwidth,
                   row2 - (line_end_2030 * textgap) + (0.33 * textsize),
                   year2050_column,
                   row2 - (line_end_2050 * textgap) + (0.33 * textsize))

        # col3-4

        if line_end_2050 > line_end_2100:
            c.setDash(1, 0)
        else:
            c.setDash(3, 1)
        if ((line_end_2050 > 25) and (line_end_2100 < 26)):
            c.line(year2050_column + year2050_columnwidth,
                   row2 - (lineposition_2050 * textgap) + (0.33 * textsize),
                   year2100_column,
                   row2 - (line_end_2100 * textgap) + (0.33 * textsize))
            lineposition_2050 += 1
        elif ((line_end_2050 < 26) and (line_end_2100 > 25)):
            c.line(year2050_column + year2050_columnwidth,
                   row2 - (line_end_2050 * textgap) + (0.33 * textsize),
                   year2100_column,
                   row2 - (lineposition_2100 * textgap) + (0.33 * textsize))
            lineposition_2100 += 1
        elif (line_end_2050 < 26) or (line_end_2100 < 26):
            c.line(year2050_column + year2050_columnwidth,
                   row2 - (line_end_2050 * textgap) + (0.33 * textsize),
                   year2100_column,
                   row2 - (line_end_2100 * textgap) + (0.33 * textsize))

        # iterate
        total_iter = total_iter + 1

    # Legend: one filled box per super-region, starting rect_loc rows below
    # row2 and laid out in two columns. The dash pattern is whatever the last
    # loop iteration left set.
    rect_loc = 31

    region = "High-income"
    c.setFillColorRGB(FILL[region][0], FILL[region][1], FILL[region][2])

    c.rect(year2017_column,
           row2 - (rect_loc * textgap) - 2.5,
           year2017_columnwidth + 20,
           textsize * 2.0,
           stroke=1,
           fill=1)

    # back to opaque black for the legend text
    c.setStrokeColorRGB(0, 0, 0)
    c.setFillColorRGB(0, 0, 0)
    c.setStrokeAlpha(1)
    c.setFillAlpha(1)

    c.drawString(year2017_column + 10, row2 - (rect_loc * textgap) + 2.5,
                 "High-income")

    region = "Southeast Asia, East Asia, and Oceania"
    c.setFillColorRGB(FILL[region][0], FILL[region][1], FILL[region][2])

    c.rect(year2030_column - 60,
           row2 - (rect_loc * textgap) - 2.5,
           year2017_columnwidth + 90,
           textsize * 2.0,
           stroke=1,
           fill=1)

    c.setStrokeColorRGB(0, 0, 0)
    c.setFillColorRGB(0, 0, 0)
    c.setStrokeAlpha(1)
    c.setFillAlpha(1)

    c.drawString(year2030_column - 50, row2 - (rect_loc * textgap) + 2.5,
                 f"{region}")

    region = "South Asia"
    c.setFillColorRGB(FILL[region][0], FILL[region][1], FILL[region][2])

    c.rect(year2017_column,
           row2 - ((rect_loc + 1) * textgap) - 2.5,
           year2017_columnwidth + 20,
           textsize * 2.0,
           stroke=1,
           fill=1)

    c.setStrokeColorRGB(0, 0, 0)
    c.setFillColorRGB(0, 0, 0)
    c.setStrokeAlpha(1)
    c.setFillAlpha(1)

    c.drawString(year2017_column + 10, row2 - ((rect_loc + 1) * textgap) + 2.5,
                 "South Asia")

    region = "Latin America and Caribbean"
    c.setFillColorRGB(FILL[region][0], FILL[region][1], FILL[region][2])

    c.rect(year2030_column - 60,
           row2 - ((rect_loc + 1) * textgap) - 2.5,
           year2017_columnwidth + 90,
           textsize * 2.0,
           stroke=1,
           fill=1)

    c.setStrokeColorRGB(0, 0, 0)
    c.setFillColorRGB(0, 0, 0)
    c.setStrokeAlpha(1)
    c.setFillAlpha(1)

    c.drawString(year2030_column - 50, row2 - ((rect_loc + 1) * textgap) + 2.5,
                 "Latin America and Caribbean")

    region = "Central Europe, Eastern Europe, and Central Asia"
    c.setFillColorRGB(FILL[region][0], FILL[region][1], FILL[region][2])

    c.rect(year2030_column - 60,
           row2 - ((rect_loc + 2) * textgap) - 2.5,
           year2017_columnwidth + 90,
           textsize * 2.0,
           stroke=1,
           fill=1)

    c.setStrokeColorRGB(0, 0, 0)
    c.setFillColorRGB(0, 0, 0)
    c.setStrokeAlpha(1)
    c.setFillAlpha(1)

    c.drawString(year2030_column - 50, row2 - ((rect_loc + 2) * textgap) + 2.5,
                 "Central Europe, Eastern Europe, and Central Asia")

    region = "North Africa and Middle East"
    c.setFillColorRGB(FILL[region][0], FILL[region][1], FILL[region][2])

    c.rect(year2017_column,
           row2 - ((rect_loc + 2) * textgap) - 2.5,
           year2017_columnwidth + 20,
           textsize * 2.0,
           stroke=1,
           fill=1)

    c.setStrokeColorRGB(0, 0, 0)
    c.setFillColorRGB(0, 0, 0)
    c.setStrokeAlpha(1)
    c.setFillAlpha(1)

    c.drawString(year2017_column + 10, row2 - ((rect_loc + 2) * textgap) + 2.5,
                 "North Africa and Middle East")

    region = "Sub-Saharan Africa"
    c.setFillColorRGB(FILL[region][0], FILL[region][1], FILL[region][2])

    c.rect(year2017_column,
           row2 - ((rect_loc + 3) * textgap) - 2.5,
           year2017_columnwidth + 20,
           textsize * 2.0,
           stroke=1,
           fill=1)

    c.setStrokeColorRGB(0, 0, 0)
    c.setFillColorRGB(0, 0, 0)
    c.setStrokeAlpha(1)
    c.setFillAlpha(1)

    c.drawString(year2017_column + 10, row2 - ((rect_loc + 3) * textgap) + 2.5,
                 "Sub-Saharan Africa")

    # write the finished page to plot_file
    c.save()
def run_against(version,
                pop_version,
                asfr_version,
                lifetable_version,
                migration_version,
                srb_version,
                gbd_round_id,
                location_idx,
                years,
                location_id,
                draws,
                test=False):
    """
    Takes versions for files, finds the files, and computes future
    populations. It then saves those files. This is what you call from
    the pipeline.

    One file ``{location}.nc`` is written per processed location. If
    ``location_idx`` is given it selects a single location by position; an
    out-of-range index ends the process with exit status 0. Otherwise
    ``location_id`` selects a single location, and if neither is given all
    locations in the population data are processed.

    Args:
        version (str): Version name for output
        pop_version (str): Version for population
        asfr_version (str): version for asfr
        lifetable_version (list[str]): List of versions for lifetable
        migration_version (list[str]): List of versions for migration
        srb_version: Version for srb, forwarded to ``read_datasets``.
        gbd_round_id (int): GBD Round ID, 4 is 2016
        location_idx (int|None): Zero-based index into list of locations.
        years (YearRange): years for past and forecast.
        location_id (int|None): A location ID.
        draws: Forwarded to ``read_datasets``.
        test (bool): Run a reduced subset of locations and draws.

    Returns:
        None

    Raises:
        SystemExit: With code 0 when ``location_idx`` is out of bounds.
    """
    out_path = FBDPath("/{}/future/population/{}".format(
        gbd_round_id, version))
    try:
        out_path.mkdir(parents=True, exist_ok=True)
    except OSError as ose:
        # Logged but not re-raised, so the run continues without the output
        # directory having been created here.
        LOGGER.error("Could not create output directory {}: {}".format(
            out_path, ose))

    asfr_lim, lifetable_lim, pop, migration, srb =\
        agreement_rules(
            *read_datasets(
                asfr_version, gbd_round_id,
                lifetable_version, pop_version, migration_version, years,
                srb_version, draws),
            years
        )

    ruler = timeline(pop.age_group_id.values, asfr_lim.age_group_id.values)

    # Default: every location present in the population data.
    locations = pop.location_id.values
    if location_idx is not None:
        try:
            locations = [locations[location_idx]]
        except IndexError:
            LOGGER.warning("Asked for out-of-bounds location {} of {}".format(
                location_idx, locations.shape[0]))
            # Maybe you ask for 200 jobs but have 195 countries. OK.
            # SystemExit is raised directly because the bare ``exit`` helper
            # comes from the ``site`` module and is not guaranteed to exist.
            raise SystemExit(0)
        LOGGER.info("Using location_id {} from location_idx {}".format(
            locations, location_idx))
    elif location_id is not None:
        locations = [location_id]

    for location in locations:
        begin_time = perf_time()
        loc_idx = dict(location_id=location)

        future = one_location(pop.loc[loc_idx], asfr_lim.loc[loc_idx],
                              lifetable_lim.loc[loc_idx],
                              migration.loc[loc_idx], srb.loc[loc_idx], ruler,
                              gbd_round_id, years, test)
        out_name = out_path / "{}.nc".format(location)
        future.coords["location_id"] = location
        summary = summarize_pop(future)
        elapsed = perf_time() - begin_time
        LOGGER.info("Elapsed {}".format(elapsed))
        write_begin = perf_time()
        # NOTE(review): the output version is passed under the ``death``
        # keyword -- confirm this is the intended attribute name.
        save_xr(summary,
                out_name,
                metric="number",
                space="identity",
                death=version,
                pop=pop_version,
                asfr=asfr_version,
                lifetable=lifetable_version,
                migration=migration_version,
                srb=srb_version)
        LOGGER.info("Wrote {}".format(out_name))
        LOGGER.info("Write time Elapsed {}".format(perf_time() - write_begin))