def main(past_version, forecast_version, gbd_round_id, years):
    """Save one population figure per location into a multi-page PDF.

    The plotting inputs come from ``prep_pop_da``; one figure is produced by
    ``pop_plot`` for every ``location_id`` returned by
    ``db.get_locations_by_max_level(3)`` and appended as a page of
    ``figure_7_population_pyramids.pdf`` under the ``plot`` root.

    Args:
        past_version (str): Forwarded to ``prep_pop_da``.
        forecast_version (str): Forwarded to ``prep_pop_da``; also names the
            output directory.
        gbd_round_id (int): Forwarded to ``prep_pop_da``; also part of the
            output directory path.
        years: Forwarded to ``prep_pop_da`` and ``pop_plot``.
    """
    # Function-scope import so this block does not depend on a module-level
    # pyplot import being present.
    import matplotlib.pyplot as plt

    avg_age_fhs, avg_age_sdg, avg_age_99, ds, ds_sdg, ds_99 = prep_pop_da(
        past_version, forecast_version, gbd_round_id, years)

    plot_dir = FBDPath(
        f"/{gbd_round_id}/future/population/{forecast_version}",
        root_dir="plot")
    # parents=True: a brand-new round/version has no parent directory yet.
    plot_dir.mkdir(parents=True, exist_ok=True)
    pdf_file = plot_dir / "figure_7_population_pyramids.pdf"

    location_metadata = db.get_locations_by_max_level(3)
    location_hierarchy = location_metadata.set_index(
        "location_id").to_xarray()["parent_id"]

    with PdfPages(pdf_file) as pdf:
        for location_id in location_hierarchy["location_id"]:
            fig = pop_plot(
                avg_age_fhs, avg_age_sdg, avg_age_99, ds, ds_sdg, ds_99,
                years, location_id=location_id)
            pdf.savefig(fig)
            # Release the figure once it is on the page; otherwise one open
            # figure per location accumulates for the whole run.
            plt.close(fig)
def all_weights_main(reference_scenario, diff_over_mean, truncate,
                     truncate_quantiles, replace_with_mean,
                     use_past_uncertainty, transform, max_weight,
                     weight_step_size, past_version, pv_version, years,
                     gbd_round_id, test_mode, **kwargs):
    """Compute predictive-validity RMSE for every weight in a range.

    The past data are split into holdouts (``years.past_years``) and
    observations (``years.forecast_years``). For each weight in
    ``np.arange(0, max_weight, weight_step_size)`` the holdouts are forecast
    with ``arc_forecast_education`` and scored against the observations with
    ``calc_rmse``. All scores are concatenated along a ``weight`` dimension
    and written to ``education_arc_weight_rmse.nc``.

    Args:
        reference_scenario: Forwarded to ``arc_forecast_education``.
        diff_over_mean: Forwarded to ``arc_forecast_education``.
        truncate: Forwarded to ``arc_forecast_education``.
        truncate_quantiles: Forwarded to ``arc_forecast_education``.
        replace_with_mean: Forwarded to ``arc_forecast_education``.
        use_past_uncertainty (bool): When falsy, the past is averaged over
            ``draw`` before anything else is done.
        transform: Forwarded to ``arc_forecast_education``.
        max_weight (float): Exclusive upper bound of the weight range.
        weight_step_size (float): Step between consecutive weights.
        past_version (str): Not used by the visible body (see NOTE below).
        pv_version (str): Not used by the visible body (see NOTE below).
        years: Object exposing ``past_years`` and ``forecast_years``; also
            forwarded to ``arc_forecast_education`` and ``calc_rmse``.
        gbd_round_id (int): Forwarded to ``arc_forecast_education``.
        test_mode (bool): When truthy, keep only the first five age groups,
            locations and (if present) draws.
        **kwargs: Accepted and ignored.

    Raises:
        ValueError: If the weight range is empty.
    """
    LOGGER.debug("diff_over_mean:%s", diff_over_mean)
    LOGGER.debug("truncate:%s", truncate)
    LOGGER.debug("truncate_quantiles:%s", truncate_quantiles)
    LOGGER.debug("replace_with_mean:%s", replace_with_mean)
    LOGGER.debug("reference_scenario:%s", reference_scenario)
    LOGGER.debug("use_past_uncertainty:%s", use_past_uncertainty)

    LOGGER.debug("Reading in the past")
    # NOTE(review): the path template is blank in this file, so
    # ``past_version``/``gbd_round_id`` never reach the path. Left untouched
    # because the intended location cannot be recovered from here.
    past_path = FBDPath("".format())
    past = open_xr(past_path / "education.nc").data
    past = past.transpose(*list(past.coords))

    if not use_past_uncertainty:
        LOGGER.debug("Using past means for PV")
        past = past.mean("draw")
    else:
        LOGGER.debug("Using past draws for PV")

    if test_mode:
        test_subset = dict(
            age_group_id=past["age_group_id"].values[:5],
            location_id=past["location_id"].values[:5])
        # ``draw`` is gone after the mean above; selecting on it
        # unconditionally raised KeyError when test_mode was combined with
        # use_past_uncertainty=False.
        if "draw" in past.dims:
            test_subset["draw"] = past["draw"].values[:5]
        past = past.sel(**test_subset)

    holdouts = past.sel(year_id=years.past_years)
    observed = past.sel(year_id=years.forecast_years)

    LOGGER.debug("Calculating RMSE for all weights")
    # Half-open range: ``max_weight`` itself is not tested.
    weights_to_test = np.arange(0, max_weight, weight_step_size)
    if weights_to_test.size == 0:
        # Same exception type the final concat would raise on an empty
        # list, but with an actionable message.
        raise ValueError(
            "No weights to test for max_weight={!r} and "
            "weight_step_size={!r}".format(max_weight, weight_step_size))

    rmse_results = []
    for weight_exp in weights_to_test:
        predicted = arc_forecast_education(
            holdouts, gbd_round_id, transform, weight_exp, years,
            reference_scenario, diff_over_mean, truncate,
            truncate_quantiles, replace_with_mean)
        rmse = calc_rmse(
            predicted.sel(scenario=REFERENCE_SCENARIO, drop=True),
            observed, years)
        # Wrap the score in a length-1 array labelled by its weight so the
        # per-weight results can be concatenated below.
        rmse_da = xr.DataArray(
            [rmse.values], [[weight_exp]], dims=["weight"])
        rmse_results.append(rmse_da)
    rmse_results = xr.concat(rmse_results, dim="weight")

    # NOTE(review): blank path template here as well; ``pv_version`` is
    # never used.
    pv_path = FBDPath("".format())
    pv_path.mkdir(parents=True, exist_ok=True)
    rmse_results.to_netcdf(str(pv_path / "education_arc_weight_rmse.nc"))
    LOGGER.info("RMSE is saved")
def symlink_directly_modeled_paf_file(
        acause, rei, calculated_paf_version, directly_modeled_paf,
        gbd_round_id):
    """Link a directly-modeled PAF file into the calculated-PAF directories.

    For both the past and the future, the ``{acause}_{rei}.nc`` file of the
    directly-modeled PAF version is symlinked into the
    ``risk_acause_specific`` directory of the calculated PAF version. That
    directory is created first if it does not exist.

    Args:
        acause (str): Cause of the cause-risk pair.
        rei (str): Risk of the cause-risk pair.
        calculated_paf_version (str): PAF version whose directories receive
            the symlinks.
        directly_modeled_paf (str): PAF version holding the directly-modeled
            file to link to.
        gbd_round_id (int): The numeric ID representing the GBD round.

    Raises:
        RuntimeError: If the symlink sub-process fails (as stated by the
            original documentation of this function).
    """
    subdir = "risk_acause_specific"
    file_name = f"{acause}_{rei}.nc"

    for epoch in ("past", "future"):
        # Where the link goes.
        target_root = FBDPath(
            gbd_round_id=gbd_round_id,
            past_or_future=epoch,
            stage="paf",
            version=calculated_paf_version)
        target_dir = target_root / subdir
        target_dir.mkdir(parents=True, exist_ok=True)

        # What the link points at.
        source_root = FBDPath(
            gbd_round_id=gbd_round_id,
            past_or_future=epoch,
            stage="paf",
            version=directly_modeled_paf)
        source_file = source_root / subdir / file_name

        symlink_file_to_directory(source_file, target_dir)
def make_run_log_file(version):
    """Copy the settings file into the run's output directory.

    The copy is stored as ``versions.py`` so people can see which versions
    and settings were used for the run.

    Args:
        version (str): Version name where the current mortality run is to be
            saved.
    """
    # Make the run directory so this log can be saved. parents=True because
    # a first run for a new round/version has no parent directory yet.
    run_dir = FBDPath(
        f"/{settings.GBD_ROUND_ID}/future/death/{version}/")
    run_dir.mkdir(parents=True, exist_ok=True)

    # The source path is relative to the current working directory
    # (``os.pardir`` is ".."), so this only works when the process is
    # started from a sibling directory of ``fbd_cod``.
    source_path = os.path.join(os.pardir, "fbd_cod/settings.py")
    dest_path = os.path.join(str(run_dir), "versions.py")
    copyfile(source_path, dest_path)
def main(gdp_version):
    """Draw the "Top 25 Nations by Total GDP" arrow diagram into a PDF.

    GDP is loaded with ``load_data`` and ranked with ``prep_data`` for
    ``YEAR_LIST``. On a 792 x 612 point canvas the function draws four
    columns (2017, 2030, 2050, 2100) of up to 25 boxes, each filled with the
    ``FILL`` colour of the entry's super-region, connecting lines between
    adjacent columns, extra labels for entries that cross the top-25
    boundary, and a colour legend. The result is saved as
    ``table_1_2017_arrow_diagram.pdf`` in the GDP plot directory.

    Args:
        gdp_version (str): Passed to ``load_data``; also names the output
            directory.
    """
    gdp = load_data(gdp_version)
    data, ranked_data = prep_data(gdp, YEAR_LIST)
    plot_dir = FBDPath(f"/{GBD_ROUND_ID}/future/gdp/{gdp_version}/",
                       root_dir='plot')
    plot_dir.mkdir(parents=True, exist_ok=True)
    location_metadata = db.get_locations_by_max_level(3)
    # location_name -> super_region_name, used to pick each box's colour
    region_dict = location_metadata.set_index(
        "location_name")["super_region_name"].to_dict()
    title = 'Top 25 Nations by Total GDP'
    plot_file = plot_dir / "table_1_2017_arrow_diagram.pdf"
    c = canvas.Canvas(str(plot_file), pagesize=(792.0, 612.0))
    # text size and style
    titletextsize = 12
    headertextsize = 10
    textsize = 8
    # vertical distance between consecutive rows (16.0)
    textgap = textsize * 2.0
    # write title
    # NOTE(review): titley (625) is larger than the page height (612) set
    # above — confirm whether the title is meant to be visible.
    titley = 625
    row1 = titley - (2.0 * textgap)
    row1_and_ahalf = titley - (3.0 * textgap)
    # y origin for the table: row n is drawn at row2 - n * textgap
    row2 = row1 - (2.0 * textgap)
    c.setFont("Helvetica-Bold", titletextsize)
    c.drawString(315, titley, "{title}".format(title=title))
    # write column headers
    c.setFont("Helvetica-Bold", textsize)
    year2017_columnwidth = 100
    # NOTE(review): ``gap`` is never read in this function.
    gap = 500
    year2030_columnwidth = 100
    year2050_columnwidth = 100
    year2100_columnwidth = 100
    # set column x positions (counting from left to right); each column
    # starts 80 points after the previous column ends
    year2017_column = 70 + 3
    year2030_column = (year2017_column + year2017_columnwidth + 80)
    year2050_column = (year2030_column + year2030_columnwidth + 80)
    year2100_column = (year2050_column + year2050_columnwidth + 80)
    c.setFont("Helvetica-Bold", headertextsize)
    # name columns: each header is a text object holding an empty line
    # followed by the year
    textobject_year2017 = c.beginText(year2017_column + 40, row1_and_ahalf)
    for line in ["", f"{2017}"]:
        textobject_year2017.textLine(line)
    c.drawText(textobject_year2017)
    textobject_year2030 = c.beginText(year2030_column + 40, row1_and_ahalf)
    for line in ["", f"{2030}"]:
        textobject_year2030.textLine(line)
    c.drawText(textobject_year2030)
    textobject_year2050 = c.beginText(year2050_column + 40, row1_and_ahalf)
    for line in ["", f"{2050}"]:
        textobject_year2050.textLine(line)
    c.drawText(textobject_year2050)
    textobject_year2100 = c.beginText(year2100_column + 40, row1_and_ahalf)
    for line in ["", f"{2100}"]:
        textobject_year2100.textLine(line)
    c.drawText(textobject_year2100)
    # 1-based counter of the row being processed
    total_iter = 1
    # next free row below the top 25 for overflow labels, per column
    countryposition_2017 = 26
    countryposition_2030 = 26
    countryposition_2050 = 26
    countryposition_2100 = 26
    # next free row below the top 25 for overflow line endpoints, per column
    lineposition_2017 = 26
    lineposition_2030 = 26
    lineposition_2050 = 26
    lineposition_2100 = 26
    for index in ranked_data['rank_2017'].unique():
        row_data = ranked_data.query(
            'rank_2017 == @index').reset_index().iloc[0]
        # All four rank numbers come from the same row position; only the
        # label differs per year. Presumably row i of ranked_data holds the
        # entry ranked i + 1 in each year — TODO confirm against prep_data.
        rank2017 = row_data['index'] + 1
        label2017 = row_data['rank_2017']
        rank2030 = row_data['index'] + 1
        label2030 = row_data['rank_2030']
        rank2050 = row_data['index'] + 1
        label2050 = row_data['rank_2050']
        rank2100 = row_data['index'] + 1
        label2100 = row_data['rank_2100']
        c.setFont("Helvetica", textsize)
        # determine rank change: look up the 2017 entry's rank in each year.
        # NOTE(review): ``data['location_id']`` is compared with the label
        # that is also used as a ``location_name`` key in region_dict —
        # confirm what that column holds.
        this_rank = label2017
        line_start_2017 = data.query(
            "location_id == @this_rank")['rank_2017'].values[0]
        line_end_2030 = data.query(
            "location_id == @this_rank")['rank_2030'].values[0]
        line_end_2050 = data.query(
            "location_id == @this_rank")['rank_2050'].values[0]
        line_end_2100 = data.query(
            "location_id == @this_rank")['rank_2100'].values[0]
        # draw rectangles (first 25 rows only), one per column, filled with
        # the super-region colour of the entry shown in that column
        if total_iter < 26:
            # line style
            c.setDash(1, 0)
            # stroke colour
            c.setStrokeColorRGB(0, 0, 0)
            # 2017
            region = region_dict[label2017]
            c.setFillColorRGB(FILL[region][0], FILL[region][1],
                              FILL[region][2])
            c.rect(year2017_column, row2 - (total_iter * textgap) - 2.5,
                   year2017_columnwidth, textsize * 2.0, stroke=1, fill=1)
            # 2030
            region = region_dict[label2030]
            c.setFillColorRGB(FILL[region][0], FILL[region][1],
                              FILL[region][2])
            c.rect(year2030_column, row2 - (total_iter * textgap) - 2.5,
                   year2030_columnwidth, textsize * 2.0, stroke=1, fill=1)
            # 2050
            region = region_dict[label2050]
            c.setFillColorRGB(FILL[region][0], FILL[region][1],
                              FILL[region][2])
            c.rect(year2050_column, row2 - (total_iter * textgap) - 2.5,
                   year2050_columnwidth, textsize * 2.0, stroke=1, fill=1)
            # 2100
            region = region_dict[label2100]
            c.setFillColorRGB(FILL[region][0], FILL[region][1],
                              FILL[region][2])
            c.rect(year2100_column, row2 - (total_iter * textgap) - 2.5,
                   year2100_columnwidth, textsize * 2.0, stroke=1, fill=1)
        # draw country names
        c.setStrokeColorRGB(0, 0, 0)
        c.setFillColorRGB(0, 0, 0)
        c.setStrokeAlpha(1)
        c.setFillAlpha(1)
        # Overflow labels: an entry that is outside the top 25 on one side
        # of a column gap and inside it on the other side is written below
        # the table in the column where it is outside, at the next free
        # row, and that column's counter advances.
        if (line_start_2017 > 25 and line_end_2030 < 26):
            c.drawString(year2017_column + 20,
                         row2 - (countryposition_2017 * textgap) + 2.5,
                         f"{int(rank2017)} {label2017}")
            countryposition_2017 += 1
        if ((line_start_2017 < 26) and (line_end_2030 > 25)):
            c.drawString(year2030_column + 20,
                         row2 - (countryposition_2030 * textgap) + 2.5,
                         f"{int(line_end_2030)} {label2017}")
            countryposition_2030 += 1
        if ((line_end_2030 > 25) and (line_end_2050 < 26)):
            label = data.query(
                "rank_2050 == @line_end_2050")['location_id'].values[0]
            c.drawString(year2030_column + 20,
                         row2 - (countryposition_2030 * textgap) + 2.5,
                         f"{int(line_end_2030)} {label}")
            countryposition_2030 += 1
        if ((line_end_2030 < 26) and (line_end_2050 > 25)):
            label = data.query(
                "rank_2030 == @line_end_2030")['location_id'].values[0]
            c.drawString(year2050_column + 20,
                         row2 - (countryposition_2050 * textgap) + 2.5,
                         f"{int(line_end_2050)} {label}")
            countryposition_2050 += 1
        if ((line_end_2050 > 25) and (line_end_2100 < 26)):
            label = data.query(
                "rank_2100 == @line_end_2100")['location_id'].values[0]
            c.drawString(year2050_column + 20,
                         row2 - (countryposition_2050 * textgap) + 2.5,
                         f"{int(line_end_2050)} {label}")
            countryposition_2050 += 1
        if ((line_end_2050 < 26) and (line_end_2100 > 25)):
            label = data.query(
                "rank_2050 == @line_end_2050")['location_id'].values[0]
            c.drawString(year2100_column + 20,
                         row2 - (countryposition_2100 * textgap) + 2.5,
                         f"{int(line_end_2100)} {label}")
            countryposition_2100 += 1
        # labels inside the boxes of the first 25 rows
        if total_iter < 26:
            c.drawString(year2017_column + 20,
                         row2 - (total_iter * textgap) + 2.5,
                         f"{rank2017} {label2017}")
            c.drawString(year2030_column + 20,
                         row2 - (total_iter * textgap) + 2.5,
                         f"{rank2030} {label2030}")
            c.drawString(year2050_column + 20,
                         row2 - (total_iter * textgap) + 2.5,
                         f"{rank2050} {label2050}")
            c.drawString(year2100_column + 20,
                         row2 - (total_iter * textgap) + 2.5,
                         f"{rank2100} {label2100}")
        # determine line type and draw
        # col1-2: setDash(1, 0) when the rank number gets smaller (the
        # entry moves up), setDash(3, 1) otherwise — presumably solid vs
        # dashed; confirm against ReportLab's setDash.
        c.setStrokeColorRGB(0, 0, 0)
        if line_start_2017 > line_end_2030:
            c.setDash(1, 0)
        else:
            c.setDash(3, 1)
        # An endpoint outside the top 25 is placed on the next free overflow
        # row of its column; no line is drawn when both ends are outside.
        if (line_start_2017 > 25) and (line_end_2030 < 26):
            c.line(year2017_column + year2017_columnwidth,
                   row2 - (lineposition_2017 * textgap) + (0.33 * textsize),
                   year2030_column,
                   row2 - (line_end_2030 * textgap) + (0.33 * textsize))
            lineposition_2017 += 1
        elif (line_start_2017 < 26) and (line_end_2030 > 25):
            c.line(year2017_column + year2017_columnwidth,
                   row2 - (line_start_2017 * textgap) + (0.33 * textsize),
                   year2030_column,
                   row2 - (lineposition_2030 * textgap) + (0.33 * textsize))
            lineposition_2030 += 1
        elif (line_start_2017 < 26) or (line_end_2030 < 26):
            c.line(year2017_column + year2017_columnwidth,
                   row2 - (line_start_2017 * textgap) + (0.33 * textsize),
                   year2030_column,
                   row2 - (line_end_2030 * textgap) + (0.33 * textsize))
        # col2-3
        if line_end_2030 > line_end_2050:
            c.setDash(1, 0)
        else:
            c.setDash(3, 1)
        if (line_end_2030 > 25) and (line_end_2050 < 26):
            c.line(year2030_column + year2030_columnwidth,
                   row2 - (lineposition_2030 * textgap) + (0.33 * textsize),
                   year2050_column,
                   row2 - (line_end_2050 * textgap) + (0.33 * textsize))
            lineposition_2030 += 1
        elif ((line_end_2030 < 26) and (line_end_2050 > 25)):
            c.line(year2030_column + year2030_columnwidth,
                   row2 - (line_end_2030 * textgap) + (0.33 * textsize),
                   year2050_column,
                   row2 - (lineposition_2050 * textgap) + (0.33 * textsize))
            lineposition_2050 += 1
        elif (line_end_2030 < 26) or (line_end_2050 < 26):
            c.line(year2030_column + year2030_columnwidth,
                   row2 - (line_end_2030 * textgap) + (0.33 * textsize),
                   year2050_column,
                   row2 - (line_end_2050 * textgap) + (0.33 * textsize))
        # col3-4
        if line_end_2050 > line_end_2100:
            c.setDash(1, 0)
        else:
            c.setDash(3, 1)
        if ((line_end_2050 > 25) and (line_end_2100 < 26)):
            c.line(year2050_column + year2050_columnwidth,
                   row2 - (lineposition_2050 * textgap) + (0.33 * textsize),
                   year2100_column,
                   row2 - (line_end_2100 * textgap) + (0.33 * textsize))
            lineposition_2050 += 1
        elif ((line_end_2050 < 26) and (line_end_2100 > 25)):
            c.line(year2050_column + year2050_columnwidth,
                   row2 - (line_end_2050 * textgap) + (0.33 * textsize),
                   year2100_column,
                   row2 - (lineposition_2100 * textgap) + (0.33 * textsize))
            lineposition_2100 += 1
        elif (line_end_2050 < 26) or (line_end_2100 < 26):
            c.line(year2050_column + year2050_columnwidth,
                   row2 - (line_end_2050 * textgap) + (0.33 * textsize),
                   year2100_column,
                   row2 - (line_end_2100 * textgap) + (0.33 * textsize))
        # iterate
        total_iter = total_iter + 1
    # legend: one coloured box per super-region, starting at row 31, in two
    # columns (narrow boxes at the 2017 column, wide boxes left of the 2030
    # column).
    # NOTE(review): the dash style is not reset here, so the legend boxes
    # are stroked with whatever the last loop iteration left set.
    rect_loc = 31
    region = "High-income"
    c.setFillColorRGB(FILL[region][0], FILL[region][1], FILL[region][2])
    c.rect(year2017_column, row2 - (rect_loc * textgap) - 2.5,
           year2017_columnwidth + 20, textsize * 2.0, stroke=1, fill=1)
    c.setStrokeColorRGB(0, 0, 0)
    c.setFillColorRGB(0, 0, 0)
    c.setStrokeAlpha(1)
    c.setFillAlpha(1)
    c.drawString(year2017_column + 10, row2 - (rect_loc * textgap) + 2.5,
                 "High-income")
    region = "Southeast Asia, East Asia, and Oceania"
    c.setFillColorRGB(FILL[region][0], FILL[region][1], FILL[region][2])
    c.rect(year2030_column - 60, row2 - (rect_loc * textgap) - 2.5,
           year2017_columnwidth + 90, textsize * 2.0, stroke=1, fill=1)
    c.setStrokeColorRGB(0, 0, 0)
    c.setFillColorRGB(0, 0, 0)
    c.setStrokeAlpha(1)
    c.setFillAlpha(1)
    c.drawString(year2030_column - 50, row2 - (rect_loc * textgap) + 2.5,
                 f"{region}")
    region = "South Asia"
    c.setFillColorRGB(FILL[region][0], FILL[region][1], FILL[region][2])
    c.rect(year2017_column, row2 - ((rect_loc + 1) * textgap) - 2.5,
           year2017_columnwidth + 20, textsize * 2.0, stroke=1, fill=1)
    c.setStrokeColorRGB(0, 0, 0)
    c.setFillColorRGB(0, 0, 0)
    c.setStrokeAlpha(1)
    c.setFillAlpha(1)
    c.drawString(year2017_column + 10,
                 row2 - ((rect_loc + 1) * textgap) + 2.5,
                 "South Asia")
    region = "Latin America and Caribbean"
    c.setFillColorRGB(FILL[region][0], FILL[region][1], FILL[region][2])
    c.rect(year2030_column - 60, row2 - ((rect_loc + 1) * textgap) - 2.5,
           year2017_columnwidth + 90, textsize * 2.0, stroke=1, fill=1)
    c.setStrokeColorRGB(0, 0, 0)
    c.setFillColorRGB(0, 0, 0)
    c.setStrokeAlpha(1)
    c.setFillAlpha(1)
    c.drawString(year2030_column - 50,
                 row2 - ((rect_loc + 1) * textgap) + 2.5,
                 "Latin America and Caribbean")
    region = "Central Europe, Eastern Europe, and Central Asia"
    c.setFillColorRGB(FILL[region][0], FILL[region][1], FILL[region][2])
    c.rect(year2030_column - 60, row2 - ((rect_loc + 2) * textgap) - 2.5,
           year2017_columnwidth + 90, textsize * 2.0, stroke=1, fill=1)
    c.setStrokeColorRGB(0, 0, 0)
    c.setFillColorRGB(0, 0, 0)
    c.setStrokeAlpha(1)
    c.setFillAlpha(1)
    c.drawString(year2030_column - 50,
                 row2 - ((rect_loc + 2) * textgap) + 2.5,
                 "Central Europe, Eastern Europe, and Central Asia")
    region = "North Africa and Middle East"
    c.setFillColorRGB(FILL[region][0], FILL[region][1], FILL[region][2])
    c.rect(year2017_column, row2 - ((rect_loc + 2) * textgap) - 2.5,
           year2017_columnwidth + 20, textsize * 2.0, stroke=1, fill=1)
    c.setStrokeColorRGB(0, 0, 0)
    c.setFillColorRGB(0, 0, 0)
    c.setStrokeAlpha(1)
    c.setFillAlpha(1)
    c.drawString(year2017_column + 10,
                 row2 - ((rect_loc + 2) * textgap) + 2.5,
                 "North Africa and Middle East")
    region = "Sub-Saharan Africa"
    c.setFillColorRGB(FILL[region][0], FILL[region][1], FILL[region][2])
    c.rect(year2017_column, row2 - ((rect_loc + 3) * textgap) - 2.5,
           year2017_columnwidth + 20, textsize * 2.0, stroke=1, fill=1)
    c.setStrokeColorRGB(0, 0, 0)
    c.setFillColorRGB(0, 0, 0)
    c.setStrokeAlpha(1)
    c.setFillAlpha(1)
    c.drawString(year2017_column + 10,
                 row2 - ((rect_loc + 3) * textgap) + 2.5,
                 "Sub-Saharan Africa")
    # write the PDF to disk
    c.save()
def run_against(version, pop_version, asfr_version, lifetable_version,
                migration_version, srb_version, gbd_round_id, location_idx,
                years, location_id, draws, test=False):
    """Compute future populations from versioned inputs and save them.

    Takes versions for files, finds the files, and computes future
    populations, writing one ``{location_id}.nc`` summary per location.
    This is what you call from the pipeline.

    Location selection: ``location_idx`` wins when given, then
    ``location_id``; with neither, every location in the population data
    is processed.

    Args:
        version (str): Version name for output.
        pop_version (str): Version for population.
        asfr_version (str): Version for asfr.
        lifetable_version (list[str]): List of versions for lifetable.
        migration_version (list[str]): List of versions for migration.
        srb_version (str): Version for srb; forwarded to ``read_datasets``.
        gbd_round_id (int): GBD Round ID, 4 is 2016.
        location_idx (int|None): Zero-based index into list of locations.
        years (YearRange): Years for past and forecast.
        location_id (int|None): A location ID.
        draws: Forwarded to ``read_datasets``.
        test (bool): Run a reduced subset of locations and draws.

    Returns:
        None

    Raises:
        SystemExit: With code 0 when ``location_idx`` is out of bounds.
    """
    # Function-scope import so this block does not depend on a module-level
    # ``import sys`` being present.
    import sys

    out_path = FBDPath("/{}/future/population/{}".format(
        gbd_round_id, version))
    try:
        out_path.mkdir(parents=True, exist_ok=True)
    except OSError as ose:
        # Logged and not re-raised, as in the original; a missing directory
        # will surface again when the first file is written.
        LOGGER.error("Could not create output directory %s: %s",
                     out_path, ose)

    asfr_lim, lifetable_lim, pop, migration, srb = agreement_rules(
        *read_datasets(
            asfr_version, gbd_round_id, lifetable_version, pop_version,
            migration_version, years, srb_version, draws),
        years)
    ruler = timeline(pop.age_group_id.values, asfr_lim.age_group_id.values)

    # Default: every location in the population data.
    locations = pop.location_id.values
    if location_idx is not None:
        try:
            locations = [locations[location_idx]]
        except IndexError:
            LOGGER.warning("Asked for out-of-bounds location %s of %s",
                           location_idx, locations.shape[0])
            # Maybe you ask for 200 jobs but have 195 countries. OK.
            # sys.exit rather than the builtin ``exit``, which the site
            # module provides for interactive use.
            sys.exit(0)
        LOGGER.info("Using location_id %s from location_idx %s",
                    locations, location_idx)
    elif location_id is not None:
        locations = [location_id]

    for location in locations:
        begin_time = perf_time()
        loc_idx = dict(location_id=location)
        future = one_location(
            pop.loc[loc_idx], asfr_lim.loc[loc_idx],
            lifetable_lim.loc[loc_idx], migration.loc[loc_idx],
            srb.loc[loc_idx], ruler, gbd_round_id, years, test)
        out_name = out_path / "{}.nc".format(location)
        future.coords["location_id"] = location
        summary = summarize_pop(future)
        elapsed = perf_time() - begin_time
        LOGGER.info("Elapsed %s", elapsed)

        write_begin = perf_time()
        # NOTE(review): the output version is recorded under the ``death``
        # keyword — confirm that this is the intended metadata key.
        save_xr(summary, out_name, metric="number", space="identity",
                death=version, pop=pop_version, asfr=asfr_version,
                lifetable=lifetable_version, migration=migration_version,
                srb=srb_version)
        LOGGER.info("Wrote %s", out_name)
        LOGGER.info("Write time Elapsed %s", perf_time() - write_begin)