def run_pipeline_upload(como_dir, component, location_id):
    cv = ComoVersion(como_dir)
    cv.load_cache()
    if component == "cause":
        upload_cause_summaries(cv, location_id)
    elif component == "sequela":
        upload_sequela_summaries(cv, location_id)
    elif component == "impairment":
        upload_rei_summaries(cv, location_id)
    elif component == "injuries":
        upload_inj_summaries(cv, location_id)
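# Usage sketch (hypothetical values; the FILEPATH placeholder and
# location_id 102 are illustrative, not taken from this codebase):
#
#   run_pipeline_upload("FILEPATH", "cause", location_id=102)
#
# The function resumes the stored ComoVersion and dispatches the upload
# for exactly one component at one location.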
def run_pipeline_aggregate_locations(como_dir, component, year_id, sex_id,
                                     measure_id, location_set_id):
    # resume the como version with stored parameters
    cv = ComoVersion(como_dir)
    cv.load_cache()
    if component == "cause":
        agg_causes(cv, year_id, sex_id, measure_id, location_set_id)
    elif component == "sequela":
        agg_sequelae(cv, year_id, sex_id, measure_id, location_set_id)
    elif component == "impairment":
        agg_impairment(cv, year_id, sex_id, measure_id, location_set_id)
    elif component == "injuries":
        agg_injuries(cv, year_id, sex_id, measure_id, location_set_id)
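# Usage sketch (hypothetical IDs): aggregate cause results for one
# year-sex-measure slice up a location hierarchy.
#
#   run_pipeline_aggregate_locations(
#       "FILEPATH", "cause", year_id=2017, sex_id=1, measure_id=5,
#       location_set_id=35)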
def run_pipeline_nonfatal(como_dir, location_id=[], year_id=[], sex_id=[],
                          age_group_id=[], measure_id=[], n_processes=23,
                          n_simulants=40000, *args, **kwargs):
    """Run the nonfatal calculation on most detailed demographics.

    Args:
        como_dir (str): directory of the como run to resume
        location_id (list, optional): location_ids to compute
        year_id (list, optional): year_ids to compute
        sex_id (list, optional): sex_ids to compute
        age_group_id (list, optional): age_group_ids to compute
        measure_id (list, optional): measure_ids to compute
        n_processes (int, optional): number of worker processes to use
        n_simulants (int, optional): number of simulants to draw

        *args and **kwargs are passed into the simulation as parameters
    """
    # resume the como version with stored parameters
    cv = ComoVersion(como_dir)
    cv.load_cache()

    # set up the nonfatal computation object for our demographic set
    cnf = ComputeNonfatal(cv, location_id=location_id, year_id=year_id,
                          sex_id=sex_id, age_group_id=age_group_id,
                          measure_id=measure_id)

    # import data
    cnf.import_data(n_processes=n_processes)

    # compute all results
    cnf.compute_results(n_simulants=n_simulants, n_processes=n_processes,
                        *args, **kwargs)

    # write results to disk
    cnf.write_results()
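# Usage sketch (hypothetical IDs): run the nonfatal computation for a single
# most-detailed demographic slice; extra keyword arguments are forwarded to
# the simulation.
#
#   run_pipeline_nonfatal(
#       "FILEPATH", location_id=[102], year_id=[2017], sex_id=[1],
#       n_processes=23, n_simulants=40000)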
def run_como(
        como_dir=None,
        root_dir="FILEPATH",
        gbd_round_id=5,
        location_set_id=35,
        year_id=list(range(1990, 2018)),
        measure_id=[3, 5, 6],
        n_draws=1000,
        n_simulants=20000,
        components=["cause", "sequela", "injuries", "impairment"],
        change_years=[(1990, 2007), (2007, 2017), (1990, 2017)],
        agg_loc_sets=[35, 83],
        project="proj_como"):

    special_sets = set(agg_loc_sets) - set([location_set_id])
    all_sets = set(agg_loc_sets) | set([location_set_id])

    # resume an existing run if a directory is given, otherwise create a
    # new como version from the supplied parameters
    if como_dir is not None:
        cv = ComoVersion(como_dir)
        cv.load_cache()
    else:
        cv = ComoVersion.new(
            root_dir, gbd_round_id, location_set_id, year_id, measure_id,
            n_draws, components, change_years, special_sets)

    # build and run the workflow, then upload results for every location
    # in the union of all aggregation location sets
    cwf = ComoWorkFlow(cv)
    cwf.add_tasks_to_dag(n_simulants=n_simulants, agg_loc_sets=all_sets)
    if cwf.run_workflow(project=project):
        all_locs = []
        for location_set_id in all_sets:
            loc_tree = loctree(location_set_id=location_set_id,
                               gbd_round_id=cv.gbd_round_id)
            all_locs.extend(loc_tree.node_ids)
        all_locs = list(set(all_locs))
        run_upload(cv, all_locs)
    else:
        raise RuntimeError("como unsuccessful")
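# Usage sketch: a fresh run needs only the defaults, while passing como_dir
# resumes a cached version. Both paths below are hypothetical placeholders.
#
#   run_como(root_dir="FILEPATH")    # new version built from the defaults
#   run_como(como_dir="FILEPATH")    # resume an existing version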
"sex_id": dimensions.index_dim.get_level("sex_id") }, n_processes=self.chunksize[component]) if __name__ == "__main__": parser = argparse.ArgumentParser( description="Compute nonfatal aggregate for a year-sex-measure") parser.add_argument("--como_dir", type=str, help="directory of como run") parser.add_argument("--component", type=str, help="which component to aggregate") parser.add_argument("--year_id", type=int, help="year_id to aggregate") parser.add_argument("--sex_id", type=int, help="sex_id to aggregate") parser.add_argument("--measure_id", type=int, help="measure_id to aggregate") parser.add_argument("--location_set_version_id", type=int, help="location_set_version_id to aggregate") parser.add_argument("--redis_host", type=str, help="redis_host to manage concurrent I/O") args = parser.parse_args() cv = ComoVersion(args.como_dir) cv.load_cache() task = LocationAggTask(cv, args.measure_id, args.year_id, args.sex_id, args.redis_host) task.run_task(args.location_set_version_id, args.component)
import os
from copy import deepcopy

import pandas as pd


def main(root_j_dir, root_tmp_dir, date, code_dir, in_dir, out_dir, ndraws,
         demographics, task_id):
    # subset based on task id to the demographic arguments
    dems = demographics.loc[demographics["task_id"] == task_id]
    location_id = int(dems["location_id"].iloc[0])
    year_id = int(dems["year_id"].iloc[0])
    sex_id = int(dems["sex_id"].iloc[0])

    # import hierarchies from Como
    from como.version import ComoVersion
    cv = ComoVersion("FILEPATH")
    cv.load_cache()

    # get dimensions and replace with what we are parallelizing
    # in this child script
    print("Copying dimensions from cv")
    dim = deepcopy(cv.dimensions)
    dim.index_dim.replace_level("location_id", location_id)
    dim.index_dim.replace_level("year_id", year_id)
    dim.index_dim.replace_level("sex_id", sex_id)

    # add cause and rei to dimensions
    print("Adding cause to dimensions")
    dim.index_dim.add_level(
        "cause_id", cv.cause_restrictions.cause_id.unique().tolist())
    dim.index_dim.add_level(
        "rei_id", cv.ncode_hierarchy.rei_id.unique().tolist())

    # set the years so that we always have 2005 to calibrate
    years = list(
        set(
            cap_val(dim.index_dim.levels.year_id,
                    [1990, 1995, 2000, 2005, 2010, 2016]) + [2005]))
    print("Years")
    print(years)

    # get all E-N combinations to use to make square data
    codes = pd.read_csv(os.path.join(code_dir, "FILEPATH.csv"))

    # get incidence df
    df_inc = get_incidence(dim, cv, years, codes, ndraws=ndraws)
    df_inc_agg = compute_aggregates(df_inc, dim, cv)
    assert len(df_inc_agg.index) == 50922, (
        "The number of rows in the injized DF is not correct.")
    assert not df_inc_agg.duplicated(subset=[
        'location_id', 'year_id', 'age_group_id', 'sex_id', 'measure_id',
        'cause_id', 'rei_id'
    ]).any(), "The id columns do not uniquely identify the observations!"
    assert not df_inc_agg.isnull().any().any(), (
        "You have null values in the incidence DF!")
    print("Writing results for incidence")
    write_result_draws(df_inc_agg, measure_id=6, dim=dim, out_dir=out_dir)

    # get prevalence df
    df_prev = get_prevalence(dim, cv, years, codes, ndraws=ndraws)
    df_prev_agg = compute_aggregates(df_prev, dim, cv)
    assert len(df_prev_agg.index) == 50922, (
        "The number of rows in the injized DF is not correct.")
    assert not df_prev_agg.duplicated(subset=[
        'location_id', 'year_id', 'age_group_id', 'sex_id', 'measure_id',
        'cause_id', 'rei_id'
    ]).any(), "The id columns do not uniquely identify the observations!"
    assert not df_prev_agg.isnull().any().any(), (
        "You have null values in the prevalence DF!")
    print("Writing results for prevalence")
    write_result_draws(df_prev_agg, measure_id=5, dim=dim, out_dir=out_dir)

    # save checkfile when all done
    checkpath = os.path.join(
        "FILEPATH",
        "finished_{}_{}_{}.txt".format(location_id, year_id, sex_id))
    open(checkpath, "w").close()
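# Example driver (a sketch; the demographics frame and task_id mapping are
# illustrative, mirroring how an array job would index one row per task):
#
#   demographics = pd.DataFrame({
#       "task_id": [1, 2],
#       "location_id": [102, 102],
#       "year_id": [2016, 2016],
#       "sex_id": [1, 2],
#   })
#   main(root_j_dir, root_tmp_dir, date, code_dir, in_dir, out_dir,
#        ndraws=1000, demographics=demographics, task_id=1)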
def run_pipeline_summarize(como_dir, component, location_id):
    cv = ComoVersion(como_dir)
    cv.load_cache()
    summ(cv, location_id, component)
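# Usage sketch (hypothetical values), mirroring the upload entry point above:
#
#   run_pipeline_summarize("FILEPATH", "cause", location_id=102)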