def generate_full_upload_task(self, upstream_tasks):
    job_hash_name = "full_life_table_upload_{}".format(
        self.no_shock_death_number_estimate_version)
    num_cores = 10
    m_mem_free = "50G"
    runfile = "{}/04_compile_upload_results.R".format(self.code_dir)
    args = [
        "--no_shock_death_number_estimate_version",
        str(self.no_shock_death_number_estimate_version),
        "--gbd_year", str(self.gbd_year),
        "--full_life_table_estimate_version",
        str(self.full_life_table_estimate_version),
        "--upload_all_lt_params_flag", str(self.upload_all_lt_params_flag)
    ]
    argsstr = " ".join(args)
    command = "{r_shell} {codefile} {passargs}".format(
        r_shell=self.r_singularity_shell_3501,
        codefile=runfile,
        passargs=argsstr)
    return BashTask(command=command,
                    upstream_tasks=upstream_tasks,
                    name=job_hash_name,
                    num_cores=num_cores,
                    m_mem_free=m_mem_free,
                    max_runtime_seconds=90000,
                    queue="all.q")
def generate_empirical_lt_prep_task(self, upstream_tasks):
    job_hash_name = "gen_empirical_lts_{}".format(self.version_id)
    num_cores = 5
    m_mem_free = "12G"
    runfile = "{}/gen_empir_lts.R".format(self.code_dir)
    args = [
        "--version_id", self.version_id,
        "--apply_outliers", self.apply_outliers,
        "--mark_best", self.mark_best,
        "--moving_average_weights",
        ",".join([str(w) for w in self.moving_average_weights])
    ]
    # Wrap each argument in double quotes so values (e.g. the comma-joined
    # weights) survive shell parsing intact
    argsstr = " ".join(['"{}"'.format(str(arg)) for arg in args])
    command = "{r_shell} {codefile} {passargs}".format(
        r_shell=self.r_singularity_shell,
        codefile=runfile,
        passargs=argsstr)
    return BashTask(command=command,
                    upstream_tasks=upstream_tasks,
                    name=job_hash_name,
                    num_cores=num_cores,
                    m_mem_free=m_mem_free,
                    max_runtime_seconds=60000,
                    j_resource=True,
                    queue="all.q")
def generate_save_inputs_task(self, upstream_tasks):
    job_hash_name = "agg_save_inputs_{}".format(
        self.no_shock_death_number_estimate_version)
    num_cores = 4
    m_mem_free = "36G"
    runfile = "{}/01_save_inputs.R".format(self.code_dir)
    args = [
        "--no_shock_death_number_estimate_version",
        str(self.no_shock_death_number_estimate_version),
        "--population_estimate_version",
        str(self.population_estimate_version),
        "--population_single_year_estimate_version",
        str(self.population_single_year_estimate_version),
        "--gbd_year", str(self.gbd_year)
    ]
    argsstr = " ".join(args)
    command = "{r_shell} {codefile} {passargs}".format(
        r_shell=self.r_singularity_shell,
        codefile=runfile,
        passargs=argsstr)
    return BashTask(command=command,
                    upstream_tasks=upstream_tasks,
                    name=job_hash_name,
                    num_cores=num_cores,
                    m_mem_free=m_mem_free,
                    max_runtime_seconds=9000,
                    j_resource=True,
                    queue="all.q")
def generate_full_lt_task(self, upstream_tasks, loc):
    job_hash_name = "full_lt_{loc}_{version}".format(
        loc=loc, version=self.no_shock_death_number_estimate_version)
    num_cores = 3
    m_mem_free = "30G"
    runfile = "{}/02_full_lt.R".format(self.code_dir)
    args = [
        "--no_shock_death_number_estimate_version",
        str(self.no_shock_death_number_estimate_version),
        "--mlt_life_table_estimate_version",
        str(self.mlt_life_table_estimate_version),
        "--hiv_run", self.hiv_run_name,
        "--loc", str(loc),
        "--shock_aggregator_version", str(self.shock_aggregator_version),
        "--gbd_year", str(self.gbd_year),
        "--enable_assertions_flag", str(self.enable_assertions_flag)
    ]
    argsstr = " ".join(args)
    command = "{r_shell} {codefile} {passargs}".format(
        r_shell=self.r_singularity_shell,
        codefile=runfile,
        passargs=argsstr)
    return BashTask(command=command,
                    upstream_tasks=upstream_tasks,
                    name=job_hash_name,
                    num_cores=num_cores,
                    m_mem_free=m_mem_free,
                    max_runtime_seconds=90000,
                    j_resource=True,
                    queue="all.q")
def generate_notify_task(self, upstream_tasks):
    job_hash_name = "mlt_notify"
    num_cores = 1
    m_mem_free = "2G"
    runfile = "{}/mlt_send_notification.R".format(self.code_dir)
    args = [
        "--version_id", str(self.version_id),
        "--slack_username",
        self.slack_username if self.slack_username else self.user
    ]
    argsstr = " ".join(args)
    command = "{r_shell} {codefile} {passargs}".format(
        r_shell=self.r_singularity_shell,
        codefile=runfile,
        passargs=argsstr)
    return BashTask(command=command,
                    upstream_tasks=upstream_tasks,
                    name=job_hash_name,
                    num_cores=num_cores,
                    max_attempts=1,
                    m_mem_free=m_mem_free,
                    max_runtime_seconds=1200,
                    queue="all.q")
def add_state_counties(self):
    interpreter = '/ihme/code/beatrixh/miniconda/envs/pyomo/bin/python'
    script = '/ihme/code/beatrixh/microsim_2020/census_2020/synthetic_pop/binning/count_bins_head.py'
    for state in self.states:
        counties = self.county_dict[state]  # counties per state
        for county in counties:
            tracts = self.tract_dict[(state, county)]  # tracts per county
            rtime = len(tracts) * 2 + 600  # expected runtime plus a buffer
            args = state + ' ' + str(county)
            cmd = interpreter + ' ' + script + ' ' + args
            task = BashTask(
                cmd,
                name='bin_and_calc_n_hi_{}_{}'.format(state, county),
                num_cores=1,
                m_mem_free='10G',  # string form, matching the other tasks
                max_attempts=3,
                max_runtime_seconds=rtime,
                resource_scales={
                    'm_mem_free': 0.3,
                    'max_runtime_seconds': 2.0
                },
                queue='long.q')
            self.wflow.add_task(task)
            print("added {}".format(task.name))
def generate_compile_upload_task(self, upstream_tasks):
    job_hash_name = "mlt_compile_upload"
    num_cores = 15
    m_mem_free = "40G"
    runfile = "{}/03_compile_upload.R".format(self.code_dir)
    args = [
        "--version_id", str(self.version_id),
        "--mlt_envelope_version", str(self.mlt_envelope_version),
        "--map_estimate_version", str(self.map_estimate_version),
        "--gbd_year", str(self.gbd_year),
        "--mark_best", str(self.mark_best)
    ]
    argsstr = " ".join(args)
    command = "{r_shell} {codefile} {passargs}".format(
        r_shell=self.r_singularity_shell,
        codefile=runfile,
        passargs=argsstr)
    return BashTask(command=command,
                    upstream_tasks=upstream_tasks,
                    name=job_hash_name,
                    num_cores=num_cores,
                    max_attempts=2,
                    j_resource=True,
                    m_mem_free=m_mem_free,
                    max_runtime_seconds=30800,
                    queue="all.q")
def generate_scaling_task(self, upstream_tasks, year):
    job_hash_name = "mlt_scale_agg_{}".format(year)
    num_cores = 10
    m_mem_free = "90G"
    runfile = "{}/02_scale_results.R".format(self.code_dir)
    args = [
        "--version_id", str(self.version_id),
        "--year", str(year),
        "--age_sex_estimate_version", str(self.age_sex_estimate_version),
        "--gbd_year", str(self.gbd_year)
    ]
    argsstr = " ".join(args)
    command = "{r_shell} {codefile} {passargs}".format(
        r_shell=self.r_singularity_shell,
        codefile=runfile,
        passargs=argsstr)
    return BashTask(command=command,
                    upstream_tasks=upstream_tasks,
                    name=job_hash_name,
                    num_cores=num_cores,
                    m_mem_free=m_mem_free,
                    max_runtime_seconds=30000,
                    queue="all.q")
def generate_gen_lt_task(self, upstream_tasks, loc):
    job_hash_name = "mlt_lt_generation_{}".format(loc)
    num_cores = 5
    m_mem_free = "30G"
    runfile = "{}/01_gen_lts.R".format(self.code_dir)
    args = [
        "--version_id", str(self.version_id),
        "--country", loc,
        "--spectrum_name", self.spectrum_name,
        "--estimate_45q15_version", str(self.estimate_45q15_version),
        "--estimate_5q0_version", str(self.estimate_5q0_version),
        "--age_sex_estimate_version", str(self.age_sex_estimate_version),
        "--u5_envelope_version", str(self.u5_envelope_estimate_version),
        "--lt_empirical_data_version", str(self.lt_empirical_data_version),
        "--gbd_year", str(self.gbd_year)
    ]
    argsstr = " ".join(args)
    command = "{r_shell} {codefile} {passargs}".format(
        r_shell=self.r_singularity_shell,
        codefile=runfile,
        passargs=argsstr)
    return BashTask(command=command,
                    upstream_tasks=upstream_tasks,
                    name=job_hash_name,
                    num_cores=num_cores,
                    m_mem_free=m_mem_free,
                    max_runtime_seconds=10800,
                    queue="all.q")
def generate_prep_task(self, upstream_tasks):
    job_hash_name = "mlt_prep"
    num_cores = 2
    m_mem_free = "10G"
    runfile = "{}/00_prep_mlt.R".format(self.code_dir)
    args = [
        "--spectrum_name", self.spectrum_name,
        "--start", str(self.start),
        "--end", str(self.end),
        "--file_del", self.file_del,
        "--gbd_year", str(self.gbd_year),
        "--version_id", str(self.version_id),
        "--lt_empirical_data_version", str(self.lt_empirical_data_version),
        "--mlt_envelope_version", str(self.mlt_envelope_version),
        "--map_estimate_version", str(self.map_estimate_version)
    ]
    argsstr = " ".join(args)
    command = "{r_shell} {codefile} {passargs}".format(
        r_shell=self.r_singularity_shell,
        codefile=runfile,
        passargs=argsstr)
    return BashTask(command=command,
                    upstream_tasks=upstream_tasks,
                    name=job_hash_name,
                    num_cores=num_cores,
                    m_mem_free=m_mem_free,
                    max_runtime_seconds=3600,
                    j_resource=True,
                    queue="all.q")
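# Hypothetical wiring sketch (not from the original source): the numbered MLT
# runfiles (00_prep_mlt.R through 03_compile_upload.R, plus the notifier)
# suggest the ordering below. `self.workflow`, `locations`, and `years` are
# illustrative names assumed for this sketch; the real run script may differ.
def build_mlt_workflow(self, locations, years):
    prep = self.generate_prep_task(upstream_tasks=[])
    gen_tasks = [self.generate_gen_lt_task(upstream_tasks=[prep], loc=loc)
                 for loc in locations]
    scale_tasks = [self.generate_scaling_task(upstream_tasks=gen_tasks,
                                              year=year)
                   for year in years]
    upload = self.generate_compile_upload_task(upstream_tasks=scale_tasks)
    notify = self.generate_notify_task(upstream_tasks=[upload])
    for task in [prep] + gen_tasks + scale_tasks + [upload, notify]:
        self.workflow.add_task(task)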
def generate_mv_plots_task(self, upstream_tasks, loc):
    job_hash_name = "save_mv_plots_{}_{}".format(self.version_id, loc)
    num_cores = 2
    m_mem_free = "10G"
    runfile = "{}/mv_input_plots_child.R".format(self.code_dir)
    args = ["--version_id", self.version_id, "--loc", loc]
    argsstr = " ".join(['"{}"'.format(str(arg)) for arg in args])
    command = "{r_shell} {codefile} {passargs}".format(
        r_shell=self.r_singularity_shell,
        codefile=runfile,
        passargs=argsstr)
    return BashTask(command=command,
                    upstream_tasks=upstream_tasks,
                    name=job_hash_name,
                    num_cores=num_cores,
                    m_mem_free=m_mem_free,
                    max_runtime_seconds=60000,
                    j_resource=True,
                    queue="all.q")
def add_states(self):
    interpreter = '/ihme/code/beatrixh/miniconda/envs/pyomo/bin/python'
    script = '/ihme/code/beatrixh/microsim_2020/census_2020/synthetic_pop/gen_synth_pop/identify_pop_zero_tracts.py'
    for state in self.states:
        args = state
        cmd = interpreter + ' ' + script + ' ' + args
        task = BashTask(
            cmd,
            name='find_pop_zero_tracts_{}'.format(state),
            num_cores=1,
            m_mem_free='10G',  # string form, matching the other tasks
            max_attempts=3,
            max_runtime_seconds=60 * 10,
            resource_scales={
                'm_mem_free': 0.3,
                'max_runtime_seconds': 2.0
            },
            queue='all.q')
        self.wflow.add_task(task)
        print("added {}".format(task.name))
def generate_select_lts_task(self, upstream_tasks, run_mv):
    job_hash_name = "select_lts_{}".format(self.version_id)
    num_cores = 2
    m_mem_free = "50G"
    runfile = "{}/select_lts.R".format(self.code_dir)
    args = [
        "--version_id", self.version_id,
        "--mark_best", self.mark_best,
        "--apply_outliers", self.apply_outliers,
        "--run_mv", run_mv
    ]
    argsstr = " ".join(['"{}"'.format(str(arg)) for arg in args])
    command = "{r_shell} {codefile} {passargs}".format(
        r_shell=self.r_singularity_shell,
        codefile=runfile,
        passargs=argsstr)
    return BashTask(command=command,
                    upstream_tasks=upstream_tasks,
                    name=job_hash_name,
                    num_cores=num_cores,
                    m_mem_free=m_mem_free,
                    max_runtime_seconds=60000,
                    j_resource=False,
                    queue="all.q")
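# Hypothetical usage sketch (an assumption, not the original run script): the
# empirical life table generators above plausibly chain prep -> optional
# machine-vision plots -> selection. `self.workflow` and `locations` are
# illustrative names introduced only for this sketch.
def build_empirical_lt_workflow(self, locations, run_mv):
    prep = self.generate_empirical_lt_prep_task(upstream_tasks=[])
    plot_tasks = ([self.generate_mv_plots_task(upstream_tasks=[prep], loc=loc)
                   for loc in locations]
                  if run_mv else [])
    select = self.generate_select_lts_task(
        upstream_tasks=plot_tasks or [prep], run_mv=run_mv)
    for task in [prep] + plot_tasks + [select]:
        self.workflow.add_task(task)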
def generate_aggregate_lt_task(self, upstream_tasks, loc, lt_type,
                               num_children):
    job_hash_name = "agg_full_lts_{loc}_{lt_type}_{version}".format(
        loc=loc,
        lt_type=lt_type,
        version=self.no_shock_death_number_estimate_version)
    # Scale resources with the number of child locations being aggregated
    if num_children < 10:
        num_cores = 10
        m_mem_free = "100G"
    elif num_children < 50:
        num_cores = 20
        m_mem_free = "300G"
    else:
        num_cores = 30
        m_mem_free = "500G"
    runfile = "{}/03_aggregate_lts.R".format(self.code_dir)
    args = [
        "--no_shock_death_number_estimate_version",
        str(self.no_shock_death_number_estimate_version),
        "--loc", str(loc),
        "--lt_type", lt_type,
        "--gbd_year", str(self.gbd_year),
        "--enable_assertions_flag", str(self.enable_assertions_flag)
    ]
    argsstr = " ".join(args)
    command = "{r_shell} {codefile} {passargs}".format(
        r_shell=self.r_singularity_shell_3501,
        codefile=runfile,
        passargs=argsstr)
    return BashTask(command=command,
                    upstream_tasks=upstream_tasks,
                    name=job_hash_name,
                    num_cores=num_cores,
                    m_mem_free=m_mem_free,
                    max_runtime_seconds=90000,
                    j_resource=True,
                    queue="all.q")
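# Hypothetical wiring sketch (an assumption): the numbered full-LT runfiles
# (01_save_inputs.R through 04_compile_upload_results.R) imply the ordering
# below. `self.workflow`, `locations`, `agg_locations`, `lt_types`, and
# `child_counts` are illustrative names, not the original run script's API.
def build_full_lt_workflow(self, locations, agg_locations, lt_types,
                           child_counts):
    inputs = self.generate_save_inputs_task(upstream_tasks=[])
    full_lt_tasks = [self.generate_full_lt_task(upstream_tasks=[inputs],
                                                loc=loc)
                     for loc in locations]
    agg_tasks = [
        self.generate_aggregate_lt_task(
            upstream_tasks=full_lt_tasks, loc=loc, lt_type=lt_type,
            num_children=child_counts[loc])
        for loc in agg_locations for lt_type in lt_types
    ]
    upload = self.generate_full_upload_task(upstream_tasks=agg_tasks)
    for task in [inputs] + full_lt_tasks + agg_tasks + [upload]:
        self.workflow.add_task(task)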
def create_stage1_jobs(self):
    """First set of tasks, thus no upstream tasks. Stage 1 runs only when
    no custom stage 1 (custom_stage1) estimates are provided.
    """
    for ko in range(self.holdouts + 1):
        # shell, script, and args pasted together
        model_root = os.path.join(paths.CODE_ROOT, 'model')
        cmd = (f'{RSHELL} -s {STAGE1_SCRIPT} '
               f'{self.output_path} {model_root} {ko}')
        task = BashTask(command=cmd,
                        name=f'stage1_{self.run_id}_{ko}',
                        num_cores=1,
                        m_mem_free='3G',
                        max_attempts=2,
                        max_runtime_seconds=300,
                        tag='stgpr_stage1',
                        queue='all.q',
                        resource_scales=RESOURCE_SCALES,
                        hard_limits=True)
        self.workflow.add_task(task)
        self.stage1_jobs[task.name] = task
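# create_stage1_jobs relies on module-level constants defined elsewhere in
# the repo. A minimal sketch of plausible values, assuming jobmon's
# resource-scaling convention (fraction by which a resource grows on retry);
# the paths below are hypothetical placeholders, not the real ones:
# RSHELL = '/path/to/r_shell.sh'        # hypothetical path
# STAGE1_SCRIPT = '/path/to/stage1.R'   # hypothetical path
# RESOURCE_SCALES = {'m_mem_free': 0.5, 'max_runtime_seconds': 0.5}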
def main() -> None:
    args = parse_args()
    user = getpass.getuser()
    today_string = datetime.date.today().strftime('%m%d%y')
    workflow = Workflow(
        workflow_args=f'anemia_causal_attribution_new_{args.decomp_step}_{today_string}',
        name=f'anemia_causal_attribution_{args.decomp_step}_{today_string}',
        description=f'Anemia: Causal attribution for decomp {args.decomp_step}',
        project="proj_anemia",
        stderr="FILEPATH",
        stdout="FILEPATH",
        working_dir=path_to_directory,
        resume=True)

    causal_attribution_tasks = []
    demo = get_demographics("epi", gbd_round_id=args.gbd_round_id)
    for location_id in demo['location_id']:
        prev_year_task = None
        for year in args.year_id:
            cmd = (
                f'FILEPATH '
                f'FILEPATH '
                f'FILEPATH '
                f'{location_id} {year} {args.gbd_round_id} {args.decomp_step} '
                f'{path_to_directory}/ {args.out_dir}'
            )
            # Chain years serially within a location: each year's task waits
            # on the previous year's task, if there is one
            task = BashTask(
                command=cmd,
                name=f'causal_attribution_{location_id}_{year}',
                tag='causal_attribution',
                upstream_tasks=[prev_year_task] if prev_year_task else [],
                num_cores=1,
                m_mem_free='4G',
                max_attempts=3,
                max_runtime_seconds=60 * 60 * 2,
                queue='all.q')
            causal_attribution_tasks.append(task)
            prev_year_task = task
    workflow.add_tasks(causal_attribution_tasks)

    # Once the draws exist, save results for each modelable entity
    meids = pd.read_excel("FILEPATH")
    meids = meids.filter(like='modelable_entity').values.flatten()
    for modelable_entity_id in meids.tolist():
        task = PythonTask(
            script="FILEPATH",
            args=[
                "--modelable_entity_id", modelable_entity_id,
                "--year_id", " ".join([str(yr) for yr in args.year_id]),
                "--gbd_round_id", args.gbd_round_id,
                "--decomp_step", args.decomp_step,
                "--save_dir", "FILEPATH"
            ],
            name=f"save_{modelable_entity_id}",
            tag="save",
            upstream_tasks=causal_attribution_tasks,
            num_cores=8,
            m_mem_free="100G",
            max_attempts=3,
            max_runtime_seconds=60 * 60 * 24,
            queue='all.q')
        workflow.add_task(task)

    status = workflow.run()
    print(f'Workflow finished with status {status}')
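# Standard entry-point guard, assuming this module is launched directly as a
# script (an assumption; the original launch mechanism is not shown above).
if __name__ == '__main__':
    main()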