예제 #1
0
    def generate_full_upload_task(self, upstream_tasks):
        """Create the task that runs ``04_compile_upload_results.R``.

        The script is looked up under ``self.code_dir`` and launched through
        ``self.r_singularity_shell_3501``. Every option value is converted
        with ``str`` before being placed on the command line.

        Args:
            upstream_tasks: passed through unchanged as the ``upstream_tasks``
                of the returned task.

        Returns:
            A ``BashTask`` named ``full_life_table_upload_<version>``, where
            ``<version>`` is ``self.no_shock_death_number_estimate_version``.
        """
        version = self.no_shock_death_number_estimate_version
        script = f"{self.code_dir}/04_compile_upload_results.R"

        # (flag, value) pairs in the order they appear on the command line.
        options = (
            ("--no_shock_death_number_estimate_version", version),
            ("--gbd_year", self.gbd_year),
            ("--full_life_table_estimate_version",
             self.full_life_table_estimate_version),
            ("--upload_all_lt_params_flag", self.upload_all_lt_params_flag),
        )
        cli = " ".join(f"{flag} {value!s}" for flag, value in options)

        return BashTask(
            command=f"{self.r_singularity_shell_3501} {script} {cli}",
            upstream_tasks=upstream_tasks,
            name=f"full_life_table_upload_{version}",
            num_cores=10,
            m_mem_free="50G",
            max_runtime_seconds=90000,
            queue="all.q",
        )
예제 #2
0
    def generate_empirical_lt_prep_task(self, upstream_tasks):
        """Create the task that runs ``gen_empir_lts.R``.

        Each command-line token (flags and values alike) is converted with
        ``str`` and wrapped in double quotes. The moving-average weights are
        collapsed into one comma-separated token.

        Args:
            upstream_tasks: passed through unchanged as the ``upstream_tasks``
                of the returned task.

        Returns:
            A ``BashTask`` named ``gen_empirical_lts_<version_id>``.
        """
        tokens = (
            "--version_id", self.version_id,
            "--apply_outliers", self.apply_outliers,
            "--mark_best", self.mark_best,
            "--moving_average_weights",
            ",".join(str(weight) for weight in self.moving_average_weights),
        )
        quoted_cli = " ".join(f'"{token!s}"' for token in tokens)
        script = f"{self.code_dir}/gen_empir_lts.R"

        return BashTask(
            command=f"{self.r_singularity_shell} {script} {quoted_cli}",
            upstream_tasks=upstream_tasks,
            name=f"gen_empirical_lts_{self.version_id}",
            num_cores=5,
            m_mem_free="12G",
            max_runtime_seconds=60000,
            j_resource=True,
            queue="all.q",
        )
예제 #3
0
    def generate_save_inputs_task(self, upstream_tasks):
        """Create the task that runs ``01_save_inputs.R``.

        Args:
            upstream_tasks: passed through unchanged as the ``upstream_tasks``
                of the returned task.

        Returns:
            A ``BashTask`` named ``agg_save_inputs_<version>``, where
            ``<version>`` is ``self.no_shock_death_number_estimate_version``.
        """
        version = self.no_shock_death_number_estimate_version

        # (flag, value) pairs; values are stringified when the line is built.
        options = (
            ("--no_shock_death_number_estimate_version", version),
            ("--population_estimate_version",
             self.population_estimate_version),
            ("--population_single_year_estimate_version",
             self.population_single_year_estimate_version),
            ("--gbd_year", self.gbd_year),
        )
        cli = " ".join(f"{flag} {value!s}" for flag, value in options)
        script = f"{self.code_dir}/01_save_inputs.R"

        return BashTask(
            command=f"{self.r_singularity_shell} {script} {cli}",
            upstream_tasks=upstream_tasks,
            name=f"agg_save_inputs_{version}",
            num_cores=4,
            m_mem_free="36G",
            max_runtime_seconds=9000,
            j_resource=True,
            queue="all.q",
        )
예제 #4
0
    def generate_full_lt_task(self, upstream_tasks, loc):
        """Create the task that runs ``02_full_lt.R`` for one location.

        Args:
            upstream_tasks: passed through unchanged as the ``upstream_tasks``
                of the returned task.
            loc: location identifier; stringified for the ``--loc`` option
                and embedded in the task name.

        Returns:
            A ``BashTask`` named ``full_lt_<loc>_<version>``, where
            ``<version>`` is ``self.no_shock_death_number_estimate_version``.
        """
        version = self.no_shock_death_number_estimate_version

        # ``hiv_run_name`` is deliberately not wrapped in str(): the join
        # below requires it to be a string already, as in the original.
        pairs = (
            ("--no_shock_death_number_estimate_version", str(version)),
            ("--mlt_life_table_estimate_version",
             str(self.mlt_life_table_estimate_version)),
            ("--hiv_run", self.hiv_run_name),
            ("--loc", str(loc)),
            ("--shock_aggregator_version",
             str(self.shock_aggregator_version)),
            ("--gbd_year", str(self.gbd_year)),
            ("--enable_assertions_flag", str(self.enable_assertions_flag)),
        )
        cli = " ".join(" ".join(pair) for pair in pairs)
        script = f"{self.code_dir}/02_full_lt.R"

        return BashTask(
            command=f"{self.r_singularity_shell} {script} {cli}",
            upstream_tasks=upstream_tasks,
            name=f"full_lt_{loc}_{version}",
            num_cores=3,
            m_mem_free="30G",
            max_runtime_seconds=90000,
            j_resource=True,
            queue="all.q",
        )
예제 #5
0
    def generate_notify_task(self, upstream_tasks):
        """Create the task that runs ``mlt_send_notification.R``.

        The ``--slack_username`` value is ``self.slack_username`` when that
        is truthy and ``self.user`` otherwise.

        Args:
            upstream_tasks: passed through unchanged as the ``upstream_tasks``
                of the returned task.

        Returns:
            A ``BashTask`` named ``mlt_notify`` with ``max_attempts=1``.
        """
        # The username is not str()-coerced; the join needs a string here.
        pairs = (
            ("--version_id", str(self.version_id)),
            ("--slack_username", self.slack_username or self.user),
        )
        cli = " ".join(" ".join(pair) for pair in pairs)
        script = f"{self.code_dir}/mlt_send_notification.R"

        return BashTask(
            command=f"{self.r_singularity_shell} {script} {cli}",
            upstream_tasks=upstream_tasks,
            name="mlt_notify",
            num_cores=1,
            max_attempts=1,
            m_mem_free="2G",
            max_runtime_seconds=1200,
            queue="all.q",
        )
예제 #6
0
    def add_state_counties(self):
        """Add one ``count_bins_head.py`` task per (state, county) pair.

        Iterates ``self.states`` and the counties listed for each state in
        ``self.county_dict``. The requested runtime is two seconds per tract
        in ``self.tract_dict[(state, county)]`` plus a 600-second buffer.
        Every task is added to ``self.wflow`` and its name is printed.
        """
        python_bin = '/ihme/code/beatrixh/miniconda/envs/pyomo/bin/python'
        target = '/ihme/code/beatrixh/microsim_2020/census_2020/synthetic_pop/binning/count_bins_head.py'
        # Retry scaling factors forwarded to every task.
        scales = {'m_mem_free': 0.3, 'max_runtime_seconds': 2.0}

        for state in self.states:
            for county in self.county_dict[state]:
                n_tracts = len(self.tract_dict[(state, county)])
                task = BashTask(
                    ' '.join((python_bin, target, state, str(county))),
                    name=f'bin_and_calc_n_hi_{state}_{county}',
                    num_cores=1,
                    m_mem_free=10,
                    max_attempts=3,
                    # expected runtime (2s per tract) plus buffer
                    max_runtime_seconds=n_tracts * 2 + 600,
                    resource_scales=dict(scales),
                    queue='long.q',
                )
                self.wflow.add_task(task)
                print(f"added {task.name}")
예제 #7
0
    def generate_compile_upload_task(self, upstream_tasks):
        """Create the task that runs ``03_compile_upload.R``.

        Args:
            upstream_tasks: passed through unchanged as the ``upstream_tasks``
                of the returned task.

        Returns:
            A ``BashTask`` named ``mlt_compile_upload`` with
            ``max_attempts=2``.
        """
        # (flag, value) pairs; values are stringified when the line is built.
        options = (
            ("--version_id", self.version_id),
            ("--mlt_envelope_version", self.mlt_envelope_version),
            ("--map_estimate_version", self.map_estimate_version),
            ("--gbd_year", self.gbd_year),
            ("--mark_best", self.mark_best),
        )
        cli = " ".join(f"{flag} {value!s}" for flag, value in options)
        script = f"{self.code_dir}/03_compile_upload.R"

        return BashTask(
            command=f"{self.r_singularity_shell} {script} {cli}",
            upstream_tasks=upstream_tasks,
            name="mlt_compile_upload",
            num_cores=15,
            max_attempts=2,
            j_resource=True,
            m_mem_free="40G",
            max_runtime_seconds=30800,
            queue="all.q",
        )
예제 #8
0
    def generate_scaling_task(self, upstream_tasks, year):
        """Create the task that runs ``02_scale_results.R`` for one year.

        Args:
            upstream_tasks: passed through unchanged as the ``upstream_tasks``
                of the returned task.
            year: value for the ``--year`` option; also embedded in the
                task name.

        Returns:
            A ``BashTask`` named ``mlt_scale_agg_<year>``.
        """
        # (flag, value) pairs; values are stringified when the line is built.
        options = (
            ("--version_id", self.version_id),
            ("--year", year),
            ("--age_sex_estimate_version", self.age_sex_estimate_version),
            ("--gbd_year", self.gbd_year),
        )
        cli = " ".join(f"{flag} {value!s}" for flag, value in options)
        script = f"{self.code_dir}/02_scale_results.R"

        return BashTask(
            command=f"{self.r_singularity_shell} {script} {cli}",
            upstream_tasks=upstream_tasks,
            name=f"mlt_scale_agg_{year}",
            num_cores=10,
            m_mem_free="90G",
            max_runtime_seconds=30000,
            queue="all.q",
        )
예제 #9
0
    def generate_gen_lt_task(self, upstream_tasks, loc):
        """Create the task that runs ``01_gen_lts.R`` for one location.

        Args:
            upstream_tasks: passed through unchanged as the ``upstream_tasks``
                of the returned task.
            loc: value for the ``--country`` option; also embedded in the
                task name. It is joined without ``str`` coercion, so it has
                to be a string.

        Returns:
            A ``BashTask`` named ``mlt_lt_generation_<loc>``.
        """
        # ``loc`` and ``spectrum_name`` are passed as-is (no str()), exactly
        # as before; all version ids and the year are stringified.
        pairs = (
            ("--version_id", str(self.version_id)),
            ("--country", loc),
            ("--spectrum_name", self.spectrum_name),
            ("--estimate_45q15_version", str(self.estimate_45q15_version)),
            ("--estimate_5q0_version", str(self.estimate_5q0_version)),
            ("--age_sex_estimate_version",
             str(self.age_sex_estimate_version)),
            ("--u5_envelope_version",
             str(self.u5_envelope_estimate_version)),
            ("--lt_empirical_data_version",
             str(self.lt_empirical_data_version)),
            ("--gbd_year", str(self.gbd_year)),
        )
        cli = " ".join(" ".join(pair) for pair in pairs)
        script = f"{self.code_dir}/01_gen_lts.R"

        return BashTask(
            command=f"{self.r_singularity_shell} {script} {cli}",
            upstream_tasks=upstream_tasks,
            name=f"mlt_lt_generation_{loc}",
            num_cores=5,
            m_mem_free="30G",
            max_runtime_seconds=10800,
            queue="all.q",
        )
예제 #10
0
    def generate_prep_task(self, upstream_tasks):
        """Create the task that runs ``00_prep_mlt.R``.

        Args:
            upstream_tasks: passed through unchanged as the ``upstream_tasks``
                of the returned task.

        Returns:
            A ``BashTask`` named ``mlt_prep``.
        """
        # ``spectrum_name`` and ``file_del`` are passed as-is (no str()),
        # exactly as before; the remaining values are stringified.
        pairs = (
            ("--spectrum_name", self.spectrum_name),
            ("--start", str(self.start)),
            ("--end", str(self.end)),
            ("--file_del", self.file_del),
            ("--gbd_year", str(self.gbd_year)),
            ("--version_id", str(self.version_id)),
            ("--lt_empirical_data_version",
             str(self.lt_empirical_data_version)),
            ("--mlt_envelope_version", str(self.mlt_envelope_version)),
            ("--map_estimate_version", str(self.map_estimate_version)),
        )
        cli = " ".join(" ".join(pair) for pair in pairs)
        script = f"{self.code_dir}/00_prep_mlt.R"

        return BashTask(
            command=f"{self.r_singularity_shell} {script} {cli}",
            upstream_tasks=upstream_tasks,
            name="mlt_prep",
            num_cores=2,
            m_mem_free="10G",
            max_runtime_seconds=3600,
            j_resource=True,
            queue="all.q",
        )
예제 #11
0
 def generate_mv_plots_task(self, upstream_tasks, loc):
     """Create the task that runs ``mv_input_plots_child.R`` for one location.

     Each command-line token is converted with ``str`` and wrapped in
     double quotes.

     Args:
         upstream_tasks: passed through unchanged as the ``upstream_tasks``
             of the returned task.
         loc: value for the ``--loc`` option; also embedded in the task name.

     Returns:
         A ``BashTask`` named ``save_mv_plots_<version_id>_<loc>``.
     """
     tokens = ("--version_id", self.version_id, "--loc", loc)
     quoted_cli = " ".join(f'"{token!s}"' for token in tokens)
     script = f"{self.code_dir}/mv_input_plots_child.R"

     return BashTask(
         command=f"{self.r_singularity_shell} {script} {quoted_cli}",
         upstream_tasks=upstream_tasks,
         name=f"save_mv_plots_{self.version_id}_{loc}",
         num_cores=2,
         m_mem_free="10G",
         max_runtime_seconds=60000,
         j_resource=True,
         queue="all.q",
     )
예제 #12
0
    def add_states(self):
        """Add one ``identify_pop_zero_tracts.py`` task per state.

        For every entry of ``self.states`` a ``BashTask`` is created with the
        state as the script's only argument, added to ``self.wflow``, and its
        name is printed.
        """
        python_bin = '/ihme/code/beatrixh/miniconda/envs/pyomo/bin/python'
        target = '/ihme/code/beatrixh/microsim_2020/census_2020/synthetic_pop/gen_synth_pop/identify_pop_zero_tracts.py'
        # Retry scaling factors forwarded to every task.
        scales = {'m_mem_free': 0.3, 'max_runtime_seconds': 2.0}

        for state in self.states:
            task = BashTask(
                ' '.join((python_bin, target, state)),
                name=f'find_pop_zero_tracts_{state}',
                num_cores=1,
                m_mem_free=10,
                max_attempts=3,
                max_runtime_seconds=10 * 60,
                resource_scales=dict(scales),
                queue='all.q',
            )
            self.wflow.add_task(task)
            print(f"added {task.name}")
예제 #13
0
 def generate_select_lts_task(self, upstream_tasks, run_mv):
     """Create the task that runs ``select_lts.R``.

     Each command-line token is converted with ``str`` and wrapped in
     double quotes.

     Args:
         upstream_tasks: passed through unchanged as the ``upstream_tasks``
             of the returned task.
         run_mv: value for the ``--run_mv`` option.

     Returns:
         A ``BashTask`` named ``select_lts_<version_id>``, created with
         ``j_resource=False``.
     """
     tokens = (
         "--version_id", self.version_id,
         "--mark_best", self.mark_best,
         "--apply_outliers", self.apply_outliers,
         "--run_mv", run_mv,
     )
     quoted_cli = " ".join(f'"{token!s}"' for token in tokens)
     script = f"{self.code_dir}/select_lts.R"

     return BashTask(
         command=f"{self.r_singularity_shell} {script} {quoted_cli}",
         upstream_tasks=upstream_tasks,
         name=f"select_lts_{self.version_id}",
         num_cores=2,
         m_mem_free="50G",
         max_runtime_seconds=60000,
         j_resource=False,
         queue="all.q",
     )
예제 #14
0
    def generate_aggregate_lt_task(self, upstream_tasks, loc, lt_type,
                                   num_children):
        """Create the task that runs ``03_aggregate_lts.R`` for one location.

        Cores and memory are chosen from ``num_children``:
        fewer than 10 -> 10 cores / 100G, fewer than 50 -> 20 cores / 300G,
        anything else -> 30 cores / 500G.

        Args:
            upstream_tasks: passed through unchanged as the ``upstream_tasks``
                of the returned task.
            loc: stringified for the ``--loc`` option; also embedded in the
                task name.
            lt_type: value for the ``--lt_type`` option, joined without
                ``str`` coercion; also embedded in the task name.
            num_children: number used only to pick the resource tier.

        Returns:
            A ``BashTask`` named ``agg_full_lts_<loc>_<lt_type>_<version>``,
            where ``<version>`` is
            ``self.no_shock_death_number_estimate_version``.
        """
        version = self.no_shock_death_number_estimate_version
        task_name = f"agg_full_lts_{loc}_{lt_type}_{version}"

        # Resource tier; the second test is only reached when the first fails.
        if num_children < 10:
            cores, memory = 10, "100G"
        elif num_children < 50:
            cores, memory = 20, "300G"
        else:
            cores, memory = 30, "500G"

        # ``lt_type`` is passed as-is (no str()), exactly as before.
        pairs = (
            ("--no_shock_death_number_estimate_version", str(version)),
            ("--loc", str(loc)),
            ("--lt_type", lt_type),
            ("--gbd_year", str(self.gbd_year)),
            ("--enable_assertions_flag", str(self.enable_assertions_flag)),
        )
        cli = " ".join(" ".join(pair) for pair in pairs)
        script = f"{self.code_dir}/03_aggregate_lts.R"

        return BashTask(
            command=f"{self.r_singularity_shell_3501} {script} {cli}",
            upstream_tasks=upstream_tasks,
            name=task_name,
            num_cores=cores,
            m_mem_free=memory,
            max_runtime_seconds=90000,
            j_resource=True,
            queue="all.q",
        )
예제 #15
0
    def create_stage1_jobs(self):
        """Create one stage-1 task per holdout index and register it.

        Holdout indices run from 0 through ``self.holdouts`` inclusive.
        Each task is added to ``self.workflow`` and stored in
        ``self.stage1_jobs`` under its task name. These tasks are created
        without upstream tasks.

        NOTE(review): the original docstring said stage 1 should only run
        when there are no custom stage-1 estimates; that check is not
        performed in this method - confirm the caller enforces it.
        """
        # Loop-invariant, so computed once rather than on every iteration.
        model_root = os.path.join(paths.CODE_ROOT, 'model')

        for ko in range(self.holdouts + 1):
            # ie shell, script, and args pasted together
            cmd = (f'{RSHELL} -s {STAGE1_SCRIPT} '
                   f'{self.output_path} {model_root} {ko}')

            task = BashTask(command=cmd,
                            name=f'stage1_{self.run_id}_{ko}',
                            num_cores=1,
                            m_mem_free='3G',
                            max_attempts=2,
                            max_runtime_seconds=300,
                            tag='stgpr_stage1',
                            queue='all.q',
                            resource_scales=RESOURCE_SCALES,
                            hard_limits=True)

            self.workflow.add_task(task)
            self.stage1_jobs[task.name] = task
예제 #16
0
def main() -> None:
    """Build and run the anemia causal-attribution workflow.

    Steps:
      1. Parse command-line arguments and create a resumable ``Workflow``
         whose name embeds the decomp step and today's date (``%m%d%y``).
      2. For every location returned by ``get_demographics("epi", ...)``
         and every year in ``args.year_id``, add a causal-attribution
         ``BashTask``. Within one location the years are chained: each
         year's task lists the previous year's task as its only upstream.
      3. For every modelable entity id read from the spreadsheet, add a
         save ``PythonTask`` that lists all causal-attribution tasks as
         upstream.
      4. Run the workflow and print its final status.

    ``path_to_directory`` is not assigned inside this function, so it has
    to be available as a module-level name.
    """
    args = parse_args()
    today_string = datetime.date.today().strftime('%m%d%y')
    workflow = Workflow(
        workflow_args=f'anemia_causal_attribution_new_{args.decomp_step}_{today_string}',
        name=f'anemia_causal_attribution_{args.decomp_step}_{today_string}',
        description=f'Anemia: Causal attribution for decomp {args.decomp_step}',
        project="proj_anemia",
        stderr="FILEPATH",
        stdout="FILEPATH",
        working_dir=path_to_directory,
        resume=True)

    causal_attribution_tasks = []
    demo = get_demographics("epi", gbd_round_id=args.gbd_round_id)
    for location_id in demo['location_id']:
        prev_year_task = None
        for year in args.year_id:
            cmd = (
                f'FILEPATH '
                f'FILEPATH '
                f'FILEPATH '
                f'{location_id} {year} {args.gbd_round_id} {args.decomp_step} '
                f'{path_to_directory}/ {args.out_dir}'
            )
            # Settings shared by every causal-attribution task; only the
            # upstream dependency differs between first and later years.
            task_kwargs = {
                'command': cmd,
                'name': f'causal_attribution_{location_id}_{year}',
                'tag': 'causal_attribution',
                'num_cores': 1,
                'm_mem_free': '4G',
                'max_attempts': 3,
                'max_runtime_seconds': 60*60*2,
                'queue': 'all.q',
            }
            if prev_year_task:
                # Chain years within a location: wait for the previous year.
                task_kwargs['upstream_tasks'] = [prev_year_task]
            task = BashTask(**task_kwargs)
            causal_attribution_tasks.append(task)
            prev_year_task = task
    workflow.add_tasks(causal_attribution_tasks)

    # once the draws exist, save results
    meids = pd.read_excel("FILEPATH")
    # Keep only columns whose label contains 'modelable_entity', flattened
    # into a single array of ids.
    meids = meids.filter(like='modelable_entity').values.flatten()
    year_arg = " ".join(str(yr) for yr in args.year_id)
    for modelable_entity_id in meids.tolist():
        task = PythonTask(
            script="FILEPATH",
            args=[
                "--modelable_entity_id", modelable_entity_id,
                "--year_id", year_arg,
                "--gbd_round_id", args.gbd_round_id,
                "--decomp_step", args.decomp_step,
                "--save_dir", "FILEPATH"
            ],
            name=f"save_{modelable_entity_id}",
            tag="save",
            upstream_tasks=causal_attribution_tasks,
            num_cores=8,
            m_mem_free="100G",
            max_attempts=3,
            max_runtime_seconds=60*60*24,
            queue='all.q')
        workflow.add_task(task)

    status = workflow.run()
    print(f'Workflow finished with status {status}')