Example #1
    def execute(self):

        # compile submission arguments
        kwargs = self.build_args[1]
        me_map = kwargs.pop("me_map")

        # get qmaster
        q = self.get_qmaster()

        # command line args for run_ex_adjust.py
        # parallelize by year
        for i in [1990, 1995, 2000, 2005, 2010, 2016]:
            ex_params = ["--me_map", json.dumps(me_map),
                         "--out_dir", data_dir, "--year_id", str(i)]
            remote_job = job.Job(
                mon_dir=log_dir,
                runfile=os.path.join(code_dir, "scripts", "run_ex_adjust.py"),
                name="{proc}_{year}".format(proc=self.identity, year=i),
                job_args=ex_params
            )
            q.queue_job(
                remote_job,
                slots=20,
                memory=40,
                project="proj_epic")
        # barrier: wait for the year jobs before queueing the saves
        q.block_till_done(poll_interval=60)

        # queue one save job per output modelable entity id
        outputs_tuples = self.builder.get_process_outputs(self.identity)
        result_meids = [task_tuple[0] for task_tuple in outputs_tuples]
        for meid in result_meids:
            save_params = [
                meid,
                description,
                os.path.join(data_dir, str(meid)),
                "--best",
                "--file_pattern", "{year_id}.h5",
                "--h5_tablename", "draws"]

            remote_job = job.Job(
                mon_dir=log_dir,
                name="save_" + str(meid),
                runfile=sge.true_path(executable="save_custom_results"),
                job_args=save_params)
            q.queue_job(
                remote_job,
                slots=40,
                memory=80,
                project="proj_epic")
        q.block_till_done(poll_interval=60)
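
The --me_map flag above carries nothing more than a JSON-serialized copy of the mapping popped from build_args. A minimal sketch of that round trip; the mapping contents are purely illustrative:

import json

# Illustrative contents only; the real me_map comes from self.build_args.
me_map = {"source_meid": 1234, "target_meid": 5678}
cli_arg = json.dumps(me_map)          # what execute() puts on the command line
assert json.loads(cli_arg) == me_map  # what run_ex_adjust.py can parse back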
Example #2
def saver(q, dirs, save_func, log_dir):
    # resolve the save executable once, then queue one save job per directory
    runfile = sge.true_path(executable=save_func)
    for d in dirs:
        # each output directory is named after the modelable entity id it holds
        meid = os.path.basename(d)
        params = [
            meid, "super-squeeze result", d, '--env', 'prod', '--file_pattern',
            '{location_id}_{year_id}_{sex_id}.h5', '--h5_tablename', 'draws',
            '--best'
        ]
        remote_job = job.Job(mon_dir=log_dir,
                             runfile=runfile,
                             name='ss_save_%s' % meid,
                             job_args=params)
        q.queue_job(remote_job,
                    slots=20,
                    memory=40,
                    project='proj_epic',
                    stderr='/FILEPATH',
                    stdout='/FILEPATH')

    # barrier: wait for every queued save job to finish
    q.block_till_done(poll_interval=60)
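
A hedged sketch of how saver might be driven; the queue handle, the glob pattern, and the log path are assumptions, not part of the snippet above:

import glob

# Hypothetical driver; q, the directory layout, and the paths are assumptions.
q = get_qmaster()                    # a qmaster handle as in the other examples
out_dirs = glob.glob("/FILEPATH/*")  # one directory per modelable entity id
saver(q, out_dirs, "save_custom_results", log_dir="/FILEPATH")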
Example #3
    def execute(self):

        # get args
        kwargs = self.build_args[1]
        parent_meid = kwargs["parent_meid"]
        env = "prod"

        # get qmaster
        q = self.get_qmaster()

        # submit split job
        remote_job = job.Job(
            mon_dir=log_dir,
            name="split_" + (str(parent_meid)),
            runfile=os.path.join(code_dir, "scripts", "FILEPATH.py"),
            job_args=[str(parent_meid), env])
        q.queue_job(
            remote_job,
            slots=49,
            memory=98,
            project="proj_epic")
        # barrier: the aggregation jobs consume the split job's outputs
        q.block_till_done(poll_interval=60)

        # submit aggregation/save jobs, one per child modelable entity
        outputs_tuples = self.builder.get_process_outputs(self.identity)
        children_meids = [task_tuple[0] for task_tuple in outputs_tuples]
        for meid in children_meids:
            # aggregate and mark best the latest model version of each child
            mvid = self._get_latest_mvid(meid)
            remote_job = job.Job(
                mon_dir=log_dir,
                name="save_" + str(mvid),
                runfile=sge.true_path(executable="aggregate_mvid"),
                job_args=[str(mvid), '--env', env, '--mark_best'])
            q.queue_job(
                remote_job,
                slots=40,
                memory=80,
                project="proj_epic")
        q.block_till_done(poll_interval=60)
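
Examples #1-#3 all share the same two-phase shape: queue a batch of jobs, use block_till_done as a barrier, then queue the batch that depends on the first. A stripped-down sketch of that skeleton; the script names, argument lists, and resource numbers are placeholders:

# Generic skeleton of the pattern above; every name here is illustrative.
q = self.get_qmaster()
for arg in first_stage_args:
    q.queue_job(
        job.Job(mon_dir=log_dir, runfile="stage_one.py",
                name="stage1_%s" % arg, job_args=[str(arg)]),
        slots=20, memory=40, project="proj_epic")
q.block_till_done(poll_interval=60)  # barrier: stage two reads stage one's outputs

for arg in second_stage_args:
    q.queue_job(
        job.Job(mon_dir=log_dir, runfile="stage_two.py",
                name="stage2_%s" % arg, job_args=[str(arg)]),
        slots=40, memory=80, project="proj_epic")
q.block_till_done(poll_interval=60)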
Example #4
def run_pipeline_como(
        root_dir,
        gbd_round_id=4,
        location_id=[],
        year_id=[],
        sex_id=[],
        age_group_id=[],
        measure_id=[],
        n_draws=1000,
        n_simulants=20000,
        components=["sequela", "cause", "impairment", "injuries"]):

    cv = ComoVersion.new(root_dir, gbd_round_id, location_id, year_id, sex_id,
                         age_group_id, measure_id, n_draws, components)

    cjm = None
    try:
        # start the central job monitor; fail fast if it cannot come up
        cjm = CentralJobMonitor(cv.como_dir, persistent=False)
        time.sleep(5)
    except Exception:
        # re-raise without rebinding so the original traceback is preserved
        raise
    else:
        executor_params = {"request_timeout": 10000}
        jobq = JobQueue(cv.como_dir,
                        scheduler=RetryScheduler,
                        executor=SGEExecutor,
                        executor_params=executor_params)

        # run nonfatal pipeline by location/sex
        parallelism = ["location_id", "sex_id"]
        for slices in cv.dimensions.index_slices(parallelism):
            jobname = "como_e_sim_{location_id}_{sex_id}".format(
                location_id=slices[0], sex_id=slices[1])
            job = jobq.create_job(
                jobname=jobname,
                runfile=true_path(executable="compute_nonfatal"),
                parameters=[
                    "--como_dir", cv.como_dir, "--location_id",
                    str(slices[0]), "--sex_id",
                    str(slices[1]), "--n_processes", "23", "--n_simulants",
                    str(n_simulants)
                ])
            jobq.queue_job(job,
                           slots=50,
                           memory=400,
                           project="proj_como",
                           process_timeout=(60 * 180))
        jobq.block_till_done(stop_scheduler_when_done=False)

        # run aggregation by year/sex/measure
        parallelism = ["year_id", "sex_id", "measure_id"]
        for slices in cv.dimensions.index_slices(parallelism):
            for component in cv.components:
                # sequelae aggregate over location set 35 only; the other
                # components also aggregate over the SDI set (40)
                if component != "sequela":
                    loc_sets = [35, 40]
                else:
                    loc_sets = [35]
                for location_set_id in loc_sets:
                    jobname = ("como_e_agg_{component}_{year_id}_{sex_id}"
                               "_{measure_id}_{location_set_id}").format(
                                   component=component,
                                   year_id=slices[0],
                                   sex_id=slices[1],
                                   measure_id=slices[2],
                                   location_set_id=location_set_id)
                    job = jobq.create_job(
                        jobname=jobname,
                        runfile=true_path(executable="aggregate_nonfatal"),
                        parameters=[
                            "--como_dir", cv.como_dir, "--component",
                            component, "--year_id",
                            str(slices[0]), "--sex_id",
                            str(slices[1]), "--measure_id",
                            str(slices[2]), "--location_set_id",
                            str(location_set_id)
                        ])
                    jobq.queue_job(job,
                                   slots=25,
                                   memory=200,
                                   project="proj_como",
                                   process_timeout=(60 * 600))
        jobq.block_till_done(stop_scheduler_when_done=False)

        # run summaries by component/location
        lt = dbtrees.loctree(None, 35)
        sdi_lts = dbtrees.loctree(None, 40, return_many=True)
        locs = [node.id for node in lt.nodes]
        sdi_locs = [tree.root.id for tree in sdi_lts]
        for component in cv.components:
            if component != "sequela":
                summ_locs = locs + sdi_locs
            else:
                summ_locs = locs[:]
            for location_id in summ_locs:
                jobname = "como_e_summ_{component}_{location_id}".format(
                    component=component, location_id=location_id)
                job = jobq.create_job(
                    jobname=jobname,
                    runfile=true_path(executable="summarize_nonfatal"),
                    parameters=[
                        "--como_dir", cv.como_dir, "--component", component,
                        "--location_id",
                        str(location_id)
                    ])
                jobq.queue_job(job,
                               slots=48,
                               memory=96,
                               project="proj_como",
                               process_timeout=(60 * 240))
        jobq.block_till_done(stop_scheduler_when_done=False)

        for component in cv.components:
            jobname = "como_e_upload_{component}".format(component=component)
            job = jobq.create_job(
                jobname=jobname,
                runfile=true_path(executable="upload_nonfatal"),
                parameters=[
                    "--como_dir", cv.como_dir, "--component", component,
                    "--location_id",
                    " ".join([str(l) for l in locs + sdi_locs])
                ])
            jobq.queue_job(job,
                           slots=20,
                           memory=40,
                           project="proj_como",
                           process_timeout=(60 * 720))
        jobq.block_till_done()

    finally:
        # tear down the monitor even when a stage fails; skip if it never started
        if cjm is not None:
            cjm.generate_report()
            cjm.stop_responder()
            cjm.stop_publisher()
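
A minimal invocation sketch for run_pipeline_como; the root directory is a placeholder, and treating the empty-list defaults as "all values" is an assumption about ComoVersion.new:

# Hypothetical call; root_dir is a placeholder and the defaults are assumed
# to mean "use every location/year/sex/age/measure".
run_pipeline_como(
    root_dir="/FILEPATH",
    gbd_round_id=4,
    n_draws=1000,
    n_simulants=20000)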