Example #1
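# Assumed imports for this snippet: gopher, getset, and ClassProperty are
# internal helpers from the surrounding codebase (not shown here).
import collections

import pandas as pd
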
class SquareImport(object):

    _idx_dmnsns = {
        "year_id": [{YEAR IDS}],
        "age_group_id": getset.get_age_group_set({AGE GROUP SET ID})["age_group_id"],
        "sex_id": [{SEX IDS}],
        "location_id": getset.get_most_detailed_location_ids(),
        "measure_id": [{MEASURE IDS}]
    }
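    # values in braces (e.g. {YEAR IDS}) are redacted run-specific inputs;
    # Example #5 below shows one concrete instantiation of these dimensions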

    _draw_cols = ["draw_{i}".format(i=i) for i in range(1000)]

    def __init__(self, idx_dmnsns=None, draw_cols=None):

        if idx_dmnsns is None:
            self.idx_dmnsns = collections.OrderedDict(
                sorted(self.default_idx_dmnsns.items()))
        else:
            self.idx_dmnsns = collections.OrderedDict(
                sorted(idx_dmnsns.items()))

        if draw_cols is None:
            self.draw_cols = self.default_draw_cols
        else:
            self.draw_cols = draw_cols

        # expected index
        self.index_df = self.get_index_df()

    @ClassProperty
    @classmethod
    def default_idx_dmnsns(cls):
        return cls._idx_dmnsns.copy()

    @ClassProperty
    @classmethod
    def default_draw_cols(cls):
        return cls._draw_cols[:]

    def get_index_df(self):
        """create template index for square dataset"""
        idx = pd.MultiIndex.from_product(
            list(self.idx_dmnsns.values()),
            names=list(self.idx_dmnsns.keys()))
        return pd.DataFrame(index=idx)

    def import_square(self, gopher_what, source, filler=None, **kwargs):
        """get draws for the specified modelable entity by dimensions"""
        if not kwargs:
            kwargs = self.idx_dmnsns.copy()

            # replace keys with their plural form for gopher.draws
            for k in list(kwargs):  # snapshot keys; the dict is mutated below
                kwargs[k + "s"] = kwargs.pop(k)

        if filler is None:
            filler = 0

        df = gopher.draws(gopher_what, source=source, verbose=False,
                          **kwargs)
        for c in self.idx_dmnsns.keys():
            df[c] = pd.to_numeric(df[c])
        df = df.set_index(list(self.idx_dmnsns.keys()))
        df = df[self.draw_cols]
        df = pd.concat([self.index_df, df], axis=1)
        df.fillna(value=filler, inplace=True)
        return df
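
A minimal usage sketch, assuming gopher.draws accepts a
{"modelable_entity_ids": [...]} mapping as its first argument (the ME ID
below is hypothetical):

    importer = SquareImport()  # uses the default dimensions above
    square = importer.import_square({"modelable_entity_ids": [1234]},
                                    source="epi")
    # square has one row per index combination, zero-filled wherever the
    # source returned no draws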
Example #2
    def execute(self):

        # compile submission arguments
        me_map = self.build_args[0][0]

        # make server directory
        directory = "{root}/{proc}".format(root=root, proc=self.identity)
        if os.path.exists(directory):
            shutil.rmtree(directory)
        os.makedirs(directory)

        # make output directories
        save_ids = []
        for mapper in me_map.values():
            outputs = mapper.get("trgs", {})
            for me_id in outputs.values():
                os.makedirs(os.path.join(directory, str(me_id)))
                save_ids.append(me_id)

        attr_params = [
            "--me_map",
            json.dumps(me_map), "--out_dir", directory, "--location_id"
        ]

        q = qmaster.MonitoredQ(directory, request_timeout=120000)  # monitor
        try:

            # attribution jobs by location_id
            for i in getset.get_most_detailed_location_ids():
                q.qsub(runfile="{root}/infertility/female_attr.py".format(
                    root=root),
                       jobname="{proc}_{loc}".format(proc=self.identity,
                                                     loc=i),
                       parameters=attr_params + [i],
                       slots=4,
                       memory=8,
                       project="proj_custom_models")
                time.sleep(1.5)
            q.qblock(poll_interval=60)  # monitor them
            fail = q.manager.query(
                "select count(*) as num from job where current_status != 5;"
            )[1]["num"].item()
            if fail > 0:
                raise RuntimeError(
                    "{} attribution jobs did not finish".format(fail))

            # save the results
            for save_id in save_ids:

                save_params = [
                    save_id, female_attr_description,
                    os.path.join(directory, str(save_id)), "--best", "--sexes",
                    "2", "--file_pattern", "{location_id}.h5",
                    "--h5_tablename", "data"
                ]
                q.qsub(runfile=("/home/j/WORK/10_gbd/00_library/adding_machine"
                                "/bin/save_custom_results"),
                       jobname="save_" + str(save_id),
                       parameters=save_params,
                       slots=20,
                       memory=40,
                       project="proj_custom_models")
            q.qblock(poll_interval=60)  # monitor them
            fail = q.manager.query(
                "select count(*) as num from job where current_status != 5;"
            )[1]["num"].item()
            if fail > 0:
                raise RuntimeError(
                    "{} save jobs did not finish".format(fail))
        finally:
            q.stop_monitor()  # stop monitor
Example #3
    def execute(self):

        # compile submission arguments
        me_map = {
            "env": {
                "srcs": {
                    "prim": "3339",
                    "sec": "3341"
                }
            },
            "idio": {
                "trgs": {
                    "prim": "2061",
                    "sec": "2062"
                }
            },
            "kline": {
                "srcs": {
                    "bord": "2218",
                    "mild": "2659",
                    "asym": "3069"
                }
            },
            "cong_uro": {
                "srcs": {
                    "inf_only": "11033",
                    "ag": "11037",
                    "uti": "11039",
                    "imp": "11041",
                    "ag_uti": "11042",
                    "ag_imp": "11044",
                    "imp_uti": "11045",
                    "ag_imp_uti": "11046"
                }
            }
        }

        # make server directory
        directory = "{root}/{proc}".format(root=root,
                                           proc='male_cause_attribution')

        if os.path.exists(directory):
            shutil.rmtree(directory)
        os.makedirs(directory)

        # make output directories
        save_ids = []
        for mapper in me_map.values():
            outputs = mapper.get("trgs", {})
            for me_id in outputs.values():
                if not os.path.exists(os.path.join(directory, str(me_id))):
                    os.makedirs(os.path.join(directory, str(me_id)))
                save_ids.append(me_id)

        attr_params = [
            "--me_map", "\'{}\'".format(json.dumps(me_map)), "--out_dir",
            directory, "--location_id"
        ]

        # attribution jobs by location_id
        male_attr_jobs = []
        for i in getset.get_most_detailed_location_ids():
            male_attr_job = "male_attr_{}".format(i)
            male_attr_jobs.append(male_attr_job)
            call = ('qsub -hold_jid {hj}'
                    ' -pe multi_slot 4'
                    ' -cwd -P proj_custom_models'
                    ' -o {o}'
                    ' -e {e}'
                    ' -N {jn}'
                    ' cluster_shell.sh'
                    ' male_attr.py'
                    ' {arg1}'.format(hj=self.hold,
                                     o=output_path,
                                     e=error_path,
                                     jn=male_attr_job,
                                     arg1=' '.join(attr_params + [str(i)])))
            subprocess.call(call, shell=True)

        # save the results, holding on all attribution jobs
        hold = ",".join(male_attr_jobs)
        for save_id in save_ids:
            save_params = [
                str(save_id), "--description",
                "\'{}\'".format(male_attr_description), "--input_dir",
                os.path.join(directory, str(save_id)), "--best", "--sexes",
                "1", "--meas_ids", "5", "--file_pattern", "{location_id}.h5"
            ]

            call = ('qsub -hold_jid {hj} -pe multi_slot 15'
                    ' -cwd -P proj_custom_models'
                    ' -o {o}'
                    ' -e {e}'
                    ' -N {jn}'
                    ' cluster_shell.sh'
                    ' save.py'
                    ' {arg1}'.format(hj=hold,
                                     o=output_path,
                                     e=error_path,
                                     jn='male_attr_save_{}'.format(save_id),
                                     arg1=' '.join(save_params)))
            subprocess.call(call, shell=True)
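
For reference, one rendered attribution submission looks roughly like this
(paths, hold list, and location ID are illustrative):

    qsub -hold_jid <held_jobs> -pe multi_slot 4 -cwd -P proj_custom_models \
        -o <output_path> -e <error_path> -N male_attr_101 \
        cluster_shell.sh male_attr.py --me_map '<json>' --out_dir <dir> --location_id 101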
Example #4
    def execute(self):

        # compile submission arguments
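        # me_map groups modelable entities by attribution role ("type"), with
        # source ("srcs") and target ("trgs") ME IDs stored as strings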
        me_map = {
            "env": {
                "type": "envelope",
                "srcs": {
                    "prim": "3340",
                    "sec": "3342"
                }
            },
            "resid": {
                "type": "residual",
                "trgs": {
                    "prim": "2071",
                    "sec": "2072"
                }
            },
            "excess": {
                "type": "excess",
                "trgs": {
                    "endo": "9748",
                    "pcos": "9743"
                }
            },
            "cong_uro": {
                "type": "locked",
                "srcs": {
                    "inf_only": "11033",
                    "ag": "11037",
                    "uti": "11039",
                    "imp": "11041",
                    "ag_uti": "11042",
                    "ag_imp": "11044",
                    "imp_uti": "11045",
                    "ag_imp_uti": "11046"
                }
            },
            "turner": {
                "type": "locked",
                "srcs": {
                    "nohf": "2208",
                    "hf": "2653"
                }
            },
            "sepsis": {
                "type": "sub_group",
                "srcs": {
                    "tot": "2624"
                },
                "trgs": {
                    "sec": "9678"
                }
            },
            "pcos_asymp": {
                "type": "sub_group",
                "excess": "pcos",
                "srcs": {
                    "tot": "9675"
                },
                "trgs": {
                    "prim": "2069",
                    "sec": "3088"
                }
            },
            "pcos_disfig": {
                "type": "sub_group",
                "excess": "pcos",
                "srcs": {
                    "tot": "9676"
                },
                "trgs": {
                    "prim": "2938",
                    "sec": "3087"
                }
            },
            "endo_asymp": {
                "type": "sub_group",
                "excess": "endo",
                "srcs": {
                    "tot": "9671"
                },
                "trgs": {
                    "prim": "2076",
                    "sec": "2077"
                }
            },
            "endo_mild": {
                "type": "sub_group",
                "excess": "endo",
                "srcs": {
                    "tot": "9672"
                },
                "trgs": {
                    "prim": "2959",
                    "sec": "2962"
                }
            },
            "endo_mod": {
                "type": "sub_group",
                "excess": "endo",
                "srcs": {
                    "tot": "9673"
                },
                "trgs": {
                    "prim": "2960",
                    "sec": "2963"
                }
            },
            "endo_sev": {
                "type": "sub_group",
                "excess": "endo",
                "srcs": {
                    "tot": "9674"
                },
                "trgs": {
                    "prim": "2961",
                    "sec": "2964"
                }
            },
            "gono": {
                "type": "sub_group",
                "srcs": {
                    "tot": "3023"
                },
                "trgs": {
                    "prim": "1639",
                    "sec": "1640"
                }
            },
            "chlam": {
                "type": "sub_group",
                "srcs": {
                    "tot": "3022"
                },
                "trgs": {
                    "prim": "1633",
                    "sec": "1634"
                }
            },
            "otherstd": {
                "type": "sub_group",
                "srcs": {
                    "tot": "3024"
                },
                "trgs": {
                    "prim": "1645",
                    "sec": "1646"
                }
            }
        }

        # make server directory
        directory = "{root}/{proc}".format(root=root,
                                           proc='female_cause_attribution')

        if os.path.exists(directory):
            shutil.rmtree(directory)
        os.makedirs(directory)

        # make output directories
        save_ids = []
        for mapper in me_map.values():
            outputs = mapper.get("trgs", {})
            for me_id in outputs.values():
                if not os.path.exists(os.path.join(directory, str(me_id))):
                    os.makedirs(os.path.join(directory, str(me_id)))
                save_ids.append(me_id)

        attr_params = [
            "--me_map", "\'{}\'".format(json.dumps(me_map)), "--out_dir",
            directory, "--location_id"
        ]

        # parallelize by location
        fem_attr_jobs = []
        for i in getset.get_most_detailed_location_ids():
            fem_attr_job = "fem_attr_{}".format(i)
            fem_attr_jobs.append(fem_attr_job)
            call = ('qsub -hold_jid {hj}'
                    ' -pe multi_slot 4'
                    ' -cwd -P proj_custom_models'
                    ' -o {o}'
                    ' -e {e}'
                    ' -N {jn}'
                    ' cluster_shell.sh'
                    ' female_attr.py'
                    ' {arg1}'.format(hj=self.hold,
                                     o=output_path,
                                     e=error_path,
                                     jn=fem_attr_job,
                                     arg1=' '.join(attr_params + [str(i)])))
            subprocess.call(call, shell=True)

        # save the results, holding on all attribution jobs
        hold = ",".join(fem_attr_jobs)
        for save_id in save_ids:

            save_params = [
                str(save_id), "--description",
                "\'{}\'".format(female_attr_description), "--input_dir",
                os.path.join(directory, str(save_id)), "--best", "--sexes",
                "2", "--meas_ids", "5", "--file_pattern", "{location_id}.h5"
            ]

            job_name = 'fem_attr_save_{}'.format(save_id)
            self.save_hold = self.save_hold + ',' + job_name
            call = ('qsub -hold_jid {hj} -pe multi_slot 15'
                    ' -cwd -P proj_custom_models'
                    ' -o {o}'
                    ' -e {e}'
                    ' -N {jn}'
                    ' cluster_shell.sh'
                    ' save.py'
                    ' {arg1}'.format(hj=hold,
                                     o=output_path,
                                     e=error_path,
                                     jn=job_name,
                                     arg1=' '.join(save_params)))
            subprocess.call(call, shell=True)
        return self.save_hold
Example #5
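# Assumed imports for this snippet: getset and ClassProperty are internal
# helpers; get_draws is the shared draw interface (import path assumed).
import collections

import pandas as pd
from get_draws.api import get_draws
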
class SquareImport(object):

    _idx_dmnsns = {
        "year_id": [1990, 1995, 2000, 2005, 2010, 2015, 2017, 2019],
        #"age_group_id": getset.get_age_group_set(12)["age_group_id"],
        "age_group_id": [i for i in range(2, 21, 1)] + [30, 31, 32, 235, 164],
        "sex_id": [1, 2],
        "location_id": getset.get_most_detailed_location_ids(),
        "measure_id": [5, 6]
    }

    _draw_cols = ["draw_{i}".format(i=i) for i in range(1000)]

    def __init__(self, idx_dmnsns=None, draw_cols=None):

        if idx_dmnsns is None:
            self.idx_dmnsns = collections.OrderedDict(
                sorted(self.default_idx_dmnsns.items()))
        else:
            self.idx_dmnsns = collections.OrderedDict(
                sorted(idx_dmnsns.items()))

        if draw_cols is None:
            self.draw_cols = self.default_draw_cols
        else:
            self.draw_cols = draw_cols

        # expected index
        self.index_df = self.get_index_df()

    @ClassProperty
    @classmethod
    def default_idx_dmnsns(cls):
        return cls._idx_dmnsns.copy()

    @ClassProperty
    @classmethod
    def default_draw_cols(cls):
        return cls._draw_cols[:]

    def get_index_df(self):
        """create template index for square dataset"""
        idx = pd.MultiIndex.from_product(list(self.idx_dmnsns.values()),
                                         names=list(self.idx_dmnsns.keys()))
        return pd.DataFrame(index=idx)

    def import_square(self, meid, source, filler=None, **kwargs):
        """get draws for the specified modelable entity by dimensions"""
        if not kwargs:
            kwargs = self.idx_dmnsns.copy()

            # get_draws filters on the singular dimension names, so the key
            # pluralization that gopher.draws required is no longer needed

        if filler is None:
            filler = 0

        df = get_draws('modelable_entity_id',
                       meid,
                       source=source,
                       status='latest',
                       decomp_step='step1',
                       **kwargs)
        for c in self.idx_dmnsns.keys():
            df[c] = pd.to_numeric(df[c])
        df = df.set_index(list(self.idx_dmnsns.keys()))
        df = df[self.draw_cols]
        df = pd.concat([self.index_df, df], axis=1)
        df.fillna(value=filler, inplace=True)
        return df
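
A minimal usage sketch (the ME ID is hypothetical; the source and decomp
step follow the values hard-coded in import_square):

    importer = SquareImport()
    square = importer.import_square(meid=1234, source="epi")
    # every index combination is present, zero-filled where no draws exist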
Example #6
    def execute(self):

        # compile submission arguments
        me_map = self.build_args[0][0]

        # make server directory
        directory = "{root}/{proc}".format(root=root, proc=self.identity)
        if os.path.exists(directory):
            shutil.rmtree(directory)
        os.makedirs(directory)

        # make output directories
        save_ids = []
        for mapper in me_map.values():
            outputs = mapper.get("trgs", {})
            for me_id in outputs.values():
                os.makedirs(os.path.join(directory, str(me_id)))
                save_ids.append(me_id)

        attr_params = ["--me_map", json.dumps(me_map),
                       "--out_dir", directory,
                       "--location_id"]

        q = self.get_qmaster()
        # attribution jobs by location_id
        for i in getset.get_most_detailed_location_ids():
            remote_job = job.Job(
                mon_dir=log_dir,
                name="{proc}_{loc}".format(proc=self.identity,
                                           loc=i),
                runfile="{root}/infertility/female_attr.py".format(
                    root=root),
                job_args=attr_params + [str(i)])
            q.queue_job(
                remote_job,
                slots=4,
                memory=8,
                project={PROJECT},
                stderr=log_dir,
                stdout=log_dir)
            time.sleep(1.5)
        q.block_till_done(poll_interval=60)  # monitor them

        # save the results
        for save_id in save_ids:

            save_params = [
                str(save_id), female_attr_description,
                os.path.join(directory, str(save_id)), "--best",
                "--sexes", "{SEX ID}", "--file_pattern", "{location_id}.h5",
                "--h5_tablename", "data"]

            remote_job = job.Job(
                mon_dir=log_dir,
                name="save_" + str(save_id),
                runfile=("{FILEPATH}/save_custom_results"),
                job_args=save_params)
            q.queue_job(
                remote_job,
                slots=20,
                memory=40,
                project={PROJECT},
                stderr=log_dir,
                stdout=log_dir)
        q.block_till_done(poll_interval=60)  # monitor them