Ejemplo n.º 1
0
class SexualViolenceInputCollector:
    """Read sexual-violence injury draws for a COMO version.

    The collector looks up every (modelable_entity_id, model_version_id)
    pair flagged as a sexual-violence sequela on the COMO version, reads
    each one through a draw source, interpolates years that were requested
    but not returned, and optionally resamples the draws. Reads can be
    fanned out over worker processes with
    ``collect_sexual_violence_inputs``.
    """

    def __init__(self, como_version, location_id, year_id, age_group_id,
                 sex_id):
        """Store the COMO version and narrow the simulation index.

        Args:
            como_version: object exposing ``gbd_round_id``,
                ``nonfatal_dimensions``, ``mvid_list`` and
                ``sexual_violence_sequela``.
            location_id: replacement for the ``location_id`` entry of the
                simulation index; a falsy value keeps the existing entry.
            year_id: same, for ``year_id``.
            age_group_id: same, for ``age_group_id``.
            sex_id: same, for ``sex_id``.
        """
        self.como_version = como_version
        self._estim_years = estimation_years_from_gbd_round_id(
            self.como_version.gbd_round_id)

        # set up draw source factory
        self._ss_factory = SourceSinkFactory(como_version)

        # set up the dimensions we are using
        # NOTE(review): the overrides below are written into the object
        # returned by ``como_version.nonfatal_dimensions``; if that
        # attribute hands back a shared instance, other users of the COMO
        # version see the narrowed index too -- confirm.
        self.dimensions = self.como_version.nonfatal_dimensions
        overrides = (
            ("location_id", location_id),
            ("year_id", year_id),
            ("sex_id", sex_id),
            ("age_group_id", age_group_id),
        )
        for level_name, level_values in overrides:
            if level_values:
                self.dimensions.simulation_index[level_name] = level_values

    @property
    def _sexual_violence_set(self):
        """Return the unique (modelable_entity_id, model_version_id) pairs.

        The pairs come from merging ``como_version.mvid_list`` with
        ``como_version.sexual_violence_sequela`` on ``modelable_entity_id``.
        The merge is redone on every access and the order of the returned
        list is not defined (it is built from a set).
        """
        memv_df = self.como_version.mvid_list.merge(
            self.como_version.sexual_violence_sequela,
            on='modelable_entity_id')
        pairs = zip(memv_df.modelable_entity_id, memv_df.model_version_id)
        return list(set(pairs))

    def read_single_sexual_violence_injury(
            self,
            modelable_entity_id,
            model_version_id,
            measure_id=None):
        """Read, interpolate and resample draws for one model version.

        Args:
            modelable_entity_id: modelable entity to read.
            model_version_id: model version of that entity.
            measure_id: measure ids used to build the simulation
                dimensions. ``None`` (the default) means
                ``[measures.YLD, measures.PREVALENCE]``.

        Returns:
            DataFrame restricted to the requested years, with interpolated
            rows appended when the source lacked requested years.

        Raises:
            Exception: if the draw source returns no rows.
        """
        # Function-scope import so the block does not depend on how the
        # enclosing module happens to import pandas.
        import pandas as pd

        if measure_id is None:
            # Built per call instead of as a shared mutable default.
            measure_id = [measures.YLD, measures.PREVALENCE]

        injury_source = (
            self._ss_factory.get_sexual_violence_modelable_entity_source(
                modelable_entity_id, model_version_id))
        dim = self.dimensions.get_simulation_dimensions(measure_id=measure_id,
                                                        at_birth=False)

        # get filters w/ added years if interpolation is needed
        filters = dim.index_dim.to_dict()["levels"]
        req_years = filters["year_id"]
        if not set(req_years).issubset(set(self._estim_years)):
            # Also read the estimation years so the requested,
            # non-estimation years can be interpolated from them.
            filters["year_id"] = list(set(req_years + self._estim_years))

        # read data
        df = injury_source.content(filters=filters)
        if df.empty:
            raise Exception(f"No data returned for ME {modelable_entity_id}, "
                            f"model version {model_version_id}.")
        draw_cols = [col for col in df.columns if "draw_" in col]

        # add indices to dimensions object from draw source transforms
        dim.index_dim.add_level("cause_id", df.cause_id.unique().tolist())

        # interpolate missing years
        if not set(df.year_id.unique()).issuperset(set(req_years)):
            # Interpolate from the full read before dropping the extra
            # years that were only fetched to support interpolation.
            interp_df = pchip_interpolate(df=df,
                                          id_cols=dim.index_names,
                                          value_cols=draw_cols,
                                          time_col="year_id",
                                          time_vals=req_years)
            # DataFrame.append was removed in pandas 2.0; concat is the
            # equivalent that exists in every pandas version.
            df = pd.concat([df[df.year_id.isin(req_years)], interp_df])
        else:
            df = df[df.year_id.isin(req_years)]

        # resample
        if len(dim.data_list()) != len(draw_cols):
            gbdizer = gbdize.GBDizeDataFrame(dim)
            df = gbdizer.correlated_percentile_resample(df)

        return df

    def _q_read_single_sexual_violence_injury(self, inq, outq):
        """Worker loop: read argument tuples from ``inq`` until SENTINEL.

        Each tuple is unpacked into ``read_single_sexual_violence_injury``.
        The result, or an ``ExceptionWrapper`` around any exception raised,
        is put on ``outq`` so exactly one item is produced per task.
        """
        for arglist in iter(inq.get, SENTINEL):
            try:
                result = self.read_single_sexual_violence_injury(*arglist)
            except Exception as e:
                print(arglist)
                result = ExceptionWrapper(e)
            outq.put(result)

    def collect_sexual_violence_inputs(self,
                                       n_processes=20,
                                       measure_id=None):
        """Read every sexual-violence model version in worker processes.

        Args:
            n_processes: upper bound on the number of worker processes; no
                more workers than tasks are started.
            measure_id: forwarded unchanged to
                ``read_single_sexual_violence_injury`` (``None`` selects
                that method's default measures).

        Returns:
            List with one entry per (modelable_entity_id,
            model_version_id) pair, in the order workers deliver them
            rather than in task order. Failed reads show up as
            ``ExceptionWrapper`` objects.

        Raises:
            ValueError: if there is work to do but ``n_processes`` < 1.
        """
        # Evaluate the property once: each access redoes the merge.
        tasks = self._sexual_violence_set
        if not tasks:
            return []

        n_workers = min(n_processes, len(tasks))
        if n_workers < 1:
            # Without a worker the result loop below would block forever.
            raise ValueError("n_processes must be at least 1")

        # spin up xcom queues
        inq = Queue()
        outq = Queue()

        # Create and feed reader procs
        read_procs = []
        for _ in range(n_workers):
            p = Process(target=self._q_read_single_sexual_violence_injury,
                        args=(inq, outq))
            read_procs.append(p)
            p.start()

        for readkey in tasks:
            inq.put(readkey + (measure_id, ))

        # make the workers die after
        for _ in read_procs:
            inq.put(SENTINEL)

        # get results: drain the output queue before joining so workers
        # are not left blocked on unread results
        result_list = [outq.get() for _ in tasks]

        # close up the queue
        for p in read_procs:
            p.join()

        return result_list
Ejemplo n.º 2
0
class SexualViolenceInputCollector(object):
    """Read sexual-violence injury draws for a COMO version.

    The collector looks up every (modelable_entity_id, model_version_id)
    pair flagged as a sexual-violence sequela on the COMO version, reads
    each one through a draw source, interpolates years that were requested
    but not returned, and resamples when the simulation dimensions do not
    carry 1000 draws. Reads can be fanned out over worker processes with
    ``collect_sexual_violence_inputs``.
    """

    # Years for which the source is expected to hold estimates; requested
    # years outside this list are interpolated.
    _estim_years = [1990, 1995, 2000, 2005, 2010, 2017]
    # Names of the 1000 draw columns (draw_0 .. draw_999).
    _draw_cols = ["draw_{}".format(i) for i in range(1000)]

    def __init__(self,
                 como_version,
                 location_id=None,
                 year_id=None,
                 age_group_id=None,
                 sex_id=None):
        """Store the COMO version and narrow the simulation index.

        Args:
            como_version: object exposing ``nonfatal_dimensions``,
                ``mvid_list`` and ``sexual_violence_sequela``.
            location_id: replacement for the ``location_id`` entry of the
                simulation index; ``None`` or an empty list keeps the
                existing entry.
            year_id: same, for ``year_id``.
            age_group_id: same, for ``age_group_id``.
            sex_id: same, for ``sex_id``.
        """
        self.como_version = como_version

        # set up draw source factory
        self._ss_factory = SourceSinkFactory(como_version)

        # set up the dimensions we are using
        # NOTE(review): the overrides below are written into the object
        # returned by ``como_version.nonfatal_dimensions``; if that
        # attribute hands back a shared instance, other users of the COMO
        # version see the narrowed index too -- confirm.
        self.dimensions = self.como_version.nonfatal_dimensions
        overrides = (
            ("location_id", location_id),
            ("year_id", year_id),
            ("sex_id", sex_id),
            ("age_group_id", age_group_id),
        )
        for level_name, level_values in overrides:
            if level_values:
                self.dimensions.simulation_index[level_name] = level_values

    @property
    def _sexual_violence_set(self):
        """Return the unique (modelable_entity_id, model_version_id) pairs.

        The pairs come from merging ``como_version.mvid_list`` with
        ``como_version.sexual_violence_sequela`` on ``modelable_entity_id``.
        The merge is redone on every access and the order of the returned
        list is not defined (it is built from a set).
        """
        memv_df = self.como_version.mvid_list.merge(
            self.como_version.sexual_violence_sequela,
            on='modelable_entity_id')
        pairs = zip(memv_df.modelable_entity_id, memv_df.model_version_id)
        return list(set(pairs))

    def read_single_sexual_violence_injury(self,
                                           modelable_entity_id,
                                           model_version_id,
                                           measure_id=None):
        """Read, interpolate and resample draws for one model version.

        Args:
            modelable_entity_id: modelable entity to read.
            model_version_id: model version of that entity.
            measure_id: measure ids used to build the simulation
                dimensions. ``None`` (the default) means ``[3, 5]``
                (presumably YLD and prevalence -- confirm against the
                measure table).

        Returns:
            DataFrame restricted to the requested years, with interpolated
            rows appended when the source lacked requested years.

        Raises:
            Exception: if the draw source returns no rows.
        """
        # Function-scope import so the block does not depend on how the
        # enclosing module happens to import pandas.
        import pandas as pd

        if measure_id is None:
            # Built per call instead of as a shared mutable default.
            measure_id = [3, 5]

        injury_source = (
            self._ss_factory.get_sexual_violence_modelable_entity_source(
                modelable_entity_id, model_version_id))
        dim = self.dimensions.get_simulation_dimensions(measure_id)

        # get filters w/ added years if interpolation is needed
        filters = dim.index_dim.to_dict()["levels"]
        req_years = filters["year_id"]
        if not set(req_years).issubset(set(self._estim_years)):
            # Also read the estimation years so the requested,
            # non-estimation years can be interpolated from them.
            filters["year_id"] = list(set(req_years + self._estim_years))

        # read data
        df = injury_source.content(filters=filters)
        if df.empty:
            raise Exception("No data returned for meid:{} and mvid:{}".format(
                modelable_entity_id, model_version_id))

        # add indices to dimensions object from draw source transforms
        dim.index_dim.add_level("cause_id", df.cause_id.unique().tolist())

        # interpolate missing years
        if not set(df.year_id.unique()).issuperset(set(req_years)):
            # Interpolate from the full read before dropping the extra
            # years that were only fetched to support interpolation.
            interp_df = pchip_interpolate(df=df,
                                          id_cols=dim.index_names,
                                          value_cols=self._draw_cols,
                                          time_col="year_id",
                                          time_vals=req_years)
            # DataFrame.append was removed in pandas 2.0; concat is the
            # equivalent that exists in every pandas version.
            df = pd.concat([df[df.year_id.isin(req_years)], interp_df])
        else:
            df = df[df.year_id.isin(req_years)]

        # NOTE: the original marks this spot for birth-prevalence handling;
        # nothing is done here.

        # resample if ndraws is less than 1000
        if len(dim.data_list()) != len(self._draw_cols):
            gbdizer = gbdize.GBDizeDataFrame(dim)
            df = gbdizer.correlated_percentile_resample(df)

        return df

    def _q_read_single_sexual_violence_injury(self, inq, outq):
        """Worker loop: read argument tuples from ``inq`` until sentinel.

        Each tuple is unpacked into ``read_single_sexual_violence_injury``.
        The result, or an ``ExceptionWrapper`` around any exception raised,
        is put on ``outq`` so exactly one item is produced per task.
        """
        for arglist in iter(inq.get, sentinel):
            try:
                result = self.read_single_sexual_violence_injury(*arglist)
            except Exception as e:
                print(arglist)
                result = ExceptionWrapper(e)
            outq.put(result)

    def collect_sexual_violence_inputs(self,
                                       n_processes=20,
                                       measure_id=None):
        """Read every sexual-violence model version in worker processes.

        Args:
            n_processes: upper bound on the number of worker processes; no
                more workers than tasks are started.
            measure_id: forwarded unchanged to
                ``read_single_sexual_violence_injury`` (``None`` selects
                that method's default measures).

        Returns:
            List with one entry per (modelable_entity_id,
            model_version_id) pair, in the order workers deliver them
            rather than in task order. Failed reads show up as
            ``ExceptionWrapper`` objects.

        Raises:
            ValueError: if there is work to do but ``n_processes`` < 1.
        """
        # Evaluate the property once: each access redoes the merge.
        tasks = self._sexual_violence_set
        if not tasks:
            return []

        # The original compared n_processes with the task *list* itself
        # (TypeError on Python 3); the cap is the number of tasks.
        n_workers = min(n_processes, len(tasks))
        if n_workers < 1:
            # Without a worker the result loop below would block forever.
            raise ValueError("n_processes must be at least 1")

        # spin up xcom queues
        inq = Queue()
        outq = Queue()

        # Create and feed reader procs
        read_procs = []
        for _ in range(n_workers):
            p = Process(target=self._q_read_single_sexual_violence_injury,
                        args=(inq, outq))
            read_procs.append(p)
            p.start()

        for readkey in tasks:
            inq.put(readkey + (measure_id, ))

        # make the workers die after
        for _ in read_procs:
            inq.put(sentinel)

        # get results: drain the output queue before joining so workers
        # are not left blocked on unread results
        result_list = [outq.get() for _ in tasks]

        # close up the queue
        for p in read_procs:
            p.join()

        return result_list