Example #1
0
    def set_remission(self, remission_df):
        """Compute aggregate remission statistics and store them on self.

        Sets ``self.mean_re``, ``self.se_re`` and ``self.re_upper``. When
        *remission_df* is missing, or its mean is 0 (so an aggregate
        standard error cannot be derived), all three are assigned 0.
        """
        # missing remission data -> assign the zero case
        if remission_df is None:
            self.mean_re = 0
            self.se_re = 0
            self.re_upper = 0
            return

        mean_re = np.mean(remission_df["mean"])

        # a zero mean also forces the zero case: no aggregate standard
        # error can be calculated from it
        if mean_re == 0:
            self.mean_re = 0
            self.se_re = 0
            self.re_upper = 0
            return

        # non-zero mean: derive per-row standard errors from the
        # uncertainty intervals, aggregate them, and compute the upper bound
        remission_df["se"] = remission_df.apply(
            lambda row: se_from_ui(row["mean"], row["lower"], row["upper"],
                                   method="non-ratio"),
            axis=1)
        se_re = aggregate_se(remission_df["mean"],
                             remission_df["se"])["se"].item()
        # NOTE(review): upper < 1 presumably flags a long duration handled
        # by prevalence-only modelling elsewhere -- confirm with callers
        self.re_upper = upper_from_se(mean_re, se_re, param_type="rate")
        self.mean_re = mean_re
        self.se_re = se_re
Example #2
0
    def pred_emr(self):
        """Pull predicted EMR for this model version and aggregate it by
        input_data_key.

        Reads the model_estimate_fit.csv produced by the cascade run
        (prod/dev trees differ only in the path), keeps EMR rows
        (measure_id 9), duplicates them onto the adjusted-data template and
        aggregates per input_data_key.

        Returns:
            DataFrame with columns input_data_key, mean_emr, se_emr --
            only rows with a positive mean and non-zero standard error.
        """

        # get template for merging
        adj_data_plate = self.construct_template(self.adj_data,
                                                 id_var="input_data_key")

        # load in predicted emr; prod and dev runs live in parallel trees
        if self.envr == "prod":
            pred_emr = pd.read_csv(
                ("/ihme/epi/panda_cascade/prod/{mv}/full/locations/1/outputs/"
                 "both/2000/model_estimate_fit.csv").format(
                    mv=self.model_version_id))
        else:
            pred_emr = pd.read_csv(
                ("/ihme/epi/panda_cascade/dev/{mv}/full/locations/1/outputs/"
                 "both/2000/model_estimate_fit.csv").format(
                    mv=self.model_version_id))

        # keep EMR rows only (measure_id 9). Boolean .loc replaces the .ix
        # indexer, which was deprecated and later removed from pandas.
        pred_emr = pred_emr.loc[pred_emr.measure_id == 9]
        pred_emr = pred_emr.rename(columns={"pred_mean": "mean_emr",
                                            "pred_lower": "lower_emr",
                                            "pred_upper": "upper_emr"})
        pred_emr = pred_emr.drop(["measure_id"], axis=1)
        pred_emr = pred_emr.merge(adj_data_plate, how="inner",
                                  on=["age_group_id", "sex_id"])

        # derive standard error from the uncertainty interval, then
        # aggregate emr by input_data_key
        pred_emr["se_emr"] = pred_emr.apply(
            lambda x:
                se_from_ui(x["mean_emr"], x["lower_emr"], x["upper_emr"],
                           method="non-ratio"),
            axis=1)
        pred_emr = pred_emr[["input_data_key", "mean_emr", "se_emr"]]
        grouped = pred_emr.groupby(["input_data_key"])
        emr = grouped.apply(
            lambda x: aggregate_se(x["mean_emr"], x["se_emr"])
        ).reset_index()
        emr = emr.rename(columns={"mean": "mean_emr", "se": "se_emr"})
        # drop degenerate aggregates
        emr = emr[(emr["mean_emr"] > 0) & (emr["se_emr"] != 0)]
        return emr
Example #3
0
    def csmr(self):
        """Pull csmr and aggregate or duplicate to template.

        When ``self.csmr_cause_id`` is a real cause id, death counts are
        pulled from the codcorrect outputs (cod database) and converted to
        rates via pop_scaled; the sentinel value -1 means custom csmr data
        were uploaded with the model and are pulled from the epi database
        instead.

        Returns:
            DataFrame with columns input_data_key, mean_csmr, se_csmr --
            only rows with a positive mean and non-zero standard error.

        Raises:
            NoCSMRValues: if no usable csmr rows remain after filtering.
        """

        # get template for merging
        adj_data_plate = self.construct_template(self.adj_data,
                                                 id_var="input_data_key")

        if self.csmr_cause_id != -1:
            # possible demographics
            locs = make_sql_obj(adj_data_plate.location_id.tolist())
            ages = make_sql_obj(adj_data_plate.age_group_id.tolist())
            sexes = make_sql_obj(adj_data_plate.sex_id.tolist())
            years = make_sql_obj(adj_data_plate.year_id.tolist())

            # pull data
            query = """
            SELECT
                co.location_id,
                co.year_id,
                co.age_group_id,
                co.sex_id,
                co.mean_death/pop_scaled AS mean_csmr,
                co.upper_death/pop_scaled AS upper_csmr,
                co.lower_death/pop_scaled AS lower_csmr
            FROM
                cod.output co
            JOIN
                cod.output_version cov
                    ON cov.output_version_id = co.output_version_id
            JOIN
                mortality.output mo
                    ON  mo.location_id = co.location_id
                    AND mo.age_group_id = co.age_group_id
                    AND mo.sex_id = co.sex_id
                    AND mo.year_id = co.year_id
            JOIN
                mortality.output_version mov
                    ON mov.output_version_id = mo.output_version_id
            WHERE
                cov.output_version_id = {codcorrect_version_id}
                AND cov.best_end IS NULL
                AND mov.is_best = 1
                AND mov.best_end IS NULL
                AND co.cause_id = {cause_id}
                AND co.location_id in ({locs})
                AND co.age_group_id in ({ages})
                AND co.year_id in ({years})
                AND co.sex_id in({sexes})
            """.format(locs=locs, ages=ages, sexes=sexes, years=years,
                       cause_id=self.csmr_cause_id,
                       codcorrect_version_id=self.codcorrect_version_id)
            df = db_tools.query(query, database="cod")
        else:
            # csmr_cause_id == -1: pull custom csmr data uploaded with the
            # model. (The previous if/elif/else left a trailing raise that
            # was unreachable -- every value lands in one of two branches.)
            query = """
            SELECT
                input_data_id, location_id, year_start, year_end, age_start,
                age_end, sex_id, mean as mean_csmr, lower as lower_csmr,
                upper as upper_csmr
            FROM epi.input_data id
            JOIN epi.model_version mv
            USING (modelable_entity_id)
            WHERE
                model_version_id = {mv}
                AND measure_id = 15
                AND id.last_updated_action != "DELETE"
            """.format(mv=self.model_version_id)
            df = db_tools.query(query, database="epi")

            # get template for merging with adjusted data
            csmr_data_plate = self.construct_template(df,
                                                      id_var="input_data_id")
            df = df[["input_data_id", "mean_csmr", "lower_csmr", "upper_csmr"]]
            df = df.merge(csmr_data_plate, how="inner", on="input_data_id")
            df.drop('input_data_id', axis=1, inplace=True)

        df = df.merge(adj_data_plate, how="inner",
                      on=["year_id", "age_group_id", "sex_id", "location_id"])

        # derive standard error from the uncertainty interval, then
        # aggregate csmr by input_data_key
        df["se_csmr"] = df.apply(
            lambda x:
                se_from_ui(x["mean_csmr"], x["lower_csmr"], x["upper_csmr"],
                           method="non-ratio"),
            axis=1)
        df = df[["input_data_key", "mean_csmr", "se_csmr"]]
        grouped = df.groupby(["input_data_key"])
        df = grouped.apply(
            lambda x: aggregate_se(x["mean_csmr"], x["se_csmr"])
        ).reset_index()
        df = df.rename(columns={"mean": "mean_csmr", "se": "se_csmr"})
        # drop degenerate aggregates
        df = df[(df["mean_csmr"] > 0) & (df["se_csmr"] != 0)]

        if len(df) == 0:
            raise NoCSMRValues("no corresponding csmr values discovered")
        return df
Example #4
0
    def acmr(self):
        """Pull all-cause mortality rates (cause_id 294) for the adjusted
        data's demographics and aggregate them by input_data_key.

        Returns a DataFrame with columns input_data_key, mean_acmr and
        se_acmr, keeping only positive means with non-zero standard errors.
        """

        # template describing the demographics of the adjusted data
        template = self.construct_template(self.adj_data,
                                           id_var="input_data_key")

        # demographic id lists rendered for the SQL IN clauses
        locs = make_sql_obj(template.location_id.tolist())
        ages = make_sql_obj(template.age_group_id.tolist())
        sexes = make_sql_obj(template.sex_id.tolist())
        years = make_sql_obj(template.year_id.tolist())

        # deaths from the codcorrect run, converted to rates via pop_scaled
        query = """
        SELECT
            co.location_id,
            co.year_id,
            co.age_group_id,
            co.sex_id,
            co.mean_death/pop_scaled AS mean_acmr,
            co.upper_death/pop_scaled AS upper_acmr,
            co.lower_death/pop_scaled AS lower_acmr
        FROM
            cod.output co
        JOIN
            cod.output_version cov
                ON cov.output_version_id = co.output_version_id
        JOIN
            mortality.output mo
                ON  mo.location_id = co.location_id
                AND mo.age_group_id = co.age_group_id
                AND mo.sex_id = co.sex_id
                AND mo.year_id = co.year_id
        JOIN
            mortality.output_version mov
                ON mov.output_version_id = mo.output_version_id
        WHERE
            cov.output_version_id = {codcorrect_version_id}
            AND cov.best_end IS NULL
            AND mov.is_best = 1
            AND mov.best_end IS NULL
            AND co.cause_id = 294
            AND co.location_id in ({locs})
            AND co.age_group_id in ({ages})
            AND co.year_id in ({years})
            AND co.sex_id in({sexes})
        """
        query = query.format(
            codcorrect_version_id=self.codcorrect_version_id,
            locs=locs, ages=ages, sexes=sexes, years=years)
        df = db_tools.query(query, database="cod")
        df = df.merge(template, how="inner",
                      on=["year_id", "age_group_id",
                          "sex_id", "location_id"])

        # standard error from the uncertainty interval, then aggregate
        # acmr by input_data_key
        df["se_acmr"] = df.apply(
            lambda row: se_from_ui(row["mean_acmr"], row["lower_acmr"],
                                   row["upper_acmr"], method="non-ratio"),
            axis=1)
        df = df[["input_data_key", "mean_acmr", "se_acmr"]]
        df = df.groupby(["input_data_key"]).apply(
            lambda grp: aggregate_se(grp["mean_acmr"], grp["se_acmr"])
        ).reset_index()
        df = df.rename(columns={"mean": "mean_acmr", "se": "se_acmr"})
        return df[(df["mean_acmr"] > 0) & (df["se_acmr"] != 0)]
Example #5
0
    def adj_data(self, value):
        """Clean adjusted data in preparation for calculation.

        Dedupes *value*, joins on demographic metadata and previously
        calculated emr row numbers from the epi database, filters to usable
        rows (single sex, positive mean, <= 15-year or terminal age groups,
        non-null uncertainty, measure_id 5/6), computes standard errors and
        stores the result on ``self._adj_data``.

        Raises:
            NoNonZeroValues: if no rows survive the filtering.
        """

        # keep unique rows; copy the column slice so later column
        # assignments never mutate (or warn on) a view of the caller's frame
        value = value[["input_data_key", "mean", "lower", "upper"]].copy()
        value = value.drop_duplicates()

        # query metadata
        value["input_data_key"] = value["input_data_key"].astype(int)
        id_keys = make_sql_obj(value.input_data_key.tolist())
        demo_query = """
        SELECT
            input_data_key,
            input_data_id,
            modelable_entity_id,
            location_id,
            sex_id,
            year_start,
            year_end,
            age_start,
            age_end,
            measure_id
        FROM
            epi.input_data_audit ida
        WHERE
            input_data_key in ({id_keys})
        """.format(id_keys=id_keys)
        wrows = db_tools.query(demo_query, database="epi", envr=self.envr)
        df_wrows = value.merge(wrows, on=["input_data_key"], how="left")

        # subset. Boolean indexing replaces the .ix indexer, which was
        # deprecated and later removed from pandas.
        df_wrows = df_wrows[df_wrows.sex_id != 3]  # get rid of both sex
        df_wrows = df_wrows[df_wrows["mean"] > 0]  # get rid of 0 means
        df_wrows = df_wrows[
            ((df_wrows.age_end - df_wrows.age_start) <= 15) |  # > 20 age group
            (df_wrows.age_start >= 80)]  # or terminal
        df_wrows = df_wrows[
            (df_wrows["mean"].notnull()) &
            (df_wrows["lower"].notnull()) &
            (df_wrows["upper"].notnull())]  # mean upper and lower not null
        df_wrows = df_wrows[df_wrows.measure_id.isin([5, 6])]
        if len(df_wrows) == 0:
            raise NoNonZeroValues("no non-zero values for incidence")

        # query for previously calculated emr row numbers
        me_id = df_wrows.modelable_entity_id.unique().item()
        input_data_ids = make_sql_obj(df_wrows.input_data_id.tolist())
        metadata_query = """
        SELECT
            id.row_num as emr_row_num,
            input_data_metadata_value as input_data_id
        FROM
            epi.input_data id
        JOIN
            epi.input_data_metadata idm
                ON id.input_data_id = idm.input_data_id
        WHERE
            modelable_entity_id = {me_id}
            AND input_data_metadata_type_id = 66
            AND input_data_metadata_value in ({input_data_ids})
            AND id.last_updated_action != "DELETE"
        """.format(me_id=me_id, input_data_ids=input_data_ids)
        old_emr = db_tools.query(metadata_query, database="epi",
                                 envr=self.envr)
        # metadata values arrive as strings like "123.0": go through float
        # before int so the cast does not raise
        old_emr["input_data_id"] = old_emr.input_data_id.astype(float)
        old_emr["input_data_id"] = old_emr.input_data_id.astype(int)
        df_wmetadata = df_wrows.merge(old_emr, on=["input_data_id"],
                                      how="left")

        # compute standard error from the uncertainty interval
        df_wmetadata["se"] = df_wmetadata.apply(
            lambda x:
                se_from_ui(x["mean"], x["lower"], x["upper"],
                           method="non-ratio"),
            axis=1)
        df = df_wmetadata.rename(columns={"mean": "mean_", "se": "se_"})
        df = df.drop(["upper", "lower"], axis=1)
        df = df[(df["mean_"] > 0) & (df["se_"] != 0)]

        # set result on self
        self._adj_data = df