Esempio n. 1
0
    def generate_age_standardized(self):
        """
        Standardizes data_draws using age group weights from database

        :param self:
        """
        logger.info("Generating age-standardized draws.")
        # get age weights
        sql_query = """
                    SELECT
                        age_group_id,
                        age_group_weight_value
                    FROM
                        shared.age_group_weight agw
                    WHERE
                        gbd_round_id = {gbd_round_id};
                    """.format(gbd_round_id=self.gbd_round_id)
        age_standard_data = db_connect.query(sql_query, conn_def=self.conn_def)
        # prep draws for merge
        self.data_draws = self.data_draws.ix[
            self.data_draws['age_group_id'] != 27]
        data = self.format_draws(self.data_draws)
        data = data.ix[data['age_group_id'] in self.AGES_DISAGGREGATED]
        # merge on age-weights
        data = pd.merge(data, age_standard_data, on='age_group_id')
        # make adjusted rate
        for c in self.data_columns:
            data[c] = data[c] * data['age_group_weight_value'] / data[
                self.pop_column]
        # collapsing to generate ASR
        data['age_group_id'] = 27
        data = data.groupby(self.index_columns).sum().reset_index()
        # merge with original data
        self.data_draws = pd.concat([self.data_draws, data])
Esempio n. 2
0
    def get_model_details(self):
        """
        Gets acause, sex_id, and user from database and stores them in self

        :param self:
        """
        logger.info("Getting model details.")
        sql_query = """
                    SELECT
                        mv.model_version_id,
                        mv.cause_id,
                        c.acause,
                        mv.sex_id,
                        mv.inserted_by
                    FROM
                        cod.model_version mv
                    JOIN
                        shared.cause c USING (cause_id)
                    WHERE
                        model_version_id = {};
                    """.format(self.model_version_id)
        model_data = db_connect.query(sql_query, conn_def=self.conn_def)
        self.acause = model_data.ix[0, 'acause']
        self.sex_id = model_data.ix[0, 'sex_id']
        self.user = model_data.ix[0, 'inserted_by']
Esempio n. 3
0
def get_cause_hierarchy_version(conn_def, gbd_round_id, cause_set_id=3):
    """
    Get the IDs associated with best version of a cause set

    :param cause_set_id: int
        the set of causes to use
    :param gbd_round_id: int
    :param conn_def: str
    """
    logger.info("Getting cause hierarchy version.")
    sql_statement = """
        SELECT
            a.cause_set_version_id, v.cause_metadata_version_id
        FROM shared.cause_set_version_active a
        JOIN shared.cause_set_version v
          ON a.cause_set_version_id = v.cause_set_version_id
        WHERE a.cause_set_id = {cause_set_id}
          AND a.gbd_round_id = {gbd_round_id}
        """.format(cause_set_id=cause_set_id, gbd_round_id=gbd_round_id)
    result_df = db_connect.query(sql_statement, conn_def)

    # make sure exactly one cause set version is returned
    if len(result_df) > 1:
        exception_text = """
            This returned more than 1 cause_set_version_id ({returned_ids})
        """.format(returned_ids=", ".join(
            str(result_df['cause_set_version_id'].drop_duplicates().tolist())))
        raise LookupError(exception_text)
    elif len(result_df) < 1:
        raise LookupError("No cause set versions returned")

    return result_df.loc[0, 'cause_set_version_id'], \
           result_df.loc[0, 'cause_metadata_version_id']
Esempio n. 4
0
    def get_location_hierarchy(self):
        """
        Reads and returns location hierarchy information from SQL

        :param self:
        :return: dataframe
            pandas dataframe with location hierarchy information from database
        """
        logger.info("Getting location hierarchy.")
        sql_query = """SELECT
                           location_id,
                           level,
                           parent_id,
                           is_estimate
                       FROM
                           shared.location_hierarchy_history lhh
                       JOIN
                           shared.location_set_version_active lsv USING (location_set_version_id)
                       WHERE
                           lhh.location_set_id = {location_set_id} AND
                           lsv.gbd_round_id = {gbd_round_id};
                       """.format(location_set_id=self.location_set_id,
                                  gbd_round_id=self.gbd_round_id)
        location_hierarchy_history = db_connect.query(sql_query,
                                                      conn_def=self.conn_def)
        location_hierarchy_history.drop_duplicates(inplace=True)
        return location_hierarchy_history
Esempio n. 5
0
def read_input_model_data(global_model_version_id, developed_model_version_id,
                          conn_def):
    logger.info("Reading input model data.")
    # Read input model data from SQL
    sql_query = """SELECT
                     mv.model_version_id,
                     mv.cause_id,
                     c.acause,
                     mv.sex_id,
                     mv.age_start,
                     mv.age_end,
                     mv.gbd_round_id,
                     mv.refresh_id,
                     mv.decomp_step_id
                 FROM
                     cod.model_version mv
                 JOIN
                     shared.cause c USING (cause_id)
                 WHERE
                     model_version_id IN ({global_model_version_id},
                                          {developed_model_version_id})
                 """.format(
        global_model_version_id=global_model_version_id,
        developed_model_version_id=developed_model_version_id)
    feeder_model_data = db_connect.query(sql_query, conn_def=conn_def)
    return feeder_model_data
Esempio n. 6
0
def get_cause_dict(conn_def):
    """
    Returns information stored in the shared.cause table with a key of cause_id.
    This does not include hierarchy-specific information like age start or age
    end.

    :return: dict
        cause information
    """
    logger.info("Getting cause dictionary")
    df = db_connect.query('SELECT * FROM shared.cause;',
                          conn_def=conn_def).set_index('cause_id')
    df = df.to_dict('index')
    return df
Esempio n. 7
0
def get_locations(location_set_version_id, conn_def):
    """
    Get a DataFrame of locations and their hierarchy data from the
    specified location hierarchy version

    :param location_set_version_id: int
        the version id for the set of locations to retrieve
    :param conn_def: str
        name of string
    :return: dataframe
        pandas dataframe containing the location hierarchy information
    """
    logger.info("Get locations for location_set_version_id {}".format(
        location_set_version_id))
    call = "Call shared.view_location_hierarchy_history({loc_set_vid})".\
        format(loc_set_vid=location_set_version_id)
    return db_connect.query(call, conn_def=conn_def)
Esempio n. 8
0
def get_model_properties(model_version_id, conn_def):
    """
    Gets a dictionary of the model information for a specified model_version_id

    :param model_version_id: int
        specification for which model version to retrieve
    :param conn_def: str
        which full server name to use
    :return: dict
        dictionary where each entry is a column of the model_version table
    """
    logger.info("Getting model properties for {}".format(model_version_id))
    call = "SELECT * FROM cod.model_version WHERE model_version_id = {};".\
        format(model_version_id)
    model_data = db_connect.query(call, conn_def=conn_def)
    output = {}
    for c in model_data.columns:
        output[c] = model_data.loc[0, c]
    return output
Esempio n. 9
0
def check_model_attribute(model_version_id, model_attribute_name,
                          model_attribute, conn_def):
    """
    Checks that the specific model version is truly associated with the model attribute
    that is specified.
    :param model_version_id:
    :param model_attribute_name:
    :param model_attribute:
    :param conn_def:
    :return:
    """
    call = '''
        SELECT {} FROM cod.model_version WHERE model_version_id = {}
        '''.format(model_attribute_name, model_version_id)
    attribute_to_check = db_connect.query(
        call, conn_def)[model_attribute_name].iloc[0]
    if invalid_attribute(attribute_to_check, model_attribute_name,
                         model_attribute):
        raise ValueError(
            'The model attribute for {} in model_version {} does not match up!'
            .format(model_attribute_name, model_version_id))
Esempio n. 10
0
def acause_from_id(model_version_id, conn_def):
    """
    Given a valid model version id returns the acause associated with it.

    :param model_version_id: int
        valid model version id
    :param conn_def: str
    :return: str
        string representing an acause
    """
    call = '''
    SELECT
    acause
    FROM
    shared.cause
    WHERE
    cause_id = (SELECT cause_id
                FROM cod.model_version
                WHERE model_version_id = {})
    '''.format(model_version_id)
    acause = db_connect.query(call, conn_def)["acause"][0]
    return acause
Esempio n. 11
0
def get_location_hierarchy_version(conn_def, gbd_round_id, location_set_id=35):
    """
    Get the IDs associated with best version of a location set

    :param location_set_id: int
        the set of locations to use
    :param gbd_round_id: int
        the year of the gbd round to use
    :param conn_def: str
    :return location_set_version_id: int
        the version id for the location set and year that were input
    :return location_metadata_version_id: int
        the version id for the metadata of the location and year that were
        input
    """
    logger.info("Getting location hierarchy version.")
    sql_statement = """
        SELECT
            a.location_set_version_id, v.location_metadata_version_id
        FROM shared.location_set_version_active a
        JOIN shared.location_set_version v
            ON a.location_set_version_id = v.location_set_version_id
        WHERE a.location_set_id = {location_set_id}
            AND a.gbd_round_id = {gbd_round_id}
    """.format(location_set_id=location_set_id, gbd_round_id=gbd_round_id)
    result_df = db_connect.query(sql_statement, conn_def)

    if len(result_df) > 1:
        exception_text = """
            This returned more than 1 location_set_version_id ({returned_ids})
        """.format(returned_ids=", ".join(
            result_df['location_set_version_id'].drop_duplicates().to_list()))
        raise LookupError(exception_text)
    elif len(result_df) < 1:
        raise LookupError("No location set versions returned")
    return result_df.loc[0, 'location_set_version_id'], result_df.loc[
        0, 'location_metadata_version_id']
Esempio n. 12
0
def gbd_round_from_id(model_version_id, conn_def):
    call = '''
        SELECT gbd_round_id FROM cod.model_version WHERE model_version_id = {}
        '''.format(model_version_id)
    return db_connect.query(call, conn_def)["gbd_round_id"][0]