예제 #1
0
    def generate_age_standardized(self):
        """
        Standardizes data_draws using age group weights from database

        :param self:
        """
        # get age weights
        sql_query = """
                    SELECT
                        age_group_id,
                        age_group_weight_value
                    FROM
                        shared.age_group_weight agw
                    WHERE
                        gbd_round_id = {gbd_round_id};
                    """.format(gbd_round_id=self.gbd_round_id)
        age_standard_data = run_query(sql_query, server=self.server)
        # prep draws for merge
        self.data_draws = self.data_draws.ix[
            self.data_draws['age_group_id'] != 27]
        data = self.format_draws(self.data_draws)
        data = data.ix[data['age_group_id'] in AGES_DISAGGREGATED]
        # merge on age-weights
        data = pd.merge(data, age_standard_data, on='age_group_id')
        # make adjusted rate
        for c in self.data_columns:
            data[c] = data[c] * data['age_group_weight_value'] / \
                data[self.pop_column]
        # collapsing to generate ASR
        data['age_group_id'] = 27
        data = data.groupby(self.index_columns).sum().reset_index()
        # merge with original data
        self.data_draws = pd.concat([self.data_draws, data])
예제 #2
0
 def generate_age_standardized(self):
     print "Getting age-weights"
     sql_query = """SELECT
                        age_group_id,
                        age_group_weight_value
                    FROM
                        shared.age_group_weight agw
                    JOIN
                        shared.gbd_round USING (gbd_round_id)
                    WHERE
                        gbd_round = 2015;"""
     age_standard_data = run_query(sql_query)
     print "Prepping draws for merge"
     self.data_draws = self.data_draws.ix[self.data_draws['age_group_id']!=27]
     data = self.format_draws(self, self.data_draws)
     data = data.ix[(data['age_group_id']>=2)&(data['age_group_id']<=21)]
     print "Merging on age-weights"
     data = pd.merge(data,
                     age_standard_data,
                     on='age_group_id')
     print "Making adjusted rate"
     for c in self.data_columns:
         data[c] = data[c] * data['age_group_weight_value'] / data[self.pop_column]
     print "Collapsing to generate ASR"
     data['age_group_id'] = 27
     data = data.groupby(self.index_columns).sum().reset_index()
     print "Merging with original data"
     self.data_draws = pd.concat([self.data_draws, data])
     print "Age-standardized rates generated!"
예제 #3
0
    def get_location_hierarchy(self):
        """
        Reads and returns location hierarchy information from SQL

        :param self:
        :return: dataframe
            pandas dataframe with location hierarchy information from database
        """
        sql_query = """SELECT
                           location_id,
                           level,
                           parent_id,
                           is_estimate
                       FROM
                           shared.location_hierarchy_history lhh
                       JOIN
                           shared.location_set_version_active lsv USING (location_set_version_id)
                       WHERE
                           lhh.location_set_id = {location_set_id} AND
                           lsv.gbd_round_id = {gbd_round_id};
                       """.format(location_set_id=self.location_set_id,
                                  gbd_round_id=self.gbd_round_id)
        location_hierarchy_history = run_query(sql_query, server=self.server)
        location_hierarchy_history.drop_duplicates(inplace=True)
        return location_hierarchy_history
예제 #4
0
def get_model_properties(model_version_id):
    model_data = run_query(
        "SELECT * FROM cod.model_version WHERE model_version_id = {};".format(
            model_version_id),
        server=server_name)
    output = {}
    for c in model_data.columns:
        output[c] = model_data.ix[0, c]
    return output
예제 #5
0
def get_locations(location_set_version_id):
    """
    Get a DataFrame of locations and their hierarchy data from the
    specified location hierarchy version

    :param location_set_version_id: int
        the version id for the set of locations to retrieve
    :return: dataframe
        pandas dataframe containing the location hierarchy information
    """
    call = "Call shared.view_location_hierarchy_history({loc_set_vid})".\
        format(loc_set_vid=location_set_version_id)
    return run_query(call, server=server_name)
예제 #6
0
def get_cause_dict():
    """ Return cause information in dictionary form

    Will return information stored in the shared.cause table with a key of cause_id.
    This does not include hierarchy-specific information like age start or age end
    """
    cause_data = run_query('SELECT * FROM shared.cause;',
                           server=server_name).set_index('cause_id')
    output = {}
    for i in cause_data.index:
        output[i] = {}
        for c in cause_data.columns:
            output[i][c] = cause_data.ix[i, c]
    return output
예제 #7
0
def get_model_properties(model_version_id):
    """
    Gets a dictionary of the model information for a specified model_version_id

    :param model_version_id: int
        specification for which model version to retrieve
    :return: dict
        dictionary where each entry is a column of the model_version table
    """
    call = "SELECT * FROM cod.model_version WHERE model_version_id = {};".\
        format(model_version_id)
    model_data = run_query(call, server=server_name)
    output = {}
    for c in model_data.columns:
        output[c] = model_data.ix[0, c]
    return output
예제 #8
0
 def get_location_hierarchy(self):
     sql_query = """SELECT
                        location_id,
                        level,
                        parent_id,
                        most_detailed
                    FROM
                        shared.location_hierarchy_history lhh
                    JOIN
                        shared.location_set_version lsv USING (location_set_version_id)
                    WHERE
                        lhh.location_set_id = {location_set_id} AND
                        lsv.gbd_round = 2015 AND
                        lsv.end_date IS NULL;""".format(location_set_id=self.location_set_id)
     location_hierarchy_history = run_query(sql_query)
     return location_hierarchy_history
예제 #9
0
 def get_model_details(self):
     sql_query = """SELECT
                        mv.model_version_id,
                        mv.cause_id,
                        c.acause,
                        mv.sex_id,
                        mv.inserted_by
                    FROM
                        cod.model_version mv
                    JOIN
                        shared.cause c USING (cause_id)
                    WHERE
                        model_version_id = {};""".format(self.model_version_id)
     model_data = run_query(sql_query)
     self.acause = model_data.ix[0, 'acause']
     self.sex_id = model_data.ix[0, 'sex_id']
     self.user = model_data.ix[0, 'inserted_by']
예제 #10
0
def get_cause_hierarchy_version(cause_set_id=2,
                                gbd_round_id=4,
                                server='SERVER'):
    """
    Get the IDs associated with best version of a cause set

    :param cause_set_id: int
        the set of causes to use
    :param gbd_round: int
        the year of the gbd_round to use
    :param server: str
        db server to use
    :return cause_set_version_id: int
        the version id for the cause_set and year that were input
    :return cause_metadata_version_id: in
        the version id for the metadata of the cause_set and year that were
        input
    """
    print server
    sql_statement = """
        SELECT
	      a.cause_set_version_id, v.cause_metadata_version_id
        FROM shared.cause_set_version_active a
        JOIN shared.cause_set_version v
          ON a.cause_set_version_id = v.cause_set_version_id
        WHERE a.cause_set_id = {cause_set_id}
          AND a.gbd_round_id = {gbd_round_id}
        """.format(cause_set_id=cause_set_id, gbd_round_id=gbd_round_id)
    result_df = run_query(sql_statement, server)

    # make sure exactly one cause set version is returned
    if len(result_df) > 1:
        exception_text = """
            This returned more than 1 cause_set_version_id ({returned_ids})
        """.format(returned_ids=", ".join(str(result_df['cause_set_version_id'].\
                drop_duplicates().tolist())))
        raise LookupError(exception_text)
    elif len(result_df) < 1:
        raise LookupError("No cause set versions returned")

    return result_df.ix[0, 'cause_set_version_id'], \
            result_df.ix[0, 'cause_metadata_version_id']
예제 #11
0
def get_cause_hierarchy_version(cause_set_id, gbd_round):
    """ Get the IDs associated with best version of a cause set
    
    This function will return the following integers:
        cause_set_version_id
        cause_metadata_version_id
    """
    sql_statement = "SELECT cause_set_version_id, cause_metadata_version_id FROM shared.cause_set_version WHERE cause_set_id = {cause_set_id} AND gbd_round = {gbd_round} AND end_date IS NULL;".format(
        cause_set_id=cause_set_id, gbd_round=gbd_round)
    result_df = run_query(sql_statement)

    if len(result_df) > 1:
        exception_text = "This returned more than 1 cause_set_version_id ({returned_ids})".format(
            returened_ids=", ".join(
                result_df['cause_set_version_id'].drop_duplicates().to_list()))
        raise LookupError(exception_text)
    elif len(result_df) < 1:
        raise LookupError("No cause set versions returned")
    return result_df.ix[0, 'cause_set_version_id'], result_df.ix[
        0, 'cause_metadata_version_id']
예제 #12
0
def get_location_hierarchy_version(location_set_id, gbd_round):
    """ Get the IDs associated with best version of a location set
    
    This function will return the following variables:
        location_set_version_id
        location_metadata_version_id
    """
    sql_statement = "SELECT location_set_version_id, location_metadata_version_id FROM shared.location_set_version WHERE location_set_id = {location_set_id} AND gbd_round = {gbd_round} AND end_date IS NULL;".format(
        location_set_id=location_set_id, gbd_round=gbd_round)
    result_df = run_query(sql_statement)

    if len(result_df) > 1:
        exception_text = "This returned more than 1 location_set_version_id ({returned_ids})".format(
            returened_ids=", ".join(result_df['location_set_version_id'].
                                    drop_duplicates().to_list()))
        raise LookupError(exception_text)
    elif len(result_df) < 1:
        raise LookupError("No location set versions returned")
    return result_df.ix[0, 'location_set_version_id'], result_df.ix[
        0, 'location_metadata_version_id']
예제 #13
0
 def get_model_details(self):
     """
     Gets acause, sex_id, and user from database and stores them in self
     """
     sql_query = """
                 SELECT
                     mv.model_version_id,
                     mv.cause_id,
                     c.acause,
                     mv.sex_id,
                     mv.inserted_by
                 FROM
                     cod.model_version mv
                 JOIN
                     shared.cause c USING (cause_id)
                 WHERE
                     model_version_id = {};
                 """.format(self.model_version_id)
     model_data = run_query(sql_query, server=self.server)
     self.acause = model_data.ix[0, 'acause']
     self.sex_id = model_data.ix[0, 'sex_id']
     self.user = model_data.ix[0, 'inserted_by']
예제 #14
0
def get_location_hierarchy_version(location_set_id=35,
                                   gbd_round_id=4,
                                   server='SERVER'):
    """
    Get the IDs associated with best version of a location set

    :param location_set_id: int
        the set of locations to use
    :param gbd_round: int
        the year of the gbd round to use
    :param server: str
        db server to use - either
    :return location_set_version_id: int
        the version id for the location set and year that were input
    :return location_metadata_version_id: int
        the version id for the metadata of the location and year that were
        input
    """
    sql_statement = """
        SELECT
    	    a.location_set_version_id, v.location_metadata_version_id
        FROM shared.location_set_version_active a
        JOIN shared.location_set_version v
            ON a.location_set_version_id = v.location_set_version_id
        WHERE a.location_set_id = {location_set_id}
            AND a.gbd_round_id = {gbd_round_id}
    """.format(location_set_id=location_set_id, gbd_round_id=gbd_round_id)
    result_df = run_query(sql_statement, server)

    if len(result_df) > 1:
        exception_text = """
            This returned more than 1 location_set_version_id ({returned_ids})
        """.format(returned_ids=", ".join(result_df['location_set_version_id'].\
                drop_duplicates().to_list()))
        raise LookupError(exception_text)
    elif len(result_df) < 1:
        raise LookupError("No location set versions returned")
    return result_df.ix[0, 'location_set_version_id'], \
            result_df.ix[0, 'location_metadata_version_id']
예제 #15
0
def get_locations(location_set_version_id):
    """ Get a DataFrame of the specified location hierarchy version """
    return run_query(
        "Call shared.view_location_hierarchy_history({location_set_version_id})"
        .format(location_set_version_id=location_set_version_id),
        server=server_name)
예제 #16
0
                   mv.cause_id,
                   c.acause,
                   mv.sex_id,
                   mv.age_start,
                   mv.age_end
               FROM
                   cod.model_version mv
               JOIN
                   shared.cause c USING (cause_id)
               WHERE
                   model_version_id IN ({global_model_version_id},
                                        {developed_model_version_id})
               """.format(
    global_model_version_id=global_model_version_id,
    developed_model_version_id=developed_model_version_id)
feeder_model_data = run_query(sql_query, server=server_name)

# Isolate the variables that have been read in as a dataframe
sex_id = feeder_model_data['sex_id'].drop_duplicates()[0]
cause_id = feeder_model_data['cause_id'].drop_duplicates()[0]
acause = feeder_model_data['acause'].drop_duplicates()[0]
age_start = feeder_model_data['age_start'].drop_duplicates()[0]
age_end = feeder_model_data['age_end'].drop_duplicates()[0]

# Create hybrid model entry
model_version_metadata = sql.MetaData()

engine = sql.create_engine('DATABASE')
model_version_table = sql.Table('model_version',
                                model_version_metadata,
                                autoload=True,