def generate_age_standardized(self): """ Standardizes data_draws using age group weights from database :param self: """ logger.info("Generating age-standardized draws.") # get age weights sql_query = """ SELECT age_group_id, age_group_weight_value FROM shared.age_group_weight agw WHERE gbd_round_id = {gbd_round_id}; """.format(gbd_round_id=self.gbd_round_id) age_standard_data = db_connect.query(sql_query, conn_def=self.conn_def) # prep draws for merge self.data_draws = self.data_draws.ix[ self.data_draws['age_group_id'] != 27] data = self.format_draws(self.data_draws) data = data.ix[data['age_group_id'] in self.AGES_DISAGGREGATED] # merge on age-weights data = pd.merge(data, age_standard_data, on='age_group_id') # make adjusted rate for c in self.data_columns: data[c] = data[c] * data['age_group_weight_value'] / data[ self.pop_column] # collapsing to generate ASR data['age_group_id'] = 27 data = data.groupby(self.index_columns).sum().reset_index() # merge with original data self.data_draws = pd.concat([self.data_draws, data])
def get_model_details(self): """ Gets acause, sex_id, and user from database and stores them in self :param self: """ logger.info("Getting model details.") sql_query = """ SELECT mv.model_version_id, mv.cause_id, c.acause, mv.sex_id, mv.inserted_by FROM cod.model_version mv JOIN shared.cause c USING (cause_id) WHERE model_version_id = {}; """.format(self.model_version_id) model_data = db_connect.query(sql_query, conn_def=self.conn_def) self.acause = model_data.ix[0, 'acause'] self.sex_id = model_data.ix[0, 'sex_id'] self.user = model_data.ix[0, 'inserted_by']
def get_cause_hierarchy_version(conn_def, gbd_round_id, cause_set_id=3): """ Get the IDs associated with best version of a cause set :param cause_set_id: int the set of causes to use :param gbd_round_id: int :param conn_def: str """ logger.info("Getting cause hierarchy version.") sql_statement = """ SELECT a.cause_set_version_id, v.cause_metadata_version_id FROM shared.cause_set_version_active a JOIN shared.cause_set_version v ON a.cause_set_version_id = v.cause_set_version_id WHERE a.cause_set_id = {cause_set_id} AND a.gbd_round_id = {gbd_round_id} """.format(cause_set_id=cause_set_id, gbd_round_id=gbd_round_id) result_df = db_connect.query(sql_statement, conn_def) # make sure exactly one cause set version is returned if len(result_df) > 1: exception_text = """ This returned more than 1 cause_set_version_id ({returned_ids}) """.format(returned_ids=", ".join( str(result_df['cause_set_version_id'].drop_duplicates().tolist()))) raise LookupError(exception_text) elif len(result_df) < 1: raise LookupError("No cause set versions returned") return result_df.loc[0, 'cause_set_version_id'], \ result_df.loc[0, 'cause_metadata_version_id']
def get_location_hierarchy(self): """ Reads and returns location hierarchy information from SQL :param self: :return: dataframe pandas dataframe with location hierarchy information from database """ logger.info("Getting location hierarchy.") sql_query = """SELECT location_id, level, parent_id, is_estimate FROM shared.location_hierarchy_history lhh JOIN shared.location_set_version_active lsv USING (location_set_version_id) WHERE lhh.location_set_id = {location_set_id} AND lsv.gbd_round_id = {gbd_round_id}; """.format(location_set_id=self.location_set_id, gbd_round_id=self.gbd_round_id) location_hierarchy_history = db_connect.query(sql_query, conn_def=self.conn_def) location_hierarchy_history.drop_duplicates(inplace=True) return location_hierarchy_history
def read_input_model_data(global_model_version_id, developed_model_version_id, conn_def): logger.info("Reading input model data.") # Read input model data from SQL sql_query = """SELECT mv.model_version_id, mv.cause_id, c.acause, mv.sex_id, mv.age_start, mv.age_end, mv.gbd_round_id, mv.refresh_id, mv.decomp_step_id FROM cod.model_version mv JOIN shared.cause c USING (cause_id) WHERE model_version_id IN ({global_model_version_id}, {developed_model_version_id}) """.format( global_model_version_id=global_model_version_id, developed_model_version_id=developed_model_version_id) feeder_model_data = db_connect.query(sql_query, conn_def=conn_def) return feeder_model_data
def get_cause_dict(conn_def): """ Returns information stored in the shared.cause table with a key of cause_id. This does not include hierarchy-specific information like age start or age end. :return: dict cause information """ logger.info("Getting cause dictionary") df = db_connect.query('SELECT * FROM shared.cause;', conn_def=conn_def).set_index('cause_id') df = df.to_dict('index') return df
def get_locations(location_set_version_id, conn_def): """ Get a DataFrame of locations and their hierarchy data from the specified location hierarchy version :param location_set_version_id: int the version id for the set of locations to retrieve :param conn_def: str name of string :return: dataframe pandas dataframe containing the location hierarchy information """ logger.info("Get locations for location_set_version_id {}".format( location_set_version_id)) call = "Call shared.view_location_hierarchy_history({loc_set_vid})".\ format(loc_set_vid=location_set_version_id) return db_connect.query(call, conn_def=conn_def)
def get_model_properties(model_version_id, conn_def): """ Gets a dictionary of the model information for a specified model_version_id :param model_version_id: int specification for which model version to retrieve :param conn_def: str which full server name to use :return: dict dictionary where each entry is a column of the model_version table """ logger.info("Getting model properties for {}".format(model_version_id)) call = "SELECT * FROM cod.model_version WHERE model_version_id = {};".\ format(model_version_id) model_data = db_connect.query(call, conn_def=conn_def) output = {} for c in model_data.columns: output[c] = model_data.loc[0, c] return output
def check_model_attribute(model_version_id, model_attribute_name, model_attribute, conn_def): """ Checks that the specific model version is truly associated with the model attribute that is specified. :param model_version_id: :param model_attribute_name: :param model_attribute: :param conn_def: :return: """ call = ''' SELECT {} FROM cod.model_version WHERE model_version_id = {} '''.format(model_attribute_name, model_version_id) attribute_to_check = db_connect.query( call, conn_def)[model_attribute_name].iloc[0] if invalid_attribute(attribute_to_check, model_attribute_name, model_attribute): raise ValueError( 'The model attribute for {} in model_version {} does not match up!' .format(model_attribute_name, model_version_id))
def acause_from_id(model_version_id, conn_def): """ Given a valid model version id returns the acause associated with it. :param model_version_id: int valid model version id :param conn_def: str :return: str string representing an acause """ call = ''' SELECT acause FROM shared.cause WHERE cause_id = (SELECT cause_id FROM cod.model_version WHERE model_version_id = {}) '''.format(model_version_id) acause = db_connect.query(call, conn_def)["acause"][0] return acause
def get_location_hierarchy_version(conn_def, gbd_round_id, location_set_id=35): """ Get the IDs associated with best version of a location set :param location_set_id: int the set of locations to use :param gbd_round_id: int the year of the gbd round to use :param conn_def: str :return location_set_version_id: int the version id for the location set and year that were input :return location_metadata_version_id: int the version id for the metadata of the location and year that were input """ logger.info("Getting location hierarchy version.") sql_statement = """ SELECT a.location_set_version_id, v.location_metadata_version_id FROM shared.location_set_version_active a JOIN shared.location_set_version v ON a.location_set_version_id = v.location_set_version_id WHERE a.location_set_id = {location_set_id} AND a.gbd_round_id = {gbd_round_id} """.format(location_set_id=location_set_id, gbd_round_id=gbd_round_id) result_df = db_connect.query(sql_statement, conn_def) if len(result_df) > 1: exception_text = """ This returned more than 1 location_set_version_id ({returned_ids}) """.format(returned_ids=", ".join( result_df['location_set_version_id'].drop_duplicates().to_list())) raise LookupError(exception_text) elif len(result_df) < 1: raise LookupError("No location set versions returned") return result_df.loc[0, 'location_set_version_id'], result_df.loc[ 0, 'location_metadata_version_id']
def gbd_round_from_id(model_version_id, conn_def): call = ''' SELECT gbd_round_id FROM cod.model_version WHERE model_version_id = {} '''.format(model_version_id) return db_connect.query(call, conn_def)["gbd_round_id"][0]