def generate_age_standardized(self): """ Standardizes data_draws using age group weights from database :param self: """ # get age weights sql_query = """ SELECT age_group_id, age_group_weight_value FROM shared.age_group_weight agw WHERE gbd_round_id = {gbd_round_id}; """.format(gbd_round_id=self.gbd_round_id) age_standard_data = run_query(sql_query, server=self.server) # prep draws for merge self.data_draws = self.data_draws.ix[ self.data_draws['age_group_id'] != 27] data = self.format_draws(self.data_draws) data = data.ix[data['age_group_id'] in AGES_DISAGGREGATED] # merge on age-weights data = pd.merge(data, age_standard_data, on='age_group_id') # make adjusted rate for c in self.data_columns: data[c] = data[c] * data['age_group_weight_value'] / \ data[self.pop_column] # collapsing to generate ASR data['age_group_id'] = 27 data = data.groupby(self.index_columns).sum().reset_index() # merge with original data self.data_draws = pd.concat([self.data_draws, data])
def generate_age_standardized(self): print "Getting age-weights" sql_query = """SELECT age_group_id, age_group_weight_value FROM shared.age_group_weight agw JOIN shared.gbd_round USING (gbd_round_id) WHERE gbd_round = 2015;""" age_standard_data = run_query(sql_query) print "Prepping draws for merge" self.data_draws = self.data_draws.ix[self.data_draws['age_group_id']!=27] data = self.format_draws(self, self.data_draws) data = data.ix[(data['age_group_id']>=2)&(data['age_group_id']<=21)] print "Merging on age-weights" data = pd.merge(data, age_standard_data, on='age_group_id') print "Making adjusted rate" for c in self.data_columns: data[c] = data[c] * data['age_group_weight_value'] / data[self.pop_column] print "Collapsing to generate ASR" data['age_group_id'] = 27 data = data.groupby(self.index_columns).sum().reset_index() print "Merging with original data" self.data_draws = pd.concat([self.data_draws, data]) print "Age-standardized rates generated!"
def get_location_hierarchy(self): """ Reads and returns location hierarchy information from SQL :param self: :return: dataframe pandas dataframe with location hierarchy information from database """ sql_query = """SELECT location_id, level, parent_id, is_estimate FROM shared.location_hierarchy_history lhh JOIN shared.location_set_version_active lsv USING (location_set_version_id) WHERE lhh.location_set_id = {location_set_id} AND lsv.gbd_round_id = {gbd_round_id}; """.format(location_set_id=self.location_set_id, gbd_round_id=self.gbd_round_id) location_hierarchy_history = run_query(sql_query, server=self.server) location_hierarchy_history.drop_duplicates(inplace=True) return location_hierarchy_history
def get_model_properties(model_version_id): model_data = run_query( "SELECT * FROM cod.model_version WHERE model_version_id = {};".format( model_version_id), server=server_name) output = {} for c in model_data.columns: output[c] = model_data.ix[0, c] return output
def get_locations(location_set_version_id): """ Get a DataFrame of locations and their hierarchy data from the specified location hierarchy version :param location_set_version_id: int the version id for the set of locations to retrieve :return: dataframe pandas dataframe containing the location hierarchy information """ call = "Call shared.view_location_hierarchy_history({loc_set_vid})".\ format(loc_set_vid=location_set_version_id) return run_query(call, server=server_name)
def get_cause_dict(): """ Return cause information in dictionary form Will return information stored in the shared.cause table with a key of cause_id. This does not include hierarchy-specific information like age start or age end """ cause_data = run_query('SELECT * FROM shared.cause;', server=server_name).set_index('cause_id') output = {} for i in cause_data.index: output[i] = {} for c in cause_data.columns: output[i][c] = cause_data.ix[i, c] return output
def get_model_properties(model_version_id): """ Gets a dictionary of the model information for a specified model_version_id :param model_version_id: int specification for which model version to retrieve :return: dict dictionary where each entry is a column of the model_version table """ call = "SELECT * FROM cod.model_version WHERE model_version_id = {};".\ format(model_version_id) model_data = run_query(call, server=server_name) output = {} for c in model_data.columns: output[c] = model_data.ix[0, c] return output
def get_location_hierarchy(self): sql_query = """SELECT location_id, level, parent_id, most_detailed FROM shared.location_hierarchy_history lhh JOIN shared.location_set_version lsv USING (location_set_version_id) WHERE lhh.location_set_id = {location_set_id} AND lsv.gbd_round = 2015 AND lsv.end_date IS NULL;""".format(location_set_id=self.location_set_id) location_hierarchy_history = run_query(sql_query) return location_hierarchy_history
def get_model_details(self): sql_query = """SELECT mv.model_version_id, mv.cause_id, c.acause, mv.sex_id, mv.inserted_by FROM cod.model_version mv JOIN shared.cause c USING (cause_id) WHERE model_version_id = {};""".format(self.model_version_id) model_data = run_query(sql_query) self.acause = model_data.ix[0, 'acause'] self.sex_id = model_data.ix[0, 'sex_id'] self.user = model_data.ix[0, 'inserted_by']
def get_cause_hierarchy_version(cause_set_id=2, gbd_round_id=4, server='SERVER'): """ Get the IDs associated with best version of a cause set :param cause_set_id: int the set of causes to use :param gbd_round: int the year of the gbd_round to use :param server: str db server to use :return cause_set_version_id: int the version id for the cause_set and year that were input :return cause_metadata_version_id: in the version id for the metadata of the cause_set and year that were input """ print server sql_statement = """ SELECT a.cause_set_version_id, v.cause_metadata_version_id FROM shared.cause_set_version_active a JOIN shared.cause_set_version v ON a.cause_set_version_id = v.cause_set_version_id WHERE a.cause_set_id = {cause_set_id} AND a.gbd_round_id = {gbd_round_id} """.format(cause_set_id=cause_set_id, gbd_round_id=gbd_round_id) result_df = run_query(sql_statement, server) # make sure exactly one cause set version is returned if len(result_df) > 1: exception_text = """ This returned more than 1 cause_set_version_id ({returned_ids}) """.format(returned_ids=", ".join(str(result_df['cause_set_version_id'].\ drop_duplicates().tolist()))) raise LookupError(exception_text) elif len(result_df) < 1: raise LookupError("No cause set versions returned") return result_df.ix[0, 'cause_set_version_id'], \ result_df.ix[0, 'cause_metadata_version_id']
def get_cause_hierarchy_version(cause_set_id, gbd_round): """ Get the IDs associated with best version of a cause set This function will return the following integers: cause_set_version_id cause_metadata_version_id """ sql_statement = "SELECT cause_set_version_id, cause_metadata_version_id FROM shared.cause_set_version WHERE cause_set_id = {cause_set_id} AND gbd_round = {gbd_round} AND end_date IS NULL;".format( cause_set_id=cause_set_id, gbd_round=gbd_round) result_df = run_query(sql_statement) if len(result_df) > 1: exception_text = "This returned more than 1 cause_set_version_id ({returned_ids})".format( returened_ids=", ".join( result_df['cause_set_version_id'].drop_duplicates().to_list())) raise LookupError(exception_text) elif len(result_df) < 1: raise LookupError("No cause set versions returned") return result_df.ix[0, 'cause_set_version_id'], result_df.ix[ 0, 'cause_metadata_version_id']
def get_location_hierarchy_version(location_set_id, gbd_round): """ Get the IDs associated with best version of a location set This function will return the following variables: location_set_version_id location_metadata_version_id """ sql_statement = "SELECT location_set_version_id, location_metadata_version_id FROM shared.location_set_version WHERE location_set_id = {location_set_id} AND gbd_round = {gbd_round} AND end_date IS NULL;".format( location_set_id=location_set_id, gbd_round=gbd_round) result_df = run_query(sql_statement) if len(result_df) > 1: exception_text = "This returned more than 1 location_set_version_id ({returned_ids})".format( returened_ids=", ".join(result_df['location_set_version_id']. drop_duplicates().to_list())) raise LookupError(exception_text) elif len(result_df) < 1: raise LookupError("No location set versions returned") return result_df.ix[0, 'location_set_version_id'], result_df.ix[ 0, 'location_metadata_version_id']
def get_model_details(self): """ Gets acause, sex_id, and user from database and stores them in self """ sql_query = """ SELECT mv.model_version_id, mv.cause_id, c.acause, mv.sex_id, mv.inserted_by FROM cod.model_version mv JOIN shared.cause c USING (cause_id) WHERE model_version_id = {}; """.format(self.model_version_id) model_data = run_query(sql_query, server=self.server) self.acause = model_data.ix[0, 'acause'] self.sex_id = model_data.ix[0, 'sex_id'] self.user = model_data.ix[0, 'inserted_by']
def get_location_hierarchy_version(location_set_id=35, gbd_round_id=4, server='SERVER'): """ Get the IDs associated with best version of a location set :param location_set_id: int the set of locations to use :param gbd_round: int the year of the gbd round to use :param server: str db server to use - either :return location_set_version_id: int the version id for the location set and year that were input :return location_metadata_version_id: int the version id for the metadata of the location and year that were input """ sql_statement = """ SELECT a.location_set_version_id, v.location_metadata_version_id FROM shared.location_set_version_active a JOIN shared.location_set_version v ON a.location_set_version_id = v.location_set_version_id WHERE a.location_set_id = {location_set_id} AND a.gbd_round_id = {gbd_round_id} """.format(location_set_id=location_set_id, gbd_round_id=gbd_round_id) result_df = run_query(sql_statement, server) if len(result_df) > 1: exception_text = """ This returned more than 1 location_set_version_id ({returned_ids}) """.format(returned_ids=", ".join(result_df['location_set_version_id'].\ drop_duplicates().to_list())) raise LookupError(exception_text) elif len(result_df) < 1: raise LookupError("No location set versions returned") return result_df.ix[0, 'location_set_version_id'], \ result_df.ix[0, 'location_metadata_version_id']
def get_locations(location_set_version_id): """ Get a DataFrame of the specified location hierarchy version """ return run_query( "Call shared.view_location_hierarchy_history({location_set_version_id})" .format(location_set_version_id=location_set_version_id), server=server_name)
mv.cause_id, c.acause, mv.sex_id, mv.age_start, mv.age_end FROM cod.model_version mv JOIN shared.cause c USING (cause_id) WHERE model_version_id IN ({global_model_version_id}, {developed_model_version_id}) """.format( global_model_version_id=global_model_version_id, developed_model_version_id=developed_model_version_id) feeder_model_data = run_query(sql_query, server=server_name) # Isolate the variables that have been read in as a dataframe sex_id = feeder_model_data['sex_id'].drop_duplicates()[0] cause_id = feeder_model_data['cause_id'].drop_duplicates()[0] acause = feeder_model_data['acause'].drop_duplicates()[0] age_start = feeder_model_data['age_start'].drop_duplicates()[0] age_end = feeder_model_data['age_end'].drop_duplicates()[0] # Create hybrid model entry model_version_metadata = sql.MetaData() engine = sql.create_engine('DATABASE') model_version_table = sql.Table('model_version', model_version_metadata, autoload=True,