def add_robots(layout):
    # add the robot buttons
    rows = db.query(con, 'SELECT button, robot, method, method_args FROM robots;')
    for button, robot, method, method_args in rows:
        layout.append(
            [sg.Button(button, key=f"robot-{robot}.{method}.{method_args}")])
    return layout
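# Usage sketch (not from the source): assumes PySimpleGUI is imported as sg
# and that `db` and `con` are module-level connection objects, as the body
# above implies.
#
#     layout = [[sg.Text("Robots")]]
#     window = sg.Window("Control panel", add_robots(layout))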
def get_age_group_set(age_group_set_id):
    q = """
        SELECT age_group_id, age_group_years_start, age_group_years_end
        FROM shared.age_group_set_list
        JOIN shared.age_group USING (age_group_id)
        WHERE age_group_set_id = {age_group_set_id}
    """.format(age_group_set_id=age_group_set_id)
    age_df = db_tools.query(q, database="shared")
    return age_df
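# Usage sketch (not from the source; set id 1 is the one export_for_upload
# below passes):
#
#     age_df = get_age_group_set(1)
#     # -> one row per age group in the set, with age_group_id and the
#     #    age_group_years_start / age_group_years_end bounds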
def assign_row_nums(df, envr="prod"):
    """Fills in missing row_nums in input dataframe

    Args:
        df (object): pandas dataframe object of input data
        envr (str): database environment to query against

    Returns:
        Returns the dataframe with the row_nums filled in, incrementing
        from the current max in the database for the given
        modelable_entity.
    """
    me_id = df.modelable_entity_id.unique().item()
    query = """
        SELECT ifnull(max(row_num), 0) + 1
        FROM epi.input_data
        WHERE modelable_entity_id = {me_id}
    """.format(me_id=me_id)
    newMaxRowNum = db_tools.query(
        query, database="epi", envr=envr
    ).iloc[0, 0]

    # fill in new row_nums that are currently null
    nullCond = df.row_num.isnull()
    lengthMissingRowNums = len(df[nullCond])
    if lengthMissingRowNums == 0:
        return df
    else:
        df.loc[nullCond, 'row_num'] = range(
            newMaxRowNum, lengthMissingRowNums + newMaxRowNum)

        # check if row nums assigned properly
        assert not any(df.row_num.duplicated()), (
            'Duplicate row numbers assigned')
        assert max(df.row_num) == (
            lengthMissingRowNums + newMaxRowNum - 1
        ), 'Row numbers not assigned correctly'
        return df
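# A self-contained sketch (not from the source) of the fill step above, with
# the database max stubbed to 4 instead of queried:
import pandas as pd

_demo = pd.DataFrame({"modelable_entity_id": [10, 10, 10],
                      "row_num": [1.0, None, None]})
_null = _demo.row_num.isnull()
_demo.loc[_null, "row_num"] = list(range(4, 4 + _null.sum()))
# _demo.row_num is now [1.0, 4.0, 5.0]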
def construct_template(df, id_var):
    """construct template to map adjusted data to codcorrect data"""
    plate = df.copy(deep=True)

    # round the midpoint of each year span to the nearest 5
    plate["year_id"] = (plate["year_end"] + plate["year_start"]) / 2
    plate["year_id"] = plate.year_id.apply(myround, args=(5,))
    plate["cross"] = 1

    # get ages to ids
    ages_query = """
        SELECT age_group_id, age_group_years_start, age_group_years_end
        FROM shared.age_group
        WHERE age_group_id BETWEEN 2 AND 21
    """
    ages = db_tools.query(ages_query, database="shared")
    ages["cross"] = 1

    # assign ids to spans
    plate = plate.merge(ages, on="cross")
    plate = plate.loc[
        (
            (plate.age_group_years_start > plate.age_start) &
            (plate.age_group_years_end < plate.age_end)
        ) |
        (
            (plate.age_group_years_start < plate.age_end) &
            (plate.age_group_years_end > plate.age_start)
        ) |
        (
            (plate.age_group_years_start == 0) &
            (plate.age_start == 0)
        )]
    return plate[[id_var, "year_id", "age_group_id", "sex_id", "location_id"]]
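# `myround` is referenced above but not defined in these snippets; a minimal
# sketch under the assumption that it rounds to the nearest multiple of
# `base` (here 5, matching the year rounding above):
def myround(x, base=5):
    """round x to the nearest multiple of base"""
    return int(base * round(float(x) / base))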
import db_tools

cursor, conn = db_tools.conn()

get_all_anno_data_sql = (
    "SELECT TASK_NAME1 AS ThemeName, TASK_NAME2 AS NewsID, "
    "PAGE_ID AS NewsPageID, USER_NAME AS UserName, CHOICE AS AnnoCorr, "
    "REMARK1 AS AnnoAdd, REMARK2 AS AnnoDel, INSERT_TIME AS InsertTime "
    "FROM theme_context_mark_task"
)
insert_anno_rcd_into_new_table = (
    "INSERT INTO theme_news_anno (ThemeName, ThemeVersionID, NewsID, "
    "NewsPageID, UserName, AnnoCorr, AnnoAdd, AnnoDel) "
    "VALUES ('%s', '0.1', '%s', %d, '%s', '%s', '%s', '%s')"
)

cursor = db_tools.query(cursor, get_all_anno_data_sql)
cursor_insert, conn_insert = db_tools.conn()

row = db_tools.fetch_one_row(cursor)
while row is not None:
    ThemeName = row["ThemeName"]
    NewsID = row["NewsID"]
    NewsPageID = int(row["NewsPageID"])
    UserName = row["UserName"]
    AnnoCorr = row["AnnoCorr"]
    AnnoAdd = row["AnnoAdd"]
    AnnoDel = row["AnnoDel"]
    InsertTime = row["InsertTime"]
    db_tools.insert(
        cursor_insert,
        conn_insert,
        insert_anno_rcd_into_new_table % (
            ThemeName, NewsID, NewsPageID, UserName,
            AnnoCorr, AnnoAdd, AnnoDel),
    )
    row = db_tools.fetch_one_row(cursor)

db_tools.close(cursor_insert)
db_tools.close(cursor)
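# The insert above interpolates values into the SQL string with `%`. If the
# underlying cursor is DB-API compliant, parameter binding sidesteps quoting
# and injection issues. A minimal self-contained sketch using sqlite3 (the
# db_tools cursor API is not shown here, so that is an assumption):
import sqlite3

_conn = sqlite3.connect(":memory:")
_conn.execute("CREATE TABLE theme_news_anno (ThemeName TEXT, NewsID TEXT)")
_conn.execute("INSERT INTO theme_news_anno (ThemeName, NewsID) VALUES (?, ?)",
              ("sports", "n-001"))
_conn.commit()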
def depricated_df(self):
    """pull down all data that is not in the current dismod run.
    set the outlier status to 1."""
    # get ids that we have
    me_id = make_sql_obj(self.proc_df.modelable_entity_id.unique().item())
    calc_input_ids = self.proc_df.emr_calc_input_id.tolist()

    # query for data we don't have; set outlier to 1 on these rows
    old_data_query = """
        SELECT
            id.input_data_id, row_num, modelable_entity_id, location_id,
            sex_id, year_start, year_end, age_start, age_end, nid,
            underlying_nid, source_type_id, sampling_type_id, measure_id,
            representative_id, urbanicity_type_id, recall_type_id,
            recall_type_value, unit_type_id, unit_value_as_published,
            uncertainty_type_id, uncertainty_type_value, input_type_id,
            mean, upper, lower, standard_error, effective_sample_size,
            sample_size, cases, design_effect, 4 AS outlier_type_id,
            NULL AS note_SR, NULL AS note_modeler,
            input_data_metadata_value AS emr_calc_input_id
        FROM epi.input_data id
        JOIN epi.input_data_metadata idm
            ON id.input_data_id = idm.input_data_id
            AND input_data_metadata_type_id = 66
        WHERE modelable_entity_id = {me_id}
        AND input_data_metadata_type_id = 66
        AND id.last_updated_action != "DELETE"
    """.format(me_id=me_id)
    old_emr = db_tools.query(old_data_query, database="epi", envr=self.envr)
    old_emr["emr_calc_input_id"] = old_emr.emr_calc_input_id.astype(float)
    old_emr["emr_calc_input_id"] = old_emr.emr_calc_input_id.astype(int)
    old_emr = old_emr[~old_emr.emr_calc_input_id.isin(calc_input_ids)]
    old_emr = old_emr.drop(["input_data_id"], axis=1)
    return old_emr
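# `make_sql_obj` is used throughout these snippets but not defined in them;
# a minimal sketch under the assumption that it renders a scalar or list
# into a fragment usable in a SQL IN (...) clause:
def make_sql_obj(values):
    """render a scalar or iterable as a comma-separated SQL fragment"""
    if isinstance(values, (list, tuple, set)):
        return ", ".join(str(v) for v in values)
    return str(values)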
def csmr(self):
    """pull csmr and aggregate or duplicate to template"""
    # get template for merging
    adj_data_plate = self.construct_template(self.adj_data,
                                             id_var="input_data_key")
    if self.csmr_cause_id != -1:
        # possible demographics
        locs = make_sql_obj(adj_data_plate.location_id.tolist())
        ages = make_sql_obj(adj_data_plate.age_group_id.tolist())
        sexes = make_sql_obj(adj_data_plate.sex_id.tolist())
        years = make_sql_obj(adj_data_plate.year_id.tolist())

        # pull data
        query = """
            SELECT
                co.location_id, co.year_id, co.age_group_id, co.sex_id,
                co.mean_death/pop_scaled AS mean_csmr,
                co.upper_death/pop_scaled AS upper_csmr,
                co.lower_death/pop_scaled AS lower_csmr
            FROM cod.output co
            JOIN cod.output_version cov
                ON cov.output_version_id = co.output_version_id
            JOIN mortality.output mo
                ON mo.location_id = co.location_id
                AND mo.age_group_id = co.age_group_id
                AND mo.sex_id = co.sex_id
                AND mo.year_id = co.year_id
            JOIN mortality.output_version mov
                ON mov.output_version_id = mo.output_version_id
            WHERE cov.output_version_id = {codcorrect_version_id}
            AND cov.best_end IS NULL
            AND mov.is_best = 1
            AND mov.best_end IS NULL
            AND co.cause_id = {cause_id}
            AND co.location_id IN ({locs})
            AND co.age_group_id IN ({ages})
            AND co.year_id IN ({years})
            AND co.sex_id IN ({sexes})
        """.format(locs=locs, ages=ages, sexes=sexes, years=years,
                   cause_id=self.csmr_cause_id,
                   codcorrect_version_id=self.codcorrect_version_id)
        df = db_tools.query(query, database="cod")
    else:
        # csmr_cause_id == -1 means pull custom csmr data
        query = """
            SELECT
                input_data_id, location_id, year_start, year_end,
                age_start, age_end, sex_id, mean AS mean_csmr,
                lower AS lower_csmr, upper AS upper_csmr
            FROM epi.input_data id
            JOIN epi.model_version mv USING (modelable_entity_id)
            WHERE model_version_id = {mv}
            AND measure_id = 15
            AND id.last_updated_action != "DELETE"
        """.format(mv=self.model_version_id)
        df = db_tools.query(query, database="epi")

        # get template for merging with adjusted data
        csmr_data_plate = self.construct_template(df, id_var="input_data_id")
        df = df[["input_data_id", "mean_csmr", "lower_csmr", "upper_csmr"]]
        df = df.merge(csmr_data_plate, how="inner", on="input_data_id")
        df.drop('input_data_id', axis=1, inplace=True)

    df = df.merge(adj_data_plate, how="inner",
                  on=["year_id", "age_group_id", "sex_id", "location_id"])

    # aggregate csmr by input_data_key
    df["se_csmr"] = df.apply(
        lambda x: se_from_ui(x["mean_csmr"], x["lower_csmr"],
                             x["upper_csmr"], method="non-ratio"),
        axis=1)
    df = df[["input_data_key", "mean_csmr", "se_csmr"]]
    grouped = df.groupby(["input_data_key"])
    df = grouped.apply(
        lambda x: aggregate_se(x["mean_csmr"], x["se_csmr"])
    ).reset_index()
    df = df.rename(columns={"mean": "mean_csmr", "se": "se_csmr"})
    df = df[(df["mean_csmr"] > 0) & (df["se_csmr"] != 0)]
    if len(df) == 0:
        raise NoCSMRValues("no corresponding csmr values discovered")
    return df
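# `se_from_ui` and `aggregate_se` are referenced above but not defined in
# these snippets. Minimal sketches under the assumption that the "non-ratio"
# method recovers a standard error from a 95% uncertainty interval, and that
# aggregation averages the means and combines the errors in quadrature:
import math

import pandas as pd


def se_from_ui(mean, lower, upper, method="non-ratio"):
    """standard error implied by a 95% uncertainty interval (assumed)"""
    if method == "non-ratio":
        return (upper - lower) / (2 * 1.96)
    raise NotImplementedError(method)


def aggregate_se(means, ses):
    """combine per-row means/SEs into one mean and SE (assumed)"""
    n = len(means)
    return pd.Series({"mean": sum(means) / n,
                      "se": math.sqrt(sum(s ** 2 for s in ses)) / n})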
def acmr(self):
    """pull all-cause mortality and aggregate or duplicate to template"""
    # get template for merging
    adj_data_plate = self.construct_template(self.adj_data,
                                             id_var="input_data_key")

    # possible demographics
    locs = make_sql_obj(adj_data_plate.location_id.tolist())
    ages = make_sql_obj(adj_data_plate.age_group_id.tolist())
    sexes = make_sql_obj(adj_data_plate.sex_id.tolist())
    years = make_sql_obj(adj_data_plate.year_id.tolist())

    # pull data; cause_id 294 is all causes
    query = """
        SELECT
            co.location_id, co.year_id, co.age_group_id, co.sex_id,
            co.mean_death/pop_scaled AS mean_acmr,
            co.upper_death/pop_scaled AS upper_acmr,
            co.lower_death/pop_scaled AS lower_acmr
        FROM cod.output co
        JOIN cod.output_version cov
            ON cov.output_version_id = co.output_version_id
        JOIN mortality.output mo
            ON mo.location_id = co.location_id
            AND mo.age_group_id = co.age_group_id
            AND mo.sex_id = co.sex_id
            AND mo.year_id = co.year_id
        JOIN mortality.output_version mov
            ON mov.output_version_id = mo.output_version_id
        WHERE cov.output_version_id = {codcorrect_version_id}
        AND cov.best_end IS NULL
        AND mov.is_best = 1
        AND mov.best_end IS NULL
        AND co.cause_id = 294
        AND co.location_id IN ({locs})
        AND co.age_group_id IN ({ages})
        AND co.year_id IN ({years})
        AND co.sex_id IN ({sexes})
    """.format(locs=locs, ages=ages, sexes=sexes, years=years,
               codcorrect_version_id=self.codcorrect_version_id)
    acmr = db_tools.query(query, database="cod")
    acmr = acmr.merge(adj_data_plate, how="inner",
                      on=["year_id", "age_group_id", "sex_id", "location_id"])

    # aggregate acmr by input_data_key
    acmr["se_acmr"] = acmr.apply(
        lambda x: se_from_ui(x["mean_acmr"], x["lower_acmr"],
                             x["upper_acmr"], method="non-ratio"),
        axis=1)
    acmr = acmr[["input_data_key", "mean_acmr", "se_acmr"]]
    grouped = acmr.groupby(["input_data_key"])
    acmr = grouped.apply(
        lambda x: aggregate_se(x["mean_acmr"], x["se_acmr"])
    ).reset_index()
    acmr = acmr.rename(columns={"mean": "mean_acmr", "se": "se_acmr"})
    acmr = acmr[(acmr["mean_acmr"] > 0) & (acmr["se_acmr"] != 0)]
    return acmr
def adj_data(self, value):
    """clean adjusted data in preparation for calculation"""
    # keep unique
    value = value[["input_data_key", "mean", "lower", "upper"]]
    value = value.drop_duplicates()

    # query metadata
    value["input_data_key"] = value["input_data_key"].astype(int)
    id_keys = make_sql_obj(value.input_data_key.tolist())
    demo_query = """
        SELECT
            input_data_key, input_data_id, modelable_entity_id,
            location_id, sex_id, year_start, year_end, age_start,
            age_end, measure_id
        FROM epi.input_data_audit ida
        WHERE input_data_key IN ({id_keys})
    """.format(id_keys=id_keys)
    wrows = db_tools.query(demo_query, database="epi", envr=self.envr)
    df_wrows = value.merge(wrows, on=["input_data_key"], how="left")

    # subset
    df_wrows = df_wrows.loc[df_wrows.sex_id != 3]  # drop both-sex rows
    df_wrows = df_wrows.loc[df_wrows["mean"] > 0]  # drop 0 means
    df_wrows = df_wrows.loc[
        ((df_wrows.age_end - df_wrows.age_start) <= 15) |  # age span <= 15
        (df_wrows.age_start >= 80)]  # or terminal age group
    df_wrows = df_wrows.loc[
        (df_wrows["mean"].notnull()) &
        (df_wrows["lower"].notnull()) &
        (df_wrows["upper"].notnull())]  # mean, upper, and lower not null
    df_wrows = df_wrows.loc[df_wrows.measure_id.isin([5, 6])]
    if len(df_wrows) == 0:
        raise NoNonZeroValues("no non-zero values for incidence")

    # query for previously calculated emr row numbers
    me_id = df_wrows.modelable_entity_id.unique().item()
    input_data_ids = make_sql_obj(df_wrows.input_data_id.tolist())
    metadata_query = """
        SELECT
            id.row_num AS emr_row_num,
            input_data_metadata_value AS input_data_id
        FROM epi.input_data id
        JOIN epi.input_data_metadata idm
            ON id.input_data_id = idm.input_data_id
        WHERE modelable_entity_id = {me_id}
        AND input_data_metadata_type_id = 66
        AND input_data_metadata_value IN ({input_data_ids})
        AND id.last_updated_action != "DELETE"
    """.format(me_id=me_id, input_data_ids=input_data_ids)
    old_emr = db_tools.query(metadata_query, database="epi", envr=self.envr)
    old_emr["input_data_id"] = old_emr.input_data_id.astype(float)
    old_emr["input_data_id"] = old_emr.input_data_id.astype(int)
    df_wmetadata = df_wrows.merge(old_emr, on=["input_data_id"], how="left")

    # compute standard error
    df_wmetadata["se"] = df_wmetadata.apply(
        lambda x: se_from_ui(x["mean"], x["lower"], x["upper"],
                             method="non-ratio"),
        axis=1)
    df = df_wmetadata.rename(columns={"mean": "mean_", "se": "se_"})
    df = df.drop(["upper", "lower"], axis=1)
    df = df[(df["mean_"] > 0) & (df["se_"] != 0)]

    # set result on self
    self._adj_data = df
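# `adj_data` reads like the setter half of a property (it stores its result
# on self._adj_data, which csmr and acmr read back). A sketch of the assumed
# wiring on the class:
#
#     @property
#     def adj_data(self):
#         return self._adj_data
#
#     @adj_data.setter
#     def adj_data(self, value):
#         ...  # body as above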
def export_for_upload(self, df, modelable_entity_id, nid):
    # auto fill
    df["row_num"] = np.nan
    df["parent_id"] = np.nan
    df["data_sheet_file_path"] = np.nan
    df["input_type"] = np.nan
    df["modelable_entity_id"] = modelable_entity_id
    df["underlying_nid"] = np.nan
    df["nid"] = nid
    df["underlying_field_citation_value"] = np.nan
    df["field_citation_value"] = np.nan
    df["page_num"] = np.nan
    df["table_num"] = np.nan
    df["source_type"] = "Mixed or estimation"
    df["ihme_loc_id"] = np.nan
    df["smaller_site_unit"] = np.nan
    df["site_memo"] = np.nan
    df["sex_issue"] = 0
    df["year_start"] = df["year_id"]
    df["year_end"] = df["year_id"]
    df["year_issue"] = 0
    df["age_issue"] = 0
    df["age_demographer"] = np.nan
    df["measure"] = "incidence"
    df["standard_error"] = np.nan
    df["effective_sample_size"] = np.nan
    df["cases"] = np.nan
    df["sample_size"] = np.nan
    df["design_effect"] = np.nan
    df["unit_type"] = "Person"
    df["unit_value_as_published"] = 1
    df["measure_issue"] = 0
    df["measure_adjustment"] = np.nan
    df["uncertainty_type"] = "Confidence interval"
    df["uncertainty_type_value"] = 95
    df["representative_name"] = "Not Set"
    df["urbanicity_type"] = "Unknown"
    df["recall_type"] = "Not Set"
    df["recall_type_value"] = np.nan
    df["sampling_type"] = np.nan
    df["response_rate"] = np.nan
    df["case_name"] = np.nan
    df["case_definition"] = np.nan
    df["case_diagnostics"] = np.nan
    df["note_modeler"] = np.nan
    df["note_SR"] = np.nan
    df["extractor"] = "strUser"
    df["is_outlier"] = 0
    df["2013_data_id"] = np.nan
    df["specificity"] = np.nan
    df["group"] = np.nan
    df["group_review"] = np.nan

    # mapped: modelable_entity_name
    q = """
        SELECT modelable_entity_name
        FROM epi.modelable_entity
        WHERE modelable_entity_id = {modelable_entity_id}
    """.format(modelable_entity_id=modelable_entity_id)
    me_name = db_tools.query(
        q, database="epi")["modelable_entity_name"].item()
    df["modelable_entity_name"] = me_name

    # location_name
    loc_df = getset.get_current_location_set()
    loc_df = loc_df.loc[
        loc_df["most_detailed"] == 1,
        ["location_id", "location_name"]
    ]
    df = df.merge(loc_df, on="location_id", how="left")

    # sex
    q = "SELECT sex_id, sex FROM shared.sex"
    sex_df = db_tools.query(q, database="shared")
    df = df.merge(sex_df, on="sex_id", how="left")

    # age
    age_df = getset.get_age_group_set(1)
    age_df = age_df.rename(columns={"age_group_years_start": "age_start",
                                    "age_group_years_end": "age_end"})
    df = df.merge(age_df, on="age_group_id", how="left")

    # export path
    q = """
        SELECT concat(
            "/home/j/WORK/04_epi/01_database/02_data/", acause, "/",
            modelable_entity_id, "/04_big_data"
        ) AS fpath
        FROM epi.modelable_entity
        JOIN epi.modelable_entity_cause USING (modelable_entity_id)
        JOIN shared.cause USING (cause_id)
        JOIN epi.modelable_entity_type USING (modelable_entity_type_id)
        WHERE modelable_entity_cause.last_updated_action != 'DELETE'
        AND modelable_entity_id = {modelable_entity_id}
    """.format(modelable_entity_id=modelable_entity_id)
    outdir = db_tools.query(q, database="epi")["fpath"].item()
    if not os.path.exists(outdir):
        os.makedirs(outdir)
    fname = os.path.join(outdir, "westrom_calc.xlsx")

    # export it
    df = df[export_cols]
    df.to_excel(fname, encoding="utf8", sheet_name="extraction")
    return fname
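# Usage sketch (ids below are hypothetical, not from the source). Note that
# `export_cols` must exist at module level with the uploader's expected
# column order before this runs:
#
#     fname = calc.export_for_upload(df, modelable_entity_id=1234, nid=56789)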