def test_insert_new_row(covariate_db, cov_conn_def): """Tests the addition of a single row to the covariate.model table.""" initialize_and_fill_covariate_db(covariate_db, cov_conn_def) sesh = get_session(cov_conn_def) insert_q = """ INSERT INTO covariate.model (model_version_id, year_id, location_id, sex_id, age_group_id, mean_value, upper_value, lower_value) VALUES ('1', '2017', '1', '2', '22', '10.4', '11.2', '9.6'), ('1', '2016', '1', '2', '22', '10.5', '11.4', '9.4'), ('1', '2015', '1', '2', '22', '10.6', '11.6', '9.2'), ('1', '2014', '1', '2', '22', '10.7', '11.8', '9'), ('1', '2013', '1', '2', '22', '10.8', '12', '8.8'); """ exec_query(insert_q, session=sesh) sesh.commit() select_q = """ SELECT * FROM covariate.model WHERE model_version_id = 1; """ res = query_2_df(select_q, session=sesh) assert not res.empty # We added five entries associated with model_version_id 1 assert len(res) == 5 # We have to close the session or the next test will hang sesh.close()
def exec_query(call, conn_def): """ Executes a raw SQL statement with db_tools """ session = ezfuncs.get_session(conn_def) call = clean_string(call) return query_tools.exec_query(call, session=session, close=True)
def write_data(df, db, table, conn_def): """ Writes data in bulk as a pd.DataFrame to a specified table/database. """ session = ezfuncs.get_session(conn_def) loaders.Inserts(table=table, schema=db, insert_df=df).insert(session, commit=True)
def test_insert_defaults(covariate_db, cov_conn_def): """Insert a new row into the covariate.model_version without a best_start, best_end, best_user, best_description column to check the default value.""" covariate_db.add_conn_def(cov_conn_def) sesh = get_session(cov_conn_def) insert_q = """ INSERT INTO covariate.model_version (model_version_id, covariate_id, description, code_version, status, is_best, gbd_round_id) VALUES ('1', '1', 'testing defaults', 'version 1', '1', '0', '5'); """ exec_query(insert_q, session=sesh) sesh.commit() select_q = """ SELECT * FROM covariate.model_version WHERE model_version_id = 1; """ res = query_2_df(select_q, session=sesh) assert not res.empty assert not res.at[0, 'best_user'] assert not res.at[0, 'best_description'] assert not res.at[0, 'best_start'] assert not res.at[0, 'best_end'] # we need to close the session or the next test will hang sesh.close()
def generate_new_version(cls): session = ezfuncs.get_session(conn_def="como-epi") try: q = """ INSERT INTO epi.output_version ( username, description, code_version, status, is_best ) VALUES( '{user}', 'Central COMO run', '{code_version}', 0, 0)""".format(user=getpass.getuser(), code_version=4) session.execute(q) session.flush() como_version_id = session.execute("select LAST_INSERT_ID()" ).scalar() session.commit() except Exception: session.rollback() raise return como_version_id
def run_parent_adjust(cause_id, children, mvt, vers, detail, sex): ########################################## #Add oldCorrect model and its children #into the cod.model_version_relation table #for codCorrect ########################################## session = get_session('cod') parent = pull_mvid(cause_id, mvt, sex=sex) assert parent, "No parent saved" parent = parent[0] if mvt == 9: desc = 'oldC v%s hybrid of %s (%s)' % (vers, children, detail) elif mvt == 10: desc = 'oldC v%s target (%s)' % (vers, detail) try: for child in children: query = """INSERT INTO cod.model_version_relation (parent_id, child_id, model_version_relation_note) VALUES ({parent}, {child}, "{desc}") """.format(parent=parent, child=child, desc=desc) exec_query(query, session=session) session.commit() except: session.rollback() session.close() raise
def upload(self): """ Attempts to infile any .csv files found in the /{output_dir}/{version_id} directory to gbd.output_le_decomp_v{self.process_version_id}. Any .csv files not to be uploaded should be stashed in subfolders. """ if not self._process_version_id: raise RuntimeError("A process version must be created " "before results can be uploaded.") table_name = list( GBDProcessVersion(self._process_version_id, env=self.env).tables)[0] session = get_session(conn_def=self.conn_def) infiler = Infiles(table=table_name, schema=self._schema, session=session) print(f"Starting upload at {datetime.now().time()}") infiler.indir(path=self.write_dir, partial_commit=True, sort_files=True) session.close() print(f"Finished upload at {datetime.now().time()}")
def create_new_output_version_row(output_version_id, description, envelope_version_id, conn_def, status=2): logging.info("database.create_new_output_version_row") if not is_existing_output_version(output_version_id, conn_def): # Create a row to input now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") ins_df = pd.DataFrame( { 'output_version_id': output_version_id, 'description': description, 'code_version': 5, 'env_version': envelope_version_id, 'status': status, 'is_best': 0, 'date_inserted': now, 'inserted_by': getpass.getuser(), 'last_updated': now, 'last_updated_by': getpass.getuser(), 'last_updated_action': "INSERT" }, columns=[ 'output_version_id', 'description', 'code_version', 'env_version', 'status', 'is_best', 'date_inserted', 'inserted_by', 'last_updated', 'last_updated_by', 'last_updated_action' ], index=[0]) # Insert row ins = Inserts(table='output_version', schema='cod', insert_df=ins_df) sesh = get_session(conn_def) ins.insert(sesh, commit=True)
def upload_to_db(table, db): """Upload a csv to the cod database.""" #determine schema (uploading to cod or cancer) upload_path = _get_upload_path(table) sesh = get_session(db) inf = Infiles(table=table, schema='cod', session=sesh) inf.infile(path=upload_path, with_replace=False, commit=True)
def upload_to_table(file_list, table_name, env, raise_on_failure): if env == DBEnv.PROD: conn_def = _ConnDefs.PROD.value elif env == DBEnv.DEV: conn_def = _ConnDefs.DEV.value sesh = get_session(conn_def=conn_def, connectable=True) infiler = Infiles(table=table_name, schema="gbd", session=sesh) logger.info("Beginning infile, time = {}".format(time.time())) for f in file_list: try: logger.info("infiling {}, time = {}".format(f, time.time())) # summaries are saved with col mean, need to upload to table as # val infiler.infile(f, with_replace=False, commit=True, rename_cols={'mean': 'val'}) except Exception: if raise_on_failure: raise tb = traceback.format_exc() logger.error("failed to infile {} with exception {};" "Skipping to next file".format(f, tb)) logger.info("Done infile, time = {}".format(time.time()))
def mark_gbd_best(output_version_id, process_version_id, gbd_conn_def): gbd_sesh = get_session(gbd_conn_def) update_gbd_cmd = """UPDATE gbd.gbd_process_version SET gbd_process_version_status_id = 1 WHERE gbd_process_version_id = {v} """.format(v=process_version_id) exec_query(update_gbd_cmd, session=gbd_sesh, close=True)
def activate_new_process_version(process_version_id, conn_def): sesh = get_session(conn_def) q = """ UPDATE gbd_process_version SET gbd_process_version_status_id = 2 WHERE gbd_process_version_id = {process_version_id} """.format(process_version_id=process_version_id) exec_query(q, session=sesh, close=True)
def add_process_version_to_compare_version_output(process_version_id, compare_version_id, conn_def): sesh = get_session(conn_def) add_process_vers_query = """ CALL gbd.add_process_version_to_compare_version_output ({}, {}) """.format(compare_version_id, process_version_id) exec_query(add_process_vers_query, session=sesh, close=True)
def get_parallelization_location_group(location_set_id, nparallel, location_group, gbd_round_id, decomp_step, national_level = NATIONAL_LEVEL ): session = ezfuncs.get_session(conn_def='epi') locs = query.get_locations(location_set_id, gbd_round_id, decomp_step, session)[0] locs = locs.sort_values(by = ['level_{}'.format(national_level)]) parallel_groups = np.array_split(locs.loc[locs.level >= national_level][SPACEVAR].values, nparallel) prediction_location_ids = parallel_groups[location_group].tolist() return prediction_location_ids
def upload_cod_summaries(directories, conn_def): sesh = get_session(conn_def) exec_query("set unique_checks= 0", sesh) inf = Infiles(table='output', schema='cod', session=sesh) for directory in directories: inf.indir(path=directory, with_replace=True, partial_commit=True, commit=True)
def wipe_diagnostics(): # Create a connection sesh = get_session("codcorrect") unmark_best = ('UPDATE codcorrect.diagnostic_version SET is_best = 0 ' 'WHERE is_best = 2;') exec_query(unmark_best, session=sesh, close=True) unmark_best = ('UPDATE codcorrect.diagnostic_version SET is_best = 2 ' 'WHERE is_best = 1;') exec_query(unmark_best, session=sesh, close=True)
def create_output_partition(codcorrect_version_id) -> None: exec_query(CoDCorrectVersion.CREATE_OUTPUT_PARTITION, session=get_session(ConnectionDefinitions.COD), close=True, parameters={ 'schema': COD.DataBase.SCHEMA, 'table': COD.DataBase.TABLE, Columns.OUTPUT_VERSION_ID: codcorrect_version_id })
def add_kit_vers(self): sesh = get_session(self.conn_def) query = ('INSERT INTO gbd.kit_version ' '(kit_id, kit_version_note, ' 'gbd_round_id, kit_version_status_id) ' 'VALUES ' '(2, "MMR run for codcorrect version {}", {}, -1)' .format(self.codcorrect_vers, self.gbd_round_id)) exec_query(query, session=sesh, close=True)
def activate_new_compare_version(compare_version_id, conn_def): sesh = get_session(conn_def) q = """ UPDATE compare_version SET compare_version_status_id = 2 WHERE compare_version_id = {} AND compare_version_status_id = -1 """.format(compare_version_id) exec_query(q, session=sesh, close=True)
def update_status(output_version_id, status, conn_def): # Update status sesh = get_session(conn_def) update_cmd = """UPDATE cod.output_version SET status = {s} WHERE output_version_id = {v}""".format(s=status, v=output_version_id) exec_query(update_cmd, session=sesh, close=True)
def session_scope(conn_def: str) -> Generator[orm.Session, None, None]: """Provides a transactional scope around a series of operations.""" session = ezfuncs.get_session(conn_def) try: yield session session.commit() except Exception: session.rollback() raise finally: session.close()
def new_diagnostic_version(output_version_id): # Create values to insert ins_df = pd.DataFrame([{ 'output_version_id': output_version_id, 'is_best': 1 }]) # Insert row ins = Inserts(table='diagnostic_version', schema='codcorrect', insert_df=ins_df) sesh = get_session('codcorrect') ins.insert(sesh, commit=True)
def unmark_cod_best(output_version_id, cod_conn_def): now = datetime.datetime.now() cod_sesh = get_session(cod_conn_def) update_cod_cmd = """UPDATE cod.output_version SET is_best = 0, best_end = '{now}' WHERE output_version_id != {v} AND best_start IS NOT NULL AND best_end IS NULL""".format(now=now, v=output_version_id) exec_query(update_cod_cmd, session=cod_sesh, close=True)
def mark_cod_best(output_version_id, cod_conn_def): now = datetime.datetime.now() cod_sesh = get_session(cod_conn_def) update_cod_cmd = """UPDATE cod.output_version SET is_best = 1, best_start = '{now}' WHERE output_version_id = {v}""".format(now=now, v=output_version_id) exec_query(update_cod_cmd, session=cod_sesh, close=True)
def upload_emr(self): session = ezfuncs.get_session(envs.Environment.get_odbc_key()) path = ("FILEPATH").format( cascade_root=envs.Environment.get_cascade_root(), mv=self.model_version_id) infiler = loaders.ToInfile(path) infiler.stage_2_stagedir(self.emr) infiler.upload_stagedir(table="t3_model_version_emr", schema="epi", session=session, commit=True)
def add_metadata_to_process_version(self, process_version_id): sesh = get_session(self.conn_def) query = ('INSERT INTO gbd.gbd_process_version_metadata (' 'gbd_process_version_id, ' 'metadata_type_id, ' 'val) ' 'VALUES ' '({pv_id}, ' '1, ' '"{ov_id}")').format(pv_id=process_version_id, ov_id=self.codcorrect_vers) exec_query(query, session=sesh, close=True)
def upload_emr(self): session = ezfuncs.get_session(envs.Environment.get_odbc_key()) path = ( "{cascade_root}/{mv}/full/locations/1/outputs/both/2000").format( cascade_root=envs.Environment.get_cascade_root(), mv=self.model_version_id) infiler = loaders.ToInfile(path) infiler.stage_2_stagedir(self.emr) infiler.upload_stagedir(table="t3_model_version_emr", schema="epi", session=session, commit=True)
def upload(self, sm, process_vers, in_dir, with_replace=True): '''Args: sm: 'single' or 'multi', referring to which table to use process_vers: comes from the output of above in_dir: filepath of csvs to be uploaded conn_def: 'gbd' or 'gbd_test' ''' table = 'output_mmr_%s_year_v%s' % (sm, process_vers) sesh = get_session(self.conn_def) inf = Infiles(table=table, schema='gbd', session=sesh) inf.indir(path=in_dir, with_replace=with_replace, partial_commit=True, commit=True) activate_process_version(process_version_id=process_vers, session=sesh) print("Uploaded! You win at life.")
def find_parallelization_group_for_loc(loc, gbd_round_id, decomp_step, location_set_id = 22, nparallel = 50, national_level = NATIONAL_LEVEL): session = ezfuncs.get_session(conn_def='epi') locs = query.get_locations(location_set_id, gbd_round_id, decomp_step, session)[0] locs = locs.sort_values(by = ['level_{}'.format(national_level)]) parallel_groups = np.array_split(locs.loc[locs.level >= national_level][SPACEVAR].values, nparallel) for i in range(0, len(parallel_groups)): if loc in list(parallel_groups[i]): location_group = print(i) else: 'You sure that is a location_id? No values here.' return location_group
def load_component_location(self, component, location_id, year_type): table_tmp = getattr(self, year_type)[component] table = table_tmp.format(self.como_version.gbd_process_version_id) sesh = get_session("gbd") infiler = Infiles(table, "gbd", sesh) if year_type == "single_year": indir_glob = f"{self.como_version.como_dir}/summaries/{component}/*/{year_type}/{location_id}/*.csv" else: indir_glob = f"{self.como_version.como_dir}/summaries/{component}/*/{year_type}/{location_id}.csv" infiler.indir(path=indir_glob, commit=True, partial_commit=True, rename_cols={"mean": "val"}, no_raise=(IntegrityError))