def upload_to_db(table, db):
    """Upload a csv to the cod database."""
    # determine schema (uploading to cod or cancer)
    upload_path = _get_upload_path(table)
    sesh = get_session(db)
    inf = Infiles(table=table, schema='cod', session=sesh)
    inf.infile(path=upload_path, with_replace=False, commit=True)
def upload_to_table(file_list, table_name, env, raise_on_failure):
    if env == DBEnv.PROD:
        conn_def = _ConnDefs.PROD.value
    elif env == DBEnv.DEV:
        conn_def = _ConnDefs.DEV.value
    sesh = get_session(conn_def=conn_def, connectable=True)
    infiler = Infiles(table=table_name, schema="gbd", session=sesh)
    logger.info("Beginning infile, time = {}".format(time.time()))
    for f in file_list:
        try:
            logger.info("infiling {}, time = {}".format(f, time.time()))
            # summaries are saved with col mean, need to upload to table
            # as val
            infiler.infile(f, with_replace=False, commit=True,
                           rename_cols={'mean': 'val'})
        except Exception:
            if raise_on_failure:
                raise
            tb = traceback.format_exc()
            logger.error("failed to infile {} with exception {}; "
                         "skipping to next file".format(f, tb))
    logger.info("Done infile, time = {}".format(time.time()))
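# Hedged usage sketch (an illustration only, not part of the original
# module): drives upload_to_table over a directory of summary CSVs. The
# staging path and table name below are hypothetical placeholders; DBEnv
# is assumed to be the env enum referenced above.
if __name__ == '__main__':
    import glob

    summary_files = sorted(glob.glob("FILEPATH/summaries/*.csv"))
    upload_to_table(file_list=summary_files,
                    table_name="output_summary_upload",  # hypothetical table
                    env=DBEnv.DEV,
                    raise_on_failure=False)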
def load_data(stagedir, conn, table_name):
    sesh = get_session(conn_def=conn)
    infiler = Infiles(table=table_name, schema='gbd', session=sesh)
    csv = '%s/%s.csv' % (stagedir, table_name)
    print('beginning infile')
    start_time = str(datetime.datetime.now().time())
    infiler.infile(path=csv, commit=True)
    print('done with infiles at {} (started {})'.format(
        str(datetime.datetime.now().time()), start_time))
    return None
def upload(sev_version_id, process_version_id, table_type):
    # read and combine all summary files for this SEV version and table type
    file_pattern = "FILEPATH/sev/{v}/summaries/{tt}_*.csv".format(
        v=sev_version_id, tt=table_type)
    data = []
    for f in glob.glob(file_pattern):
        data.append(pd.read_csv(f))
    data = pd.concat(data)
    data.fillna(0, inplace=True)
    id_cols = [c for c in data if c.endswith('_id')]
    data[id_cols] = data[id_cols].astype(int)

    table_name = "output_sev_{}_v{}".format(
        table_type, process_version_id.gbd_process_version_id)
    data_dir = "FILEPATH/sev/{v}/summaries/{tt}".format(
        v=sev_version_id, tt=table_type)
    os.mkdir(data_dir)

    # sort the summaries and split them into one CSV per year (single_year)
    # or per year pair (multi_year)
    if table_type == 'single_year':
        data = data[['measure_id', 'year_id', 'location_id', 'sex_id',
                     'age_group_id', 'rei_id', 'metric_id',
                     'val', 'upper', 'lower']].sort_values(
            ['measure_id', 'year_id', 'location_id', 'sex_id',
             'age_group_id', 'rei_id', 'metric_id'])
        year_ids = data['year_id'].drop_duplicates().tolist()
        for yr in year_ids:
            file = os.path.join(data_dir,
                                'single_year_{}.csv'.format(int(yr)))
            data.loc[data['year_id'] == yr].to_csv(file, index=False)
            os.chmod(file, 0o775)
    else:
        data = data[['measure_id', 'year_start_id', 'year_end_id',
                     'location_id', 'sex_id', 'age_group_id', 'rei_id',
                     'metric_id', 'val', 'upper', 'lower']].sort_values(
            ['measure_id', 'year_start_id', 'year_end_id', 'location_id',
             'sex_id', 'age_group_id', 'rei_id', 'metric_id'])
        year_ids = data[['year_start_id', 'year_end_id']].drop_duplicates(
            ).to_dict('split')['data']
        for yr in year_ids:
            file = os.path.join(
                data_dir,
                'multi_year_{}_{}.csv'.format(int(yr[0]), int(yr[1])))
            data.loc[(data['year_start_id'] == yr[0]) &
                     (data['year_end_id'] == yr[1])].to_csv(file, index=False)
            os.chmod(file, 0o775)

    # infile each per-year CSV into the output table
    sesh = get_session(conn_def='gbd', connectable=True)
    infiler = Infiles(table=table_name, schema='gbd', session=sesh)
    file_list = sorted(os.listdir(data_dir))
    for f in file_list:
        print(f)
        infiler.infile(os.path.join(data_dir, f),
                       with_replace=False, commit=True)
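# For illustration (hypothetical IDs): with sev_version_id=300,
# gbd_process_version_id=123 and table_type='single_year', the split above
# writes files such as
#     FILEPATH/sev/300/summaries/single_year/single_year_1990.csv
# and the final loop infiles each of them into gbd.output_sev_single_year_v123.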
def load_data(stagedir, conn, table_name):
    """Loads data into specified db using db_tools Infiles object."""
    sesh = get_session(conn_def=conn)
    infiler = Infiles(table=table_name, schema='gbd', session=sesh)
    csv = '%s/%s.csv' % (stagedir, table_name)
    print('beginning infile')
    start_time = str(datetime.datetime.now().time())
    infiler.infile(path=csv, commit=True, with_replace=True)
    print('done with infiles at {} (started {})'.format(
        str(datetime.datetime.now().time()), start_time))
    return None
def load_location(self, file_list):
    self.engine.engine.dispose()
    session = self.engine.create_session(scoped=True).session
    infiler = Infiles(self.table_name, self._schema, session)
    for file in file_list:
        try:
            start_time = time.time()
            infiler.infile(file, with_replace=False, commit=True,
                           rename_cols={'mean': 'val'})
            elapsed = time.time() - start_time
            logger.info("Infiled file {} in {} seconds".format(
                file, elapsed))
        except exc.IntegrityError:
            logger.info("Received IntegrityError for file {}, "
                        "skipping infile at time {}".format(
                            file, time.time()))
        except Exception as e:
            logger.info(e)
            raise
    session.close()
def upload_diagnostics(parent_dir):
    sesh = get_session('codcorrect')
    inf = Infiles(table='diagnostic', schema='codcorrect', session=sesh)
    inf.infile(path='{pd}/_temp/upload_diagnostics.csv'.format(pd=parent_dir),
               with_replace=True, commit=True)
def upload_diagnostics(parent_dir):
    sesh = get_session('codcorrect')
    inf = Infiles(table='diagnostic', schema='codcorrect', session=sesh)
    inf.infile(path='FILEPATH.csv', with_replace=True, commit=True)