Example #1
0
def upload_to_db(table, db):
    """Upload a csv to the cod database."""
    #determine schema (uploading to cod or cancer)
    upload_path = _get_upload_path(table)
    sesh = get_session(db)
    inf = Infiles(table=table, schema='cod', session=sesh)
    inf.infile(path=upload_path, with_replace=False, commit=True)
def upload_to_table(file_list, table_name, env, raise_on_failure):
    if env == DBEnv.PROD:
        conn_def = _ConnDefs.PROD.value
    elif env == DBEnv.DEV:
        conn_def = _ConnDefs.DEV.value
    sesh = get_session(conn_def=conn_def, connectable=True)
    infiler = Infiles(table=table_name, schema="gbd", session=sesh)

    logger.info("Beginning infile, time = {}".format(time.time()))
    for f in file_list:
        try:
            logger.info("infiling {}, time = {}".format(f, time.time()))
            # summaries are saved with col mean, need to upload to table as
            # val
            infiler.infile(f,
                           with_replace=False,
                           commit=True,
                           rename_cols={'mean': 'val'})
        except Exception:
            if raise_on_failure:
                raise
            tb = traceback.format_exc()
            logger.error("failed to infile {} with exception {};"
                         "Skipping to next file".format(f, tb))
    logger.info("Done infile, time = {}".format(time.time()))
Example #3
0
def load_data(stagedir, conn, table_name):
    sesh = get_session(conn_def=conn)

    infiler = Infiles(table=table_name,
                      schema='gbd',
                      session=sesh)

    csv = '%s/%s.csv' % (stagedir, table_name)
    print('beginning infile')
    start_time = str(datetime.datetime.now().time())

    infiler.infile(path=csv, commit=True)

    print('done with infiles at {} (started {})'.format(
            str(datetime.datetime.now().time()), start_time))
    return None
Example #4
0
def upload(sev_version_id, process_version_id, table_type):

    file_pattern = "FILEPATH/sev/{v}/summaries/{tt}_*.csv".format(v=sev_version_id, tt=table_type)
    data = []
    for f in glob.glob(file_pattern):
        data.append(pd.read_csv(f))
    data = pd.concat(data)
    data.fillna(0, inplace=True)
    id_cols = [c for c in data if c.endswith('_id')]
    data[id_cols] = data[id_cols].astype(int)
    table_name = "output_sev_{}_v{}".format(table_type, process_version_id.gbd_process_version_id)
    data_dir = "FILEPATH/sev/{v}/summaries/{tt}".format(v=sev_version_id, tt=table_type)
    os.mkdir(data_dir)

    if table_type == 'single_year':
        data = data[['measure_id', 'year_id', 'location_id', 'sex_id',
               'age_group_id', 'rei_id', 'metric_id', 'val',
               'upper', 'lower']].sort_values(
                   ['measure_id', 'year_id', 'location_id',
                    'sex_id', 'age_group_id', 'rei_id', 'metric_id'])
        year_ids = data['year_id'].drop_duplicates().tolist()
        for yr in year_ids:
            file = os.path.join(data_dir, 'single_year_{}.csv'.format(int(yr)))
            data.loc[data['year_id'] == yr].to_csv(file, index=False)
            os.chmod(file, 0o775)
    else:
        data = data[['measure_id', 'year_start_id',
                 'year_end_id', 'location_id', 'sex_id',
                 'age_group_id', 'rei_id', 'metric_id', 'val',
                 'upper', 'lower']].sort_values(
                     ['measure_id', 'year_start_id',
                      'year_end_id', 'location_id',
                      'sex_id', 'age_group_id', 'rei_id', 'metric_id'])
        year_ids = data[['year_start_id','year_end_id']].drop_duplicates().to_dict('split')['data']
        for yr in year_ids:
            file = os.path.join(data_dir, 'multi_year_{}_{}.csv'.format(int(yr[0]),int(yr[1])))
            data.loc[(data['year_start_id'] == yr[0]) & (data['year_end_id'] == yr[1])].to_csv(file, index=False)
            os.chmod(file, 0o775)

    sesh = get_session(conn_def='gbd', connectable=True)
    infiler = Infiles(table=table_name, schema='gbd', session=sesh)
    file_list = sorted(os.listdir(data_dir))
    for f in file_list:
        print(f)
        infiler.infile(os.path.join(data_dir, f), with_replace=False, commit=True)
Example #5
0
def load_data(stagedir, conn, table_name):
    ##############################################################
    ## Loads data into specified db using db_tools Infiles
    ## object
    ##############################################################
    sesh = get_session(conn_def=conn)

    infiler = Infiles(table=table_name, schema='gbd', session=sesh)

    csv = '%s/%s.csv' % (stagedir, table_name)
    print('beginning infile')
    start_time = str(datetime.datetime.now().time())

    infiler.infile(path=csv, commit=True, with_replace=True)

    print('done with infiles at {} (started {})'.format(
        str(datetime.datetime.now().time()), start_time))
    return None
Example #6
0
 def load_location(self, file_list):
     self.engine.engine.dispose()
     session = self.engine.create_session(scoped=True).session
     infiler = Infiles(self.table_name, self._schema, session)
     for file in file_list:
         try:
             start_time = time.time()
             infiler.infile(file,
                            with_replace=False,
                            commit=True,
                            rename_cols={'mean': 'val'})
             end_time = time.time()
             elapsed = end_time - start_time
             logger.info("Infiling file {} at time {}".format(
                 file, elapsed))
         except exc.IntegrityError:
             logger.info("Recieved IntegrityError for file {}, "
                         "skipping infile at time {}".format(
                             file, time.time()))
         except Exception as e:
             logger.info(e)
             raise
     session.close()
Example #7
0
def upload_diagnostics(parent_dir):
    sesh = get_session('codcorrect')
    inf = Infiles(table='diagnostic', schema='codcorrect', session=sesh)
    inf.infile(path='{pd}/_temp/upload_diagnostics.csv'.format(pd=parent_dir),
               with_replace=True,
               commit=True)
Example #8
0
def upload_diagnostics(parent_dir):
    sesh = get_session('codcorrect')
    inf = Infiles(table='diagnostic', schema='codcorrect', session=sesh)
    inf.infile(path='FILEPATH.csv', with_replace=True, commit=True)