Beispiel #1
0
def upload_to_db(table, db):
    """Infile the staged csv for `table` into the cod schema.

    Args:
        table: name of the target table; also used to locate the staged csv.
        db: connection definition passed through to get_session.
    """
    # Resolve where the staged csv for this table lives.
    csv_path = _get_upload_path(table)
    session = get_session(db)
    loader = Infiles(table=table, schema='cod', session=session)
    # Append-only load (no replace), committed in one shot.
    loader.infile(path=csv_path, with_replace=False, commit=True)
Beispiel #2
0
    def upload(self):
        """Infile the .csv files staged under the write directory.

        Loads every .csv found in /{output_dir}/{version_id} into
        gbd.output_le_decomp_v{self.process_version_id}; csvs that must
        not be uploaded should be stashed in subfolders (indir does not
        recurse).

        Raises:
            RuntimeError: if no process version has been created yet.
        """
        if not self._process_version_id:
            raise RuntimeError(
                "A process version must be created "
                "before results can be uploaded.")
        process_version = GBDProcessVersion(
            self._process_version_id, env=self.env)
        # The process version carries the target table; take the first one.
        target_table = list(process_version.tables)[0]
        db_session = get_session(conn_def=self.conn_def)

        loader = Infiles(
            table=target_table, schema=self._schema, session=db_session)

        print(f"Starting upload at {datetime.now().time()}")
        # Deterministic file order; commit as each file lands.
        loader.indir(
            path=self.write_dir, partial_commit=True, sort_files=True)
        db_session.close()
        print(f"Finished upload at {datetime.now().time()}")
def upload_to_table(file_list, table_name, env, raise_on_failure):
    """Infile each csv in `file_list` into gbd.<table_name>.

    Args:
        file_list: iterable of csv paths to infile, in order.
        table_name: target table in the gbd schema.
        env: DBEnv.PROD or DBEnv.DEV; selects the connection definition.
        raise_on_failure: if True, re-raise the first infile error;
            otherwise log the traceback and continue with the next file.

    Raises:
        ValueError: if env is neither DBEnv.PROD nor DBEnv.DEV.
    """
    if env == DBEnv.PROD:
        conn_def = _ConnDefs.PROD.value
    elif env == DBEnv.DEV:
        conn_def = _ConnDefs.DEV.value
    else:
        # Previously an unrecognized env fell through with conn_def
        # unbound, surfacing as a confusing NameError at get_session().
        raise ValueError("Unrecognized env: {}".format(env))
    sesh = get_session(conn_def=conn_def, connectable=True)
    infiler = Infiles(table=table_name, schema="gbd", session=sesh)

    logger.info("Beginning infile, time = {}".format(time.time()))
    for f in file_list:
        try:
            logger.info("infiling {}, time = {}".format(f, time.time()))
            # summaries are saved with col mean, need to upload to table as
            # val
            infiler.infile(f,
                           with_replace=False,
                           commit=True,
                           rename_cols={'mean': 'val'})
        except Exception:
            # Best-effort mode: log the full traceback and move on unless
            # the caller asked for failures to propagate.
            if raise_on_failure:
                raise
            tb = traceback.format_exc()
            logger.error("failed to infile {} with exception {};"
                         "Skipping to next file".format(f, tb))
    logger.info("Done infile, time = {}".format(time.time()))
Beispiel #4
0
def upload_cod_summaries(directories, conn_def):
    """Infile every csv directory in `directories` into cod.output.

    Args:
        directories: iterable of directory paths to infile.
        conn_def: connection definition passed to get_session.
    """
    session = get_session(conn_def)
    # Speed up the bulk load: skip unique checks for this session.
    exec_query("set unique_checks= 0", session)
    loader = Infiles(table='output', schema='cod', session=session)
    for summary_dir in directories:
        loader.indir(
            path=summary_dir,
            with_replace=True,
            partial_commit=True,
            commit=True)
 def upload(self, sm, process_vers, in_dir, with_replace=True):
     """Infile MMR summary csvs and activate the process version.

     Args:
         sm: 'single' or 'multi', selecting the target table.
         process_vers: process version id embedded in the table name.
         in_dir: directory of csvs to be uploaded.
         with_replace: forwarded to Infiles.indir.
     """
     target_table = 'output_mmr_%s_year_v%s' % (sm, process_vers)
     session = get_session(self.conn_def)
     loader = Infiles(table=target_table, schema='gbd', session=session)
     loader.indir(path=in_dir, with_replace=with_replace,
                  partial_commit=True, commit=True)
     # Flip the process version live once the data is in.
     activate_process_version(process_version_id=process_vers,
                              session=session)
     print("Uploaded! You win at life.")
Beispiel #6
0
 def load_component_location(self, component, location_id, year_type):
     """Infile the summary csvs for one component/location/year_type.

     The target table name comes from the per-year_type table mapping on
     self, formatted with the gbd process version id.
     """
     table_template = getattr(self, year_type)[component]
     table = table_template.format(
         self.como_version.gbd_process_version_id)
     session = get_session("gbd")
     loader = Infiles(table, "gbd", session)
     # single_year summaries live one directory deeper than multi_year.
     prefix = f"{self.como_version.como_dir}/summaries/{component}/*/{year_type}"
     if year_type == "single_year":
         glob_pattern = f"{prefix}/{location_id}/*.csv"
     else:
         glob_pattern = f"{prefix}/{location_id}.csv"
     # Duplicate-row inserts are tolerated rather than fatal.
     loader.indir(path=glob_pattern,
                  commit=True,
                  partial_commit=True,
                  rename_cols={"mean": "val"},
                  no_raise=(IntegrityError))
Beispiel #7
0
def upload_gbd_summaries(process_version, conn_def, directories):
    """Infile cod summary directories into the versioned gbd output table.

    The target table is chosen once for the whole batch: multi-year if
    any directory path mentions 'multi', otherwise single-year.
    """
    is_multi = any('multi' in d for d in directories)
    year_kind = 'multi' if is_multi else 'single'
    table = 'output_cod_{}_year_v{pv}'.format(year_kind, pv=process_version)
    session = get_session(conn_def)
    loader = Infiles(table=table, schema='gbd', session=session)
    for directory in directories:
        print("Uploading from {}".format(directory))
        loader.indir(path=directory,
                     with_replace=True,
                     partial_commit=True,
                     commit=True)
    return "Uploaded"
def load_data(stagedir, conn, table_name):
    """Infile {stagedir}/{table_name}.csv into gbd.<table_name>.

    Args:
        stagedir: directory containing the staged csv.
        conn: connection definition for get_session.
        table_name: target table; the csv must share this name.
    """
    session = get_session(conn_def=conn)
    loader = Infiles(table=table_name, schema='gbd', session=session)

    staged_csv = '{}/{}.csv'.format(stagedir, table_name)
    print('beginning infile')
    started_at = str(datetime.datetime.now().time())

    loader.infile(path=staged_csv, commit=True)

    print('done with infiles at {} (started {})'.format(
            str(datetime.datetime.now().time()), started_at))
    return None
Beispiel #9
0
def upload(sev_version_id, process_version_id, table_type):
    """Gather SEV summary csvs, re-shard them by year, and infile them.

    Reads every {table_type}_*.csv summary for the SEV version, sorts the
    combined frame into upload order, writes one csv per year (single_year)
    or per year pair (multi_year), then infiles each shard into the
    versioned gbd output table.

    NOTE(review): "FILEPATH" looks like a scrubbed path placeholder — the
    real base directory must be substituted before this can run.
    """

    file_pattern = "FILEPATH/sev/{v}/summaries/{tt}_*.csv".format(v=sev_version_id, tt=table_type)
    data = []
    for f in glob.glob(file_pattern):
        data.append(pd.read_csv(f))
    # NOTE(review): pd.concat raises ValueError if no files matched.
    data = pd.concat(data)
    data.fillna(0, inplace=True)
    # id columns come back as floats after fillna; restore integer dtype.
    id_cols = [c for c in data if c.endswith('_id')]
    data[id_cols] = data[id_cols].astype(int)
    # Despite its name, process_version_id appears to be an object exposing
    # .gbd_process_version_id — TODO confirm against the caller.
    table_name = "output_sev_{}_v{}".format(table_type, process_version_id.gbd_process_version_id)
    data_dir = "FILEPATH/sev/{v}/summaries/{tt}".format(v=sev_version_id, tt=table_type)
    # NOTE(review): os.mkdir fails if data_dir already exists (no exist_ok).
    os.mkdir(data_dir)

    if table_type == 'single_year':
        # Column order and sort order must match the target table's layout.
        data = data[['measure_id', 'year_id', 'location_id', 'sex_id',
               'age_group_id', 'rei_id', 'metric_id', 'val',
               'upper', 'lower']].sort_values(
                   ['measure_id', 'year_id', 'location_id',
                    'sex_id', 'age_group_id', 'rei_id', 'metric_id'])
        year_ids = data['year_id'].drop_duplicates().tolist()
        # One shard per year, group-writable for downstream processes.
        for yr in year_ids:
            file = os.path.join(data_dir, 'single_year_{}.csv'.format(int(yr)))
            data.loc[data['year_id'] == yr].to_csv(file, index=False)
            os.chmod(file, 0o775)
    else:
        data = data[['measure_id', 'year_start_id',
                 'year_end_id', 'location_id', 'sex_id',
                 'age_group_id', 'rei_id', 'metric_id', 'val',
                 'upper', 'lower']].sort_values(
                     ['measure_id', 'year_start_id',
                      'year_end_id', 'location_id',
                      'sex_id', 'age_group_id', 'rei_id', 'metric_id'])
        # Distinct (year_start, year_end) pairs, one shard per pair.
        year_ids = data[['year_start_id','year_end_id']].drop_duplicates().to_dict('split')['data']
        for yr in year_ids:
            file = os.path.join(data_dir, 'multi_year_{}_{}.csv'.format(int(yr[0]),int(yr[1])))
            data.loc[(data['year_start_id'] == yr[0]) & (data['year_end_id'] == yr[1])].to_csv(file, index=False)
            os.chmod(file, 0o775)

    sesh = get_session(conn_def='gbd', connectable=True)
    infiler = Infiles(table=table_name, schema='gbd', session=sesh)
    # sorted() gives a deterministic upload order across runs.
    file_list = sorted(os.listdir(data_dir))
    for f in file_list:
        print(f)
        infiler.infile(os.path.join(data_dir, f), with_replace=False, commit=True)
Beispiel #10
0
def load_data(stagedir, conn, table_name):
    """Load {stagedir}/{table_name}.csv into the gbd schema.

    Uses the db_tools Infiles object; existing rows are replaced.
    """
    db_session = get_session(conn_def=conn)

    infile_mgr = Infiles(table=table_name, schema='gbd', session=db_session)

    target_csv = '{}/{}.csv'.format(stagedir, table_name)
    print('beginning infile')
    began = str(datetime.datetime.now().time())

    infile_mgr.infile(path=target_csv, commit=True, with_replace=True)

    print('done with infiles at {} (started {})'.format(
        str(datetime.datetime.now().time()), began))
    return None
 def load_location(self, file_list):
     """Infile each csv in `file_list` into self.table_name.

     IntegrityErrors (duplicate rows already loaded) are logged and the
     file is skipped; any other exception is logged with its traceback
     and re-raised. The session is always closed, even on failure
     (previously it leaked when an exception propagated).
     """
     self.engine.engine.dispose()
     session = self.engine.create_session(scoped=True).session
     infiler = Infiles(self.table_name, self._schema, session)
     try:
         for file in file_list:
             try:
                 start_time = time.time()
                 infiler.infile(file,
                                with_replace=False,
                                commit=True,
                                rename_cols={'mean': 'val'})
                 elapsed = time.time() - start_time
                 # Log elapsed seconds (the old message called this a
                 # "time", which read as a timestamp).
                 logger.info("Infiled file {} in {} seconds".format(
                     file, elapsed))
             except exc.IntegrityError:
                 # Typo fix: "Recieved" -> "Received".
                 logger.info("Received IntegrityError for file {}, "
                             "skipping infile at time {}".format(
                                 file, time.time()))
             except Exception:
                 # logger.exception captures the traceback; plain
                 # logger.info(e) lost it.
                 logger.exception("Unexpected error infiling {}".format(file))
                 raise
     finally:
         session.close()
Beispiel #12
0
def upload_diagnostics(parent_dir):
    """Infile the staged diagnostics csv into codcorrect.diagnostic.

    Args:
        parent_dir: run directory containing _temp/upload_diagnostics.csv.
    """
    session = get_session('codcorrect')
    loader = Infiles(table='diagnostic', schema='codcorrect', session=session)
    staged_csv = '{pd}/_temp/upload_diagnostics.csv'.format(pd=parent_dir)
    loader.infile(path=staged_csv, with_replace=True, commit=True)
Beispiel #13
0
def upload_diagnostics(parent_dir):
    """Infile the diagnostics csv into codcorrect.diagnostic.

    NOTE: parent_dir is unused here; the source path looks like a
    scrubbed "FILEPATH" placeholder — confirm before running.
    """
    session = get_session('codcorrect')
    loader = Infiles(table='diagnostic', schema='codcorrect', session=session)
    loader.infile(path='FILEPATH.csv', with_replace=True, commit=True)