Code example #1
def table_create(df: pd.DataFrame, tablename: str, conn: str = None):
    """
    Pulls all fields from a dataframe, builds a postgres table schema from
    them, and uses that schema to create the new table in postgres.
    """
    try:
        # 'aero_runs' uses its own dtype-to-postgres type mapping
        translate = aero_translate if tablename == 'aero_runs' else type_translate
        table_fields = {col: translate[df.dtypes[col].name] for col in df.columns}

        if table_fields:
            # the 'nri' connection writes its tables into 'nritest'
            target = 'nritest' if conn == 'nri' else conn
            comm = sql_command(table_fields, tablename, target)
            con = db(conn).str
            cur = con.cursor()
            cur.execute(comm)
            con.commit()

    except Exception as e:
        print(e)
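
Note: `sql_command` is defined elsewhere in this codebase; as a rough, hypothetical sketch of what it likely builds from the field dictionary (the third argument's exact semantics are assumed here):

# Hypothetical sketch -- the real sql_command lives elsewhere in this repo.
def sql_command_sketch(table_fields: dict, tablename: str, schema: str = "public") -> str:
    # table_fields maps column name -> postgres type, e.g. {"TIMESTAMP": "timestamp"}
    cols = ", ".join(f'"{name}" {pgtype}' for name, pgtype in table_fields.items())
    return f'CREATE TABLE IF NOT EXISTS {schema}."{tablename}" ({cols});'

# sql_command_sketch({"PrimaryKey": "text", "Reading": "double precision"}, "demo")
# -> CREATE TABLE IF NOT EXISTS public."demo" ("PrimaryKey" text, "Reading" double precision);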
Code example #2
def quick_ingest(whichset):

    internal_dict = {}
    count = 1
    # current data -- .dat files: each station maps to a single file name
    if isinstance(whichset['akron'], str):
        for station, filename in whichset.items():
            if "BellevueTable1" not in (station, filename):
                print("processing:", filename)
                fullpath = os.path.join(files_path, filename)
                projk = projectkey_extractor(fullpath)
                df = datScraper(fullpath).getdf()
                df['ProjectKey'] = projk
                df = second_round(df)
                # drop rows with a null TIMESTAMP, if any
                df = df.loc[df.TIMESTAMP.notnull()]

                internal_dict[f"df{count}"] = df
                count += 1

    # historic data -- each station maps to a list of files
    elif isinstance(whichset['akron'], list):
        for station, filenames in whichset.items():
            for filename in filenames:
                print("processing:", filename)
                fullpath = os.path.join(files_path, filename)
                projk = projectkey_extractor(fullpath)
                df = datScraper(fullpath).getdf()
                df['ProjectKey'] = name_in_pg[projk]
                df = second_round(df)
                df = df.loc[df.TIMESTAMP.notnull()]

                internal_dict[f"df{count}"] = df
                count += 1

    finaldf = pd.concat(internal_dict.values())
    if not tablecheck("raw_met_data", "met"):
        table_create(finaldf, "raw_met_data", "met")
    try:
        d = db("met")
        ingesterv2.main_ingest(finaldf, "raw_met_data", d.str, 100000)
    except Exception as e:
        print(e)
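
A usage sketch of the two input shapes `quick_ingest` dispatches on; the station and file names below are made up for illustration:

# Hypothetical inputs -- station and file names are illustrative only.
current_set = {                      # str values: current .dat files
    "akron": "akron_Table1.dat",
    "moab": "moab_Table1.dat",
}
historic_set = {                     # list values: historic file batches
    "akron": ["akron_2016.csv", "akron_2017.csv"],
    "moab": ["moab_2016.csv"],
}
# quick_ingest(current_set)   # takes the isinstance(..., str) branch
# quick_ingest(historic_set)  # takes the isinstance(..., list) branch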
Code example #3
def rowDeleter(tablename: str, dev=False, **fields: str):

    # where to delete from: dev or public
    print(f"number of key-value pairs: {len(fields)}")
    if dev:
        d = db("dimadev")
        keyword = "dimadev"
    else:
        d = db('dima')
        keyword = "public"
    # connection/cursor object (for dev or public)
    con = d.str
    cur = con.cursor()
    # structure the SQL according to the number of key-value pairs
    if len(fields) == 0:
        print("no key-value pairs given; nothing to delete")
    elif len(fields) == 1:
        print(f"deleting 1 key-value pair from postgres.{keyword}.{tablename}")
        key, value = next(iter(fields.items()))
        singleField = f'''where "{key}"='{value}';'''
        try:
            sqlStart = f'DELETE FROM postgres.{keyword}."{tablename}" {singleField}'
            print(f"Removing \"{key}\" = '{value}'...")
            print(f"Using SQL verb : {sqlStart}")
            cur.execute(sqlStart)
            print("Done.")
            con.commit()

        except Exception as e:
            print(e)

    else:
        print(f"deleting {len(fields)} key-value pairs from postgres.{keyword}.{tablename}")
        endVerb = keyvalAdder(fields)
        try:
            sqlStart = f'DELETE FROM postgres.{keyword}."{tablename}" {endVerb};'
            print("Removing rows using various key-value pairs...")
            print(f"Using SQL verb : {sqlStart}")
            cur.execute(sqlStart)
            print("Done.")
            con.commit()

        except Exception as e:
            print(e)
Code example #4
def sql_command_daterange(date_key_tuple):
    date1, date2, pk = date_key_tuple
    # renamed from `str`, which shadowed the builtin
    qry = f"""
    SELECT EXISTS(
        SELECT *
        FROM public.raw_met_data
        WHERE (
            "TIMESTAMP" >= '{date1}'::timestamp
            AND
            "TIMESTAMP" < '{date2}'::timestamp
              ) and "ProjectKey"='{pk}'
    );
    """
    try:
        con = db('met').str
        cur = con.cursor()
        cur.execute(qry)
        return cur.fetchone()[0]
    except Exception as e:
        print(e)
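
A hedged usage sketch (needs a live 'met' database; the dates and project key are illustrative):

# Illustrative values only -- requires the 'met' connection to run.
exists = sql_command_daterange(("2020-01-01", "2020-02-01", "JER"))
print(exists)  # True if any raw_met_data rows fall in [date1, date2) for that key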
Code example #5
def single_pg_send(df, tablename):
    """
    uses: new_tablename, tablecheck, ingesterv2.main_ingest,
    table_create, datetime_type_assert
    """
    d = db('dima')
    print("STARTING INGEST")
    df = blank_fixer(df)
    df = significant_digits_fix_pandas(df)
    df = datetime_type_assert(df)
    df = openingsize_fixer(df) if "openingSize" in df.columns else df
    if 'ItemType' in df.columns:
        newtablename = new_tablename(df)
        if tablecheck(newtablename):
            print(f'network table "{newtablename}" exists, ingesting..')
            ingesterv2.main_ingest(df, newtablename, d.str, 10000)
        else:
            table_create(df, newtablename, 'dima')
            print(f'created network table: {newtablename}')
            ingesterv2.main_ingest(df, newtablename, d.str, 10000)

    else:
        print("not a network table")
        newtablename = tablename  # was `table`, an undefined name
        if tablecheck(newtablename):
            print("FOUND THE TABLE IN PG")
            ingesterv2.main_ingest(df, newtablename, d.str, 10000)

        else:
            print("DID NOT FIND TABLE IN PG, CREATING...")
            table_create(df, newtablename, 'dima')
            ingesterv2.main_ingest(df, newtablename, d.str, 10000)
Code example #6
def has_duplicate_pks(df, tablename):
    d = db("dima")
    try:
        con = d.str
        cur = con.cursor()
        exists_query = f'''
        select "PrimaryKey" from
        postgres.public."{tablename}"
        '''
        cur.execute(exists_query)
        # fetchall() returns 1-tuples; unwrap them into a set for fast lookups
        pg_keys = {row[0] for row in cur.fetchall()}

        if 'PrimaryKey' in df.columns:
            # True if any key in the dataframe already exists in postgres
            return any(pk in pg_keys for pk in df.PrimaryKey)
        else:
            print('table has no primary key. aborting check')

    except Exception as e:
        print(e)
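
The unwrap-into-a-set step matters because `fetchall()` yields tuples; a small self-contained demo of the membership logic (no database needed):

import pandas as pd

# what cur.fetchall() would return: a list of 1-tuples
fetched = [("pk-001",), ("pk-002",), ("pk-003",)]
pg_keys = {row[0] for row in fetched}

df = pd.DataFrame({"PrimaryKey": ["pk-900", "pk-002"]})
print(any(pk in pg_keys for pk in df.PrimaryKey))  # True: "pk-002" is a duplicate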
Code example #7
def tall_ingest(csv_path, table):
    d = db('geo')
    model_choice = {
        "dataheader": [dataHeader, 'dataHeader'],
        "datagap": [dataGap, 'dataGap'],
        "datalpi": [dataLPI, 'dataLPI'],
        "dataheight": [dataHeight, 'dataHeight'],
        "datasoilstability": [dataSoilStability, 'dataSoilStability'],
        "dataspeciesinventory": [dataSpeciesInventory, 'dataSpeciesInventory'],
        "geoindicators": [geoindicators, 'geoIndicators'],
        "geospecies": [geoSpecies, 'geoSpecies']
    }
    m = model_handler(csv_path, model_choice[table][0], model_choice[table][1])
    # keep only rows that actually have a PrimaryKey
    sans_null = m.checked_df.loc[m.checked_df.PrimaryKey.notnull()].copy()

    drops = [c for c in sans_null.columns if c in fields_to_drop[table.lower()]]
    sans_null.drop(columns=drops, inplace=True)

    m.create_empty_table(d.str)
    ing = ingesterv2(d.str)

    ingesterv2.main_ingest(sans_null, model_choice[table][1], d.str, 100000)

    if model_choice[table][1] in missing_pks:
        for pk in missing_pks[model_choice[table][1]]:
            ing.drop_rows(d.str, model_choice[table][1], "PrimaryKey", pk)

    if "header" not in table:
        ing.reestablish_fk(model_choice[table][1])
Code example #8
def pg2csvExport(tablename=None, dev=False, dbkey=None):
    path = os.getcwd()
    # which schema to export from: dev or public
    if dev:
        keyword = "dimadev"
        conn = "dimadev"
    else:
        keyword = "public"
        conn = "dima"
    # pd.read_sql accepts the postgres URI string directly
    engine = engine_conn_string(conn)
    # tblPlots is keyed by ProjectKey; every other table by DBKey
    nopk = "ProjectKey" if (tablename is not None and "tblPlots" in tablename) else "DBKey"

    if tablecheck(tablename, conn=conn):
        if (dbkey is not None) and (tablename is not None):
            # both table and key
            print(f"fetching pgdata that includes: {tablename}, {dbkey}...")
            sql = f'SELECT * from postgres.{keyword}."{tablename}" where "{nopk}" = \'{dbkey}\''
            df = pd.read_sql(sql, con=engine)
            csv_fieldcheck(df, path, tablename)
            print("done.")
        elif (dbkey is None) and (tablename is not None):
            # only table
            print(f"fetching pgdata that includes: {tablename}...")
            sql = f'SELECT * from postgres.{keyword}."{tablename}"'
            df = pd.read_sql(sql, con=engine)
            csv_fieldcheck(df, path, tablename)
            print("done.")

        elif (dbkey is not None) and (tablename is None):
            print("csv all tables with x projectkey; not implemented")
        else:
            print("no table, no projectkey; not implemented")
    else:
        print(f"{tablename} does not exist in {conn} database")
Code example #9
def tablecheck(tablename, conn="dima"):
    """
    Receives a tablename and returns True if the table exists in the postgres
    table schema, else returns False.
    """
    tableschema = "dimadev" if conn == "dimadev" else "public"
    try:
        con = db(conn).str
        cur = con.cursor()
        cur.execute(
            "select exists(select * from information_schema.tables "
            "where table_name=%s and table_schema=%s)",
            (tablename, tableschema))
        return cur.fetchone()[0]

    except Exception as e:
        print(e)
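
This check backs the guard-then-create pattern that recurs throughout these examples; a hedged sketch of that usage (assumes a dataframe `df` already in scope; the table name is illustrative):

# Illustrative create-if-missing guard used before an ingest
if not tablecheck("tblLines", conn="dima"):
    table_create(df, "tblLines", "dima")
ingesterv2.main_ingest(df, "tblLines", db("dima").str, 10000)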
Code example #10
def row_check(df):
    d = db("met")

    for i in range(len(df)):
        row = df.iloc[i:i + 1]
        ts = row.TIMESTAMP.values[0]
        pk = row.ProjectKey.values[0]

        try:
            if pg_check(ts, pk):
                print(f'timestamp: "{ts}" and projectkey: "{pk}" already in database, moving on..')
            else:
                print(f'ingesting timestamp: "{ts}" and projectkey: "{pk}"')
                ingesterv2.main_ingest(row, "raw_met_data", d.str, 10000)

        except Exception as e:
            print(e)
            # reconnect and continue with the next row
            d = db("met")
Code example #11
def add_modelrunkey_to_pg():
    d = db("aero")
    add_query = '''
        ALTER TABLE IF EXISTS "ModelRuns"
        ADD COLUMN "ModelRunKey" TEXT;
        '''
    try:
        con = d.str
        cur = con.cursor()
        cur.execute(add_query)
        con.commit()

    except Exception as e:
        print(e, 'error adding column to ModelRuns table')
Code example #12
def pull_max_date(projectkey):
    qry = f"""
    SELECT MAX("TIMESTAMP")
    FROM public.raw_met_data
    WHERE "ProjectKey"='{projectkey}';
    """
    try:
        con = db("met").str
        cur = con.cursor()
        cur.execute(qry)
        return cur.fetchone()[0]
    except Exception as e:
        print(e)
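
A sketch of how the two met helpers compose: the max ingested date can serve as the lower bound of the next existence check (project key and dates illustrative; live 'met' database assumed):

last_seen = pull_max_date("JER")
if last_seen is not None:
    already_there = sql_command_daterange((last_seen, "2020-03-01", "JER"))
    print(already_there)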
Code example #13
def drop_dbkey(table, path):
    squished_path = os.path.split(os.path.splitext(path)[0])[1].replace(" ", "")
    d = db('dima')
    try:
        con = d.str
        cur = con.cursor()
        # compose the identifier safely; the DBKey travels as a bound parameter
        cur.execute(
            sql.SQL('DELETE FROM postgres.public.{} WHERE "DBKey" = %s;').format(
                sql.Identifier(table)),
            (squished_path,))
        con.commit()
        print(f"successfully dropped '{squished_path}' from table '{table}'")
    except Exception as e:
        print(e)
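
A minimal, self-contained sketch of the psycopg2 `sql` composition used above; it only builds the statement, since rendering or executing it needs a live connection:

from psycopg2 import sql

# identifiers (table names) go through sql.Identifier so they are quoted
# correctly; values travel separately as bound parameters
stmt = sql.SQL('DELETE FROM postgres.public.{} WHERE "DBKey" = %s;').format(
    sql.Identifier("tblLines"))
params = ("dima_file",)   # illustrative DBKey
# cur.execute(stmt, params)   # with a live cursor; stmt.as_string(con) renders it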
Code example #14
def schemaCreate(path, tablename):
    """
    ingesting tables with less priority: schemaTable
    """
    d = db("geo")
    df = pd.read_excel(path)
    if tablecheck(tablename, conn="geo"):
        print("table exists; ingesting...")
        ingesterv2.main_ingest(df,tablename,d.str)
        print("done")

    else:
        print("table does not exists; creating table..")
        table_create(df,tablename,conn="geo")
        print("ingesting...")
        ingesterv2.main_ingest(df,tablename, d.str)
        print("done")
Code example #15
def batch_looper(dimacontainer, pg=False):
    """
    Creates an exhaustive list of tables across all DIMAs in a folder, then
    uses looper to go through that list and either write a CSV for each table
    or send each table to postgres.
    """
    d = db('dima')
    tablelist = None
    while tablelist is None:
        print('gathering tables within dimas..')
        tablelist = table_collector(dimacontainer)
    print('creating csvs for each table..')
    for table in tablelist:
        if not pg:
            looper(dimacontainer, table, csv=True)
        else:
            df = looper(dimacontainer, table, csv=False)
            if 'ItemType' in df.columns:
                # if one of the non-vegetation bsne tables, use 'new_tablename'
                # to produce a new tablename: 'tblHorizontalFlux' or
                # 'tblDustDeposition'
                newtablename = new_tablename(df)
                if tablecheck(newtablename):
                    print(f'found {newtablename} in pg, ingesting..')
                    ingesterv2.main_ingest(df, newtablename, d.str, 10000)
                else:
                    table_create(df, newtablename, 'dima')
                    print(f'created {newtablename}, ingesting..')
                    ingesterv2.main_ingest(df, newtablename, d.str, 10000)

            else:
                print("NOT A HORFLUX TABLE")
                newtablename = table
                if tablecheck(table):
                    print("FOUND THE TABLE IN PG")
                    ingesterv2.main_ingest(df, newtablename, d.str, 10000)

                else:
                    print("DID NOT FIND TABLE IN PG, CREATING...")
                    table_create(df, table, 'dima')
                    ingesterv2.main_ingest(df, newtablename, d.str, 10000)
Code example #16
def modelrun_key_check(modelrunkey):
    d = db("aero")
    if tablecheck("ModelRuns", "aero"):
        try:
            con = d.str
            cur = con.cursor()
            exists_query = '''
            select exists (
                select 1
                from "ModelRuns"
                where "ModelRunKey" = %s
            )'''
            cur.execute(exists_query, (modelrunkey, ))
            return cur.fetchone()[0]

        except Exception as e:
            print(e, "error selecting ModelRuns table.")
    else:
        print("ModelRun table does not exist.")
Code example #17
def pg_check(timestamp, projectkey):
    qry = f"""SELECT EXISTS(
                SELECT *
                FROM public.raw_met_data
                WHERE
                    "TIMESTAMP"='{timestamp}'
                and
                    "ProjectKey"='{projectkey}');
        """
    try:
        con = db('met').str
        cur = con.cursor()
        cur.execute(qry)
        return cur.fetchone()[0]
    except Exception as e:
        print(e)
Code example #18
def model_run_updater(batchpath, modelrunkey, source=None):
    """
    1. creates the aero_runs table in postgres if it does not exist
    2. appends the batch data to the postgres table
    """
    d = db("aero")
    df = txt_read(batchpath)
    if source is not None:
        df['Source'] = source
    df['ModelRunKey'] = modelrunkey

    if tablecheck('aero_runs'):
        print('aero_runs exists, skipping table creation')
        update_model(batchpath, modelrunkey)
        ingesterv2.main_ingest(df, "aero_runs", d.str, 100000)
    else:
        print('creating aero_runs table..')
        table_create(df, "aero_runs", "aero")
        update_model(batchpath, modelrunkey)
        ingesterv2.main_ingest(df, "aero_runs", d.str, 100000)
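
A hedged sketch of how the aero helpers chain together (the path and run key are illustrative; a live 'aero' database is assumed):

run_key = "run_2021_06"                  # hypothetical ModelRunKey
if not modelrun_key_check(run_key):
    model_run_updater("/data/aero/batch_01.txt", run_key, source="scan")
else:
    print(f"{run_key} already recorded in ModelRuns")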
Code example #19
def colcheck(tablename, conn):
    """
    incomplete implementation: check ingested fields vs. the schema on pg
    """
    # renamed from `sql` to avoid shadowing the psycopg2 sql module
    qry = f"""SELECT *
          FROM information_schema.columns
         WHERE table_schema = 'public'
           AND table_name   = '{tablename}';
        """
    d = db(conn)
    if tablecheck(tablename, conn):
        con = d.str
        cur = con.cursor()
        try:
            cur.execute(qry)
            # index 3 of information_schema.columns is column_name
            return [i[3] for i in cur.fetchall()]

        except Exception as e:
            print(e)
    else:
        print("table does not exist")
Code example #20
def batch_looper(dimacontainer, projkey=None, dev=False, pg=False):
    """
    Creates an exhaustive list of tables across all DIMAs in a folder, then
    uses looper to go through that list and either write a CSV for each table
    or send each table to postgres (public or dimadev).
    """
    if dev:
        d = db("dimadev")
        keyword = "dimadev"
    else:
        d = db('dima')
        keyword = "dima"  # conn name passed to tablecheck/table_create

    tablelist = None
    while tablelist is None:
        print('gathering tables within dimas..')
        tablelist = table_collector(dimacontainer)
        print(tablelist, "tablelist check # 1")
    print('creating csvs for each table..')
    for table in tablelist:
        if not pg:
            looper(dimacontainer, table, csv=True)

        else:
            if 'tblPlots' not in table:
                df = looper(dimacontainer, table, csv=False)
            else:
                # tblPlots additionally needs the project key
                df = looper(dimacontainer, table, projkey, csv=False)
            print(df.shape, "looper dataframe check # 2")
            if 'ItemType' in df.columns:
                # if one of the non-vegetation bsne tables, use 'new_tablename'
                # to produce a new tablename: 'tblHorizontalFlux' or
                # 'tblDustDeposition'
                newtablename = new_tablename(df)
                if tablecheck(newtablename, keyword):
                    print(f'found {newtablename} in pg, ingesting..')
                    df = dateloadedcheck(df)
                    df = ovenTemp_INT(df)
                    ingesterv2.main_ingest(df, newtablename, d.str, 10000)
                else:
                    table_create(df, newtablename, keyword)
                    print(f'created {newtablename}, ingesting..')
                    df = dateloadedcheck(df)
                    df = ovenTemp_INT(df)
                    ingesterv2.main_ingest(df, newtablename, d.str, 10000)

            else:
                print("NOT A HORFLUX TABLE")
                newtablename = table
                if tablecheck(table, keyword):
                    print(f"FOUND THE {newtablename} IN PG")

                    df = dateloadedcheck(df)
                    ingesterv2.main_ingest(df, newtablename, d.str, 10000)

                else:
                    print(
                        f"DID NOT FIND {newtablename} IN PG, CREATING...")

                    table_create(df, table, keyword)
                    df = dateloadedcheck(df)
                    ingesterv2.main_ingest(df, newtablename, d.str, 10000)
Code example #21
def met_batcher(path, whichdata=None):
    # file extension per dataset kind (renamed from `type`, which shadowed the builtin)
    ext = {"historic": ".csv", "current": ".dat"}
    df_dict = {}
    folderlist = os.listdir(path)
    count = 1
    start = time.time()
    for i in folderlist:
        for j in os.listdir(os.path.join(path, i)):
            if "historic" in whichdata:
                if os.path.splitext(os.path.join(path, i, j))[1] == ext[whichdata]:
                    print(j)
                    startlocal = time.time()
                    local_path = os.path.join(path, i, j)
                    proj_key = i  # folder name doubles as the project key
                    tempdf = datReader(local_path).getdf()

                    tempdf = second_round(tempdf)
                    tempdf = col_name_fix(tempdf)
                    tempdf = new_instrumentation(tempdf)
                    tempdf = remove_emptytimestamps(tempdf)
                    tempdf = type_fix(tempdf)
                    tempdf['ProjectKey'] = proj_key

                    df_dict[f'df{count}'] = tempdf
                    elapsed = round(time.time() - startlocal, 2)
                    print(f'time elapsed for {j} dataset: {elapsed}s')
                    count += 1
            if "current" in whichdata:
                if os.path.splitext(os.path.join(path, i, j))[1] == ext[whichdata] \
                        and 'Bellevue' not in path:
                    print(j)
                    startlocal = time.time()
                    local_path = os.path.join(path, i, j)
                    proj_key = i
                    tempdf = datReader(local_path).getdf()

                    tempdf = second_round(tempdf)
                    tempdf = col_name_fix(tempdf)
                    tempdf = new_instrumentation(tempdf)
                    tempdf = remove_emptytimestamps(tempdf)
                    tempdf['ProjectKey'] = proj_key
                    # drop rows with a null TIMESTAMP, if any
                    tempdf = tempdf.loc[tempdf.TIMESTAMP.notnull()]

                    df_dict[f'df{count}'] = tempdf
                    elapsed = round(time.time() - startlocal, 2)
                    print(f'time elapsed for {j} dataset: {elapsed}s')
                    count += 1

    prefix = pd.concat(df_dict.values())
    prefix = type_fix(prefix)
    # use an explicit unit: bare "datetime64" is rejected by newer pandas
    prefix.TIMESTAMP = prefix.TIMESTAMP.astype("datetime64[ns]")
    print(f'final elapsed time: {time.time() - start}')
    return prefix
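
On the final timestamp cast: pandas needs an explicit unit, and `pd.to_datetime` is the more forgiving route when files mix formats; a self-contained sketch:

import pandas as pd

df = pd.DataFrame({"TIMESTAMP": ["2020-01-01 00:00", "2020-01-01 00:30", None]})
# errors="coerce" turns unparseable entries into NaT instead of raising,
# which a notnull() filter can then drop
df["TIMESTAMP"] = pd.to_datetime(df["TIMESTAMP"], errors="coerce")
print(df.dtypes)  # TIMESTAMP    datetime64[ns]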
Code example #22
def engine_conn_string(string):
    d = db(string)
    return f'postgresql://{d.params["user"]}:{d.params["password"]}@{d.params["host"]}:{d.params["port"]}/{d.params["dbname"]}'
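
A hedged sketch of how this URI is typically consumed; SQLAlchemy is assumed installed, and the credentials come from whatever `db` reads:

from sqlalchemy import create_engine
import pandas as pd

# engine_conn_string('dima') -> 'postgresql://user:pass@host:5432/dbname'
engine = create_engine(engine_conn_string('dima'))
df = pd.read_sql('SELECT 1 AS ok', con=engine)
# pd.read_sql also accepts the raw URI string, which is how pg2csvExport uses it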
Code example #23
def pg_access(tablename=None, method=None, output=None):
    """ sends dataframe to an access Database or pg

    using the `task_parser`, assembles a specific table and either
    sends it to postgres or exports it to an access database. if the access db
    does not exist, creates it in the `output` path specified.

    todo:
    - implement using only method to send all tables not just one.

    - implement faster access mdb write
    -------------------------
    Args:
        tablename (string): table to pull using `task_parser`

        method (string): `pg` or `mdb`; required to choose if sending to
                        postgres database or a local access .mdb

        output (string): path of directory where the access database will be
                         created.

    """
    df = task_parser(tablename) if method != 'mdb' else None
    if method is None:
        print("please choose 'pg' or 'mdb' output")

    elif method == 'pg':
        # sending to postgres (TODO: on-the-fly field definitions, as done for mdb)
        d = db('nri')

        if tablecheck(tablename):
            ingesterv2.main_ingest(df, tablename, d.str)
        else:
            table_create(df, tablename, 'nri')
            ingesterv2.main_ingest(df, tablename, d.str)

    elif method == 'mdb':
        # sending to an access db
        def chunker(seq, size):  # used by tqdm
            return (seq[pos:pos + size] for pos in range(0, len(seq), size))

        if tablename is None:  # TODO implement reading all the tables when no tablename is specified
            for table in table_map.keys():
                print(table)
        else:
            # build the df and field dictionary; create the mdb first if missing
            df = task_parser(tablename)
            onthefly = access_dictionary(df, tablename)
            mdb_name = f'NRI_EXPORT_{date.today().month}_{date.today().day}_{date.today().year}.mdb'
            mdb_path = os.path.join(output, mdb_name)
            if mdb_name not in os.listdir(output):
                mdb_create(output)
            # guard against a zero chunksize on dataframes with fewer than 10 rows
            chunksize = max(1, len(df) // 10)
            with tqdm(total=len(df)) as pbar:
                for i, cdf in enumerate(chunker(df, chunksize)):
                    replace = "replace" if i == 0 else "append"
                    cdf.to_sql(name=f'{tablename}',
                               con=ret_access(mdb_path),
                               index=False,
                               if_exists=replace,
                               dtype=onthefly,
                               chunksize=chunksize)
                    pbar.update(chunksize)
                    tqdm._instances.clear()
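
The `chunker` helper is the core of the progress-bar write; a runnable, database-free sketch of the loop:

import pandas as pd
from tqdm import tqdm

def chunker(seq, size):
    # yields consecutive row-slices of `seq`, each at most `size` rows
    return (seq[pos:pos + size] for pos in range(0, len(seq), size))

df = pd.DataFrame({"n": range(25)})
chunksize = max(1, len(df) // 10)  # the max() guard matters for tiny frames
with tqdm(total=len(df)) as pbar:
    for i, cdf in enumerate(chunker(df, chunksize)):
        mode = "replace" if i == 0 else "append"  # first chunk recreates the table
        # cdf.to_sql(..., if_exists=mode) would go here
        pbar.update(len(cdf))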
Code example #24
def pg_send(table: str, path: str, csv=None, debug=None):
    """ Sends dataframe to postgres or prints out CSV.

    Given a DIMA path and tablename, uses the function main_translate to create
    a dataframe and either send it to a postgres database, or print it out to
    a CSV in the same directory the DIMA file is in.

    Parameters
    ----------

    table : str
        Name of the table in DIMA. example: 'tblLines'

    path : str
        Physical path to DIMA Access file. example: 'c://Folder/dima_file.mdb'

    csv : None or any character. (default value: None)
        If not None, it will print out the table dataframe to CSV.

    debug : None or any character. (default value: None)
        If not None, it will print out each of the steps the function's
        processes take.
    """
    d = db('dima')
    df = main_translate(table, path)
    print("STARTING INGEST")
    df['DateLoadedInDB'] = datetime.now().strftime("%d-%m-%Y %H:%M:%S")
    # DBKey: calibration DIMAs get a 'calibration' suffix
    if ('calibration' in path) or ('Calibration' in path):
        df['DBKey'] = os.path.join(
            split(splitext(path)[0])[1].replace(" ", ""), 'calibration')
    else:
        # non-calibration DBKey
        df['DBKey'] = split(splitext(path)[0])[1].replace(" ", "")

    def send_or_csv(df, newtablename):
        # csv=None sends to postgres; any other value writes the CSV
        # (matches the docstring)
        if csv is None:
            ingesterv2.main_ingest(df, newtablename, d.str, 10000)
        else:
            csv_fieldcheck(df, path, table)

    if 'ItemType' in df.columns:
        # if one of the non-vegetation bsne tables, use 'new_tablename' to
        # produce a new tablename: 'tblHorizontalFlux' or 'tblDustDeposition'
        newtablename = new_tablename(df)
        if not tablecheck(newtablename):
            table_create(df, newtablename, 'dima')
            print(f'created {newtablename}')
        send_or_csv(df, newtablename)

    else:
        print("NOT A HORFLUX TABLE")
        newtablename = table
        if tablecheck(table):
            print("FOUND THE TABLE IN PG")
        else:
            print("DID NOT FIND TABLE IN PG, CREATING...")
            table_create(df, table, 'dima')
        send_or_csv(df, newtablename)