Example #1
    def frame_writer(self, data_frame, table_name, quote_identifiers):
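        """Open a Snowflake connection and bulk-load data_frame into table_name via write_pandas."""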
        try:
            self.conn = snowflake.connector.connect(
                account=self.account,
                user=self.user,
                password=self.password,
                database=self.database,
                schema=self.schema,
            )

            write_pandas(self.conn,
                         data_frame,
                         table_name,
                         quote_identifiers=quote_identifiers)

            self.conn.commit()
            self.conn.close()
            print(f"load of {table_name} completed")

        except snowflake.connector.errors.InterfaceError as err:
            raise ConnectionError(err)
        except snowflake.connector.errors.ProgrammingError as err:
            raise CredentialsError(err)
        except Exception:
            print(
                "an unexpected error occurred in useDB.frame_writer(), please debug"
            )
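A minimal usage sketch for the wrapper above (the useDB class name comes from its own error message; the constructor arguments and table are assumptions):

# Hypothetical usage; the useDB constructor signature is assumed and
# the target table must already exist with matching columns.
import pandas as pd

db = useDB(account="my_account", user="me", password="***",
           database="MY_DB", schema="PUBLIC")
frame = pd.DataFrame({"ID": [1, 2], "NAME": ["a", "b"]})
db.frame_writer(frame, "MY_TABLE", quote_identifiers=False)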
Example #2
def export_df_to_snowflake(df, conn, table):
    # Prepare a SQL query that creates or replaces the table based on the df's dtypes
    cols = str(df.dtypes.to_dict())
    cols = re.sub("{", "", cols)
    cols = re.sub("}", "", cols)
    cols = re.sub(":", "", cols)
    cols = re.sub("dtype\('O'\)", "string", cols)
    cols = re.sub("dtype\('int64'\)", "integer", cols)
    cols = re.sub("dtype\('float64'\)", "double", cols)
    # add any new type conversion here if needed
    cols = re.sub("'", "", cols)
    query = "CREATE OR REPLACE TABLE " + table + ' (' + cols + ')'
    print(query)
    conn.cursor().execute(query)
    write_pandas(conn, df, table.upper())
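The regex manipulation of str(df.dtypes.to_dict()) above is brittle (it breaks on column names containing quotes or braces); a minimal sketch of the same mapping built directly from df.dtypes, under the same type assumptions:

# Sketch: build the CREATE TABLE column list without regex, using the
# same dtype -> Snowflake type mapping as above; falling back to string
# for unmapped dtypes is an assumption.
TYPE_MAP = {"object": "string", "int64": "integer", "float64": "double"}

def columns_clause(df):
    return ", ".join(f"{col} {TYPE_MAP.get(str(dtype), 'string')}"
                     for col, dtype in df.dtypes.items())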
Example #3
def test_location_building(conn_cnx, quote_identifiers: bool):
    """This tests that write_pandas constructs location correctly with schema and table name."""
    from snowflake.connector.cursor import SnowflakeCursor

    with conn_cnx() as cnx:  # type: SnowflakeConnection

        def mocked_execute(*args, **kwargs):
            if len(args) >= 1 and args[0].startswith("COPY INTO"):
                location = args[0].split(" ")[2]
                if quote_identifiers:
                    assert location == '"teble.table"'
                else:
                    assert location == "teble.table"
            cur = SnowflakeCursor(cnx)
            cur._result = iter([])
            return cur

        with mock.patch(
                "snowflake.connector.cursor.SnowflakeCursor.execute",
                side_effect=mocked_execute,
        ) as m_execute:
            success, nchunks, nrows, _ = write_pandas(
                cnx,
                sf_connector_version_df.get(),
                "teble.table",
                quote_identifiers=quote_identifiers,
            )
            assert m_execute.called and any(
                map(lambda e: "COPY INTO" in str(e.args),
                    m_execute.call_args_list))
Example #4
def test_write_pandas(conn_cnx: Callable[..., Generator['SnowflakeConnection', None, None]],
                      compression: str,
                      parallel: int,
                      chunk_size: int):
    num_of_chunks = math.ceil(len(sf_connector_version_data) / chunk_size)

    with conn_cnx() as cnx:  # type: SnowflakeConnection
        table_name = 'driver_versions'
        cnx.execute_string('CREATE OR REPLACE TABLE "{}"("name" STRING, "newest_version" STRING)'.format(table_name))
        try:
            success, nchunks, nrows, _ = write_pandas(cnx,
                                                      sf_connector_version_df,
                                                      table_name,
                                                      compression=compression,
                                                      parallel=parallel,
                                                      chunk_size=chunk_size)
            if num_of_chunks == 1:
                # Note: since we used one chunk, order is conserved
                assert (cnx.cursor().execute('SELECT * FROM "{}"'.format(table_name)).fetchall() ==
                        sf_connector_version_data)
            else:
                # Note: since we used more than one chunk, order is NOT conserved
                assert (set(cnx.cursor().execute('SELECT * FROM "{}"'.format(table_name)).fetchall()) ==
                        set(sf_connector_version_data))
            # Make sure all files were loaded and no error occurred
            assert success
            # Make sure overall as many rows were ingested as we tried to insert
            assert nrows == len(sf_connector_version_data)
            # Make sure we uploaded in as many chunks as we wanted to
            assert nchunks == num_of_chunks
        finally:
            cnx.execute_string("DROP TABLE IF EXISTS {}".format(table_name))
Example #5
def test_location_building_db_schema(conn_cnx, quote_identifiers: bool):
    """This tests that write_pandas constructs location correctly with database, schema and table name."""
    from snowflake.connector.cursor import SnowflakeCursor
    with conn_cnx() as cnx:  # type: SnowflakeConnection

        def mocked_execute(*args, **kwargs):
            if len(args) >= 1 and args[0].startswith('COPY INTO'):
                location = args[0].split(' ')[2]
                if quote_identifiers:
                    assert location == '"database"."schema"."table"'
                else:
                    assert location == 'database.schema.table'
            cur = SnowflakeCursor(cnx)
            cur._result = iter([])
            return cur

        with mock.patch('snowflake.connector.cursor.SnowflakeCursor.execute',
                        side_effect=mocked_execute) as m_execute:
            success, nchunks, nrows, _ = write_pandas(
                cnx,
                sf_connector_version_df,
                "table",
                database='database',
                schema='schema',
                quote_identifiers=quote_identifiers)
            assert m_execute.called and any(
                map(lambda e: 'COPY INTO' in str(e.args),
                    m_execute.call_args_list))
Example #6
def test_auto_create_table_similar_column_names(
    conn_cnx: Callable[..., Generator[SnowflakeConnection, None, None]], ):
    """Tests whether similar names do not cause issues when auto-creating a table as expected."""
    table_name = random_string(5, "numbas_")
    df_data = [(10, 11), (20, 21)]

    df = pandas.DataFrame(df_data, columns=["number", "Number"])
    select_sql = f'SELECT * FROM "{table_name}"'
    drop_sql = f'DROP TABLE IF EXISTS "{table_name}"'
    with conn_cnx() as cnx:
        try:
            success, nchunks, nrows, _ = write_pandas(cnx,
                                                      df,
                                                      table_name,
                                                      quote_identifiers=True,
                                                      auto_create_table=True)

            # Check write_pandas output
            assert success
            assert nrows == len(df_data)
            assert nchunks == 1
            # Check table's contents
            result = cnx.cursor(DictCursor).execute(select_sql).fetchall()
            for row in result:
                assert (
                    row["number"],
                    row["Number"],
                ) in df_data
        finally:
            cnx.execute_string(drop_sql)
Example #7
def test_write_pandas(
    conn_cnx: Callable[..., Generator["SnowflakeConnection", None, None]],
    db_parameters: Dict[str, str],
    compression: str,
    parallel: int,
    chunk_size: int,
    quote_identifiers: bool,
):
    num_of_chunks = math.ceil(len(sf_connector_version_data) / chunk_size)

    with conn_cnx(
            user=db_parameters["user"],
            account=db_parameters["account"],
            password=db_parameters["password"],
    ) as cnx:  # type: SnowflakeConnection
        table_name = "driver_versions"

        if quote_identifiers:
            create_sql = 'CREATE OR REPLACE TABLE "{}" ("name" STRING, "newest_version" STRING)'.format(
                table_name)
            select_sql = 'SELECT * FROM "{}"'.format(table_name)
            drop_sql = 'DROP TABLE IF EXISTS "{}"'.format(table_name)
        else:
            create_sql = "CREATE OR REPLACE TABLE {} (name STRING, newest_version STRING)".format(
                table_name)
            select_sql = "SELECT * FROM {}".format(table_name)
            drop_sql = "DROP TABLE IF EXISTS {}".format(table_name)

        cnx.execute_string(create_sql)
        try:
            success, nchunks, nrows, _ = write_pandas(
                cnx,
                sf_connector_version_df.get(),
                table_name,
                compression=compression,
                parallel=parallel,
                chunk_size=chunk_size,
                quote_identifiers=quote_identifiers,
            )

            if num_of_chunks == 1:
                # Note: since we used one chunk, order is conserved
                assert (cnx.cursor().execute(select_sql).fetchall() ==
                        sf_connector_version_data)
            else:
                # Note: since we used more than one chunk, order is NOT conserved
                assert set(cnx.cursor().execute(select_sql).fetchall()) == set(
                    sf_connector_version_data)

            # Make sure all files were loaded and no error occurred
            assert success
            # Make sure overall as many rows were ingested as we tried to insert
            assert nrows == len(sf_connector_version_data)
            # Make sure we uploaded in as many chunks as we wanted to
            assert nchunks == num_of_chunks
        finally:
            cnx.execute_string(drop_sql)
Example #8
def write_df(df: pd.DataFrame,
             tbl: Table,
             conn: snowflake.SnowflakeConnection = None) -> bool:
    """ Thin wrapper over snowflake.write_pandas function """
    logger.info(f"Copying data to table {tbl.name} on Snowflake")
    success, nchunks, nrows, output = write_pandas(conn, df, tbl.name.upper())
    logger.info(
        f"""{"Succeeded" if success else "Failed"}: chunks {nchunks}, rows {nrows}, output {output}"""
    )
    return success
Example #9
def snoflake():
    username = '******'
    password = '******'
    account = 'qm66558.ap-south-1'
    warehouse = 'COMPUTE_WH'
    database = 'STUDENT'
    ctx = sn.connector.connect(
        user=username,
        password=password,
        account=account,
        # warehouse=warehouse,
        database=database,
        schema='PUBLIC')
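    # Read today's rows from the local SQLite database before pushing them to Snowflake.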
    con = sqlite3.connect(r"db.sqlite3")
    print(con)
    df = pd.read_sql_query(
        "SELECT * from STUDENT where CREATED_AT  = date() ",
        con,
    )
    logging.basicConfig(level=logging.DEBUG,
                        filename='testlog.log',
                        filemode='w')
    logging.debug("uses .02")
    logging.debug("credit used by SELECT * FROM TEST_TABLE is .02")
    df.to_excel('test.xlsx', engine='xlsxwriter', index=False)
    exc = pd.read_excel("test.xlsx")
    cs = ctx.cursor()
    write_pandas(ctx, exc, 'STUDENT')
    # print(df)
    cs.execute("SELECT current_version()")
    con.close()
    one = cs.fetchone()

    test = cs.execute(" SELECT * FROM TEST_TABLE")
    logging.debug("dfasfa %s " % test)

    cs.close()
    ctx.close()
Example #10
def test_location_building(conn_cnx):
    """This tests that write_pandas constructs location correctly with schema and table name."""
    from snowflake.connector.cursor import SnowflakeCursor
    with conn_cnx() as cnx:  # type: SnowflakeConnection
        def mocked_execute(*args, **kwargs):
            if len(args) >= 1 and args[0].startswith('COPY INTO'):
                location = args[0].split(' ')[2]
                assert location == '"teble.table"'
            cur = SnowflakeCursor(cnx)
            cur._result = iter([])
            return cur
        with mock.patch('snowflake.connector.cursor.SnowflakeCursor.execute', side_effect=mocked_execute) as m_execute:
            success, nchunks, nrows, _ = write_pandas(cnx, sf_connector_version_df, "teble.table")
            assert m_execute.called and any(map(lambda e: 'COPY INTO' in str(e.args), m_execute.call_args_list))
Example #11
def test_default_value_insertion(
    conn_cnx: Callable[..., Generator["SnowflakeConnection", None, None]],
    quote_identifiers: bool,
):
    """Tests whether default values can be successfully inserted with the pandas writeback."""
    table_name = "users"
    df_data = [("Mark", 10), ("Luke", 20)]

    # Create a DataFrame containing data about customers
    df = pandas.DataFrame(df_data, columns=["name", "balance"])
    # The SQL strings below assume quote_identifiers is True; if it is False, the double quotes are stripped
    create_sql = """CREATE OR REPLACE TABLE "{}"
                 ("name" STRING, "balance" INT,
                 "id" varchar(36) default uuid_string(),
                 "ts" timestamp_ltz default current_timestamp)""".format(
        table_name)
    select_sql = 'SELECT * FROM "{}"'.format(table_name)
    drop_sql = 'DROP TABLE IF EXISTS "{}"'.format(table_name)
    if not quote_identifiers:
        create_sql = create_sql.replace('"', "")
        select_sql = select_sql.replace('"', "")
        drop_sql = drop_sql.replace('"', "")
    with conn_cnx() as cnx:  # type: SnowflakeConnection
        cnx.execute_string(create_sql)
        try:
            success, nchunks, nrows, _ = write_pandas(
                cnx, df, table_name, quote_identifiers=quote_identifiers)

            # Check write_pandas output
            assert success
            assert nrows == len(df_data)
            assert nchunks == 1
            # Check table's contents
            result = cnx.cursor(DictCursor).execute(select_sql).fetchall()
            for row in result:
                assert (row["id" if quote_identifiers else "ID"]
                        is not None)  # ID (UUID String)
                assert len(row["id" if quote_identifiers else "ID"]) == 36
                assert (row["ts" if quote_identifiers else "TS"]
                        is not None)  # TS (Current Timestamp)
                assert isinstance(row["ts" if quote_identifiers else "TS"],
                                  datetime)
                assert (
                    row["name" if quote_identifiers else "NAME"],
                    row["balance" if quote_identifiers else "BALANCE"],
                ) in df_data
        finally:
            cnx.execute_string(drop_sql)
Example #12
def snowflake_insert_data(ctx, df, table_name):
    """Write data to Snowflake.

    Args:
        ctx: database connection object
        df (pandas df): data to insert
        table_name (str): destination table name
    """

    try:
        _, _, nrows, _ = write_pandas(ctx, df, table_name)
        print("Successfully written " + str(nrows) +
              " rows of data to Snowflake.\n")
    except Exception as e:
        print("Could not save the data.\n")
        print(e)
Example #13
def test_special_name_quoting(
    conn_cnx: Callable[..., Generator[SnowflakeConnection, None, None]],
    auto_create_table: bool,
):
    """Tests whether special column names get quoted as expected."""
    table_name = "users"
    df_data = [("Mark", 10), ("Luke", 20)]

    df = pandas.DataFrame(df_data, columns=["00name", "bAlance"])
    create_sql = (f'CREATE OR REPLACE TABLE "{table_name}"'
                  '("00name" STRING, "bAlance" INT, "id" INT AUTOINCREMENT)')
    select_sql = f'SELECT * FROM "{table_name}"'
    drop_sql = f'DROP TABLE IF EXISTS "{table_name}"'
    with conn_cnx() as cnx:  # type: SnowflakeConnection
        if not auto_create_table:
            cnx.execute_string(create_sql)
        try:
            success, nchunks, nrows, _ = write_pandas(
                cnx,
                df,
                table_name,
                quote_identifiers=True,
                auto_create_table=auto_create_table,
            )

            # Check write_pandas output
            assert success
            assert nrows == len(df_data)
            assert nchunks == 1
            # Check table's contents
            result = cnx.cursor(DictCursor).execute(select_sql).fetchall()
            for row in result:
                # The auto create table functionality does not auto-create an incrementing ID
                if not auto_create_table:
                    assert row["id"] in (1, 2)
                assert (
                    row["00name"],
                    row["bAlance"],
                ) in df_data
        finally:
            cnx.execute_string(drop_sql)
Example #14
def test_autoincrement_insertion(
    conn_cnx: Callable[..., Generator[SnowflakeConnection, None, None]],
    quote_identifiers: bool,
):
    """Tests whether default values can be successfully inserted with the pandas writeback."""
    table_name = "users"
    df_data = [("Mark", 10), ("Luke", 20)]

    # Create a DataFrame containing data about customers
    df = pandas.DataFrame(df_data, columns=["name", "balance"])
    # The SQL strings below assume quote_identifiers is True; if it is False, the double quotes are stripped
    create_sql = ('CREATE OR REPLACE TABLE "{}"'
                  '("name" STRING, "balance" INT, "id" INT AUTOINCREMENT)'
                  ).format(table_name)
    select_sql = f'SELECT * FROM "{table_name}"'
    drop_sql = f'DROP TABLE IF EXISTS "{table_name}"'
    if not quote_identifiers:
        create_sql = create_sql.replace('"', "")
        select_sql = select_sql.replace('"', "")
        drop_sql = drop_sql.replace('"', "")
    with conn_cnx() as cnx:  # type: SnowflakeConnection
        cnx.execute_string(create_sql)
        try:
            success, nchunks, nrows, _ = write_pandas(
                cnx, df, table_name, quote_identifiers=quote_identifiers)

            # Check write_pandas output
            assert success
            assert nrows == len(df_data)
            assert nchunks == 1
            # Check table's contents
            result = cnx.cursor(DictCursor).execute(select_sql).fetchall()
            for row in result:
                assert row["id" if quote_identifiers else "ID"] in (1, 2)
                assert (
                    row["name" if quote_identifiers else "NAME"],
                    row["balance" if quote_identifiers else "BALANCE"],
                ) in df_data
        finally:
            cnx.execute_string(drop_sql)
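A short note on the quoting behavior these tests exercise (illustrative; the identifiers are the ones from the test above):

# quote_identifiers=True  -> COPY INTO "users" ("name", "balance") ...
#                            (case is preserved)
# quote_identifiers=False -> COPY INTO users (name, balance) ...
#                            (Snowflake folds unquoted names to upper case)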
Example #15
def write_to_snowflake(df: object, table_name: str) -> int:
    ctx = None
    try:
        ctx = snowflake.connector.connect(user=AME_SNW_USERNAME,
                                          password=AME_SNW_PASSWORD,
                                          account=AME_SNW_ACCOUNT,
                                          database=AME_SNW_DATABASE,
                                          schema=AME_SNW_SCHEMA,
                                          warehouse=AME_SNW_WAREHOUSE)
        df_count = df.count()[0]
        logging.info(f'Appending {df_count} records into {table_name}')
        success, nchunks, nrows, _ = write_pandas(ctx, df, table_name)
        logging.info(f"Impacted rows : {nrows}")
        return nrows  # Return impacted rows
    except Exception as e:
        error_message = str(e)
        function_name = sys._getframe().f_code.co_name
        output_error = get_exception_message(function_name, error_message)
        return output_error
    finally:
        if ctx is not None:
            logging.info("Closing Snowflake connection.")
            ctx.close()
Example #16
conn = snowflake.connector.connect(user='******',
                                   account='******',
                                   password='******',
                                   warehouse='COMPUTE_WH',
                                   database='TEST_DATA',
                                   role='SYSADMIN',
                                   schema='PUBLIC')

print('connect')
df1 = pandas.read_json(r'C:\Users\chathu\Desktop\wiley\course_data.json')

cur = conn.cursor()
cur.execute("""TRUNCATE TABLE TEST_DATA.PUBLIC.COURSE_SALE """)

df1 = df1[[
    'course_id', 'course_title', 'is_paid', 'num_subscribers', 'price',
    'author', 'content_duration', 'level', 'num_lectures', 'num_reviews',
    'published_timestamp', 'subject'
]]

df1.rename(columns={
    'course_id': 'COURSE_ID',
    'course_title': 'COURSE_TITLE',
    'is_paid': 'IS_PAID',
},
           inplace=True)
print(df1)
try:
    write_pandas(conn, df1, 'COURSE_SALE')
    conn.commit()
finally:
    cur.close()
Example #17
    & (registration_volumes_EV_transformed['GRANULARITY'] == 'MONTH') &
    (registration_volumes_EV_transformed['sales country'] == 'USA')]

audi_A3.DATE = pd.to_datetime(audi_A3.DATE, format='%b-%y')
plt.plot(list(audi_A3.DATE), list(audi_A3.VALUE))
plt.show()

fig = make_subplots(rows=12, cols=1)

for i, year in enumerate(years_available):
    temp = audi_A3[(pd.Timestamp(int(year), 1, 1) <= audi_A3['DATE'])
                   & (audi_A3['DATE'] < pd.Timestamp(int(year) + 1, 1, 1))]
    fig.add_trace(go.Scatter(x=temp.DATE, y=temp.VALUE), row=i + 1, col=1)

fig.update_layout(height=2000, width=800, title_text="Side By Side Subplots")
fig.show()

from config_loader import ConfigLoader
with open("conf/parameter.yml", "r") as file:
    parameters = yaml.load(file, Loader=ConfigLoader)

conn = snowflake.connector.connect(user='******',
                                   password=PSW_SF,
                                   account=ACCOUNT_SF,
                                   **parameters["snowflake_config"])
write_pandas(conn, new_data, 'EV_VOLUMES_TEST')

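# Replace NaNs with empty strings before re-uploading the table.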
tables_to_import['EV_VOLUMES_TEST'].replace(to_replace=np.nan,
                                            value='',
                                            inplace=True)
write_pandas(conn, tables_to_import['EV_VOLUMES_TEST'], 'EV_VOLUMES_TEST')
Example #18
df['INGREDIENT_NAME_CRF_FMT'] = df['INGREDIENT_NAME'].map(
    lambda x: prep.sentence_format_for_crf(x)[0])

df['PREDICTION_RESULTS_RAW'] = \
    df['INGREDIENT_NAME'] \
        .map(lambda x: predict.sentence_predict_label(x, 'recipetagging-v5.crfsuite'))

df['PREDICTION_RESULTS_FMT'] = \
    df.apply(lambda x: predict.format_prediction(x.INGREDIENT_NAME, x.PREDICTION_RESULTS_RAW), axis=1)

df['NAME'] = df['PREDICTION_RESULTS_FMT'].map(
    lambda x: predict.get_name_from_prediction(x))
df['QUANTITY'] = df['PREDICTION_RESULTS_FMT'].map(
    lambda x: predict.get_qty_from_prediction(x))
df['UNIT'] = df['PREDICTION_RESULTS_FMT'].map(
    lambda x: predict.get_unit_from_prediction(x))
df['ETL_LAST_UPDATED'] = df.ETL_LAST_UPDATED.dt.date  # Snowflake library has problem with writing timestamps, using date

final_df = df[[
    'RECIPE_INGREDIENT_ID', 'RECIPE_NAME', 'INGREDIENT_NAME', 'RECIPE_SECTION',
    'ETL_LAST_UPDATED', 'NAME', 'QUANTITY', 'UNIT'
]]

#print(final_df[final_df['RECIPE_NAME'] == 'Cheddar Scones with Chive Butter Recipe'])
truncate_sql = "TRUNCATE TABLE RECIPE_INGREDIENT_LABELED"
cur.execute(truncate_sql)

snf_pandas.write_pandas(conn=conn,
                        df=final_df,
                        table_name="RECIPE_INGREDIENT_LABELED")
Example #19
# In[42]:

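# Adding the Location Table to Snowflake
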
cs.execute('''CREATE OR REPLACE TABLE location(
    YEARMONTH int,
    EPISODE_ID int,
    EVENT_ID int,
    LOCATION_INDEX int,
    RANGE float,
    AZIMUTH varchar(255),
    LOCATION varchar(255),
    LATITUDE float,
    LONGITUDE float,
    LAT2 int, 
    LON2 int)''')

write_pandas(ctx, df_location, "LOCATION")

# In[43]:

# Adding the Fatality Table to Snowflake

cs.execute('''CREATE OR REPLACE TABLE fatality(
    FAT_YEARMONTH int,
    FAT_DAY int,
    FAT_TIME int,
    FATALITY_ID int,
    EVENT_ID int,
    FATALITY_TYPE varchar(255),
    FATALITY_DATE datetime,
    FATALITY_AGE int,
    FATALITY_SEX varchar(255),
Example #20
            current_piece += 1  # The counter is incremented
            current_limit = row_limit * current_piece  # The row limit advances to the next piece
            current_out_path = os.path.join(  # Build the output path for this piece
                output_path, output_name_template % current_piece)
            current_out_writer = csv.writer(
                open(current_out_path, 'w'),
                delimiter=',')  # A new output file is created; headers and rows are merged below
            if keep_headers:
                current_out_writer.writerow(headers)
        current_out_writer.writerow(row)


if __name__ == "__main__":  # Splitting a large file using the defined split function
    print("file split Begins")
    split(open(r"C:\Users\Nenad\PycharmProjects\untitled15\BIGFILE.csv"))
    print("File split Ends")

os.chdir(r'C:\Users\Nenad\Desktop\Data\proba')  #Path to the folder..
file_extension = ".csv"  #..file extension..
all_filenames = [
    i for i in glob.glob(f"*{file_extension}")
]  #..and read all files from that folder with the csv extension

for file in all_filenames:  #A loop that places all files in a table
    df = pd.read_csv(file, delimiter=',')  #Files are converted to a DataFrame
    cs.execute("USE DRAGANA").fetchall(
    )  #Execute sql query which use Database DRAGANA and fetch result from query
    write_pandas(
        ctx, df, 'BIGTABLE'
    )  # Insert the DataFrame into the 'BIGTABLE' table in the 'DRAGANA' database via the connector
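For very large inputs, a minimal sketch of streaming the file into Snowflake in chunks with pandas itself, instead of splitting it on disk first (the chunk size is an assumption; ctx and 'BIGTABLE' mirror the example above):

# Sketch: stream BIGFILE.csv into Snowflake in 100k-row chunks.
for chunk in pd.read_csv("BIGFILE.csv", chunksize=100_000):
    write_pandas(ctx, chunk, 'BIGTABLE')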
Example #21
def test_write_pandas(
    conn_cnx: Callable[..., Generator[SnowflakeConnection, None, None]],
    db_parameters: dict[str, str],
    compression: str,
    chunk_size: int,
    quote_identifiers: bool,
    auto_create_table: bool,
    create_temp_table: bool,
):
    num_of_chunks = math.ceil(len(sf_connector_version_data) / chunk_size)

    with conn_cnx(
            user=db_parameters["user"],
            account=db_parameters["account"],
            password=db_parameters["password"],
    ) as cnx:
        table_name = "driver_versions"

        if quote_identifiers:
            create_sql = 'CREATE OR REPLACE TABLE "{}" ("name" STRING, "newest_version" STRING)'.format(
                table_name)
            select_sql = f'SELECT * FROM "{table_name}"'
            drop_sql = f'DROP TABLE IF EXISTS "{table_name}"'
        else:
            create_sql = "CREATE OR REPLACE TABLE {} (name STRING, newest_version STRING)".format(
                table_name)
            select_sql = f"SELECT * FROM {table_name}"
            drop_sql = f"DROP TABLE IF EXISTS {table_name}"

        if not auto_create_table:
            cnx.execute_string(create_sql)
        try:
            success, nchunks, nrows, _ = write_pandas(
                cnx,
                sf_connector_version_df.get(),
                table_name,
                compression=compression,
                chunk_size=chunk_size,
                quote_identifiers=quote_identifiers,
                auto_create_table=auto_create_table,
                create_temp_table=create_temp_table,
            )

            if num_of_chunks == 1:
                # Note: since we used one chunk, order is conserved
                assert (cnx.cursor().execute(select_sql).fetchall() ==
                        sf_connector_version_data)
            else:
                # Note: since we used more than one chunk order is NOT conserved
                assert set(cnx.cursor().execute(select_sql).fetchall()) == set(
                    sf_connector_version_data)

            # Make sure all files were loaded and no error occurred
            assert success
            # Make sure overall as many rows were ingested as we tried to insert
            assert nrows == len(sf_connector_version_data)
            # Make sure we uploaded in as many chunks as we wanted to
            assert nchunks == num_of_chunks
            # Check to see if this is a temporary or regular table if we auto-created this table
            if auto_create_table:
                table_info = (cnx.cursor(DictCursor).execute(
                    f"show tables like '{table_name}'").fetchall())
                assert table_info[0]["kind"] == (
                    "TEMPORARY" if create_temp_table else "TABLE")
        finally:
            cnx.execute_string(drop_sql)
Example #22
def test_all_pandas_types(conn_cnx: Callable[...,
                                             Generator[SnowflakeConnection,
                                                       None, None]]):
    table_name = random_string(5, "all_types_")
    datetime_with_tz = datetime(1997,
                                6,
                                3,
                                14,
                                21,
                                32,
                                0,
                                tzinfo=timezone(timedelta(hours=+10)))
    datetime_with_ntz = datetime(1997, 6, 3, 14, 21, 32, 0)
    df_data = [
        (1, 1.1, "1string1", True, datetime_with_tz, datetime_with_ntz),
        (2, 2.2, "2string2", False, datetime_with_tz, datetime_with_ntz),
    ]
    df_data_no_timestamps = [(
        row[0],
        row[1],
        row[2],
        row[3],
    ) for row in df_data]

    df = pandas.DataFrame(
        df_data,
        columns=[
            "int", "float", "string", "bool", "timestamp_tz", "timestamp_ntz"
        ],
    )

    select_sql = f'SELECT * FROM "{table_name}"'
    drop_sql = f'DROP TABLE IF EXISTS "{table_name}"'
    with conn_cnx() as cnx:
        try:
            success, nchunks, nrows, _ = write_pandas(cnx,
                                                      df,
                                                      table_name,
                                                      quote_identifiers=True,
                                                      auto_create_table=True)

            # Check write_pandas output
            assert success
            assert nrows == len(df_data)
            assert nchunks == 1
            # Check table's contents
            result = cnx.cursor(DictCursor).execute(select_sql).fetchall()
            for row in result:
                assert (
                    row["int"],
                    row["float"],
                    row["string"],
                    row["bool"],
                ) in df_data_no_timestamps
                # TODO: Schema detection on the server-side has bugs dealing with timestamp_ntz and timestamp_tz.
                #  After the bugs are fixed, change the assertion to `data[0]["tm_tz"] == datetime_with_tz`
                #  and `data[0]["tm_ntz"] == datetime_with_ntz`,
                #  JIRA https://snowflakecomputing.atlassian.net/browse/SNOW-524865
                #  JIRA https://snowflakecomputing.atlassian.net/browse/SNOW-359205
                #  JIRA https://snowflakecomputing.atlassian.net/browse/SNOW-507644
                assert row["timestamp_tz"] is not None
                assert row["timestamp_ntz"] is not None
        finally:
            cnx.execute_string(drop_sql)