def test_autocommit_switching(dsn, configuration):
    """Setting connection.autocommit after connect overrides the initial option."""
    tbl = unique_table_name()

    # Connect without autocommit, then switch it on before the DDL.
    conn = connect(dsn, **get_credentials(configuration))
    conn.autocommit = True  # <---
    conn.cursor().execute(f"CREATE TABLE {tbl} (a INTEGER)")
    conn.close()

    # Connect with autocommit enabled, then switch it off before the DML.
    opts = make_options(autocommit=True)
    conn = connect(
        dsn, turbodbc_options=opts, **get_credentials(configuration)
    )
    conn.autocommit = False  # <---
    conn.cursor().execute(f"INSERT INTO {tbl} VALUES (?)", [42])
    conn.close()

    # table is there, but data was not persisted
    conn = connect(dsn, **get_credentials(configuration))
    cur = conn.cursor()
    cur.execute(f"SELECT * FROM {tbl}")
    assert cur.fetchall() == []
    cur.execute(f"DROP TABLE {tbl}")
    conn.commit()
def create_connection_string_turbo(server, database):
    """Open a turbodbc connection to SQL Server via Windows authentication.

    Parameters:
        server: SQL Server host (or host\\instance) name
        database: database name to connect to

    Returns the open turbodbc connection.
    """
    unicode_options = turbodbc.make_options(prefer_unicode=True)
    constr = (
        'Driver={ODBC Driver 13 for SQL Server};Server='
        + server
        + ';Database='
        + database
        + ';Trusted_Connection=yes;'
    )
    return turbodbc.connect(connection_string=constr,
                            turbodbc_options=unicode_options)
def test_options_with_overrides():
    """make_options must propagate every explicitly overridden option."""
    options = make_options(read_buffer_size=Rows(123),
                           parameter_sets_to_buffer=2500,
                           prefer_unicode=True,
                           use_async_io=True)
    assert options.read_buffer_size.rows == 123
    assert options.parameter_sets_to_buffer == 2500
    # FIX (PEP 8 / flake8 E712): assert truth directly instead of `== True`.
    assert options.prefer_unicode
    assert options.use_async_io
def __init__(self):
    """Open a turbodbc cursor and a SQLAlchemy engine against the
    AdventureWorks2016 database using Windows (trusted) authentication.

    Connection failures are logged and swallowed, so callers must verify
    that ``self.cursor`` / ``self.engine`` were actually created.
    """
    try:
        # [].[dbo].[big_table]
        self.database = 'AdventureWorks2016'  # [AdventureWorks2016].[Person].[Person]
        user = '******'
        pwd = 'trusted'
        # FIX: the original plain string 'CRPC015162\SQLDEV' relied on the
        # invalid escape sequence '\S' (DeprecationWarning today, a
        # SyntaxWarning in Python 3.12+). A raw string keeps the backslash.
        server = r'CRPC015162\SQLDEV'
        driver = 'ODBC+Driver+17+for+SQL+Server'
        driver = "{" + driver.replace("+", " ") + "}"
        options = make_options(read_buffer_size=Megabytes(100),
                               parameter_sets_to_buffer=1000,
                               varchar_max_character_limit=10000,
                               use_async_io=True,
                               prefer_unicode=True,
                               autocommit=True,
                               large_decimals_as_64_bit_types=True,
                               limit_varchar_results_to_max=True)
        if pwd == 'trusted':
            self.cursor = turbodbc.connect(
                driver=driver,
                server=server,
                database=self.database,
                uid=user,
                Trusted_Connection='yes',
                turbodbc_options=options).cursor()
        else:
            self.cursor = turbodbc.connect(
                driver=driver,
                server=server,
                database=self.database,
                uid=user,
                pwd=pwd,
                turbodbc_options=options).cursor()
        # Build a matching ODBC string for the SQLAlchemy (pyodbc) engine.
        con_string = f"DRIVER={driver};SERVER={server};DATABASE={self.database};Trusted_Connection=yes;" \
            if pwd == 'trusted' \
            else f"DRIVER={driver};" \
                 f"SERVER={server};" \
                 f"DATABASE={self.database};" \
                 f"UID={user};" \
                 f"PWD={pwd}"
        quoted = urllib.parse.quote_plus(con_string)
        self.engine = sa.create_engine(
            'mssql+pyodbc:///?odbc_connect={}'.format(quoted))
    except turbodbc.exceptions.DatabaseError as ex:
        logging.exception(
            f"[Exception][database_connector][init][{str(ex)}]")
def execute(self, query):
    """Execute SQL query on Teradata database.

    Parameters:
        query (str): SQL query

    The connection uses autocommit, so no explicit commit is issued.
    """
    con = turbodbc.connect(
        dsn=self.dsn,
        turbodbc_options=turbodbc.make_options(prefer_unicode=True,
                                               autocommit=True)
    )
    try:
        cur = con.cursor()
        cur.execute(query)
    finally:
        # FIX: close the connection even when execute() raises;
        # the original leaked the connection on error.
        con.close()
def exasol_connection(db_uri, commit=True):
    """Yield a turbodbc connection to EXASOL built from *db_uri*.

    Parameters:
        db_uri: URL whose username/password/hostname parts hold the
            EXASOL credentials and host
        commit: when true, commit after the consumer is done with the
            connection (generator / fixture style)
    """
    parsed = urlparse(db_uri)
    buffer_options = make_options(read_buffer_size=Megabytes(100))
    conn = connect(
        driver="/Library/ODBC/EXASolution_ODBC.bundle/Contents/MacOS/libexaodbc-io418sys.dylib",
        EXAUID=parsed.username,
        EXAPWD=parsed.password,
        EXAHOST=parsed.hostname,
        turbodbc_options=buffer_options,
    )
    yield conn
    if commit:
        conn.commit()
def connect_to_db(self):
    """Open a turbodbc connection to SQL Server and store the connection,
    a cursor, and a SQLAlchemy engine on the instance.

    Reads ``self.server``, ``self.database``, ``self.uid`` and ``self.pwd``.
    """
    options = make_options(prefer_unicode=True,
                           autocommit=True,
                           use_async_io=True)
    cnxn = connect(DRIVER="SQL SERVER",
                   SERVER=self.server,
                   DATABASE=self.database,
                   UID=self.uid,
                   PWD=self.pwd,
                   turbodbc_options=options)
    self.conn = cnxn
    self.cursor = cnxn.cursor()
    # NOTE(review): this feeds the current value of self.engine into
    # create_engine() and overwrites it — presumably self.engine initially
    # holds a connection URL string set elsewhere; confirm against the
    # class __init__, otherwise this line is a bug.
    self.engine = create_engine(self.engine)
def td_load_df(df, dsn, table_name, index=None):  # This is the main function
    """
    Load pandas dataframe to teradata (using turbodbc)

    This function's intended use is to quickly load small to relatively large
    dataframes into the database without thinking about it

    The function is NOT recommended for use with very large dataframes
    (over 10 million cells)

    Parameters
    ----------
    df: a pandas dataframe, must only used allowed dtypes
    dsn: datasource name, should be == 'Teradata' unless it has a different
        name on your PC
    table_name: Teradata tablename (where to load)
    index (optional): string column name of the index column, if None
        attempts to guess

    Examples:
    ---------
    >>>test_1 = pd.DataFrame({'ints_1':[0, 1, 2, 3]
                             ,'ints_2':[0, 3, 4, 5]
                             ,'ints_3':[-1, -2, -3, -4]})
    >>>td_load_df(test_1, dsn='Teradata', table_name='ar_test_turbodbc_1')

    Current limitations:
    --------------------
    0. Largely untested
    1. String column length is equal to the length of the largest string column
    2. Datetime columns are loaded as several integer columns (year, month, day)
    3. Does not support NA values at this time (throws an error)
    4. Mutates the caller's dataframe in place (uppercases column names,
       stringifies categoricals, converts datetimes to text)
    """
    if df.isnull().values.any():
        raise ValueError("""This function currently does not support NA values,
                         please fill them before loading""")
    # In-place normalization of the caller's frame (see limitation 4).
    df.columns = [col.upper() for col in df.columns]
    cat_columns = df.select_dtypes(['category']).columns
    for col in cat_columns:
        df[col] = df[col].astype(str)
    df_datetime_to_text(df)
    # Force the UAT_DM schema prefix exactly once.
    table_name_clean = 'UAT_DM.' + table_name.upper().replace('UAT_DM.', '')
    options = make_options(autocommit=True)
    connection = connect(dsn=dsn, turbodbc_options=options)
    try:
        cursor = connection.cursor()
        try:
            cursor.execute('DROP TABLE ' + table_name_clean)
        except DatabaseError:
            # Best effort: the table may simply not exist yet.
            pass
        cursor.execute(
            sql_create_statement(df, table_name_clean, guess_index(df, index)))
        cursor.executemanycolumns(sql_insert_statement(df, table_name_clean),
                                  [df[col].values for col in df.columns])
    finally:
        # FIX: close the connection even if table creation or insert fails;
        # the original leaked the connection on error.
        connection.close()
    print("Loaded your dataframe successfully")
def test_autocommit_enabled_at_start(dsn, configuration):
    """A table created under autocommit survives closing without commit."""
    tbl = unique_table_name()

    opts = make_options(autocommit=True)
    conn = connect(dsn,
                   turbodbc_options=opts,
                   **get_credentials(configuration))
    conn.cursor().execute('CREATE TABLE {} (a INTEGER)'.format(tbl))
    conn.close()

    # Reconnect without autocommit: the empty table must still be there.
    conn = connect(dsn, **get_credentials(configuration))
    cur = conn.cursor()
    cur.execute('SELECT * FROM {}'.format(tbl))
    assert cur.fetchall() == []
    cur.execute('DROP TABLE {}'.format(tbl))
    conn.commit()
def open_connection(configuration, rows_to_buffer=None, parameter_sets_to_buffer=100, **turbodbc_options):
    """Yield a turbodbc connection configured from *configuration*.

    Parameters
    ----------
    configuration: dict with 'data_source_name', optional 'prefer_unicode',
        and whatever credential fields get_credentials() consumes
    rows_to_buffer: optional row count for the read buffer; when falsy the
        buffer defaults to 1 megabyte
    parameter_sets_to_buffer: number of parameter sets buffered per batch
    turbodbc_options: extra keyword options forwarded to make_options()
    """
    dsn = configuration['data_source_name']
    prefer_unicode = configuration.get('prefer_unicode', False)
    read_buffer_size = turbodbc.Rows(rows_to_buffer) if rows_to_buffer else turbodbc.Megabytes(1)
    options = turbodbc.make_options(read_buffer_size=read_buffer_size,
                                    parameter_sets_to_buffer=parameter_sets_to_buffer,
                                    prefer_unicode=prefer_unicode,
                                    **turbodbc_options)
    connection = turbodbc.connect(dsn, turbodbc_options=options, **get_credentials(configuration))
    try:
        yield connection
    finally:
        # FIX: close even when the consumer raises through the yield;
        # the original skipped close() in that case and leaked the connection.
        connection.close()
def read(self, query):
    """Read data from Teradata database.

    Parameters:
        query (str): SQL query

    Returns:
        pandas.DataFrame with results of query (column names lower-cased)
    """
    con = turbodbc.connect(
        dsn=self.dsn,
        turbodbc_options=turbodbc.make_options(prefer_unicode=True,
                                               autocommit=True)
    )
    try:
        cur = con.cursor()
        cur.execute(query)
        df = pd.DataFrame(cur.fetchallnumpy())
    finally:
        # FIX: close the connection even when the query fails;
        # the original leaked it on error.
        con.close()
    df.columns = df.columns.str.lower()
    return df
def test_options_with_overrides():
    """make_options must propagate every explicitly overridden option."""
    options = make_options(read_buffer_size=Rows(123),
                           parameter_sets_to_buffer=2500,
                           varchar_max_character_limit=42,
                           prefer_unicode=True,
                           use_async_io=True,
                           autocommit=True,
                           large_decimals_as_64_bit_types=True,
                           limit_varchar_results_to_max=True)
    assert options.read_buffer_size.rows == 123
    assert options.parameter_sets_to_buffer == 2500
    assert options.varchar_max_character_limit == 42
    # FIX (PEP 8 / flake8 E712): assert truth directly instead of `== True`.
    assert options.prefer_unicode
    assert options.use_async_io
    assert options.autocommit
    assert options.large_decimals_as_64_bit_types
    assert options.limit_varchar_results_to_max
def open_connection(configuration, rows_to_buffer=None,
                    parameter_sets_to_buffer=100, **turbodbc_options):
    """Yield a turbodbc connection configured from *configuration*.

    Parameters
    ----------
    configuration: dict with 'data_source_name', optional 'prefer_unicode',
        and whatever credential fields get_credentials() consumes
    rows_to_buffer: optional row count for the read buffer; when falsy the
        buffer defaults to 1 megabyte
    parameter_sets_to_buffer: number of parameter sets buffered per batch
    turbodbc_options: extra keyword options forwarded to make_options()
    """
    dsn = configuration['data_source_name']
    prefer_unicode = configuration.get('prefer_unicode', False)
    read_buffer_size = turbodbc.Rows(
        rows_to_buffer) if rows_to_buffer else turbodbc.Megabytes(1)
    options = turbodbc.make_options(
        read_buffer_size=read_buffer_size,
        parameter_sets_to_buffer=parameter_sets_to_buffer,
        prefer_unicode=prefer_unicode,
        **turbodbc_options)
    connection = turbodbc.connect(dsn,
                                  turbodbc_options=options,
                                  **get_credentials(configuration))
    try:
        yield connection
    finally:
        # FIX: close even when the consumer raises through the yield;
        # the original skipped close() in that case and leaked the connection.
        connection.close()
def __init__(self, table_name, idx_name, dsn, extra_joins='', var_list=None):
    '''
    table_name(string): name of SQL table that stores records
    idx_name(string): column name that stores unique identifier for a
        record (row from table specified)
    dsn(string): dsn ODBC string to connect to SQL through turbodbc driver
    extra_joins(string): extra SQL code the end of get_record{s}(). Can be
        used to create extra joins for getting data from secondary tables
        or for more WHERE filters
    var_list: optional list of column/variable names kept on the instance
    '''
    self.var_list = var_list
    self.table_name = table_name
    # Bracket the identifier so reserved words / spaces are valid in T-SQL.
    self.idx_name = '[' + idx_name + ']'
    options = make_options()
    self.conn = turbodbc.connect(dsn=dsn, turbodbc_options=options)
    self.cursor = self.conn.cursor()
    self.extra_joins = extra_joins
    # NOTE(review): the stray triple-quote below appears to open a
    # commented-out block that continues past this chunk — confirm in the
    # full file before removing.
    """
def test_options_with_overrides():
    """make_options must propagate every explicitly overridden option."""
    options = make_options(read_buffer_size=Rows(123),
                           parameter_sets_to_buffer=2500,
                           varchar_max_character_limit=42,
                           prefer_unicode=True,
                           use_async_io=True,
                           autocommit=True,
                           large_decimals_as_64_bit_types=True,
                           limit_varchar_results_to_max=True,
                           force_extra_capacity_for_unicode=True,
                           fetch_wchar_as_char=True)
    assert options.read_buffer_size.rows == 123
    assert options.parameter_sets_to_buffer == 2500
    assert options.varchar_max_character_limit == 42
    # FIX (PEP 8 / flake8 E712): assert truth directly instead of `== True`.
    assert options.prefer_unicode
    assert options.use_async_io
    assert options.autocommit
    assert options.large_decimals_as_64_bit_types
    assert options.limit_varchar_results_to_max
    assert options.force_extra_capacity_for_unicode
    assert options.fetch_wchar_as_char
def test_autocommit_switching(dsn, configuration):
    """Setting connection.autocommit after connect overrides the initial option."""
    tbl = unique_table_name()

    # Connect without autocommit, then switch it on before the DDL.
    conn = connect(dsn, **get_credentials(configuration))
    conn.autocommit = True  # <---
    conn.cursor().execute('CREATE TABLE {} (a INTEGER)'.format(tbl))
    conn.close()

    # Connect with autocommit enabled, then switch it off before the DML.
    conn = connect(dsn,
                   turbodbc_options=make_options(autocommit=True),
                   **get_credentials(configuration))
    conn.autocommit = False  # <---
    conn.cursor().execute('INSERT INTO {} VALUES (?)'.format(tbl), [42])
    conn.close()

    # table is there, but data was not persisted
    conn = connect(dsn, **get_credentials(configuration))
    cur = conn.cursor()
    cur.execute('SELECT * FROM {}'.format(tbl))
    assert cur.fetchall() == []
    cur.execute('DROP TABLE {}'.format(tbl))
    conn.commit()
def test_options_without_parameters():
    """make_options() without arguments yields the documented defaults."""
    defaults = make_options()
    # one of the default parameters tested in the C++ part
    assert defaults.parameter_sets_to_buffer == 1000
def load(self, df, table):
    """Load pandas.DataFrame to Teradata table.

    Drops and recreates a multiset table whose column types are derived
    from the frame's dtypes, dumps the frame to a CSV under
    ``self.temp_folder``, bulk-loads it with the external ``fastload.exe``
    utility, then removes the temp folder.

    Parameters:
        df (pandas.DataFrame): DataFrame for loading
        table (str): table name
    """
    con = turbodbc.connect(
        dsn=self.dsn,
        turbodbc_options=turbodbc.make_options(prefer_unicode=True,
                                               autocommit=True)
    )
    cur = con.cursor()
    try:
        drop_sql = 'drop table {}.{}'.format(self.database, table)
        cur.execute(drop_sql)
    except turbodbc.DatabaseError:
        # Best effort: the table may not exist yet.
        pass
    teradata_types = {
        'int8': 'byteint',
        'int16': 'smallint',
        'int32': 'integer',
        'int64': 'bigint',
        'float16': 'float',
        'float32': 'float',
        'float64': 'double',
        'object': 'varchar',
        'bool': 'byteint'
    }
    query = 'create multiset table {}.{} ('.format(self.database, table)
    for idx, dtype in zip(df.dtypes.index, df.dtypes.values):
        dtype = str(dtype)
        # varchar columns are sized to the longest value in the column
        td_type = teradata_types[dtype] + \
            ('' if dtype != 'object' else '({})'.format(df[idx].str.len().max()))
        query += '{} {}, '.format(idx, td_type)
    query = query[:-2] + ') no primary index'
    cur.execute(query)
    con.close()
    if not os.path.exists(self.temp_folder):
        os.makedirs(self.temp_folder)
    df.to_csv('{}/df.csv'.format(self.temp_folder), sep=',', decimal='.',
              index=False)
    # NOTE(review): the fastload script loads self.table while the DDL
    # above targets the `table` argument — confirm these always match.
    script = \
        'set session charset "UTF8";\n' + \
        f'logon {self.teradata_ip}/{self.login}, {self.password};\n' + \
        f'database {self.database};\n' + \
        f'begin loading {self.table}\n' + \
        f'errorfiles {self.fastload_err1}, {self.fastload_err2}\n' + \
        'checkpoint 1000000;\n' + \
        'set record vartext "," nostop;\n' + \
        'record 2;\n' + \
        'define\n' + \
        ',\n'.join(
            ['{} (varchar({}))'.format(col, df[col].astype(str).str.len().max())
             for col in df.columns]
        ) + '\n' + \
        'file = {};\n'.format(os.getcwd() + '\\{}\\df.csv'.format(self.temp_folder)) + \
        f'insert into {table}\n' + \
        'values(\n' + \
        ',\n'.join([':' + col for col in df.columns]) + \
        ');\n' + \
        'end loading;\n' + \
        'logoff;\n' + \
        'quit;'
    # FIX: use a context manager so the script file is closed (and flushed)
    # before fastload reads it, even if write() raises.
    with open(f'{self.temp_folder}/fastload_script.txt', "w+") as script_file:
        script_file.write(script)
    # FIX: the original chained the commands with '|' (a pipe), which does
    # not change fastload's working directory; '&&' runs fastload only
    # after the cd succeeds.
    command = f'cd {self.fastload_path} && fastload.exe < ' + \
        f'{os.getcwd()}\\{self.temp_folder}\\fastload_script.txt'
    flg = os.system(command)
    shutil.rmtree(self.temp_folder)
def test_options_without_parameters():
    """Defaults from an argument-less make_options() call are sane."""
    opts = make_options()
    # one of the default parameters tested in the C++ part
    assert 1000 == opts.parameter_sets_to_buffer
from turbodbc import connect, make_options, Megabytes
import pandas as pd
import pyarrow as pa
import pyarrow.parquet as pq

# Connect to Apache Drill through ODBC with a 20 MB read buffer and
# autocommit enabled.
opts = make_options(read_buffer_size=Megabytes(20), autocommit=True)
connection = connect(dsn='drill', turbodbc_options=opts)

# Query a CSV file exposed through Drill's dfs storage plugin.
sql = "select * from dfs.`/test.csv`"
cur = connection.cursor()
cur.execute(sql)

# Fetch the result set as an Arrow table, convert to pandas, and show it.
arrow_table = cur.fetchallarrow()
frame = arrow_table.to_pandas()
print(frame)
# Pull a sample of closed "could reach customer" workout cases from the
# source database (conSolar connection, defined elsewhere in this file).
df_workcases = pd.read_sql_query(
    """select left(hubs.hub_name,2) as Country,lwc.id as LWCid,
    u.name as 'Action_by',c.id as CustomerID, lp.id as LoanPortfolioID,
    lwc.closed_at, lwc.activity_category_id
    from loan_workout_cases lwc
    inner join customers c on c.id=lwc.customer_id
    inner join loan_portfolios lp on lp.customer_id=c.id
    inner join payment_accounts pa on pa.loan_portfolio_id=lp.id
    inner join payments p on p.payment_account_id=pa.id
    inner join users u on lwc.user_id = u.id
    inner join hubs on c.hub_id=hubs.id
    where lwc.could_reach_customer=1
    and lwc.activity_category_id IN ("5252","5253")
    and lwc.closed_at >= "2018-12-10"
    group by lwc.id limit 10;""",
    con=conSolar)

# Destination: Azure SQL warehouse via turbodbc.
options = make_options(parameter_sets_to_buffer=1000)
conn = connect(driver='{ODBC Driver 17 for SQL Server}',
               server='mbslbiserver.database.windows.net',
               database='mbsldwh_dev',
               turbodbc_options=options)

# Clear the staging table, then insert the frame column-wise.
test_query = '''DELETE FROM EXTR_WORKCASES
    INSERT INTO Extr_workcases ([Country],[LWCid] ,[Action_by] ,[CustomerID]
    ,[LoanPortfolioID] ,[closed_at] ,[activity_category_id])
    VALUES (?,?,?,?,?,?,?) '''

# NOTE(review): `cursor` is never created in this snippet (expected
# `cursor = conn.cursor()` after connecting), and the argument list below
# is truncated — the statement has 7 placeholders but only 5 column arrays
# are visible here (closed_at and one more are missing). Confirm against
# the full file.
cursor.executemanycolumns(test_query, [
    df_workcases['Country'].values,
    df_workcases['LWCid'].values,
    df_workcases['Action_by'].values,
    df_workcases['CustomerID'].values,
    df_workcases['activity_category_id'].values,
def create_connect_args(self, url):
    """Create the connect args for Turbodbc.

    Some code adapted from the Pyodbc connector in the SQLAlchemy codebase.

    Translates a SQLAlchemy URL into ``[dsn_args, connect_kwargs]`` for
    turbodbc.connect(): turbodbc-specific query parameters are popped off
    and folded into a make_options() object; the rest become the usual
    ODBC keywords (driver/server/port/database plus either uid/pwd or
    trusted_connection).
    """
    options = url.translate_connect_args(username='******')
    query = url.query
    options.update(query)
    connect_args = {}
    # first get the Turbodbc specific options
    turbodbc_options = {}
    for param in ('read_buffer_size', 'parameter_sets_to_buffer',
                  'use_async_io', 'autocommit',
                  'large_decimals_as_64_bit_types'):
        if param in options:
            raw = options.pop(param)
            # boolean-valued options accept "true"/"yes"/"1" etc.;
            # the remaining ones are integers
            if param in ('use_async_io', 'autocommit',
                         'large_decimals_as_64_bit_types'):
                value = util.asbool(raw)
            else:
                value = util.asint(raw)
            turbodbc_options[param] = value
    # we always need to set prefer_unicode=True for MSSQL + Turbodbc
    connect_args['turbodbc_options'] = make_options(prefer_unicode=True,
                                                    **turbodbc_options)
    for param in ('ansi', 'unicode_results'):
        if param in options:
            connect_args[param] = util.asbool(options.pop(param))
    # a host without a database is treated as a DSN name, per the
    # pyodbc-connector convention this code is adapted from
    dsn_connection = 'dsn' in options or \
        ('host' in options and 'database' not in options)
    if dsn_connection:
        dsn = [options.pop('host', '') or options.pop('dsn', '')]
    else:
        dsn = []
        port = ''
        if 'port' in options and 'port' not in query:
            port = int(options.pop('port'))
        driver = options.pop('driver', None)
        if driver is None:
            util.warn(
                "No driver name specified; "
                "this is expected by ODBC when using "
                "DSN-less connections")
        else:
            connect_args['driver'] = driver
        connect_args.update(
            server=(options.pop('host', '')),
            port=port,
            database=options.pop('database', '')
        )
    user = options.pop('user', None)
    if user:
        connect_args.update(
            uid=user,
            pwd=options.pop('password', '')
        )
    else:
        # no credentials given: fall back to Windows integrated auth
        connect_args['trusted_connection'] = 'Yes'
    return [dsn, connect_args]