def current_sequence(self, table_name):
    """Return the current CDC sequence value for `table_name`.

    `table_name` is referenced by the expanded SQL template; expand()
    appears to resolve placeholders from this function's locals -- TODO
    confirm against the expand() implementation.
    """
    # Note: Based on Siriusware proprietary CDC vs MS RowVersion CDC.
    command_name = 'current_sequence'
    sql_template = self.sql(command_name)
    sql_command = expand(sql_template)
    self.log(command_name, sql_command)
    # BUG FIX: the command was logged but never executed, so fetchone()
    # read from whatever result set was previously open on the cursor.
    self.cursor.execute(sql_command)
    return self.cursor.fetchone()[0]
def drop_table_sql(self, schema_name, table_name):
    """Build and return (without executing) the DROP TABLE IF EXISTS
    statement for schema_name.table_name, terminated by a blank line.

    schema_name/table_name are consumed by the template expansion.
    """
    command_name = 'drop_table_if_exists'
    sql_template = self.sql(command_name)
    sql_command = expand(sql_template)
    sql_command += '\n\n'
    self.log(command_name, sql_command)
    return sql_command
def bulk_insert_into_table(self, schema_name, table_name, table_schema, rows, extended_definitions=None): command_name = f'insert_into_table' # insert extended column definitions into schema if extended_definitions: table_schema.column_definitions(extended_definitions) column_names = ', '.join(quote(table_schema.columns.keys())) # print(f'column_names: {column_names}') column_placeholders = ', '.join([self.queryparm] * len(table_schema.columns)) autocommit = self.conn.autocommit self.conn.autocommit = False sql_template = self.sql(command_name) sql_command = expand(sql_template) self.log(command_name, sql_command) self.cursor.fast_executemany = True row_count = self.cursor.executemany(sql_command, rows) self.cursor.commit() self.conn.autocommit = autocommit return row_count
def timestamp_logic(self, current_timestamp, last_timestamp=None):
    """Set self.timestamp_value / self.timestamp_where_condition for
    change-detection queries.

    When the table defines no timestamp column(s), the engine-specific
    literal for `current_timestamp` is used and no WHERE condition is
    emitted. Otherwise the greatest of the configured timestamp columns is
    selected (via a VALUES row-constructor when there is more than one)
    and the WHERE template is expanded.

    NOTE(review): `last_timestamp` is unused here; presumably it is
    consumed by the expanded where-template via caller locals -- confirm.
    """
    if not self.table.timestamp:
        # # TEMP SOLUTION: 2019-03-19
        # if self.table.schema_name == 'dataset_1001':
        #     # PostgreSQL:
        #     timestamp_str = f'{current_timestamp:%Y-%m-%d %H:%M:%S}'
        #     self.timestamp_value = f"to_timestamp('{timestamp_str}', 'YYYY-MM-DD hh24:mi:ss')::timestamp without time zone"
        #     self.timestamp_where_condition = ''
        # else:
        #     # SQL Server: {ts'{timestamp_str}'}
        #     timestamp_str = f'{current_timestamp:%Y-%m-%d %H:%M:%S}'
        #     self.timestamp_value = f"to_timestamp('{timestamp_str}', 'YYYY-MM-DD hh24:mi:ss')::timestamp without time zone"
        #     self.timestamp_where_condition = ''

        # May/Jun enhancement: delegate literal formatting to the db engine
        self.timestamp_value = self.db_engine.timestamp_literal(
            current_timestamp)
        self.timestamp_where_condition = ""
    else:
        timestamp_columns = add_aliases(split(self.table.timestamp))
        if len(timestamp_columns) == 1:
            timestamp_value = q(timestamp_columns[0])
        else:
            # build timestamp column values as ("<column-1>"), ("<column-2>"), ("<column-n>")
            timestamp_values = ", ".join([
                f"({q(column_name)})" for column_name in timestamp_columns
            ])
            # greatest-of-N via a VALUES row constructor
            timestamp_value = (
                f'(select max("v") from (values {timestamp_values}) as value("v"))'
            )
        self.timestamp_value = timestamp_value
        self.timestamp_where_condition = expand(
            self.timestamp_where_template)
def timestamp_literal(self, timestamp_value):
    """Render `timestamp_value` as a SQL timestamp literal.

    The value is formatted here in Python (rather than evaluated by the
    database engine) and substituted into the 'timestamp_literal' SQL
    template; expand() appears to pick up `timestamp_str` from caller
    locals -- TODO confirm.
    """
    # BUG FIX: formatted `current_timestamp`, an undefined name (NameError);
    # the parameter is `timestamp_value`.
    timestamp_str = f'{timestamp_value:%Y-%m-%d %H:%M:%S}'
    command_name = 'timestamp_literal'
    sql_template = self.sql(command_name)
    # we evaluate the expression in Python vs via database engine
    return expand(sql_template)
def use_database_sql(self, database_name):
    """Build and return (without executing) the USE DATABASE statement for
    `database_name`, terminated by a blank line.

    `database_name` is consumed by the template expansion.
    """
    command_name = 'use_database'
    sql_template = self.sql(command_name)
    sql_command = expand(sql_template)
    sql_command = f'{sql_command}\n\n'
    self.log(command_name, sql_command)
    return sql_command
def merge(self, schema_name, nk):
    """Build and return the MERGE statement for the current table.

    :param schema_name: target schema (consumed by the template expansion)
    :param nk: natural-key column(s) used to build the match condition
    :return: merge SQL with blank lines removed

    The locals below look unused but appear to be referenced by
    self.merge_template when expand() runs -- TODO confirm.
    """
    table_name = self.table.table_name
    match_condition = self.match_condition(nk)
    column_assignments = self.column_assignments()
    column_names = self.column_names()
    source_column_names = self.source_column_names()
    sql = expand(self.merge_template)
    return delete_blank_lines(sql.strip())
def drop_table(self, schema_name, table_name):
    """Drop schema_name.table_name if it exists; no-op otherwise.

    Temporarily forces autocommit on (DDL), restoring the prior setting
    afterwards. NOTE(review): the restore is skipped if execute() raises.
    """
    command_name = 'drop_table'
    if self.does_table_exist(schema_name, table_name):
        autocommit = self.conn.autocommit
        self.conn.autocommit = True
        sql_template = self.sql(command_name)
        sql_command = expand(sql_template)
        self.log(command_name, sql_command)
        self.cursor.execute(sql_command)
        self.conn.autocommit = autocommit
def create_schema(self, schema_name):
    """Create `schema_name` if it does not already exist.

    Temporarily forces autocommit on (DDL), restoring the prior setting
    afterwards. NOTE(review): the restore is skipped if execute() raises.
    """
    command_name = 'create_schema'
    if not self.does_schema_exist(schema_name):
        autocommit = self.conn.autocommit
        self.conn.autocommit = True
        sql_template = self.sql(command_name)
        sql_command = expand(sql_template)
        self.log(command_name, sql_command)
        self.cursor.execute(sql_command)
        self.conn.autocommit = autocommit
def create_database(self, database_name):
    """Create `database_name` if it does not already exist.

    Temporarily forces autocommit on (CREATE DATABASE typically cannot run
    in a transaction), restoring the prior setting afterwards.
    NOTE(review): the restore is skipped if execute() raises.
    """
    command_name = 'create_database'
    if not self.does_database_exist(database_name):
        autocommit = self.conn.autocommit
        self.conn.autocommit = True
        sql_template = self.sql(command_name)
        sql_command = expand(sql_template)
        self.log(command_name, sql_command)
        self.cursor.execute(sql_command)
        self.conn.autocommit = autocommit
def create_named_table(self, schema_name, table_name):
    """Create schema_name.table_name from its table-specific SQL template
    ('create_named_table_<schema>_<table>') if it does not already exist.

    Temporarily forces autocommit on (DDL), restoring the prior setting
    afterwards. NOTE(review): the restore is skipped if execute() raises.
    """
    command_name = f'create_named_table_{schema_name}_{table_name}'
    if not self.does_table_exist(schema_name, table_name):
        autocommit = self.conn.autocommit
        self.conn.autocommit = True
        sql_template = self.sql(command_name)
        sql_command = expand(sql_template)
        self.log(command_name, sql_command)
        self.cursor.execute(sql_command)
        self.conn.autocommit = autocommit
def create_table_sql(self, schema_name, table_name, workbook):
    """Build and return (without executing) the CREATE TABLE statement for
    a reference table, with column definitions derived from `workbook`.

    `column_definitions` (and schema_name/table_name) look unused but
    appear to be consumed by expand() via caller locals -- TODO confirm.
    """
    command_name = 'create_ref_table'
    sql_template = self.sql('create_ref_table')
    # noinspection PyUnusedLocal
    column_definitions = self._column_definition_sql(table_name, workbook)
    sql_command = expand(sql_template)
    sql_command = f'{sql_command}\n\n'
    self.log(command_name, sql_command)
    return sql_command
def execute(self, command_name, value=None):
    """Expand and execute the SQL template named `command_name`.

    :param value: optional single bind parameter passed to cursor.execute
    :return: the cursor returned by cursor.execute

    `queryparm` looks unused but appears to be consumed by expand() via
    caller locals (placeholder style) -- TODO confirm.
    """
    # noinspection PyUnusedLocal
    queryparm = self.queryparm
    sql_template = self.sql(command_name)
    sql_command = expand(sql_template)
    if value is None:
        cursor = self.cursor.execute(sql_command)
    else:
        cursor = self.cursor.execute(sql_command, value)
    self.log(command_name, sql_command)
    return cursor
def select(self, job_id, current_timestamp, last_timestamp):
    """Build and return the incremental SELECT statement for the current
    table, terminated with ';' and with blank lines removed.

    Calls timestamp_logic() first so self.timestamp_value /
    self.timestamp_where_condition are current. The locals below look
    unused but appear to be referenced by self.select_template when
    expand() runs -- TODO confirm. NOTE(review): `job_id` is otherwise
    unused here; presumably also a template input.
    """
    self.timestamp_logic(current_timestamp, last_timestamp)
    schema_name = self.table.schema_name
    table_name = self.table.table_name
    column_names = self.column_names()
    timestamp_value = self.timestamp_value
    join_clause = self.join_clause()
    where_clause = self.where_clause()
    order_clause = self.order_clause()
    sql = expand(self.select_template)
    return delete_blank_lines(sql.strip() + ";")
def generate_row_count_timestamp(self): self.timestamp_logic(self.table.timestamp, self.table.first_timestamp) # these are used in the expand method. Ignore warnings schema_name = self.table.schema_name table_name = self.table.table_name join_clause = self.join_clause() where_clause = self.where_clause() order_clause = self.order_clause() # column_names = self.column_names() sql = expand(self.select_template) return delete_blank_lines(sql.strip() + ';')
def select_min_max(self, schema_name, table_name, column_name):
    """Return the (min, max) row for `column_name`, or None when the table
    does not exist.

    `column_name` looks unused but appears to be consumed by expand() via
    caller locals -- TODO confirm.
    """
    command_name = 'select_min_max'
    if not self.does_table_exist(schema_name, table_name):
        # print(f'Table does not exist: {schema_name}.{table_name}')
        return None
    else:
        sql_template = self.sql(command_name)
        sql_command = expand(sql_template)
        self.log(command_name, sql_command)
        self.cursor.execute(sql_command)
        column_detail = self.cursor.fetchone()
        return column_detail
def select_row_count(self, schema_name, table_name):
    """Return the row-count row for schema_name.table_name, or None when
    the table does not exist.

    NOTE(review): returns the whole fetched row, not a scalar -- callers
    evidently unpack it themselves.
    """
    command_name = 'select_row_count'
    if not self.does_table_exist(schema_name, table_name):
        # print(f'Table does not exist: {schema_name}.{table_name}')
        return None
    else:
        sql_template = self.sql(command_name)
        sql_command = expand(sql_template)
        self.log(command_name, sql_command)
        self.cursor.execute(sql_command)
        row_count = self.cursor.fetchone()
        return row_count
def select_null_count(self, schema_name, table_name, column_name): """Returns null count""" # column_name is used when the sql_template is expanded. ignore warning command_name = 'select_null_count' if not self.does_table_exist(schema_name, table_name): # print(f'Table does not exist: {schema_name}.{table_name}') return None else: sql_template = self.sql(command_name) sql_command = expand(sql_template) self.log(command_name, sql_command) self.cursor.execute(sql_command) null_count = self.cursor.fetchone() return null_count.null_count
def insert_into_table(self, schema_name, table_name, **column_names_values):
    """Insert a single row into schema_name.table_name.

    Column names and values come from `column_names_values` keyword
    arguments; `column_names` / `column_placeholders` appear to be consumed
    by expand() via caller locals -- TODO confirm. Runs with autocommit
    forced on, restoring the prior setting afterwards.
    """
    command_name = f'insert_into_table'
    column_names = ', '.join(quote(column_names_values.keys()))
    column_placeholders = ', '.join([self.queryparm] * len(column_names_values))
    column_values = column_names_values.values()
    autocommit = self.conn.autocommit
    self.conn.autocommit = True
    sql_template = self.sql(command_name)
    sql_command = expand(sql_template)
    self.log(command_name, sql_command)
    # values are passed positionally; dict ordering fixes the column order
    self.cursor.execute(sql_command, *column_values)
    self.conn.autocommit = autocommit
def assign_set(dirpath, path, tasks, should_skip):
    """Parse the .set file `path` (relative to `dirpath`) and distribute
    its benchmarks round-robin across `tasks`.

    Returns False when the set is excluded by the 'exclude' config or the
    last processed line yielded no benchmarks; True otherwise.
    """
    old_dir = os.getcwd()
    epath = expand(dirpath)
    os.chdir(epath)

    # there is not so much of .set files, so there won't be
    # any significant time penalty if we parse the configs
    # here every time
    # BUG FIX: dict.has_key() was removed in Python 3; use `in` instead.
    if 'exclude' in configs.configs \
            and configs.configs['exclude'].strip() != '':
        exclude = configs.configs['exclude'].split(',')
        bname = os.path.basename(path)
        for e in exclude:
            e = e.strip()
            if re.search(e, bname):
                print('Skiping {0} benchmarks'.format(bname))
                os.chdir(old_dir)
                return False

    try:
        f = open(path, 'r')
    except OSError as e:
        # err() presumably terminates the process -- TODO confirm; if it
        # returned, `f` would be unbound below.
        err("Failed opening set of benchmarks ({0}): {1}".format(
            path, e.strerror))

    num = len(tasks)
    assert num > 0

    cat = path[:-4]
    # BUG FIX: initialize n so an empty/blank .set file does not raise
    # NameError at `return n != 0`. n still deliberately resets per line,
    # preserving the original per-line round-robin assignment.
    n = 0
    for line in f:
        line = line.strip()
        if not line:
            continue

        n = 0
        for it in glob.iglob(line):
            bench = ('benchmarks/c/{0}'.format(it), cat)
            if should_skip(bench):
                dbg('Skipping benchmark {0}'.format(it))
            else:
                tasks[n % num].add(bench)
            n += 1

    f.close()
    os.chdir(old_dir)

    return n != 0
def git_checkout(repo_dir, tag):
    """Run `git checkout <tag>` inside the repository at `repo_dir`.

    `repo_dir` may name the repository directory itself or a file within
    it (in which case its containing directory is used). Returns True
    when git exits with status 0.
    """
    saved_cwd = os.getcwd()
    expanded = expand(repo_dir)
    work_dir = expanded if os.path.isdir(expanded) else os.path.dirname(expanded)
    os.chdir(work_dir)
    status = subprocess.call(['git', 'checkout', tag])
    os.chdir(saved_cwd)
    return status == 0
def insert_into_table_sql(self, schema_name, table_name, worksheet):
    """Build and return (without executing) the INSERT statement(s) for
    `worksheet`'s rows, terminated by a blank line.

    `column_values` looks unused but appears to be consumed by expand()
    via caller locals -- TODO confirm.
    """
    command_name = 'insert_into_table'
    sql_template = self.sql('insert_into_table')
    # worksheet = workbook.active
    # worksheet = workbook['Data']
    column_values = self._column_values_sql(schema_name, table_name, worksheet)
    sql_command = expand(sql_template)
    sql_command = f'{sql_command}\n\n'
    # BUG FIX: str.replace returns a new string; the result was discarded,
    # so the 'values,' cleanup never happened.
    sql_command = sql_command.replace('values,', 'values')
    # sql_command.replace(',\n insert', '\n insert')
    return sql_command
def drop_temp_table(self, table_name): command_name = 'drop_temp_table' # strip optional leading #'s from table name since our SQL template includes # FIX: This means we strip ##global_temp as well as #local_temp # noinspection PyUnusedLocal table_name = table_name.strip('#') autocommit = self.conn.autocommit self.conn.autocommit = True sql_template = self.sql(command_name) sql_command = expand(sql_template) self.log(command_name, sql_command) self.cursor.execute(sql_command) self.conn.autocommit = autocommit
def select_columns_with_datatype(self, schema_name, table_name):
    """Return all (column, datatype) rows for schema_name.table_name, or
    None when the table does not exist.
    """
    command_name = 'select_columns_with_datatype'
    if not self.does_table_exist(schema_name, table_name):
        # print(f'Table does not exist: {schema_name}.{table_name}')
        return None
    else:
        sql_template = self.sql(command_name)
        sql_command = expand(sql_template)
        self.log(command_name, sql_command)
        self.cursor.execute(sql_command)
        columns_with_datatypes = self.cursor.fetchall()
        # Lower all column names in columns_with_datatypes
        # columns_with_datatypes = list(set(column.column.lower() for column in columns_with_datatypes))
        return columns_with_datatypes
def select_nullable_columns(self, schema_name, table_name):
    """Return the lower-cased names of nullable columns in
    schema_name.table_name, or None when the table does not exist.
    """
    command_name = 'select_nullable_columns'
    if not self.does_table_exist(schema_name, table_name):
        # print(f'Table does not exist: {schema_name}.{table_name}')
        return None
    else:
        null_column_list = []
        sql_template = self.sql(command_name)
        sql_command = expand(sql_template)
        self.log(command_name, sql_command)
        self.cursor.execute(sql_command)
        null_columns = self.cursor.fetchall()
        for columns in null_columns:
            null_column_list.append(columns.column.lower())
        return null_column_list
def get_pk(self, schema_name, table_name, pk_column_name, nk_column_name, **key_values):
    """Execute the 'get_pk' lookup for the row matching `key_values`.

    `pk_conditions` (an ' and '-joined equality list) and the other
    parameters appear to be consumed by expand() via caller locals --
    TODO confirm.

    NOTE(review): despite the name, nothing is fetched or returned after
    execute(); this function always returns None. Verify whether a
    fetchone()/return was intended.
    NOTE(review): key values are interpolated into the condition string
    rather than bound as parameters -- potential SQL-injection surface if
    values are untrusted.
    """
    command_name = f'get_pk'
    pk_conditions = list()
    for key, value in key_values.items():
        pk_conditions.append(f'{key}={value}')
    pk_conditions = ' and '.join(pk_conditions)
    autocommit = self.conn.autocommit
    self.conn.autocommit = True
    sql_template = self.sql(command_name)
    sql_command = expand(sql_template)
    self.log(command_name, sql_command)
    self.cursor.execute(sql_command)
    self.conn.autocommit = autocommit
def timestamp_logic(self, current_timestamp, last_timestamp=None):
    """Set self.timestamp_value / self.timestamp_where_condition for
    change-detection queries (variant without engine-delegated literals).

    With no configured timestamp columns, a quoted literal formatted from
    `current_timestamp` is used and the WHERE condition is empty.
    Otherwise the greatest of the configured columns is selected (VALUES
    row constructor when more than one) and the WHERE template expanded.

    NOTE(review): `last_timestamp` is unused here; presumably consumed by
    the expanded where-template via caller locals -- confirm.
    """
    timestamp_columns = add_aliases(split(self.table.timestamp))
    if not timestamp_columns:
        self.timestamp_value = f"'{current_timestamp:%Y-%m-%d %H:%M:%S}'"
        self.timestamp_where_condition = ''
    else:
        if len(timestamp_columns) == 1:
            timestamp_value = q(timestamp_columns[0])
        else:
            # build timestamp column values as ("created_at"), ("updated_at"), ("other_timestamp")
            timestamp_values = ', '.join([
                f'({q(column_name)})' for column_name in timestamp_columns
            ])
            # greatest-of-N via a VALUES row constructor
            timestamp_value = f'(select max("v") from (values {timestamp_values}) as value("v"))'
        self.timestamp_value = timestamp_value
        self.timestamp_where_condition = expand(
            self.timestamp_where_template)
def select_table_pk(self, schema_name, table_name):
    """Returns a comma delimited string of sorted pk column names or '' if no pk is defined.

    Returns None when the table itself does not exist.
    """
    command_name = 'select_table_pk'
    if not self.does_table_exist(schema_name, table_name):
        # print(f'Table does not exist: {schema_name}.{table_name}')
        return None
    else:
        sql_template = self.sql(command_name)
        sql_command = expand(sql_template)
        self.log(command_name, sql_command)
        self.cursor.execute(sql_command)
        rows = self.cursor.fetchall()
        if not rows:
            pk_columns = ''
        else:
            # sort for a stable, order-independent key representation
            pk_columns = sorted([row[0] for row in rows])
            pk_columns = ', '.join(pk_columns)
        return pk_columns
def get_benchmarks(files, tasks):
    """Resolve the comma-separated `files` spec (directories or .set files)
    and distribute the found benchmarks into `tasks`.

    Honors the 'skip-known-benchmarks' config by filtering against the
    results database for the configured year/tool. Returns the total
    benchmark count across tasks.

    NOTE(review): the bare `return` on a missing directory yields None,
    not a count -- verify callers handle that.
    """
    items = files.split(',')

    skip_known_id = configs.configs['skip-known-benchmarks']
    if skip_known_id != 'no':
        from database_proxy import DatabaseProxy
        dbproxy = DatabaseProxy()
        year_id = dbproxy.getYearID(configs.configs['year'])
        if year_id is None:
            err('Wrong year: {0}'.format(configs.configs['year']))
        try:
            toolid = int(skip_known_id)
        except ValueError:
            err('Invalid tool id for skip-known-benchmarks')
        dbg('Will skip benchmarks from tool {0}'.format(toolid))
        should_skip = lambda x: _should_skip_with_db(dbproxy, toolid, year_id, x)
    else:
        should_skip = lambda x: False

    for it in items:
        paths = glob.glob(expand(it))
        if not paths:
            sys.stderr.write('Directory does not exist: {0}\n'.format(it))
            return

        for path in paths:
            # get folder or .set file
            if os.path.isdir(path):
                path = '{0}/c'.format(path)
                assign_set_dir(path, tasks, should_skip)
            elif os.path.isfile(path):
                dirpath = os.path.dirname(path)
                basename = os.path.basename(path)
                assign_set(dirpath, basename, tasks, should_skip)

    # return number of found benchmarks
    num = 0
    for t in tasks:
        num += t.getCount()

    return num
def capture_select(self, schema_name, table_name, column_names, last_timestamp=None, current_timestamp=None):
    """Execute the change-capture SELECT for schema_name.table_name and
    return the live cursor (caller iterates/fetches).

    The joined `column_names` and the timestamp parameters appear to be
    consumed by expand() via caller locals -- TODO confirm. Autocommit is
    toggled on for the duration only on the mssql platform.
    """
    command_name = f'capture_select'
    column_names = ', '.join(quote(column_names))
    autocommit = self.conn.autocommit
    if self.platform == 'mssql':
        self.conn.autocommit = True
    sql_template = self.sql(command_name)
    # print(f'\ncapture_select.sql_template:\n{sql_template}\n')
    sql_command = expand(sql_template)
    self.log(command_name, sql_command)
    self.cursor.execute(sql_command)
    if self.platform == 'mssql':
        self.conn.autocommit = autocommit
    return self.cursor