def split_sql_for_tqdm(self, sql):
    """Split a SQL script into queries paired with progress messages.

    The script is cut on semicolons. A fragment whose stripped text starts
    with ``--`` is treated as a progress update and must itself be
    terminated by a semicolon. Any other fragment is treated as a query.

    Limitations:
        * A statement must NOT begin with a comment, otherwise the whole
          statement is interpreted as a progress update.
        * Fragments whose stripped length is 0 or 1 are discarded.

    Args:
        sql: The raw SQL text.

    Returns:
        A tqdm object wrapping a list of dictionaries. Each dictionary has
        an ``"update"`` key (the most recent progress message seen, or
        ``None`` if there has been none yet) and a ``"query"`` key (the
        statement text, unstripped).
    """
    entries = []
    latest_update = None
    for fragment in sql.split(";"):
        trimmed = fragment.strip()
        if len(trimmed) <= 1:
            # Too short to be a statement or an update; skip it.
            continue
        if trimmed.startswith('--'):
            latest_update = trimmed[2:]
            continue
        # The update is intentionally not reset after being used: queries
        # that have no comment of their own reuse the latest message.
        entries.append({"update": latest_update, "query": fragment})
    return tqdm(entries)
def execute_ddl_script(sql):
    """Run a multi-statement Redshift DDL script one statement at a time.

    Redshift DDL scripts are exceptional in a number of ways:

    * CREATE EXTERNAL SCHEMA must be executed separately from any later
      references to that schema. The simplest way to enforce that is to
      split the multi-statement SQL string by semicolon and execute each
      statement in turn.
    * DROP EXTERNAL TABLE and CREATE EXTERNAL TABLE will fail with a
      'cannot run inside a transaction block' message unless autocommit is
      enabled.

    WARNING: This will break horribly if a semicolon terminated statement
    is inside a block quote.

    NOTE: Whatever follows the final semicolon is always discarded, so a
    script whose last statement lacks a terminating semicolon will not have
    that statement executed.

    Args:
        sql: The full DDL script text.

    Returns:
        False if no cursor could be obtained or if any statement's
        execution returned a falsy result (remaining statements are then
        skipped); True once every statement has been executed.
    """
    # Everything after the last semicolon is treated as trailing debris.
    statements = sql.split(';')[:-1]
    with _get_cursor() as cursor:
        if not cursor:
            app.logger.error(
                'Failed to get cursor to execute DDL script; aborting.')
            return False
        for index, statement in enumerate(statements):
            app.logger.info(
                f'Executing DDL script {index + 1} of {len(statements)}')
            if _execute(statement, operation='write', cursor=cursor):
                continue
            # Stop at the first failure rather than running later statements.
            app.logger.error(
                f'Aborting DDL script. Error executing statement: {statement}'
            )
            return False
        return True