def vertica_export_sql(db_engine: Engine,
                       schema: str,
                       s3_url: str,
                       table: str,
                       delimiter: str,
                       record_terminator: str,
                       to_charset: str,
                       # keep these things halfway digestible in memory
                       chunksize: int = 5 * GIG_IN_BYTES) -> str:
    """Render a Vertica S3EXPORT statement that dumps the given table to S3.

    :param db_engine: engine used only for dialect-correct quoting
    :param schema: schema containing the table to export
    :param s3_url: S3 prefix the export file is written under
    :param table: table to export
    :param delimiter: field delimiter for the exported CSV
    :param record_terminator: record terminator for the exported CSV
    :param to_charset: output character set
    :param chunksize: max bytes per exported chunk
    :return: the SQL text to execute
    """
    # https://my.vertica.com/docs/8.1.x/HTML/index.htm#Authoring/SQLReferenceManual/Functions/VerticaFunctions/s3export.htm
    def quoted(value: str) -> str:
        return quote_value(db_engine, value)

    # NOTE(review): assumes s3_url ends with a '/' separator — confirm at callers.
    parameter_pairs = [
        ("url", quoted(s3_url + 'records.csv')),
        ("chunksize", chunksize),
        ("to_charset", quoted(to_charset)),
        ("delimiter", quoted(delimiter)),
        ("record_terminator", quoted(record_terminator)),
    ]
    rendered_params = ", ".join(f"{name}={value}"
                                for name, value in parameter_pairs)
    target = quote_schema_and_table(db_engine, schema, table)
    template = """
    SELECT S3EXPORT( * USING PARAMETERS {params})
    OVER(PARTITION BEST)
    FROM {schema_and_table}
    """
    return template.format(params=rendered_params, schema_and_table=target)
def prep_bucket(self) -> None:
    """Prepare the S3 bucket/external table for a load, honoring the
    configured existing-table handling mode.

    Destructive modes all end by purging the output directory; only
    DROP_AND_RECREATE also drops the external table itself.
    """
    handling = self.existing_table_handling
    # These are modes we may want to try to make work in the
    # future, but Redshift doesn't support them directly today so
    # we'd need to do some lower level things (e.g., truncating
    # the actual Parquet files)
    destructive_modes = (ExistingTableHandling.TRUNCATE_AND_OVERWRITE,
                         ExistingTableHandling.DROP_AND_RECREATE,
                         ExistingTableHandling.DELETE_AND_OVERWRITE)
    if handling in destructive_modes:
        if handling == ExistingTableHandling.DELETE_AND_OVERWRITE:
            logger.warning(
                'Redshift Spectrum does not support transactional delete.')
        if handling == ExistingTableHandling.DROP_AND_RECREATE:
            schema_and_table: str = quote_schema_and_table(
                self.db, self.schema_name, self.table_name)
            logger.info(f"Dropping external table {schema_and_table}...")
            with self.db.connect() as cursor:
                # See below note about fix from Spectrify
                cursor.execution_options(isolation_level='AUTOCOMMIT')
                cursor.execute(f"DROP TABLE IF EXISTS {schema_and_table}")
        logger.info(f"Deleting files in {self.output_loc}...")
        self.output_loc.purge_directory()
    elif handling == ExistingTableHandling.APPEND:
        raise NotImplementedError('APPEND mode not yet supported')
    else:
        raise NotImplementedError(
            f'Teach me how to handle {handling}')
def test_quote_schema_and_table(self):
    """quote_schema_and_table quotes schema and table separately and joins
    them with a dot, using the dialect's preparer."""
    engine = Mock()
    table_arg = Mock()
    schema_arg = Mock()
    preparer = engine.dialect.preparer.return_value
    preparer.quote.return_value = '"foo"'
    result = quoting.quote_schema_and_table(engine, schema_arg, table_arg)
    expected = preparer.quote.return_value + "." + preparer.quote.return_value
    self.assertEqual(expected, result)
    preparer.quote.assert_has_calls([call(schema_arg), call(table_arg)])
def set_grant_permissions_for_users(self, schema_name: str, table: str,
                                    users: Dict[str, List[str]],
                                    db: Union[sqlalchemy.engine.Engine,
                                              sqlalchemy.engine.Connection]) -> None:
    """Grant each permission type to the named users on the given table.

    :param schema_name: schema containing the table
    :param table: table to grant permissions on
    :param users: mapping of permission type (e.g. 'SELECT') to usernames
    :param db: engine or connection the GRANT statements are executed on
    :raises TypeError: if a permission type contains non-alphabetic characters
    """
    # NOTE(review): identifiers are quoted with self.db.engine while the
    # GRANTs run on the passed-in `db` — presumably the same dialect; confirm.
    schema_and_table: str = quote_schema_and_table(self.db.engine,
                                                   schema_name, table)
    for perm_type, user_list in users.items():
        # The permission keyword cannot be parameterized, so restrict it to
        # alphabetic characters to block SQL injection. Validate once per
        # permission type, before issuing any GRANTs — previously this was
        # checked inside the user loop, so an invalid type paired with an
        # empty user list was silently accepted.
        if not perm_type.isalpha():
            raise TypeError("Please make sure your permission types"
                            " are an acceptable value.")
        for user in user_list:
            user_name: str = quote_user_name(self.db.engine, user)
            perms_sql = f'GRANT {perm_type} ON TABLE {schema_and_table} TO {user_name}'
            db.execute(perms_sql)
def set_grant_permissions_for_groups(self, schema_name: str, table: str,
                                     groups: Dict[str, List[str]],
                                     db: Union[sqlalchemy.engine.Engine,
                                               sqlalchemy.engine.Connection]) -> None:
    """Grant each permission type to the named groups on the given table.

    :param schema_name: schema containing the table
    :param table: table to grant permissions on
    :param groups: mapping of permission type (e.g. 'SELECT') to group names
    :param db: engine or connection the GRANT statements are executed on
    :raises TypeError: if a permission type contains non-alphabetic characters
    """
    schema_and_table = quote_schema_and_table(self.db.engine,
                                              schema_name, table)
    for perm_type, groups_list in groups.items():
        # The permission keyword cannot be parameterized, so restrict it to
        # alphabetic characters to block SQL injection. Validate once per
        # permission type, before issuing any GRANTs — previously this was
        # checked inside the group loop, so an invalid type paired with an
        # empty group list was silently accepted.
        if not perm_type.isalpha():
            raise TypeError("Please make sure your permission types"
                            " are an acceptable value.")
        for group in groups_list:
            group_name: str = quote_group_name(self.db.engine, group)
            perms_sql = f'GRANT {perm_type} ON TABLE {schema_and_table} TO GROUP {group_name}'
            db.execute(perms_sql)
def add_permissions(self, conn: Connection, driver: DBDriver) -> None:
    """Apply any configured group and user grants to the target table."""
    schema_and_table: str = quote_schema_and_table(driver.db,
                                                   self.tbl.schema_name,
                                                   self.tbl.table_name)
    group_perms = self.tbl.add_group_perms_for
    if group_perms is not None:
        logger.info(f"Adding permissions for {schema_and_table} "
                    f"to group {group_perms}")
        driver.set_grant_permissions_for_groups(self.tbl.schema_name,
                                                self.tbl.table_name,
                                                group_perms,
                                                conn)
    user_perms = self.tbl.add_user_perms_for
    if user_perms is not None:
        logger.info(f"Adding permissions for {schema_and_table} "
                    f"to {user_perms}")
        driver.set_grant_permissions_for_users(self.tbl.schema_name,
                                               self.tbl.table_name,
                                               user_perms,
                                               conn)
def move_and_verify(self, source_dbname: str, target_dbname: str) -> None:
    """Round-trip integration check: build a fixture table in the source
    database, move it to the target, validate the result, then clean up."""
    session = Session()
    records = session.records
    source_engine = session.get_db_engine(source_dbname)
    target_engine = session.get_db_engine(target_dbname)
    source_schema = schema_name(source_dbname)
    target_schema = schema_name(target_dbname)
    source_table = f'itest_source_{BUILD_NUM}_{CURRENT_EPOCH}'

    fixture = RecordsDatabaseFixture(source_engine,
                                     source_schema,
                                     source_table)
    fixture.tear_down()
    fixture.bring_up()

    source = records.sources.table(schema_name=source_schema,
                                   table_name=source_table,
                                   db_engine=source_engine)
    target = records.targets.table(
        schema_name=target_schema,
        table_name=TARGET_TABLE_NAME,
        db_engine=target_engine,
        existing_table_handling=ExistingTableHandling.DROP_AND_RECREATE)
    out = records.move(source, target)
    # redshift doesn't give reliable info on load results, so this
    # will be None or 1
    self.assertNotEqual(0, out.move_count)

    validator = RecordsTableValidator(target_engine,
                                      source_db_engine=source_engine)
    validator.validate(schema_name=target_schema,
                       table_name=TARGET_TABLE_NAME)

    quoted_target = quote_schema_and_table(target_engine,
                                           target_schema,
                                           TARGET_TABLE_NAME)
    target_engine.execute(f"DROP TABLE {quoted_target}")
    fixture.tear_down()
def prep_table_for_load(self,
                        schema_sql: str,
                        existing_table_handling: ExistingTableHandling,
                        driver: DBDriver) -> None:
    """Ensure the destination table is ready for a load.

    Creates the table when absent; otherwise applies the requested
    existing-table handling (truncate, delete, drop-and-recreate, or append).

    :raises ValueError: on an unrecognized handling mode
    """
    logger.info("Looking for existing table..")
    db = driver.db
    # Guard clause: no existing table means we just create it and are done.
    if not driver.has_table(table=self.tbl.table_name,
                            schema=self.tbl.schema_name):
        with db.engine.connect() as conn:
            with conn.begin():
                self.create_table(schema_sql, conn, driver)
        return

    logger.info("Table already exists.")
    mode = existing_table_handling
    schema_and_table: str = quote_schema_and_table(
        db, self.tbl.schema_name, self.tbl.table_name)
    if mode == ExistingTableHandling.TRUNCATE_AND_OVERWRITE:
        logger.info("Truncating...")
        db.execute(f"TRUNCATE TABLE {schema_and_table}")
        logger.info("Truncated.")
    elif mode == ExistingTableHandling.DELETE_AND_OVERWRITE:
        logger.info("Deleting rows...")
        db.execute(f"DELETE FROM {schema_and_table} WHERE true")
        logger.info("Deleted")
    elif mode == ExistingTableHandling.DROP_AND_RECREATE:
        # Drop and recreate must happen atomically in one transaction.
        with db.engine.connect() as conn:
            with conn.begin():
                logger.info("Dropping and recreating...")
                drop_table_sql = f"DROP TABLE {schema_and_table}"
                conn.execute(drop_table_sql)
                logger.info(f"Just ran {drop_table_sql}")
                self.create_table(schema_sql, conn, driver)
    elif mode == ExistingTableHandling.APPEND:
        logger.info("Appending rows...")
    else:
        raise ValueError(f"Don't know how to handle {mode}")
def vertica_import_sql(schema: str,
                       table: str,
                       db_engine,
                       gzip: bool,
                       delimiter: str,
                       trailing_nullcols: bool,
                       record_terminator: str,
                       null_as: str,
                       rejectmax: int,
                       enforcelength: bool,
                       error_tolerance: bool,
                       abort_on_error: bool,
                       load_method: str,
                       no_commit: bool,
                       escape_as: Optional[str] = None,
                       enclosed_by: Optional[str] = None,
                       stream_name: Optional[str] = None,
                       skip: int = 0,
                       rejected_data_table: Optional[str] = None,
                       rejected_data_schema: Optional[str] = None) -> str:
    """Render a Vertica ``COPY ... FROM STDIN`` statement for a bulk load.

    Each COPY option is rendered into its slot in the template, or left
    as an empty string when not applicable.  Rejected rows can be routed
    to a table when both ``rejected_data_table`` and
    ``rejected_data_schema`` are supplied.

    :param db_engine: engine used only for dialect-correct quoting
    :param load_method: value for the STORAGE clause (e.g. AUTO/DIRECT/TRICKLE)
    :return: the COPY SQL text to execute
    """
    # https://my.vertica.com/docs/8.1.x/HTML/index.htm#Authoring/SQLReferenceManual/Statements/COPY/COPY.htm
    # https://my.vertica.com/docs/8.1.x/HTML/index.htm#Authoring/SQLReferenceManual/Statements/COPY/Parameters.htm
    # https://my.vertica.com/docs/8.1.x/HTML/index.htm#Authoring/AdministratorsGuide/BulkLoadCOPY/SpecifyingCOPYFROMOptions.htm
    import_sql_template = """
      COPY {schema_and_table}
      FROM STDIN
      {gzip}
      {delimiter_as}
      {trailing_nullcols}
      {null_as}
      {escape}
      {enclosed_by}
      {record_terminator}
      {skip}
      {skip_bytes}
      {trim}
      {rejectmax}
      {rejected_data}
      {exceptions}
      {enforcelength}
      {error_tolerance}
      {abort_on_error}
      {storage}
      {stream_name}
      {no_commit}
    ;
    """

    def quote(value: str) -> str:
        # Dialect-correct string-literal quoting for option values.
        return quote_value(db_engine, value)

    # Both schema and table are required to route rejected rows to a table.
    if rejected_data_table is not None and rejected_data_schema is not None:
        rejected_target = quote_schema_and_table(db_engine,
                                                 rejected_data_schema,
                                                 rejected_data_table)
        rejected_data = f"REJECTED DATA AS TABLE {rejected_target}"
    else:
        rejected_data = ''
    import_sql = import_sql_template.format(
        schema_and_table=quote_schema_and_table(db_engine, schema, table),
        # https://forum.vertica.com/discussion/238556/reading-gzip-files-from-s3-into-vertica
        gzip='GZIP' if gzip else '',
        # The capital E in the next line specifies a string literal
        # that will be backslash-interpreted; the call to python's
        # "repr()" built-in will encode the string with backslashes.
        # NOTE(review): repr() of a string containing a single quote
        # produces a double-quoted Python literal (e.g. repr("'") is "'"
        # wrapped in double quotes), which is not valid Vertica E''
        # syntax — confirm delimiters/terminators never contain quotes.
        delimiter_as=f"DELIMITER AS E{repr(delimiter)}",
        trailing_nullcols="TRAILING NULLCOLS" if trailing_nullcols else '',
        null_as=f"NULL AS {quote(null_as)}" if null_as is not None else '',
        escape=f"ESCAPE AS E{repr(escape_as)}"
        if escape_as is not None else "NO ESCAPE",
        enclosed_by=f"ENCLOSED BY E{repr(enclosed_by)}"
        if enclosed_by is not None else '',
        record_terminator=f"RECORD TERMINATOR E{repr(record_terminator)}",
        skip=f"SKIP {skip}",
        skip_bytes='',  # only for fixed-width
        trim="",  # only for fixed-width
        rejectmax=f"REJECTMAX {rejectmax}" if rejectmax is not None else '',
        rejected_data=rejected_data,
        exceptions='',  # not yet supported
        enforcelength='ENFORCELENGTH' if enforcelength else '',
        error_tolerance='ERROR TOLERANCE' if error_tolerance else '',
        abort_on_error='ABORT ON ERROR' if abort_on_error else '',
        storage=f"STORAGE {load_method}",
        stream_name=f"STREAM NAME {quote(stream_name)}"
        if stream_name is not None else '',
        no_commit="NO COMMIT" if no_commit else '',
    )
    return import_sql
def quote_schema_and_table(self, schema, table):
    """Return the dot-joined, dialect-quoted ``schema.table`` identifier.

    Thin delegation to the module-level ``quote_schema_and_table`` helper,
    supplying this object's engine for dialect-correct quoting.
    """
    return quote_schema_and_table(self.engine, schema, table)