예제 #1
0
def vertica_export_sql(
    db_engine: Engine,
    schema: str,
    s3_url: str,
    table: str,
    delimiter: str,
    record_terminator: str,
    to_charset: str,
    # keep these things halfway digestible in memory
    chunksize: int = 5 * GIG_IN_BYTES
) -> str:
    """Render a Vertica S3EXPORT statement that exports the given table to S3.

    The CSV output file is written under ``s3_url`` as ``records.csv``;
    string parameters are quoted via the engine's dialect before being
    interpolated into the statement.
    """
    # https://my.vertica.com/docs/8.1.x/HTML/index.htm#Authoring/SQLReferenceManual/Functions/VerticaFunctions/s3export.htm
    sql_template = """
        SELECT S3EXPORT( * USING PARAMETERS {params})
        OVER(PARTITION BEST) FROM {schema_and_table}
    """

    def quoted(raw: str) -> str:
        # Quote according to the engine's dialect rules.
        return quote_value(db_engine, raw)

    # Insertion order here determines parameter order in the rendered SQL.
    export_params = {
        "url": quoted(s3_url + 'records.csv'),
        "chunksize": chunksize,
        "to_charset": quoted(to_charset),
        "delimiter": quoted(delimiter),
        "record_terminator": quoted(record_terminator),
    }

    rendered_params = ", ".join(
        f"{name}={value}" for name, value in export_params.items())
    return sql_template.format(
        params=rendered_params,
        schema_and_table=quote_schema_and_table(db_engine, schema, table))
예제 #2
0
    def prep_bucket(self) -> None:
        """Prepare the S3 bucket/table for a load per the configured handling mode.

        For overwrite-style modes, optionally drops the existing external
        table and then purges the output directory; APPEND and any unknown
        mode raise NotImplementedError.
        """
        handling = self.existing_table_handling
        # These are modes we may want to try to make work in the
        # future, but Redshift doesn't support them directly today so
        # we'd need to do some lower level things (e.g., truncating
        # the actual Parquet files)
        overwrite_modes = (
            ExistingTableHandling.TRUNCATE_AND_OVERWRITE,
            ExistingTableHandling.DROP_AND_RECREATE,
            ExistingTableHandling.DELETE_AND_OVERWRITE,
        )
        if handling in overwrite_modes:
            if handling == ExistingTableHandling.DELETE_AND_OVERWRITE:
                logger.warning(
                    'Redshift Spectrum does not support transactional delete.')
            if handling == ExistingTableHandling.DROP_AND_RECREATE:
                schema_and_table: str = quote_schema_and_table(
                    self.db, self.schema_name, self.table_name)
                logger.info(f"Dropping external table {schema_and_table}...")
                with self.db.connect() as cursor:
                    # See below note about fix from Spectrify
                    cursor.execution_options(isolation_level='AUTOCOMMIT')
                    cursor.execute(f"DROP TABLE IF EXISTS {schema_and_table}")

            logger.info(f"Deleting files in {self.output_loc}...")
            self.output_loc.purge_directory()
        elif handling == ExistingTableHandling.APPEND:
            raise NotImplementedError('APPEND mode not yet supported')
        else:
            raise NotImplementedError(
                f'Teach me how to handle {self.existing_table_handling}')
예제 #3
0
 def test_quote_schema_and_table(self):
     """Schema and table are each quoted and joined with a dot."""
     engine = Mock()
     table = Mock()
     schema = Mock()
     preparer = engine.dialect.preparer.return_value
     preparer.quote.return_value = '"foo"'
     result = quoting.quote_schema_and_table(engine, schema, table)
     expected = "{0}.{0}".format(preparer.quote.return_value)
     self.assertEqual(expected, result)
     preparer.quote.assert_has_calls([call(schema),
                                      call(table)])
 def set_grant_permissions_for_users(self, schema_name: str, table: str,
                                     users: Dict[str, List[str]],
                                     db: Union[sqlalchemy.engine.Engine,
                                               sqlalchemy.engine.Connection]) -> None:
     """Grant permission types on a table to individual users.

     :param schema_name: Schema containing the target table.
     :param table: Name of the target table.
     :param users: Mapping of permission type (e.g. 'SELECT') to the list
                   of user names that should receive it.
     :param db: Engine or Connection on which to run the GRANT statements.
     :raises TypeError: If a permission type is not purely alphabetic.
     """
     schema_and_table: str = quote_schema_and_table(self.db.engine, schema_name, table)
     for perm_type, user_list in users.items():
         # Validate up front: perm_type is interpolated directly into the
         # SQL below, so restrict it to a plain alphabetic keyword.  Doing
         # this before the inner loop avoids re-checking per user and also
         # rejects a bad perm_type even when its user list is empty.
         if not perm_type.isalpha():
             raise TypeError("Please make sure your permission types"
                             " are an acceptable value.")
         for user in user_list:
             user_name: str = quote_user_name(self.db.engine, user)
             perms_sql = f'GRANT {perm_type} ON TABLE {schema_and_table} TO {user_name}'
             db.execute(perms_sql)
예제 #5
0
 def set_grant_permissions_for_groups(self, schema_name: str, table: str,
                                      groups: Dict[str, List[str]],
                                      db: Union[sqlalchemy.engine.Engine,
                                                sqlalchemy.engine.Connection]) -> None:
     """Grant permission types on a table to database groups.

     :param schema_name: Schema containing the target table.
     :param table: Name of the target table.
     :param groups: Mapping of permission type (e.g. 'SELECT') to the list
                    of group names that should receive it.
     :param db: Engine or Connection on which to run the GRANT statements.
     :raises TypeError: If a permission type is not purely alphabetic.
     """
     schema_and_table = quote_schema_and_table(self.db.engine, schema_name, table)
     for perm_type, groups_list in groups.items():
         # Validate up front: perm_type is interpolated directly into the
         # SQL below, so restrict it to a plain alphabetic keyword.  Doing
         # this before the inner loop avoids re-checking per group and also
         # rejects a bad perm_type even when its group list is empty.
         if not perm_type.isalpha():
             raise TypeError("Please make sure your permission types"
                             " are an acceptable value.")
         for group in groups_list:
             group_name: str = quote_group_name(self.db.engine, group)
             perms_sql = f'GRANT {perm_type} ON TABLE {schema_and_table} TO GROUP {group_name}'
             db.execute(perms_sql)
예제 #6
0
 def add_permissions(self, conn: Connection, driver: DBDriver) -> None:
     """Apply any configured group and user grants to the target table."""
     schema_and_table: str = quote_schema_and_table(driver.db,
                                                    self.tbl.schema_name,
                                                    self.tbl.table_name)
     group_perms = self.tbl.add_group_perms_for
     if group_perms is not None:
         logger.info(f"Adding permissions for {schema_and_table} "
                     f"to group {self.tbl.add_group_perms_for}")
         driver.set_grant_permissions_for_groups(self.tbl.schema_name,
                                                 self.tbl.table_name,
                                                 group_perms,
                                                 conn)
     user_perms = self.tbl.add_user_perms_for
     if user_perms is not None:
         logger.info(f"Adding permissions for {schema_and_table} "
                     f"to {self.tbl.add_user_perms_for}")
         driver.set_grant_permissions_for_users(self.tbl.schema_name,
                                                self.tbl.table_name,
                                                user_perms,
                                                conn)
    def move_and_verify(self, source_dbname: str, target_dbname: str) -> None:
        """Integration check: move a fixture table between databases and verify it.

        Creates a fresh source table fixture, runs records.move() from
        source to target with DROP_AND_RECREATE handling, validates the
        target table contents, then drops the target table and tears the
        fixture down.  NOTE(review): the target table is not cleaned up if
        validation raises — presumably acceptable for an integration test.
        """
        session = Session()
        records = session.records
        targets = records.targets
        sources = records.sources
        source_engine = session.get_db_engine(source_dbname)
        target_engine = session.get_db_engine(target_dbname)
        source_schema_name = schema_name(source_dbname)
        target_schema_name = schema_name(target_dbname)
        # Unique per build/run so concurrent CI runs don't collide.
        source_table_name = f'itest_source_{BUILD_NUM}_{CURRENT_EPOCH}'
        records_database_fixture = RecordsDatabaseFixture(
            source_engine, source_schema_name, source_table_name)
        # Tear down first in case a previous run left the table behind.
        records_database_fixture.tear_down()
        records_database_fixture.bring_up()

        existing = ExistingTableHandling.DROP_AND_RECREATE
        source = sources.table(schema_name=source_schema_name,
                               table_name=source_table_name,
                               db_engine=source_engine)
        target = targets.table(schema_name=target_schema_name,
                               table_name=TARGET_TABLE_NAME,
                               db_engine=target_engine,
                               existing_table_handling=existing)
        out = records.move(source, target)
        # redshift doesn't give reliable info on load results, so this
        # will be None or 1
        self.assertNotEqual(0, out.move_count)
        validator = RecordsTableValidator(target_engine,
                                          source_db_engine=source_engine)
        validator.validate(schema_name=target_schema_name,
                           table_name=TARGET_TABLE_NAME)

        # Clean up the moved table in the target database.
        quoted_target = quote_schema_and_table(target_engine,
                                               target_schema_name,
                                               TARGET_TABLE_NAME)
        sql = f"DROP TABLE {quoted_target}"
        target_engine.execute(sql)

        records_database_fixture.tear_down()
예제 #8
0
    def prep_table_for_load(self, schema_sql: str,
                            existing_table_handling: ExistingTableHandling,
                            driver: DBDriver) -> None:
        """Prepare the target table before loading records into it.

        If the table does not exist, it is created from schema_sql.
        Otherwise the existing table is truncated, emptied, dropped and
        recreated, or left alone (APPEND) per existing_table_handling.
        """
        logger.info("Looking for existing table..")
        db = driver.db

        table_exists = driver.has_table(table=self.tbl.table_name,
                                        schema=self.tbl.schema_name)
        if not table_exists:
            # No table yet: just create it inside a transaction.
            with db.engine.connect() as conn:
                with conn.begin():
                    self.create_table(schema_sql, conn, driver)
            return

        logger.info("Table already exists.")
        mode = existing_table_handling
        schema_and_table: str = quote_schema_and_table(
            db, self.tbl.schema_name, self.tbl.table_name)
        if mode == ExistingTableHandling.TRUNCATE_AND_OVERWRITE:
            logger.info("Truncating...")
            db.execute(f"TRUNCATE TABLE {schema_and_table}")
            logger.info("Truncated.")
        elif mode == ExistingTableHandling.DELETE_AND_OVERWRITE:
            logger.info("Deleting rows...")
            db.execute(f"DELETE FROM {schema_and_table} WHERE true")
            logger.info("Deleted")
        elif mode == ExistingTableHandling.DROP_AND_RECREATE:
            # Drop and recreate atomically within one transaction.
            with db.engine.connect() as conn:
                with conn.begin():
                    logger.info("Dropping and recreating...")
                    drop_table_sql = f"DROP TABLE {schema_and_table}"
                    conn.execute(drop_table_sql)
                    logger.info(f"Just ran {drop_table_sql}")
                    self.create_table(schema_sql, conn, driver)
        elif mode == ExistingTableHandling.APPEND:
            logger.info("Appending rows...")
        else:
            raise ValueError(f"Don't know how to handle {mode}")
예제 #9
0
def vertica_import_sql(schema: str,
                       table: str,
                       db_engine,
                       gzip: bool,
                       delimiter: str,
                       trailing_nullcols: bool,
                       record_terminator: str,
                       null_as: Optional[str],
                       rejectmax: Optional[int],
                       enforcelength: bool,
                       error_tolerance: bool,
                       abort_on_error: bool,
                       load_method: str,
                       no_commit: bool,
                       escape_as: Optional[str] = None,
                       enclosed_by: Optional[str] = None,
                       stream_name: Optional[str] = None,
                       skip: int = 0,
                       rejected_data_table: Optional[str] = None,
                       rejected_data_schema: Optional[str] = None) -> str:
    """Render a Vertica COPY ... FROM STDIN statement for a bulk import.

    Optional clauses (NULL AS, ESCAPE AS, ENCLOSED BY, REJECTMAX,
    REJECTED DATA, STREAM NAME, GZIP, NO COMMIT, ...) are emitted only
    when the corresponding argument is set; otherwise their template slot
    renders as an empty string.  Single-character options (delimiter,
    record terminator, escape, enclosed-by) are rendered as
    backslash-interpreted E'...' string literals via repr().

    :param rejected_data_table: Together with rejected_data_schema, names a
        table to capture rejected rows; both must be given to take effect.
    :returns: The COPY statement as a SQL string (not executed here).
    """
    # https://my.vertica.com/docs/8.1.x/HTML/index.htm#Authoring/SQLReferenceManual/Statements/COPY/COPY.htm
    # https://my.vertica.com/docs/8.1.x/HTML/index.htm#Authoring/SQLReferenceManual/Statements/COPY/Parameters.htm
    # https://my.vertica.com/docs/8.1.x/HTML/index.htm#Authoring/AdministratorsGuide/BulkLoadCOPY/SpecifyingCOPYFROMOptions.htm
    import_sql_template = """
            COPY {schema_and_table}
            FROM STDIN
            {gzip}
            {delimiter_as}
            {trailing_nullcols}
            {null_as}
            {escape}
            {enclosed_by}
            {record_terminator}
            {skip}
            {skip_bytes}
            {trim}
            {rejectmax}
            {rejected_data}
            {exceptions}
            {enforcelength}
            {error_tolerance}
            {abort_on_error}
            {storage}
            {stream_name}
            {no_commit}
            ;
    """

    def quote(value: str) -> str:
        # Quote according to the engine's dialect rules.
        return quote_value(db_engine, value)

    # REJECTED DATA requires both schema and table to be specified.
    if rejected_data_table is not None and rejected_data_schema is not None:
        rejected_target = quote_schema_and_table(db_engine,
                                                 rejected_data_schema,
                                                 rejected_data_table)
        rejected_data = f"REJECTED DATA AS TABLE {rejected_target}"
    else:
        rejected_data = ''

    import_sql = import_sql_template.format(
        schema_and_table=quote_schema_and_table(db_engine, schema, table),
        # https://forum.vertica.com/discussion/238556/reading-gzip-files-from-s3-into-vertica
        gzip='GZIP' if gzip else '',
        # The capital E in the next line specifies a string literal
        # that will be backslash-interpreted; the call to python's
        # "repr()" built-in will encode the string with backslashes.
        delimiter_as=f"DELIMITER AS E{repr(delimiter)}",
        trailing_nullcols="TRAILING NULLCOLS" if trailing_nullcols else '',
        null_as=f"NULL AS {quote(null_as)}" if null_as is not None else '',
        escape=f"ESCAPE AS E{repr(escape_as)}"
        if escape_as is not None else "NO ESCAPE",
        enclosed_by=f"ENCLOSED BY E{repr(enclosed_by)}"
        if enclosed_by is not None else '',
        record_terminator=f"RECORD TERMINATOR E{repr(record_terminator)}",
        skip=f"SKIP {skip}",
        skip_bytes='',  # only for fixed-width
        trim="",  # only for fixed-width
        rejectmax=f"REJECTMAX {rejectmax}" if rejectmax is not None else '',
        rejected_data=rejected_data,
        exceptions='',  # not yet supported
        enforcelength='ENFORCELENGTH' if enforcelength else '',
        error_tolerance='ERROR TOLERANCE' if error_tolerance else '',
        abort_on_error='ABORT ON ERROR' if abort_on_error else '',
        storage=f"STORAGE {load_method}",
        stream_name=f"STREAM NAME {quote(stream_name)}"
        if stream_name is not None else '',
        no_commit="NO COMMIT" if no_commit else '',
    )

    return import_sql
예제 #10
0
 def quote_schema_and_table(self, schema: str, table: str) -> str:
     """Quote schema and table via the module-level helper, using this object's engine."""
     return quote_schema_and_table(self.engine, schema, table)