def test_generate_sql_missing_lookback_config(self):
    """Expect an exception when ``rolling_lookback`` is malformed.

    The FULL_TABLE metadata carries a ``rolling_lookback`` that is a bare
    set literal rather than a mapping of lookback settings, so the
    discover-then-generate sequence must not complete successfully.
    """
    with pytest.raises(Exception):
        # Every step, discovery included, runs inside the raises-context,
        # so the test passes whichever of them rejects the configuration.
        table_id = f'{DB_NAME}.{SCHEMA_NAME}.EMPTY_TABLE_1'
        broken_metadata = {
            'replication-method': 'FULL_TABLE',
            'rolling_lookback': {'MISSING_REQUIRED_CONFIG'},
        }
        discovery_config = {
            'tables': table_id,
            'metadata': {table_id: broken_metadata},
        }
        catalog = test_utils.discover_catalog(
            self.snowflake_conn, discovery_config, select_all=True)
        stream = catalog.streams[0]
        selected_columns = list(stream.schema.properties.keys())
        common.generate_sql_query(stream, selected_columns)
    def test_generate_sql_full_sync(self):
        """With no replication metadata supplied, the query is a bare SELECT."""
        discovery_config = {'tables': f'{SCHEMA_NAME}.empty_table_1'}
        catalog = test_utils.discover_catalog(
            self.snowflake_conn, discovery_config, select_all=True)
        stream = catalog.streams[0]
        selected_columns = list(stream.schema.properties.keys())

        actual_sql = common.generate_sql_query(stream, selected_columns)
        # No WHERE and no ORDER BY are expected in the generated statement.
        expected_sql = f'SELECT "C_PK","C_INT" FROM "{DB_NAME}"."{SCHEMA_NAME}"."EMPTY_TABLE_1"'
        assert actual_sql == expected_sql
 def test_generate_sql_incremental_sync_without_bookmark(self):
     """An INCREMENTAL stream with no bookmark is only ordered by its replication key."""
     table_id = f'{DB_NAME}.{SCHEMA_NAME}.EMPTY_TABLE_1'
     incremental_metadata = {
         'replication-method': 'INCREMENTAL',
         'replication-key': 'REP_KEY',
     }
     discovery_config = {
         'tables': table_id,
         'metadata': {table_id: incremental_metadata},
     }
     catalog = test_utils.discover_catalog(
         self.snowflake_conn, discovery_config, select_all=True)
     stream = catalog.streams[0]
     selected_columns = list(stream.schema.properties.keys())

     actual_sql = common.generate_sql_query(stream, selected_columns)
     # No bookmark value is passed, so the expected SQL has no WHERE filter.
     expected_sql = f'SELECT "C_PK","C_INT" FROM "{DB_NAME}"."{SCHEMA_NAME}"."EMPTY_TABLE_1" ORDER BY "REP_KEY" ASC'
     assert actual_sql == expected_sql
Exemple #4
0
def sync_table(snowflake_conn, catalog_entry, state, columns):
    """Run an incremental sync for a single stream.

    Reconciles the replication key bookmarked in ``state`` with the one in
    the stream metadata, emits an ACTIVATE_VERSION message, then builds the
    SELECT statement and hands it to ``common.sync_query``.

    Args:
        snowflake_conn: object exposing ``connect_with_backoff()``.
        catalog_entry: catalog entry of the stream being synced.
        state: bookmark state passed to the singer bookmark helpers.
        columns: columns to select.
    """
    stream_id = catalog_entry.tap_stream_id
    common.whitelist_bookmark_keys(BOOKMARK_KEYS, stream_id, state)

    stream_md = metadata.to_map(catalog_entry.metadata).get((), {})
    configured_key = stream_md.get('replication-key')
    bookmarked_key = singer.get_bookmark(state, stream_id, 'replication_key')

    if configured_key != bookmarked_key:
        # The bookmarked key differs from the metadata: store the new key
        # and discard the value that belonged to the previous one.
        bookmark_value = None
        state = singer.write_bookmark(state, stream_id, 'replication_key',
                                      configured_key)
        state = singer.clear_bookmark(state, stream_id,
                                      'replication_key_value')
    else:
        bookmark_value = singer.get_bookmark(state, stream_id,
                                             'replication_key_value')

    stream_version = common.get_stream_version(stream_id, state)
    state = singer.write_bookmark(state, stream_id, 'version', stream_version)

    singer.write_message(
        singer.ActivateVersionMessage(stream=catalog_entry.stream,
                                      version=stream_version))

    with snowflake_conn.connect_with_backoff() as open_conn, \
            open_conn.cursor() as cur:
        # A bookmarked value for a 'date-time' formatted key is parsed with
        # pendulum before it is handed to the query builder.
        needs_datetime_parse = (
            bookmark_value is not None
            and catalog_entry.schema.properties[configured_key].format
            == 'date-time')
        if needs_datetime_parse:
            bookmark_value = pendulum.parse(bookmark_value)

        select_sql = common.generate_sql_query(
            catalog_entry, columns, bookmark_value=bookmark_value)
        common.sync_query(cur, catalog_entry, state, select_sql, columns,
                          stream_version, {})
 def test_generate_sql_full_sync_lookback(self):
     """FULL_TABLE with a rolling lookback adds a DATEADD filter on the time column."""
     table_id = f'{DB_NAME}.{SCHEMA_NAME}.EMPTY_TABLE_1'
     lookback_settings = {
         'time_unit': 'day',
         'time_amount': '7',
         'time_column': 'c_datetime',
     }
     discovery_config = {
         'tables': table_id,
         'metadata': {
             table_id: {
                 'replication-method': 'FULL_TABLE',
                 'rolling_lookback': lookback_settings,
             },
         },
     }
     catalog = test_utils.discover_catalog(
         self.snowflake_conn, discovery_config, select_all=True)
     stream = catalog.streams[0]
     selected_columns = list(stream.schema.properties.keys())

     actual_sql = common.generate_sql_query(stream, selected_columns)
     # The three lookback settings show up as unit, negated amount and column.
     expected_sql = f'SELECT "C_PK","C_INT" FROM "{DB_NAME}"."{SCHEMA_NAME}"."EMPTY_TABLE_1" WHERE "c_datetime" >= DATEADD(day, -7, SYSTIMESTAMP())'
     assert actual_sql == expected_sql
Exemple #6
0
def sync_table(snowflake_conn, catalog_entry, state, columns, stream_version):
    """Run a FULL_TABLE sync for a single stream.

    Emits an ACTIVATE_VERSION message up front on the initial replication,
    runs the generated SELECT through ``common.sync_query``, clears the
    primary-key bookmarks and finally emits ACTIVATE_VERSION again.

    Args:
        snowflake_conn: object exposing ``connect_with_backoff()``.
        catalog_entry: catalog entry of the stream being synced.
        state: state mapping holding the ``bookmarks`` section.
        columns: columns to select.
        stream_version: version attached to the ACTIVATE_VERSION message
            and forwarded to ``common.sync_query``.
    """
    stream_id = catalog_entry.tap_stream_id
    common.whitelist_bookmark_keys(BOOKMARK_KEYS, stream_id, state)

    stream_bookmark = state.get('bookmarks', {}).get(stream_id, {})
    has_version_key = 'version' in stream_bookmark

    initial_sync_done = singer.get_bookmark(state, stream_id,
                                            'initial_full_table_complete')
    bookmarked_version = singer.get_bookmark(state, stream_id, 'version')

    activate_version = singer.ActivateVersionMessage(
        stream=catalog_entry.stream, version=stream_version)

    # On the initial replication, send ACTIVATE_VERSION first so records
    # show up right away. It is skipped when a 'version' bookmark key is
    # present but holds None.
    has_null_version = has_version_key and bookmarked_version is None
    if not (initial_sync_done or has_null_version):
        singer.write_message(activate_version)

    with snowflake_conn.connect_with_backoff() as open_conn, \
            open_conn.cursor() as cur:
        select_sql = common.generate_sql_query(catalog_entry, columns)
        common.sync_query(cur, catalog_entry, state, select_sql, columns,
                          stream_version, {})

    # After a successful sync, drop the max-pk and last-pk-fetched bookmarks.
    for pk_bookmark in ('max_pk_values', 'last_pk_fetched'):
        singer.clear_bookmark(state, stream_id, pk_bookmark)

    singer.write_message(activate_version)