    # The mock arguments below imply @patch decorators that this excerpt does
    # not show; a plausible shape (an assumption, not taken from the source):
    #   @patch('target_snowflake.DbSync')
    #   @patch('target_snowflake.os.remove')
    def test_archive_load_files_log_based_replication(self, os_remove_mock,
                                                      dbSync_mock):
        self.config['tap_id'] = 'test_tap_id'
        self.config['archive_load_files'] = True

        with open(
                f'{os.path.dirname(__file__)}/resources/logical-streams.json',
                'r') as f:
            lines = f.readlines()

        instance = dbSync_mock.return_value
        instance.create_schema_if_not_exists.return_value = None
        instance.sync_table.return_value = None
        instance.put_to_stage.return_value = 'some-s3-folder/some-name_date_batch_hash.csg.gz'

        target_snowflake.persist_lines(self.config, lines)

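        # copy_to_archive is expected to receive three positional arguments:
        # the source s3 key, the archive key, and the archive metadata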
        copy_to_archive_args = instance.copy_to_archive.call_args[0]
        self.assertEqual(copy_to_archive_args[0],
                         'some-s3-folder/some-name_date_batch_hash.csg.gz')
        self.assertEqual(
            copy_to_archive_args[1],
            'test_tap_id/logical1_table2/some-name_date_batch_hash.csg.gz')
        self.assertDictEqual(
            copy_to_archive_args[2], {
                'tap': 'test_tap_id',
                'schema': 'logical1',
                'table': 'logical1_table2',
                'archived-by': 'pipelinewise_target_snowflake'
            })

    def test_archive_load_files_incremental_replication(
            self, os_remove_mock, dbSync_mock):
        self.config['tap_id'] = 'test_tap_id'
        self.config['archive_load_files'] = True
        self.config['s3_bucket'] = 'dummy_bucket'

        with open(
                f'{os.path.dirname(__file__)}/resources/messages-simple-table.json',
                'r') as f:
            lines = f.readlines()

        instance = dbSync_mock.return_value
        instance.create_schema_if_not_exists.return_value = None
        instance.sync_table.return_value = None
        instance.put_to_stage.return_value = 'some-s3-folder/some-name_date_batch_hash.csg.gz'

        target_snowflake.persist_lines(self.config, lines)

        copy_to_archive_args = instance.copy_to_archive.call_args[0]
        self.assertEqual(copy_to_archive_args[0],
                         'some-s3-folder/some-name_date_batch_hash.csg.gz')
        self.assertEqual(
            copy_to_archive_args[1],
            'test_tap_id/test_simple_table/some-name_date_batch_hash.csg.gz')
        self.assertDictEqual(
            copy_to_archive_args[2], {
                'tap': 'test_tap_id',
                'schema': 'tap_mysql_test',
                'table': 'test_simple_table',
                'archived-by': 'pipelinewise_target_snowflake',
                'incremental-key': 'id',
                'incremental-key-min': '1',
                'incremental-key-max': '5'
            })

    def test_persist_lines_with_only_state_messages(self, dbSync_mock,
                                                    flush_streams_mock):
        """
        Given only state messages, target should emit the last one
        """

        self.config['batch_size_rows'] = 5

        with open(
                f'{os.path.dirname(__file__)}/resources/streams_only_state.json',
                'r') as f:
            lines = f.readlines()

        instance = dbSync_mock.return_value
        instance.create_schema_if_not_exists.return_value = None
        instance.sync_table.return_value = None

        # catch stdout
        buf = io.StringIO()
        with redirect_stdout(buf):
            target_snowflake.persist_lines(self.config, lines)

        flush_streams_mock.assert_not_called()

        self.assertEqual(
            buf.getvalue().strip(),
            '{"bookmarks": {"tap_mysql_test-test_simple_table": {"replication_key": "id", '
            '"replication_key_value": 100, "version": 1}}}')

    def test_persist_40_records_with_batch_wait_limit(self, dbSync_mock,
                                                      flush_streams_mock,
                                                      dateTime_mock):

        start_time = datetime(2021, 4, 6, 0, 0, 0)
        increment = 11
        counter = itertools.count()

        # Move time forward by `increment` seconds every time utcnow() is called
        dateTime_mock.utcnow.side_effect = lambda: start_time + timedelta(
            seconds=increment * next(counter))

        self.config['batch_size_rows'] = 100
        self.config['batch_wait_limit_seconds'] = 10
        self.config['flush_all_streams'] = True

        # Expecting 40 records
        with open(
                f'{os.path.dirname(__file__)}/resources/logical-streams.json',
                'r') as f:
            lines = f.readlines()

        instance = dbSync_mock.return_value
        instance.create_schema_if_not_exists.return_value = None
        instance.sync_table.return_value = None

        flush_streams_mock.return_value = '{"currently_syncing": null}'

        target_snowflake.persist_lines(self.config, lines)

        # With an 11-second tick and a 10-second wait limit, every record
        # triggers a time-based flush: 40 flushes plus 1 final flush at the end
        assert flush_streams_mock.call_count == 41

    def test_loading_unicode_characters(self):
        """Loading unicode encoded characters"""
        tap_lines = test_utils.get_test_tap_lines(
            'messages-with-unicode-characters.json')

        # Load with default settings
        target_snowflake.persist_lines(self.config, tap_lines)

        # Get loaded rows from tables
        snowflake = DbSync(self.config)
        target_schema = self.config.get('schema', '')
        table_unicode = snowflake.query(
            "SELECT * FROM {}.test_table_unicode".format(target_schema))

        self.assertEqual(table_unicode, [
            {'C_INT': 1, 'C_PK': 1,
             'C_VARCHAR': 'Hello world, Καλημέρα κόσμε, コンニチハ'},
            {'C_INT': 2, 'C_PK': 2,
             'C_VARCHAR': 'Chinese: 和毛泽东 <<重上井冈山>>. 严永欣, 一九八八年.'},
            {'C_INT': 3, 'C_PK': 3,
             'C_VARCHAR': 'Russian: Зарегистрируйтесь сейчас на Десятую Международную Конференцию по'},
            {'C_INT': 4, 'C_PK': 4,
             'C_VARCHAR': 'Thai: แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช'},
            {'C_INT': 5, 'C_PK': 5,
             'C_VARCHAR': 'Arabic: لقد لعبت أنت وأصدقاؤك لمدة وحصلتم علي من إجمالي النقاط'},
            {'C_INT': 6, 'C_PK': 6,
             'C_VARCHAR': 'Special Characters: [",\'!@£$%^&*()]'},
        ])

    def test_loading_tables_with_no_encryption(self):
        """Loading multiple tables from the same input tap with various columns types"""
        tap_lines = test_utils.get_test_tap_lines(
            'messages-with-three-streams.json')

        # Turn off client-side encryption and load
        self.config['client_side_encryption_master_key'] = ''
        target_snowflake.persist_lines(self.config, tap_lines)

        self.assert_three_streams_are_into_snowflake()

    def test_loading_tables_with_client_side_encryption_and_wrong_master_key(
            self):
        """Loading multiple tables from the same input tap with various columns types"""
        tap_lines = test_utils.get_test_tap_lines(
            'messages-with-three-streams.json')

        # Turn on client-side encryption and load, using a well-formatted but wrong master key
        self.config[
            'client_side_encryption_master_key'] = "Wr0n6m45t3rKeY0123456789a0123456789a0123456="
        with assert_raises(snowflake.connector.errors.ProgrammingError):
            target_snowflake.persist_lines(self.config, tap_lines)

    def test_loading_tables_with_client_side_encryption(self):
        """Loading multiple tables from the same input tap with various columns types"""
        tap_lines = test_utils.get_test_tap_lines(
            'messages-with-three-streams.json')

        # Turn on client-side encryption and load
        self.config['client_side_encryption_master_key'] = os.environ.get(
            'CLIENT_SIDE_ENCRYPTION_MASTER_KEY')
        target_snowflake.persist_lines(self.config, tap_lines)

        self.assert_three_streams_are_into_snowflake()

    def test_loading_with_multiple_schema(self):
        """Loading table with multiple SCHEMA messages"""
        tap_lines = test_utils.get_test_tap_lines(
            'messages-with-multi-schemas.json')

        # Load with default settings
        target_snowflake.persist_lines(self.config, tap_lines)

        # Check if data loaded correctly
        self.assert_three_streams_are_into_snowflake(
            should_metadata_columns_exist=False,
            should_hard_deleted_rows=False)

    def test_loading_tables_with_hard_delete(self):
        """Loading multiple tables from the same input tap with deleted rows"""
        tap_lines = test_utils.get_test_tap_lines(
            'messages-with-three-streams.json')

        # Turning on hard delete mode
        self.config['hard_delete'] = True
        target_snowflake.persist_lines(self.config, tap_lines)

        # Check if data loaded correctly and metadata columns exist
        self.assert_three_streams_are_into_snowflake(
            should_metadata_columns_exist=True, should_hard_deleted_rows=True)

    def test_loading_tables_with_metadata_columns(self):
        """Loading multiple tables from the same input tap with various columns types"""
        tap_lines = test_utils.get_test_tap_lines(
            'messages-with-three-streams.json')

        # Turn on adding metadata columns
        self.config['add_metadata_columns'] = True
        target_snowflake.persist_lines(self.config, tap_lines)

        # Check if data loaded correctly and metadata columns exist
        self.assert_three_streams_are_into_snowflake(
            should_metadata_columns_exist=True)

    def persist_lines_with_cache(self, lines):
        """Enables table caching option and loads singer messages into snowflake.

        Table caching mechanism is creating and maintaining an extra table in snowflake about
        the table structures. It's very similar to the INFORMATION_SCHEMA.COLUMNS system views
        but querying INFORMATION_SCHEMA is slow especially when lot of taps running
        in parallel.

        Selecting from a real table instead of INFORMATION_SCHEMA and keeping it
        in memory while the target-snowflake is running results better load performance.
        """
        information_schema_cache = target_snowflake.load_information_schema_cache(self.config)
        target_snowflake.persist_lines(self.config, lines, information_schema_cache)
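
    # A minimal usage sketch for the helper above; the test name is
    # hypothetical, while the resource file and assertion helper are the ones
    # already used by the other examples here.
    def test_persist_lines_with_cache_sketch(self):
        """Sketch: load three streams through the information-schema cache helper"""
        tap_lines = test_utils.get_test_tap_lines('messages-with-three-streams.json')
        self.persist_lines_with_cache(tap_lines)

        # Same post-load check as the non-cached loading tests
        self.assert_three_streams_are_into_snowflake()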

    def test_persist_lines_with_40_records_and_batch_size_of_20_expect_flushing_once(
            self, dbSync_mock, flush_streams_mock, temp_file_mock):
        self.config['batch_size_rows'] = 20
        self.config['flush_all_streams'] = True

        with open(f'{os.path.dirname(__file__)}/resources/logical-streams.json', 'r') as f:
            lines = f.readlines()

        instance = dbSync_mock.return_value
        instance.create_schema_if_not_exists.return_value = None
        instance.sync_table.return_value = None

        flush_streams_mock.return_value = '{"currently_syncing": null}'

        target_snowflake.persist_lines(self.config, lines)

        flush_streams_mock.assert_called_once()

    def test_persist_lines_with_same_schema_expect_flushing_once(self, dbSync_mock,
                                                                 flush_streams_mock):
        self.config['batch_size_rows'] = 20

        with open(f'{os.path.dirname(__file__)}/resources/same-schemas-multiple-times.json', 'r') as f:
            lines = f.readlines()

        instance = dbSync_mock.return_value
        instance.create_schema_if_not_exists.return_value = None
        instance.sync_table.return_value = None

        flush_streams_mock.return_value = '{"currently_syncing": null}'

        target_snowflake.persist_lines(self.config, lines)

        self.assertEqual(1, flush_streams_mock.call_count)

    def test_verify_snowpipe_usage(self, dbSync_mock, flush_streams_mock):
        with open(
                f'{os.path.dirname(__file__)}/resources/same-schemas-multiple-times.json',
                'r') as f:
            lines = f.readlines()

        instance = dbSync_mock.return_value
        instance.create_schema_if_not_exists.return_value = None
        instance.sync_table.return_value = None

        flush_streams_mock.return_value = '{"currently_syncing": null}'

        target_snowflake.persist_lines(self.config, lines)

        flush_streams_mock.assert_called_once()

        assert target_snowflake._verify_snowpipe_usage() == 'dict with all key values=1'

    def test_message_order(self):
     """RECORD message without a previously received SCHEMA message should raise an exception"""
     tap_lines = test_utils.get_test_tap_lines('invalid-message-order.json')
     with assert_raises(Exception):
         target_snowflake.persist_lines(self.config, tap_lines)

    def test_invalid_json(self):
     """Receiving invalid JSONs should raise an exception"""
     tap_lines = test_utils.get_test_tap_lines('invalid-json.json')
     with assert_raises(json.decoder.JSONDecodeError):
         target_snowflake.persist_lines(self.config, tap_lines)
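
    # For reference: a minimal sketch of what the test_utils.get_test_tap_lines
    # helper used throughout these examples might look like. This is an
    # assumption mirroring how the unit tests above read their resource files,
    # not the helper's actual implementation.
    @staticmethod
    def _get_test_tap_lines_sketch(filename):
        """Read a singer messages resource file and return its lines."""
        path = f'{os.path.dirname(__file__)}/resources/{filename}'
        with open(path, 'r') as f:
            return f.readlines()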