def select_found_catalogs(self,
                          conn_id,
                          found_catalogs,
                          only_streams=None):
        """Select discovered catalogs (optionally restricted to
        *only_streams*) and activate their fields via metadata.

        conn_id        -- connection identifier passed through to menagerie
                          and connections calls
        found_catalogs -- discovered catalog dicts from a check job
        only_streams   -- optional collection of tap_stream_ids to keep;
                          falsy means "select everything"
        """
        catalog_entries = []
        for catalog in found_catalogs:
            # When a subset was requested, skip streams outside it.
            if only_streams and catalog["tap_stream_id"] not in only_streams:
                continue

            annotated_schema = menagerie.select_catalog(conn_id, catalog)
            catalog_entries.append({
                "key_properties": catalog.get("key_properties"),
                "schema": annotated_schema,
                "tap_stream_id": catalog.get("tap_stream_id"),
                "replication_method": catalog.get("replication_method"),
                "replication_key": catalog.get("replication_key"),
            })

        # Second pass: push field selection for each prepared entry.
        for entry in catalog_entries:
            connections.select_catalog_and_fields_via_metadata(
                conn_id, entry, {"annotated-schema": entry["schema"]})
# --- Esempio n. 2 (scraped-sample separator; the stray "0" was a site score) ---
    def perform_field_selection(self, conn_id, catalog):
        """Annotate *catalog* on the connection and return a sync-ready
        catalog entry built from its selection-relevant fields."""
        annotated_schema = menagerie.select_catalog(conn_id, catalog)

        # Build the entry field by field, preserving the original key order.
        entry = {'key_properties': catalog.get('key_properties'),
                 'schema': annotated_schema}
        for field in ('tap_stream_id', 'replication_method',
                      'replication_key'):
            entry[field] = catalog.get(field)
        return entry
# --- Esempio n. 3 (scraped-sample separator; the stray "0" was a site score) ---
    def test_run(self):
        """Full discovery-and-sync regression run.

        Discovers catalogs, selects the expected streams with all fields,
        clears state, runs two consecutive syncs, and asserts that the
        second sync returns no more records than the first and that
        bookmarks do not move backwards.
        """

        conn_id = connections.ensure_connection(self, payload_hook=None)

        # Run the tap in check mode
        check_job_name = runner.run_check_mode(self, conn_id)

        # Verify the check's exit status
        exit_status = menagerie.get_exit_status(conn_id, check_job_name)
        menagerie.verify_check_exit_status(self, exit_status, check_job_name)

        # Verify that there are catalogs found
        catalog_entries = menagerie.get_catalogs(conn_id)

        # Select all streams and all fields
        for entry in catalog_entries:

            if entry.get('tap_stream_id') in self.expected_sync_streams():
                schema = menagerie.select_catalog(conn_id, entry)

                catalog_entry = {
                    'key_properties': entry.get('key_properties'),
                    'schema': schema,
                    'tap_stream_id': entry.get('tap_stream_id'),
                    'replication_method': entry.get('replication_method'),
                    'replication_key': entry.get('replication_key')
                }

                # NOTE(review): the schema is passed bare here, while sibling
                # helpers in this file wrap it as {"annotated-schema": schema};
                # confirm which form this API actually expects.
                connections.select_catalog_and_fields_via_metadata(
                    conn_id, catalog_entry, schema)

        # found_catalogs = menagerie.get_catalogs(conn_id)
        self.assertGreater(
            len(catalog_entries),
            0,
            msg="unable to locate schemas for connection {}".format(conn_id))

        set_of_discovered_streams = {
            entry['tap_stream_id']
            for entry in catalog_entries
        }
        self.assertTrue(
            self.expected_check_streams().issubset(set_of_discovered_streams),
            msg="Expected check streams are not a subset of discovered streams"
        )

        # Reset saved state so the first sync starts from scratch.
        menagerie.set_state(conn_id, {})

        # Verify that tap and target exit codes are 0
        first_record_count = self.run_sync_and_get_record_count(conn_id)

        # verify that we only sync selected streams
        self.assertEqual(set(first_record_count.keys()),
                         self.expected_sync_streams())

        first_state = menagerie.get_state(conn_id)

        # NOTE(review): the min/max bookmark values below are computed for
        # both syncs but never asserted against — confirm whether checks on
        # them were intended.
        first_sync_records = runner.get_records_from_target_output()
        first_max_bookmarks = self.max_bookmarks_by_stream(first_sync_records)
        first_min_bookmarks = self.min_bookmarks_by_stream(first_sync_records)

        # Run second sync
        second_record_count = self.run_sync_and_get_record_count(conn_id)
        second_state = menagerie.get_state(conn_id)

        second_sync_records = runner.get_records_from_target_output()
        second_max_bookmarks = self.max_bookmarks_by_stream(
            second_sync_records)
        second_min_bookmarks = self.min_bookmarks_by_stream(
            second_sync_records)

        for stream in self.expected_sync_streams():
            # Verify first sync returns more data or same amount of data
            self.assertGreaterEqual(
                first_record_count.get(stream, 0),
                second_record_count.get(stream, 0),
                msg="Second sync didn't always return less records for stream {}"
                .format(stream))

            # Bookmarks must be monotonically non-decreasing between syncs.
            # Assumes bookmark values are directly comparable (e.g. ISO-8601
            # timestamp strings) — TODO confirm the tap's bookmark format.
            self.assertGreaterEqual(second_state['bookmarks'][stream],
                                    first_state['bookmarks'][stream])