def test_run(self):
        """Sync the ``gz_file_having_empty_csv`` stream and expect no upserts.

        Runs the check job, confirms exactly one catalog was discovered and
        that it covers every expected check stream, then clears state,
        selects the stream, runs the sync job and asserts that the target
        output contains zero upsert records.
        """
        self.setUpTestEnvironment(COMPRESSION_FOLDER_PATH)
        runner.run_check_job_and_check_status(self)

        catalogs = menagerie.get_catalogs(self.conn_id)
        missing_schema_msg = "unable to locate schemas for connection {}".format(
            self.conn_id)
        self.assertEqual(len(catalogs), 1, msg=missing_schema_msg)

        # Every expected stream must appear among the discovered stream ids.
        discovered_ids = {catalog['tap_stream_id'] for catalog in catalogs}
        self.assertTrue(
            self.expected_check_streams().issubset(discovered_ids),
            msg="Expected check streams are not subset of discovered catalog")

        # Start the sync from a clean slate.
        menagerie.set_state(self.conn_id, {})
        self.select_specific_catalog(catalogs, "gz_file_having_empty_csv")
        runner.run_sync_job_and_check_status(self)

        # The selected stream is expected to yield no rows at all.
        upserts = runner.get_upserts_from_target_output()
        self.assertEqual(0, len(upserts))
Esempio n. 2
0
    def run_sync(self, expected_streams):
        """Run check mode, select ``expected_streams``, sync, and return upserts.

        Only catalogs whose ``stream_name`` is contained in
        ``expected_streams`` are selected. The sync job's exit status is
        verified before the upserts read from the target output are returned.
        """
        conn_id = connections.ensure_connection(self)
        discovered = self.run_and_verify_check_mode(conn_id)

        # Table and field selection: keep only the streams under test.
        selected = []
        for entry in discovered:
            if entry.get('stream_name') in expected_streams:
                selected.append(entry)
        self.perform_and_verify_table_and_field_selection(conn_id, selected)

        job_name = runner.run_sync_mode(self, conn_id)

        # Verify tap and target exit codes.
        status = menagerie.get_exit_status(conn_id, job_name)
        menagerie.verify_sync_exit_status(self, status, job_name)

        return runner.get_upserts_from_target_output()
Esempio n. 3
0
    def test_run(self):
        """Verify LOG_BASED replication of the cows and chickens test tables.

        Flow:

        1. Run check mode and confirm the discovered catalogs match
           ``expected_check_streams()``.
        2. Select both streams with ``replication-method: LOG_BASED``, clear
           state and sync. Each stream must emit one record, a message
           sequence of activate_version / upsert / activate_version, and a
           bookmark carrying an ``lsn`` and the emitted table version.
        3. Insert one more row per table (cows through the ``dev``
           connection, chickens through the ``postgres`` connection), sync
           again and check that exactly the two new rows are upserted, that
           each bookmark's ``lsn`` did not move backwards and that the table
           version is unchanged.
        """
        # (catalog stream_name, target stream name, state bookmark key)
        streams = (
            ('postgres_logical_replication_test_cows',
             'public_postgres_logical_replication_test_cows',
             'dev-public-postgres_logical_replication_test_cows'),
            ('postgres_logical_replication_test_chickens',
             'public_postgres_logical_replication_test_chickens',
             'postgres-public-postgres_logical_replication_test_chickens'),
        )
        expected_record_counts = {target: 1 for _, target, _ in streams}

        conn_id = connections.ensure_connection(self)

        # run in check mode
        check_job_name = runner.run_check_mode(self, conn_id)

        # verify check exit codes
        exit_status = menagerie.get_exit_status(conn_id, check_job_name)
        menagerie.verify_check_exit_status(self, exit_status, check_job_name)

        # verify the tap discovered the right streams
        expected_check_streams = self.expected_check_streams()
        found_catalogs = [
            fc for fc in menagerie.get_catalogs(conn_id)
            if fc['tap_stream_id'] in expected_check_streams
        ]

        self.assertGreaterEqual(
            len(found_catalogs),
            2,
            msg="unable to locate schemas for connection {}".format(conn_id))

        found_catalog_names = {c['tap_stream_id'] for c in found_catalogs}
        diff = expected_check_streams.symmetric_difference(
            found_catalog_names)
        self.assertEqual(
            len(diff),
            0,
            msg="discovered schemas do not match: {}".format(diff))

        # verify that a catalog was persisted for each stream under test;
        # a missing one fails with a readable message instead of IndexError
        test_catalogs = []
        for stream_name, _, _ in streams:
            catalog = next(
                (c for c in found_catalogs
                 if c['stream_name'] == stream_name), None)
            self.assertIsNotNone(
                catalog,
                msg="no discovered catalog for stream {}".format(stream_name))
            test_catalogs.append(catalog)
        print("discovered streams are correct")

        additional_md = [{
            "breadcrumb": [],
            "metadata": {
                'replication-method': 'LOG_BASED'
            }
        }]
        for catalog in test_catalogs:
            connections.select_catalog_and_fields_via_metadata(
                conn_id, catalog,
                menagerie.get_annotated_schema(conn_id, catalog['stream_id']),
                additional_md)

        # clear state
        menagerie.set_state(conn_id, {})

        # run sync job
        sync_job_name = runner.run_sync_mode(self, conn_id)

        # verify tap and target exit codes
        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)

        record_count_by_stream = runner.examine_target_output_file(
            self, conn_id, self.expected_sync_streams(), self.expected_pks())
        self.assertEqual(record_count_by_stream, expected_record_counts)

        records_by_stream = runner.get_records_from_target_output()

        # the first sync must emit activate_version, upsert, activate_version
        table_versions = {}
        for _, target, _ in streams:
            stream_records = records_by_stream[target]
            table_versions[target] = stream_records['table_version']
            first_actions = [
                message['action']
                for message in stream_records['messages'][:3]
            ]
            self.assertEqual(
                first_actions,
                ['activate_version', 'upsert', 'activate_version'],
                msg="unexpected message sequence for {}".format(target))

        # verify state and bookmarks
        state = menagerie.get_state(conn_id)
        self.assertIsNone(state['currently_syncing'],
                          msg="expected state's currently_syncing to be None")

        first_sync_lsns = {}
        for _, target, bookmark_key in streams:
            bookmark = state['bookmarks'][bookmark_key]
            self.assertIsNotNone(
                bookmark['lsn'],
                msg="expected bookmark for stream to have an lsn")
            first_sync_lsns[bookmark_key] = bookmark['lsn']
            self.assertEqual(
                bookmark['version'],
                table_versions[target],
                msg="expected bookmark for stream to match version")

        #----------------------------------------------------------------------
        # invoke the sync job again after adding records
        #----------------------------------------------------------------------
        print("inserting 1 more cows and 1 more chickens")

        with db_utils.get_test_connection('dev') as conn:
            conn.autocommit = True
            with conn.cursor() as cur:
                # insert another cow
                self.cows_rec_2 = {'cow_name': "betty cow", 'cow_age': 21}
                insert_record(cur, test_table_name_cows, self.cows_rec_2)

        with db_utils.get_test_connection('postgres') as conn:
            conn.autocommit = True
            with conn.cursor() as cur:
                # insert another chicken
                self.chicken_rec_2 = {
                    'chicken_name': "burt chicken",
                    'chicken_age': 14
                }
                insert_record(cur, test_table_name_chickens,
                              self.chicken_rec_2)

        sync_job_name = runner.run_sync_mode(self, conn_id)

        # verify tap and target exit codes
        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)

        record_count_by_stream = runner.examine_target_output_file(
            self, conn_id, self.expected_sync_streams(), self.expected_pks())
        self.assertEqual(record_count_by_stream, expected_record_counts)

        # every upsert must carry an _sdc_lsn; drop it before comparing
        # because its value is not known ahead of time
        upserts = []
        for u in runner.get_upserts_from_target_output():
            self.assertIsNotNone(u.get('_sdc_lsn'))
            del u['_sdc_lsn']
            upserts.append(u)

        self.assertEqual([{
            '_sdc_deleted_at': None,
            'cow_age': 21,
            'id': 2,
            'cow_name': 'betty cow'
        }, {
            'chicken_name': 'burt chicken',
            '_sdc_deleted_at': None,
            'chicken_age': 14,
            'id': 2
        }], upserts)

        print("inserted record is correct")

        state = menagerie.get_state(conn_id)
        self.assertIsNone(state['currently_syncing'],
                          msg="expected state's currently_syncing to be None")

        for _, target, bookmark_key in streams:
            bookmark = state['bookmarks'][bookmark_key]
            self.assertIsNotNone(
                bookmark['lsn'],
                msg="expected bookmark for stream {} to have an lsn".format(
                    bookmark_key))
            # assertGreaterEqual reports both values when the check fails
            self.assertGreaterEqual(
                bookmark['lsn'],
                first_sync_lsns[bookmark_key],
                msg="expected lsn for stream {} not to move backwards".format(
                    bookmark_key))

            # table_version does NOT change
            self.assertEqual(
                bookmark['version'],
                table_versions[target],
                msg="expected bookmark for stream {} to match version".format(
                    bookmark_key))