Example #1
0
                         set(expected_inserted_record.keys()),
                         msg="keys for expected_record_1 are wrong: {}".format(
                             set(actual_record_1.keys()).symmetric_difference(
                                 set(expected_inserted_record.keys()))))

        for k in actual_record_1.keys():
            self.assertEqual(actual_record_1[k],
                             expected_inserted_record[k],
                             msg="{} != {} for key {}".format(
                                 actual_record_1[k],
                                 expected_inserted_record[k], k))

        print("inserted record is correct")

        # verify state and bookmarks
        state = menagerie.get_state(conn_id)

        bookmark = state['bookmarks'][
            'dev-public-postgres_full_table_replication_array_test']
        self.assertIsNone(state['currently_syncing'],
                          msg="expected state's currently_syncing to be None")

        self.assertIsNone(
            bookmark.get('lsn'),
            msg=
            "expected bookmark for stream to have NO lsn because we are using full-table replication"
        )


SCENARIOS.add(PostgresFullTableRepArrays)
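Note: the assertions above hinge on the shape of the persisted state for a full-table stream. A minimal sketch of that assumed shape (the version value is hypothetical; what matters is the None currently_syncing and the absence of an 'lsn' key):

state = {
    "currently_syncing": None,  # nothing mid-sync once the job completes
    "bookmarks": {
        "dev-public-postgres_full_table_replication_array_test": {
            "version": 1603347300000,  # table_version only; no 'lsn' key
        }
    },
}
assert state["currently_syncing"] is None
assert state["bookmarks"][
    "dev-public-postgres_full_table_replication_array_test"].get("lsn") is None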
Example #2
0
                    msg=
                    "incorrect replication_key specified in bookmark for stream `{}`"
                    .format(k))
            else:
                self.assertFalse(
                    'version' in v,
                    msg=
                    "expected bookmark for stream `{}` to not have a version key"
                    .format(k))
                self.assertTrue(
                    'initial_full_table_complete' in v,
                    msg=
                    "expected bookmark for stream `{}` to have a true initial_full_table_complete key"
                    .format(k))

        print("state and bookmarks are correct")

        # verify incremental table_version didn't change
        incremental_table_new_table_version = bookmarks[
            'tap_tester_mysql_0-incremental']['version']

        self.assertEqual(
            incremental_table_initial_table_version,
            incremental_table_new_table_version,
            msg=
            "Expected incrementally-replicated table's table_version to remain unchanged over multiple invocations."
        )


SCENARIOS.add(MySQLFullAndIncremental)
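Note: a minimal sketch of the two bookmark shapes the assertions above distinguish; the key names and values here are illustrative, not taken from the test.

# incremental streams keep a replication key, its value, and a table version
incremental_bookmark = {
    "replication_key": "updated_at",
    "replication_key_value": "2021-01-01T00:00:00+00:00",
    "version": 1603347300000,
}
# full-table streams keep a completion flag and no 'version' key
full_table_bookmark = {
    "initial_full_table_complete": True,
}
assert "version" not in full_table_bookmark
assert full_table_bookmark["initial_full_table_complete"]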
Example #3
0
                # verify that each stream has fewer records than the first connection sync
                self.assertGreater(
                    first_sync_record_count.get(stream, 0),
                    second_sync_record_count.get(stream, 0),
                    msg="second had more records, start_date usage not verified"
                )

                # verify all data from 2nd sync >= start_date
                target_mark = second_min_bookmarks.get(stream, {"mark": None})
                target_value = next(iter(
                    target_mark.values()))  # there should be only one

                if target_value:

                    # it's okay if there isn't target data for a stream
                    try:
                        target_value = self.local_to_utc(parse(target_value))

                        # verify that the minimum bookmark sent to the target for the second sync
                        # is greater than or equal to the start date
                        self.assertGreaterEqual(
                            target_value,
                            self.local_to_utc(parse(self.start_date)))

                    except (OverflowError, ValueError, TypeError):
                        print("bookmarks cannot be converted to dates, "
                              "can't test start_date for {}".format(stream))


SCENARIOS.add(StartDateTest)
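Note: local_to_utc is a base-class helper that is not shown above; a plausible sketch, assuming naive datetimes are treated as already being UTC.

from datetime import datetime, timezone
from dateutil.parser import parse

def local_to_utc(date):
    """Normalize a datetime to UTC (a guess at the base-class helper)."""
    if date.tzinfo is None:
        return date.replace(tzinfo=timezone.utc)
    return date.astimezone(timezone.utc)

assert local_to_utc(parse("2021-06-01T12:00:00-04:00")) == \
    datetime(2021, 6, 1, 16, 0, tzinfo=timezone.utc)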
Example #4
0
        found_catalogs = menagerie.get_catalogs(conn_id)
        self.select_all_streams_and_fields(conn_id, found_catalogs, select_all_fields=False)

        # Run a sync job using orchestrator
        record_count_by_stream = self.run_sync(conn_id)

        actual_fields_by_stream = runner.examine_target_output_for_fields()

        for stream in self.expected_streams():
            with self.subTest(stream=stream):

                # verify that you get more than a page of data
                # SKIP THIS ASSERTION FOR STREAMS WHERE YOU CANNOT GET
                # MORE THAN 1 PAGE OF DATA IN THE TEST ACCOUNT
                self.assertGreater(
                    record_count_by_stream.get(stream, -1),
                    self.expected_metadata().get(stream, {}).get(self.API_LIMIT, 0),
                    msg="The number of records is not over the stream max limit")

                # verify that only the automatic fields are sent to the target
                expected_fields_for_stream = (self.expected_primary_keys().get(stream, set()) |
                                              self.top_level_replication_key_fields().get(stream, set()) |
                                              self.expected_foreign_keys().get(stream, set()))
                self.assertEqual(
                    actual_fields_by_stream.get(stream, set()),
                    expected_fields_for_stream,
                    msg="The fields sent to the target are not the automatic fields.\nExpected: {}\nActual: {}".format(expected_fields_for_stream, actual_fields_by_stream.get(stream, set())))


SCENARIOS.add(MinimumSelectionTest)
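Note: the "automatic" field sets expected above are conventionally derived from Singer discovery metadata; a sketch of that derivation (the metadata entries use the standard Singer shape, the helper itself is illustrative).

def automatic_fields(stream_metadata):
    """Collect field names whose inclusion is 'automatic' from Singer
    metadata entries of the form {'breadcrumb': [...], 'metadata': {...}}."""
    fields = set()
    for entry in stream_metadata:
        breadcrumb = entry.get("breadcrumb", [])
        if (len(breadcrumb) == 2 and breadcrumb[0] == "properties"
                and entry["metadata"].get("inclusion") == "automatic"):
            fields.add(breadcrumb[1])
    return fields

md = [{"breadcrumb": ["properties", "id"], "metadata": {"inclusion": "automatic"}},
      {"breadcrumb": ["properties", "name"], "metadata": {"inclusion": "available"}}]
assert automatic_fields(md) == {"id"}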
Example #5
0
        for k, v in update_message['data'].items():
            self.assertEqual(v,
                             expected_updated_rec[k],
                             msg="{} != {} for key {}".format(
                                 v, expected_updated_rec[k], k))

        #check state again
        state = menagerie.get_state(conn_id)
        chicken_bookmark = state['bookmarks'][
            'logical_1-public-postgres_logical_replication_test']
        self.assertIsNone(state['currently_syncing'],
                          msg="expected state's currently_syncing to be None")
        self.assertIsNotNone(
            chicken_bookmark['lsn'],
            msg=
            "expected bookmark for stream public-postgres_logical_replication_test to have an lsn"
        )
        lsn_7 = chicken_bookmark['lsn']
        self.assertGreaterEqual(lsn_7, lsn_6)

        #table_version does NOT change
        self.assertEqual(
            chicken_bookmark['version'],
            table_version,
            msg=
            "expected bookmark for stream public-postgres_logical_replication_test to match version"
        )


SCENARIOS.add(PostgresLogicalRep)
Example #6
0
        streams_with_bookmarks = self.expected_sync_streams()
        streams_with_bookmarks.remove('campaigns')
        streams_with_bookmarks.remove('deal_pipelines')
        bad_streams = streams_with_bookmarks.intersection(
            record_count_by_stream.keys())
        self.assertEqual(
            len(bad_streams),
            0,
            msg="still pulled down records from {} despite future bookmarks".
            format(bad_streams))

        state = menagerie.get_state(conn_id)

        # NB: Companies and engagements won't set a bookmark in the future.
        state["bookmarks"].pop("companies")
        state["bookmarks"].pop("engagements")
        future_bookmarks["bookmarks"].pop("companies")
        future_bookmarks["bookmarks"].pop("engagements")

        self.assertEqual(
            state,
            future_bookmarks,
            msg=
            "state should not have been modified because we didn't replicate any data"
        )
        bookmarks = state.get('bookmarks')
        bookmark_streams = set(state.get('bookmarks').keys())


SCENARIOS.add(HubSpotBookmarks2)
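Note: the setup this test depends on is not shown above: state is seeded with far-future bookmarks before the sync so that no records qualify. A sketch of that step, meant to run inside the test method; the stream and bookmark key names are hypothetical.

future_bookmarks = {
    "bookmarks": {
        "contacts": {"lastmodifieddate": "2525-01-01T00:00:00Z"},  # hypothetical key
        "deals": {"hs_lastmodifieddate": "2525-01-01T00:00:00Z"},  # hypothetical key
    }
}
menagerie.set_state(conn_id, future_bookmarks)  # then run the sync above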
Example #7
0
        # ----------- Subsequent Oplog Sync ---------
        #  -----------------------------------

        # Run sync
        sync_job_name = runner.run_sync_mode(self, conn_id)

        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)

        # verify the persisted schema was correct
        messages_by_stream = runner.get_records_from_target_output()

        for stream_name in self.expected_sync_streams():
            stream_records = [
                x for x in messages_by_stream[stream_name]['messages']
                if x.get('action') == 'upsert'
            ]
            for record in stream_records:
                self.assertIn(set(record['data'].keys()),
                              projection_mapping['expected_keys'])

    def test_run(self):
        for projection_mapping in self.projection_expected_keys_list():
            self.run_single_projection(projection_mapping)


SCENARIOS.add(MongoDBProjection)
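Note: given how projection_mapping['expected_keys'] is used above, projection_expected_keys_list() presumably yields mappings of the following shape; the field names are hypothetical.

projection_mapping = {
    # the projection handed to MongoDB when the stream is selected
    "projection": {"_id": 1, "name": 1},
    # every synced record's key set must equal one of these
    "expected_keys": [{"_id"}, {"_id", "name"}],
}
assert set({"_id": 1, "name": "x"}.keys()) in projection_mapping["expected_keys"]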
Example #8
0
        first_state = menagerie.get_state(conn_id)

        first_sync_records = runner.get_records_from_target_output()
        first_max_bookmarks = self.max_bookmarks_by_stream(first_sync_records)
        first_min_bookmarks = self.min_bookmarks_by_stream(first_sync_records)

        # Run second sync
        second_record_count = self.run_sync_and_get_record_count(conn_id)
        second_state = menagerie.get_state(conn_id)

        second_sync_records = runner.get_records_from_target_output()
        second_max_bookmarks = self.max_bookmarks_by_stream(
            second_sync_records)
        second_min_bookmarks = self.min_bookmarks_by_stream(
            second_sync_records)

        for stream in self.expected_sync_streams():
            # Verify first sync returns more data or same amount of data
            self.assertGreaterEqual(
                first_record_count.get(stream, 0),
                second_record_count.get(stream, 0),
                msg="Second sync didn't always return less records for stream {}"
                .format(stream))

            self.assertGreaterEqual(second_state['bookmarks'][stream],
                                    first_state['bookmarks'][stream])


SCENARIOS.add(LinkedinAdsSyncTest)
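Note: max_bookmarks_by_stream and min_bookmarks_by_stream come from the test base class and are not shown; a sketch of what the max variant could look like, assuming upsert messages and one replication key per stream (expected_replication_keys() is also an assumption).

def max_bookmarks_by_stream(self, sync_records):
    """For each stream, the largest replication-key value among upserts."""
    maxes = {}
    for stream, data in sync_records.items():
        rep_key = next(iter(
            self.expected_replication_keys().get(stream, set())), None)
        if rep_key is None:
            continue
        values = [m["data"][rep_key] for m in data.get("messages", [])
                  if m.get("action") == "upsert" and rep_key in m.get("data", {})]
        if values:
            maxes[stream] = {rep_key: max(values)}
    return maxes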
Example #9
0
            self.assertEqual(
                'activate_version',
                records_by_stream[stream_name]['messages'][51]['action'])

        # assert that the stream's table version changed from the original version
        final_state = menagerie.get_state(conn_id)
        self.assertNotEqual(
            original_version,
            final_state.get('bookmarks', {}).get('simple_db-simple_coll_1',
                                                 {}).get('version'))

        # assert that all rows in the collection were synced
        for stream_id, row_count in self.expected_row_counts().items():
            self.assertGreaterEqual(record_count_by_stream[stream_id],
                                    row_count)

        # assert that each stream has oplog bookmarks and an initial_full_table_complete=True bookmark
        self.assertIsNotNone(
            final_state.get('bookmarks', {}).get('simple_db-simple_coll_1',
                                                 {}).get('oplog_ts_time'))
        self.assertIsNotNone(
            final_state.get('bookmarks', {}).get('simple_db-simple_coll_1',
                                                 {}).get('oplog_ts_inc'))
        self.assertTrue(
            final_state.get('bookmarks',
                            {}).get('simple_db-simple_coll_1',
                                    {}).get('initial_full_table_complete'))


SCENARIOS.add(MongoDBOplogAgedOut)
Example #10
0
                # verify that you get at least one new record on the second sync
                self.assertGreaterEqual(
                    len(second_sync_created_objects),
                    1,
                    msg="second sync didn't have created objects",
                )

                if stream == "balance_transactions":
                    sources = [
                        record.get("data", {}).get("source")
                        for record in second_sync_created_objects
                    ]

                    self.assertIn(new_objects['payouts']['id'], sources)
                    self.assertIn(new_objects['charges']['id'], sources)

                    continue

                # verify the new object is in the list of created objects
                # from the second sync
                self.assertTrue(
                    any(new_objects[stream]["id"] == record.get("data",
                                                                {}).get("id")
                        for record in second_sync_created_objects))

                if stream in streams_to_create:
                    delete_object(stream, new_objects[stream]["id"])


SCENARIOS.add(CreateObjectTest)
Example #11
0
                # the first run
                first_data = next(
                    record["data"] for record in first_sync_created.get(
                        stream, {}).get("messages", [])
                    if record.get("data", {}).get("id") == updated[stream])

                second_data = next(
                    record["data"] for record in second_sync_updated.get(
                        stream, {}).get("messages", [])
                    if record.get("data", {}).get("id") == updated[stream])

                # verify the updated timestamp is greater in the second sync
                self.assertGreater(
                    second_data["updated"],
                    first_data["updated"],
                    "updated timestamp for second sync is not greater than first sync",
                )

                # verify the metadata[test] value actually changed
                self.assertNotEqual(
                    second_data["metadata"].get("test_value", 0),
                    first_data["metadata"].get("test_value", 0),
                    "the test metadata should be different",
                )

                if stream in new_objects:
                    delete_object(stream, new_objects[stream]["id"])


SCENARIOS.add(EventUpdatesTest)
Example #12
0
                    first_sync_record_count.get(stream, 0),
                    second_sync_record_count.get(stream, 0),
                    msg=
                    "second sync did not return fewer records, bookmark usage not verified"
                )

                # verify all data from 2nd sync >= 1st bookmark
                target_value = second_min_bookmarks.get(stream, {
                    None: None
                }).get(stream_bookmark_key)
                try:
                    if target_value:
                        if isinstance(target_value, str):
                            target_value = self.local_to_utc(
                                parse(target_value))
                        if isinstance(target_value, int):
                            target_value = self.local_to_utc(
                                dt.utcfromtimestamp(target_value))

                except (OverflowError, ValueError, TypeError):
                    print(
                        "bookmarks cannot be converted to dates, comparing values directly"
                    )

                # verify that the minimum bookmark sent to the target for the second sync
                # is greater than or equal to the bookmark from the first sync
                self.assertGreaterEqual(target_value, state_value)


SCENARIOS.add(BookmarkTest)
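Note: the string/epoch branches above can be factored into one helper; a sketch that mirrors the test's conversions and falls back to the raw value when parsing fails (local_to_utc is the base-class helper assumed earlier).

from datetime import datetime as dt
from dateutil.parser import parse

def normalize_bookmark(self, value):
    """Coerce an ISO-8601 string or epoch-seconds int to a UTC datetime;
    anything unparseable is returned unchanged and compared directly."""
    try:
        if isinstance(value, str):
            return self.local_to_utc(parse(value))
        if isinstance(value, int):
            return self.local_to_utc(dt.utcfromtimestamp(value))
    except (OverflowError, ValueError, TypeError):
        print("bookmark cannot be converted to a date, comparing directly")
    return value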
Example #13
0
            target_min_value = first_min_bookmarks.get(
                stream, {None: None}).get(stream_bookmark_key)

            if target_value:
                # Convert everything to datetime.
                state_value = utils.strptime_with_tz(state_value)
                target_value = utils.strptime_with_tz(target_value)
                target_min_value = utils.strptime_with_tz(target_min_value)

                # verify that there is data with different bookmark values - setup necessary
                self.assertGreaterEqual(target_value, target_min_value, msg="Data isn't set up to be able to test bookmarks")

                # verify state agrees with target data after 1st sync
                self.assertEqual(state_value, target_value, msg="The bookmark value isn't correct based on target data")

                # verify that you get less data the 2nd time around
                self.assertGreater(
                    first_sync_record_count.get(stream, 0),
                    second_sync_record_count.get(stream, 0),
                    msg="second sync for stream {} didn't have less records, bookmark usage not verified".format(stream))

                if len(second_sync_records) > 0 and len(second_min_bookmarks) > 0:
                    # verify all data from 2nd sync >= 1st bookmark
                    target_value = second_min_bookmarks.get(stream, {None: None}).get(stream_bookmark_key)
                    target_value = utils.strptime_with_tz(target_value)
                    # verify that the minimum bookmark sent to the target for the second sync
                    # is greater than or equal to the bookmark from the first sync
                    self.assertGreaterEqual(target_value, state_value)


SCENARIOS.add(AsanaBase)
Example #14
0
            self.assertEqual(config['num_rows'] - 1000,
                             record_count_by_stream[table_name])

            # activate_version message is the last message and not the first
            self.assertNotEqual(
                'activate_version',
                records_by_stream[table_name]['messages'][0]['action'])
            self.assertEqual(
                'activate_version',
                records_by_stream[table_name]['messages'][-1]['action'])

            # assert that the state has an initial_full_table_complete == True
            self.assertTrue(
                state['bookmarks'][table_name]['initial_full_table_complete'])

            # assert that there is a version bookmark in state and it is
            # the same version as the state passed in
            self.assertEqual(
                interrupted_state['bookmarks'][table_name]['version'],
                state['bookmarks'][table_name]['version'])

            self.assertIsNone(
                state['bookmarks'][table_name].get('last_evaluated_key'))

            self.assertTrue(state['bookmarks'][table_name].get(
                'initial_full_table_complete', False))


SCENARIOS.add(DynamoDBFullTableInterruptible)
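Note: interrupted_state is injected before the resuming sync but is not shown above; a sketch of its assumed shape (table name and values hypothetical; last_evaluated_key uses DynamoDB's attribute-value format).

interrupted_state = {
    "bookmarks": {
        "simple_table_1": {
            "version": 1603347300000,
            "last_evaluated_key": {"int_id": {"N": "1000"}},  # where the scan stopped
            "initial_full_table_complete": False,
        }
    }
}
menagerie.set_state(conn_id, interrupted_state)  # then re-run the sync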
Example #15
0
                        }),
                    msg=
                    "Not all non key properties are set to available in metadata"
                )

                # Verify row-count metadata matches expectations
                self.assertEqual(expected_row_count,
                                 stream_properties['row-count'])

                # Verify selected metadata is absent for all streams
                self.assertNotIn('selected', stream_properties.keys())

                # Verify is-view metadata is False
                self.assertFalse(stream_properties['is-view'])

                # Verify no forced-replication-method is present in metadata
                self.assertNotIn(self.REPLICATION_METHOD,
                                 stream_properties.keys())

                # Verify schema and db match expectations
                self.assertEqual(test_schema_name,
                                 stream_properties['schema-name'])
                self.assertEqual(test_db, stream_properties['database-name'])

                # Verify schema types match expectations
                self.assertDictEqual(expected_fields_to_datatypes,
                                     actual_fields_to_datatypes)


SCENARIOS.add(PostgresDiscovery)
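Note: stream_properties above is the table-level (empty-breadcrumb) metadata returned by discovery; a sketch of its assumed shape with hypothetical values, matching the keys the assertions inspect.

stream_properties = {
    "row-count": 3,
    "is-view": False,
    "schema-name": "public",
    "database-name": "dev",
    "table-key-properties": ["id"],
    # no 'selected' and no 'forced-replication-method' keys before selection
}
assert "selected" not in stream_properties
assert stream_properties["is-view"] is False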
Example #16
0
                first_data = [
                    record["data"] for record in first_sync_records.get(
                        stream, {}).get("messages", [])
                ]
                second_data = [
                    record["data"] for record in second_sync_records.get(
                        stream, {}).get("messages", [])
                ]

                same_records = 0
                for first_record in first_data:
                    first_value = json.dumps(first_record, sort_keys=True)

                    for compare_record in second_data:
                        compare_value = json.dumps(compare_record,
                                                   sort_keys=True)

                        if first_value == compare_value:
                            second_data.remove(compare_record)
                            same_records += 1
                            break

                self.assertEqual(
                    len(first_data),
                    same_records,
                    msg=
                    "Not all data from the first sync was in the second sync")


SCENARIOS.add(FullReplicationTest)
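Note: the pairwise loop above is quadratic; the same multiset containment can be written with collections.Counter over the canonical JSON strings. A sketch, equivalent under the same sort_keys serialization, meant to run in place of the loop.

import json
from collections import Counter

first_counts = Counter(json.dumps(r, sort_keys=True) for r in first_data)
second_counts = Counter(json.dumps(r, sort_keys=True) for r in second_data)

# first is a sub-multiset of second iff intersecting with second changes nothing
self.assertEqual(first_counts, first_counts & second_counts,
                 msg="Not all data from the first sync was in the second sync")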
Example #17
0
            self.assertEqual(
                'activate_version',
                messages_by_stream[table_name]['messages'][-1]['action'])

            # assert that the state has an initial_full_table_complete == True
            self.assertTrue(
                state['bookmarks'][table_name]['initial_full_table_complete'])
            # assert that there is a version bookmark in state
            first_versions[table_name] = state['bookmarks'][table_name][
                'version']
            self.assertIsNotNone(first_versions[table_name])

        for config in table_configs:
            table_name = config['TableName']

            for message in messages_by_stream[table_name]['messages']:
                if message['action'] == 'upsert':
                    if not message['data'].get('_sdc_deleted_at'):
                        top_level_keys = {*message['data'].keys()}
                        self.assertEqual(config['top_level_keys'],
                                         top_level_keys)
                        for list_key in config['top_level_list_keys']:
                            self.assertIsInstance(
                                message['data'][list_key], list)
                        self.assertEqual(
                            config['nested_map_keys']['map_field'],
                            {*message['data']['map_field'].keys()})


SCENARIOS.add(DynamoDBLogBasedProjections)
Example #18
0
        zero_count_streams = {
            k
            for k, v in record_count_by_stream.items() if v == 0
        }
        self.assertFalse(
            zero_count_streams,
            msg="The following streams did not sync any rows {}".format(
                zero_count_streams),
        )

        # Verify that all streams sync only one row for incremental sync
        sync_job_name = runner.run_sync_mode(self, conn_id)
        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
        record_count_by_stream = runner.examine_target_output_file(
            self, conn_id, self.expected_sync_streams(), self.expected_pks())
        # Exclude streams in which multiple rows may exist for a bookmark value
        error_incremental_streams = {
            k
            for k, v in record_count_by_stream.items()
            if v > 1 and k not in config["exclude_streams"]
        }
        self.assertFalse(
            error_incremental_streams,
            msg="The following streams synced more than 1 row {}".format(
                error_incremental_streams),
        )


SCENARIOS.add(TapCombinedTest)
Example #19
0
        self.assertGreaterEqual(lsn_cows_2, lsn_cows_1)

        chickens_bookmark = state['bookmarks'][
            'dev-public-postgres_logical_replication_test_chickens']
        self.assertIsNotNone(
            chickens_bookmark['lsn'],
            msg=
            "expected bookmark for stream public-postgres_logical_replication_test_chickens to have an lsn"
        )
        lsn_chickens_2 = chickens_bookmark['lsn']
        self.assertGreaterEqual(lsn_chickens_2, lsn_chickens_1)

        #table_version does NOT change
        self.assertEqual(
            chickens_bookmark['version'],
            table_version_chickens,
            msg=
            "expected bookmark for stream public-postgres_logical_replication_test to match version"
        )

        #table_version does NOT change
        self.assertEqual(
            cows_bookmark['version'],
            table_version_cows,
            msg=
            "expected bookmark for stream public-postgres_logical_replication_test to match version"
        )


SCENARIOS.add(PostgresLogicalRepMultipleTables)
Example #20
0
        # verify tap and target exit codes
        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)

        # check that version from state is unchanged
        state = menagerie.get_state(conn_id)
        bookmark = state['bookmarks'][self.tap_stream_id()]

        self.assertEqual(expected_table_version, bookmark['version'])

        # verify the persisted schema was correct
        records_by_stream = runner.get_records_from_target_output()
        self.maxDiff = None
        for stream, recs in records_by_stream.items():
            self.assertEqual(
                recs['schema'],
                expected_schemas[stream],
                msg=
                "Persisted schema did not match expected schema for stream `{}`."
                .format(stream))

        # record count should be empty as we did not persist anything to the gate
        record_count_by_stream = runner.examine_target_output_file(
            self, conn_id, expected_sync_streams, expected_pks)

        self.assertEqual(record_count_by_stream, {})


SCENARIOS.add(MySQLBinlog)
            "date_field": "2019-08-15T19:29:14.578000Z",
            "string_field": "a sample string",
            "object_field": {"obj_field_2_key": "obj_field_2_val",
                             "obj_field_1_key": "obj_field_1_val"},
            "null_field": None,
            "regex_field": {"flags": 0, "pattern": ".*"},
            "object_id_field": "313233343536373839313233",
            "64_bit_integer_field": 34359738368,
            "32_bit_integer_field": 32,
            "array_field": ["array_item_1",
                            "array_item_2",
                            "array_item_3"],
            "binary_data_field": "YSBiaW5hcnkgc3RyaW5n",
            "javaScript_with_scope_field": {"scope": "{'x': 1}",
                                            "value": "function incrementX() { x++; }"},
            "double_field": decimal.Decimal('4.3'),
            "boolean_field": True,
            "decimal_field": decimal.Decimal('1.34'),
            'uuid_field': "3e139ff5-d622-45c6-bf9e-1dfec72820c4",
            "dbref_field": {"id": "313233343536373839313233",
                            "database": "some_database",
                            "collection": "some_collection"}
        }

        self.assertEqual(expected_record, records_by_stream['datatype_coll_1']['messages'][1]['data'])


SCENARIOS.add(MongoDBDatatype)
Example #22
0
            'age': 99,
            'updated_at': '2111-01-01T12:12:12.222111+00:00',
            'size': 'big'
        }
        self.assertEqual(
            actual_chicken_record,
            expected_chicken_record,
            msg=
            "Expected `various_types` upsert record data to be {}, but target output {}"
            .format(expected_chicken_record, actual_chicken_record))

        print("records are correct")

        # verify state and bookmarks
        state = menagerie.get_state(conn_id)

        chicken_bookmark = state['bookmarks']['postgres-public-chicken_view']
        self.assertIsNone(state['currently_syncing'],
                          msg="expected state's currently_syncing to be None")
        self.assertEqual(
            chicken_bookmark['version'],
            table_version,
            msg="expected bookmark for stream ROOT-CHICKEN to match version")
        self.assertEqual(chicken_bookmark['replication_key'], 'updated_at')
        self.assertEqual(chicken_bookmark['replication_key_value'],
                         '2111-01-01T12:12:12.222111+00:00')
        print("bookmarks are correct")


SCENARIOS.add(PostgresViewsIncrementalReplication)
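Note: the replication_key assertions above imply the view's catalog was selected for incremental replication on updated_at; a sketch of the metadata such a selection presumably passes (standard Singer metadata shape, values inferred from the assertions).

replication_md = [{
    "breadcrumb": [],
    "metadata": {
        "replication-method": "INCREMENTAL",
        "replication-key": "updated_at",
    },
}]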
Example #23
0
        self.assertEqual(
            records_by_stream[test_table_name]['messages'][0]['action'],
            'upsert')
        print("inserted record is correct")

        state = menagerie.get_state(conn_id)
        chicken_bookmark = state['bookmarks'][
            'dev-public-postgres_logical_replication_array_test']
        self.assertIsNone(state['currently_syncing'],
                          msg="expected state's currently_syncing to be None")

        self.assertIsNotNone(
            chicken_bookmark['lsn'],
            msg=
            "expected bookmark for stream public-postgres_logical_replication_array_test to have an lsn"
        )
        lsn_2 = chicken_bookmark['lsn']

        self.assertGreaterEqual(lsn_2, lsn_1)

        #table_version does NOT change
        self.assertEqual(
            chicken_bookmark['version'],
            table_version,
            msg=
            "expected bookmark for stream public-postgres_logical_replication_test to match version"
        )


SCENARIOS.add(PostgresLogicalRepArrays)
Example #24
0
        connections.select_catalog_and_fields_via_metadata(
            conn_id, chicken_catalog,
            menagerie.get_annotated_schema(conn_id,
                                           chicken_catalog['stream_id']),
            replication_md)

        # clear state
        menagerie.set_state(conn_id, {})

        sync_job_name = runner.run_sync_mode(self, conn_id)

        # verify tap and target exit codes; the tap is expected to fail
        # because logical replication is not supported for views
        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)

        self.assertEqual(exit_status['tap_exit_status'], 1)

        record_count_by_stream = runner.examine_target_output_file(
            self, conn_id, self.expected_sync_streams(), self.expected_pks())

        self.assertEqual(record_count_by_stream, {})
        print("records are correct")

        # verify state and bookmarks
        state = menagerie.get_state(conn_id)
        self.assertEqual(state, {}, msg="expected state to be empty")


SCENARIOS.add(PostgresViewsLogicalReplication)
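Note: the non-zero tap exit asserted above presumably follows from forcing log-based replication on a view; a sketch of the replication_md that would request it (standard Singer metadata shape, assumed values).

replication_md = [{
    "breadcrumb": [],
    "metadata": {"replication-method": "LOG_BASED"},
}]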
Example #25
0
        for v in record_count_by_stream.values():
            self.assertGreaterEqual(v, 6)

        # Verify that we got 2 records with _sdc_deleted_at
        self.assertEqual(
            2,
            len([
                x['data'] for x in records_by_stream['simple_coll_1']
                if x['data'].get('_sdc_deleted_at')
            ]))
        self.assertEqual(
            2,
            len([
                x['data'] for x in records_by_stream['simple_coll_2']
                if x['data'].get('_sdc_deleted_at')
            ]))
        # Verify that the _id of the records sent are the same set as the
        # _ids of the documents changed
        actual = set([
            ObjectId(x['data']['_id'])
            for x in records_by_stream['simple_coll_1']
        ]).union(
            set([
                ObjectId(x['data']['_id'])
                for x in records_by_stream['simple_coll_2']
            ]))
        self.assertEqual(changed_ids, actual)


SCENARIOS.add(MongoDBOplog)
Example #26
0
        state = menagerie.get_state(conn_id)

        # Add 10 rows to the DB
        self.addMoreData(10)
        # Delete some rows
        self.deleteData(range(40, 50))
        # Change some rows
        self.updateData(10, 60, 'boolean_field', False)

        ################################
        # Run sync again and check that records did come through
        ################################
        # Disable streams forces shards to close
        self.disableStreams(expected_streams)
        sync_job_name = runner.run_sync_mode(self, conn_id)

        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)

        # verify the persisted schema was correct
        records_by_stream = runner.get_records_from_target_output()

        # Check that 31 messages came through (10 inserts, 10 deletes, 10 updates, and 1 activate version)
        for stream in records_by_stream.values():
            self.assertEqual(31, len(stream['messages']))

        state = menagerie.get_state(conn_id)


SCENARIOS.add(DynamoDBLogBased)
Example #27
0
            self.assertGreater(int(updated_log_file_suffix),
                               int(expected_log_file_suffix))

        expected_log_file = bookmark['log_file']
        expected_log_pos = bookmark['log_pos']

        expected_rec_2 = copy.deepcopy(rec)

        # check for expected records
        records_for_stream = runner.get_records_from_target_output()[
            self.table_name()]
        messages_for_stream = records_for_stream['messages']
        message_actions = [rec['action'] for rec in messages_for_stream]

        self.assertEqual(message_actions, ['upsert'])

        upsert_records = [
            m['data'] for m in messages_for_stream if m['action'] == 'upsert'
        ]
        del upsert_records[0]['_sdc_deleted_at']

        expected_json = json.loads(expected_rec_2.get('our_json', '{}'))
        actual_json = json.loads(upsert_records[0].get('our_json', '{}'))

        self.assertGreater(len(actual_json), 0)
        self.assertEqual(expected_json, actual_json)


SCENARIOS.add(MySQLBinlogJson)
Example #28
0
        for stream in self.expected_streams():
            with self.subTest(stream=stream):
                # verify that we can paginate with all fields selected
                self.assertGreater(
                    record_count_by_stream.get(stream, -1),
                    self.expected_metadata().get(stream, {}).get(self.API_LIMIT, 0),
                    msg="The number of records is not over the stream max limit")

                # verify that the automatic fields are sent to the target
                self.assertTrue(
                    actual_fields_by_stream.get(stream, set()).issuperset(
                        self.expected_primary_keys().get(stream, set()) |
                        self.top_level_replication_key_fields().get(stream, set()) |
                        self.expected_foreign_keys().get(stream, set())),
                    msg="The fields sent to the target don't include all automatic fields"
                )

                # verify we have more fields sent to the target than just automatic fields
                # SKIP THIS ASSERTION IF ALL FIELDS ARE INTENTIONALLY AUTOMATIC FOR THIS STREAM
                self.assertTrue(
                    actual_fields_by_stream.get(stream, set()).symmetric_difference(
                        self.expected_primary_keys().get(stream, set()) |
                        self.expected_replication_keys().get(stream, set()) |
                        self.expected_foreign_keys().get(stream, set())),
                    msg="The fields sent to the target don't include non-automatic fields"
                )


SCENARIOS.add(PaginationTest)
Example #29
0
        #        insert 3, 4, 5
        #        update 0, 4
        #        delete 1, 5

        #      Resulting Synced Records: 2, 3, 0, 4

        # verify replicated records still match expectations
        self.assertDictEqual(self.expected_records[2],
                             messages[0]['data'])  # existing insert
        self.assertDictEqual(self.expected_records[3],
                             messages[1]['data'])  # new insert
        self.assertDictEqual(self.expected_records[0],
                             messages[2]['data'])  # existing update
        self.assertDictEqual(self.expected_records[4],
                             messages[3]['data'])  # new insert / update

        # grab bookmarked state
        state = menagerie.get_state(conn_id)
        bookmark = state['bookmarks'][
            'dev-public-postgres_full_table_replication_test']

        # verify state and bookmarks meet expectations
        self.assertIsNone(state['currently_syncing'])
        self.assertIsNone(bookmark.get('lsn'))
        self.assertIsNone(bookmark.get('replication_key'))
        self.assertIsNone(bookmark.get('replication_key_value'))
        self.assertEqual(table_version_3, bookmark['version'])


SCENARIOS.add(PostgresFullTable)
Example #30
0
        self.assertGreaterEqual(lsn_cows_2, lsn_cows_1)

        chickens_bookmark = state['bookmarks'][
            'postgres-public-postgres_logical_replication_test_chickens']
        self.assertIsNotNone(
            chickens_bookmark['lsn'],
            msg=
            "expected bookmark for stream public-postgres_logical_replication_test_chickens to have an lsn"
        )
        lsn_chickens_2 = chickens_bookmark['lsn']
        self.assertGreaterEqual(lsn_chickens_2, lsn_chickens_1)

        #table_version does NOT change
        self.assertEqual(
            chickens_bookmark['version'],
            table_version_chickens,
            msg=
            "expected bookmark for stream public-postgres_logical_replication_test to match version"
        )

        #table_version does NOT change
        self.assertEqual(
            cows_bookmark['version'],
            table_version_cows,
            msg=
            "expected bookmark for stream public-postgres_logical_replication_test to match version"
        )


SCENARIOS.add(PostgresLogicalRepMultipleDBs)