def test_replicate_mongodb_to_pg(self):
        """End-to-end check of syncing MongoDB collections into Postgres.

        Flow: run the tap, verify target columns and row counts for the
        'listings' and 'my_collection' tables, then insert, delete and update
        documents in 'my_collection', run the tap again and re-check the target.
        """

        def check_columns(collection):
            """Assert the target table for `collection` has the expected columns."""
            # A fresh list per call, exactly as many columns as the target should expose
            wanted = ['_id', 'document', '_sdc_extracted_at',
                      '_sdc_batched_at', '_sdc_deleted_at']
            assertions.assert_cols_in_table(
                self.run_query_target_postgres, 'ppw_e2e_tap_mongodb', collection, wanted)

        def check_row_count(schema_name, collection, source_count):
            """Assert the target table holds `source_count` rows (counted by _id)."""
            sql = f'select count(_id) from {schema_name}.{collection}'
            assert source_count == self.run_query_target_postgres(sql)[0][0]

        # First run: both the 'fastsync' and 'singer' steps are expected
        assertions.assert_run_tap_success(TAP_MONGODB_ID, TARGET_ID, ['fastsync', 'singer'])
        for collection in ('listings', 'my_collection'):
            check_columns(collection)

        # Take all source counts first, then compare each against the target
        source_counts = {}
        for collection in ('listings', 'my_collection'):
            source_counts[collection] = self.mongodb_con[collection].count_documents({})
        for collection, count in source_counts.items():
            check_row_count('ppw_e2e_tap_mongodb', collection, count)

        # Three new documents with differing shapes and BSON value types
        new_documents = [
            {
                'age': randint(10, 30),
                'id': 1001,
                'uuid': uuid.uuid4(),
                'ts': Timestamp(12030, 500)
            },
            {
                'date': datetime.utcnow(),
                'id': 1002,
                'uuid': uuid.uuid4(),
                'regex': bson.Regex(r'^[A-Z]\\w\\d{2,6}.*$')
            },
            {
                'uuid': uuid.uuid4(),
                'id': 1003,
                'nested_json': {'a': 1, 'b': 3, 'c': {'key': bson.datetime.datetime(2020, 5, 3, 10, 0, 0)}}
            }
        ]
        insert_outcome = self.mongodb_con.my_collection.insert_many(new_documents)
        inserted_ids = insert_outcome.inserted_ids
        expected_rows = source_counts['my_collection'] + len(inserted_ids)

        # Remove the first of the freshly inserted documents again
        delete_outcome = self.mongodb_con.my_collection.delete_one({'_id': inserted_ids[0]})
        expected_rows -= delete_outcome.deleted_count

        # Empty filter: set 'id' to 0 on every document of the collection
        update_outcome = self.mongodb_con.my_collection.update_many({}, {'$set': {'id': 0}})

        # Second run, again expecting both 'fastsync' and 'singer'
        assertions.assert_run_tap_success(TAP_MONGODB_ID, TARGET_ID, ['fastsync', 'singer'])

        # Every modified document should show up in the target with id = 0
        zero_id_sql = \
            'select count(_id) from ppw_e2e_tap_mongodb.my_collection where cast(document->>\'id\' as int) = 0'
        assert update_outcome.modified_count == self.run_query_target_postgres(zero_id_sql)[0][0]

        check_row_count('ppw_e2e_tap_mongodb', 'my_collection', expected_rows)
Beispiel #2
0
    def test_replicate_mongodb_to_sf(self):
        """End-to-end check of syncing MongoDB collections into Snowflake.

        Flow: run the tap, verify target columns and row counts for the
        'listings', 'my_collection' and 'all_datatypes' tables, then insert,
        delete and update documents in 'my_collection', run the tap again and
        re-check the target.
        """
        def check_columns(collection):
            """Assert the target table for `collection` has the expected columns."""
            # A fresh list per call; column names are upper-case here
            wanted = ['_ID', 'DOCUMENT', '_SDC_EXTRACTED_AT', '_SDC_BATCHED_AT', '_SDC_DELETED_AT']
            assertions.assert_cols_in_table(
                self.run_query_target_snowflake, 'ppw_e2e_tap_mongodb', collection, wanted)

        def check_row_count(schema_name, collection, source_count):
            """Assert the target table holds `source_count` rows (counted by _id)."""
            sql = f'select count(_id) from {schema_name}.{collection}'
            assert source_count == self.run_query_target_snowflake(sql)[0][0]

        collections = ('listings', 'my_collection', 'all_datatypes')

        # First run: both the 'fastsync' and 'singer' steps are expected
        assertions.assert_run_tap_success(TAP_MONGODB_ID, TARGET_ID, ['fastsync', 'singer'])
        for collection in collections:
            check_columns(collection)

        # Take all source counts first, then compare each against the target
        source_counts = {}
        for collection in collections:
            source_counts[collection] = self.mongodb_con[collection].count_documents({})
        for collection, count in source_counts.items():
            check_row_count('ppw_e2e_tap_mongodb', collection, count)

        # Three new documents with differing shapes and BSON value types
        new_documents = [
            {
                'age': randint(10, 30),
                'id': 1001,
                'uuid': uuid.uuid4(),
                'ts': Timestamp(12030, 500),
            },
            {
                'date': datetime.utcnow(),
                'id': 1002,
                'uuid': uuid.uuid4(),
                'regex': bson.Regex(r'^[A-Z]\\w\\d{2,6}.*$'),
            },
            {
                'uuid': uuid.uuid4(),
                'id': 1003,
                'decimal': bson.Decimal128(decimal.Decimal('5.64547548425446546546644')),
                'nested_json': {'a': 1, 'b': 3, 'c': {'key': bson.datetime.datetime(2020, 5, 3, 10, 0, 0)}},
            },
        ]
        insert_outcome = self.mongodb_con.my_collection.insert_many(new_documents)
        inserted_ids = insert_outcome.inserted_ids
        expected_rows = source_counts['my_collection'] + len(inserted_ids)

        # Remove the first of the freshly inserted documents again
        delete_outcome = self.mongodb_con.my_collection.delete_one({'_id': inserted_ids[0]})
        expected_rows -= delete_outcome.deleted_count

        # Empty filter: set 'id' to 0 on every document of the collection
        update_outcome = self.mongodb_con.my_collection.update_many({}, {'$set': {'id': 0}})

        # Second run: only the 'singer' step is expected this time
        assertions.assert_run_tap_success(TAP_MONGODB_ID, TARGET_ID, ['singer'])

        # Every modified document should show up in the target with id = 0
        zero_id_sql = 'select count(_id) from ppw_e2e_tap_mongodb.my_collection where document:id = 0'
        assert update_outcome.modified_count == self.run_query_target_snowflake(zero_id_sql)[0][0]

        check_row_count('ppw_e2e_tap_mongodb', 'my_collection', expected_rows)
    def test_replicate_mongodb_to_sf(self):
        """
        End-to-end check of syncing MongoDB collections into Snowflake.

        Flow: run the tap, verify target columns and row counts for the
        'listings', 'my_collection' and 'all_datatypes' tables, then insert,
        delete and update documents in 'my_collection', run the tap again and
        re-check the target. The target schema name carries the environment's
        `sf_schema_postfix`.
        """
        collections = ('listings', 'my_collection', 'all_datatypes')

        # First run: both the 'fastsync' and 'singer' steps are expected
        assertions.assert_run_tap_success(self.tap_id, self.target_id, ['fastsync', 'singer'])
        for collection in collections:
            self.assert_columns_exist(collection)

        # Take all source counts first, then compare each against the target
        source_counts = {}
        for collection in collections:
            source_counts[collection] = self.mongodb_con[collection].count_documents({})
        for collection, count in source_counts.items():
            self.assert_row_counts_equal(
                f'ppw_e2e_tap_mongodb{self.e2e_env.sf_schema_postfix}', collection, count)

        # Three new documents with differing shapes and BSON value types
        new_documents = [
            {
                'age': randint(10, 30),
                'id': 1001,
                'uuid': uuid.uuid4(),
                'ts': bson.Timestamp(12030, 500),
            },
            {
                'date': datetime.utcnow(),
                'id': 1002,
                'uuid': uuid.uuid4(),
                'regex': bson.Regex(r'^[A-Z]\\w\\d{2,6}.*$'),
            },
            {
                'uuid': uuid.uuid4(),
                'id': 1003,
                'decimal': bson.Decimal128(decimal.Decimal('5.64547548425446546546644')),
                'nested_json': {'a': 1, 'b': 3, 'c': {'key': bson.datetime.datetime(2020, 5, 3, 10, 0, 0)}},
            },
        ]
        insert_outcome = self.mongodb_con.my_collection.insert_many(new_documents)
        inserted_ids = insert_outcome.inserted_ids
        expected_rows = source_counts['my_collection'] + len(inserted_ids)

        # Remove the first of the freshly inserted documents again
        delete_outcome = self.mongodb_con.my_collection.delete_one({'_id': inserted_ids[0]})
        expected_rows -= delete_outcome.deleted_count

        # Empty filter: set 'id' to 0 on every document of the collection
        update_outcome = self.mongodb_con.my_collection.update_many({}, {'$set': {'id': 0}})

        # Second run: only the 'singer' step is expected this time
        assertions.assert_run_tap_success(self.tap_id, self.target_id, ['singer'])

        # Every modified document should show up in the target with id = 0
        modified = update_outcome.modified_count
        zero_id_sql = (f'select count(_id) from ppw_e2e_tap_mongodb{self.e2e_env.sf_schema_postfix}.my_collection'
                       ' where document:id = 0')
        zero_id_rows = self.e2e_env.run_query_target_snowflake(zero_id_sql)
        self.assertEqual(modified, zero_id_rows[0][0])

        self.assert_row_counts_equal(
            f'ppw_e2e_tap_mongodb{self.e2e_env.sf_schema_postfix}', 'my_collection', expected_rows)