def test_split_filtered_empty(self, mock_client):
    """Check ``split`` on a source whose filter matches no documents.

    For several desired bundle sizes, the number of splits is compared with
    the count expected for the active mode (``self.bucket_auto`` set or not),
    and the splits are then compared against a reference source whose range
    starts just after the last document id.
    """
    bytes_per_mb = 1024 * 1024
    # Each entry: (bundle size in MB,
    #              (expected splits with bucket_auto, expected splits without)).
    expectations = [(1, (1, 5)), (2, (1, 3)), (10, (1, 1))]

    # filtering doesn't match any documents
    empty_source = self._create_source(
        filter={'x': {'$lt': 0}}, bucket_auto=self.bucket_auto)
    mock_client.return_value = _MockMongoClient(self._docs)

    for megabytes, (auto_expected, vector_expected) in expectations:
        splits = list(
            empty_source.split(
                start_position=None,
                stop_position=None,
                desired_bundle_size=megabytes * bytes_per_mb))

        # Note: if filter matches no docs - one split covers entire range
        # (bucket_auto mode); splitVector mode does not respect filter.
        expected_count = (
            auto_expected if self.bucket_auto else vector_expected)
        self.assertEqual(len(splits), expected_count)

        last_id = self._docs[-1]['_id']
        reference = (
            empty_source,
            # range to match no documents:
            _ObjectIdHelper.increment_id(last_id, 1),
            _ObjectIdHelper.increment_id(last_id, 2),
        )
        observed = [
            (piece.source, piece.start_position, piece.stop_position)
            for piece in splits
        ]
        source_test_utils.assert_sources_equal_reference_source(
            reference, observed)
def test_increment_id(self):
    """Check ``_ObjectIdHelper.increment_id`` in both directions.

    The fixed cases are pairs of adjacent ids whose decrement has to borrow
    across the low 4 and the low 8 bytes respectively: decrementing the first
    id must give the second, and incrementing the second must give the first.
    Afterwards, 100 ids constructed with no arguments are checked to sort
    strictly between their decremented and incremented neighbours.
    """
    test_cases = [
        (
            objectid.ObjectId('000000000000000100000000'),
            objectid.ObjectId('0000000000000000ffffffff'),
        ),
        (
            objectid.ObjectId('000000010000000000000000'),
            objectid.ObjectId('00000000ffffffffffffffff'),
        ),
    ]
    for first, second in test_cases:
        self.assertEqual(second, _ObjectIdHelper.increment_id(first, -1))
        self.assertEqual(first, _ObjectIdHelper.increment_id(second, 1))

    for _ in range(100):
        # Named ``_id`` rather than ``id`` so the builtin is not shadowed.
        _id = objectid.ObjectId()
        self.assertLess(_id, _ObjectIdHelper.increment_id(_id, 1))
        self.assertGreater(_id, _ObjectIdHelper.increment_id(_id, -1))
def test_increment_id(self):
    """Exercise ``_ObjectIdHelper.increment_id`` with steps of +1 and -1.

    First, two hand-written pairs of neighbouring ids are checked in both
    directions. Then 100 ids built with ``objectid.ObjectId()`` are checked
    to compare less than their +1 result and greater than their -1 result.
    """
    # (upper, lower) hex strings; each lower value is the upper value minus 1.
    neighbour_hex = (
        ("000000000000000100000000", "0000000000000000ffffffff"),
        ("000000010000000000000000", "00000000ffffffffffffffff"),
    )
    neighbours = [
        (objectid.ObjectId(upper_hex), objectid.ObjectId(lower_hex))
        for upper_hex, lower_hex in neighbour_hex
    ]
    for upper, lower in neighbours:
        self.assertEqual(lower, _ObjectIdHelper.increment_id(upper, -1))
        self.assertEqual(upper, _ObjectIdHelper.increment_id(lower, 1))

    for _ in range(100):
        fresh = objectid.ObjectId()
        successor = _ObjectIdHelper.increment_id(fresh, 1)
        self.assertLess(fresh, successor)
        predecessor = _ObjectIdHelper.increment_id(fresh, -1)
        self.assertGreater(fresh, predecessor)
def test_read(self, mock_client):
    """Check ``self.mongo_source.read`` for several tracker ranges.

    A mocked range tracker reports each case's start/stop position, and the
    documents yielded by ``read`` are compared with the expected slice of
    ``self._docs``.
    """

    def make_case(start, stop, expected):
        # Bundle one scenario: tracker bounds plus the documents expected.
        return {'start': start, 'stop': stop, 'expected': expected}

    mock_tracker = mock.MagicMock()
    scenarios = [
        # range covers the first(inclusive) to third(exclusive) documents
        make_case(self._ids[0], self._ids[2], self._docs[0:2]),
        # range covers from the first to the third documents
        make_case(
            _ObjectIdHelper.int_to_id(0),  # smallest possible id
            self._ids[2],
            self._docs[0:2],
        ),
        # range covers from the third to last documents
        make_case(
            self._ids[2],
            _ObjectIdHelper.int_to_id(2**96 - 1),  # largest possible id
            self._docs[2:],
        ),
        # range covers all documents
        make_case(
            _ObjectIdHelper.int_to_id(0),
            _ObjectIdHelper.int_to_id(2**96 - 1),
            self._docs,
        ),
        # range doesn't include any document
        make_case(
            _ObjectIdHelper.increment_id(self._ids[2], 1),
            _ObjectIdHelper.increment_id(self._ids[3], -1),
            [],
        ),
    ]

    mock_client.return_value = _MockMongoClient(self._docs)
    for scenario in scenarios:
        mock_tracker.start_position.return_value = scenario['start']
        mock_tracker.stop_position.return_value = scenario['stop']
        documents_read = list(self.mongo_source.read(mock_tracker))
        self.assertListEqual(scenario['expected'], documents_read)
def test_split_single_document(self, mock_client):
    """Check ``split`` when the mocked collection holds one document.

    For each desired bundle size exactly one split is expected, starting at
    the document's ``_id`` and stopping at that id incremented by one.
    """
    mock_client.return_value = _MockMongoClient(self._docs[0:1])

    bytes_per_mb = 1024 * 1024
    for megabytes in [1, 5]:
        splits = list(
            self.mongo_source.split(
                start_position=None,
                stop_position=None,
                desired_bundle_size=megabytes * bytes_per_mb))
        self.assertEqual(len(splits), 1)

        only_split = splits[0]
        only_id = self._docs[0]['_id']
        self.assertEqual(only_split.start_position, only_id)
        self.assertEqual(
            only_split.stop_position,
            _ObjectIdHelper.increment_id(only_id, 1))
def _increment_id(
    self,
    _id: Union[ObjectId, int, str],
    inc: int,
) -> Union[ObjectId, int, str]:
    """Return `_id` moved by `inc`, using the rule that fits its type.

    * ``ObjectId``: delegated to ``_ObjectIdHelper.increment_id``.
    * ``int``: plain addition.
    * ``str``: the id's position in ``self._ids`` is shifted by `inc` and
      clamped to the first/last entry; ``list.index`` raises ``ValueError``
      when `_id` is not present in ``self._ids``.

    Any other type falls through and yields ``None``.
    """
    if isinstance(_id, ObjectId):
        return _ObjectIdHelper.increment_id(_id, inc)
    if isinstance(_id, int):
        return _id + inc
    if isinstance(_id, str):
        shifted = self._ids.index(_id) + inc
        last_position = len(self._ids) - 1
        # Clamp into [0, last_position] so out-of-range shifts stick to the
        # ends instead of wrapping around through negative indexing.
        return self._ids[min(max(shifted, 0), last_position)]
    return None