Esempio n. 1
0
  def test_split_filtered_empty(self, mock_client):
    """Split a source whose filter matches no documents.

    For several desired bundle sizes, checks how many splits are produced
    (the expectation depends on `self.bucket_auto`) and then compares the
    splits against the filtered source over a range placed just past the
    last document's id, using
    `source_test_utils.assert_sources_equal_reference_source`.

    `mock_client` is presumably injected by a patch decorator that is not
    visible in this snippet.
    """
    # This filter doesn't match any of the documents.
    no_match_filter = {'x': {'$lt': 0}}
    empty_source = self._create_source(
        filter=no_match_filter, bucket_auto=self.bucket_auto)

    mock_client.return_value = _MockMongoClient(self._docs)
    # (bundle size in MB, count with bucket_auto, count with splitVector)
    expectations = [(1, 1, 5), (2, 1, 3), (10, 1, 1)]
    for bundle_mb, auto_count, vector_count in expectations:
      bundles = list(
          empty_source.split(
              start_position=None,
              stop_position=None,
              desired_bundle_size=bundle_mb * 1024 * 1024))

      # Note: with bucket_auto, a filter that matches no docs yields one
      # split covering the entire range; splitVector mode does not respect
      # the filter.
      expected_count = auto_count if self.bucket_auto else vector_count
      self.assertEqual(len(bundles), expected_count)

      last_id = self._docs[-1]['_id']
      # Reference: the same source over a range that matches no documents.
      reference = (
          empty_source,
          _ObjectIdHelper.increment_id(last_id, 1),
          _ObjectIdHelper.increment_id(last_id, 2),
      )
      split_infos = [
          (bundle.source, bundle.start_position, bundle.stop_position)
          for bundle in bundles
      ]
      source_test_utils.assert_sources_equal_reference_source(
          reference, split_infos)
    def test_increment_id(self):
        """Check that `_ObjectIdHelper.increment_id` steps ids by one.

        Covers two hand-picked pairs whose hex values differ by exactly one
        (so the step has to carry/borrow across the low 32 and low 64 bits),
        then 100 freshly generated ids that must sort strictly between their
        decremented and incremented neighbours.
        """
        # Each pair is (larger, smaller); the hex values differ by one.
        test_cases = [
            (objectid.ObjectId('000000000000000100000000'),
             objectid.ObjectId('0000000000000000ffffffff')),
            (objectid.ObjectId('000000010000000000000000'),
             objectid.ObjectId('00000000ffffffffffffffff')),
        ]
        for first, second in test_cases:
            self.assertEqual(second, _ObjectIdHelper.increment_id(first, -1))
            self.assertEqual(first, _ObjectIdHelper.increment_id(second, 1))

        for _ in range(100):
            # Named `_id` rather than `id` so the builtin `id()` is not
            # shadowed inside this test.
            _id = objectid.ObjectId()
            self.assertLess(_id, _ObjectIdHelper.increment_id(_id, 1))
            self.assertGreater(_id, _ObjectIdHelper.increment_id(_id, -1))
Esempio n. 3
0
    def test_increment_id(self):
        """Verify `_ObjectIdHelper.increment_id` for +1 and -1 steps.

        First uses fixed pairs of ids whose hex values are exactly one apart,
        then checks ordering against 100 newly generated ids.
        """
        # (upper, lower) hex strings; upper == lower + 1 as integers.
        adjacent_hex_pairs = (
            ("000000000000000100000000", "0000000000000000ffffffff"),
            ("000000010000000000000000", "00000000ffffffffffffffff"),
        )
        for upper_hex, lower_hex in adjacent_hex_pairs:
            upper = objectid.ObjectId(upper_hex)
            lower = objectid.ObjectId(lower_hex)
            self.assertEqual(lower, _ObjectIdHelper.increment_id(upper, -1))
            self.assertEqual(upper, _ObjectIdHelper.increment_id(lower, 1))

        # A generated id must sort strictly between its two neighbours.
        for _ in range(100):
            generated = objectid.ObjectId()
            successor = _ObjectIdHelper.increment_id(generated, 1)
            self.assertLess(generated, successor)
            predecessor = _ObjectIdHelper.increment_id(generated, -1)
            self.assertGreater(generated, predecessor)
 def test_read(self, mock_client):
     """Read from `self.mongo_source` over several id ranges.

     Each scenario sets the start/stop positions reported by a mocked range
     tracker and asserts that `read` yields exactly the expected documents.

     `mock_client` is presumably injected by a patch decorator that is not
     visible in this snippet.
     """
     tracker = mock.MagicMock()
     lowest_id = _ObjectIdHelper.int_to_id(0)  # smallest possible id
     highest_id = _ObjectIdHelper.int_to_id(2**96 - 1)  # largest possible id

     # Each scenario is (start, stop, expected documents).
     scenarios = [
         # range covers the first(inclusive) to third(exclusive) documents
         (self._ids[0], self._ids[2], self._docs[0:2]),
         # range covers from the first to the third documents
         (lowest_id, self._ids[2], self._docs[0:2]),
         # range covers from the third to last documents
         (self._ids[2], highest_id, self._docs[2:]),
         # range covers all documents
         (lowest_id, highest_id, self._docs),
         # range doesn't include any document
         (
             _ObjectIdHelper.increment_id(self._ids[2], 1),
             _ObjectIdHelper.increment_id(self._ids[3], -1),
             [],
         ),
     ]

     mock_client.return_value = _MockMongoClient(self._docs)
     for start, stop, expected in scenarios:
         tracker.start_position.return_value = start
         tracker.stop_position.return_value = stop
         actual = list(self.mongo_source.read(tracker))
         self.assertListEqual(expected, actual)
Esempio n. 5
0
 def test_split_single_document(self, mock_client):
   """Split a collection that holds only the first document.

   For each desired bundle size there must be exactly one split, starting at
   that document's id and stopping at the id incremented by one.

   `mock_client` is presumably injected by a patch decorator that is not
   visible in this snippet.
   """
   mock_client.return_value = _MockMongoClient(self._docs[0:1])
   for bundle_mb in (1, 5):
     bundles = list(
         self.mongo_source.split(
             start_position=None,
             stop_position=None,
             desired_bundle_size=bundle_mb * 1024 * 1024))
     self.assertEqual(len(bundles), 1)

     only_bundle = bundles[0]
     doc_id = self._docs[0]['_id']
     self.assertEqual(only_bundle.start_position, doc_id)
     # The stop position is the id right after the single document's id.
     self.assertEqual(
         only_bundle.stop_position, _ObjectIdHelper.increment_id(doc_id, 1))
Esempio n. 6
0
    def _increment_id(
        self,
        _id: Union[ObjectId, int, str],
        inc: int,
    ) -> Union[ObjectId, int, str]:
        """Return `_id` moved by `inc`, using the rule for its type.

        * `ObjectId`: delegated to `_ObjectIdHelper.increment_id`.
        * `int`: plain addition.
        * `str`: the id is looked up in `self._ids` and the entry `inc`
          positions away is returned, clamped to the first/last entry.

        Any other type falls through and yields `None`. A string that is not
        present in `self._ids` raises `ValueError` from `list.index`.
        """
        moved = None
        if isinstance(_id, ObjectId):
            moved = _ObjectIdHelper.increment_id(_id, inc)
        elif isinstance(_id, int):
            moved = _id + inc
        elif isinstance(_id, str):
            # Strings are stepped by position within the known id list.
            position = self._ids.index(_id) + inc
            if position <= 0:
                position = 0  # clamp to the first id
            elif position >= len(self._ids):
                position = -1  # clamp to the last id
            moved = self._ids[position]
        return moved