def test_first_record_non_split_point(self):
    """The very first record may not be reported with a False first argument.

    A fresh tracker is asked to accept its first record with the leading
    flag set to False (per the test name, a non-split point); the call is
    expected to raise ValueError.
    """
    to_pos = self.bytes_to_position
    tracker = range_trackers.GroupedShuffleRangeTracker(
        to_pos([3, 0, 0]), to_pos([5, 0, 0]))

    with self.assertRaises(ValueError):
        tracker.try_return_record_at(False, to_pos([3, 4, 5]))
def test_try_return_record_before_start(self):
    """Returning a record at [1, 2, 3] on a [3, 0, 0]..[5, 0, 0] tracker raises.

    The record position is built from smaller bytes than the tracker's start
    position, and the call is expected to raise ValueError.
    """
    to_pos = self.bytes_to_position
    tracker = range_trackers.GroupedShuffleRangeTracker(
        to_pos([3, 0, 0]), to_pos([5, 0, 0]))

    with self.assertRaises(ValueError):
        tracker.try_return_record_at(True, to_pos([1, 2, 3]))
def test_try_return_record_finite_range(self):
    """Check acceptance and rejection of records on a bounded tracker.

    Records inside [1, 0, 0]..[5, 0, 0] must be accepted in order; records
    at or past the stop position must be rejected.
    """
    to_pos = self.bytes_to_position
    tracker = range_trackers.GroupedShuffleRangeTracker(
        to_pos([1, 0, 0]), to_pos([5, 0, 0]))

    # Increasing positions that all precede the stop position.
    accepted = (
        [1, 2, 3],
        [1, 2, 5],
        [3, 6, 8, 10],
        [4, 255, 255, 255],
    )
    for raw in accepted:
        self.assertTrue(tracker.try_return_record_at(True, to_pos(raw)))

    # Positions lexicographically equal to or larger than the stop position
    # must be refused. Each probe runs against a fresh shallow copy of the
    # tracker rather than the tracker itself.
    rejected = (
        [5, 0, 0],
        [5, 0, 1],
        [6, 0, 0],
    )
    for raw in rejected:
        probe = copy.copy(tracker)
        self.assertFalse(probe.try_return_record_at(True, to_pos(raw)))
def test_try_return_record_with_non_split_point(self):
    """Interleave True- and False-flagged records; all must be accepted.

    After a record is returned with the flag set to True, further records
    at the same position with the flag set to False are expected to succeed.
    """
    to_pos = self.bytes_to_position
    tracker = range_trackers.GroupedShuffleRangeTracker(
        to_pos([1, 0, 0]), to_pos([5, 0, 0]))

    # (first argument to try_return_record_at, raw position bytes), in the
    # exact order the calls must be issued.
    steps = (
        (True, [1, 2, 3]),
        (False, [1, 2, 3]),
        (False, [1, 2, 3]),
        (True, [1, 2, 5]),
        (False, [1, 2, 5]),
        (True, [3, 6, 8, 10]),
        (True, [4, 255, 255, 255]),
    )
    for flag, raw in steps:
        self.assertTrue(tracker.try_return_record_at(flag, to_pos(raw)))
def test_try_test_split_at_position_finite_range(self):
    """Exercise try_split / try_claim on a [0, 0, 0]..[10, 20, 30] tracker."""
    to_pos = self.bytes_to_position
    tracker = range_trackers.GroupedShuffleRangeTracker(
        to_pos([0, 0, 0]), to_pos([10, 20, 30]))

    # Should fail before first record is returned.
    for raw in ([0, 0, 0], [3, 4, 5, 6]):
        self.assertFalse(tracker.try_split(to_pos(raw)))

    self.assertTrue(tracker.try_claim(to_pos([1, 2, 3])))

    # Should now succeed.
    self.assertTrue(tracker.try_split(to_pos([3, 4, 5, 6])))

    # Should not split at same or larger position.
    for raw in ([3, 4, 5, 6], [3, 4, 5, 6, 7], [4, 5, 6, 7]):
        self.assertFalse(tracker.try_split(to_pos(raw)))

    # Should split at smaller position.
    self.assertTrue(tracker.try_split(to_pos([3, 2, 1])))

    # But not at a position at or before last returned record.
    self.assertFalse(tracker.try_split(to_pos([1, 2, 3])))

    # Claims below the new stop position [3, 2, 1] succeed; a claim at the
    # stop position itself is refused.
    for raw in ([2, 3, 4], [3, 2, 0]):
        self.assertTrue(tracker.try_claim(to_pos(raw)))
    self.assertFalse(tracker.try_claim(to_pos([3, 2, 1])))
def test_try_split_at_position_infinite_range(self):
    """Exercise try_split / try_claim on a tracker built with '' bounds."""
    to_pos = self.bytes_to_position
    tracker = range_trackers.GroupedShuffleRangeTracker('', '')

    # Should fail before first record is returned.
    self.assertFalse(tracker.try_split(to_pos([3, 4, 5, 6])))

    self.assertTrue(tracker.try_claim(to_pos([1, 2, 3])))

    # Should now succeed.
    self.assertIsNotNone(tracker.try_split(to_pos([3, 4, 5, 6])))

    # Should not split at same or larger position.
    for raw in ([3, 4, 5, 6], [3, 4, 5, 6, 7], [4, 5, 6, 7]):
        self.assertIsNone(tracker.try_split(to_pos(raw)))

    # Should split at smaller position.
    self.assertIsNotNone(tracker.try_split(to_pos([3, 2, 1])))

    self.assertTrue(tracker.try_claim(to_pos([2, 3, 4])))

    # Should not split at a position we're already past.
    for raw in ([2, 3, 4], [2, 3, 3]):
        self.assertIsNone(tracker.try_split(to_pos(raw)))

    # A claim just below the stop position [3, 2, 1] succeeds; a claim at
    # the stop position is refused.
    self.assertTrue(tracker.try_claim(to_pos([3, 2, 0])))
    self.assertFalse(tracker.try_claim(to_pos([3, 2, 1])))
def test_try_return_identical_positions(self):
    """Claiming the same position twice raises ValueError on the second try."""
    to_pos = self.bytes_to_position
    tracker = range_trackers.GroupedShuffleRangeTracker(
        to_pos([3, 0, 0]), to_pos([5, 0, 0]))

    # The first claim of [3, 4, 5] is accepted ...
    self.assertTrue(tracker.try_claim(to_pos([3, 4, 5])))

    # ... and an identical second claim is an error.
    with self.assertRaises(ValueError):
        tracker.try_claim(to_pos([3, 4, 5]))
def test_non_split_point_record_with_different_position(self):
    """set_current_position at a new position after a claim raises ValueError.

    After [3, 4, 5] has been claimed, calling set_current_position with the
    different position [3, 4, 6] is expected to be rejected.
    """
    to_pos = self.bytes_to_position
    tracker = range_trackers.GroupedShuffleRangeTracker(
        to_pos([3, 0, 0]), to_pos([5, 0, 0]))

    self.assertTrue(tracker.try_claim(to_pos([3, 4, 5])))

    with self.assertRaises(ValueError):
        tracker.set_current_position(to_pos([3, 4, 6]))
def test_try_return_non_monotonic(self):
    """Returning a record at a smaller position than the previous one raises."""
    to_pos = self.bytes_to_position
    tracker = range_trackers.GroupedShuffleRangeTracker(
        to_pos([3, 0, 0]), to_pos([5, 0, 0]))

    # Advance the tracker through two increasing positions; the results of
    # these setup calls are intentionally not checked here.
    for raw in ([3, 4, 5], [3, 4, 6]):
        tracker.try_return_record_at(True, to_pos(raw))

    # Going backwards to [3, 2, 1] must be reported as an error.
    with self.assertRaises(ValueError):
        tracker.try_return_record_at(True, to_pos([3, 2, 1]))
def test_try_return_record_in_infinite_range(self):
    """A tracker built with '' bounds accepts increasing record positions."""
    to_pos = self.bytes_to_position
    tracker = range_trackers.GroupedShuffleRangeTracker('', '')

    increasing = (
        [1, 2, 3],
        [1, 2, 5],
        [3, 6, 8, 10],
    )
    for raw in increasing:
        self.assertTrue(tracker.try_return_record_at(True, to_pos(raw)))
def __init__(self, shuffle_source, reader=None):
    """Set up the reader and a range tracker covering the source's range.

    Args:
      shuffle_source: Source object; its ``start_position`` and
        ``end_position`` attributes become the tracker's bounds. It is also
        forwarded to the base-class initializer.
      reader: Forwarded unchanged to the base-class initializer.
    """
    super(GroupedShuffleReader, self).__init__(shuffle_source, reader)

    range_start = shuffle_source.start_position
    range_stop = shuffle_source.end_position
    self._range_tracker = range_trackers.GroupedShuffleRangeTracker(
        decoded_start_pos=range_start,
        decoded_stop_pos=range_stop)