Esempio n. 1
0
 def test_try_claim(self):
   # Wrap an offset tracker built from (100, 200) in the unsplittable
   # adapter and exercise try_claim through the wrapper.
   offset_tracker = range_trackers.OffsetRangeTracker(100, 200)
   unsplittable = range_trackers.UnsplittableRangeTracker(offset_tracker)

   # Positions 110, 140 and 183 are expected to be claimable, in order.
   for position in (110, 140, 183):
     self.assertTrue(unsplittable.try_claim(position))

   # Position 210 is expected to be rejected.
   self.assertFalse(unsplittable.try_claim(210))
Esempio n. 2
0
 def get_range_tracker(self, start_position, stop_position):
   """Build the range tracker for the given start/stop positions.

   A ``None`` start falls back to 0 and a ``None`` stop falls back to
   ``self._num_records``. When ``self._dynamic_splitting`` equals
   ``'none'`` the offset tracker is wrapped in an
   ``UnsplittableRangeTracker``; otherwise the plain offset tracker is
   returned.
   """
   begin = 0 if start_position is None else start_position
   end = self._num_records if stop_position is None else stop_position
   offset_tracker = range_trackers.OffsetRangeTracker(begin, end)

   if self._dynamic_splitting == 'none':
     return range_trackers.UnsplittableRangeTracker(offset_tracker)
   return offset_tracker
Esempio n. 3
0
  def test_try_split_fails(self):
    # Tracker under test: an offset tracker built from (100, 200) wrapped
    # in the unsplittable adapter, with position 110 already claimed.
    inner = range_trackers.OffsetRangeTracker(100, 200)
    tracker = range_trackers.UnsplittableRangeTracker(inner)
    self.assertTrue(tracker.try_claim(110))

    # Split points that are out of range must be refused.
    for out_of_range in (109, 210):
      self.assertFalse(tracker.try_split(out_of_range))

    # Split points within range are refused as well. Each attempt runs on
    # a shallow copy so the tracker above is not the object being split.
    for in_range in (111, 130, 199):
      tracker_copy = copy.copy(tracker)
      self.assertFalse(tracker_copy.try_split(in_range))
Esempio n. 4
0
    def get_range_tracker(self, start_position, stop_position):
        """Return an unsplittable tracker over the requested offsets.

        Implementation of
        :class:`~apache_beam.io.iobase.BoundedSource.get_range_tracker`.

        A ``None`` start defaults to 0; a ``None`` stop defaults to
        ``OffsetRangeTracker.OFFSET_INFINITY``.
        """
        first = 0 if start_position is None else start_position
        last = (
            range_trackers.OffsetRangeTracker.OFFSET_INFINITY
            if stop_position is None
            else stop_position
        )

        # The offset tracker is always wrapped so that it cannot be split;
        # for now a collection can only be read sequentially.
        offsets = range_trackers.OffsetRangeTracker(first, last)
        return range_trackers.UnsplittableRangeTracker(offsets)
Esempio n. 5
0
    def get_range_tracker(self, start_position=0, stop_position=None):
        """
        Return the range tracker for this source.

        Implements `apache_beam.io.iobase.BoundedSource.get_range_tracker`.

        The tracker is always an `UnsplittableRangeTracker` wrapping an
        `OffsetRangeTracker` that runs from 0 to
        `OffsetRangeTracker.OFFSET_INFINITY`, so a collection can only be
        read sequentially. Note that both `start_position` and
        `stop_position` are accepted but not used: the range is fixed
        regardless of what the caller passes.
        """
        self.logger.debug('Creating the range tracker.')
        unbounded_end = range_trackers.OffsetRangeTracker.OFFSET_INFINITY
        full_range = range_trackers.OffsetRangeTracker(0, unbounded_end)
        return range_trackers.UnsplittableRangeTracker(full_range)
Esempio n. 6
0
    def get_range_tracker(self, start_position=0, stop_position=None):
        """
        Return the range tracker for this source.

        Implements `apache_beam.io.iobase.BoundedSource.get_range_tracker`.

        An unsplittable range tracker is used, which means a collection can
        only be read sequentially; a tracker must nevertheless be provided.
        The tracked range always runs from 0 to
        `OffsetRangeTracker.OFFSET_INFINITY`: the `start_position` and
        `stop_position` arguments are accepted but not used.
        """
        end_of_range = range_trackers.OffsetRangeTracker.OFFSET_INFINITY
        return range_trackers.UnsplittableRangeTracker(
            range_trackers.OffsetRangeTracker(0, end_of_range)
        )
Esempio n. 7
0
    def get_range_tracker(self, start_position, stop_position):
        """Build the range tracker for the given start/stop positions.

        A ``None`` start falls back to ``self._start_offset``. A ``None``
        stop falls back to ``self._stop_offset`` when the source is
        splittable, and to ``OffsetRangeTracker.OFFSET_INFINITY`` otherwise.
        When the source is not splittable, the offset tracker is wrapped in
        an ``UnsplittableRangeTracker``.
        """
        begin = (
            self._start_offset if start_position is None else start_position
        )

        if stop_position is not None:
            end = stop_position
        elif self._splittable:
            end = self._stop_offset
        else:
            # For an unsplittable file, OFFSET_INFINITY is the default end
            # offset so that all data of the source gets read. Using the
            # file size as the end offset would be wrong for certain
            # unsplittable sources, e.g. compressed ones.
            end = range_trackers.OffsetRangeTracker.OFFSET_INFINITY

        offsets = range_trackers.OffsetRangeTracker(begin, end)
        if self._splittable:
            return offsets
        return range_trackers.UnsplittableRangeTracker(offsets)