Ejemplo n.º 1
0
 def get_progress(self):
   """Report progress as the shuffle position of the last group start.

   Returns:
     An iobase.ReaderProgress whose position carries the URL-safe
     base64 encoding of the range tracker's last group start, or None
     when the range tracker reports no group start.
   """
   group_start = self._range_tracker.last_group_start()
   if group_start is not None:
     encoded_start = base64.urlsafe_b64encode(group_start)
     return iobase.ReaderProgress(
         position=iobase.ReaderPosition(shuffle_position=encoded_start))
   return None
Ejemplo n.º 2
0
    def run_update_stop_position(self, start_offset, end_offset, stop_offset,
                                 records_to_read, file_path):
        """Exercise a dynamic split of a text file reader at ``stop_offset``.

        Reads ``records_to_read`` records from a ``TextFileSource`` built
        from ``file_path``, ``start_offset`` and ``end_offset``, requests a
        split at ``stop_offset`` via ``try_splitting_reader_at`` and then
        drains the reader. When the split response is not None, the
        concatenation of the first split and a second source starting at the
        returned stop position must equal the content of the original source.

        Returns early, without asserting anything, if the source holds fewer
        than ``records_to_read`` records.
        """
        source = fileio.TextFileSource(file_path, start_offset, end_offset)
        first_split_parts = []

        with source.reader() as reader:
            reader_iter = iter(reader)
            try:
                for _ in range(records_to_read):
                    first_split_parts.append(next(reader_iter))
            except StopIteration:
                # Invalid case, given source does not contain this many
                # records.
                return

            last_record_start = reader.range_tracker.last_record_start

            # No split is expected when the requested offset is not past the
            # start of the last record read, coincides with either end of
            # the range, or when nothing has been read yet (unstarted).
            no_split_expected = (
                stop_offset <= last_record_start
                or stop_offset == start_offset
                or stop_offset == end_offset
                or records_to_read == 0)
            if no_split_expected:
                expected_split_response = None
            else:
                expected_split_response = (
                    iobase.DynamicSplitResultWithPosition(
                        stop_position=iobase.ReaderPosition(
                            byte_offset=stop_offset)))

            split_request = iobase.DynamicSplitRequest(
                progress=iobase.ReaderProgress(
                    iobase.ReaderPosition(byte_offset=stop_offset)))
            split_response = self.try_splitting_reader_at(
                reader, split_request, expected_split_response)

            # Drain whatever the (possibly shortened) reader still yields.
            first_split_parts.extend(reader)

        if split_response is None:
            return

        # Reading the two splits back to back must reproduce what a fresh
        # reader over the original source returns.
        with source.reader() as original_reader:
            records_of_original = ''.join(original_reader)

        second_source = fileio.TextFileSource(
            file_path, split_response.stop_position.byte_offset, end_offset)
        with second_source.reader() as second_reader:
            records_of_second_split = ''.join(second_reader)

        self.assertEqual(
            records_of_original,
            ''.join(first_split_parts) + records_of_second_split)
Ejemplo n.º 3
0
def cloud_progress_to_reader_progress(cloud_progress):
    """Build an iobase.ReaderProgress from a cloud progress object.

    The cloud position is converted with
    cloud_position_to_reader_position when it is set; otherwise the reader
    position stays None. percentComplete and remainingTime are passed
    through unchanged as the second and third positional arguments.
    """
    cloud_position = cloud_progress.position
    reader_position = (
        None if cloud_position is None
        else cloud_position_to_reader_position(cloud_position))
    return iobase.ReaderProgress(
        reader_position,
        cloud_progress.percentComplete,
        cloud_progress.remainingTime)
Ejemplo n.º 4
0
    def test_reader_progress_to_cloud_progress_percent_complete(self):
        """percent_complete must survive conversion to a cloud progress.

        Converts a ReaderProgress carrying only percent_complete and checks
        that the result is a dataflow.ApproximateProgress whose
        percentComplete holds the same value.
        """
        reader_progress = iobase.ReaderProgress(percent_complete=0.123)

        cloud_progress = apiclient.reader_progress_to_cloud_progress(
            reader_progress)
        self.assertIsNotNone(cloud_progress)
        self.assertIsInstance(cloud_progress, dataflow.ApproximateProgress)
        self.assertIsNotNone(cloud_progress.percentComplete)
        # assertEqual, not the deprecated assertEquals alias (removed in
        # Python 3.12).
        self.assertEqual(0.123, cloud_progress.percentComplete)
Ejemplo n.º 5
0
    def test_update_stop_position_percent_complete_for_position(self):
        """Split a partially read text file reader at several byte offsets.

        The temp file holds five 4-character lines joined by newlines, so
        lines start at offsets 0, 5, 10, 15 and 20. Three lines are consumed
        before any split is attempted.
        """
        lines = ['aaaa', 'bbbb', 'cccc', 'dddd', 'eeee']
        source = fileio.TextFileSource(
            file_path=self.create_temp_file('\n'.join(lines)))

        def split_request_at(offset):
            # Split request targeting the given byte offset.
            return iobase.DynamicSplitRequest(
                iobase.ReaderProgress(
                    position=iobase.ReaderPosition(byte_offset=offset)))

        with source.reader() as reader:
            # Consume the first three lines.
            reader_iter = iter(reader)
            for _ in range(3):
                next(reader_iter)

            # Expected to be refused, in this order:
            #   0 and 25 - at the boundaries of the range;
            #   5 and 10 - on or before the start offset of the last
            #              record that was read.
            for rejected_offset in (0, 25, 5, 10):
                self.try_splitting_reader_at(
                    reader, split_request_at(rejected_offset), None)

            # A position after the start offset of the last record read
            # should be accepted.
            self.try_splitting_reader_at(
                reader,
                split_request_at(15),
                iobase.DynamicSplitResultWithPosition(
                    iobase.ReaderPosition(byte_offset=15)))
Ejemplo n.º 6
0
    def test_reader_progress_to_cloud_progress_position(self):
        """A byte-offset position must survive conversion to cloud progress.

        Converts a ReaderProgress carrying a byte_offset position and checks
        that the result is a dataflow.ApproximateProgress whose position is
        a dataflow.Position with the same byteOffset.
        """
        reader_position = iobase.ReaderPosition(byte_offset=9999)
        reader_progress = iobase.ReaderProgress(position=reader_position)

        cloud_progress = apiclient.reader_progress_to_cloud_progress(
            reader_progress)
        self.assertIsNotNone(cloud_progress)
        self.assertIsInstance(cloud_progress, dataflow.ApproximateProgress)
        self.assertIsNotNone(cloud_progress.position)
        self.assertIsInstance(cloud_progress.position, dataflow.Position)
        # assertEqual, not the deprecated assertEquals alias (removed in
        # Python 3.12).
        self.assertEqual(9999, cloud_progress.position.byteOffset)
Ejemplo n.º 7
0
  def test_dynamic_splitting_with_range(self):
    """Split a grouped shuffle reader created with explicit range bounds.

    The source is created with start position '0' and end position '3'
    (both URL-safe base64 encoded). One record is read before any split is
    attempted.
    """
    def encoded(key):
      # Positions are exchanged in URL-safe base64 form.
      return base64.urlsafe_b64encode(key)

    def split_request_at(key):
      # Split request targeting the encoded shuffle position of `key`.
      return iobase.DynamicSplitRequest(iobase.ReaderProgress(
          position=iobase.ReaderPosition(shuffle_position=encoded(key))))

    source = GroupedShuffleSource(
        config_bytes='not used',
        coder=Base64Coder(),
        start_position=encoded('0'),
        end_position=encoded('3'))

    chunks = [TEST_CHUNK1, TEST_CHUNK2]

    with source.reader(test_reader=FakeShuffleReader(chunks)) as reader:
      reader_iter = iter(reader)
      next(reader_iter)

      # Cannot split if split request is out of range
      for out_of_range_key in ('0', '3', '4'):
        self.try_splitting_reader_at(
            reader, split_request_at(out_of_range_key), None)

      # Successful split.
      self.try_splitting_reader_at(
          reader,
          split_request_at('2'),
          iobase.DynamicSplitResultWithPosition(
              iobase.ReaderPosition(shuffle_position=encoded('2'))))
Ejemplo n.º 8
0
  def test_dynamic_splitting(self):
    """Split a grouped shuffle reader that has no explicit range bounds.

    A split is attempted before reading anything, then again after two
    records have been read, first at positions that must be refused and
    finally at one that must be accepted.
    """
    def encoded(key):
      # Positions are exchanged in URL-safe base64 form.
      return base64.urlsafe_b64encode(key)

    def split_request_at(key):
      # Split request targeting the encoded shuffle position of `key`.
      return iobase.DynamicSplitRequest(iobase.ReaderProgress(
          position=iobase.ReaderPosition(shuffle_position=encoded(key))))

    source = GroupedShuffleSource(
        config_bytes='not used', coder=Base64Coder())

    chunks = [TEST_CHUNK1, TEST_CHUNK2]

    with source.reader(test_reader=FakeShuffleReader(chunks)) as reader:
      # Cannot split an unstarted reader
      self.try_splitting_reader_at(reader, split_request_at('1'), None)

      reader_iter = iter(reader)
      next(reader_iter)
      next(reader_iter)

      # Cannot split since the provided split position is smaller than or
      # equal to the current position '1'.
      for stale_key in ('0', '1'):
        self.try_splitting_reader_at(
            reader, split_request_at(stale_key), None)

      # Successful split.
      self.try_splitting_reader_at(
          reader,
          split_request_at('3'),
          iobase.DynamicSplitResultWithPosition(
              iobase.ReaderPosition(shuffle_position=encoded('3'))))
Ejemplo n.º 9
0
    def get_progress(self):
        """Report progress as the fraction of the index range consumed.

        Returns:
          An iobase.ReaderProgress whose percent_complete is 1 when the
          current index has reached the source's end index or the range is
          empty, 0 when the current index equals the start index, and
          otherwise the float ratio of consumed indices to the range size.
        """
        current = self.current_index
        end = self.source.end_index
        start = self.source.start_index

        if current >= end or start >= end:
            fraction = 1
        elif current == start:
            fraction = 0
        else:
            # float() keeps the division from truncating on integer indices.
            fraction = float(current - start) / (end - start)

        return iobase.ReaderProgress(percent_complete=fraction)
Ejemplo n.º 10
0
  def get_progress(self):
    """Report progress as a concat position over the current sub-reader.

    Returns:
      None when there is no current sub-reader (negative index or no reader
      object) or when the sub-reader reports no progress; otherwise an
      iobase.ReaderProgress whose position is a ConcatPosition built from
      the current reader index and the sub-reader's position.

    Raises:
      ValueError: if the sub-reader reports progress without a position.
    """
    if self.current_reader_index < 0 or self.current_reader is None:
      return None

    reader_index = self.current_reader_index
    inner_progress = self.current_reader.get_progress()
    if inner_progress is None:
      return None

    inner_position = inner_progress.position
    if inner_position is None:
      raise ValueError('A concat source should only be created with '
                       'sub-sources that create readers that perform '
                       'progress reporting and dynamic work rebalancing '
                       'using positions')

    concat_position = iobase.ConcatPosition(reader_index, inner_position)
    return iobase.ReaderProgress(
        position=iobase.ReaderPosition(concat_position=concat_position))
Ejemplo n.º 11
0
  def test_in_memory_source_dynamic_split(self):
    """Split an in-memory reader of six elements at various record indices.

    Each scenario opens a fresh reader on the same source and checks the
    outcome of try_splitting_reader_at against the expected response.
    """
    source = inmemory.InMemorySource([10, 20, 30, 40, 50, 60],
                                     coder=FakeCoder())

    def split_request_at(index):
      # Split request targeting the given record index.
      return iobase.DynamicSplitRequest(
          iobase.ReaderProgress(
              position=iobase.ReaderPosition(record_index=index)))

    # Unstarted reader
    with source.reader() as reader:
      self.try_splitting_reader_at(reader, split_request_at(2), None)

    # Proposed split position out of range
    with source.reader() as reader:
      reader_iter = iter(reader)
      next(reader_iter)
      for out_of_range_index in (-1, 10):
        self.try_splitting_reader_at(
            reader, split_request_at(out_of_range_index), None)

    # Already read past proposed split position
    with source.reader() as reader:
      reader_iter = iter(reader)
      for _ in range(3):
        next(reader_iter)
      for passed_index in (1, 2):
        self.try_splitting_reader_at(
            reader, split_request_at(passed_index), None)

    # Successful split; a second split at a smaller index that is still
    # ahead of the reader is expected to succeed as well.
    with source.reader() as reader:
      reader_iter = iter(reader)
      next(reader_iter)
      for target_index in (4, 2):
        self.try_splitting_reader_at(
            reader,
            split_request_at(target_index),
            iobase.DynamicSplitResultWithPosition(
                stop_position=iobase.ReaderPosition(
                    record_index=target_index)))
Ejemplo n.º 12
0
    def get_progress(self):
        """Report progress as the record index currently being read.

        Returns:
          An iobase.ReaderProgress whose position carries the current
          record index, or None while the current index is None.
        """
        current_index = self._current_index
        if current_index is not None:
            position = iobase.ReaderPosition(record_index=current_index)
            return iobase.ReaderProgress(position=position)
        return None
Ejemplo n.º 13
0
 def get_progress(self):
     """Report progress as a byte-offset position.

     The offset is the range tracker's last_record_start value.
     """
     last_start = self.range_tracker.last_record_start
     position = iobase.ReaderPosition(byte_offset=last_start)
     return iobase.ReaderProgress(position=position)
 def get_progress(self):
     """Report progress as a record-index position.

     The index is taken from this reader's current_index attribute.
     """
     position = iobase.ReaderPosition(record_index=self.current_index)
     return iobase.ReaderProgress(position=position)