def get_progress(self):
    """Report progress as the start position of the last group.

    Returns:
      None when the range tracker reports no last group start; otherwise an
      iobase.ReaderProgress whose position carries the URL-safe base64
      encoding of that group start as its shuffle position.
    """
    group_start = self._range_tracker.last_group_start()
    if group_start is not None:
        encoded_start = base64.urlsafe_b64encode(group_start)
        return iobase.ReaderProgress(
            position=iobase.ReaderPosition(shuffle_position=encoded_start))
    return None
def run_update_stop_position(self, start_offset, end_offset, stop_offset,
                             records_to_read, file_path):
    """Check a dynamic split at stop_offset after reading some records.

    A TextFileSource is created from file_path, start_offset and end_offset.
    After records_to_read records have been consumed, a split at stop_offset
    is requested and compared with the expected outcome. When the split is
    accepted, the contents of the two resulting splits must add up to the
    contents of the original source.

    Args:
      start_offset: start offset passed to the source under test.
      end_offset: end offset passed to the source under test.
      stop_offset: byte offset at which the split is requested.
      records_to_read: number of records to consume before splitting.
      file_path: path of the file backing the source.
    """
    source = fileio.TextFileSource(file_path, start_offset, end_offset)
    first_split_parts = []

    with source.reader() as reader:
        records = iter(reader)
        try:
            for _ in range(records_to_read):
                first_split_parts.append(next(records))
        except StopIteration:
            # Invalid case: the source does not hold this many records.
            return

        last_start = reader.range_tracker.last_record_start
        no_split_expected = (
            stop_offset <= last_start
            or stop_offset == start_offset
            or stop_offset == end_offset
            or records_to_read == 0)  # nothing read yet: reader is unstarted
        if no_split_expected:
            expected_response = None
        else:
            expected_response = iobase.DynamicSplitResultWithPosition(
                stop_position=iobase.ReaderPosition(byte_offset=stop_offset))

        split_request = iobase.DynamicSplitRequest(
            progress=iobase.ReaderProgress(
                iobase.ReaderPosition(byte_offset=stop_offset)))
        split_response = self.try_splitting_reader_at(
            reader, split_request, expected_response)

        # Collect whatever the reader still yields after the split attempt.
        first_split_parts.extend(reader)

    if split_response is None:
        return

    # The two splits read back to back must reproduce the original source.
    with source.reader() as original_reader:
        original_contents = ''.join(original_reader)

    remainder_source = fileio.TextFileSource(
        file_path, split_response.stop_position.byte_offset, end_offset)
    with remainder_source.reader() as remainder_reader:
        second_split_contents = ''.join(remainder_reader)

    self.assertEqual(original_contents,
                     ''.join(first_split_parts) + second_split_contents)
def cloud_progress_to_reader_progress(cloud_progress):
    """Build an iobase.ReaderProgress from a cloud progress message.

    Args:
      cloud_progress: object exposing position, percentComplete and
        remainingTime attributes.

    Returns:
      An iobase.ReaderProgress built from the converted position (None when
      the message has no position), percentComplete and remainingTime, passed
      positionally in that order.
    """
    cloud_position = cloud_progress.position
    position = (cloud_position_to_reader_position(cloud_position)
                if cloud_position is not None else None)
    return iobase.ReaderProgress(
        position, cloud_progress.percentComplete, cloud_progress.remainingTime)
def test_reader_progress_to_cloud_progress_percent_complete(self):
    """A percent-complete reader progress survives conversion to cloud form.

    Converts an iobase.ReaderProgress holding only percent_complete and
    checks that the result is a dataflow.ApproximateProgress carrying the
    same value in percentComplete.
    """
    reader_progress = iobase.ReaderProgress(percent_complete=0.123)

    cloud_progress = apiclient.reader_progress_to_cloud_progress(
        reader_progress)

    self.assertIsNotNone(cloud_progress)
    self.assertIsInstance(cloud_progress, dataflow.ApproximateProgress)
    self.assertIsNotNone(cloud_progress.percentComplete)
    # assertEqual rather than the deprecated assertEquals alias, which newer
    # unittest versions no longer provide.
    self.assertEqual(0.123, cloud_progress.percentComplete)
def test_update_stop_position_percent_complete_for_position(self):
    """Request byte-offset splits on a text reader after three records.

    The file holds five 4-character lines joined by newlines, so the records
    begin at byte offsets 0, 5, 10, 15 and 20 and the content is 24
    characters long.
    """
    contents = '\n'.join(['aaaa', 'bbbb', 'cccc', 'dddd', 'eeee'])
    source = fileio.TextFileSource(file_path=self.create_temp_file(contents))

    def split_request_at(byte_offset):
        # Split request positioned at the given byte offset.
        return iobase.DynamicSplitRequest(
            iobase.ReaderProgress(
                position=iobase.ReaderPosition(byte_offset=byte_offset)))

    with source.reader() as reader:
        # Read three lines; the last one read ('cccc') begins at offset 10.
        records = iter(reader)
        for _ in range(3):
            next(records)

        # Offset 0 (start of the file) and offset 25 (past the end of the
        # content) are expected to be refused.
        self.try_splitting_reader_at(reader, split_request_at(0), None)
        self.try_splitting_reader_at(reader, split_request_at(25), None)

        # Offsets on or before the start of the last record read are expected
        # to be refused as well.
        self.try_splitting_reader_at(reader, split_request_at(5), None)
        self.try_splitting_reader_at(reader, split_request_at(10), None)

        # An offset after the start of the last record read should succeed.
        self.try_splitting_reader_at(
            reader,
            split_request_at(15),
            iobase.DynamicSplitResultWithPosition(
                iobase.ReaderPosition(byte_offset=15)))
def test_reader_progress_to_cloud_progress_position(self):
    """A byte-offset reader position survives conversion to cloud form.

    Converts an iobase.ReaderProgress whose position has byte_offset=9999 and
    checks that the result is a dataflow.ApproximateProgress whose position
    is a dataflow.Position with the same value in byteOffset.
    """
    reader_position = iobase.ReaderPosition(byte_offset=9999)
    reader_progress = iobase.ReaderProgress(position=reader_position)

    cloud_progress = apiclient.reader_progress_to_cloud_progress(
        reader_progress)

    self.assertIsNotNone(cloud_progress)
    self.assertIsInstance(cloud_progress, dataflow.ApproximateProgress)
    self.assertIsNotNone(cloud_progress.position)
    self.assertIsInstance(cloud_progress.position, dataflow.Position)
    # assertEqual rather than the deprecated assertEquals alias, which newer
    # unittest versions no longer provide.
    self.assertEqual(9999, cloud_progress.position.byteOffset)
def test_dynamic_splitting_with_range(self):
    """Request splits on a shuffle reader limited to positions '0' to '3'."""

    def encoded(key):
        # URL-safe base64 form of a raw shuffle position.
        return base64.urlsafe_b64encode(key)

    def split_request_at(key):
        # Split request positioned at the encoded shuffle position.
        return iobase.DynamicSplitRequest(iobase.ReaderProgress(
            position=iobase.ReaderPosition(shuffle_position=encoded(key))))

    source = GroupedShuffleSource(
        config_bytes='not used',
        coder=Base64Coder(),
        start_position=encoded('0'),
        end_position=encoded('3'))

    chunks = [TEST_CHUNK1, TEST_CHUNK2]
    with source.reader(test_reader=FakeShuffleReader(chunks)) as reader:
        # Keep the iterator referenced for the whole block, as the original
        # test does.
        records = iter(reader)
        next(records)

        # Cannot split if the split request is out of range.
        self.try_splitting_reader_at(reader, split_request_at('0'), None)
        self.try_splitting_reader_at(reader, split_request_at('3'), None)
        self.try_splitting_reader_at(reader, split_request_at('4'), None)

        # Successful split.
        self.try_splitting_reader_at(
            reader,
            split_request_at('2'),
            iobase.DynamicSplitResultWithPosition(
                iobase.ReaderPosition(shuffle_position=encoded('2'))))
def test_dynamic_splitting(self):
    """Request splits on an unbounded shuffle reader before and after reads."""

    def encoded(key):
        # URL-safe base64 form of a raw shuffle position.
        return base64.urlsafe_b64encode(key)

    def split_request_at(key):
        # Split request positioned at the encoded shuffle position.
        return iobase.DynamicSplitRequest(iobase.ReaderProgress(
            position=iobase.ReaderPosition(shuffle_position=encoded(key))))

    source = GroupedShuffleSource(
        config_bytes='not used', coder=Base64Coder())

    chunks = [TEST_CHUNK1, TEST_CHUNK2]
    with source.reader(test_reader=FakeShuffleReader(chunks)) as reader:
        # Cannot split an unstarted reader.
        self.try_splitting_reader_at(reader, split_request_at('1'), None)

        records = iter(reader)
        next(records)
        next(records)

        # Cannot split since the provided split position is smaller than or
        # equal to the current position '1'.
        self.try_splitting_reader_at(reader, split_request_at('0'), None)
        self.try_splitting_reader_at(reader, split_request_at('1'), None)

        # Successful split.
        self.try_splitting_reader_at(
            reader,
            split_request_at('3'),
            iobase.DynamicSplitResultWithPosition(
                iobase.ReaderPosition(shuffle_position=encoded('3'))))
def get_progress(self):
    """Report progress as the fraction of the source index range consumed.

    Returns:
      An iobase.ReaderProgress whose percent_complete is 1 when the current
      index has reached the end index or the range is empty, 0 when the
      current index equals the start index, and otherwise the consumed share
      of the range as a float.
    """
    first = self.source.start_index
    limit = self.source.end_index
    current = self.current_index

    if current >= limit or first >= limit:
        fraction = 1
    elif current == first:
        fraction = 0
    else:
        # float() keeps the division from truncating on integer indices.
        fraction = float(current - first) / (limit - first)
    return iobase.ReaderProgress(percent_complete=fraction)
def get_progress(self):
    """Report progress as a concat position inside the current sub-reader.

    Returns:
      None when no sub-reader is active (negative reader index or no current
      reader); otherwise an iobase.ReaderProgress whose position wraps an
      iobase.ConcatPosition made of the current reader index and the
      sub-reader's position (None when the sub-reader reports no progress).

    Raises:
      ValueError: if the sub-reader reports progress that has no position.
    """
    if self.current_reader_index < 0 or self.current_reader is None:
        return None

    reader_index = self.current_reader_index
    nested_position = None

    nested_progress = self.current_reader.get_progress()
    if nested_progress is not None:
        nested_position = nested_progress.position
        if nested_position is None:
            raise ValueError('A concat source should only be created with '
                             'sub-sources that create readers that perform '
                             'progress reporting and dynamic work rebalancing '
                             'using positions')

    concat_position = iobase.ConcatPosition(reader_index, nested_position)
    return iobase.ReaderProgress(
        position=iobase.ReaderPosition(concat_position=concat_position))
def test_in_memory_source_dynamic_split(self):
    """Request record-index splits on a six-element in-memory source."""
    source = inmemory.InMemorySource([10, 20, 30, 40, 50, 60],
                                     coder=FakeCoder())

    def split_request_at(record_index):
        # Split request positioned at the given record index.
        return iobase.DynamicSplitRequest(
            iobase.ReaderProgress(
                position=iobase.ReaderPosition(record_index=record_index)))

    # Unstarted reader.
    with source.reader() as reader:
        self.try_splitting_reader_at(reader, split_request_at(2), None)

    # Proposed split position out of range.
    with source.reader() as reader:
        records = iter(reader)
        next(records)
        self.try_splitting_reader_at(reader, split_request_at(-1), None)
        self.try_splitting_reader_at(reader, split_request_at(10), None)

    # Already read past the proposed split position.
    with source.reader() as reader:
        records = iter(reader)
        for _ in range(3):
            next(records)
        self.try_splitting_reader_at(reader, split_request_at(1), None)
        self.try_splitting_reader_at(reader, split_request_at(2), None)

    # Successful splits: first at index 4, then at index 2 on the same reader.
    with source.reader() as reader:
        records = iter(reader)
        next(records)
        self.try_splitting_reader_at(
            reader,
            split_request_at(4),
            iobase.DynamicSplitResultWithPosition(
                stop_position=iobase.ReaderPosition(record_index=4)))
        self.try_splitting_reader_at(
            reader,
            split_request_at(2),
            iobase.DynamicSplitResultWithPosition(
                stop_position=iobase.ReaderPosition(record_index=2)))
def get_progress(self):
    """Report progress as the current record index.

    Returns:
      None while _current_index is None; otherwise an iobase.ReaderProgress
      whose position holds _current_index as its record index.
    """
    index = self._current_index
    if index is None:
        return None
    position = iobase.ReaderPosition(record_index=index)
    return iobase.ReaderProgress(position=position)
def get_progress(self):
    """Report progress as the byte offset of the last record start.

    Returns:
      An iobase.ReaderProgress whose position holds the range tracker's
      last_record_start as its byte offset.
    """
    record_start = self.range_tracker.last_record_start
    position = iobase.ReaderPosition(byte_offset=record_start)
    return iobase.ReaderProgress(position=position)
def get_progress(self):
    """Report progress as the current record index.

    Returns:
      An iobase.ReaderProgress whose position holds current_index as its
      record index.
    """
    position = iobase.ReaderPosition(record_index=self.current_index)
    return iobase.ReaderProgress(position=position)