def try_split(self, bundle_split_request):
    """Try to split each data-input operation named in the request.

    While holding ``self.splitting_lock``, every ``DataInputOperation`` in
    ``self.ops`` that has an entry in ``bundle_split_request.desired_splits``
    (keyed by the operation's ``transform_id``) is asked to split. Each
    successful split contributes its primary/residual element applications
    (when present) and one channel split to the response.

    Args:
        bundle_split_request: request whose ``desired_splits`` mapping
            supplies ``fraction_of_remainder`` and
            ``estimated_input_elements`` per transform id.

    Returns:
        A ``beam_fn_api_pb2.ProcessBundleSplitResponse``; it is left empty
        when no operation produced a split.
    """
    response = beam_fn_api_pb2.ProcessBundleSplitResponse()
    with self.splitting_lock:
        for operation in self.ops.values():
            # Only data-input operations are candidates for splitting here.
            if not isinstance(operation, DataInputOperation):
                continue

            requested = bundle_split_request.desired_splits.get(
                operation.transform_id)
            if not requested:
                continue

            outcome = operation.try_split(
                requested.fraction_of_remainder,
                requested.estimated_input_elements)
            if not outcome:
                continue

            (
                last_primary,
                primary_element,
                residual_element,
                first_residual,
            ) = outcome

            if primary_element:
                primary_root = response.primary_roots.add()
                primary_root.CopyFrom(
                    self.delayed_bundle_application(
                        *primary_element).application)

            if residual_element:
                residual_root = response.residual_roots.add()
                residual_root.CopyFrom(
                    self.delayed_bundle_application(*residual_element))

            channel_split = (
                beam_fn_api_pb2.ProcessBundleSplitResponse.ChannelSplit(
                    transform_id=operation.transform_id,
                    last_primary_element=last_primary,
                    first_residual_element=first_residual))
            response.channel_splits.extend([channel_split])

    return response
def test_inactive_bundle_processor_returns_empty_split_response(self):
    """A split request for a bundle that is not running yields an empty split.

    The same split instruction is issued twice: once right after the
    instruction id is activated on the cache, and once after a mock
    processor has been registered and then released. Both calls must
    return an ``InstructionResponse`` carrying an empty
    ``ProcessBundleSplitResponse``.
    """
    cache = BundleProcessorCache(None, None, {})
    cache.activate('instruction_id')
    worker = SdkWorker(cache)

    request = beam_fn_api_pb2.InstructionRequest(
        instruction_id='split_instruction_id',
        process_bundle_split=beam_fn_api_pb2.ProcessBundleSplitRequest(
            instruction_id='instruction_id'))
    # Both scenarios below expect exactly this response.
    expected_empty = beam_fn_api_pb2.InstructionResponse(
        instruction_id='split_instruction_id',
        process_bundle_split=beam_fn_api_pb2.ProcessBundleSplitResponse())

    # Scenario 1: the id is activated but this test has attached no
    # processor to it yet.
    self.assertEqual(worker.do_instruction(request), expected_empty)

    # Scenario 2: add a mock bundle processor as if it was running, then
    # release it before asking for the split.
    mock_processor = mock.MagicMock()
    cache.active_bundle_processors['instruction_id'] = (
        'descriptor_id', mock_processor)
    cache.release('instruction_id')
    self.assertEqual(worker.do_instruction(request), expected_empty)
def process_bundle_split(
    self,
    request,  # type: beam_fn_api_pb2.ProcessBundleSplitRequest
    instruction_id  # type: str
):
    # type: (...) -> beam_fn_api_pb2.InstructionResponse
    """Handle a split request for the bundle named by ``request.instruction_id``.

    Args:
        request: the split request; its ``instruction_id`` is used to look
            up the target processor in ``self.bundle_processor_cache``.
        instruction_id: id echoed back on the returned response.

    Returns:
        An ``InstructionResponse`` that carries either:
        * ``error`` set to the formatted traceback, when the cache lookup
          raises ``RuntimeError``;
        * the processor's ``try_split`` result, when a processor is found;
        * an empty ``ProcessBundleSplitResponse``, when the lookup returns
          a falsy value.
    """
    try:
        bundle_processor = self.bundle_processor_cache.lookup(
            request.instruction_id)
    except RuntimeError:
        return beam_fn_api_pb2.InstructionResponse(
            instruction_id=instruction_id,
            error=traceback.format_exc())

    if bundle_processor:
        split_result = bundle_processor.try_split(request)
    else:
        # Nothing is running for this id, so answer with an empty split.
        # This can happen if the ProcessBundleRequest has not started or
        # has already finished.
        split_result = beam_fn_api_pb2.ProcessBundleSplitResponse()

    return beam_fn_api_pb2.InstructionResponse(
        instruction_id=instruction_id,
        process_bundle_split=split_result)
def _generate_splits_for_testing(
    self,
    split_manager,
    inputs,  # type: Mapping[str, execution.PartitionableBuffer]
    process_bundle_id):
    # type: (...) -> List[beam_fn_api_pb2.ProcessBundleSplitResponse]
    """Drive test-controlled splits of a running bundle and collect results.

    ``split_manager`` is called with the number of decoded input elements
    and must return a generator. Each value it yields is a split fraction
    (or ``None`` to skip issuing a request); the outcome of each step
    (a ``ProcessBundleSplitResponse`` or ``None``) is sent back into the
    generator to obtain the next fraction.

    Args:
        split_manager: callable taking the element count and returning the
            generator described above.
        inputs: mapping expected to hold exactly one
            (read transform id, buffer) entry.
        process_bundle_id: instruction id of the bundle to split.

    Returns:
        The split responses that were actually obtained, in order. Steps
        that produced no usable split are not included.

    Raises:
        RuntimeError: if the worker reports an error other than
            'Unknown process bundle'.
    """

    def _no_split_available(response):
        # True when the worker does not know the bundle or returned an
        # empty split response.
        return (
            'Unknown process bundle' in response.error or
            response.process_bundle_split ==
            beam_fn_api_pb2.ProcessBundleSplitResponse())

    def _push_and_wait(instruction):
        # Send the instruction over the control connection and block on it.
        return self._worker_handler.control_conn.push(instruction).get()

    collected = [
    ]  # type: List[beam_fn_api_pb2.ProcessBundleSplitResponse]

    read_transform_id, buffer_data = only_element(inputs.items())
    byte_stream = b''.join(buffer_data)
    coder_impl = self.bundle_context_manager.get_input_coder_impl(
        read_transform_id)
    num_elements = len(list(coder_impl.decode_all(byte_stream)))

    # Start the split manager in case it wants to set any breakpoints.
    manager = split_manager(num_elements)
    try:
        split_fraction = next(manager)
    except StopIteration:
        exhausted = True
    else:
        exhausted = False

    # Send all the data.
    self._send_input_to_worker(
        process_bundle_id, read_transform_id, [byte_stream])

    assert self._worker_handler is not None

    # Execute the requested splits.
    while not exhausted:
        # Stays None unless a usable split response is obtained below.
        split_result = None
        if split_fraction is not None:
            desired = (
                beam_fn_api_pb2.ProcessBundleSplitRequest.DesiredSplit(
                    fraction_of_remainder=split_fraction,
                    estimated_input_elements=num_elements))
            split_request = beam_fn_api_pb2.InstructionRequest(
                process_bundle_split=(
                    beam_fn_api_pb2.ProcessBundleSplitRequest(
                        instruction_id=process_bundle_id,
                        desired_splits={read_transform_id: desired})))

            split_response = _push_and_wait(
                split_request
            )  # type: beam_fn_api_pb2.InstructionResponse
            # Retry with increasing delays while no split is available.
            for delay in (0.05, 0.1, 0.2):
                if _no_split_available(split_response):
                    time.sleep(delay)
                    split_response = _push_and_wait(split_request)

            # If still unavailable, it may have finished too fast; the
            # result stays None in that case.
            if not _no_split_available(split_response):
                if split_response.error:
                    raise RuntimeError(split_response.error)
                split_result = split_response.process_bundle_split
                collected.append(split_result)

        try:
            split_fraction = manager.send(split_result)
        except StopIteration:
            break

    return collected