Example #1
0
  def try_split(self, bundle_split_request):
    """Attempts the requested splits on the bundle's data input operations.

    While holding ``self.splitting_lock``, every ``DataInputOperation`` in
    ``self.ops`` that has an entry in ``bundle_split_request.desired_splits``
    (keyed by the operation's transform id) is asked to split.  A successful
    split contributes its primary/residual roots (when present) and one
    channel split to the response.

    Args:
      bundle_split_request: request whose ``desired_splits`` maps transform
        ids to the desired split for that transform.

    Returns:
      A ``ProcessBundleSplitResponse``; it is left empty when no operation
      produced a split.
    """
    response = beam_fn_api_pb2.ProcessBundleSplitResponse()
    with self.splitting_lock:
      for operation in self.ops.values():
        # Only data input operations are candidates for splitting here.
        if not isinstance(operation, DataInputOperation):
          continue
        requested = bundle_split_request.desired_splits.get(
            operation.transform_id)
        if not requested:
          continue
        outcome = operation.try_split(
            requested.fraction_of_remainder,
            requested.estimated_input_elements)
        if not outcome:
          continue

        primary_end, element_primary, element_residual, residual_start = (
            outcome)
        if element_primary:
          # Only the ``application`` part of the delayed application is
          # recorded for the primary root.
          primary_root = response.primary_roots.add()
          primary_root.CopyFrom(
              self.delayed_bundle_application(*element_primary).application)
        if element_residual:
          residual_root = response.residual_roots.add()
          residual_root.CopyFrom(
              self.delayed_bundle_application(*element_residual))
        channel_split = (
            beam_fn_api_pb2.ProcessBundleSplitResponse.ChannelSplit(
                transform_id=operation.transform_id,
                last_primary_element=primary_end,
                first_residual_element=residual_start))
        response.channel_splits.extend([channel_split])

    return response
Example #2
0
  def test_inactive_bundle_processor_returns_empty_split_response(self):
    """A split request for a bundle without a processor gets an empty reply.

    The same split request is issued twice: once right after
    ``activate('instruction_id')`` on the cache, and once after a mock
    processor has been registered for that instruction and then released.
    Both times the worker must answer with an empty
    ``ProcessBundleSplitResponse``.
    """
    cache = BundleProcessorCache(None, None, {})
    cache.activate('instruction_id')
    worker = SdkWorker(cache)

    request = beam_fn_api_pb2.InstructionRequest(
        instruction_id='split_instruction_id',
        process_bundle_split=beam_fn_api_pb2.ProcessBundleSplitRequest(
            instruction_id='instruction_id'))
    empty_reply = beam_fn_api_pb2.InstructionResponse(
        instruction_id='split_instruction_id',
        process_bundle_split=beam_fn_api_pb2.ProcessBundleSplitResponse())

    self.assertEqual(worker.do_instruction(request), empty_reply)

    # Register a mock bundle processor as if it had been running, then
    # release it before asking for the split again.
    mock_processor = mock.MagicMock()
    cache.active_bundle_processors['instruction_id'] = (
        'descriptor_id', mock_processor)
    cache.release('instruction_id')

    self.assertEqual(worker.do_instruction(request), empty_reply)
Example #3
0
 def process_bundle_split(
     self,
     request,  # type: beam_fn_api_pb2.ProcessBundleSplitRequest
     instruction_id  # type: str
 ):
   # type: (...) -> beam_fn_api_pb2.InstructionResponse
   """Handles a split request for the bundle named in ``request``.

   Looks up the bundle processor for ``request.instruction_id`` and asks it
   to split.  A ``RuntimeError`` raised by the lookup is reported through
   the ``error`` field of the response (as a formatted traceback) instead of
   propagating.

   Returns:
     An ``InstructionResponse`` tagged with ``instruction_id`` that carries
     either the split response or the lookup error.
   """
   try:
     bundle_processor = self.bundle_processor_cache.lookup(
         request.instruction_id)
   except RuntimeError:
     return beam_fn_api_pb2.InstructionResponse(
         instruction_id=instruction_id, error=traceback.format_exc())

   if bundle_processor:
     split_response = bundle_processor.try_split(request)
   else:
     # Nothing is running for this bundle: the ProcessBundleRequest has
     # either not started yet or has already finished, so answer with an
     # empty split response.
     split_response = beam_fn_api_pb2.ProcessBundleSplitResponse()

   return beam_fn_api_pb2.InstructionResponse(
       instruction_id=instruction_id, process_bundle_split=split_response)
Example #4
0
    def _generate_splits_for_testing(
            self,
            split_manager,
            inputs,  # type: Mapping[str, execution.PartitionableBuffer]
            process_bundle_id):
        # type: (...) -> List[beam_fn_api_pb2.ProcessBundleSplitResponse]
        """Drives ``split_manager`` against a running bundle and collects splits.

        ``split_manager`` is called with the number of input elements and must
        return a generator.  Each value it yields is either ``None`` (no split
        is requested for this step) or a fraction of the remainder to split
        at; the outcome of each step (a split response, or ``None``) is sent
        back into the generator.  All input data is pushed to the worker
        after the generator has been started and before any split is
        requested.

        Args:
            split_manager: callable taking the element count and returning
                the generator described above.
            inputs: mapping holding exactly one entry, from the read
                transform id to its buffered data.
            process_bundle_id: instruction id of the bundle to split.

        Returns:
            The non-empty split responses, in the order they were obtained.

        Raises:
            RuntimeError: if a split response carries an error other than
                one containing 'Unknown process bundle'.
        """
        collected = [
        ]  # type: List[beam_fn_api_pb2.ProcessBundleSplitResponse]
        read_transform_id, buffer_data = only_element(inputs.items())
        byte_stream = b''.join(buffer_data)
        coder_impl = self.bundle_context_manager.get_input_coder_impl(
            read_transform_id)
        num_elements = len(list(coder_impl.decode_all(byte_stream)))

        def nothing_to_report(response):
            # True when the worker did not know the bundle or returned an
            # empty split response.
            return (
                'Unknown process bundle' in response.error
                or response.process_bundle_split
                == beam_fn_api_pb2.ProcessBundleSplitResponse())

        # Start the split manager first so it can set any breakpoints before
        # data starts flowing.
        split_manager_generator = split_manager(num_elements)
        try:
            split_fraction = next(split_manager_generator)
        except StopIteration:
            done = True
        else:
            done = False

        # Push the whole input to the worker in one go.
        self._send_input_to_worker(
            process_bundle_id, read_transform_id, [byte_stream])

        assert self._worker_handler is not None

        # Carry out the splits the manager asks for, one per iteration.
        while not done:
            split_result = None
            if split_fraction is not None:
                desired_split = (
                    beam_fn_api_pb2.ProcessBundleSplitRequest.DesiredSplit(
                        fraction_of_remainder=split_fraction,
                        estimated_input_elements=num_elements))
                bundle_split = beam_fn_api_pb2.ProcessBundleSplitRequest(
                    instruction_id=process_bundle_id,
                    desired_splits={read_transform_id: desired_split})
                split_request = beam_fn_api_pb2.InstructionRequest(
                    process_bundle_split=bundle_split)

                split_response = self._worker_handler.control_conn.push(
                    split_request).get(
                    )  # type: beam_fn_api_pb2.InstructionResponse
                # Retry with increasing pauses while the worker has nothing
                # to report for this bundle.
                for delay in (0.05, 0.1, 0.2):
                    if not nothing_to_report(split_response):
                        break
                    time.sleep(delay)
                    split_response = self._worker_handler.control_conn.push(
                        split_request).get()

                # If there is still nothing to report the bundle may have
                # finished too fast; split_result then stays None.
                if not nothing_to_report(split_response):
                    if split_response.error:
                        raise RuntimeError(split_response.error)
                    split_result = split_response.process_bundle_split
                    collected.append(split_result)

            try:
                split_fraction = split_manager_generator.send(split_result)
            except StopIteration:
                break
        return collected