예제 #1
0
    def test_inactive_bundle_processor_returns_empty_split_response(self):
        """A split request against an inactive bundle gets an empty response.

        The same split request is issued twice: first when the target
        instruction has only been activated in the cache, and again after a
        mock bundle processor has been registered for it and then released.
        Both times the worker is expected to answer with a default
        ``ProcessBundleSplitResponse``.
        """
        cache = BundleProcessorCache(None, None, {})
        cache.activate('instruction_id')
        worker = SdkWorker(cache)

        target_bundle = beam_fn_api_pb2.ProcessBundleSplitRequest(
            instruction_id='instruction_id')
        split_request = beam_fn_api_pb2.InstructionRequest(
            instruction_id='split_instruction_id',
            process_bundle_split=target_bundle)

        def empty_split_response():
            # A fresh message per comparison, echoing the split request's id.
            return beam_fn_api_pb2.InstructionResponse(
                instruction_id='split_instruction_id',
                process_bundle_split=(
                    beam_fn_api_pb2.ProcessBundleSplitResponse()))

        # Case 1: the instruction is activated but nothing is registered.
        self.assertEqual(
            worker.do_instruction(split_request), empty_split_response())

        # Case 2: add a mock bundle processor as if it was running, then
        # release it before splitting again.
        cache.active_bundle_processors['instruction_id'] = (
            'descriptor_id', mock.MagicMock())
        cache.release('instruction_id')
        self.assertEqual(
            worker.do_instruction(split_request), empty_split_response())
예제 #2
0
    def test_failed_bundle_processor_returns_failed_split_response(self):
        """A split request against a discarded bundle reports a failure.

        A mock bundle processor is registered for the activated instruction
        and then discarded from the cache; the subsequent split request must
        come back with an error mentioning that bundle processing failed.
        """
        cache = BundleProcessorCache(None, None, {})
        cache.activate('instruction_id')
        worker = SdkWorker(cache)

        # Add a mock bundle processor as if it was running before it's
        # discarded.
        cache.active_bundle_processors['instruction_id'] = (
            'descriptor_id', mock.MagicMock())
        cache.discard('instruction_id')

        target_bundle = beam_fn_api_pb2.ProcessBundleSplitRequest(
            instruction_id='instruction_id')
        split_request = beam_fn_api_pb2.InstructionRequest(
            instruction_id='split_instruction_id',
            process_bundle_split=target_bundle)

        response = worker.do_instruction(split_request)
        expected_fragment = (
            'Bundle processing associated with instruction_id has failed')
        hc.assert_that(response.error, hc.contains_string(expected_fragment))
예제 #3
0
  def _generate_splits_for_testing(self,
                                   split_manager,
                                   inputs,  # type: Mapping[str, PartitionableBuffer]
                                   process_bundle_id):
    # type: (...) -> List[beam_fn_api_pb2.ProcessBundleSplitResponse]

    """Sends the bundle input and issues the splits a split manager asks for.

    ``split_manager`` is called with the number of decoded input elements and
    must return a generator. Every value the generator yields is either
    ``None`` (no split request is sent for that step) or a fraction of the
    remainder to request a split at. The outcome of each step is sent back
    into the generator: the split response payload, or ``None`` when no split
    was requested or the worker reported 'Unknown process bundle'.

    Args:
      split_manager: callable taking the element count and returning the
        generator described above.
      inputs: mapping from read transform id to its buffered input; it is
        unpacked with ``only_element`` (presumably requires exactly one
        entry - confirm against that helper).
      process_bundle_id: instruction id of the bundle to be split.

    Returns:
      The ``process_bundle_split`` payload of every split request that was
      answered without an error, in request order.

    Raises:
      RuntimeError: if, after the retries, a split response still carries an
        error other than 'Unknown process bundle'.
    """
    split_results = []  # type: List[beam_fn_api_pb2.ProcessBundleSplitResponse]
    read_transform_id, buffer_data = only_element(inputs.items())
    byte_stream = b''.join(buffer_data)
    coder_impl = self.bundle_context_manager.get_input_coder_impl(
        read_transform_id)
    num_elements = len(list(coder_impl.decode_all(byte_stream)))

    # Start the split manager in case it wants to set any breakpoints.
    split_manager_generator = split_manager(num_elements)
    try:
      split_fraction = next(split_manager_generator)
    except StopIteration:
      manager_exhausted = True
    else:
      manager_exhausted = False

    # Send all the data. This happens even if no split will be requested.
    self._send_input_to_worker(
        process_bundle_id, read_transform_id, [byte_stream])

    assert self._worker_handler is not None

    if manager_exhausted:
      return split_results

    # Errors for which the same split request is retried after a short sleep.
    retriable_errors = ('Instruction not running', 'not yet scheduled')
    retry_delays = (0.05, 0.1, 0.2)

    # Execute the requested splits until the generator stops.
    while True:
      split_result = None
      if split_fraction is not None:
        desired_split = (
            beam_fn_api_pb2.ProcessBundleSplitRequest.DesiredSplit(
                fraction_of_remainder=split_fraction,
                estimated_input_elements=num_elements))
        split_request = beam_fn_api_pb2.InstructionRequest(
            process_bundle_split=beam_fn_api_pb2.ProcessBundleSplitRequest(
                instruction_id=process_bundle_id,
                desired_splits={read_transform_id: desired_split}))
        split_response = self._worker_handler.control_conn.push(
            split_request).get()  # type: beam_fn_api_pb2.InstructionResponse
        for delay in retry_delays:
          if any(msg in split_response.error for msg in retriable_errors):
            time.sleep(delay)
            split_response = self._worker_handler.control_conn.push(
                split_request).get()
        if 'Unknown process bundle' in split_response.error:
          # It may have finished too fast; report "no split" to the manager.
          pass
        elif split_response.error:
          raise RuntimeError(split_response.error)
        else:
          split_result = split_response.process_bundle_split
          split_results.append(split_result)
      try:
        split_fraction = split_manager_generator.send(split_result)
      except StopIteration:
        return split_results