def run_pipeline(self, pipeline):
  """Execute test pipeline and verify test matcher."""
  options = pipeline._options.view_as(TestOptions)
  on_success_matcher = options.on_success_matcher
  is_streaming = options.view_as(StandardOptions).streaming

  # [BEAM-1889] Do not send this option to remote workers; there is no need
  # for remote executors to see the matcher.
  options.on_success_matcher = None

  self.result = super(TestDirectRunner, self).run_pipeline(pipeline)

  try:
    if not is_streaming:
      self.result.wait_until_finish()

    if on_success_matcher:
      from hamcrest import assert_that as hc_assert_that
      hc_assert_that(self.result, pickler.loads(on_success_matcher))
  finally:
    if not PipelineState.is_terminal(self.result.state):
      self.result.cancel()
      self.result.wait_until_finish()

  return self.result
def _mark_all_computed(self):
  # type: () -> None

  """Marks all the PCollections upon a successful pipeline run."""
  if not self._result:
    return

  while not PipelineState.is_terminal(self._result.state):
    with self._result_lock:
      bcj = ie.current_env().get_background_caching_job(self._user_pipeline)

      # If the background caching job is done, just wait for this run to
      # finish.
      if bcj and bcj.is_done():
        self._result.wait_until_finish()

      # Cancel the run once the allotted duration has elapsed.
      elif time.time() - self._start >= self._duration_secs:
        self._result.cancel()
        self._result.wait_until_finish()

      # Cancel the run once all result streams are done.
      elif all(s.is_done() for s in self._streams.values()):
        self._result.cancel()
        self._result.wait_until_finish()

    time.sleep(0.1)

  # Mark the PCollections as computed so that Interactive Beam does not need
  # to re-compute them.
  if self._result.state is PipelineState.DONE and self._set_computed:
    ie.current_env().mark_pcollection_computed(self._pcolls)
def run_pipeline(self, pipeline, options):
  """Execute test pipeline and verify test matcher."""
  test_options = options.view_as(TestOptions)
  on_success_matcher = test_options.on_success_matcher
  is_streaming = options.view_as(StandardOptions).streaming

  # [BEAM-1889] Do not send this option to remote workers; there is no need
  # for remote executors to see the matcher.
  test_options.on_success_matcher = None

  self.result = super(TestDirectRunner, self).run_pipeline(pipeline, options)

  try:
    if not is_streaming:
      self.result.wait_until_finish()

    if on_success_matcher:
      from hamcrest import assert_that as hc_assert_that
      hc_assert_that(self.result, pickler.loads(on_success_matcher))
  finally:
    if not PipelineState.is_terminal(self.result.state):
      self.result.cancel()
      self.result.wait_until_finish()

  return self.result
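# A minimal usage sketch (not part of the source above) showing how an
# on_success_matcher typically reaches run_pipeline through TestOptions.
# PipelineStateMatcher and TestPipeline are Beam testing utilities; the test
# setup below is illustrative only.
from hamcrest.core.core.allof import all_of

from apache_beam.runners.runner import PipelineState
from apache_beam.testing.pipeline_verifiers import PipelineStateMatcher
from apache_beam.testing.test_pipeline import TestPipeline


def build_integration_test_args():
  test_pipeline = TestPipeline(is_integration_test=True)
  state_verifier = PipelineStateMatcher(PipelineState.DONE)
  # get_full_options_as_args pickles the matcher into --on_success_matcher,
  # which run_pipeline above unpickles and asserts against the result.
  return test_pipeline.get_full_options_as_args(
      on_success_matcher=all_of(state_verifier))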
def _cancel(self):
  """Marks the timer as triggered and cancels the pipeline if still running."""
  self._timer_triggered = True
  if not PipelineState.is_terminal(self._pipeline_result.state):
    try:
      self._pipeline_result.cancel()
    except NotImplementedError:
      # Ignore the cancel invocation if it is never implemented by the runner.
      pass
def cancel(self):
  """Cancels this background caching job."""
  if not PipelineState.is_terminal(self._pipeline_result.state):
    try:
      self._pipeline_result.cancel()
    except NotImplementedError:
      # Ignore the cancel invocation if it is never implemented by the runner.
      pass
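# A small standalone sketch of the defensive-cancel pattern used by _cancel
# and cancel above; the helper name is hypothetical and not part of the
# source.
from apache_beam.runners.runner import PipelineState


def cancel_if_running(pipeline_result):
  """Cancels a result only if it is still running; returns True if cancelled."""
  if PipelineState.is_terminal(pipeline_result.state):
    return False
  try:
    pipeline_result.cancel()
  except NotImplementedError:
    # Some runners never implement cancel(); treat that as nothing to do.
    return False
  return True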
def _background_caching_job_condition_checker(self):
  """Polls the pipeline state and cancels once an end condition is met."""
  while True:
    with self._result_lock:
      if PipelineState.is_terminal(self._pipeline_result.state):
        break

    if self._should_end_condition_checker():
      self.cancel()
      break
    time.sleep(0.5)
def wait_pipeline_result(self, result: PipelineResult):
  """Waits for the pipeline to finish, cancelling it and raising on timeout."""
  if self.wait_until_finish and self.execution_timeout_sec:
    # wait_until_finish expects a duration in milliseconds.
    timeout_in_milliseconds = 1000 * (
        self.execution_timeout_sec -
        self.pipeline_level_execution_timeout_shift)
    result.wait_until_finish(timeout_in_milliseconds)
    if not PipelineState.is_terminal(result.state):
      result.cancel()
      raise RuntimeError(
          f'Job {self.id} timed out ({self.execution_timeout_sec})')
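# A worked example (values are made up) of the timeout arithmetic used in
# wait_pipeline_result: the shift is subtracted from the overall budget and
# the remainder is converted to the milliseconds expected by
# wait_until_finish.
execution_timeout_sec = 600                  # hypothetical overall budget
pipeline_level_execution_timeout_shift = 60  # hypothetical per-pipeline shift
timeout_in_milliseconds = 1000 * (
    execution_timeout_sec - pipeline_level_execution_timeout_shift)
assert timeout_in_milliseconds == 540_000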
def wait_until_finish(self, duration=None):
  if not PipelineState.is_terminal(self.state):
    if duration:
      raise NotImplementedError(
          'DirectRunner does not support duration argument.')
    try:
      self._executor.await_completion()
      self._state = PipelineState.DONE
    except:  # pylint: disable=broad-except
      self._state = PipelineState.FAILED
      raise
  return self._state
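# A hedged usage sketch (not from the source) for the DirectRunner result
# above: run a trivial pipeline on the default DirectRunner and block until
# the executor reports completion.
import apache_beam as beam
from apache_beam.runners.runner import PipelineState

p = beam.Pipeline()  # defaults to the DirectRunner
_ = p | beam.Create([1, 2, 3]) | beam.Map(lambda x: x * x)
result = p.run()
state = result.wait_until_finish()  # blocks on _executor.await_completion()
assert state == PipelineState.DONE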
def clear(self, pipeline):
  # type: (beam.Pipeline) -> bool

  """Clears all recordings of the given pipeline. Returns True if cleared."""
  description = self.describe(pipeline)
  if (not PipelineState.is_terminal(description['state']) and
      description['state'] != PipelineState.STOPPED):
    _LOGGER.warning(
        'Trying to clear a recording with a running pipeline. Did '
        'you forget to call ib.recordings.stop?')
    return False

  ie.current_env().cleanup(pipeline)
  return True
def record(self, pipeline):
  # type: (beam.Pipeline) -> bool

  """Starts a background source recording job for the given pipeline.

  Returns True if the recording job was started.
  """
  description = self.describe(pipeline)
  if (not PipelineState.is_terminal(description['state']) and
      description['state'] != PipelineState.STOPPED):
    _LOGGER.warning(
        'Trying to start a recording with a running pipeline. Did '
        'you forget to call ib.recordings.stop?')
    return False

  if description['size'] > 0:
    _LOGGER.warning(
        'A recording already exists for this pipeline. To start a '
        'recording, make sure to call ib.recordings.clear first.')
    return False

  recording_manager = ie.current_env().get_recording_manager(
      pipeline, create_if_absent=True)
  return recording_manager.record_pipeline()
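# A hedged sketch (assumed usage, not from the source) of the interactive
# recordings API that record and clear above implement; the pipeline contents
# are illustrative.
import apache_beam as beam
from apache_beam.runners.interactive import interactive_beam as ib
from apache_beam.runners.interactive.interactive_runner import InteractiveRunner

p = beam.Pipeline(InteractiveRunner())
# ... attach unbounded sources and transforms to p here ...
ib.recordings.stop(p)    # ensure no recording job is still running
ib.recordings.clear(p)   # drop any previous recording for p
ib.recordings.record(p)  # start a fresh background source recording job
print(ib.recordings.describe(p))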
def _background_caching_job_condition_checker(self):
  """Polls until the pipeline is terminal or an end condition is met."""
  while not PipelineState.is_terminal(self._pipeline_result.state):
    if self._should_end_condition_checker():
      break
    time.sleep(5)