def execute_posix_tail(path, stream):
    """Tail ``path`` with a ``tail`` subprocess and guard it with a watcher process.

    Generator that yields exactly once, after both subprocesses have been
    started, and cleans both of them up when it is resumed or closed.

    Args:
        path: File to tail. Passed to ``tail`` as a single argument, so it may
            contain whitespace.
        stream: Destination for the tail output. Used as the subprocess stdout
            only when ``_fileno(stream)`` is truthy; otherwise ``None`` is
            passed to ``Popen``.

    Yields:
        A ``(tail_pid, watcher_pid)`` tuple.
    """
    # Build argv as a list rather than splitting a formatted string: splitting on
    # spaces would break a path containing whitespace into several arguments.
    tail_cmd = ["tail", "-F", "-c", "+0", str(path)]
    stream = stream if _fileno(stream) else None

    # Bind both names before entering the try block so the cleanup in `finally`
    # can always inspect them.
    tail_process = None
    watcher_process = None
    try:
        # A thrown interrupt while opening the subprocesses can leave us unable to
        # clean them up. Wait until we have processes we can terminate before
        # letting the KeyboardInterrupt surface.
        with delay_interrupts():
            tail_process = subprocess.Popen(tail_cmd, stdout=stream)

            # Open a watcher process to check for the orphaning of the tail process
            # (e.g. when the current process is suddenly killed).
            watcher_file = os.path.abspath(watch_orphans.__file__)
            watcher_process = subprocess.Popen(
                [sys.executable, watcher_file, str(os.getpid()), str(tail_process.pid)]
            )

        yield (tail_process.pid, watcher_process.pid)
    finally:
        if tail_process:
            _clean_up_subprocess(tail_process)

        if watcher_process:
            _clean_up_subprocess(watcher_process)
def __iter__(self):
    """Yield the events of one execution: context setup, the plan, then teardown.

    Interrupts can't be raised safely at arbitrary points, so the whole
    iteration runs under ``delay_interrupts()`` and they are only surfaced at
    designated checkpoints. For extra certainty, the process performing the
    execution may wrap itself in another ``with delay_interrupts()``.
    """
    with delay_interrupts():
        for setup_event in self.execution_context_manager.prepare_context():
            yield setup_event

        self.pipeline_context = self.execution_context_manager.get_context()

        closed_by_caller = False
        try:
            # A falsy context means pipeline initialization failed; skip the plan.
            if self.pipeline_context:
                plan_events = self.iterator(
                    execution_plan=self.execution_plan,
                    pipeline_context=self.pipeline_context,
                )
                for plan_event in plan_events:
                    yield plan_event
        except GeneratorExit:
            # Not expected, but if this generator is closed or garbage-collected we
            # must not yield again, or Python raises a RuntimeError
            # (see https://amir.rachum.com/blog/2017/03/03/generator-cleanup/).
            closed_by_caller = True
            raise
        finally:
            # Always drain the shutdown sequence; only forward its events when
            # the generator is still allowed to yield.
            for teardown_event in self.execution_context_manager.shutdown_context():
                if closed_by_caller:
                    continue
                yield teardown_event
def start_run_in_subprocess(
    serialized_execute_run_args, recon_pipeline, event_queue, termination_event
):
    """Run ``_run_in_subprocess`` with interrupts delayed for the whole call.

    Subprocess status updates are forwarded to ``event_queue.put``; run events
    are discarded.
    """

    def _discard_run_event(x):  # pylint: disable=unused-argument
        return None

    with delay_interrupts():
        _run_in_subprocess(
            serialized_execute_run_args,
            recon_pipeline,
            termination_event,
            subprocess_status_handler=event_queue.put,
            run_event_handler=_discard_run_event,
        )
def test_delay_interrupt():
    """delay_interrupts() defers a SIGINT until the block exits, then restores the handler."""

    def _signal_self_and_wait():
        os.kill(os.getpid(), signal.SIGINT)
        time.sleep(1)

    def _observe_under_delay(action):
        # Run `action` inside delay_interrupts() and report where, if anywhere,
        # a KeyboardInterrupt surfaced: (inside the block, after the block).
        seen_inside = False
        seen_outside = False
        try:
            with delay_interrupts():
                try:
                    action()
                except KeyboardInterrupt:
                    seen_inside = True
        except KeyboardInterrupt:
            seen_outside = True
        return seen_inside, seen_outside

    # An interrupt sent inside the block is only raised once the block exits
    seen_inside, seen_outside = _observe_under_delay(_signal_self_and_wait)
    assert seen_outside
    assert not seen_inside

    # Verify standard interrupt handler is restored
    default_handler_fired = False
    try:
        _signal_self_and_wait()
    except KeyboardInterrupt:
        default_handler_fired = True
    assert default_handler_fired

    # No exception if no signal thrown
    seen_inside, seen_outside = _observe_under_delay(lambda: time.sleep(1))
    assert not seen_outside
    assert not seen_inside
def test_delay_interrupt():
    """delay_interrupts() defers an interrupt until the block exits, then restores the handler."""

    def _where_interrupt_surfaced(action):
        # Execute `action` under delay_interrupts() and return a pair of flags:
        # (raised inside the block, raised after leaving the block).
        raised_inside = False
        raised_after = False
        try:
            with delay_interrupts():
                try:
                    action()
                except KeyboardInterrupt:
                    raised_inside = True
        except KeyboardInterrupt:
            raised_after = True
        return raised_inside, raised_after

    # The interrupt is held back until the delay block is left
    raised_inside, raised_after = _where_interrupt_surfaced(_send_interrupt_to_self)
    assert raised_after
    assert not raised_inside

    # Verify standard interrupt handler is restored
    raised_normally = False
    try:
        _send_interrupt_to_self()
    except KeyboardInterrupt:
        raised_normally = True
    assert raised_normally

    # No exception if no signal thrown
    raised_inside, raised_after = _where_interrupt_surfaced(lambda: time.sleep(5))
    assert not raised_after
    assert not raised_inside
def execute_run_command(input_file, output_file):
    """Read run arguments from ``input_file``, execute the run, and stream events to ``output_file``.

    Returns whatever ``_execute_run_command_body`` returns.
    """
    # Ensure that interrupts from the run launcher only happen inside user code or
    # specially designated checkpoints
    with delay_interrupts():
        run_args = check.inst(read_unary_input(input_file), ExecuteRunArgs)
        recon_pipeline = recon_pipeline_from_origin(run_args.pipeline_origin)

        with DagsterInstance.from_ref(run_args.instance_ref) as instance, ipc_write_stream(
            output_file
        ) as ipc_stream:

            def _forward_to_stream(event):
                ipc_stream.send(event)

            return _execute_run_command_body(
                recon_pipeline,
                run_args.pipeline_run_id,
                instance,
                _forward_to_stream,
            )
def test_raise_delayed_interrupts():
    """A delayed interrupt is raised by raise_delayed_interrupts(), not again on block exit."""
    # Records, in order, each place a KeyboardInterrupt surfaced.
    surfaced_at = []
    try:
        with delay_interrupts():
            _send_interrupt_to_self()
            try:
                raise_delayed_interrupts()
            except KeyboardInterrupt:
                surfaced_at.append("checkpoint")
    except KeyboardInterrupt:
        surfaced_at.append("after_delay")

    assert "checkpoint" in surfaced_at
    assert "after_delay" not in surfaced_at
def test_calling_raise_interrupts_immediately_also_raises_any_delayed_interrupts():
    """Entering raise_interrupts_immediately() surfaces an interrupt that was already delayed."""
    # Records, in order, each place a KeyboardInterrupt surfaced.
    surfaced_at = []
    try:
        with delay_interrupts():
            _send_interrupt_to_self()
            try:
                with raise_interrupts_immediately():
                    pass
            except KeyboardInterrupt:
                surfaced_at.append("raise_immediately")
    except KeyboardInterrupt:
        surfaced_at.append("after_delay")

    assert "raise_immediately" in surfaced_at
    assert "after_delay" not in surfaced_at
def test_interrupt_inside_nested_delay_and_raise():
    """Inside raise_interrupts_immediately() nested in delay_interrupts(), interrupts are not delayed."""
    # Records, in order, each place a KeyboardInterrupt surfaced.
    surfaced_at = []
    try:
        with delay_interrupts():
            with raise_interrupts_immediately():
                try:
                    _send_interrupt_to_self()
                except KeyboardInterrupt:
                    surfaced_at.append("nested_raise")
    except KeyboardInterrupt:
        surfaced_at.append("after_delay")

    assert "nested_raise" in surfaced_at
    assert "after_delay" not in surfaced_at
def _execute_command_in_child_process(event_queue, command):
    """Run a ChildProcessCommand and report its progress to the parent over ``event_queue``.

    Puts a start event, then every event produced by ``command.execute()``, then
    a done event. Any ``Exception`` or ``KeyboardInterrupt`` raised along the way
    is reported as a system error event instead of propagating.
    """
    check.inst_param(command, "command", ChildProcessCommand)

    with delay_interrupts():
        child_pid = os.getpid()
        event_queue.put(ChildProcessStartEvent(pid=child_pid))

        try:
            for step_event in command.execute():
                event_queue.put(step_event)
            event_queue.put(ChildProcessDoneEvent(pid=child_pid))
        except (Exception, KeyboardInterrupt):  # pylint: disable=broad-except
            # Serialize the active exception so it can cross the process boundary.
            error_info = serializable_error_info_from_exc_info(sys.exc_info())
            failure_event = ChildProcessSystemErrorEvent(pid=child_pid, error_info=error_info)
            event_queue.put(failure_event)
def execute_windows_tail(path, stream):
    """Tail ``path`` by launching the polling script as a subprocess.

    Generator that yields ``(tail_pid, None)`` once the subprocess is running
    and terminates the subprocess when it is resumed or closed.
    """
    # Cannot use multiprocessing here because we may already be in a daemonized
    # process. Instead, invoke a thin script that polls the file and dumps output to
    # stdout. The current pid is passed along so the poll process can kill itself if
    # it becomes orphaned.
    poll_script = os.path.abspath(poll_compute_logs.__file__)
    stream = stream if _fileno(stream) else None

    try:
        tail_process = None
        # A thrown interrupt while opening the subprocess can leave us unable to
        # clean it up; hold the KeyboardInterrupt until we have a process we can
        # terminate.
        with delay_interrupts():
            poll_cmd = [sys.executable, poll_script, path, str(os.getpid())]
            tail_process = subprocess.Popen(poll_cmd, stdout=stream)

        yield (tail_process.pid, None)
    finally:
        if tail_process:
            # Wait two polling intervals before terminating the poller
            # (presumably so it can flush the remaining output — confirm).
            grace_period = 2 * poll_compute_logs.POLLING_INTERVAL
            time.sleep(grace_period)
            tail_process.terminate()
def core_dagster_event_sequence_for_step(step_context, prior_attempt_count):
    """
    Execute the step described by ``step_context`` and yield its DagsterEvents.

    Yields, in order: a start (or restart) event, object-store events for loaded
    inputs, input type-check events, events derived from the user's compute
    function, and finally a step success event. Exceptions that bubble up during
    the computation of the step are not caught here.
    """
    check.inst_param(step_context, "step_context", SystemStepExecutionContext)
    check.int_param(prior_attempt_count, "prior_attempt_count")

    # A positive attempt count means this is a retry of the step
    is_retry = prior_attempt_count > 0
    if is_retry:
        yield DagsterEvent.step_restarted_event(step_context, prior_attempt_count)
    else:
        yield DagsterEvent.step_start_event(step_context)

    # Resolve every input to its in-memory value, emitting an event for each
    # object store operation encountered along the way.
    resolved_inputs = {}
    for input_name, input_value in _input_values_from_intermediate_storage(step_context):
        if isinstance(input_value, ObjectStoreOperation):
            yield DagsterEvent.object_store_operation(
                step_context,
                ObjectStoreOperation.serializable(input_value, value_name=input_name),
            )
            resolved_inputs[input_name] = input_value.obj
            continue

        if isinstance(input_value, MultipleStepOutputsListWrapper):
            for store_op in input_value:
                yield DagsterEvent.object_store_operation(
                    step_context,
                    ObjectStoreOperation.serializable(store_op, value_name=input_name),
                )
            resolved_inputs[input_name] = [store_op.obj for store_op in input_value]
            continue

        resolved_inputs[input_name] = input_value

    for input_name, input_value in resolved_inputs.items():
        type_check_events = check.generator(
            _type_checked_event_sequence_for_input(step_context, input_name, input_value)
        )
        for event in type_check_events:
            yield event

    with time_execution_scope() as timer_result:
        raw_user_events = check.generator(
            _user_event_sequence_for_step_compute_fn(step_context, resolved_inputs)
        )
        checked_user_events = check.generator(
            _step_output_error_checked_user_event_sequence(step_context, raw_user_events)
        )

        # This loop must stay inside the timer block above so that the time spent
        # consuming the user's events is recorded accurately.
        for user_event in checked_user_events:
            if isinstance(user_event, Output):
                for event in _create_step_events_for_output(step_context, user_event):
                    yield event
                continue

            if isinstance(user_event, (AssetMaterialization, Materialization)):
                yield DagsterEvent.step_materialization(step_context, user_event)
                continue

            if isinstance(user_event, ExpectationResult):
                yield DagsterEvent.step_expectation_result(step_context, user_event)
                continue

            check.failed(
                "Unexpected event {event}, should have been caught earlier".format(
                    event=user_event
                )
            )

    # We only want to log exactly one step success event or failure event if
    # possible, so hold any interrupts (which normally log a failure event) until
    # the success event has finished.
    with delay_interrupts():
        success_data = StepSuccessData(duration_ms=timer_result.millis)
        yield DagsterEvent.step_success_event(step_context, success_data)
def pipeline_execute_command(**kwargs):
    """Run ``execute_execute_command`` against the default DagsterInstance with interrupts delayed.

    All keyword arguments are passed through as a single dict.
    """
    with delay_interrupts(), DagsterInstance.get() as instance:
        execute_execute_command(instance, kwargs)
for block in iter(lambda: file.read(1024), None): if block: print(block, end="", file=stream) # pylint: disable=print-call else: if pop_delayed_interrupts() or ( parent_pid and current_process_is_orphaned(parent_pid)): return time.sleep(POLLING_INTERVAL) def execute_polling(args): if not args or len(args) != 3: return filepath = args[0] parent_pid = int(args[1]) ipc_output_file = args[2] # Signal to the calling process that we have started and are # ready to receive the signal to terminate once execution has finished with open(ipc_output_file, "w"): pass tail_polling(filepath, sys.stdout, parent_pid) if __name__ == "__main__": with delay_interrupts(): execute_polling(sys.argv[1:])