def launch_step(self, step_context, prior_attempts_count):
    step_run_ref = step_context_to_step_run_ref(step_context, prior_attempts_count)
    run_id = step_context.pipeline_run.run_id

    step_run_dir = os.path.join(self.scratch_dir, run_id, step_run_ref.step_key)
    os.makedirs(step_run_dir)

    step_run_ref_file_path = os.path.join(step_run_dir, PICKLED_STEP_RUN_REF_FILE_NAME)
    with open(step_run_ref_file_path, "wb") as step_pickle_file:
        pickle.dump(step_run_ref, step_pickle_file)

    command_tokens = [
        sys.executable,
        "-m",
        "dagster.core.execution.plan.local_external_step_main",
        step_run_ref_file_path,
    ]
    # If this is being called within a `delay_interrupts` context, allow interrupts
    # while waiting for the subprocess to complete, so that we can terminate slow or
    # hanging steps
    with raise_interrupts_immediately():
        subprocess.call(command_tokens, stdout=sys.stdout, stderr=sys.stderr)

    events_file_path = os.path.join(step_run_dir, PICKLED_EVENTS_FILE_NAME)
    file_manager = LocalFileManager(".")
    events_file_handle = LocalFileHandle(events_file_path)
    events_data = file_manager.read_data(events_file_handle)
    events = pickle.loads(events_data)

    yield from events
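# A minimal sketch of the child-process side of the pickle handoff above: read a
# pickled step run ref, run it, and pickle the resulting events back to disk for the
# parent to collect. `run_step_from_ref`, the event values, and the "events.pkl"
# filename are hypothetical stand-ins; only the file-based protocol mirrors
# `launch_step`.
import os
import pickle
import sys


def run_step_from_ref(step_run_ref):
    # Stand-in for actually executing the step described by `step_run_ref`.
    return ["step_started", "step_success"]


def main(step_run_ref_path):
    with open(step_run_ref_path, "rb") as f:
        step_run_ref = pickle.load(f)

    events = run_step_from_ref(step_run_ref)

    # Write the events next to the input file so the parent process can unpickle them.
    events_path = os.path.join(os.path.dirname(step_run_ref_path), "events.pkl")
    with open(events_path, "wb") as f:
        pickle.dump(events, f)


if __name__ == "__main__":
    main(sys.argv[1])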
def wait_for_completion(self, log, s3, run_id, step_key, emr_step_id, check_interval=15):
    """We want to wait for the EMR steps to complete, and while that's happening, we want to
    yield any events that have been written to S3 for us by the remote process.

    After the EMR steps complete, we want a final chance to fetch events before finishing
    the step.
    """
    done = False
    all_events = []
    # If this is being called within a `delay_interrupts` context, allow interrupts
    # while waiting for the pyspark execution to complete, so that we can terminate slow or
    # hanging steps
    while not done:
        with raise_interrupts_immediately():
            time.sleep(check_interval)  # AWS rate-limits us if we poll it too often
            done = self.emr_job_runner.is_emr_step_complete(log, self.cluster_id, emr_step_id)

        all_events_new = self.read_events(s3, run_id, step_key)

        if len(all_events_new) > len(all_events):
            for i in range(len(all_events), len(all_events_new)):
                yield all_events_new[i]
            all_events = all_events_new
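# A self-contained sketch of the incremental-yield pattern used above: the event
# source is re-read whole on every poll, and only the tail beyond what was already
# yielded is new. There is one final read after the completion check flips, matching
# the "final chance to fetch events" in the docstring. `is_done` and
# `read_all_events` are hypothetical stand-ins for the EMR completion check and the
# S3 read.
import time


def poll_for_events(is_done, read_all_events, check_interval=0.1):
    """Yield each event exactly once, with one final read after completion."""
    num_yielded = 0
    done = False
    while not done:
        time.sleep(check_interval)
        done = is_done()
        events = read_all_events()
        for event in events[num_yielded:]:
            yield event
        num_yielded = len(events)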
def launch_step(self, step_context, prior_attempts_count):
    step_run_ref = step_context_to_step_run_ref(
        step_context, prior_attempts_count, self.local_pipeline_package_path
    )
    run_id = step_context.pipeline_run.run_id
    log = step_context.log

    step_key = step_run_ref.step_key
    self._upload_artifacts(log, step_run_ref, run_id, step_key)

    task = self._get_databricks_task(run_id, step_key)
    databricks_run_id = self.databricks_runner.submit_run(self.run_config, task)

    try:
        # If this is being called within a `delay_interrupts` context, allow interrupts while
        # waiting for the execution to complete, so that we can terminate slow or hanging steps
        with raise_interrupts_immediately():
            self.databricks_runner.wait_for_run_to_complete(log, databricks_run_id)
    finally:
        if self.wait_for_logs:
            self._log_logs_from_cluster(log, databricks_run_id)

    for event in self.get_step_events(run_id, step_key):
        log_step_event(step_context, event)
        yield event
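# The try/finally above is what guarantees cluster logs are surfaced even when the
# wait raises, e.g. because an interrupt arrives mid-wait. A stripped-down,
# runnable sketch of that control flow, with `slow_wait` standing in for
# `wait_for_run_to_complete`:
def slow_wait():
    raise KeyboardInterrupt  # simulate an interrupt arriving mid-wait


logs_fetched = []
try:
    try:
        slow_wait()
    finally:
        logs_fetched.append("cluster logs")  # stand-in for _log_logs_from_cluster
except KeyboardInterrupt:
    pass

assert logs_fetched == ["cluster logs"]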
def test_raise_interrupts_immediately_no_op():
    standard_interrupt = False
    with raise_interrupts_immediately():
        try:
            _send_interrupt_to_self()
        except KeyboardInterrupt:
            standard_interrupt = True

    assert standard_interrupt
def test_calling_raise_interrupts_immediately_also_raises_any_delayed_interrupts():
    interrupt_from_raise_interrupts_immediately = False
    interrupt_after_delay = False
    try:
        with delay_interrupts():
            _send_interrupt_to_self()
            try:
                with raise_interrupts_immediately():
                    pass
            except KeyboardInterrupt:
                interrupt_from_raise_interrupts_immediately = True
    except KeyboardInterrupt:
        interrupt_after_delay = True

    assert interrupt_from_raise_interrupts_immediately
    assert not interrupt_after_delay
def test_interrupt_inside_nested_delay_and_raise():
    interrupt_inside_nested_raise = False
    interrupt_after_delay = False
    try:
        with delay_interrupts():
            with raise_interrupts_immediately():
                try:
                    _send_interrupt_to_self()
                except KeyboardInterrupt:
                    interrupt_inside_nested_raise = True
    except KeyboardInterrupt:
        interrupt_after_delay = True

    assert interrupt_inside_nested_raise
    assert not interrupt_after_delay
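# `_send_interrupt_to_self` is not shown in this section; a plausible POSIX
# implementation delivers SIGINT to the current process, which Python's default
# signal handler raises as KeyboardInterrupt at the next interpreter checkpoint:
import os
import signal


def _send_interrupt_to_self():
    os.kill(os.getpid(), signal.SIGINT)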