def launch_step(self, step_context, prior_attempts_count):
    """Submit the step as a Databricks run, wait for it, and yield its events.

    A step run ref is built from ``step_context`` and uploaded together with
    its artifacts, a Databricks task is submitted, and the call blocks until
    the run completes. When ``self.wait_for_logs`` is truthy the cluster logs
    are logged afterwards, even if waiting raised. Each event returned by
    ``self.get_step_events`` is then passed to ``log_step_event`` and yielded.

    Args:
        step_context: Context of the step to launch; its ``pipeline_run`` and
            ``log`` attributes are read here.
        prior_attempts_count: Forwarded unchanged to
            ``step_context_to_step_run_ref``.

    Yields:
        Every event from ``self.get_step_events(run_id, step_key)``, in order.
    """
    ref = step_context_to_step_run_ref(
        step_context,
        prior_attempts_count,
        self.local_pipeline_package_path,
    )
    pipeline_run_id = step_context.pipeline_run.run_id
    logger = step_context.log
    key = ref.step_key

    self._upload_artifacts(logger, ref, pipeline_run_id, key)

    databricks_task = self._get_databricks_task(pipeline_run_id, key)
    submitted_run_id = self.databricks_runner.submit_run(
        self.run_config,
        databricks_task,
    )

    try:
        # When invoked inside a `delay_interrupts` context, re-enable
        # interrupts for the duration of the wait so that slow or hanging
        # steps can still be terminated.
        with raise_interrupts_immediately():
            self.databricks_runner.wait_for_run_to_complete(
                logger,
                submitted_run_id,
            )
    finally:
        # Runs on both success and failure of the wait above.
        if self.wait_for_logs:
            self._log_logs_from_cluster(logger, submitted_run_id)

    for step_event in self.get_step_events(pipeline_run_id, key):
        log_step_event(step_context, step_event)
        yield step_event
def launch_step(self, step_context, prior_attempts_count):
    """Submit the step as a Databricks run and stream its events.

    A step run ref is built from ``step_context`` and uploaded together with
    its artifacts, then a Databricks task is submitted. Events are delegated
    to ``self.step_events_iterator`` while interrupts are allowed through.
    Compute logs are always logged once iteration ends (normally or not),
    followed by the cluster logs when ``self.wait_for_logs`` is truthy.

    Args:
        step_context: Context of the step to launch; its ``pipeline_run`` and
            ``log`` attributes are read here.
        prior_attempts_count: Forwarded unchanged to
            ``step_context_to_step_run_ref``.

    Yields:
        Whatever ``self.step_events_iterator`` yields for this step and run.
    """
    ref = step_context_to_step_run_ref(
        step_context,
        prior_attempts_count,
        self.local_dagster_job_package_path,
    )
    pipeline_run_id = step_context.pipeline_run.run_id
    logger = step_context.log
    key = ref.step_key

    self._upload_artifacts(logger, ref, pipeline_run_id, key)

    databricks_task = self._get_databricks_task(pipeline_run_id, key)
    submitted_run_id = self.databricks_runner.submit_run(
        self.run_config,
        databricks_task,
    )

    try:
        # When invoked inside a `capture_interrupts` context, re-enable
        # interrupts while waiting for execution so that slow or hanging
        # steps can still be terminated.
        with raise_execution_interrupts():
            yield from self.step_events_iterator(
                step_context,
                key,
                submitted_run_id,
            )
    finally:
        self.log_compute_logs(logger, pipeline_run_id, key)
        # NOTE: the original author flags this `wait_for_logs` path as
        # somewhat obsolete.
        if self.wait_for_logs:
            self._log_logs_from_cluster(logger, submitted_run_id)
def launch_step(self, step_context, prior_attempts_count):
    """Add the step to the EMR cluster and hand off to the completion waiter.

    A step run ref is built from ``step_context`` and posted together with
    its artifacts, an EMR step definition is created and added to the job
    flow identified by ``self.cluster_id``, and the first returned step id is
    passed on to ``self.wait_for_completion_and_log``.

    Args:
        step_context: Context of the step to launch; its ``pipeline_run``,
            ``log`` and ``solid.name`` attributes are read here.
        prior_attempts_count: Forwarded unchanged to
            ``step_context_to_step_run_ref``.

    Returns:
        The result of ``self.wait_for_completion_and_log(...)``.
    """
    ref = step_context_to_step_run_ref(
        step_context,
        prior_attempts_count,
        self.local_pipeline_package_path,
    )
    pipeline_run_id = step_context.pipeline_run.run_id
    logger = step_context.log
    key = ref.step_key

    self._post_artifacts(logger, ref, pipeline_run_id, key)

    step_definition = self._get_emr_step_def(
        pipeline_run_id,
        key,
        step_context.solid.name,
    )
    # Only one step definition is submitted, so only the first id is used.
    added_step_ids = self.emr_job_runner.add_job_flow_steps(
        logger,
        self.cluster_id,
        [step_definition],
    )
    submitted_step_id = added_step_ids[0]

    return self.wait_for_completion_and_log(
        logger,
        pipeline_run_id,
        key,
        submitted_step_id,
        step_context,
    )
def test_step_context_to_step_run_ref():
    """Round-trip a step context through a step run ref on an ephemeral instance.

    Checks that the ref carries the run config and run id of the pipeline
    run, and that the step rebuilt from the ref matches the original step on
    the compared attributes.
    """
    with DagsterInstance.ephemeral() as instance:
        original_context = initialize_step_context("", instance)
        original_step = original_context.step

        ref = step_context_to_step_run_ref(original_context, 0)
        assert ref.run_config == original_context.pipeline_run.run_config
        assert ref.run_id == original_context.pipeline_run.run_id

        restored_context = step_run_ref_to_step_context(ref, instance)
        restored_step = restored_context.step

        # Attribute groups are compared in the same order as the explicit
        # assertions they replace.
        for attr in ("pipeline_name", "step_inputs", "step_outputs", "kind"):
            assert getattr(restored_step, attr) == getattr(original_step, attr)

        assert restored_step.solid_handle.name == original_step.solid_handle.name

        for attr in ("logging_tags", "tags"):
            assert getattr(restored_step, attr) == getattr(original_step, attr)
def test_step_context_to_step_run_ref():
    """Round-trip a step context through a step run ref and compare the results.

    Checks that the ref carries the run config and run id of the pipeline
    run, that the rebuilt context has the same required resource keys, and
    that the rebuilt step matches the original step on the compared
    attributes.
    """
    original_context = initialize_step_context('')
    original_step = original_context.step

    ref = step_context_to_step_run_ref(original_context, 0)
    assert ref.run_config == original_context.pipeline_run.run_config
    assert ref.run_id == original_context.pipeline_run.run_id

    restored_context = step_run_ref_to_step_context(ref)
    assert (
        restored_context.required_resource_keys
        == original_context.required_resource_keys
    )

    restored_step = restored_context.step

    # Attribute groups are compared in the same order as the explicit
    # assertions they replace.
    for attr in (
        "pipeline_name",
        "key_suffix",
        "step_inputs",
        "step_outputs",
        "kind",
    ):
        assert getattr(restored_step, attr) == getattr(original_step, attr)

    assert restored_step.solid_handle.name == original_step.solid_handle.name

    for attr in ("logging_tags", "tags"):
        assert getattr(restored_step, attr) == getattr(original_step, attr)
def launch_step(self, step_context, prior_attempts_count):
    """Add the step to the EMR cluster, wait for it, and yield its events.

    A step run ref is built from ``step_context`` and posted together with
    its artifacts, an EMR step definition is created and added to the job
    flow identified by ``self.cluster_id``, and the call blocks until that
    step completes. When ``self.wait_for_logs`` is truthy the step's logs are
    logged from S3 afterwards, even if waiting raised. Each event returned by
    ``self.get_step_events`` is then passed to ``log_step_event`` and yielded.

    Args:
        step_context: Context of the step to launch; its ``pipeline_run``,
            ``log`` and ``solid.name`` attributes are read here.
        prior_attempts_count: Forwarded unchanged to
            ``step_context_to_step_run_ref``.

    Yields:
        Every event from
        ``self.get_step_events(step_context, run_id, step_key)``, in order.

    Raises:
        Whatever ``wait_for_emr_steps_to_complete`` raises; the exception
        propagates after the log fetch in the ``finally`` clause has run.
    """
    step_run_ref = step_context_to_step_run_ref(
        step_context,
        prior_attempts_count,
        self.local_pipeline_package_path,
    )
    run_id = step_context.pipeline_run.run_id
    log = step_context.log
    step_key = step_run_ref.step_key

    self._post_artifacts(log, step_run_ref, run_id, step_key)

    emr_step_def = self._get_emr_step_def(run_id, step_key, step_context.solid.name)
    # Only one step definition is submitted, so only the first id is used.
    emr_step_id = self.emr_job_runner.add_job_flow_steps(
        log, self.cluster_id, [emr_step_def]
    )[0]

    try:
        self.emr_job_runner.wait_for_emr_steps_to_complete(
            log, self.cluster_id, [emr_step_id]
        )
    finally:
        # Fetch the logs on failure as well as on success: a failed step is
        # when they are most needed. Guarding the fetch with `finally`
        # mirrors the handling in the Databricks launchers.
        if self.wait_for_logs:
            self._log_logs_from_s3(log, emr_step_id)

    for event in self.get_step_events(step_context, run_id, step_key):
        log_step_event(step_context, event)
        yield event
def launch_step(self, step_context, prior_attempts_count):
    """Submit the step as a Databricks run, wait for it, and yield its events.

    A step run ref is built from ``step_context`` and uploaded together with
    its artifacts, a Databricks task is submitted, and the call blocks until
    the run completes. When ``self.wait_for_logs`` is truthy the cluster logs
    are logged afterwards, even if waiting raised. Each event returned by
    ``self.get_step_events`` is then passed to ``log_step_event`` and yielded.

    Args:
        step_context: Context of the step to launch; its ``pipeline_run`` and
            ``log`` attributes are read here.
        prior_attempts_count: Forwarded unchanged to
            ``step_context_to_step_run_ref``.

    Yields:
        Every event from ``self.get_step_events(run_id, step_key)``, in order.
    """
    ref = step_context_to_step_run_ref(
        step_context,
        prior_attempts_count,
        self.local_pipeline_package_path,
    )
    pipeline_run_id = step_context.pipeline_run.run_id
    logger = step_context.log
    key = ref.step_key

    self._upload_artifacts(logger, ref, pipeline_run_id, key)

    databricks_task = self._get_databricks_task(pipeline_run_id, key)
    submitted_run_id = self.databricks_runner.submit_run(
        self.run_config,
        databricks_task,
    )

    try:
        self.databricks_runner.wait_for_run_to_complete(
            logger,
            submitted_run_id,
        )
    finally:
        # Runs on both success and failure of the wait above.
        if self.wait_for_logs:
            self._log_logs_from_cluster(logger, submitted_run_id)

    for step_event in self.get_step_events(pipeline_run_id, key):
        log_step_event(step_context, step_event)
        yield step_event