def get_external_execution_plan(
    self,
    external_pipeline: ExternalPipeline,
    run_config: Dict[str, Any],
    mode: str,
    step_keys_to_execute: Optional[List[str]],
    known_state: Optional[KnownExecutionState],
) -> ExternalExecutionPlan:
    check.inst_param(external_pipeline, "external_pipeline", ExternalPipeline)
    check.dict_param(run_config, "run_config")
    check.str_param(mode, "mode")
    check.opt_list_param(step_keys_to_execute, "step_keys_to_execute", of_type=str)
    check.opt_inst_param(known_state, "known_state", KnownExecutionState)

    # Resolve the plan snapshot in the user-code gRPC server, which owns the
    # underlying pipeline definition
    execution_plan_snapshot_or_error = sync_get_external_execution_plan_grpc(
        api_client=self.client,
        pipeline_origin=external_pipeline.get_external_origin(),
        run_config=run_config,
        mode=mode,
        pipeline_snapshot_id=external_pipeline.identifying_pipeline_snapshot_id,
        solid_selection=external_pipeline.solid_selection,
        step_keys_to_execute=step_keys_to_execute,
        known_state=known_state,
    )

    # Wrap the snapshot for use in the host process
    return ExternalExecutionPlan(
        execution_plan_snapshot=execution_plan_snapshot_or_error,
        represented_pipeline=external_pipeline,
    )
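
# Usage sketch (illustrative, not from the original source): asking a
# gRPC-backed repository location for a full execution plan. The names
# `repo_location` and `external_pipeline`, the empty run_config, and the
# "default" mode are assumptions for the example.
def _example_fetch_execution_plan(repo_location, external_pipeline):
    external_execution_plan = repo_location.get_external_execution_plan(
        external_pipeline=external_pipeline,
        run_config={},
        mode="default",
        step_keys_to_execute=None,  # None requests the plan over all steps
        known_state=None,
    )
    # The wrapped snapshot is what later gets persisted alongside a run
    return external_execution_plan.execution_plan_snapshot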

def get_subset_external_pipeline(self, selector):
    from ..schema.pipelines.pipeline_errors import GrapheneInvalidSubsetError
    from ..schema.pipelines.pipeline import GraphenePipeline
    from .utils import UserFacingGraphQLError

    check.inst_param(selector, "selector", PipelineSelector)
    # We have to grab the pipeline from the location instead of the repository directly
    # since we may have to request a subset we don't have in memory yet

    repository_location = self.repository_locations_dict[selector.location_name]
    external_repository = repository_location.get_repository(selector.repository_name)

    subset_result = repository_location.get_subset_external_pipeline_result(selector)
    if not subset_result.success:
        error_info = subset_result.error
        raise UserFacingGraphQLError(
            GrapheneInvalidSubsetError(
                message="{message}{cause_message}".format(
                    message=error_info.message,
                    cause_message="\n{}".format(error_info.cause.message)
                    if error_info.cause
                    else "",
                ),
                pipeline=GraphenePipeline(self.get_full_external_pipeline(selector)),
            )
        )

    return ExternalPipeline(
        subset_result.external_pipeline_data,
        repository_handle=external_repository.handle,
    )
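
# Illustrative sketch of the calling convention: a GraphQL resolver passes a
# PipelineSelector and either gets back a subsetted ExternalPipeline or sees
# UserFacingGraphQLError raised with a GrapheneInvalidSubsetError payload.
# `context` (an object exposing the method above) is an assumption.
def _example_resolve_subset(context, selector):
    # Raises UserFacingGraphQLError(GrapheneInvalidSubsetError(...)) when the
    # solid selection cannot be applied to the pipeline
    return context.get_subset_external_pipeline(selector)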

def _launch_scheduled_executions(
    instance, repo_location, external_repo, external_schedule, tick_context
):
    pipeline_selector = PipelineSelector(
        location_name=repo_location.name,
        repository_name=external_repo.name,
        pipeline_name=external_schedule.pipeline_name,
        solid_selection=external_schedule.solid_selection,
    )

    subset_pipeline_result = repo_location.get_subset_external_pipeline_result(
        pipeline_selector
    )
    external_pipeline = ExternalPipeline(
        subset_pipeline_result.external_pipeline_data,
        external_repo.handle,
    )

    schedule_execution_data = repo_location.get_external_schedule_execution_data(
        instance=instance,
        repository_handle=external_repo.handle,
        schedule_name=external_schedule.name,
        scheduled_execution_time=None,  # No way to know this in general for this scheduler
    )

    if not schedule_execution_data.run_requests:
        # Update tick to skipped state and return
        tick_context.update_state(JobTickStatus.SKIPPED)
        tick_context.stream.send(ScheduledExecutionSkipped())
        return

    for run_request in schedule_execution_data.run_requests:
        _launch_run(
            instance,
            repo_location,
            external_schedule,
            external_pipeline,
            tick_context,
            run_request,
        )

    tick_context.update_state(JobTickStatus.SUCCESS)

def get_external_pipeline(self, selector: PipelineSelector) -> ExternalPipeline:
    """Return the ExternalPipeline for a specific pipeline.

    Subclasses only need to implement get_subset_external_pipeline_result to
    handle the case where a solid selection is specified, which requires
    access to the underlying PipelineDefinition to generate the subsetted
    pipeline snapshot.
    """
    if not selector.solid_selection:
        return self.get_repository(selector.repository_name).get_full_external_pipeline(
            selector.pipeline_name
        )

    repo_handle = self.get_repository(selector.repository_name).handle
    return ExternalPipeline(
        self.get_subset_external_pipeline_result(selector).external_pipeline_data,
        repo_handle,
    )
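
# Illustrative sketch: a selector with a non-empty solid_selection exercises
# the subset branch above. The repository, pipeline, and solid names are
# placeholders.
def _example_subset_lookup(repo_location):
    selector = PipelineSelector(
        location_name=repo_location.name,
        repository_name="my_repo",
        pipeline_name="my_pipeline",
        solid_selection=["my_solid"],  # non-empty, so the subset path is taken
    )
    return repo_location.get_external_pipeline(selector)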

def _launch_scheduled_execution(
    instance, repo_location, external_repo, external_schedule, tick, stream
):
    pipeline_selector = PipelineSelector(
        location_name=repo_location.name,
        repository_name=external_repo.name,
        pipeline_name=external_schedule.pipeline_name,
        solid_selection=external_schedule.solid_selection,
    )

    subset_pipeline_result = repo_location.get_subset_external_pipeline_result(
        pipeline_selector
    )
    external_pipeline = ExternalPipeline(
        subset_pipeline_result.external_pipeline_data,
        external_repo.handle,
    )

    schedule_execution_data = repo_location.get_external_schedule_execution_data(
        instance=instance,
        repository_handle=external_repo.handle,
        schedule_name=external_schedule.name,
        schedule_execution_data_mode=ScheduleExecutionDataMode.LAUNCH_SCHEDULED_EXECUTION,
        scheduled_execution_time=None,  # No way to know this in general for this scheduler
    )

    run_config = {}
    schedule_tags = {}
    execution_plan_snapshot = None
    errors = []

    # Evaluating the schedule in user code has three outcomes: error, skip,
    # or execute with the returned run config and tags
    if isinstance(schedule_execution_data, ExternalScheduleExecutionErrorData):
        error = schedule_execution_data.error
        tick.update_with_status(ScheduleTickStatus.FAILURE, error=error)
        stream.send(ScheduledExecutionFailed(run_id=None, errors=[error]))
        return
    elif not schedule_execution_data.should_execute:
        # Update tick to skipped state and return
        tick.update_with_status(ScheduleTickStatus.SKIPPED)
        stream.send(ScheduledExecutionSkipped())
        return
    else:
        run_config = schedule_execution_data.run_config
        schedule_tags = schedule_execution_data.tags

    try:
        external_execution_plan = repo_location.get_external_execution_plan(
            external_pipeline,
            run_config,
            external_schedule.mode,
            step_keys_to_execute=None,
        )
        execution_plan_snapshot = external_execution_plan.execution_plan_snapshot
    except DagsterSubprocessError as e:
        errors.extend(e.subprocess_error_infos)
    except Exception:  # pylint: disable=broad-except
        errors.append(serializable_error_info_from_exc_info(sys.exc_info()))

    pipeline_tags = external_pipeline.tags or {}
    check_tags(pipeline_tags, "pipeline_tags")
    tags = merge_dicts(pipeline_tags, schedule_tags)

    # Enter the run in the DB with the information we have
    possibly_invalid_pipeline_run = instance.create_run(
        pipeline_name=external_schedule.pipeline_name,
        run_id=None,
        run_config=run_config,
        mode=external_schedule.mode,
        solids_to_execute=external_pipeline.solids_to_execute,
        step_keys_to_execute=None,
        solid_selection=external_pipeline.solid_selection,
        status=None,
        root_run_id=None,
        parent_run_id=None,
        tags=tags,
        pipeline_snapshot=external_pipeline.pipeline_snapshot,
        execution_plan_snapshot=execution_plan_snapshot,
        parent_pipeline_snapshot=external_pipeline.parent_pipeline_snapshot,
    )

    tick.update_with_status(
        ScheduleTickStatus.SUCCESS, run_id=possibly_invalid_pipeline_run.run_id
    )

    # If there were errors, inject them into the event log and fail the run
    if len(errors) > 0:
        for error in errors:
            instance.report_engine_event(
                error.message,
                possibly_invalid_pipeline_run,
                EngineEventData.engine_error(error),
            )
        instance.report_run_failed(possibly_invalid_pipeline_run)
        stream.send(
            ScheduledExecutionFailed(
                run_id=possibly_invalid_pipeline_run.run_id, errors=errors
            )
        )
        return

    try:
        launched_run = instance.launch_run(
            possibly_invalid_pipeline_run.run_id, external_pipeline
        )
    except Exception:  # pylint: disable=broad-except
        # Serialize the launch failure itself; the `error` name from the
        # schedule-evaluation branch is not in scope here
        error = serializable_error_info_from_exc_info(sys.exc_info())
        stream.send(
            ScheduledExecutionFailed(
                run_id=possibly_invalid_pipeline_run.run_id, errors=[error]
            )
        )
        return

    stream.send(ScheduledExecutionSuccess(run_id=launched_run.run_id))
    return