def get_subset_external_pipeline(context, selector):
    from ..schema.pipelines.pipeline_errors import GrapheneInvalidSubsetError
    from ..schema.pipelines.pipeline import GraphenePipeline

    check.inst_param(selector, "selector", PipelineSelector)

    repository_location = context.get_repository_location(selector.location_name)
    external_repository = repository_location.get_repository(selector.repository_name)
    try:
        subset_result = repository_location.get_subset_external_pipeline_result(selector)
    except Exception:  # pylint: disable=broad-except
        error_info = serializable_error_info_from_exc_info(sys.exc_info())
        raise UserFacingGraphQLError(
            GrapheneInvalidSubsetError(
                message="{message}{cause_message}".format(
                    message=error_info.message,
                    cause_message="\n{}".format(error_info.cause.message)
                    if error_info.cause
                    else "",
                ),
                pipeline=GraphenePipeline(context.get_full_external_pipeline(selector)),
            )
        )

    return ExternalPipeline(
        subset_result.external_pipeline_data,
        repository_handle=external_repository.handle,
    )
def get_external_pipeline(self):
    if self._cached_external_pipeline is None:
        from dagster.core.host_representation import ExternalPipeline

        self._cached_external_pipeline = ExternalPipeline.from_pipeline_def(self)

    return self._cached_external_pipeline
def external_pipeline_from_location_handle(
    repository_location_handle, external_pipeline_origin, solid_selection
):
    check.inst_param(
        repository_location_handle, "repository_location_handle", RepositoryLocationHandle
    )
    check.inst_param(
        external_pipeline_origin, "external_pipeline_origin", ExternalPipelineOrigin
    )

    repo_location = repository_location_handle.create_location()
    repo_name = external_pipeline_origin.external_repository_origin.repository_name
    pipeline_name = external_pipeline_origin.pipeline_name

    check.invariant(
        repo_location.has_repository(repo_name),
        "Could not find repository {repo_name} in location {repo_location_name}".format(
            repo_name=repo_name, repo_location_name=repo_location.name
        ),
    )
    external_repo = repo_location.get_repository(repo_name)

    pipeline_selector = PipelineSelector(
        location_name=repo_location.name,
        repository_name=external_repo.name,
        pipeline_name=pipeline_name,
        solid_selection=solid_selection,
    )

    subset_pipeline_result = repo_location.get_subset_external_pipeline_result(pipeline_selector)
    external_pipeline = ExternalPipeline(
        subset_pipeline_result.external_pipeline_data,
        external_repo.handle,
    )
    return external_pipeline
def external_pipeline_from_location_handle(
    repository_location_handle, pipeline_name, solid_selection
):
    check.inst_param(
        repository_location_handle, "repository_location_handle", RepositoryLocationHandle
    )

    repo_location = RepositoryLocation.from_handle(repository_location_handle)

    repo_dict = repo_location.get_repositories()
    check.invariant(
        len(repo_dict) == 1,
        "Reconstructed repository location should have exactly one repository",
    )
    external_repo = next(iter(repo_dict.values()))

    pipeline_selector = PipelineSelector(
        location_name=repo_location.name,
        repository_name=external_repo.name,
        pipeline_name=pipeline_name,
        solid_selection=solid_selection,
    )

    subset_pipeline_result = repo_location.get_subset_external_pipeline_result(pipeline_selector)
    external_pipeline = ExternalPipeline(
        subset_pipeline_result.external_pipeline_data,
        external_repo.handle,
    )
    return external_pipeline
def external_pipeline_from_run(pipeline_run):
    check.inst_param(pipeline_run, "pipeline_run", PipelineRun)
    external_pipeline_origin = check.inst(
        pipeline_run.external_pipeline_origin, ExternalPipelineOrigin
    )

    with RepositoryLocationHandle.create_from_repository_location_origin(
        external_pipeline_origin.external_repository_origin.repository_location_origin
    ) as repo_location_handle:
        repo_location = RepositoryLocation.from_handle(repo_location_handle)

        repo_dict = repo_location.get_repositories()
        check.invariant(
            len(repo_dict) == 1,
            "Reconstructed repository location should have exactly one repository",
        )
        external_repo = next(iter(repo_dict.values()))

        pipeline_selector = PipelineSelector(
            location_name=repo_location.name,
            repository_name=external_repo.name,
            pipeline_name=pipeline_run.pipeline_name,
            solid_selection=pipeline_run.solid_selection,
        )

        subset_pipeline_result = repo_location.get_subset_external_pipeline_result(
            pipeline_selector
        )
        external_pipeline = ExternalPipeline(
            subset_pipeline_result.external_pipeline_data,
            external_repo.handle,
        )
        yield external_pipeline
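# Note: external_pipeline_from_run yields inside the `with` block above, so it
# is presumably consumed as a context manager (e.g. wrapped with
# @contextmanager), keeping the repository location handle open for as long as
# the caller uses the yielded ExternalPipeline. The decorator is not visible
# in this excerpt, so this is an inference from the yield-inside-with shape.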
def get_external_pipeline(self, name, solid_subset):
    check.str_param(name, 'name')
    check.list_param(solid_subset, 'solid_subset', of_type=str)

    return ExternalPipeline.from_pipeline_def(
        self.get_reconstructable_pipeline(name).get_definition(),
        solid_subset=solid_subset,
    )
def get_external_pipeline(self, handle, solid_subset):
    check.inst_param(handle, 'handle', PipelineHandle)
    check.invariant(
        handle.environment_name == self.name,
        'Received invalid handle, environment name mismatch',
    )
    return ExternalPipeline.from_pipeline_def(
        self.get_reconstructable_pipeline(handle.pipeline_name).get_definition(),
        solid_subset,
    )
def external_pipeline_from_recon_pipeline(recon_pipeline, solid_selection, repository_handle):
    if solid_selection:
        sub_pipeline = recon_pipeline.subset_for_execution(solid_selection)
        pipeline_def = sub_pipeline.get_definition()
    else:
        pipeline_def = recon_pipeline.get_definition()

    return ExternalPipeline(
        external_pipeline_data_from_def(pipeline_def),
        repository_handle=repository_handle,
    )
def external_pipeline_from_recon_pipeline(recon_pipeline, solid_subset, repository_handle):
    full_pipeline_def = recon_pipeline.get_definition()
    pipeline_def = (
        full_pipeline_def.subset_for_execution(solid_subset)
        if solid_subset
        else full_pipeline_def
    )

    return ExternalPipeline(
        external_pipeline_data_from_def(pipeline_def),
        repository_handle=repository_handle,
    )
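# Note: the two external_pipeline_from_recon_pipeline variants above differ
# only in where the subset is applied. The first subsets the reconstructable
# pipeline and then takes its definition; the second takes the full definition
# and subsets that. Both build the ExternalPipeline from
# external_pipeline_data_from_def with the given repository_handle.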
def _evaluate_sensor(
    context,
    instance,
    repo_location,
    external_repo,
    external_sensor,
    job_state,
    sensor_debug_crash_flags=None,
):
    context.logger.info(f"Checking for new runs for sensor: {external_sensor.name}")
    sensor_runtime_data = repo_location.get_external_sensor_execution_data(
        instance,
        external_repo.handle,
        external_sensor.name,
        job_state.job_specific_data.last_tick_timestamp
        if job_state.job_specific_data
        else None,
        job_state.job_specific_data.last_run_key if job_state.job_specific_data else None,
    )
    if isinstance(sensor_runtime_data, ExternalSensorExecutionErrorData):
        context.logger.error(
            f"Failed to resolve sensor for {external_sensor.name} : "
            f"{sensor_runtime_data.error.to_string()}"
        )
        context.update_state(JobTickStatus.FAILURE, error=sensor_runtime_data.error)
        yield
        return

    assert isinstance(sensor_runtime_data, ExternalSensorExecutionData)
    if not sensor_runtime_data.run_requests:
        if sensor_runtime_data.skip_message:
            context.logger.info(
                f"Sensor returned false for {external_sensor.name}, skipping: "
                f"{sensor_runtime_data.skip_message}"
            )
            context.update_state(
                JobTickStatus.SKIPPED, skip_reason=sensor_runtime_data.skip_message
            )
        else:
            context.logger.info(f"Sensor returned false for {external_sensor.name}, skipping")
            context.update_state(JobTickStatus.SKIPPED)
        yield
        return

    pipeline_selector = PipelineSelector(
        location_name=repo_location.name,
        repository_name=external_repo.name,
        pipeline_name=external_sensor.pipeline_name,
        solid_selection=external_sensor.solid_selection,
    )
    subset_pipeline_result = repo_location.get_subset_external_pipeline_result(pipeline_selector)
    external_pipeline = ExternalPipeline(
        subset_pipeline_result.external_pipeline_data,
        external_repo.handle,
    )

    skipped_runs = []
    for run_request in sensor_runtime_data.run_requests:
        run = _get_or_create_sensor_run(
            context, instance, repo_location, external_sensor, external_pipeline, run_request
        )

        if isinstance(run, SkippedSensorRun):
            skipped_runs.append(run)
            yield
            continue

        _check_for_debug_crash(sensor_debug_crash_flags, "RUN_CREATED")

        try:
            context.logger.info(
                "Launching run for {sensor_name}".format(sensor_name=external_sensor.name)
            )
            instance.submit_run(run.run_id, external_pipeline)
            context.logger.info(
                "Completed launch of run {run_id} for {sensor_name}".format(
                    run_id=run.run_id, sensor_name=external_sensor.name
                )
            )
        except Exception:  # pylint: disable=broad-except
            context.logger.error(
                f"Run {run.run_id} created successfully but failed to launch: "
                f"{str(serializable_error_info_from_exc_info(sys.exc_info()))}"
            )

        yield

        _check_for_debug_crash(sensor_debug_crash_flags, "RUN_LAUNCHED")
        context.add_run(run_id=run.run_id, run_key=run_request.run_key)

    if skipped_runs:
        run_keys = [skipped.run_key for skipped in skipped_runs]
        skipped_count = len(skipped_runs)
        context.logger.info(
            f"Skipping {skipped_count} {'run' if skipped_count == 1 else 'runs'} for sensor "
            f"{external_sensor.name} already completed with run keys: {seven.json.dumps(run_keys)}"
        )

    if context.run_count:
        context.update_state(JobTickStatus.SUCCESS)
    else:
        context.update_state(JobTickStatus.SKIPPED)

    yield
def _evaluate_sensor(
    context,
    instance,
    repo_location,
    external_repo,
    external_sensor,
    job_state,
    sensor_debug_crash_flags=None,
):
    sensor_runtime_data = repo_location.get_external_sensor_execution_data(
        instance,
        external_repo.handle,
        external_sensor.name,
        job_state.job_specific_data.last_tick_timestamp
        if job_state.job_specific_data
        else None,
        job_state.job_specific_data.last_run_key if job_state.job_specific_data else None,
    )
    if isinstance(sensor_runtime_data, ExternalSensorExecutionErrorData):
        context.logger.error(
            "Failed to resolve sensor for {sensor_name} : {error_info}".format(
                sensor_name=external_sensor.name,
                error_info=sensor_runtime_data.error.to_string(),
            )
        )
        context.update_state(JobTickStatus.FAILURE, error=sensor_runtime_data.error)
        return

    assert isinstance(sensor_runtime_data, ExternalSensorExecutionData)
    if not sensor_runtime_data.run_requests:
        if sensor_runtime_data.skip_message:
            context.logger.info(
                f"Sensor returned false for {external_sensor.name}, skipping: "
                f"{sensor_runtime_data.skip_message}"
            )
            context.update_state(
                JobTickStatus.SKIPPED, skip_reason=sensor_runtime_data.skip_message
            )
        else:
            context.logger.info(f"Sensor returned false for {external_sensor.name}, skipping")
            context.update_state(JobTickStatus.SKIPPED)
        return

    pipeline_selector = PipelineSelector(
        location_name=repo_location.name,
        repository_name=external_repo.name,
        pipeline_name=external_sensor.pipeline_name,
        solid_selection=external_sensor.solid_selection,
    )
    subset_pipeline_result = repo_location.get_subset_external_pipeline_result(pipeline_selector)
    external_pipeline = ExternalPipeline(
        subset_pipeline_result.external_pipeline_data,
        external_repo.handle,
    )

    for run_request in sensor_runtime_data.run_requests:
        run = _get_or_create_sensor_run(
            context, instance, repo_location, external_sensor, external_pipeline, run_request
        )

        if not run:  # we already found and resolved a run
            continue

        _check_for_debug_crash(sensor_debug_crash_flags, "RUN_CREATED")

        try:
            context.logger.info(
                "Launching run for {sensor_name}".format(sensor_name=external_sensor.name)
            )
            instance.submit_run(run.run_id, external_pipeline)
            context.logger.info(
                "Completed launch of run {run_id} for {sensor_name}".format(
                    run_id=run.run_id, sensor_name=external_sensor.name
                )
            )
        except Exception:  # pylint: disable=broad-except
            context.logger.error(
                "Run {run_id} created successfully but failed to launch.".format(
                    run_id=run.run_id
                )
            )

        _check_for_debug_crash(sensor_debug_crash_flags, "RUN_LAUNCHED")
        context.add_run(run_id=run.run_id, run_key=run_request.run_key)

    if context.run_count:
        context.update_state(JobTickStatus.SUCCESS)
    else:
        context.update_state(JobTickStatus.SKIPPED)
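# Note: the first _evaluate_sensor variant is a generator -- its bare `yield`
# statements appear to act as checkpoints that hand control back to the
# calling daemon loop between steps -- and it collects SkippedSensorRun
# results so it can log a summary with their run keys. The second variant is
# a plain function that returns early and treats a falsy result from
# _get_or_create_sensor_run as a run that was already found and resolved.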
def _create_external_pipeline_run(
    instance,
    repo_location,
    external_repo,
    external_pipeline,
    run_config,
    mode,
    preset,
    tags,
    solid_selection,
    run_id,
):
    check.inst_param(instance, "instance", DagsterInstance)
    check.inst_param(repo_location, "repo_location", RepositoryLocation)
    check.inst_param(external_repo, "external_repo", ExternalRepository)
    check.inst_param(external_pipeline, "external_pipeline", ExternalPipeline)
    check.opt_dict_param(run_config, "run_config")
    check.opt_str_param(mode, "mode")
    check.opt_str_param(preset, "preset")
    check.opt_dict_param(tags, "tags", key_type=str)
    check.opt_list_param(solid_selection, "solid_selection", of_type=str)
    check.opt_str_param(run_id, "run_id")

    run_config, mode, tags, solid_selection = _check_execute_external_pipeline_args(
        external_pipeline,
        run_config,
        mode,
        preset,
        tags,
        solid_selection,
    )

    pipeline_name = external_pipeline.name
    pipeline_selector = PipelineSelector(
        location_name=repo_location.name,
        repository_name=external_repo.name,
        pipeline_name=pipeline_name,
        solid_selection=solid_selection,
    )

    subset_pipeline_result = repo_location.get_subset_external_pipeline_result(pipeline_selector)
    if not subset_pipeline_result.success:
        raise DagsterLaunchFailedError(
            "Failed to load external pipeline subset: {error_message}".format(
                error_message=subset_pipeline_result.error.message
            ),
            serializable_error_info=subset_pipeline_result.error,
        )

    external_pipeline_subset = ExternalPipeline(
        subset_pipeline_result.external_pipeline_data,
        external_repo.handle,
    )

    pipeline_mode = mode or external_pipeline_subset.get_default_mode_name()

    external_execution_plan = repo_location.get_external_execution_plan(
        external_pipeline_subset,
        run_config,
        pipeline_mode,
        step_keys_to_execute=None,
    )
    if isinstance(external_execution_plan, ExecutionPlanSnapshotErrorData):
        raise DagsterLaunchFailedError(
            "Failed to load external execution plan",
            serializable_error_info=external_execution_plan.error,
        )
    else:
        execution_plan_snapshot = external_execution_plan.execution_plan_snapshot

    return instance.create_run(
        pipeline_name=pipeline_name,
        run_id=run_id,
        run_config=run_config,
        mode=pipeline_mode,
        solids_to_execute=external_pipeline_subset.solids_to_execute,
        step_keys_to_execute=None,
        solid_selection=solid_selection,
        status=None,
        root_run_id=None,
        parent_run_id=None,
        tags=tags,
        pipeline_snapshot=external_pipeline_subset.pipeline_snapshot,
        execution_plan_snapshot=execution_plan_snapshot,
        parent_pipeline_snapshot=external_pipeline_subset.parent_pipeline_snapshot,
        external_pipeline_origin=external_pipeline_subset.get_external_origin(),
    )
def _schedule_runs_at_time(
    instance,
    logger,
    repo_location,
    external_repo,
    external_schedule,
    schedule_time,
    tick_context,
    debug_crash_flags,
):
    schedule_name = external_schedule.name

    pipeline_selector = PipelineSelector(
        location_name=repo_location.name,
        repository_name=external_repo.name,
        pipeline_name=external_schedule.pipeline_name,
        solid_selection=external_schedule.solid_selection,
    )

    subset_pipeline_result = repo_location.get_subset_external_pipeline_result(pipeline_selector)
    external_pipeline = ExternalPipeline(
        subset_pipeline_result.external_pipeline_data,
        external_repo.handle,
    )

    schedule_execution_data = repo_location.get_external_schedule_execution_data(
        instance=instance,
        repository_handle=external_repo.handle,
        schedule_name=external_schedule.name,
        scheduled_execution_time=schedule_time,
    )

    if isinstance(schedule_execution_data, ExternalScheduleExecutionErrorData):
        error = schedule_execution_data.error
        logger.error(
            f"Failed to fetch schedule data for {external_schedule.name}: {error.to_string()}"
        )
        tick_context.update_state(JobTickStatus.FAILURE, error=error)
        return

    if not schedule_execution_data.run_requests:
        logger.info(f"No run requests returned for {external_schedule.name}, skipping")

        # Update tick to skipped state and return
        tick_context.update_state(JobTickStatus.SKIPPED)
        return

    for run_request in schedule_execution_data.run_requests:
        run = _get_existing_run_for_request(
            instance, external_schedule, schedule_time, run_request
        )
        if run:
            if run.status != PipelineRunStatus.NOT_STARTED:
                # A run already exists and was launched for this time period,
                # but the scheduler must have crashed before the tick could be put
                # into a SUCCESS state
                logger.info(
                    f"Run {run.run_id} already completed for this execution of "
                    f"{external_schedule.name}"
                )
                tick_context.add_run(run_id=run.run_id, run_key=run_request.run_key)
                continue
            else:
                logger.info(
                    f"Run {run.run_id} already created for this execution of "
                    f"{external_schedule.name}"
                )
        else:
            run = _create_scheduler_run(
                instance,
                logger,
                schedule_time,
                repo_location,
                external_schedule,
                external_pipeline,
                run_request,
            )

        _check_for_debug_crash(debug_crash_flags, "RUN_CREATED")

        if run.status != PipelineRunStatus.FAILURE:
            try:
                instance.submit_run(run.run_id, external_pipeline)
                logger.info(
                    f"Completed scheduled launch of run {run.run_id} for {schedule_name}"
                )
            except Exception:  # pylint: disable=broad-except
                logger.error(f"Run {run.run_id} created successfully but failed to launch.")

        _check_for_debug_crash(debug_crash_flags, "RUN_LAUNCHED")
        tick_context.add_run(run_id=run.run_id, run_key=run_request.run_key)
        _check_for_debug_crash(debug_crash_flags, "RUN_ADDED")

    _check_for_debug_crash(debug_crash_flags, "TICK_SUCCESS")
    tick_context.update_state(JobTickStatus.SUCCESS)
def _check_execute_external_pipeline_args(
    external_pipeline: ExternalPipeline,
    run_config: Dict[str, object],
    mode: Optional[str],
    preset: Optional[str],
    tags: Optional[Mapping[str, object]],
    solid_selection: Optional[List[str]],
) -> Tuple[Dict[str, object], str, Mapping[str, object], Optional[List[str]]]:
    check.inst_param(external_pipeline, "external_pipeline", ExternalPipeline)
    run_config = check.opt_dict_param(run_config, "run_config")

    check.opt_str_param(mode, "mode")
    check.opt_str_param(preset, "preset")
    check.invariant(
        not (mode is not None and preset is not None),
        "You may set only one of `mode` (got {mode}) or `preset` (got {preset}).".format(
            mode=mode, preset=preset
        ),
    )

    tags = check.opt_dict_param(tags, "tags", key_type=str)
    check.opt_list_param(solid_selection, "solid_selection", of_type=str)

    if preset is not None:
        pipeline_preset = external_pipeline.get_preset(preset)

        if pipeline_preset.run_config is not None:
            check.invariant(
                (not run_config) or (pipeline_preset.run_config == run_config),
                "The environment set in preset '{preset}' does not agree with the environment "
                "passed in the `run_config` argument.".format(preset=preset),
            )
            run_config = pipeline_preset.run_config

        # load solid_selection from preset
        if pipeline_preset.solid_selection is not None:
            check.invariant(
                solid_selection is None
                or solid_selection == pipeline_preset.solid_selection,
                "The solid_selection set in preset '{preset}', {preset_subset}, does not agree "
                "with the `solid_selection` argument: {solid_selection}".format(
                    preset=preset,
                    preset_subset=pipeline_preset.solid_selection,
                    solid_selection=solid_selection,
                ),
            )
            solid_selection = pipeline_preset.solid_selection

        check.invariant(
            mode is None or mode == pipeline_preset.mode,
            "Mode {mode} does not agree with the mode set in preset '{preset}': "
            "('{preset_mode}')".format(
                preset=preset, preset_mode=pipeline_preset.mode, mode=mode
            ),
        )
        mode = pipeline_preset.mode

        tags = merge_dicts(pipeline_preset.tags, tags)

    if mode is not None:
        if not external_pipeline.has_mode(mode):
            raise DagsterInvariantViolationError(
                (
                    "You have attempted to execute pipeline {name} with mode {mode}. "
                    "Available modes: {modes}"
                ).format(
                    name=external_pipeline.name,
                    mode=mode,
                    modes=external_pipeline.available_modes,
                )
            )
    else:
        if len(external_pipeline.available_modes) > 1:
            raise DagsterInvariantViolationError(
                (
                    "Pipeline {name} has multiple modes (Available modes: {modes}) and you have "
                    "attempted to execute it without specifying a mode. Set "
                    "mode property on the PipelineRun object."
                ).format(
                    name=external_pipeline.name, modes=external_pipeline.available_modes
                )
            )
        mode = external_pipeline.get_default_mode_name()

    tags = merge_dicts(external_pipeline.tags, tags)

    return (
        run_config,
        mode,
        tags,
        solid_selection,
    )
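# A minimal, self-contained sketch of the mode-resolution rule enforced above,
# based only on the branch structure visible in
# _check_execute_external_pipeline_args. `_resolve_mode` is a hypothetical
# helper for illustration, not part of the source.
def _resolve_mode(mode, available_modes, default_mode):
    if mode is not None:
        if mode not in available_modes:
            # Mirrors the DagsterInvariantViolationError raised for an unknown mode.
            raise ValueError(f"Unknown mode {mode}; available modes: {available_modes}")
        return mode
    if len(available_modes) > 1:
        # Mirrors the error raised when a multi-mode pipeline gets no explicit mode.
        raise ValueError("Multiple modes available; a mode must be specified")
    return default_mode


assert _resolve_mode(None, ["default"], "default") == "default"
assert _resolve_mode("prod", ["default", "prod"], "default") == "prod"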
def _schedule_run_at_time(
    instance,
    logger,
    repo_location,
    external_repo,
    external_schedule,
    schedule_time,
    tick_holder,
    debug_crash_flags,
):
    schedule_name = external_schedule.name

    pipeline_selector = PipelineSelector(
        location_name=repo_location.name,
        repository_name=external_repo.name,
        pipeline_name=external_schedule.pipeline_name,
        solid_selection=external_schedule.solid_selection,
    )

    subset_pipeline_result = repo_location.get_subset_external_pipeline_result(pipeline_selector)
    external_pipeline = ExternalPipeline(
        subset_pipeline_result.external_pipeline_data,
        external_repo.handle,
    )

    # Rule out the case where the scheduler crashed between creating a run for this time
    # and launching it
    runs_filter = PipelineRunsFilter(
        tags=merge_dicts(
            PipelineRun.tags_for_schedule(external_schedule),
            {SCHEDULED_EXECUTION_TIME_TAG: schedule_time.in_tz("UTC").isoformat()},
        )
    )
    existing_runs = instance.get_runs(runs_filter)

    run_to_launch = None

    if len(existing_runs):
        check.invariant(len(existing_runs) == 1)

        run = existing_runs[0]
        if run.status != PipelineRunStatus.NOT_STARTED:
            # A run already exists and was launched for this time period,
            # but the scheduler must have crashed before the tick could be put
            # into a SUCCESS state
            logger.info(
                "Run {run_id} already completed for this execution of {schedule_name}".format(
                    run_id=run.run_id, schedule_name=schedule_name
                )
            )
            tick_holder.update_with_status(JobTickStatus.SUCCESS, run_id=run.run_id)
            return
        else:
            logger.info(
                "Run {run_id} already created for this execution of {schedule_name}".format(
                    run_id=run.run_id, schedule_name=schedule_name
                )
            )
            run_to_launch = run
    else:
        run_to_launch = _create_scheduler_run(
            instance,
            logger,
            schedule_time,
            repo_location,
            external_repo,
            external_schedule,
            external_pipeline,
            tick_holder,
        )

    _check_for_debug_crash(debug_crash_flags, "RUN_CREATED")

    if not run_to_launch:
        check.invariant(
            tick_holder.status != JobTickStatus.STARTED
            and tick_holder.status != JobTickStatus.SUCCESS
        )
        return

    if run_to_launch.status != PipelineRunStatus.FAILURE:
        try:
            instance.submit_run(run_to_launch.run_id, external_pipeline)
            logger.info(
                "Completed scheduled launch of run {run_id} for {schedule_name}".format(
                    run_id=run_to_launch.run_id, schedule_name=schedule_name
                )
            )
        except Exception:  # pylint: disable=broad-except
            logger.error(
                "Run {run_id} created successfully but failed to launch.".format(
                    run_id=run_to_launch.run_id
                )
            )

    _check_for_debug_crash(debug_crash_flags, "RUN_LAUNCHED")
    tick_holder.update_with_status(JobTickStatus.SUCCESS, run_id=run_to_launch.run_id)
    _check_for_debug_crash(debug_crash_flags, "TICK_SUCCESS")
def _schedule_run_at_time(
    instance,
    logger,
    repo_location,
    schedule_state,
    schedule_time_utc,
    tick_holder,
    debug_crash_flags,
):
    schedule_name = schedule_state.name

    repo_dict = repo_location.get_repositories()
    check.invariant(
        len(repo_dict) == 1,
        "Reconstructed repository location should have exactly one repository",
    )
    external_repo = next(iter(repo_dict.values()))
    external_schedule = external_repo.get_external_schedule(schedule_name)

    pipeline_selector = PipelineSelector(
        location_name=repo_location.name,
        repository_name=external_repo.name,
        pipeline_name=external_schedule.pipeline_name,
        solid_selection=external_schedule.solid_selection,
    )

    subset_pipeline_result = repo_location.get_subset_external_pipeline_result(pipeline_selector)
    external_pipeline = ExternalPipeline(
        subset_pipeline_result.external_pipeline_data,
        external_repo.handle,
    )

    # Rule out the case where the scheduler crashed between creating a run for this time
    # and launching it
    runs_filter = PipelineRunsFilter(
        tags=merge_dicts(
            PipelineRun.tags_for_schedule(schedule_state),
            {SCHEDULED_EXECUTION_TIME_TAG: schedule_time_utc.isoformat()},
        )
    )
    existing_runs = instance.get_runs(runs_filter)

    run_to_launch = None

    if len(existing_runs):
        check.invariant(len(existing_runs) == 1)

        run = existing_runs[0]
        if run.status != PipelineRunStatus.NOT_STARTED:
            # A run already exists and was launched for this time period,
            # but the scheduler must have crashed before the tick could be put
            # into a SUCCESS state
            logger.info(
                "Run {run_id} already completed for this execution of {schedule_name}".format(
                    run_id=run.run_id, schedule_name=schedule_state.name
                )
            )
            tick_holder.update_with_status(ScheduleTickStatus.SUCCESS, run_id=run.run_id)
            return
        else:
            logger.info(
                "Run {run_id} already created for this execution of {schedule_name}".format(
                    run_id=run.run_id, schedule_name=schedule_state.name
                )
            )
            run_to_launch = run
    else:
        run_to_launch = _create_scheduler_run(
            instance,
            logger,
            schedule_time_utc,
            repo_location,
            external_repo,
            external_schedule,
            external_pipeline,
            tick_holder,
        )

    _check_for_debug_crash(debug_crash_flags, "RUN_CREATED")

    if not run_to_launch:
        check.invariant(
            tick_holder.status != ScheduleTickStatus.STARTED
            and tick_holder.status != ScheduleTickStatus.SUCCESS
        )
        return

    if run_to_launch.status != PipelineRunStatus.FAILURE:
        try:
            instance.launch_run(run_to_launch.run_id, external_pipeline)
            logger.info(
                "Completed scheduled launch of run {run_id} for {schedule_name}".format(
                    run_id=run_to_launch.run_id, schedule_name=schedule_name
                )
            )
        except Exception as e:  # pylint: disable=broad-except
            if not isinstance(e, KeyboardInterrupt):
                error = serializable_error_info_from_exc_info(sys.exc_info())
                instance.report_engine_event(
                    error.message,
                    run_to_launch,
                    EngineEventData.engine_error(error),
                )
                instance.report_run_failed(run_to_launch)
                logger.error(
                    "Run {run_id} created successfully but failed to launch.".format(
                        run_id=run_to_launch.run_id
                    )
                )

    _check_for_debug_crash(debug_crash_flags, "RUN_LAUNCHED")
    tick_holder.update_with_status(ScheduleTickStatus.SUCCESS, run_id=run_to_launch.run_id)
    _check_for_debug_crash(debug_crash_flags, "TICK_SUCCESS")
def _create_external_pipeline_run(
    instance,
    repo_location,
    external_repo,
    external_pipeline,
    run_config,
    mode,
    preset,
    tags,
    solid_selection,
    run_id,
):
    check.inst_param(instance, "instance", DagsterInstance)
    check.inst_param(repo_location, "repo_location", RepositoryLocation)
    check.inst_param(external_repo, "external_repo", ExternalRepository)
    check.inst_param(external_pipeline, "external_pipeline", ExternalPipeline)
    check.opt_dict_param(run_config, "run_config")
    check.opt_str_param(mode, "mode")
    check.opt_str_param(preset, "preset")
    check.opt_dict_param(tags, "tags", key_type=str)
    check.opt_list_param(solid_selection, "solid_selection", of_type=str)
    check.opt_str_param(run_id, "run_id")

    run_config, mode, tags, solid_selection = _check_execute_external_pipeline_args(
        external_pipeline,
        run_config,
        mode,
        preset,
        tags,
        solid_selection,
    )

    pipeline_name = external_pipeline.name
    pipeline_selector = PipelineSelector(
        location_name=repo_location.name,
        repository_name=external_repo.name,
        pipeline_name=pipeline_name,
        solid_selection=solid_selection,
    )

    subset_pipeline_result = repo_location.get_subset_external_pipeline_result(pipeline_selector)
    external_pipeline_subset = ExternalPipeline(
        subset_pipeline_result.external_pipeline_data,
        external_repo.handle,
    )

    pipeline_mode = mode or external_pipeline_subset.get_default_mode_name()

    external_execution_plan = repo_location.get_external_execution_plan(
        external_pipeline_subset,
        run_config,
        pipeline_mode,
        step_keys_to_execute=None,
        known_state=None,
    )
    execution_plan_snapshot = external_execution_plan.execution_plan_snapshot

    return instance.create_run(
        pipeline_name=pipeline_name,
        run_id=run_id,
        run_config=run_config,
        mode=pipeline_mode,
        solids_to_execute=external_pipeline_subset.solids_to_execute,
        step_keys_to_execute=None,
        solid_selection=solid_selection,
        status=None,
        root_run_id=None,
        parent_run_id=None,
        tags=tags,
        pipeline_snapshot=external_pipeline_subset.pipeline_snapshot,
        execution_plan_snapshot=execution_plan_snapshot,
        parent_pipeline_snapshot=external_pipeline_subset.parent_pipeline_snapshot,
        external_pipeline_origin=external_pipeline_subset.get_external_origin(),
        pipeline_code_origin=external_pipeline.get_python_origin(),
    )
def _create_external_pipeline_run(
    instance: DagsterInstance,
    repo_location: RepositoryLocation,
    external_repo: ExternalRepository,
    external_pipeline: ExternalPipeline,
    run_config: Dict[str, object],
    mode: Optional[str],
    preset: Optional[str],
    tags: Optional[Mapping[str, object]],
    solid_selection: Optional[List[str]],
    run_id: Optional[str],
):
    check.inst_param(instance, "instance", DagsterInstance)
    check.inst_param(repo_location, "repo_location", RepositoryLocation)
    check.inst_param(external_repo, "external_repo", ExternalRepository)
    check.inst_param(external_pipeline, "external_pipeline", ExternalPipeline)
    check.opt_dict_param(run_config, "run_config", key_type=str)
    check.opt_str_param(mode, "mode")
    check.opt_str_param(preset, "preset")
    check.opt_dict_param(tags, "tags", key_type=str)
    check.opt_list_param(solid_selection, "solid_selection", of_type=str)
    check.opt_str_param(run_id, "run_id")

    run_config, mode, tags, solid_selection = _check_execute_external_pipeline_args(
        external_pipeline,
        run_config,
        mode,
        preset,
        tags,
        solid_selection,
    )

    pipeline_name = external_pipeline.name
    pipeline_selector = PipelineSelector(
        location_name=repo_location.name,
        repository_name=external_repo.name,
        pipeline_name=pipeline_name,
        solid_selection=solid_selection,
    )

    external_pipeline = repo_location.get_external_pipeline(pipeline_selector)

    pipeline_mode = mode or external_pipeline.get_default_mode_name()

    external_execution_plan = repo_location.get_external_execution_plan(
        external_pipeline,
        run_config,
        pipeline_mode,
        step_keys_to_execute=None,
        known_state=None,
        instance=instance,
    )
    execution_plan_snapshot = external_execution_plan.execution_plan_snapshot

    return instance.create_run(
        pipeline_name=pipeline_name,
        run_id=run_id,
        run_config=run_config,
        mode=pipeline_mode,
        solids_to_execute=external_pipeline.solids_to_execute,
        step_keys_to_execute=execution_plan_snapshot.step_keys_to_execute,
        solid_selection=solid_selection,
        status=None,
        root_run_id=None,
        parent_run_id=None,
        tags=tags,
        pipeline_snapshot=external_pipeline.pipeline_snapshot,
        execution_plan_snapshot=execution_plan_snapshot,
        parent_pipeline_snapshot=external_pipeline.parent_pipeline_snapshot,
        external_pipeline_origin=external_pipeline.get_external_origin(),
        pipeline_code_origin=external_pipeline.get_python_origin(),
    )
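# Note: read together, the three _create_external_pipeline_run variants trace
# an evolution. The first checks subset_pipeline_result.success and wraps both
# subset and execution-plan failures in DagsterLaunchFailedError; the second
# drops that explicit handling but threads known_state into the execution-plan
# call and records pipeline_code_origin on the run; the third adds type
# annotations, fetches the subset via repo_location.get_external_pipeline, and
# takes step_keys_to_execute from the execution plan snapshot instead of None.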