def external_pipeline_from_location_handle(repository_location_handle, external_pipeline_origin, solid_selection):
    """Resolve an ExternalPipeline from a location handle and pipeline origin.

    Creates the repository location from the handle, verifies that the origin's
    repository exists there, and returns the pipeline subset to
    ``solid_selection``.
    """
    check.inst_param(
        repository_location_handle, "repository_location_handle", RepositoryLocationHandle
    )
    check.inst_param(external_pipeline_origin, "external_pipeline_origin", ExternalPipelineOrigin)

    location = repository_location_handle.create_location()
    repository_name = external_pipeline_origin.external_repository_origin.repository_name

    check.invariant(
        location.has_repository(repository_name),
        "Could not find repository {repo_name} in location {repo_location_name}".format(
            repo_name=repository_name, repo_location_name=location.name
        ),
    )
    repository = location.get_repository(repository_name)

    selector = PipelineSelector(
        location_name=location.name,
        repository_name=repository.name,
        pipeline_name=external_pipeline_origin.pipeline_name,
        solid_selection=solid_selection,
    )
    subset_result = location.get_subset_external_pipeline_result(selector)
    return ExternalPipeline(subset_result.external_pipeline_data, repository.handle)
def external_pipeline_from_location_handle(repository_location_handle, pipeline_name, solid_selection):
    """Resolve an ExternalPipeline by name from a single-repository location handle.

    The reconstructed location must contain exactly one repository; the named
    pipeline is fetched from it and subset to ``solid_selection``.
    """
    check.inst_param(
        repository_location_handle, "repository_location_handle", RepositoryLocationHandle
    )

    location = RepositoryLocation.from_handle(repository_location_handle)
    repositories = location.get_repositories()
    check.invariant(
        len(repositories) == 1,
        "Reconstructed repository location should have exactly one repository",
    )
    # Exactly one repository — unpack it directly.
    (repository,) = repositories.values()

    selector = PipelineSelector(
        location_name=location.name,
        repository_name=repository.name,
        pipeline_name=pipeline_name,
        solid_selection=solid_selection,
    )
    subset_result = location.get_subset_external_pipeline_result(selector)
    return ExternalPipeline(subset_result.external_pipeline_data, repository.handle)
def external_pipeline_from_run(pipeline_run):
    """Yield the ExternalPipeline for ``pipeline_run``, reconstructed from the
    run's stored external pipeline origin.

    Generator (presumably wrapped as a context manager by a decorator outside
    this view — confirm at the definition site): the location handle stays open
    for the lifetime of the yielded pipeline.
    """
    check.inst_param(pipeline_run, "pipeline_run", PipelineRun)
    origin = check.inst(pipeline_run.external_pipeline_origin, ExternalPipelineOrigin)

    location_origin = origin.external_repository_origin.repository_location_origin
    with RepositoryLocationHandle.create_from_repository_location_origin(
        location_origin
    ) as repo_location_handle:
        location = RepositoryLocation.from_handle(repo_location_handle)
        repositories = location.get_repositories()
        check.invariant(
            len(repositories) == 1,
            "Reconstructed repository location should have exactly one repository",
        )
        (repository,) = repositories.values()

        selector = PipelineSelector(
            location_name=location.name,
            repository_name=repository.name,
            pipeline_name=pipeline_run.pipeline_name,
            solid_selection=pipeline_run.solid_selection,
        )
        subset_result = location.get_subset_external_pipeline_result(selector)
        yield ExternalPipeline(subset_result.external_pipeline_data, repository.handle)
def _launch_scheduled_executions(instance, repo_location, external_repo, external_schedule, tick_context):
    """Evaluate ``external_schedule`` and launch one run per run request,
    recording the tick outcome on ``tick_context``.
    """
    selector = PipelineSelector(
        location_name=repo_location.name,
        repository_name=external_repo.name,
        pipeline_name=external_schedule.pipeline_name,
        solid_selection=external_schedule.solid_selection,
    )
    subset_result = repo_location.get_subset_external_pipeline_result(selector)
    external_pipeline = ExternalPipeline(
        subset_result.external_pipeline_data,
        external_repo.handle,
    )

    schedule_execution_data = repo_location.get_external_schedule_execution_data(
        instance=instance,
        repository_handle=external_repo.handle,
        schedule_name=external_schedule.name,
        scheduled_execution_time=None,  # No way to know this in general for this scheduler
    )

    run_requests = schedule_execution_data.run_requests
    if not run_requests:
        # Nothing requested this tick: record a skip and notify the stream.
        tick_context.update_state(JobTickStatus.SKIPPED)
        tick_context.stream.send(ScheduledExecutionSkipped())
        return

    for run_request in run_requests:
        _launch_run(
            instance, repo_location, external_schedule, external_pipeline, tick_context, run_request
        )

    tick_context.update_state(JobTickStatus.SUCCESS)
def trigger_execution(graphene_info, trigger_selector):
    """Launch a run for every triggered execution matching ``trigger_selector``.

    Executions whose param evaluation errors are skipped; returns the
    ``TriggerExecutionSuccess`` GraphQL type with the launched run ids.
    """
    check.inst_param(graphene_info, "graphene_info", ResolveInfo)
    check.inst_param(trigger_selector, "trigger_selector", TriggerSelector)

    location = graphene_info.context.get_repository_location(trigger_selector.location_name)
    repository = location.get_repository(trigger_selector.repository_name)

    matches = [
        execution
        for execution in repository.get_external_triggered_executions()
        if execution.name == trigger_selector.trigger_name
    ]

    launched_run_ids = []
    for triggered in matches:
        external_pipeline = repository.get_full_external_pipeline(triggered.pipeline_name)
        result = graphene_info.context.get_external_triggered_execution_param_data(
            repository.handle, triggered.name
        )
        if isinstance(result, ExternalExecutionParamsErrorData):
            # Param evaluation failed for this execution — skip it.
            continue
        assert isinstance(result, ExternalExecutionParamsData)

        selector = PipelineSelector(
            location_name=location.name,
            repository_name=repository.name,
            pipeline_name=external_pipeline.name,
            solid_selection=triggered.solid_selection,
        )
        execution_params = ExecutionParams(
            selector=selector,
            run_config=result.run_config,
            mode=triggered.mode,
            execution_metadata=ExecutionMetadata(run_id=None, tags=result.tags),
            step_keys=None,
        )
        pipeline_run = create_valid_pipeline_run(graphene_info, external_pipeline, execution_params)
        graphene_info.context.instance.launch_run(pipeline_run.run_id, external_pipeline)
        launched_run_ids.append(pipeline_run.run_id)

    return graphene_info.schema.type_named("TriggerExecutionSuccess")(
        launched_run_ids=launched_run_ids
    )
def external_pipeline_from_location(repo_location, external_pipeline_origin, solid_selection):
    """Look up an ExternalPipeline in ``repo_location`` from its origin, subset
    to ``solid_selection``.
    """
    check.inst_param(repo_location, "repository_location", RepositoryLocation)
    check.inst_param(external_pipeline_origin, "external_pipeline_origin", ExternalPipelineOrigin)

    repository_name = external_pipeline_origin.external_repository_origin.repository_name
    check.invariant(
        repo_location.has_repository(repository_name),
        "Could not find repository {repo_name} in location {repo_location_name}".format(
            repo_name=repository_name, repo_location_name=repo_location.name
        ),
    )
    repository = repo_location.get_repository(repository_name)

    selector = PipelineSelector(
        location_name=repo_location.name,
        repository_name=repository.name,
        pipeline_name=external_pipeline_origin.pipeline_name,
        solid_selection=solid_selection,
    )
    return repo_location.get_external_pipeline(selector)
def _create_external_pipeline_run(
    instance,
    repo_location,
    external_repo,
    external_pipeline,
    run_config,
    mode,
    preset,
    tags,
    solid_selection,
    run_id,
):
    """Validate the launch arguments, resolve the solid subset and execution
    plan for the pipeline, and register (but do not launch) a run on the
    instance.
    """
    check.inst_param(instance, "instance", DagsterInstance)
    check.inst_param(repo_location, "repo_location", RepositoryLocation)
    check.inst_param(external_repo, "external_repo", ExternalRepository)
    check.inst_param(external_pipeline, "external_pipeline", ExternalPipeline)
    check.opt_dict_param(run_config, "run_config")
    check.opt_str_param(mode, "mode")
    check.opt_str_param(preset, "preset")
    check.opt_dict_param(tags, "tags", key_type=str)
    check.opt_list_param(solid_selection, "solid_selection", of_type=str)
    check.opt_str_param(run_id, "run_id")

    # Normalize run_config/mode/tags/solid_selection against the preset, if any.
    run_config, mode, tags, solid_selection = _check_execute_external_pipeline_args(
        external_pipeline,
        run_config,
        mode,
        preset,
        tags,
        solid_selection,
    )

    selector = PipelineSelector(
        location_name=repo_location.name,
        repository_name=external_repo.name,
        pipeline_name=external_pipeline.name,
        solid_selection=solid_selection,
    )
    subset_result = repo_location.get_subset_external_pipeline_result(selector)
    pipeline_subset = ExternalPipeline(
        subset_result.external_pipeline_data,
        external_repo.handle,
    )

    launch_mode = mode or pipeline_subset.get_default_mode_name()
    plan = repo_location.get_external_execution_plan(
        pipeline_subset,
        run_config,
        launch_mode,
        step_keys_to_execute=None,
        known_state=None,
    )

    return instance.create_run(
        pipeline_name=external_pipeline.name,
        run_id=run_id,
        run_config=run_config,
        mode=launch_mode,
        solids_to_execute=pipeline_subset.solids_to_execute,
        step_keys_to_execute=None,
        solid_selection=solid_selection,
        status=None,
        root_run_id=None,
        parent_run_id=None,
        tags=tags,
        pipeline_snapshot=pipeline_subset.pipeline_snapshot,
        execution_plan_snapshot=plan.execution_plan_snapshot,
        parent_pipeline_snapshot=pipeline_subset.parent_pipeline_snapshot,
        external_pipeline_origin=pipeline_subset.get_external_origin(),
        pipeline_code_origin=external_pipeline.get_python_origin(),
    )
def to_selector(self):
    """Build a PipelineSelector addressing this pipeline with no solid subset."""
    return PipelineSelector(
        location_name=self.location_name,
        repository_name=self.repository_name,
        pipeline_name=self.pipeline_name,
        solid_selection=None,
    )
def _create_external_pipeline_run(
    instance,
    repo_location,
    external_repo,
    external_pipeline,
    run_config,
    mode,
    preset,
    tags,
    solid_selection,
    run_id,
):
    """Validate the launch arguments, resolve the pipeline subset and execution
    plan, and register (but do not launch) a run on the instance.

    Raises:
        DagsterLaunchFailedError: if the pipeline subset or the execution plan
            cannot be loaded from the external repository location.
    """
    check.inst_param(instance, "instance", DagsterInstance)
    check.inst_param(repo_location, "repo_location", RepositoryLocation)
    check.inst_param(external_repo, "external_repo", ExternalRepository)
    check.inst_param(external_pipeline, "external_pipeline", ExternalPipeline)
    check.opt_dict_param(run_config, "run_config")
    check.opt_str_param(mode, "mode")
    check.opt_str_param(preset, "preset")
    check.opt_dict_param(tags, "tags", key_type=str)
    check.opt_list_param(solid_selection, "solid_selection", of_type=str)
    check.opt_str_param(run_id, "run_id")

    # Normalize run_config/mode/tags/solid_selection against the preset, if any.
    run_config, mode, tags, solid_selection = _check_execute_external_pipeline_args(
        external_pipeline,
        run_config,
        mode,
        preset,
        tags,
        solid_selection,
    )

    pipeline_name = external_pipeline.name
    pipeline_selector = PipelineSelector(
        location_name=repo_location.name,
        repository_name=external_repo.name,
        pipeline_name=pipeline_name,
        solid_selection=solid_selection,
    )

    subset_pipeline_result = repo_location.get_subset_external_pipeline_result(pipeline_selector)
    # Fixed: was `subset_pipeline_result.success == False` — compare truthiness,
    # not equality to the False singleton (PEP 8).
    if not subset_pipeline_result.success:
        raise DagsterLaunchFailedError(
            "Failed to load external pipeline subset: {error_message}".format(
                error_message=subset_pipeline_result.error.message
            ),
            serializable_error_info=subset_pipeline_result.error,
        )

    external_pipeline_subset = ExternalPipeline(
        subset_pipeline_result.external_pipeline_data,
        external_repo.handle,
    )

    pipeline_mode = mode or external_pipeline_subset.get_default_mode_name()

    external_execution_plan = repo_location.get_external_execution_plan(
        external_pipeline_subset,
        run_config,
        pipeline_mode,
        step_keys_to_execute=None,
    )
    if isinstance(external_execution_plan, ExecutionPlanSnapshotErrorData):
        raise DagsterLaunchFailedError(
            "Failed to load external execution plan",
            serializable_error_info=external_execution_plan.error,
        )
    # The raise above makes the original `else:` indentation unnecessary.
    execution_plan_snapshot = external_execution_plan.execution_plan_snapshot

    return instance.create_run(
        pipeline_name=pipeline_name,
        run_id=run_id,
        run_config=run_config,
        mode=pipeline_mode,
        solids_to_execute=external_pipeline_subset.solids_to_execute,
        step_keys_to_execute=None,
        solid_selection=solid_selection,
        status=None,
        root_run_id=None,
        parent_run_id=None,
        tags=tags,
        pipeline_snapshot=external_pipeline_subset.pipeline_snapshot,
        execution_plan_snapshot=execution_plan_snapshot,
        parent_pipeline_snapshot=external_pipeline_subset.parent_pipeline_snapshot,
        external_pipeline_origin=external_pipeline_subset.get_external_origin(),
    )
def _launch_scheduled_execution(instance, repo_location, external_repo, external_schedule, tick, stream):
    """Evaluate ``external_schedule`` once and, if it should execute, create and
    launch a run, recording the outcome on ``tick`` and ``stream``.

    Errors raised while computing the execution plan are collected and attached
    to the created run (which is then marked failed) rather than aborting
    before run creation, so the failure is visible in run history.
    """
    pipeline_selector = PipelineSelector(
        location_name=repo_location.name,
        repository_name=external_repo.name,
        pipeline_name=external_schedule.pipeline_name,
        solid_selection=external_schedule.solid_selection,
    )
    subset_pipeline_result = repo_location.get_subset_external_pipeline_result(pipeline_selector)
    external_pipeline = ExternalPipeline(
        subset_pipeline_result.external_pipeline_data,
        external_repo.handle,
    )

    schedule_execution_data = repo_location.get_external_schedule_execution_data(
        instance=instance,
        repository_handle=external_repo.handle,
        schedule_name=external_schedule.name,
        schedule_execution_data_mode=ScheduleExecutionDataMode.LAUNCH_SCHEDULED_EXECUTION,
        scheduled_execution_time=None,  # No way to know this in general for this scheduler
    )

    run_config = {}
    schedule_tags = {}
    execution_plan_snapshot = None
    errors = []

    if isinstance(schedule_execution_data, ExternalScheduleExecutionErrorData):
        error = schedule_execution_data.error
        tick.update_with_status(ScheduleTickStatus.FAILURE, error=error)
        stream.send(ScheduledExecutionFailed(run_id=None, errors=[error]))
        return
    elif not schedule_execution_data.should_execute:
        # Update tick to skipped state and return
        tick.update_with_status(ScheduleTickStatus.SKIPPED)
        stream.send(ScheduledExecutionSkipped())
        return
    else:
        run_config = schedule_execution_data.run_config
        schedule_tags = schedule_execution_data.tags
        try:
            external_execution_plan = repo_location.get_external_execution_plan(
                external_pipeline,
                run_config,
                external_schedule.mode,
                step_keys_to_execute=None,
            )
            execution_plan_snapshot = external_execution_plan.execution_plan_snapshot
        except DagsterSubprocessError as e:
            errors.extend(e.subprocess_error_infos)
        except Exception:  # pylint: disable=broad-except
            errors.append(serializable_error_info_from_exc_info(sys.exc_info()))

    pipeline_tags = external_pipeline.tags or {}
    check_tags(pipeline_tags, "pipeline_tags")
    tags = merge_dicts(pipeline_tags, schedule_tags)

    # Enter the run in the DB with the information we have
    possibly_invalid_pipeline_run = instance.create_run(
        pipeline_name=external_schedule.pipeline_name,
        run_id=None,
        run_config=run_config,
        mode=external_schedule.mode,
        solids_to_execute=external_pipeline.solids_to_execute,
        step_keys_to_execute=None,
        solid_selection=external_pipeline.solid_selection,
        status=None,
        root_run_id=None,
        parent_run_id=None,
        tags=tags,
        pipeline_snapshot=external_pipeline.pipeline_snapshot,
        execution_plan_snapshot=execution_plan_snapshot,
        parent_pipeline_snapshot=external_pipeline.parent_pipeline_snapshot,
    )

    tick.update_with_status(ScheduleTickStatus.SUCCESS, run_id=possibly_invalid_pipeline_run.run_id)

    # If there were errors, inject them into the event log and fail the run
    if len(errors) > 0:
        for error in errors:
            instance.report_engine_event(
                error.message,
                possibly_invalid_pipeline_run,
                EngineEventData.engine_error(error),
            )
        instance.report_run_failed(possibly_invalid_pipeline_run)
        stream.send(
            ScheduledExecutionFailed(run_id=possibly_invalid_pipeline_run.run_id, errors=errors)
        )
        return

    try:
        launched_run = instance.launch_run(possibly_invalid_pipeline_run.run_id, external_pipeline)
    except Exception:  # pylint: disable=broad-except
        # BUGFIX: the original sent `errors=[error]`, but `error` is unbound on
        # this path (it is only assigned in earlier branches that return), so a
        # launch failure raised NameError instead of being reported. Capture
        # the actual launch exception instead.
        stream.send(
            ScheduledExecutionFailed(
                run_id=possibly_invalid_pipeline_run.run_id,
                errors=[serializable_error_info_from_exc_info(sys.exc_info())],
            )
        )
        return

    stream.send(ScheduledExecutionSuccess(run_id=launched_run.run_id))
    return
def _create_external_pipeline_run(
    instance: DagsterInstance,
    repo_location: RepositoryLocation,
    external_repo: ExternalRepository,
    external_pipeline: ExternalPipeline,
    run_config: Dict[str, object],
    mode: Optional[str],
    preset: Optional[str],
    tags: Optional[Mapping[str, object]],
    solid_selection: Optional[List[str]],
    run_id: Optional[str],
):
    """Resolve the pipeline subset and its execution plan, then register a run
    on the instance (the run is created here, not launched).
    """
    check.inst_param(instance, "instance", DagsterInstance)
    check.inst_param(repo_location, "repo_location", RepositoryLocation)
    check.inst_param(external_repo, "external_repo", ExternalRepository)
    check.inst_param(external_pipeline, "external_pipeline", ExternalPipeline)
    check.opt_dict_param(run_config, "run_config", key_type=str)
    check.opt_str_param(mode, "mode")
    check.opt_str_param(preset, "preset")
    check.opt_dict_param(tags, "tags", key_type=str)
    check.opt_list_param(solid_selection, "solid_selection", of_type=str)
    check.opt_str_param(run_id, "run_id")

    # Normalize run_config/mode/tags/solid_selection against the preset, if any.
    run_config, mode, tags, solid_selection = _check_execute_external_pipeline_args(
        external_pipeline,
        run_config,
        mode,
        preset,
        tags,
        solid_selection,
    )

    selector = PipelineSelector(
        location_name=repo_location.name,
        repository_name=external_repo.name,
        pipeline_name=external_pipeline.name,
        solid_selection=solid_selection,
    )
    # Re-fetch the pipeline scoped to the requested solid selection (the
    # original rebound the `external_pipeline` parameter here; a distinct name
    # avoids the shadowing).
    scoped_pipeline = repo_location.get_external_pipeline(selector)

    launch_mode = mode or scoped_pipeline.get_default_mode_name()
    plan = repo_location.get_external_execution_plan(
        scoped_pipeline,
        run_config,
        launch_mode,
        step_keys_to_execute=None,
        known_state=None,
        instance=instance,
    )
    snapshot = plan.execution_plan_snapshot

    return instance.create_run(
        pipeline_name=external_pipeline.name,
        run_id=run_id,
        run_config=run_config,
        mode=launch_mode,
        solids_to_execute=scoped_pipeline.solids_to_execute,
        step_keys_to_execute=snapshot.step_keys_to_execute,
        solid_selection=solid_selection,
        status=None,
        root_run_id=None,
        parent_run_id=None,
        tags=tags,
        pipeline_snapshot=scoped_pipeline.pipeline_snapshot,
        execution_plan_snapshot=snapshot,
        parent_pipeline_snapshot=scoped_pipeline.parent_pipeline_snapshot,
        external_pipeline_origin=scoped_pipeline.get_external_origin(),
        pipeline_code_origin=scoped_pipeline.get_python_origin(),
    )
def create_and_launch_partition_backfill(graphene_info, backfill_params):
    """Create a backfill over the selected partition set and launch one run per
    partition, returning the ``PartitionBackfillSuccess`` GraphQL type.
    """
    partition_set_selector = backfill_params.get("selector")
    partition_set_name = partition_set_selector.get("partitionSetName")
    repository_selector = RepositorySelector.from_graphql_input(
        partition_set_selector.get("repositorySelector")
    )
    location = graphene_info.context.get_repository_location(repository_selector.location_name)
    repository = location.get_repository(repository_selector.repository_name)

    matches = [
        candidate
        for candidate in repository.get_external_partition_sets()
        if candidate.name == partition_set_name
    ]
    if not matches:
        return graphene_info.schema.type_named("PartitionSetNotFoundError")(partition_set_name)
    check.invariant(
        len(matches) == 1,
        "Partition set names must be unique: found {num} matches for {partition_set_name}".format(
            num=len(matches), partition_set_name=partition_set_name
        ),
    )
    external_partition_set = matches[0]

    external_pipeline = repository.get_full_external_pipeline(external_partition_set.pipeline_name)
    pipeline_selector = PipelineSelector(
        location_name=location.name,
        repository_name=repository.name,
        pipeline_name=external_pipeline.name,
        solid_selection=external_partition_set.solid_selection,
    )

    partition_names = backfill_params.get("partitionNames")
    backfill_id = make_new_backfill_id()
    result = graphene_info.context.get_external_partition_set_execution_param_data(
        repository.handle, partition_set_name, partition_names
    )
    if isinstance(result, ExternalPartitionExecutionErrorData):
        return graphene_info.schema.type_named("PythonError")(result.error)
    assert isinstance(result, ExternalPartitionSetExecutionParamData)

    launched_run_ids = []
    execution_param_list = _build_execution_param_list_for_backfill(
        graphene_info.context.instance,
        result.partition_data,
        backfill_id,
        backfill_params,
        pipeline_selector,
        external_partition_set,
    )
    for execution_params in execution_param_list:
        pipeline_run = create_valid_pipeline_run(graphene_info, external_pipeline, execution_params)
        graphene_info.context.instance.launch_run(pipeline_run.run_id, external_pipeline)
        launched_run_ids.append(pipeline_run.run_id)

    return graphene_info.schema.type_named("PartitionBackfillSuccess")(
        backfill_id=backfill_id, launched_run_ids=launched_run_ids
    )