def execute_run_iterator(pipeline, pipeline_run, instance):
    '''Plan an existing pipeline run and return an iterator over its execution.

    When the run carries a solid subset, the pipeline is either verified to already be
    that subset or narrowed to it before the execution plan is created.

    Args:
        pipeline (ExecutablePipeline): The pipeline to execute.
        pipeline_run (PipelineRun): The run to execute. Its status must be ``NOT_STARTED``.
        instance (DagsterInstance): The instance passed through to the run iterable.

    Returns:
        An iterator over an ``_ExecuteRunWithPlanIterable`` built for the run.
    '''
    check.inst_param(pipeline, 'pipeline', ExecutablePipeline)
    check.inst_param(pipeline_run, 'pipeline_run', PipelineRun)
    check.inst_param(instance, 'instance', DagsterInstance)
    check.invariant(pipeline_run.status == PipelineRunStatus.NOT_STARTED)

    if pipeline_run.solid_subset:
        definition = pipeline.get_definition()
        if not isinstance(definition, PipelineSubsetForExecution):
            # Not a subset yet: narrow the pipeline to the solids the run asks for.
            pipeline = pipeline.subset_for_execution(pipeline_run.solid_subset)
        else:
            # Already a subset: it has to name exactly the solids recorded on the run.
            # Comparing lengths as well as sets also rejects duplicated entries.
            same_size = len(pipeline_run.solid_subset) == len(definition.solid_subset)
            subsets_match = same_size and set(pipeline_run.solid_subset) == set(
                definition.solid_subset
            )
            conflict_template = (
                'Cannot execute PipelineRun with solid_subset {solid_subset} that conflicts with '
                'pipeline subset {pipeline_solid_subset}.'
            )
            check.invariant(
                subsets_match,
                conflict_template.format(
                    pipeline_solid_subset=str_format_list(definition.solid_subset),
                    solid_subset=str_format_list(pipeline_run.solid_subset),
                ),
            )

    plan = create_execution_plan(
        pipeline,
        environment_dict=pipeline_run.environment_dict,
        mode=pipeline_run.mode,
        step_keys_to_execute=pipeline_run.step_keys_to_execute,
    )

    run_iterable = _ExecuteRunWithPlanIterable(
        execution_plan=plan,
        pipeline_run=pipeline_run,
        instance=instance,
        iterator=_pipeline_execution_iterator,
        environment_dict=pipeline_run.environment_dict,
        retries=None,
        raise_on_error=False,
    )
    return iter(run_iterable)
def create_run_for_pipeline(
    self,
    pipeline_def,
    execution_plan=None,
    run_id=None,
    run_config=None,
    mode=None,
    solids_to_execute=None,
    step_keys_to_execute=None,
    status=None,
    tags=None,
    root_run_id=None,
    parent_run_id=None,
    solid_selection=None,
):
    """Create a run for ``pipeline_def`` by delegating to ``self.create_run``.

    The pipeline definition is narrowed to ``solids_to_execute`` when needed, a full
    execution plan is obtained (the one passed in, or a freshly created one), and that plan
    is subsetted either by memoization or by ``step_keys_to_execute`` before being
    snapshotted onto the run.

    Args:
        pipeline_def (PipelineDefinition): The pipeline the run is created for.
        execution_plan (Optional[ExecutionPlan]): A full plan to reuse instead of creating one.
        run_id: Forwarded to ``create_run``.
        run_config: Forwarded to plan creation and to ``create_run``.
        mode (Optional[str]): Mode name; ``create_run`` receives the pipeline's default mode
            name when this is not given.
        solids_to_execute (Optional[Set[str]]): Solids the run is restricted to.
        step_keys_to_execute: Step keys used to subset the plan. Rejected for memoized runs.
        status: Forwarded to ``create_run``.
        tags: Forwarded to ``create_run``; also inspected by ``is_memoized_run``.
        root_run_id: Forwarded to ``create_run``.
        parent_run_id: Forwarded to ``create_run``.
        solid_selection (Optional[List[str]]): User solid queries, passed through unchanged.

    Returns:
        Whatever ``self.create_run`` returns.

    Raises:
        DagsterInvariantViolationError: If ``step_keys_to_execute`` is given for a memoized run.
    """
    from dagster.core.execution.api import create_execution_plan
    from dagster.core.execution.plan.plan import ExecutionPlan
    from dagster.core.snap import snapshot_from_execution_plan

    check.inst_param(pipeline_def, "pipeline_def", PipelineDefinition)
    check.opt_inst_param(execution_plan, "execution_plan", ExecutionPlan)

    # solids_to_execute is what actually drives subset execution; it is the frozenset
    # successor of the older solid_subset. solid_selection is optional and is never turned
    # into solids_to_execute here (solid queries are not resolved by this function); it is
    # only carried along so the user's queries reach the run.
    check.opt_set_param(solids_to_execute, "solids_to_execute", of_type=str)
    check.opt_list_param(solid_selection, "solid_selection", of_type=str)

    if solids_to_execute:
        if not isinstance(pipeline_def, PipelineSubsetDefinition):
            # `create_run_for_pipeline` was called directly with an unsubsetted definition.
            pipeline_def = pipeline_def.get_pipeline_subset_def(
                solids_to_execute=solids_to_execute
            )
        else:
            # pipeline_def came from an IPipeline or ExternalPipeline and is already a
            # subset; it must agree with the requested solids.
            subsets_agree = solids_to_execute == pipeline_def.solids_to_execute
            conflict_template = (
                "Cannot create a PipelineRun from pipeline subset {pipeline_solids_to_execute} "
                "that conflicts with solids_to_execute arg {solids_to_execute}"
            )
            check.invariant(
                subsets_agree,
                conflict_template.format(
                    pipeline_solids_to_execute=str_format_list(pipeline_def.solids_to_execute),
                    solids_to_execute=str_format_list(solids_to_execute),
                ),
            )

    if execution_plan:
        full_plan = execution_plan
    else:
        full_plan = create_execution_plan(pipeline_def, run_config=run_config, mode=mode)

    # The starting plan has to be complete: every step must be slated for execution.
    check.invariant(len(full_plan.step_keys_to_execute) == len(full_plan.steps))

    if is_memoized_run(tags):
        if step_keys_to_execute:
            raise DagsterInvariantViolationError(
                "step_keys_to_execute parameter cannot be used in conjunction with memoized "
                "pipeline runs."
            )

        plan_for_run = self.resolve_memoized_execution_plan(
            full_plan, run_config=run_config, mode=mode,
        )
        # TODO: tighter integration with existing step_keys_to_execute functionality
        step_keys_to_execute = plan_for_run.step_keys_to_execute
    elif step_keys_to_execute:
        plan_for_run = full_plan.build_subset_plan(step_keys_to_execute)
    else:
        plan_for_run = full_plan

    # Computed in the same order the keyword arguments were originally evaluated.
    pipeline_name = pipeline_def.name
    resolved_mode = check.opt_str_param(
        mode, "mode", default=pipeline_def.get_default_mode_name()
    )
    pipeline_snapshot = pipeline_def.get_pipeline_snapshot()
    plan_snapshot = snapshot_from_execution_plan(
        plan_for_run, pipeline_def.get_pipeline_snapshot_id()
    )
    parent_snapshot = pipeline_def.get_parent_pipeline_snapshot()

    return self.create_run(
        pipeline_name=pipeline_name,
        run_id=run_id,
        run_config=run_config,
        mode=resolved_mode,
        solid_selection=solid_selection,
        solids_to_execute=solids_to_execute,
        step_keys_to_execute=step_keys_to_execute,
        status=status,
        tags=tags,
        root_run_id=root_run_id,
        parent_run_id=parent_run_id,
        pipeline_snapshot=pipeline_snapshot,
        execution_plan_snapshot=plan_snapshot,
        parent_pipeline_snapshot=parent_snapshot,
    )
def create_run_for_pipeline(
    self,
    pipeline_def,
    execution_plan=None,
    run_id=None,
    run_config=None,
    mode=None,
    solids_to_execute=None,
    step_keys_to_execute=None,
    status=None,
    tags=None,
    root_run_id=None,
    parent_run_id=None,
    solid_selection=None,
):
    '''Create a run for ``pipeline_def`` by delegating to ``self.create_run``.

    The pipeline definition is narrowed to ``solids_to_execute`` when needed, and an
    execution plan is created if the caller did not supply one. Snapshots of the pipeline
    and of the plan are attached to the run.

    Args:
        pipeline_def (PipelineDefinition): The pipeline the run is created for.
        execution_plan (Optional[ExecutionPlan]): A plan to reuse instead of creating one.
        run_id: Forwarded to ``create_run``.
        run_config: Forwarded to plan creation and to ``create_run``.
        mode (Optional[str]): Mode name; ``create_run`` receives the pipeline's default mode
            name when this is not given.
        solids_to_execute (Optional[Set[str]]): Solids the run is restricted to.
        step_keys_to_execute: Forwarded to plan creation and to ``create_run``.
        status: Forwarded to ``create_run``.
        tags: Forwarded to ``create_run``.
        root_run_id: Forwarded to ``create_run``.
        parent_run_id: Forwarded to ``create_run``.
        solid_selection (Optional[List[str]]): User solid queries, passed through unchanged.

    Returns:
        Whatever ``self.create_run`` returns.
    '''
    from dagster.core.execution.api import create_execution_plan
    from dagster.core.execution.plan.plan import ExecutionPlan
    from dagster.core.snap import snapshot_from_execution_plan

    check.inst_param(pipeline_def, 'pipeline_def', PipelineDefinition)
    check.opt_inst_param(execution_plan, 'execution_plan', ExecutionPlan)

    # solids_to_execute is what actually drives subset execution; it is the frozenset
    # successor of the older solid_subset. solid_selection is optional and is never turned
    # into solids_to_execute here (solid queries are not resolved by this function); it is
    # only carried along so the user's queries reach the run.
    check.opt_set_param(solids_to_execute, 'solids_to_execute', of_type=str)
    check.opt_list_param(solid_selection, 'solid_selection', of_type=str)

    if solids_to_execute:
        if not isinstance(pipeline_def, PipelineSubsetDefinition):
            # `create_run_for_pipeline` was called directly with an unsubsetted definition.
            pipeline_def = pipeline_def.get_pipeline_subset_def(
                solids_to_execute=solids_to_execute
            )
        else:
            # pipeline_def came from an ExecutablePipeline or ExternalPipeline and is
            # already a subset; it must agree with the requested solids.
            subsets_agree = solids_to_execute == pipeline_def.solids_to_execute
            conflict_template = (
                'Cannot create a PipelineRun from pipeline subset {pipeline_solids_to_execute} '
                'that conflicts with solids_to_execute arg {solids_to_execute}'
            )
            check.invariant(
                subsets_agree,
                conflict_template.format(
                    pipeline_solids_to_execute=str_format_list(pipeline_def.solids_to_execute),
                    solids_to_execute=str_format_list(solids_to_execute),
                ),
            )

    plan = execution_plan
    if plan is None:
        plan = create_execution_plan(
            pipeline_def,
            run_config=run_config,
            mode=mode,
            step_keys_to_execute=step_keys_to_execute,
        )

    # Computed in the same order the keyword arguments were originally evaluated.
    pipeline_name = pipeline_def.name
    resolved_mode = check.opt_str_param(
        mode, 'mode', default=pipeline_def.get_default_mode_name()
    )
    pipeline_snapshot = pipeline_def.get_pipeline_snapshot()
    plan_snapshot = snapshot_from_execution_plan(plan, pipeline_def.get_pipeline_snapshot_id())
    parent_snapshot = pipeline_def.get_parent_pipeline_snapshot()

    return self.create_run(
        pipeline_name=pipeline_name,
        run_id=run_id,
        run_config=run_config,
        mode=resolved_mode,
        solid_selection=solid_selection,
        solids_to_execute=solids_to_execute,
        step_keys_to_execute=step_keys_to_execute,
        status=status,
        tags=tags,
        root_run_id=root_run_id,
        parent_run_id=parent_run_id,
        pipeline_snapshot=pipeline_snapshot,
        execution_plan_snapshot=plan_snapshot,
        parent_pipeline_snapshot=parent_snapshot,
    )
def execute_run(pipeline, pipeline_run, instance, raise_on_error=False):
    '''Run an existing pipeline run to completion and return its result.

    This is the blocking counterpart of execute_run_iterator: the run's events are
    collected into a list before the result is returned.

    Args:
        pipeline (Union[ExecutablePipeline, PipelineDefinition]): The pipeline to execute.
        pipeline_run (PipelineRun): The run to execute. Its status must be ``NOT_STARTED``.
        instance (DagsterInstance): The instance in which the run has been created.
        raise_on_error (Optional[bool]): Whether or not to raise exceptions when they occur.
            Defaults to ``False``.

    Returns:
        PipelineExecutionResult: The result of the execution.
    '''
    pipeline, pipeline_def = _check_pipeline(pipeline)
    check.inst_param(pipeline_run, 'pipeline_run', PipelineRun)
    check.inst_param(instance, 'instance', DagsterInstance)
    check.invariant(pipeline_run.status == PipelineRunStatus.NOT_STARTED)

    if pipeline_run.solid_subset:
        pipeline_def = pipeline.get_definition()
        if not isinstance(pipeline_def, PipelineSubsetForExecution):
            # Not a subset yet: narrow the pipeline and pick up the narrowed definition.
            pipeline = pipeline.subset_for_execution(pipeline_run.solid_subset)
            pipeline_def = pipeline.get_definition()
        else:
            # Already a subset: it has to name exactly the solids recorded on the run.
            # Comparing lengths as well as sets also rejects duplicated entries.
            same_size = len(pipeline_run.solid_subset) == len(pipeline_def.solid_subset)
            subsets_match = same_size and set(pipeline_run.solid_subset) == set(
                pipeline_def.solid_subset
            )
            conflict_template = (
                'Cannot execute PipelineRun with solid_subset {solid_subset} that conflicts with '
                'pipeline subset {pipeline_solid_subset}.'
            )
            check.invariant(
                subsets_match,
                conflict_template.format(
                    pipeline_solid_subset=str_format_list(pipeline_def.solid_subset),
                    solid_subset=str_format_list(pipeline_run.solid_subset),
                ),
            )

    plan = create_execution_plan(
        pipeline,
        environment_dict=pipeline_run.environment_dict,
        mode=pipeline_run.mode,
        step_keys_to_execute=pipeline_run.step_keys_to_execute,
    )

    run_iterable = _ExecuteRunWithPlanIterable(
        execution_plan=plan,
        pipeline_run=pipeline_run,
        instance=instance,
        iterator=_pipeline_execution_iterator,
        environment_dict=pipeline_run.environment_dict,
        retries=None,
        raise_on_error=raise_on_error,
    )
    # Exhaust the iterable first; the pipeline context is read from it afterwards.
    events = list(run_iterable)
    finished_context = run_iterable.pipeline_context

    def _rebuild_context():
        # Invoked lazily by the result object; reuses the storage managers of the
        # context the run just executed with.
        return scoped_pipeline_context(
            plan,
            pipeline_run.environment_dict,
            pipeline_run,
            instance,
            system_storage_data=SystemStorageData(
                intermediates_manager=finished_context.intermediates_manager,
                file_manager=finished_context.file_manager,
            ),
        )

    return PipelineExecutionResult(pipeline_def, pipeline_run.run_id, events, _rebuild_context)
def create_run_for_pipeline(
    self,
    pipeline_def,
    execution_plan=None,
    run_id=None,
    environment_dict=None,
    mode=None,
    solid_subset=None,
    step_keys_to_execute=None,
    status=None,
    tags=None,
    root_run_id=None,
    parent_run_id=None,
):
    '''Create a run for ``pipeline_def`` by delegating to ``self.create_run``.

    The pipeline definition is narrowed to ``solid_subset`` when needed, and an execution
    plan is created if the caller did not supply one. Snapshots of the pipeline and of the
    plan are attached to the run.

    Args:
        pipeline_def (PipelineDefinition): The pipeline the run is created for.
        execution_plan (Optional[ExecutionPlan]): A plan to reuse instead of creating one.
        run_id: Forwarded to ``create_run``.
        environment_dict: Forwarded to plan creation and to ``create_run``.
        mode (Optional[str]): Mode name; ``create_run`` receives the pipeline's default mode
            name when this is not given.
        solid_subset: Solids the run is restricted to.
        step_keys_to_execute: Forwarded to plan creation and to ``create_run``.
        status: Forwarded to ``create_run``.
        tags: Forwarded to ``create_run``.
        root_run_id: Forwarded to ``create_run``.
        parent_run_id: Forwarded to ``create_run``.

    Returns:
        Whatever ``self.create_run`` returns.
    '''
    from dagster.core.execution.api import create_execution_plan
    from dagster.core.execution.plan.plan import ExecutionPlan
    from dagster.core.snap import snapshot_from_execution_plan

    check.inst_param(pipeline_def, 'pipeline_def', PipelineDefinition)
    check.opt_inst_param(execution_plan, 'execution_plan', ExecutionPlan)

    if solid_subset:
        if not isinstance(pipeline_def, PipelineSubsetForExecution):
            # Not a subset yet: narrow the definition to the requested solids.
            pipeline_def = pipeline_def.subset_for_execution(solid_subset=solid_subset)
        else:
            # Already a subset: it has to name exactly the requested solids.
            # Comparing lengths as well as sets also rejects duplicated entries.
            same_size = len(solid_subset) == len(pipeline_def.solid_subset)
            subsets_match = same_size and set(solid_subset) == set(pipeline_def.solid_subset)
            conflict_template = (
                'Cannot create a PipelineRun from pipeline subset {pipeline_solid_subset} that '
                'conflicts with solid_subset arg {solid_subset}'
            )
            check.invariant(
                subsets_match,
                conflict_template.format(
                    pipeline_solid_subset=str_format_list(pipeline_def.solid_subset),
                    solid_subset=str_format_list(solid_subset),
                ),
            )

    plan = execution_plan
    if plan is None:
        plan = create_execution_plan(
            pipeline_def,
            environment_dict=environment_dict,
            mode=mode,
            step_keys_to_execute=step_keys_to_execute,
        )

    # Computed in the same order the keyword arguments were originally evaluated.
    pipeline_name = pipeline_def.name
    resolved_mode = check.opt_str_param(
        mode, 'mode', default=pipeline_def.get_default_mode_name()
    )
    pipeline_snapshot = pipeline_def.get_pipeline_snapshot()
    plan_snapshot = snapshot_from_execution_plan(plan, pipeline_def.get_pipeline_snapshot_id())
    parent_snapshot = pipeline_def.get_parent_pipeline_snapshot()

    return self.create_run(
        pipeline_name=pipeline_name,
        run_id=run_id,
        environment_dict=environment_dict,
        mode=resolved_mode,
        solid_subset=solid_subset,
        step_keys_to_execute=step_keys_to_execute,
        status=status,
        tags=tags,
        root_run_id=root_run_id,
        parent_run_id=parent_run_id,
        pipeline_snapshot=pipeline_snapshot,
        execution_plan_snapshot=plan_snapshot,
        parent_pipeline_snapshot=parent_snapshot,
    )