def execute_pipeline(pipeline, environment_dict=None, run_config=None):
    '''Run *pipeline* to completion and return the collected result.

    "Synchronous" version of :py:func:`execute_pipeline_iterator`. This is the
    entry point for dagster CLI and dagit execution. For the dagster-graphql
    entry point, see execute_plan() below.

    Parameters:
        pipeline (PipelineDefinition): Pipeline to run
        environment_dict (dict): The environment configuration that
            parameterizes this run
        run_config (RunConfig): Configuration for how this pipeline will be
            executed

    Returns:
        :py:class:`PipelineExecutionResult`
    '''
    # Validate and normalize arguments before doing any work.
    check.inst_param(pipeline, 'pipeline', PipelineDefinition)
    environment_dict = check.opt_dict_param(environment_dict, 'environment_dict')
    run_config = check_run_config_param(run_config, pipeline)

    plan = create_execution_plan(pipeline, environment_dict, run_config)

    with scoped_pipeline_context(pipeline, environment_dict, run_config) as ctx:
        # Drain the event iterator so every event is materialized while the
        # context is still open.
        events = list(
            _execute_pipeline_iterator(
                ctx,
                execution_plan=plan,
                run_config=run_config,
                step_keys_to_execute=run_config.step_keys_to_execute,
            )
        )

        def reconstruct_context():
            # Re-open an equivalent context later, reusing this run's
            # storage objects.
            return scoped_pipeline_context(
                pipeline,
                environment_dict,
                run_config,
                system_storage_data=SystemStorageData(
                    run_storage=ctx.run_storage,
                    intermediates_manager=ctx.intermediates_manager,
                    file_manager=ctx.file_manager,
                ),
            )

        return PipelineExecutionResult(
            pipeline, run_config.run_id, events, reconstruct_context
        )
def execute_pipeline(
    pipeline, environment_dict=None, run_config=None, instance=None, raise_on_error=True
):
    '''Run *pipeline* to completion and return the collected result.

    "Synchronous" version of :py:func:`execute_pipeline_iterator`. This is the
    entry point for dagster CLI and dagit execution. For the dagster-graphql
    entry point, see execute_plan() below.

    Parameters:
        pipeline (PipelineDefinition): Pipeline to run
        environment_dict (dict): The environment configuration that
            parameterizes this run
        run_config (RunConfig): Configuration for how this pipeline will be
            executed
        instance (DagsterInstance): The instance to execute against, defaults
            to ephemeral (no artifacts persisted)
        raise_on_error (Bool): Whether or not to raise exceptions when they
            occur. Defaults to True since this behavior is useful in tests,
            which is the most common use of this API.

    Returns:
        :py:class:`PipelineExecutionResult`
    '''
    check.inst_param(pipeline, 'pipeline', PipelineDefinition)
    environment_dict = check.opt_dict_param(environment_dict, 'environment_dict')
    run_config = check_run_config_param(run_config, pipeline)
    check.opt_inst_param(instance, 'instance', DagsterInstance)

    # Fall back to an ephemeral (non-persisting) instance when none is given.
    instance = DagsterInstance.ephemeral() if not instance else instance

    plan = create_execution_plan(pipeline, environment_dict, run_config)
    step_keys = _resolve_step_keys(plan, run_config.step_keys_to_execute)

    # run should be used and threaded through here
    # https://github.com/dagster-io/dagster/issues/1745
    _run = _create_run(instance, pipeline, run_config, environment_dict)

    with scoped_pipeline_context(
        pipeline, environment_dict, run_config, instance, raise_on_error=raise_on_error
    ) as ctx:
        events = list(
            _pipeline_execution_iterator(
                ctx,
                execution_plan=plan,
                run_config=run_config,
                step_keys_to_execute=step_keys,
            )
        )

        def reconstruct_context():
            # Re-open an equivalent context later, reusing this run's
            # storage objects.
            return scoped_pipeline_context(
                pipeline,
                environment_dict,
                run_config,
                instance,
                system_storage_data=SystemStorageData(
                    intermediates_manager=ctx.intermediates_manager,
                    file_manager=ctx.file_manager,
                ),
            )

        return PipelineExecutionResult(
            pipeline, run_config.run_id, events, reconstruct_context
        )
def execute_pipeline(pipeline, environment_dict=None, run_config=None, instance=None, raise_on_error=True):
    '''Execute a pipeline synchronously.

    Users will typically call this API when testing pipeline execution, or
    running standalone scripts.

    Parameters:
        pipeline (PipelineDefinition): The pipeline to execute.
        environment_dict (Optional[dict]): The environment configuration that
            parameterizes this run, as a dict.
        run_config (Optional[RunConfig]): Optionally specifies additional
            config options for pipeline execution.
        instance (Optional[DagsterInstance]): The instance to execute against.
            If this is ``None``, an ephemeral instance will be used, and no
            artifacts will be persisted from the run.
        raise_on_error (Optional[bool]): Whether or not to raise exceptions
            when they occur. Defaults to ``True``, since this is the most
            useful behavior in test.

    Returns:
        :py:class:`PipelineExecutionResult`: The result of pipeline execution.

    For the asynchronous version, see :py:func:`execute_pipeline_iterator`.

    This is the entrypoint for dagster CLI execution. For the dagster-graphql
    entrypoint, see ``dagster.core.execution.api.execute_plan()``.
    '''
    check.inst_param(pipeline, 'pipeline', PipelineDefinition)
    environment_dict = check.opt_dict_param(environment_dict, 'environment_dict')
    run_config = check_run_config_param(run_config, pipeline)
    check.opt_inst_param(instance, 'instance', DagsterInstance)

    # Fall back to an ephemeral (non-persisting) instance when none is given.
    instance = DagsterInstance.ephemeral() if not instance else instance

    plan = create_execution_plan(pipeline, environment_dict, run_config)

    # Record the run with the instance before execution starts.
    pipeline_run = _create_run(instance, pipeline, run_config, environment_dict)

    with scoped_pipeline_context(
        pipeline, environment_dict, pipeline_run, instance, raise_on_error=raise_on_error
    ) as ctx:
        events = list(_pipeline_execution_iterator(ctx, plan, pipeline_run))

        def reconstruct_context():
            # Re-open an equivalent context later, reusing this run's
            # storage objects.
            return scoped_pipeline_context(
                pipeline,
                environment_dict,
                pipeline_run,
                instance,
                system_storage_data=SystemStorageData(
                    intermediates_manager=ctx.intermediates_manager,
                    file_manager=ctx.file_manager,
                ),
            )

        return PipelineExecutionResult(
            pipeline, run_config.run_id, events, reconstruct_context
        )
def execute_run(pipeline, pipeline_run, instance, raise_on_error=False):
    '''Executes an existing pipeline run synchronously.

    Synchronous version of execute_run_iterator.

    Args:
        pipeline (ExecutablePipeline): The pipeline to execute.
        pipeline_run (PipelineRun): The run to execute
        instance (DagsterInstance): The instance in which the run has been
            created.
        raise_on_error (Optional[bool]): Whether or not to raise exceptions
            when they occur. Defaults to ``False``.

    Returns:
        PipelineExecutionResult: The result of the execution.
    '''
    # Guard against the common mistake of passing a raw definition instead of
    # an executable wrapper.
    if isinstance(pipeline, PipelineDefinition):
        raise DagsterInvariantViolationError(
            'execute_run requires an ExecutablePipeline but received a PipelineDefinition '
            'directly instead. To support hand-off to other processes provide a '
            'ReconstructablePipeline which can be done using reconstructable(). For in '
            'process only execution you can use InMemoryExecutablePipeline.'
        )

    check.inst_param(pipeline, 'pipeline', ExecutablePipeline)
    check.inst_param(pipeline_run, 'pipeline_run', PipelineRun)
    check.inst_param(instance, 'instance', DagsterInstance)
    check.invariant(pipeline_run.status == PipelineRunStatus.NOT_STARTED)

    pipeline_def = pipeline.get_definition()
    if pipeline_run.solids_to_execute:
        if isinstance(pipeline_def, PipelineSubsetDefinition):
            # The pipeline is already subset; the run's selection must agree
            # with it exactly.
            check.invariant(
                pipeline_run.solids_to_execute == pipeline.solids_to_execute,
                'Cannot execute PipelineRun with solids_to_execute {solids_to_execute} that '
                'conflicts with pipeline subset {pipeline_solids_to_execute}.'.format(
                    pipeline_solids_to_execute=str_format_set(pipeline.solids_to_execute),
                    solids_to_execute=str_format_set(pipeline_run.solids_to_execute),
                ),
            )
        else:
            # when `execute_run` is directly called, the sub pipeline hasn't been created
            # note that when we receive the solids to execute via PipelineRun, it won't support
            # solid selection query syntax
            pipeline = pipeline.subset_for_execution_from_existing_pipeline(
                pipeline_run.solids_to_execute
            )

    plan = create_execution_plan(
        pipeline,
        environment_dict=pipeline_run.environment_dict,
        mode=pipeline_run.mode,
        step_keys_to_execute=pipeline_run.step_keys_to_execute,
    )

    run_iterable = _ExecuteRunWithPlanIterable(
        execution_plan=plan,
        pipeline_run=pipeline_run,
        instance=instance,
        iterator=_pipeline_execution_iterator,
        environment_dict=pipeline_run.environment_dict,
        retries=None,
        raise_on_error=raise_on_error,
    )
    events = list(run_iterable)
    # The iterable exposes the context it built while executing; the
    # reconstruction lambda below reuses its storage objects.
    ctx = run_iterable.pipeline_context

    return PipelineExecutionResult(
        pipeline.get_definition(),
        pipeline_run.run_id,
        events,
        lambda: scoped_pipeline_context(
            plan,
            pipeline_run.environment_dict,
            pipeline_run,
            instance,
            system_storage_data=SystemStorageData(
                intermediates_manager=ctx.intermediates_manager,
                file_manager=ctx.file_manager,
            ),
        ),
    )
def execute_run(pipeline, pipeline_run, instance, raise_on_error=False):
    '''Executes an existing pipeline run synchronously.

    Synchronous version of execute_run_iterator.

    Args:
        pipeline (Union[ExecutablePipeline, PipelineDefinition]): The pipeline
            to execute.
        pipeline_run (PipelineRun): The run to execute
        instance (DagsterInstance): The instance in which the run has been
            created.
        raise_on_error (Optional[bool]): Whether or not to raise exceptions
            when they occur. Defaults to ``False``.

    Returns:
        PipelineExecutionResult: The result of the execution.
    '''
    # Normalize the pipeline argument into (executable, definition).
    pipeline, pipeline_def = _check_pipeline(pipeline)
    check.inst_param(pipeline_run, 'pipeline_run', PipelineRun)
    check.inst_param(instance, 'instance', DagsterInstance)
    check.invariant(pipeline_run.status == PipelineRunStatus.NOT_STARTED)

    if pipeline_run.solid_subset:
        pipeline_def = pipeline.get_definition()
        if isinstance(pipeline_def, PipelineSubsetForExecution):
            # The pipeline is already subset; the run's selection must match
            # it as a set (order-insensitive).
            requested = pipeline_run.solid_subset
            existing = pipeline_def.solid_subset
            subsets_agree = len(requested) == len(existing) and set(requested) == set(existing)
            check.invariant(
                subsets_agree,
                'Cannot execute PipelineRun with solid_subset {solid_subset} that conflicts with '
                'pipeline subset {pipeline_solid_subset}.'.format(
                    pipeline_solid_subset=str_format_list(existing),
                    solid_subset=str_format_list(requested),
                ),
            )
        else:
            # Build the sub-pipeline on demand when it hasn't been created yet.
            pipeline = pipeline.subset_for_execution(pipeline_run.solid_subset)
            pipeline_def = pipeline.get_definition()

    plan = create_execution_plan(
        pipeline,
        environment_dict=pipeline_run.environment_dict,
        mode=pipeline_run.mode,
        step_keys_to_execute=pipeline_run.step_keys_to_execute,
    )

    run_iterable = _ExecuteRunWithPlanIterable(
        execution_plan=plan,
        pipeline_run=pipeline_run,
        instance=instance,
        iterator=_pipeline_execution_iterator,
        environment_dict=pipeline_run.environment_dict,
        retries=None,
        raise_on_error=raise_on_error,
    )
    events = list(run_iterable)
    # Context built during execution; reused by the reconstruction lambda.
    ctx = run_iterable.pipeline_context

    return PipelineExecutionResult(
        pipeline_def,
        pipeline_run.run_id,
        events,
        lambda: scoped_pipeline_context(
            plan,
            pipeline_run.environment_dict,
            pipeline_run,
            instance,
            system_storage_data=SystemStorageData(
                intermediates_manager=ctx.intermediates_manager,
                file_manager=ctx.file_manager,
            ),
        ),
    )
def execute_pipeline(
    pipeline,
    environment_dict=None,
    mode=None,
    preset=None,
    tags=None,
    run_config=None,
    instance=None,
    raise_on_error=True,
):
    '''Execute a pipeline synchronously.

    Users will typically call this API when testing pipeline execution, or
    running standalone scripts.

    Parameters:
        pipeline (PipelineDefinition): The pipeline to execute.
        environment_dict (Optional[dict]): The environment configuration that
            parametrizes this run, as a dict.
        mode (Optional[str]): The name of the pipeline mode to use. You may
            not set both ``mode`` and ``preset``.
        preset (Optional[str]): The name of the pipeline preset to use. You
            may not set both ``mode`` and ``preset``.
        tags (Optional[Dict[str, Any]]): Arbitrary key-value pairs that will
            be added to pipeline logs.
        run_config (Optional[RunConfig]): Optionally specifies additional
            config options for pipeline execution.

            Deprecation notice: In 0.8.0, the use of `run_config` to set mode,
            tags, and step keys will be deprecated. In the interim, if you set
            a mode using `run_config`, this must match any mode set using
            `mode` or `preset`. If you set tags using `run_config`, any tags
            set using `tags` will take precedence. If you set step keys, these
            must be compatible with any solid subset specified using `preset`.
        instance (Optional[DagsterInstance]): The instance to execute against.
            If this is ``None``, an ephemeral instance will be used, and no
            artifacts will be persisted from the run.
        raise_on_error (Optional[bool]): Whether or not to raise exceptions
            when they occur. Defaults to ``True``, since this is the most
            useful behavior in test.

    Returns:
        :py:class:`PipelineExecutionResult`: The result of pipeline execution.

    For the asynchronous version, see :py:func:`execute_pipeline_iterator`.

    This is the entrypoint for dagster CLI execution. For the dagster-graphql
    entrypoint, see ``dagster.core.execution.api.execute_plan()``.
    '''
    # Centralized argument validation/normalization; also builds the plan.
    (
        pipeline,
        environment_dict,
        instance,
        mode,
        tags,
        run_config,
        execution_plan,
    ) = _check_execute_pipeline_args(
        'execute_pipeline',
        pipeline=pipeline,
        environment_dict=environment_dict,
        mode=mode,
        preset=preset,
        tags=tags,
        run_config=run_config,
        instance=instance,
    )

    pipeline_run = instance.create_run_for_pipeline(
        pipeline=pipeline,
        run_id=run_config.run_id,
        environment_dict=environment_dict,
        mode=mode,
        selector=pipeline.selector,
        step_keys_to_execute=run_config.step_keys_to_execute,
        tags=tags,
        root_run_id=run_config.previous_run_id,
        parent_run_id=run_config.previous_run_id,
    )

    init_manager = pipeline_initialization_manager(
        pipeline,
        environment_dict,
        pipeline_run,
        instance,
        execution_plan,
        raise_on_error=raise_on_error,
    )
    # Setup events are emitted even when initialization ultimately fails.
    events = list(init_manager.generate_setup_events())
    ctx = init_manager.get_object()
    try:
        # ctx is falsy when initialization failed; skip execution but still
        # run teardown below.
        if ctx:
            events.extend(_pipeline_execution_iterator(ctx, execution_plan, pipeline_run))
    finally:
        events.extend(init_manager.generate_teardown_events())

    return PipelineExecutionResult(
        pipeline,
        pipeline_run.run_id,
        events,
        lambda: scoped_pipeline_context(
            pipeline,
            environment_dict,
            pipeline_run,
            instance,
            execution_plan,
            system_storage_data=SystemStorageData(
                intermediates_manager=ctx.intermediates_manager,
                file_manager=ctx.file_manager,
            ),
        ),
    )
def execute_run(pipeline, pipeline_run, instance, raise_on_error=False):
    """Executes an existing pipeline run synchronously.

    Synchronous version of execute_run_iterator.

    Args:
        pipeline (IPipeline): The pipeline to execute.
        pipeline_run (PipelineRun): The run to execute
        instance (DagsterInstance): The instance in which the run has been
            created.
        raise_on_error (Optional[bool]): Whether or not to raise exceptions
            when they occur. Defaults to ``False``.

    Returns:
        PipelineExecutionResult: The result of the execution.
    """
    # Guard against the common mistake of passing a raw definition instead of
    # an IPipeline wrapper.
    if isinstance(pipeline, PipelineDefinition):
        raise DagsterInvariantViolationError(
            "execute_run requires an IPipeline but received a PipelineDefinition "
            "directly instead. To support hand-off to other processes provide a "
            "ReconstructablePipeline which can be done using reconstructable(). For in "
            "process only execution you can use InMemoryPipeline."
        )

    check.inst_param(pipeline, "pipeline", IPipeline)
    check.inst_param(pipeline_run, "pipeline_run", PipelineRun)
    check.inst_param(instance, "instance", DagsterInstance)
    check.invariant(
        pipeline_run.status == PipelineRunStatus.NOT_STARTED,
        desc="Pipeline run {} ({}) in state {}, expected PipelineRunStatus.NOT_STARTED".format(
            pipeline_run.pipeline_name, pipeline_run.run_id, pipeline_run.status
        ),
    )

    pipeline_def = pipeline.get_definition()
    if pipeline_run.solids_to_execute:
        if isinstance(pipeline_def, PipelineSubsetDefinition):
            # The pipeline is already subset; the run's selection must agree
            # with it exactly.
            check.invariant(
                pipeline_run.solids_to_execute == pipeline.solids_to_execute,
                "Cannot execute PipelineRun with solids_to_execute {solids_to_execute} that "
                "conflicts with pipeline subset {pipeline_solids_to_execute}.".format(
                    pipeline_solids_to_execute=str_format_set(pipeline.solids_to_execute),
                    solids_to_execute=str_format_set(pipeline_run.solids_to_execute),
                ),
            )
        else:
            # when `execute_run` is directly called, the sub pipeline hasn't been created
            # note that when we receive the solids to execute via PipelineRun, it won't support
            # solid selection query syntax
            pipeline = pipeline.subset_for_execution_from_existing_pipeline(
                pipeline_run.solids_to_execute
            )

    plan = create_execution_plan(
        pipeline,
        run_config=pipeline_run.run_config,
        mode=pipeline_run.mode,
        step_keys_to_execute=pipeline_run.step_keys_to_execute,
    )

    # Memoized runs swap in a plan with already-computed steps pruned away.
    if is_memoized_run(pipeline_run.tags):
        plan = instance.resolve_memoized_execution_plan(
            plan, run_config=pipeline_run.run_config, mode=pipeline_run.mode
        )

    run_iterable = _ExecuteRunWithPlanIterable(
        execution_plan=plan,
        iterator=_pipeline_execution_iterator,
        execution_context_manager=PipelineExecutionContextManager(
            execution_plan=plan,
            pipeline_run=pipeline_run,
            instance=instance,
            run_config=pipeline_run.run_config,
            raise_on_error=raise_on_error,
        ),
    )
    events = list(run_iterable)
    # Context built during execution; reused by the reconstruction lambda.
    ctx = run_iterable.pipeline_context

    return PipelineExecutionResult(
        pipeline.get_definition(),
        pipeline_run.run_id,
        events,
        lambda: scoped_pipeline_context(
            plan,
            pipeline_run.run_config,
            pipeline_run,
            instance,
            intermediate_storage=ctx.intermediate_storage,
            system_storage_data=SystemStorageData(
                intermediate_storage=ctx.intermediate_storage,
                file_manager=ctx.file_manager,
            ),
        ),
    )