Ejemplo n.º 1
0
def execute_run_iterator(pipeline, pipeline_run, instance):
    '''Prepare an existing pipeline run for execution and return an iterator over it.

    The arguments are type-checked, the run is required to be in the
    ``NOT_STARTED`` status, the run's solid subset (if any) is reconciled with
    the pipeline, and an execution plan is built from the run's environment
    dict, mode and step keys.

    Args:
        pipeline (ExecutablePipeline): The pipeline to execute.
        pipeline_run (PipelineRun): The run to execute; its status must be
            ``PipelineRunStatus.NOT_STARTED``.
        instance (DagsterInstance): The instance handed to the run iterable.

    Returns:
        The iterator obtained by calling ``iter`` on an
        ``_ExecuteRunWithPlanIterable`` constructed with ``raise_on_error=False``.
    '''
    check.inst_param(pipeline, 'pipeline', ExecutablePipeline)
    check.inst_param(pipeline_run, 'pipeline_run', PipelineRun)
    check.inst_param(instance, 'instance', DagsterInstance)
    check.invariant(pipeline_run.status == PipelineRunStatus.NOT_STARTED)

    requested_subset = pipeline_run.solid_subset
    if requested_subset:
        definition = pipeline.get_definition()
        if not isinstance(definition, PipelineSubsetForExecution):
            # The pipeline has not been subsetted yet: narrow it to the run's solids.
            pipeline = pipeline.subset_for_execution(requested_subset)
        else:
            # The pipeline is already a subset: it has to select exactly the
            # same solids as the run does.
            existing_subset = definition.solid_subset
            subsets_agree = len(requested_subset) == len(existing_subset) and set(
                requested_subset
            ) == set(existing_subset)
            conflict_message = (
                'Cannot execute PipelineRun with solid_subset {solid_subset} that conflicts with '
                'pipeline subset {pipeline_solid_subset}.'
            ).format(
                pipeline_solid_subset=str_format_list(existing_subset),
                solid_subset=str_format_list(requested_subset),
            )
            check.invariant(subsets_agree, conflict_message)

    plan = create_execution_plan(
        pipeline,
        environment_dict=pipeline_run.environment_dict,
        mode=pipeline_run.mode,
        step_keys_to_execute=pipeline_run.step_keys_to_execute,
    )

    run_iterable = _ExecuteRunWithPlanIterable(
        execution_plan=plan,
        pipeline_run=pipeline_run,
        instance=instance,
        iterator=_pipeline_execution_iterator,
        environment_dict=pipeline_run.environment_dict,
        retries=None,
        raise_on_error=False,
    )
    return iter(run_iterable)
Ejemplo n.º 2
0
    def create_run_for_pipeline(
        self,
        pipeline_def,
        execution_plan=None,
        run_id=None,
        run_config=None,
        mode=None,
        solids_to_execute=None,
        step_keys_to_execute=None,
        status=None,
        tags=None,
        root_run_id=None,
        parent_run_id=None,
        solid_selection=None,
    ):
        """Create a run for ``pipeline_def`` together with pipeline and plan snapshots.

        Args:
            pipeline_def (PipelineDefinition): The pipeline to create a run for.
            execution_plan (Optional[ExecutionPlan]): Plan to start from; when it
                is not supplied one is created from ``pipeline_def``,
                ``run_config`` and ``mode``.
            run_id: Forwarded to ``self.create_run``.
            run_config: Used for plan creation and forwarded to ``self.create_run``.
            mode (Optional[str]): Falls back to the pipeline's default mode name
                when omitted.
            solids_to_execute (Optional[Set[str]]): Solids the run is restricted to.
            step_keys_to_execute: Step keys used to build a subset plan. Must not
                be combined with a memoized run.
            status: Forwarded to ``self.create_run``.
            tags: Inspected by ``is_memoized_run`` and forwarded to
                ``self.create_run``.
            root_run_id: Forwarded to ``self.create_run``.
            parent_run_id: Forwarded to ``self.create_run``.
            solid_selection (Optional[List[str]]): User solid queries; only
                forwarded, never resolved here.

        Returns:
            Whatever ``self.create_run`` returns.

        Raises:
            DagsterInvariantViolationError: If the run is memoized according to
                ``is_memoized_run(tags)`` and ``step_keys_to_execute`` is given.
        """
        from dagster.core.execution.api import create_execution_plan
        from dagster.core.execution.plan.plan import ExecutionPlan
        from dagster.core.snap import snapshot_from_execution_plan

        check.inst_param(pipeline_def, "pipeline_def", PipelineDefinition)
        check.opt_inst_param(execution_plan, "execution_plan", ExecutionPlan)

        # solids_to_execute (the frozenset successor of the old solid_subset) is what
        # actually drives subset execution. solid_selection carries the raw user
        # queries; it is not turned into solids_to_execute in this function and is
        # merely passed further down.
        check.opt_set_param(solids_to_execute, "solids_to_execute", of_type=str)
        check.opt_list_param(solid_selection, "solid_selection", of_type=str)

        if solids_to_execute:
            if not isinstance(pipeline_def, PipelineSubsetDefinition):
                # `create_run_for_pipeline` was called directly with a full
                # definition, so the subset definition is derived here.
                pipeline_def = pipeline_def.get_pipeline_subset_def(
                    solids_to_execute=solids_to_execute
                )
            else:
                # pipeline_def came from an IPipeline or ExternalPipeline and is
                # already subsetted; it must agree with the requested solids.
                subsets_match = solids_to_execute == pipeline_def.solids_to_execute
                conflict_message = (
                    "Cannot create a PipelineRun from pipeline subset {pipeline_solids_to_execute} "
                    "that conflicts with solids_to_execute arg {solids_to_execute}"
                ).format(
                    pipeline_solids_to_execute=str_format_list(pipeline_def.solids_to_execute),
                    solids_to_execute=str_format_list(solids_to_execute),
                )
                check.invariant(subsets_match, conflict_message)

        if execution_plan:
            complete_plan = execution_plan
        else:
            complete_plan = create_execution_plan(pipeline_def, run_config=run_config, mode=mode)

        # The starting plan has to list as many step keys to execute as it has steps.
        check.invariant(len(complete_plan.step_keys_to_execute) == len(complete_plan.steps))

        if is_memoized_run(tags):
            if step_keys_to_execute:
                raise DagsterInvariantViolationError(
                    "step_keys_to_execute parameter cannot be used in conjunction with memoized "
                    "pipeline runs."
                )

            # TODO: tighter integration with existing step_keys_to_execute functionality
            plan_for_run = self.resolve_memoized_execution_plan(
                complete_plan, run_config=run_config, mode=mode
            )
            step_keys_to_execute = plan_for_run.step_keys_to_execute
        elif step_keys_to_execute:
            plan_for_run = complete_plan.build_subset_plan(step_keys_to_execute)
        else:
            plan_for_run = complete_plan

        resolved_mode = check.opt_str_param(
            mode, "mode", default=pipeline_def.get_default_mode_name()
        )
        pipeline_snapshot = pipeline_def.get_pipeline_snapshot()
        plan_snapshot = snapshot_from_execution_plan(
            plan_for_run, pipeline_def.get_pipeline_snapshot_id()
        )
        parent_snapshot = pipeline_def.get_parent_pipeline_snapshot()

        return self.create_run(
            pipeline_name=pipeline_def.name,
            run_id=run_id,
            run_config=run_config,
            mode=resolved_mode,
            solid_selection=solid_selection,
            solids_to_execute=solids_to_execute,
            step_keys_to_execute=step_keys_to_execute,
            status=status,
            tags=tags,
            root_run_id=root_run_id,
            parent_run_id=parent_run_id,
            pipeline_snapshot=pipeline_snapshot,
            execution_plan_snapshot=plan_snapshot,
            parent_pipeline_snapshot=parent_snapshot,
        )
Ejemplo n.º 3
0
    def create_run_for_pipeline(
        self,
        pipeline_def,
        execution_plan=None,
        run_id=None,
        run_config=None,
        mode=None,
        solids_to_execute=None,
        step_keys_to_execute=None,
        status=None,
        tags=None,
        root_run_id=None,
        parent_run_id=None,
        solid_selection=None,
    ):
        '''Create a run for ``pipeline_def`` together with pipeline and plan snapshots.

        Args:
            pipeline_def (PipelineDefinition): The pipeline to create a run for.
            execution_plan (Optional[ExecutionPlan]): Plan to snapshot; when it is
                ``None`` one is created from ``pipeline_def``, ``run_config``,
                ``mode`` and ``step_keys_to_execute``.
            run_id: Forwarded to ``self.create_run``.
            run_config: Used for plan creation and forwarded to ``self.create_run``.
            mode (Optional[str]): Falls back to the pipeline's default mode name
                when omitted.
            solids_to_execute (Optional[Set[str]]): Solids the run is restricted to.
            step_keys_to_execute: Used for plan creation and forwarded to
                ``self.create_run``.
            status: Forwarded to ``self.create_run``.
            tags: Forwarded to ``self.create_run``.
            root_run_id: Forwarded to ``self.create_run``.
            parent_run_id: Forwarded to ``self.create_run``.
            solid_selection (Optional[List[str]]): User solid queries; only
                forwarded, never resolved here.

        Returns:
            Whatever ``self.create_run`` returns.
        '''
        from dagster.core.execution.api import create_execution_plan
        from dagster.core.execution.plan.plan import ExecutionPlan
        from dagster.core.snap import snapshot_from_execution_plan

        check.inst_param(pipeline_def, 'pipeline_def', PipelineDefinition)
        check.opt_inst_param(execution_plan, 'execution_plan', ExecutionPlan)

        # solids_to_execute (the frozenset successor of the old solid_subset) is what
        # actually drives subset execution. solid_selection carries the raw user
        # queries; it is not turned into solids_to_execute in this function and is
        # merely passed further down.
        check.opt_set_param(solids_to_execute, 'solids_to_execute', of_type=str)
        check.opt_list_param(solid_selection, 'solid_selection', of_type=str)

        if solids_to_execute:
            if not isinstance(pipeline_def, PipelineSubsetDefinition):
                # `create_run_for_pipeline` was called directly with a full
                # definition, so the subset definition is derived here.
                pipeline_def = pipeline_def.get_pipeline_subset_def(
                    solids_to_execute=solids_to_execute
                )
            else:
                # pipeline_def came from an ExecutablePipeline or ExternalPipeline
                # and is already subsetted; it must agree with the requested solids.
                subsets_match = solids_to_execute == pipeline_def.solids_to_execute
                conflict_message = (
                    'Cannot create a PipelineRun from pipeline subset {pipeline_solids_to_execute} '
                    'that conflicts with solids_to_execute arg {solids_to_execute}'
                ).format(
                    pipeline_solids_to_execute=str_format_list(pipeline_def.solids_to_execute),
                    solids_to_execute=str_format_list(solids_to_execute),
                )
                check.invariant(subsets_match, conflict_message)

        plan = execution_plan
        if plan is None:
            plan = create_execution_plan(
                pipeline_def,
                run_config=run_config,
                mode=mode,
                step_keys_to_execute=step_keys_to_execute,
            )

        resolved_mode = check.opt_str_param(
            mode, 'mode', default=pipeline_def.get_default_mode_name()
        )
        pipeline_snapshot = pipeline_def.get_pipeline_snapshot()
        plan_snapshot = snapshot_from_execution_plan(plan, pipeline_def.get_pipeline_snapshot_id())
        parent_snapshot = pipeline_def.get_parent_pipeline_snapshot()

        return self.create_run(
            pipeline_name=pipeline_def.name,
            run_id=run_id,
            run_config=run_config,
            mode=resolved_mode,
            solid_selection=solid_selection,
            solids_to_execute=solids_to_execute,
            step_keys_to_execute=step_keys_to_execute,
            status=status,
            tags=tags,
            root_run_id=root_run_id,
            parent_run_id=parent_run_id,
            pipeline_snapshot=pipeline_snapshot,
            execution_plan_snapshot=plan_snapshot,
            parent_pipeline_snapshot=parent_snapshot,
        )
Ejemplo n.º 4
0
def execute_run(pipeline, pipeline_run, instance, raise_on_error=False):
    '''Run an already-created pipeline run to completion and return its result.

    This is the synchronous counterpart of ``execute_run_iterator``: every event
    produced by the run is collected into a list before the result is built.

    Args:
        pipeline (Union[ExecutablePipeline, PipelineDefinition]): The pipeline to execute.
        pipeline_run (PipelineRun): The run to execute; its status must be
            ``PipelineRunStatus.NOT_STARTED``.
        instance (DagsterInstance): The instance in which the run has been created.
        raise_on_error (Optional[bool]): Whether or not to raise exceptions when
            they occur. Defaults to ``False``.

    Returns:
        PipelineExecutionResult: The result of the execution.
    '''
    pipeline, pipeline_def = _check_pipeline(pipeline)

    check.inst_param(pipeline_run, 'pipeline_run', PipelineRun)
    check.inst_param(instance, 'instance', DagsterInstance)
    check.invariant(pipeline_run.status == PipelineRunStatus.NOT_STARTED)

    requested_subset = pipeline_run.solid_subset
    if requested_subset:
        pipeline_def = pipeline.get_definition()
        if not isinstance(pipeline_def, PipelineSubsetForExecution):
            # The pipeline has not been subsetted yet: narrow it to the run's
            # solids and pick up the definition of the narrowed pipeline.
            pipeline = pipeline.subset_for_execution(requested_subset)
            pipeline_def = pipeline.get_definition()
        else:
            # The pipeline is already a subset: it has to select exactly the
            # same solids as the run does.
            existing_subset = pipeline_def.solid_subset
            subsets_agree = len(requested_subset) == len(existing_subset) and set(
                requested_subset
            ) == set(existing_subset)
            conflict_message = (
                'Cannot execute PipelineRun with solid_subset {solid_subset} that conflicts with '
                'pipeline subset {pipeline_solid_subset}.'
            ).format(
                pipeline_solid_subset=str_format_list(existing_subset),
                solid_subset=str_format_list(requested_subset),
            )
            check.invariant(subsets_agree, conflict_message)

    execution_plan = create_execution_plan(
        pipeline,
        environment_dict=pipeline_run.environment_dict,
        mode=pipeline_run.mode,
        step_keys_to_execute=pipeline_run.step_keys_to_execute,
    )

    run_iterable = _ExecuteRunWithPlanIterable(
        execution_plan=execution_plan,
        pipeline_run=pipeline_run,
        instance=instance,
        iterator=_pipeline_execution_iterator,
        environment_dict=pipeline_run.environment_dict,
        retries=None,
        raise_on_error=raise_on_error,
    )
    # Exhaust the iterable first; the pipeline context is read from it only afterwards.
    events = list(run_iterable)
    pipeline_context = run_iterable.pipeline_context

    def _reconstruct_context():
        # Evaluated lazily, each time the result invokes this callable.
        storage_data = SystemStorageData(
            intermediates_manager=pipeline_context.intermediates_manager,
            file_manager=pipeline_context.file_manager,
        )
        return scoped_pipeline_context(
            execution_plan,
            pipeline_run.environment_dict,
            pipeline_run,
            instance,
            system_storage_data=storage_data,
        )

    return PipelineExecutionResult(
        pipeline_def,
        pipeline_run.run_id,
        events,
        _reconstruct_context,
    )
Ejemplo n.º 5
0
    def create_run_for_pipeline(
        self,
        pipeline_def,
        execution_plan=None,
        run_id=None,
        environment_dict=None,
        mode=None,
        solid_subset=None,
        step_keys_to_execute=None,
        status=None,
        tags=None,
        root_run_id=None,
        parent_run_id=None,
    ):
        '''Create a run for ``pipeline_def`` together with pipeline and plan snapshots.

        Args:
            pipeline_def (PipelineDefinition): The pipeline to create a run for.
            execution_plan (Optional[ExecutionPlan]): Plan to snapshot; when it is
                ``None`` one is created from ``pipeline_def``,
                ``environment_dict``, ``mode`` and ``step_keys_to_execute``.
            run_id: Forwarded to ``self.create_run``.
            environment_dict: Used for plan creation and forwarded to
                ``self.create_run``.
            mode (Optional[str]): Falls back to the pipeline's default mode name
                when omitted.
            solid_subset: Solids the run is restricted to.
            step_keys_to_execute: Used for plan creation and forwarded to
                ``self.create_run``.
            status: Forwarded to ``self.create_run``.
            tags: Forwarded to ``self.create_run``.
            root_run_id: Forwarded to ``self.create_run``.
            parent_run_id: Forwarded to ``self.create_run``.

        Returns:
            Whatever ``self.create_run`` returns.
        '''
        from dagster.core.execution.api import create_execution_plan
        from dagster.core.execution.plan.plan import ExecutionPlan
        from dagster.core.snap import snapshot_from_execution_plan

        check.inst_param(pipeline_def, 'pipeline_def', PipelineDefinition)
        check.opt_inst_param(execution_plan, 'execution_plan', ExecutionPlan)

        if solid_subset:
            if not isinstance(pipeline_def, PipelineSubsetForExecution):
                # Full definition supplied: narrow it to the requested solids.
                pipeline_def = pipeline_def.subset_for_execution(solid_subset=solid_subset)
            else:
                # Already a subset: it has to select exactly the requested solids.
                existing_subset = pipeline_def.solid_subset
                subsets_agree = len(solid_subset) == len(existing_subset) and set(
                    solid_subset
                ) == set(existing_subset)
                conflict_message = (
                    'Cannot create a PipelineRun from pipeline subset {pipeline_solid_subset} that '
                    'conflicts with solid_subset arg {solid_subset}'
                ).format(
                    pipeline_solid_subset=str_format_list(existing_subset),
                    solid_subset=str_format_list(solid_subset),
                )
                check.invariant(subsets_agree, conflict_message)

        plan = execution_plan
        if plan is None:
            plan = create_execution_plan(
                pipeline_def,
                environment_dict=environment_dict,
                mode=mode,
                step_keys_to_execute=step_keys_to_execute,
            )

        resolved_mode = check.opt_str_param(
            mode, 'mode', default=pipeline_def.get_default_mode_name()
        )
        pipeline_snapshot = pipeline_def.get_pipeline_snapshot()
        plan_snapshot = snapshot_from_execution_plan(plan, pipeline_def.get_pipeline_snapshot_id())
        parent_snapshot = pipeline_def.get_parent_pipeline_snapshot()

        return self.create_run(
            pipeline_name=pipeline_def.name,
            run_id=run_id,
            environment_dict=environment_dict,
            mode=resolved_mode,
            solid_subset=solid_subset,
            step_keys_to_execute=step_keys_to_execute,
            status=status,
            tags=tags,
            root_run_id=root_run_id,
            parent_run_id=parent_run_id,
            pipeline_snapshot=pipeline_snapshot,
            execution_plan_snapshot=plan_snapshot,
            parent_pipeline_snapshot=parent_snapshot,
        )