Beispiel #1
0
def _execute_step_command_body(args: ExecuteStepArgs,
                               instance: DagsterInstance,
                               pipeline_run: PipelineRun):
    single_step_key = (args.step_keys_to_execute[0]
                       if args.step_keys_to_execute
                       and len(args.step_keys_to_execute) == 1 else None)
    try:
        check.inst(
            pipeline_run,
            PipelineRun,
            "Pipeline run with id '{}' not found for step execution".format(
                args.pipeline_run_id),
        )

        if args.should_verify_step:
            success = verify_step(
                instance,
                pipeline_run,
                check.not_none(args.known_state).get_retry_state(),
                args.step_keys_to_execute,
            )
            if not success:
                return

        recon_pipeline = recon_pipeline_from_origin(
            args.pipeline_origin).subset_for_execution_from_existing_pipeline(
                pipeline_run.solids_to_execute)

        execution_plan = create_execution_plan(
            recon_pipeline,
            run_config=pipeline_run.run_config,
            step_keys_to_execute=args.step_keys_to_execute,
            mode=pipeline_run.mode,
            known_state=args.known_state,
        )

        yield from execute_plan_iterator(
            execution_plan,
            recon_pipeline,
            pipeline_run,
            instance,
            run_config=pipeline_run.run_config,
            retry_mode=args.retry_mode,
        )
    except (KeyboardInterrupt, DagsterExecutionInterruptedError):
        yield instance.report_engine_event(
            message="Step execution terminated by interrupt",
            pipeline_run=pipeline_run,
            step_key=single_step_key,
        )
        raise
    except Exception:
        yield instance.report_engine_event(
            "An exception was thrown during step execution that is likely a framework error, rather than an error in user code.",
            pipeline_run,
            EngineEventData.engine_error(
                serializable_error_info_from_exc_info(sys.exc_info())),
            step_key=single_step_key,
        )
        raise
Beispiel #2
0
def execute_run_host_mode(
    pipeline: ReconstructablePipeline,
    pipeline_run: PipelineRun,
    instance: DagsterInstance,
    executor_defs: Optional[List[ExecutorDefinition]] = None,
    raise_on_error: bool = False,
):
    check.inst_param(pipeline, "pipeline", ReconstructablePipeline)
    check.inst_param(pipeline_run, "pipeline_run", PipelineRun)
    check.inst_param(instance, "instance", DagsterInstance)
    check.opt_list_param(executor_defs,
                         "executor_defs",
                         of_type=ExecutorDefinition)
    executor_defs = executor_defs if executor_defs != None else default_executors

    if pipeline_run.status == PipelineRunStatus.CANCELED:
        message = "Not starting execution since the run was canceled before execution could start"
        instance.report_engine_event(
            message,
            pipeline_run,
        )
        raise DagsterInvariantViolationError(message)

    check.invariant(
        pipeline_run.status == PipelineRunStatus.NOT_STARTED
        or pipeline_run.status == PipelineRunStatus.STARTING,
        desc="Pipeline run {} ({}) in state {}, expected NOT_STARTED or STARTING"
        .format(pipeline_run.pipeline_name, pipeline_run.run_id,
                pipeline_run.status),
    )

    if pipeline_run.solids_to_execute:
        pipeline = pipeline.subset_for_execution_from_existing_pipeline(
            frozenset(pipeline_run.solids_to_execute))

    execution_plan_snapshot = instance.get_execution_plan_snapshot(
        pipeline_run.execution_plan_snapshot_id)
    execution_plan = ExecutionPlan.rebuild_from_snapshot(
        pipeline_run.pipeline_name,
        execution_plan_snapshot,
    )

    _execute_run_iterable = ExecuteRunWithPlanIterable(
        execution_plan=execution_plan,
        iterator=pipeline_execution_iterator,
        execution_context_manager=PlanOrchestrationContextManager(
            context_event_generator=host_mode_execution_context_event_generator,
            pipeline=pipeline,
            execution_plan=execution_plan,
            run_config=pipeline_run.run_config,
            pipeline_run=pipeline_run,
            instance=instance,
            raise_on_error=raise_on_error,
            executor_defs=executor_defs,
            output_capture=None,
        ),
    )
    event_list = list(_execute_run_iterable)
    return event_list
Beispiel #3
0
def execute_run_host_mode(
    pipeline: ReconstructablePipeline,
    pipeline_run: PipelineRun,
    instance: DagsterInstance,
    get_executor_def_fn: Callable[[Optional[str]], ExecutorDefinition] = None,
    raise_on_error: bool = False,
):
    check.inst_param(pipeline, "pipeline", ReconstructablePipeline)
    check.inst_param(pipeline_run, "pipeline_run", PipelineRun)
    check.inst_param(instance, "instance", DagsterInstance)
    check.opt_callable_param(get_executor_def_fn, "get_executor_def_fn")

    if pipeline_run.status == PipelineRunStatus.CANCELED:
        message = "Not starting execution since the run was canceled before execution could start"
        instance.report_engine_event(
            message,
            pipeline_run,
        )
        raise DagsterInvariantViolationError(message)

    check.invariant(
        pipeline_run.status == PipelineRunStatus.NOT_STARTED
        or pipeline_run.status == PipelineRunStatus.STARTING,
        desc="Pipeline run {} ({}) in state {}, expected NOT_STARTED or STARTING"
        .format(pipeline_run.pipeline_name, pipeline_run.run_id,
                pipeline_run.status),
    )

    if pipeline_run.solids_to_execute:
        pipeline = pipeline.subset_for_execution_from_existing_pipeline(
            pipeline_run.solids_to_execute)

    execution_plan_snapshot = instance.get_execution_plan_snapshot(
        pipeline_run.execution_plan_snapshot_id)
    execution_plan = ExecutionPlan.rebuild_from_snapshot(
        pipeline_run.pipeline_name,
        execution_plan_snapshot,
    )

    _execute_run_iterable = ExecuteRunWithPlanIterable(
        execution_plan=execution_plan,
        iterator=pipeline_execution_iterator,
        execution_context_manager=HostModeRunWorkerExecutionContextManager(
            execution_plan=execution_plan,
            recon_pipeline=pipeline,
            pipeline_run=pipeline_run,
            instance=instance,
            run_config=pipeline_run.run_config,
            get_executor_def_fn=get_executor_def_fn,
            raise_on_error=raise_on_error,
        ),
    )
    event_list = list(_execute_run_iterable)
    return event_list
Beispiel #4
0
def core_execute_run(
    recon_pipeline: ReconstructablePipeline,
    pipeline_run: PipelineRun,
    instance: DagsterInstance,
    resume_from_failure: bool = False,
) -> Generator[DagsterEvent, None, None]:
    check.inst_param(recon_pipeline, "recon_pipeline", ReconstructablePipeline)
    check.inst_param(pipeline_run, "pipeline_run", PipelineRun)
    check.inst_param(instance, "instance", DagsterInstance)

    # try to load the pipeline definition early
    try:
        recon_pipeline.get_definition()
    except Exception:
        yield instance.report_engine_event(
            "Could not load pipeline definition.",
            pipeline_run,
            EngineEventData.engine_error(
                serializable_error_info_from_exc_info(sys.exc_info())),
        )
        yield from _report_run_failed_if_not_finished(instance,
                                                      pipeline_run.run_id)
        raise
    try:
        yield from execute_run_iterator(
            recon_pipeline,
            pipeline_run,
            instance,
            resume_from_failure=resume_from_failure)
    except (KeyboardInterrupt, DagsterExecutionInterruptedError):
        yield from _report_run_failed_if_not_finished(instance,
                                                      pipeline_run.run_id)
        yield instance.report_engine_event(
            message="Run execution terminated by interrupt",
            pipeline_run=pipeline_run,
        )
        raise
    except Exception:
        yield instance.report_engine_event(
            "An exception was thrown during execution that is likely a framework error, "
            "rather than an error in user code.",
            pipeline_run,
            EngineEventData.engine_error(
                serializable_error_info_from_exc_info(sys.exc_info())),
        )
        yield from _report_run_failed_if_not_finished(instance,
                                                      pipeline_run.run_id)
        raise
Beispiel #5
0
def _resume_run_command_body(
    recon_pipeline: ReconstructablePipeline,
    pipeline_run_id: Optional[str],
    instance: DagsterInstance,
    write_stream_fn: Callable[[DagsterEvent], Any],
    set_exit_code_on_failure: bool,
):
    if instance.should_start_background_run_thread:
        cancellation_thread, cancellation_thread_shutdown_event = start_run_cancellation_thread(
            instance, pipeline_run_id)
    pipeline_run = instance.get_run_by_id(pipeline_run_id)
    check.inst(
        pipeline_run,
        PipelineRun,
        "Pipeline run with id '{}' not found for run execution.".format(
            pipeline_run_id),
    )

    pid = os.getpid()
    instance.report_engine_event(
        "Started process for resuming pipeline (pid: {pid}).".format(pid=pid),
        pipeline_run,
        EngineEventData.in_process(pid, marker_end="cli_api_subprocess_init"),
    )

    run_worker_failed = False

    try:
        for event in core_execute_run(
                recon_pipeline,
                pipeline_run,
                instance,
                resume_from_failure=True,
        ):
            write_stream_fn(event)
            if event.event_type == DagsterEventType.PIPELINE_FAILURE:
                run_worker_failed = True

    except:
        # relies on core_execute_run writing failures to the event log before raising
        run_worker_failed = True
    finally:
        if instance.should_start_background_run_thread:
            cancellation_thread_shutdown_event.set()
            if cancellation_thread.is_alive():
                cancellation_thread.join(timeout=15)
                if cancellation_thread.is_alive():
                    instance.report_engine_event(
                        "Cancellation thread did not shutdown gracefully",
                        pipeline_run,
                    )
        instance.report_engine_event(
            "Process for pipeline exited (pid: {pid}).".format(pid=pid),
            pipeline_run,
        )

    return 1 if (run_worker_failed and set_exit_code_on_failure) else 0
Beispiel #6
0
def execute_run(
    pipeline: IPipeline,
    pipeline_run: PipelineRun,
    instance: DagsterInstance,
    raise_on_error: bool = False,
) -> PipelineExecutionResult:
    """Executes an existing pipeline run synchronously.

    Synchronous version of execute_run_iterator.

    Args:
        pipeline (IPipeline): The pipeline to execute.
        pipeline_run (PipelineRun): The run to execute
        instance (DagsterInstance): The instance in which the run has been created.
        raise_on_error (Optional[bool]): Whether or not to raise exceptions when they occur.
            Defaults to ``False``.

    Returns:
        PipelineExecutionResult: The result of the execution.
    """
    if isinstance(pipeline, PipelineDefinition):
        raise DagsterInvariantViolationError(
            "execute_run requires an IPipeline but received a PipelineDefinition "
            "directly instead. To support hand-off to other processes provide a "
            "ReconstructablePipeline which can be done using reconstructable(). For in "
            "process only execution you can use InMemoryPipeline.")

    check.inst_param(pipeline, "pipeline", IPipeline)
    check.inst_param(pipeline_run, "pipeline_run", PipelineRun)
    check.inst_param(instance, "instance", DagsterInstance)

    if pipeline_run.status == PipelineRunStatus.CANCELED:
        message = "Not starting execution since the run was canceled before execution could start"
        instance.report_engine_event(
            message,
            pipeline_run,
        )
        raise DagsterInvariantViolationError(message)

    check.invariant(
        pipeline_run.status == PipelineRunStatus.NOT_STARTED
        or pipeline_run.status == PipelineRunStatus.STARTING,
        desc="Pipeline run {} ({}) in state {}, expected NOT_STARTED or STARTING"
        .format(pipeline_run.pipeline_name, pipeline_run.run_id,
                pipeline_run.status),
    )
    pipeline_def = pipeline.get_definition()
    if pipeline_run.solids_to_execute:
        if isinstance(pipeline_def, PipelineSubsetDefinition):
            check.invariant(
                pipeline_run.solids_to_execute == pipeline.solids_to_execute,
                "Cannot execute PipelineRun with solids_to_execute {solids_to_execute} that "
                "conflicts with pipeline subset {pipeline_solids_to_execute}.".
                format(
                    pipeline_solids_to_execute=str_format_set(
                        pipeline.solids_to_execute),
                    solids_to_execute=str_format_set(
                        pipeline_run.solids_to_execute),
                ),
            )
        else:
            # when `execute_run` is directly called, the sub pipeline hasn't been created
            # note that when we receive the solids to execute via PipelineRun, it won't support
            # solid selection query syntax
            pipeline = pipeline.subset_for_execution_from_existing_pipeline(
                pipeline_run.solids_to_execute)

    execution_plan = _get_execution_plan_from_run(pipeline, pipeline_run,
                                                  instance)

    if is_memoized_run(pipeline_run.tags):
        resolved_run_config = ResolvedRunConfig.build(
            pipeline.get_definition(), pipeline_run.run_config,
            pipeline_run.mode)

        execution_plan = resolve_memoized_execution_plan(
            execution_plan,
            pipeline.get_definition(),
            pipeline_run.run_config,
            instance,
            resolved_run_config,
        )

    output_capture: Optional[Dict[StepOutputHandle, Any]] = {}

    _execute_run_iterable = ExecuteRunWithPlanIterable(
        execution_plan=execution_plan,
        iterator=pipeline_execution_iterator,
        execution_context_manager=PlanOrchestrationContextManager(
            context_event_generator=orchestration_context_event_generator,
            pipeline=pipeline,
            execution_plan=execution_plan,
            pipeline_run=pipeline_run,
            instance=instance,
            run_config=pipeline_run.run_config,
            raise_on_error=raise_on_error,
            executor_defs=None,
            output_capture=output_capture,
        ),
    )
    event_list = list(_execute_run_iterable)

    return PipelineExecutionResult(
        pipeline.get_definition(),
        pipeline_run.run_id,
        event_list,
        lambda: scoped_pipeline_context(
            execution_plan,
            pipeline,
            pipeline_run.run_config,
            pipeline_run,
            instance,
        ),
        output_capture=output_capture,
    )
Beispiel #7
0
def execute_run(
    pipeline: IPipeline,
    pipeline_run: PipelineRun,
    instance: DagsterInstance,
    raise_on_error: bool = False,
) -> PipelineExecutionResult:
    """Executes an existing pipeline run synchronously.

    Synchronous version of execute_run_iterator.

    Args:
        pipeline (IPipeline): The pipeline to execute.
        pipeline_run (PipelineRun): The run to execute
        instance (DagsterInstance): The instance in which the run has been created.
        raise_on_error (Optional[bool]): Whether or not to raise exceptions when they occur.
            Defaults to ``False``.

    Returns:
        PipelineExecutionResult: The result of the execution.
    """
    if isinstance(pipeline, PipelineDefinition):
        raise DagsterInvariantViolationError(
            "execute_run requires an IPipeline but received a PipelineDefinition "
            "directly instead. To support hand-off to other processes provide a "
            "ReconstructablePipeline which can be done using reconstructable(). For in "
            "process only execution you can use InMemoryPipeline.")

    check.inst_param(pipeline, "pipeline", IPipeline)
    check.inst_param(pipeline_run, "pipeline_run", PipelineRun)
    check.inst_param(instance, "instance", DagsterInstance)

    if pipeline_run.status == PipelineRunStatus.CANCELED:
        message = "Not starting execution since the run was canceled before execution could start"
        instance.report_engine_event(
            message,
            pipeline_run,
        )
        raise DagsterInvariantViolationError(message)

    check.invariant(
        pipeline_run.status == PipelineRunStatus.NOT_STARTED
        or pipeline_run.status == PipelineRunStatus.STARTING,
        desc="Pipeline run {} ({}) in state {}, expected NOT_STARTED or STARTING"
        .format(pipeline_run.pipeline_name, pipeline_run.run_id,
                pipeline_run.status),
    )
    pipeline_def = pipeline.get_definition()
    if pipeline_run.solids_to_execute:
        if isinstance(pipeline_def, PipelineSubsetDefinition):
            check.invariant(
                pipeline_run.solids_to_execute == pipeline.solids_to_execute,
                "Cannot execute PipelineRun with solids_to_execute {solids_to_execute} that "
                "conflicts with pipeline subset {pipeline_solids_to_execute}.".
                format(
                    pipeline_solids_to_execute=str_format_set(
                        pipeline.solids_to_execute),
                    solids_to_execute=str_format_set(
                        pipeline_run.solids_to_execute),
                ),
            )
        else:
            # when `execute_run` is directly called, the sub pipeline hasn't been created
            # note that when we receive the solids to execute via PipelineRun, it won't support
            # solid selection query syntax
            pipeline = pipeline.subset_for_execution_from_existing_pipeline(
                pipeline_run.solids_to_execute)

    execution_plan = create_execution_plan(
        pipeline,
        run_config=pipeline_run.run_config,
        mode=pipeline_run.mode,
        step_keys_to_execute=pipeline_run.step_keys_to_execute,
    )

    if is_memoized_run(pipeline_run.tags):
        execution_plan = resolve_memoized_execution_plan(execution_plan)

    _execute_run_iterable = _ExecuteRunWithPlanIterable(
        execution_plan=execution_plan,
        iterator=_pipeline_execution_iterator,
        execution_context_manager=PipelineExecutionContextManager(
            execution_plan=execution_plan,
            pipeline_run=pipeline_run,
            instance=instance,
            run_config=pipeline_run.run_config,
            raise_on_error=raise_on_error,
        ),
    )
    event_list = list(_execute_run_iterable)
    pipeline_context = _execute_run_iterable.pipeline_context

    # workaround for mem_io_manager to work in reconstruct_context, e.g. result.result_for_solid
    # in-memory values dict will get lost when the resource is re-initiated in reconstruct_context
    # so instead of re-initiating every single resource, we pass the resource instances to
    # reconstruct_context directly to avoid re-building from resource def.
    resource_instances_to_override = {}
    if pipeline_context:  # None if we have a pipeline failure
        for (
                key,
                resource_instance,
        ) in pipeline_context.scoped_resources_builder.resource_instance_dict.items(
        ):
            if isinstance(resource_instance, InMemoryIOManager):
                resource_instances_to_override[key] = resource_instance

    return PipelineExecutionResult(
        pipeline.get_definition(),
        pipeline_run.run_id,
        event_list,
        lambda hardcoded_resources_arg: scoped_pipeline_context(
            execution_plan,
            pipeline_run.run_config,
            pipeline_run,
            instance,
            intermediate_storage=pipeline_context.intermediate_storage,
            resource_instances_to_override=hardcoded_resources_arg,
        ),
        resource_instances_to_override=resource_instances_to_override,
    )