Example #1
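Builds a disposable PlanExecutionContext for unit tests: an empty pipeline with optional logger definitions, an ephemeral DagsterInstance, and in-memory intermediate storage, assembled through Dagster's internal context-creation helpers. In the Dagster releases these snippets target, this helper appears to ship as dagster.utils.test.create_test_pipeline_execution_context.
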
def create_test_pipeline_execution_context(logger_defs=None):
    # Excerpted from Dagster's test utilities; module-level imports (check,
    # LoggerDefinition, ModeDefinition, PipelineDefinition, PipelineRun,
    # DagsterInstance, InMemoryPipeline, create_execution_plan, the
    # context-creation helpers, and PlanExecutionContext) are assumed to come
    # from the source module and are omitted in this excerpt.
    from dagster.core.storage.intermediate_storage import build_in_mem_intermediates_storage

    loggers = check.opt_dict_param(
        logger_defs, "logger_defs", key_type=str, value_type=LoggerDefinition
    )
    mode_def = ModeDefinition(logger_defs=loggers)
    pipeline_def = PipelineDefinition(
        name="test_legacy_context", solid_defs=[], mode_defs=[mode_def]
    )
    run_config = {"loggers": {key: {} for key in loggers}}
    pipeline_run = PipelineRun(pipeline_name="test_legacy_context", run_config=run_config)
    instance = DagsterInstance.ephemeral()
    execution_plan = create_execution_plan(pipeline=pipeline_def, run_config=run_config)
    creation_data = create_context_creation_data(
        InMemoryPipeline(pipeline_def), execution_plan, run_config, pipeline_run, instance
    )
    log_manager = create_log_manager(creation_data)
    scoped_resources_builder = ScopedResourcesBuilder()
    executor = create_executor(creation_data)

    return PlanExecutionContext(
        plan_data=create_plan_data(creation_data, True, executor.retries),
        execution_data=create_execution_data(
            context_creation_data=creation_data,
            scoped_resources_builder=scoped_resources_builder,
            intermediate_storage=build_in_mem_intermediates_storage(pipeline_run.run_id),
        ),
        log_manager=log_manager,
        output_capture=None,
    )
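
A minimal usage sketch, assuming the helper is importable from dagster.utils.test and that colored_console_logger is exposed from dagster.loggers (both appear to hold for the Dagster versions these snippets target):

from dagster.loggers import colored_console_logger
from dagster.utils.test import create_test_pipeline_execution_context

# A bare context, for code that only needs plan and execution data.
context = create_test_pipeline_execution_context()

# A context whose run config enables a named console logger.
logging_context = create_test_pipeline_execution_context(
    logger_defs={"console": colored_console_logger}
)
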
Example #2
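The inner step-execution loop from Dagster's plan executor (it appears to live in dagster.core.execution.plan.execute_plan; module-level imports are omitted in the excerpt). It pulls ready steps off the active execution, verifies that every required resource is present on the step context, streams each step's events while compute logs are captured, then yields skip events for failed or uncovered downstream steps, and finally fires hooks with the accumulated step events.
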
def inner_plan_execution_iterator(
        pipeline_context: PlanExecutionContext,
        execution_plan: ExecutionPlan) -> Iterator[DagsterEvent]:
    check.inst_param(pipeline_context, "pipeline_context",
                     PlanExecutionContext)
    check.inst_param(execution_plan, "execution_plan", ExecutionPlan)

    with execution_plan.start(
            retry_mode=pipeline_context.retry_mode) as active_execution:

        # It would be good to implement a reference tracking algorithm here to
        # garbage collect results that are no longer needed by any steps
        # https://github.com/dagster-io/dagster/issues/811
        while not active_execution.is_complete:
            step = active_execution.get_next_step()
            step_context = cast(
                StepExecutionContext,
                pipeline_context.for_step(
                    step,
                    active_execution.retry_state.get_attempt_count(step.key)),
            )
            step_event_list = []

            missing_resources = [
                resource_key
                for resource_key in step_context.required_resource_keys
                if not hasattr(step_context.resources, resource_key)
            ]
            check.invariant(
                len(missing_resources) == 0,
                ("Expected step context for solid {solid_name} to have all required resources, but "
                 "missing {missing_resources}.").format(
                     solid_name=step_context.solid.name,
                     missing_resources=missing_resources),
            )

            # capture all of the logs for this step
            with pipeline_context.instance.compute_log_manager.watch(
                    step_context.pipeline_run, step_context.step.key):

                for step_event in check.generator(
                        _dagster_event_sequence_for_step(step_context)):
                    check.inst(step_event, DagsterEvent)
                    step_event_list.append(step_event)
                    yield step_event
                    active_execution.handle_event(step_event)

                active_execution.verify_complete(pipeline_context, step.key)

            # process skips from failures or uncovered inputs
            for event in active_execution.plan_events_iterator(
                    pipeline_context):
                step_event_list.append(event)
                yield event

            # pass a list of step events to hooks
            for hook_event in _trigger_hook(step_context, step_event_list):
                yield hook_event
Example #3
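A later revision of the same iterator. The substantive change is the log-capture block: rather than a bare with compute_log_manager.watch(...) statement, it enters the watcher through an ExitStack, so an exception raised while setting up or tearing down log capture is reported as an engine event instead of aborting plan execution, and a capture_logs event is emitted when setup succeeds. Note also that the public dagster_event_sequence_for_step replaces the earlier private _dagster_event_sequence_for_step.
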
def inner_plan_execution_iterator(
    pipeline_context: PlanExecutionContext, execution_plan: ExecutionPlan
) -> Iterator[DagsterEvent]:
    check.inst_param(pipeline_context, "pipeline_context", PlanExecutionContext)
    check.inst_param(execution_plan, "execution_plan", ExecutionPlan)

    with execution_plan.start(retry_mode=pipeline_context.retry_mode) as active_execution:

        # It would be good to implement a reference tracking algorithm here to
        # garbage collect results that are no longer needed by any steps
        # https://github.com/dagster-io/dagster/issues/811
        while not active_execution.is_complete:
            step = active_execution.get_next_step()
            step_context = cast(
                StepExecutionContext,
                pipeline_context.for_step(
                    step, active_execution.retry_state.get_attempt_count(step.key)
                ),
            )
            step_event_list = []

            missing_resources = [
                resource_key
                for resource_key in step_context.required_resource_keys
                if not hasattr(step_context.resources, resource_key)
            ]
            check.invariant(
                len(missing_resources) == 0,
                (
                    "Expected step context for solid {solid_name} to have all required resources, but "
                    "missing {missing_resources}."
                ).format(solid_name=step_context.solid.name, missing_resources=missing_resources),
            )

            # capture all of the logs for this step; ExitStack lets a failure
            # while entering log capture be reported as an engine event
            # instead of aborting the whole plan iteration
            with ExitStack() as stack:
                log_capture_error = None
                try:
                    stack.enter_context(
                        pipeline_context.instance.compute_log_manager.watch(
                            step_context.pipeline_run, step_context.step.key
                        )
                    )
                except Exception as e:
                    yield DagsterEvent.engine_event(
                        pipeline_context=pipeline_context,
                        message="Exception while setting up compute log capture",
                        event_specific_data=EngineEventData(
                            error=serializable_error_info_from_exc_info(sys.exc_info())
                        ),
                        step_handle=step_context.step.handle,
                    )
                    log_capture_error = e

                if not log_capture_error:
                    yield DagsterEvent.capture_logs(
                        step_context, log_key=step_context.step.key, steps=[step_context.step]
                    )

                for step_event in check.generator(dagster_event_sequence_for_step(step_context)):
                    check.inst(step_event, DagsterEvent)
                    step_event_list.append(step_event)
                    yield step_event
                    active_execution.handle_event(step_event)

                active_execution.verify_complete(pipeline_context, step.key)

                # close log capture explicitly so that teardown failures are
                # also surfaced as engine events rather than raised
                try:
                    stack.close()
                except Exception:
                    yield DagsterEvent.engine_event(
                        pipeline_context=pipeline_context,
                        message="Exception while cleaning up compute log capture",
                        event_specific_data=EngineEventData(
                            error=serializable_error_info_from_exc_info(sys.exc_info())
                        ),
                        step_handle=step_context.step.handle,
                    )

            # process skips from failures or uncovered inputs
            for event in active_execution.plan_events_iterator(pipeline_context):
                step_event_list.append(event)
                yield event

            # pass a list of step events to hooks
            for hook_event in _trigger_hook(step_context, step_event_list):
                yield hook_event
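
The guarded setup/teardown pattern in Example #3 is worth isolating. Below is a minimal, self-contained sketch; make_capture_cm, body, and report are hypothetical stand-ins for the compute log manager's watch context, the per-step event loop, and engine-event reporting:

import sys
from contextlib import ExitStack

def run_with_guarded_capture(make_capture_cm, body, report):
    # make_capture_cm, body, and report are hypothetical stand-ins for
    # compute_log_manager.watch(...), the per-step event loop, and
    # DagsterEvent.engine_event(...) in the real iterator.
    with ExitStack() as stack:
        try:
            # Setup: a failure here is reported, not raised, so the step
            # body still runs (just without log capture).
            stack.enter_context(make_capture_cm())
        except Exception:
            report("Exception while setting up compute log capture", sys.exc_info())
        body()
        # Teardown: closing the stack explicitly, while still inside the
        # function, lets cleanup failures be reported instead of escaping.
        try:
            stack.close()
        except Exception:
            report("Exception while cleaning up compute log capture", sys.exc_info())

Closing an ExitStack releases its resources exactly once, so the implicit exit at the end of the with block is a no-op after the explicit close, whether or not that close raised.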