Example #1
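Wraps a step's user-supplied compute function: the error boundary converts any exception raised by user code into a DagsterExecutionStepExecutionError annotated with step metadata, and mirror_step_io captures the step's output while its events are yielded.
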
def _user_event_sequence_for_step_compute_fn(step_context, evaluated_inputs):
    check.inst_param(step_context, 'step_context', SystemStepExecutionContext)
    check.dict_param(evaluated_inputs, 'evaluated_inputs', key_type=str)

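    # Any exception raised by the user compute function inside this block is
    # re-raised as a DagsterExecutionStepExecutionError carrying the step key
    # and solid names below.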
    with user_code_error_boundary(
            DagsterExecutionStepExecutionError,
            msg_fn=lambda: '''Error occurred during the execution of step:
        step key: "{key}"
        solid invocation: "{solid}"
        solid definition: "{solid_def}"
        '''.format(
                key=step_context.step.key,
                solid_def=step_context.solid_def.name,
                solid=step_context.solid.name,
            ),
            step_key=step_context.step.key,
            solid_def_name=step_context.solid_def.name,
            solid_name=step_context.solid.name,
    ):

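        # Mirror the step's stdout/stderr (its compute logs) while the user
        # generator is drained.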
        with mirror_step_io(step_context):
            gen = check.opt_generator(
                step_context.step.compute_fn(step_context, evaluated_inputs))

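            # compute_fn may return None (nothing to emit) or a generator of events.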
            if gen is not None:
                for event in gen:
                    yield event
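
The error-boundary pattern itself is small enough to sketch. The following is a minimal, hypothetical reconstruction; UserCodeError and error_boundary are illustrative names, not dagster's actual implementation, which layers in logging and exception-chaining details:

from contextlib import contextmanager


class UserCodeError(Exception):
    # Hypothetical stand-in for DagsterExecutionStepExecutionError: it wraps
    # the original user exception and carries metadata for error reporting.
    def __init__(self, message, user_exception, **metadata):
        super().__init__(message)
        self.user_exception = user_exception
        self.metadata = metadata


@contextmanager
def error_boundary(error_cls, msg_fn, **metadata):
    # Re-raise anything thrown by the enclosed user code as error_cls. msg_fn is
    # a callable so the message is only built when a failure actually occurs.
    try:
        yield
    except Exception as exc:
        raise error_cls(msg_fn(), user_exception=exc, **metadata) from exc

This also explains why msg_fn above is a lambda: the formatted message is computed only on the failure path.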
Example #2
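The execute method of the in-process engine (excerpted from its class body). It brackets the run with engine events, copies required intermediates, then pulls ready steps one at a time from an ActiveExecution until the plan completes, skipping steps whose inputs are not covered.
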
    def execute(pipeline_context, execution_plan):
        check.inst_param(pipeline_context, 'pipeline_context',
                         SystemPipelineExecutionContext)
        check.inst_param(execution_plan, 'execution_plan', ExecutionPlan)

        yield DagsterEvent.engine_event(
            pipeline_context,
            'Executing steps in process (pid: {pid})'.format(pid=os.getpid()),
            event_specific_data=EngineEventData.in_process(
                os.getpid(), execution_plan.step_keys_to_execute),
        )

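        # Time the whole run so the closing engine event can report its duration.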
        with time_execution_scope() as timer_result:
            check.param_invariant(
                isinstance(pipeline_context.executor_config, ExecutorConfig),
                'pipeline_context',
                'Expected executor_config to be ExecutorConfig got {}'.format(
                    pipeline_context.executor_config),
            )

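            # Copy any previously materialized intermediates that the selected steps
            # depend on (relevant when re-executing a subset of the plan).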
            for event in copy_required_intermediates_for_execution(
                    pipeline_context, execution_plan):
                yield event

            # It would be good to implement a reference tracking algorithm here to
            # garbage collect results that are no longer needed by any steps
            # https://github.com/dagster-io/dagster/issues/811
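            # ActiveExecution tracks which steps are ready to run and which have
            # reached a terminal state.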
            active_execution = execution_plan.start()
            while not active_execution.is_complete:

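                # limit=1: the in-process engine executes exactly one ready step at a time.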
                steps = active_execution.get_steps_to_execute(limit=1)
                check.invariant(
                    len(steps) == 1,
                    'Invariant Violation: expected step to be available to execute'
                )
                step = steps[0]
                step_context = pipeline_context.for_step(step)
                check.invariant(
                    all(
                        hasattr(step_context.resources, resource_key) for
                        resource_key in step_context.required_resource_keys),
                    'expected step context to have all required resources',
                )

                with mirror_step_io(step_context):
                    # capture all of the logs for this step
                    uncovered_inputs = pipeline_context.intermediates_manager.uncovered_inputs(
                        step_context, step)
                    if uncovered_inputs:
                        # In partial pipeline execution we may end up here without having
                        # validated that the missing dependent outputs were optional.
                        _assert_missing_inputs_optional(
                            uncovered_inputs, execution_plan, step.key)

                        step_context.log.info((
                            'Not all inputs covered for {step}. Not executing. Output missing for '
                            'inputs: {uncovered_inputs}').format(
                                uncovered_inputs=uncovered_inputs,
                                step=step.key))
                        yield DagsterEvent.step_skipped_event(step_context)
                        active_execution.mark_skipped(step.key)
                        continue

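                    # Forward each event to the caller and let ActiveExecution update its
                    # bookkeeping (successes, failures, skips) from the event stream.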
                    for step_event in check.generator(
                            dagster_event_sequence_for_step(step_context)):
                        check.inst(step_event, DagsterEvent)
                        yield step_event
                        active_execution.handle_event(step_event)

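                    # Sanity-check that the step emitted a terminal event before moving on.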
                    active_execution.verify_complete(pipeline_context,
                                                     step.key)

                # process skips from failures or uncovered inputs
                for event in active_execution.skipped_step_events_iterator(
                        pipeline_context):
                    yield event

        yield DagsterEvent.engine_event(
            pipeline_context,
            'Finished steps in process (pid: {pid}) in {duration_ms}'.format(
                pid=os.getpid(),
                duration_ms=format_duration(timer_result.millis)),
            event_specific_data=EngineEventData.in_process(
                os.getpid(), execution_plan.step_keys_to_execute),
        )
Example #3
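A variant of the same execute method that walks precomputed execution step levels instead of using an ActiveExecution object, tracking failed or skipped steps itself so that downstream steps can be skipped.
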
    def execute(pipeline_context, execution_plan):
        check.inst_param(pipeline_context, 'pipeline_context',
                         SystemPipelineExecutionContext)
        check.inst_param(execution_plan, 'execution_plan', ExecutionPlan)

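        # Step levels are topologically sorted: a step depends only on steps in
        # earlier levels, so executing level by level respects the DAG.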
        step_levels = execution_plan.execution_step_levels()
        step_key_set = set(step.key for step_level in step_levels
                           for step in step_level)

        yield DagsterEvent.engine_event(
            pipeline_context,
            'Executing steps in process (pid: {pid})'.format(pid=os.getpid()),
            event_specific_data=EngineEventData.in_process(
                os.getpid(), step_key_set),
        )

        with time_execution_scope() as timer_result:
            check.param_invariant(
                isinstance(pipeline_context.executor_config, ExecutorConfig),
                'pipeline_context',
                'Expected executor_config to be ExecutorConfig got {}'.format(
                    pipeline_context.executor_config),
            )

            for event in copy_required_intermediates_for_execution(
                    pipeline_context, execution_plan):
                yield event

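            # Record failures and skips so that dependent steps downstream can be
            # skipped rather than executed.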
            failed_or_skipped_steps = set()

            # It would be good to implement a reference tracking algorithm here to
            # garbage collect results that are no longer needed by any steps
            # https://github.com/dagster-io/dagster/issues/811
            for step_level in step_levels:
                for step in step_level:
                    step_context = pipeline_context.for_step(step)

                    with mirror_step_io(step_context):
                        # capture all of the logs for this step

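                        # Skip this step outright if any step it depends on failed
                        # or was skipped.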
                        failed_inputs = []
                        for step_input in step.step_inputs:
                            failed_inputs.extend(
                                failed_or_skipped_steps.intersection(
                                    step_input.dependency_keys))

                        if failed_inputs:
                            step_context.log.info((
                                'Dependencies for step {step} failed: {failed_inputs}. Not executing.'
                            ).format(step=step.key,
                                     failed_inputs=failed_inputs))
                            failed_or_skipped_steps.add(step.key)
                            yield DagsterEvent.step_skipped_event(step_context)
                            continue

                        uncovered_inputs = pipeline_context.intermediates_manager.uncovered_inputs(
                            step_context, step)
                        if uncovered_inputs:
                            # In partial pipeline execution we may end up here without having
                            # validated that the missing dependent outputs were optional.
                            _assert_missing_inputs_optional(
                                uncovered_inputs, execution_plan, step.key)

                            step_context.log.info((
                                'Not all inputs covered for {step}. Not executing. Output missing for '
                                'inputs: {uncovered_inputs}').format(
                                    uncovered_inputs=uncovered_inputs,
                                    step=step.key))
                            failed_or_skipped_steps.add(step.key)
                            yield DagsterEvent.step_skipped_event(step_context)
                            continue

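                        # Drain the step's events, noting failures for the downstream
                        # skip logic above.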
                        for step_event in check.generator(
                                dagster_event_sequence_for_step(step_context)):
                            check.inst(step_event, DagsterEvent)
                            if step_event.is_step_failure:
                                failed_or_skipped_steps.add(step.key)

                            yield step_event

        yield DagsterEvent.engine_event(
            pipeline_context,
            'Finished steps in process (pid: {pid}) in {duration_ms}'.format(
                pid=os.getpid(),
                duration_ms=format_duration(timer_result.millis)),
            event_specific_data=EngineEventData.in_process(
                os.getpid(), step_key_set),
        )