def execute(pipeline_context, execution_plan, step_keys_to_execute=None):
        check.inst_param(pipeline_context, 'pipeline_context',
                         SystemPipelineExecutionContext)
        check.inst_param(execution_plan, 'execution_plan', ExecutionPlan)
        check.opt_list_param(step_keys_to_execute,
                             'step_keys_to_execute',
                             of_type=str)

        step_key_set = None if step_keys_to_execute is None else set(
            step_keys_to_execute)

        yield DagsterEvent.engine_event(
            pipeline_context,
            'Executing steps in process (pid: {pid})'.format(pid=os.getpid()),
            event_specific_data=EngineEventData.in_process(
                os.getpid(), step_key_set),
        )

        with time_execution_scope() as timer_result:
            check.param_invariant(
                isinstance(pipeline_context.executor_config, ExecutorConfig),
                'pipeline_context',
                'Expected executor_config to be ExecutorConfig got {}'.format(
                    pipeline_context.executor_config),
            )

            failed_or_skipped_steps = set()

            step_levels = execution_plan.topological_step_levels()

            # It would be good to implement a reference tracking algorithm here to
            # garbage collect results that are no longer needed by any steps
            # https://github.com/dagster-io/dagster/issues/811
            for step_level in step_levels:
                for step in step_level:
                    if step_key_set and step.key not in step_key_set:
                        continue

                    step_context = pipeline_context.for_step(step)

                    failed_inputs = []
                    for step_input in step.step_inputs:
                        failed_inputs.extend(
                            failed_or_skipped_steps.intersection(
                                step_input.dependency_keys))

                    if failed_inputs:
                        step_context.log.info((
                            'Dependencies for step {step} failed: {failed_inputs}. Not executing.'
                        ).format(step=step.key, failed_inputs=failed_inputs))
                        failed_or_skipped_steps.add(step.key)
                        yield DagsterEvent.step_skipped_event(step_context)
                        continue

                    uncovered_inputs = pipeline_context.intermediates_manager.uncovered_inputs(
                        step_context, step)
                    if uncovered_inputs:
                        # In partial pipeline execution, we may end up here without having validated the
                        # missing dependent outputs were optional
                        _assert_missing_inputs_optional(
                            uncovered_inputs, execution_plan, step.key)

                        step_context.log.info((
                            'Not all inputs covered for {step}. Not executing. Output missing for '
                            'inputs: {uncovered_inputs}').format(
                                uncovered_inputs=uncovered_inputs,
                                step=step.key))
                        failed_or_skipped_steps.add(step.key)
                        yield DagsterEvent.step_skipped_event(step_context)
                        continue

                    for step_event in check.generator(
                            dagster_event_sequence_for_step(step_context)):
                        check.inst(step_event, DagsterEvent)
                        if step_event.is_step_failure:
                            failed_or_skipped_steps.add(step.key)

                        yield step_event

        yield DagsterEvent.engine_event(
            pipeline_context,
            'Finished steps in process (pid: {pid}) in {duration_ms}'.format(
                pid=os.getpid(),
                duration_ms=format_duration(timer_result.millis)),
            event_specific_data=EngineEventData.in_process(
                os.getpid(), step_key_set),
        )
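
All of these engine variants share one contract: execute is a generator that yields DagsterEvent objects rather than returning a result, so the caller drives the run by draining the iterator and reacting to each event. Below is a minimal, self-contained sketch of that consumption pattern; FakeEvent and fake_engine are illustrative stand-ins, not Dagster APIs.

from collections import namedtuple

# Stand-in for DagsterEvent with just enough shape for the demo.
FakeEvent = namedtuple('FakeEvent', 'step_key is_step_failure')

def fake_engine():
    # Pretend engine: one step succeeds, one fails.
    yield FakeEvent('add_one', False)
    yield FakeEvent('load_table', True)

failed_steps = set()
for event in fake_engine():
    if event.is_step_failure:
        failed_steps.add(event.step_key)

print(failed_steps)  # {'load_table'}
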
Example #2
def inner_plan_execution_iterator(pipeline_context, execution_plan, retries):
    check.inst_param(pipeline_context, 'pipeline_context',
                     SystemPipelineExecutionContext)
    check.inst_param(execution_plan, 'execution_plan', ExecutionPlan)
    check.inst_param(retries, 'retries', Retries)
    for event in copy_required_intermediates_for_execution(
            pipeline_context, execution_plan):
        yield event

    # It would be good to implement a reference tracking algorithm here to
    # garbage collect results that are no longer needed by any steps
    # https://github.com/dagster-io/dagster/issues/811
    active_execution = execution_plan.start(retries=retries)
    while not active_execution.is_complete:
        step = active_execution.get_next_step()
        step_context = pipeline_context.for_step(step)

        missing_resources = [
            resource_key
            for resource_key in step_context.required_resource_keys
            if not hasattr(step_context.resources, resource_key)
        ]
        check.invariant(
            len(missing_resources) == 0,
            ('Expected step context for solid {solid_name} to have all required resources, but '
             'missing {missing_resources}.').format(
                 solid_name=step_context.solid.name,
                 missing_resources=missing_resources),
        )

        with pipeline_context.instance.compute_log_manager.watch(
                step_context.pipeline_run, step_context.step.key):
            # capture all of the logs for this step
            uncovered_inputs = pipeline_context.intermediates_manager.uncovered_inputs(
                step_context, step)
            if uncovered_inputs:
                # In partial pipeline execution, we may end up here without having validated the
                # missing dependent outputs were optional
                _assert_missing_inputs_optional(uncovered_inputs,
                                                execution_plan, step.key)

                step_context.log.info((
                    'Not all inputs covered for {step}. Not executing. Output missing for '
                    'inputs: {uncovered_inputs}').format(
                        uncovered_inputs=uncovered_inputs, step=step.key))
                yield DagsterEvent.step_skipped_event(step_context)
                active_execution.mark_skipped(step.key)
            else:
                for step_event in check.generator(
                        _dagster_event_sequence_for_step(
                            step_context, retries)):
                    check.inst(step_event, DagsterEvent)
                    yield step_event
                    active_execution.handle_event(step_event)

            active_execution.verify_complete(pipeline_context, step.key)

        # process skips from failures or uncovered inputs
        for event in active_execution.skipped_step_events_iterator(
                pipeline_context):
            yield event
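
The active_execution object above encapsulates the scheduling logic: hand out steps whose upstream dependencies have completed, and propagate skips downstream when a dependency fails or is skipped. Here is a minimal sketch of that pattern; ActiveExecutionSketch is illustrative, not Dagster's ActiveExecution, and it collapses the separate failed/skipped bookkeeping into one set.

class ActiveExecutionSketch:
    def __init__(self, deps):
        # deps maps each step key to the set of step keys it depends on.
        self._deps = {key: set(upstream) for key, upstream in deps.items()}
        self._pending = set(deps)
        self._success = set()
        self._skipped = set()

    @property
    def is_complete(self):
        return not self._pending

    def get_next_step(self):
        # Propagate skips: a pending step downstream of a skipped step
        # can never run, so mark it skipped as well.
        for key in list(self._pending):
            if self._deps[key] & self._skipped:
                self._pending.discard(key)
                self._skipped.add(key)
        # Hand out a step whose upstream steps have all succeeded.
        for key in sorted(self._pending):
            if self._deps[key] <= self._success:
                self._pending.discard(key)
                return key
        return None  # nothing runnable right now; caller re-checks is_complete

    def mark_success(self, key):
        self._success.add(key)

    def mark_skipped(self, key):
        self._skipped.add(key)

# Toy plan: b and c depend on a; d needs both b and c. Pretend b fails.
active = ActiveExecutionSketch(
    {'a': set(), 'b': {'a'}, 'c': {'a'}, 'd': {'b', 'c'}})
while not active.is_complete:
    key = active.get_next_step()
    if key is None:
        continue
    if key == 'b':
        active.mark_skipped(key)  # stand-in for a step failure
    else:
        active.mark_success(key)
# 'd' never runs: it is skipped because its dependency 'b' failed.

Calling get_next_step repeatedly both surfaces runnable steps and advances skip propagation one level at a time, which is why the engine loops on is_complete rather than on a single iterator pass.
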
Example #3
def execute(pipeline_context, execution_plan):
        check.inst_param(pipeline_context, 'pipeline_context', SystemPipelineExecutionContext)
        check.inst_param(execution_plan, 'execution_plan', ExecutionPlan)

        yield DagsterEvent.engine_event(
            pipeline_context,
            'Executing steps in process (pid: {pid})'.format(pid=os.getpid()),
            event_specific_data=EngineEventData.in_process(
                os.getpid(), execution_plan.step_keys_to_execute
            ),
        )

        with time_execution_scope() as timer_result:
            check.param_invariant(
                isinstance(pipeline_context.executor_config, ExecutorConfig),
                'pipeline_context',
                'Expected executor_config to be ExecutorConfig got {}'.format(
                    pipeline_context.executor_config
                ),
            )

            for event in copy_required_intermediates_for_execution(
                pipeline_context, execution_plan
            ):
                yield event

            # It would be good to implement a reference tracking algorithm here to
            # garbage collect results that are no longer needed by any steps
            # https://github.com/dagster-io/dagster/issues/811
            active_execution = execution_plan.start()
            while not active_execution.is_complete:

                steps = active_execution.get_steps_to_execute(limit=1)
                check.invariant(
                    len(steps) == 1, 'Invariant Violation: expected step to be available to execute'
                )
                step = steps[0]
                step_context = pipeline_context.for_step(step)
                check.invariant(
                    all(
                        hasattr(step_context.resources, resource_key)
                        for resource_key in step_context.required_resource_keys
                    ),
                    'expected step context to have all required resources',
                )

                with mirror_step_io(step_context):
                    # capture all of the logs for this step
                    uncovered_inputs = pipeline_context.intermediates_manager.uncovered_inputs(
                        step_context, step
                    )
                    if uncovered_inputs:
                        # In partial pipeline execution, we may end up here without having validated the
                        # missing dependent outputs were optional
                        _assert_missing_inputs_optional(uncovered_inputs, execution_plan, step.key)

                        step_context.log.info(
                            (
                                'Not all inputs covered for {step}. Not executing. Output missing for '
                                'inputs: {uncovered_inputs}'
                            ).format(uncovered_inputs=uncovered_inputs, step=step.key)
                        )
                        yield DagsterEvent.step_skipped_event(step_context)
                        active_execution.mark_skipped(step.key)
                        continue

                    step_success = None
                    for step_event in check.generator(
                        dagster_event_sequence_for_step(step_context)
                    ):
                        check.inst(step_event, DagsterEvent)
                        yield step_event

                        if step_event.is_step_failure:
                            step_success = False
                        elif step_event.is_step_success:
                            step_success = True

                    if step_success is True:
                        active_execution.mark_success(step.key)
                    elif step_success is False:
                        active_execution.mark_failed(step.key)
                    else:
                        pipeline_context.log.error(
                            'Step {key} finished without success or failure event, assuming failure.'.format(
                                key=step.key
                            )
                        )
                        active_execution.mark_failed(step.key)

                # process skips from failures or uncovered inputs
                for event in active_execution.skipped_step_events_iterator(pipeline_context):
                    yield event

        yield DagsterEvent.engine_event(
            pipeline_context,
            'Finished steps in process (pid: {pid}) in {duration_ms}'.format(
                pid=os.getpid(), duration_ms=format_duration(timer_result.millis)
            ),
            event_specific_data=EngineEventData.in_process(
                os.getpid(), execution_plan.step_keys_to_execute
            ),
        )
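
Example #3 also brackets the whole loop with engine events and times it via time_execution_scope, a context manager that yields a timer object whose .millis is read after the block exits. A sketch of what such helpers can look like follows; the real implementations in dagster.utils.timing may differ.

import time
from contextlib import contextmanager

class TimerResultSketch:
    # Records wall-clock start/end; .millis is meaningful once the scope exits.
    def __init__(self):
        self.start = time.time()
        self.end = None

    @property
    def millis(self):
        end = self.end if self.end is not None else time.time()
        return (end - self.start) * 1000.0

@contextmanager
def time_execution_scope_sketch():
    # Yield the result object up front so the caller can read it after the block.
    timer_result = TimerResultSketch()
    try:
        yield timer_result
    finally:
        timer_result.end = time.time()

def format_duration_sketch(millis):
    # Render short durations in milliseconds, longer ones in seconds.
    return '%.2fms' % millis if millis < 1000 else '%.2fs' % (millis / 1000.0)

with time_execution_scope_sketch() as timer_result:
    time.sleep(0.01)
print(format_duration_sketch(timer_result.millis))
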
Example #4
def execute(pipeline_context, execution_plan, step_keys_to_execute=None):
        check.inst_param(pipeline_context, 'pipeline_context',
                         SystemPipelineExecutionContext)
        check.inst_param(execution_plan, 'execution_plan', ExecutionPlan)
        check.opt_list_param(step_keys_to_execute,
                             'step_keys_to_execute',
                             of_type=str)

        step_key_set = None if step_keys_to_execute is None else set(
            step_keys_to_execute)

        check.param_invariant(
            isinstance(pipeline_context.executor_config, ExecutorConfig),
            'pipeline_context',
            'Expected executor_config to be ExecutorConfig got {}'.format(
                pipeline_context.executor_config),
        )

        failed_or_skipped_steps = set()

        step_levels = execution_plan.topological_step_levels()

        intermediates_manager = pipeline_context.intermediates_manager

        # It would be good to implement a reference tracking algorithm here so we could
        # garbage collect results that are no longer needed by any steps
        # https://github.com/dagster-io/dagster/issues/811
        for step_level in step_levels:
            for step in step_level:
                if step_key_set and step.key not in step_key_set:
                    continue

                step_context = pipeline_context.for_step(step)

                failed_inputs = [
                    step_input.prev_output_handle.step_key
                    for step_input in step.step_inputs
                    if step_input.prev_output_handle.step_key in
                    failed_or_skipped_steps
                ]
                if failed_inputs:
                    step_context.log.info((
                        'Dependencies for step {step} failed: {failed_inputs}. Not executing.'
                    ).format(step=step.key, failed_inputs=failed_inputs))
                    failed_or_skipped_steps.add(step.key)
                    yield DagsterEvent.step_skipped_event(step_context)
                    continue

                uncovered_inputs = intermediates_manager.uncovered_inputs(
                    step_context, step)
                if uncovered_inputs:
                    # In partial pipeline execution, we may end up here without having validated the
                    # missing dependent outputs were optional
                    _assert_missing_inputs_optional(uncovered_inputs,
                                                    execution_plan, step.key)

                    step_context.log.info((
                        'Not all inputs covered for {step}. Not executing. Output missing for '
                        'inputs: {uncovered_inputs}').format(
                            uncovered_inputs=uncovered_inputs, step=step.key))
                    failed_or_skipped_steps.add(step.key)
                    yield DagsterEvent.step_skipped_event(step_context)
                    continue

                input_values = _create_input_values(step_context,
                                                    intermediates_manager)

                for step_event in check.generator(
                        execute_step_in_memory(step_context, input_values,
                                               intermediates_manager)):
                    check.inst(step_event, DagsterEvent)
                    if step_event.is_step_failure:
                        failed_or_skipped_steps.add(step.key)

                    yield step_event
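
Example #1 and this last variant both walk execution_plan.topological_step_levels(), which groups the plan into levels where every step's dependencies sit in an earlier level; steps within a level are mutually independent, which is what lets a parallel engine run a level's steps concurrently. A sketch of that computation as a batched Kahn's algorithm (illustrative, not Dagster's implementation):

def topological_step_levels_sketch(deps):
    # deps maps each step key to the set of step keys it depends on.
    remaining = {key: set(upstream) for key, upstream in deps.items()}
    levels = []
    while remaining:
        # Every step with no unsatisfied dependencies joins the next level.
        ready = sorted(key for key, upstream in remaining.items() if not upstream)
        if not ready:
            raise ValueError('cycle detected in execution plan')
        levels.append(ready)
        for key in ready:
            del remaining[key]
        for upstream in remaining.values():
            upstream.difference_update(ready)
    return levels

print(topological_step_levels_sketch(
    {'a': set(), 'b': {'a'}, 'c': {'a'}, 'd': {'b', 'c'}}))
# [['a'], ['b', 'c'], ['d']]
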