Ejemplo n.º 1
0
def _core_dagster_event_sequence_for_step(step_context):
    '''
    Execute the step within the step_context argument given the in-memory
    events. This function yields a sequence of DagsterEvents, but without
    catching any exceptions that have bubbled up during the computation
    of the step.
    '''
    check.inst_param(step_context, 'step_context', SystemStepExecutionContext)

    yield DagsterEvent.step_start_event(step_context)

    inputs = {}
    for input_name, input_value in _input_values_from_intermediates_manager(
            step_context).items():
        if isinstance(input_value, ObjectStoreOperation):
            yield DagsterEvent.object_store_operation(
                step_context,
                ObjectStoreOperation.serializable(input_value,
                                                  value_name=input_name))
            inputs[input_name] = input_value.obj
        else:
            inputs[input_name] = input_value

    for input_name, input_value in inputs.items():
        for evt in check.generator(
                _type_checked_event_sequence_for_input(step_context,
                                                       input_name,
                                                       input_value)):
            yield evt

    with time_execution_scope() as timer_result:
        user_event_sequence = check.generator(
            _user_event_sequence_for_step_compute_fn(step_context, inputs))

        # It is important for this loop to be indented within the
        # timer block above in order for time to be recorded accurately.
        for user_event in check.generator(
                _step_output_error_checked_user_event_sequence(
                    step_context, user_event_sequence)):

            if isinstance(user_event, Output):
                for evt in _create_step_events_for_output(
                        step_context, user_event):
                    yield evt
            elif isinstance(user_event, Materialization):
                yield DagsterEvent.step_materialization(
                    step_context, user_event)
            elif isinstance(user_event, ExpectationResult):
                yield DagsterEvent.step_expectation_result(
                    step_context, user_event)
            else:
                check.failed(
                    'Unexpected event {event}, should have been caught earlier'
                    .format(event=user_event))

    yield DagsterEvent.step_success_event(
        step_context, StepSuccessData(duration_ms=timer_result.millis))
Ejemplo n.º 2
0
def _set_intermediates(step_context, step_output, step_output_handle, output):
    res = step_context.intermediates_manager.set_intermediate(
        context=step_context,
        runtime_type=step_output.runtime_type,
        step_output_handle=step_output_handle,
        value=output.value,
    )
    if isinstance(res, ObjectStoreOperation):
        yield DagsterEvent.object_store_operation(
            step_context, ObjectStoreOperation.serializable(res, value_name=output.output_name)
        )
Ejemplo n.º 3
0
def copy_required_intermediates_for_execution(pipeline_context,
                                              execution_plan):
    '''
    Uses the intermediates manager to copy intermediates from the previous run that apply to the
    current execution plan, and yields the corresponding events
    '''
    check.inst_param(pipeline_context, 'pipeline_context',
                     SystemPipelineExecutionContext)
    check.inst_param(execution_plan, 'execution_plan', ExecutionPlan)
    parent_run_id = pipeline_context.pipeline_run.parent_run_id

    if not parent_run_id:
        return

    parent_run_logs = pipeline_context.instance.all_logs(parent_run_id)

    output_handles_for_current_run = output_handles_from_execution_plan(
        execution_plan)
    output_handles_from_previous_run = output_handles_from_event_logs(
        parent_run_logs)
    output_handles_to_copy = output_handles_for_current_run.intersection(
        output_handles_from_previous_run)
    output_handles_to_copy_by_step = defaultdict(list)
    for handle in output_handles_to_copy:
        output_handles_to_copy_by_step[handle.step_key].append(handle)

    intermediates_manager = pipeline_context.intermediates_manager
    for step in execution_plan.topological_steps():
        step_context = pipeline_context.for_step(step)
        for handle in output_handles_to_copy_by_step.get(step.key, []):
            if intermediates_manager.has_intermediate(pipeline_context,
                                                      handle):
                continue

            operation = intermediates_manager.copy_intermediate_from_run(
                pipeline_context, parent_run_id, handle)
            yield DagsterEvent.object_store_operation(
                step_context,
                ObjectStoreOperation.serializable(
                    operation, value_name=handle.output_name),
            )
Ejemplo n.º 4
0
def core_dagster_event_sequence_for_step(step_context, prior_attempt_count):
    """
    Execute the step within the step_context argument given the in-memory
    events. This function yields a sequence of DagsterEvents, but without
    catching any exceptions that have bubbled up during the computation
    of the step.
    """
    check.inst_param(step_context, "step_context", SystemStepExecutionContext)
    check.int_param(prior_attempt_count, "prior_attempt_count")
    if prior_attempt_count > 0:
        yield DagsterEvent.step_restarted_event(step_context,
                                                prior_attempt_count)
    else:
        yield DagsterEvent.step_start_event(step_context)

    inputs = {}
    for input_name, input_value in _input_values_from_intermediate_storage(
            step_context):
        if isinstance(input_value, ObjectStoreOperation):
            yield DagsterEvent.object_store_operation(
                step_context,
                ObjectStoreOperation.serializable(input_value,
                                                  value_name=input_name))
            inputs[input_name] = input_value.obj
        elif isinstance(input_value, MultipleStepOutputsListWrapper):
            for op in input_value:
                yield DagsterEvent.object_store_operation(
                    step_context,
                    ObjectStoreOperation.serializable(op,
                                                      value_name=input_name))
            inputs[input_name] = [op.obj for op in input_value]
        else:
            inputs[input_name] = input_value

    for input_name, input_value in inputs.items():
        for evt in check.generator(
                _type_checked_event_sequence_for_input(step_context,
                                                       input_name,
                                                       input_value)):
            yield evt

    with time_execution_scope() as timer_result:
        user_event_sequence = check.generator(
            _user_event_sequence_for_step_compute_fn(step_context, inputs))

        # It is important for this loop to be indented within the
        # timer block above in order for time to be recorded accurately.
        for user_event in check.generator(
                _step_output_error_checked_user_event_sequence(
                    step_context, user_event_sequence)):

            if isinstance(user_event, Output):
                for evt in _create_step_events_for_output(
                        step_context, user_event):
                    yield evt
            elif isinstance(user_event,
                            (AssetMaterialization, Materialization)):
                yield DagsterEvent.step_materialization(
                    step_context, user_event)
            elif isinstance(user_event, ExpectationResult):
                yield DagsterEvent.step_expectation_result(
                    step_context, user_event)
            else:
                check.failed(
                    "Unexpected event {event}, should have been caught earlier"
                    .format(event=user_event))

    # We only want to log exactly one step success event or failure event if possible,
    # so wait to handle any interrupts (that normally log a failure event) until the success
    # event has finished
    with delay_interrupts():
        yield DagsterEvent.step_success_event(
            step_context, StepSuccessData(duration_ms=timer_result.millis))