예제 #1
0
def _set_intermediates(step_context, step_output, step_output_handle, output,
                       version):
    """
    Store the value of a step output and yield the matching storage event.

    When ``step_output.asset_store_handle`` is truthy the value is passed to
    ``_set_addressable_asset``; otherwise it is written through
    ``step_context.intermediate_storage.set_intermediate``. An event is only
    yielded when the call returns an AssetStoreOperation or an
    ObjectStoreOperation, respectively.
    """
    if not step_output.asset_store_handle:
        # No asset store configured for this output: use intermediate storage.
        stored = step_context.intermediate_storage.set_intermediate(
            context=step_context,
            dagster_type=step_output.dagster_type,
            step_output_handle=step_output_handle,
            value=output.value,
            version=version,
        )
        if isinstance(stored, ObjectStoreOperation):
            yield DagsterEvent.object_store_operation(
                step_context,
                ObjectStoreOperation.serializable(
                    stored, value_name=output.output_name),
            )
        return

    # An asset store is configured on, or provided for, this output.
    stored = _set_addressable_asset(
        step_context,
        step_output_handle,
        step_output.asset_store_handle,
        output.value,
    )
    if isinstance(stored, AssetStoreOperation):
        yield DagsterEvent.asset_store_operation(step_context, stored)
예제 #2
0
def _set_intermediates(step_context, step_output, step_output_handle, output,
                       version):
    """
    Store the value of a step output and yield the resulting events.

    If ``step_context.using_asset_store(step_output_handle)`` is truthy, the
    value goes to ``_set_addressable_asset`` and every item it produces is
    translated: AssetStoreOperation items become asset store operation events
    and AssetMaterialization items become step materialization events.
    Otherwise the value is written to intermediate storage and an object store
    operation event is yielded when an ObjectStoreOperation comes back.
    """
    if not step_context.using_asset_store(step_output_handle):
        stored = step_context.intermediate_storage.set_intermediate(
            context=step_context,
            dagster_type=step_output.dagster_type,
            step_output_handle=step_output_handle,
            value=output.value,
            version=version,
        )
        if isinstance(stored, ObjectStoreOperation):
            yield DagsterEvent.object_store_operation(
                step_context,
                ObjectStoreOperation.serializable(
                    stored, value_name=output.output_name),
            )
        return

    produced = _set_addressable_asset(step_context, step_output_handle,
                                      output.value)
    for item in produced:
        # The two checks are independent on purpose: each matching type
        # produces its own event.
        if isinstance(item, AssetStoreOperation):
            yield DagsterEvent.asset_store_operation(step_context, item)
        if isinstance(item, AssetMaterialization):
            yield DagsterEvent.step_materialization(step_context, item)
예제 #3
0
def _set_objects(step_context, step_output, step_output_handle, output):
    """
    Hand the output value to the output manager and yield the follow-up events.

    If ``handle_output`` returns an ObjectStoreOperation, a single object store
    operation event is yielded. Otherwise the result is passed through
    ``_materializations_to_events`` and a SET_ASSET asset store operation event
    is yielded afterwards.
    """
    from dagster.core.storage.asset_store import AssetStoreHandle

    output_def = step_output.output_def
    manager = step_context.get_output_manager(step_output_handle)
    manager_context = step_context.get_output_context(step_output_handle)
    handled = manager.handle_output(manager_context, output.value)

    # TODO yuhan retire ObjectStoreOperation https://github.com/dagster-io/dagster/issues/3043
    if isinstance(handled, ObjectStoreOperation):
        yield DagsterEvent.object_store_operation(
            step_context,
            ObjectStoreOperation.serializable(
                handled, value_name=step_output_handle.output_name),
        )
        return

    for event in _materializations_to_events(step_context, step_output_handle,
                                             handled):
        yield event

    # Record the SET_ASSET operation performed by the asset store.
    set_asset_operation = AssetStoreOperation(
        AssetStoreOperationType.SET_ASSET,
        step_output_handle,
        AssetStoreHandle(output_def.manager_key, output_def.metadata),
    )
    yield DagsterEvent.asset_store_operation(step_context,
                                             set_asset_operation)
예제 #4
0
def _set_intermediates(step_context, step_output, step_output_handle, output):
    """
    Write the output value to intermediate storage.

    Yields one object store operation event when ``set_intermediate`` returns
    an ObjectStoreOperation; yields nothing otherwise.
    """
    storage = step_context.intermediate_storage
    write_result = storage.set_intermediate(
        context=step_context,
        dagster_type=step_output.dagster_type,
        step_output_handle=step_output_handle,
        value=output.value,
    )
    if not isinstance(write_result, ObjectStoreOperation):
        return

    yield DagsterEvent.object_store_operation(
        step_context,
        ObjectStoreOperation.serializable(write_result,
                                          value_name=output.output_name),
    )
예제 #5
0
def copy_required_intermediates_for_execution(pipeline_context,
                                              execution_plan):
    """
    Copy intermediates from the parent run that the current execution plan also
    references, yielding an object store operation event for every copy made.

    Nothing is yielded when the pipeline run has no ``parent_run_id``. Handles
    that intermediate storage already holds are skipped.
    """
    check.inst_param(pipeline_context, "pipeline_context",
                     SystemExecutionContext)
    check.inst_param(execution_plan, "execution_plan", ExecutionPlan)

    parent_run_id = pipeline_context.pipeline_run.parent_run_id
    if not parent_run_id:
        return

    parent_run_logs = pipeline_context.instance.all_logs(parent_run_id)
    current_handles = output_handles_from_execution_plan(execution_plan)
    previous_handles = output_handles_from_event_logs(parent_run_logs)

    # Group the handles present in both runs by the step that owns them.
    shared_by_step_key = defaultdict(list)
    for shared_handle in current_handles.intersection(previous_handles):
        shared_by_step_key[shared_handle.step_key].append(shared_handle)

    storage = pipeline_context.intermediate_storage
    for step in execution_plan.get_all_steps_in_topo_order():
        step_handles = shared_by_step_key.get(step.key)

        # Skip before building a context, so that no context is requested for
        # an UnresolvedExecutionStep that has nothing to copy.
        if not step_handles:
            continue

        step_context = pipeline_context.for_step(step)
        for output_handle in step_handles:
            if not storage.has_intermediate(pipeline_context, output_handle):
                copy_operation = storage.copy_intermediate_from_run(
                    pipeline_context, parent_run_id, output_handle)
                yield DagsterEvent.object_store_operation(
                    step_context,
                    ObjectStoreOperation.serializable(
                        copy_operation, value_name=output_handle.output_name),
                )
예제 #6
0
def _set_objects(step_context, step_output, step_output_handle, output):
    """
    Hand the output value to the output manager and yield the follow-up events.

    The ``handle_output`` call runs inside ``user_code_error_boundary`` with
    ``DagsterExecutionHandleOutputError`` as the error class, while ``Failure``
    and ``RetryRequested`` are listed as control flow exceptions.

    If ``handle_output`` returns an ObjectStoreOperation, a single object store
    operation event is yielded. Otherwise the result is passed through
    ``_materializations_to_events`` and a serializable SET_ASSET asset store
    operation event is yielded afterwards.
    """
    from dagster.core.storage.asset_store import AssetStoreHandle

    output_def = step_output.output_def
    output_manager = step_context.get_output_manager(step_output_handle)
    output_context = step_context.get_output_context(step_output_handle)
    with user_code_error_boundary(
            DagsterExecutionHandleOutputError,
            control_flow_exceptions=[Failure, RetryRequested],
            # The message is built lazily, only if the boundary needs it.
            msg_fn=lambda:
        (f"Error occurred during the handling of step output:"
         f'    step key: "{step_context.step.key}"'
         f'    output name: "{output_context.name}"'),
            step_key=step_context.step.key,
            output_name=output_context.name,
    ):
        materializations = output_manager.handle_output(
            output_context, output.value)

    # TODO yuhan retire ObjectStoreOperation https://github.com/dagster-io/dagster/issues/3043
    if isinstance(materializations, ObjectStoreOperation):
        yield DagsterEvent.object_store_operation(
            step_context,
            ObjectStoreOperation.serializable(
                materializations, value_name=step_output_handle.output_name),
        )
    else:
        for evt in _materializations_to_events(step_context,
                                               step_output_handle,
                                               materializations):
            yield evt

        # SET_ASSET operation by AssetStore
        yield DagsterEvent.asset_store_operation(
            step_context,
            AssetStoreOperation.serializable(
                AssetStoreOperation(
                    AssetStoreOperationType.SET_ASSET,
                    step_output_handle,
                    AssetStoreHandle(output_def.manager_key,
                                     output_def.metadata),
                )),
        )
예제 #7
0
def core_dagster_event_sequence_for_step(step_context, prior_attempt_count):
    """
    Execute the step within the step_context argument given the in-memory
    events. This function yields a sequence of DagsterEvents, but without
    catching any exceptions that have bubbled up during the computation
    of the step.

    Args:
        step_context: a SystemStepExecutionContext for the step to execute.
        prior_attempt_count: int; when greater than zero a step restarted
            event is yielded instead of a step start event.

    Yields:
        The start/restart event, store operation events for loaded inputs,
        the events produced by the input type checks, events derived from what
        the compute function emits, and finally a step success event carrying
        the measured duration.
    """
    check.inst_param(step_context, "step_context", SystemStepExecutionContext)
    check.int_param(prior_attempt_count, "prior_attempt_count")
    if prior_attempt_count > 0:
        yield DagsterEvent.step_restarted_event(step_context,
                                                prior_attempt_count)
    else:
        yield DagsterEvent.step_start_event(step_context)

    inputs = {}
    for input_name, input_value in _input_values_from_intermediate_storage(
            step_context):
        if isinstance(input_value, ObjectStoreOperation):
            yield DagsterEvent.object_store_operation(
                step_context,
                ObjectStoreOperation.serializable(input_value,
                                                  value_name=input_name))
            inputs[input_name] = input_value.obj
        elif isinstance(input_value, MultipleStepOutputsListWrapper):
            for op in input_value:
                # Inspect each wrapped operation, not the wrapper itself. This
                # branch is only reached when input_value is the wrapper, so
                # testing input_value here would never emit an event.
                if isinstance(op, ObjectStoreOperation):
                    yield DagsterEvent.object_store_operation(
                        step_context,
                        ObjectStoreOperation.serializable(
                            op, value_name=input_name))
                elif isinstance(op, AssetStoreOperation):
                    yield DagsterEvent.asset_store_operation(step_context, op)
            inputs[input_name] = [op.obj for op in input_value]
        elif isinstance(input_value, AssetStoreOperation):
            yield DagsterEvent.asset_store_operation(step_context, input_value)
            inputs[input_name] = input_value.obj
        else:
            inputs[input_name] = input_value

    for input_name, input_value in inputs.items():
        for evt in check.generator(
                _type_checked_event_sequence_for_input(step_context,
                                                       input_name,
                                                       input_value)):
            yield evt

    with time_execution_scope() as timer_result:
        user_event_sequence = check.generator(
            _user_event_sequence_for_step_compute_fn(step_context, inputs))

        # It is important for this loop to be indented within the
        # timer block above in order for time to be recorded accurately.
        for user_event in check.generator(
                _step_output_error_checked_user_event_sequence(
                    step_context, user_event_sequence)):

            if isinstance(user_event, Output):
                for evt in _create_step_events_for_output(
                        step_context, user_event):
                    yield evt
            elif isinstance(user_event,
                            (AssetMaterialization, Materialization)):
                yield DagsterEvent.step_materialization(
                    step_context, user_event)
            elif isinstance(user_event, ExpectationResult):
                yield DagsterEvent.step_expectation_result(
                    step_context, user_event)
            else:
                check.failed(
                    "Unexpected event {event}, should have been caught earlier"
                    .format(event=user_event))

    yield DagsterEvent.step_success_event(
        step_context, StepSuccessData(duration_ms=timer_result.millis))
예제 #8
0
def core_dagster_event_sequence_for_step(step_context, prior_attempt_count):
    """
    Run the step described by step_context and yield its DagsterEvents in
    order: a start (or restart) event, store operation events for loaded
    inputs, the events from the per-input type check sequence, events derived
    from what the compute function emits, and a final success event carrying
    the measured duration.

    No exception handling happens here; anything raised while the step runs
    propagates to the caller.
    """
    check.inst_param(step_context, "step_context", SystemStepExecutionContext)
    check.int_param(prior_attempt_count, "prior_attempt_count")

    if prior_attempt_count > 0:
        opening_event = DagsterEvent.step_restarted_event(
            step_context, prior_attempt_count)
    else:
        opening_event = DagsterEvent.step_start_event(step_context)
    yield opening_event

    inputs = {}
    for input_name, loaded in _load_input_values(step_context):
        # TODO yuhan retire ObjectStoreOperation https://github.com/dagster-io/dagster/issues/3043
        if isinstance(loaded, ObjectStoreOperation):
            yield DagsterEvent.object_store_operation(
                step_context,
                ObjectStoreOperation.serializable(loaded,
                                                  value_name=input_name))
            inputs[input_name] = loaded.obj
            continue

        if isinstance(loaded, FanInStepInputValuesWrapper):
            unwrapped = []
            for item in loaded:
                # Each item is either a store interaction...
                # TODO yuhan retire ObjectStoreOperation https://github.com/dagster-io/dagster/issues/3043
                if isinstance(item, ObjectStoreOperation):
                    yield DagsterEvent.object_store_operation(
                        step_context,
                        ObjectStoreOperation.serializable(
                            item, value_name=input_name),
                    )
                    unwrapped.append(item.obj)
                elif isinstance(item, AssetStoreOperation):
                    yield DagsterEvent.asset_store_operation(
                        step_context, AssetStoreOperation.serializable(item))
                    unwrapped.append(item.obj)
                else:
                    # ...or the value itself.
                    unwrapped.append(item)

            inputs[input_name] = unwrapped
            continue

        if isinstance(loaded, AssetStoreOperation):
            yield DagsterEvent.asset_store_operation(
                step_context, AssetStoreOperation.serializable(loaded))
            inputs[input_name] = loaded.obj
            continue

        inputs[input_name] = loaded

    for name, value in inputs.items():
        type_check_events = check.generator(
            _type_checked_event_sequence_for_input(step_context, name, value))
        for event in type_check_events:
            yield event

    with time_execution_scope() as timer_result:
        user_event_sequence = check.generator(
            _user_event_sequence_for_step_compute_fn(step_context, inputs))
        checked_user_events = check.generator(
            _step_output_error_checked_user_event_sequence(
                step_context, user_event_sequence))

        # This loop must stay inside the timer block so that the time spent
        # consuming the user events is included in the recorded duration.
        for user_event in checked_user_events:
            if isinstance(user_event, (Output, DynamicOutput)):
                for event in _create_step_events_for_output(
                        step_context, user_event):
                    yield event
            elif isinstance(user_event,
                            (AssetMaterialization, Materialization)):
                yield DagsterEvent.step_materialization(
                    step_context, user_event)
            elif isinstance(user_event, ExpectationResult):
                yield DagsterEvent.step_expectation_result(
                    step_context, user_event)
            else:
                check.failed(
                    "Unexpected event {event}, should have been caught earlier"
                    .format(event=user_event))

    success_data = StepSuccessData(duration_ms=timer_result.millis)
    yield DagsterEvent.step_success_event(step_context, success_data)