Beispiel #1
0
def resolve_memoized_execution_plan(execution_plan):
    """Build a subset plan containing only the steps that have a missing output.

    For each output of each step, the IO manager resource configured for that
    output is initialized directly from its resource definition and asked, via
    ``has_output``, whether the output already exists. A step is selected for
    execution as soon as one of its outputs is reported missing.

    Args:
        execution_plan: The plan to filter. This function reads its
            ``pipeline``, ``environment_config`` and ``steps`` attributes and
            calls its ``get_manager_key`` and ``build_subset_plan`` methods.

    Returns:
        ExecutionPlan: Execution plan configured to only run unmemoized steps.
    """
    environment_config = execution_plan.environment_config
    pipeline_def = execution_plan.pipeline.get_definition()
    mode_def = pipeline_def.get_mode_definition(environment_config.mode)

    step_keys_to_execute = set()

    for step in execution_plan.steps:
        for output_name in step.step_output_dict.keys():
            step_output_handle = StepOutputHandle(step.key, output_name)

            io_manager_key = execution_plan.get_manager_key(step_output_handle)
            # TODO: https://github.com/dagster-io/dagster/issues/3302
            # The following code block is HIGHLY experimental. It initializes an IO manager
            # outside of the resource initialization context, and will ignore any exit hooks defined
            # for the IO manager, and will not work if the IO manager requires resource keys
            # for initialization.
            resource_config = (
                environment_config.resources[io_manager_key]["config"]
                if "config" in environment_config.resources[io_manager_key]
                else {}
            )
            resource_def = mode_def.resource_defs[io_manager_key]
            resource_context = InitResourceContext(
                resource_config,
                resource_def,
                pipeline_run=PipelineRun(
                    pipeline_name=pipeline_def.name, run_id="", mode=environment_config.mode
                ),
            )
            io_manager = resource_def.resource_fn(resource_context)
            context = get_output_context(
                execution_plan, environment_config, step_output_handle, None
            )
            if not io_manager.has_output(context):
                step_keys_to_execute.add(step_output_handle.step_key)
                # This step is already selected, so checking its remaining outputs
                # cannot change the result; stop here to avoid initializing further
                # IO managers whose exit hooks would never run.
                # NOTE(review): assumes has_output() has no side effects callers rely on.
                break

    return execution_plan.build_subset_plan(list(step_keys_to_execute))
Beispiel #2
0
def resolve_memoized_execution_plan(
    execution_plan, pipeline_def, run_config, instance, environment_config
):
    """Build a subset plan containing only the steps that have a missing output.

    The resources of the active mode are initialized once through
    ``build_resources``. For every step output, the IO manager registered under
    that output's manager key is asked, via ``has_output``, whether the output
    already exists. Steps with at least one missing output are selected.

    Args:
        execution_plan: The plan to filter; its ``steps`` are inspected and its
            ``get_manager_key`` / ``build_subset_plan`` methods are called.
        pipeline_def: Pipeline definition used to look up the mode definition
            and passed through to the plan helpers.
        run_config: Mapping whose ``"resources"`` entry (default ``{}``) is
            forwarded to ``build_resources``.
        instance: Forwarded unchanged to ``build_resources``.
        environment_config: Supplies ``mode`` and is passed through to
            ``get_output_context`` and ``build_subset_plan``.

    Returns:
        ExecutionPlan: Execution plan configured to only run unmemoized steps.
    """
    from .build_resources import build_resources, initialize_console_manager

    mode = environment_config.mode
    mode_def = pipeline_def.get_mode_definition(mode)

    step_keys_to_execute = set()

    log_manager = initialize_console_manager(None)

    output_handles = [
        StepOutputHandle(step.key, output_name)
        for step in execution_plan.steps
        for output_name in step.step_output_dict.keys()
    ]

    # Only initialize resources when there is at least one output to check, so
    # an empty plan still triggers no resource initialization at all.
    if output_handles:
        # Every argument to build_resources is loop-invariant, so the resources
        # are initialized (and torn down) once instead of once per output.
        # We can do better here by only initializing the io manager and the resources it
        # depends on.
        with build_resources(
            resources=mode_def.resource_defs,
            instance=instance,
            run_config=run_config.get("resources", {}),
            log_manager=log_manager,
        ) as resources:
            for step_output_handle in output_handles:
                # A step that is already selected stays selected; skip the
                # redundant existence check for its other outputs.
                if step_output_handle.step_key in step_keys_to_execute:
                    continue

                io_manager_key = execution_plan.get_manager_key(
                    step_output_handle, pipeline_def
                )
                io_manager = getattr(resources, io_manager_key)
                context = get_output_context(
                    execution_plan,
                    pipeline_def,
                    environment_config,
                    step_output_handle,
                    log_manager=log_manager,
                )
                if not io_manager.has_output(context):
                    step_keys_to_execute.add(step_output_handle.step_key)

    return execution_plan.build_subset_plan(
        list(step_keys_to_execute), pipeline_def, environment_config
    )
Beispiel #3
0
def resolve_memoized_execution_plan(execution_plan):
    """Build a subset plan containing only the steps that have a missing output.

    Step output versions are resolved from the plan. For each versioned output
    handle, the configured manager resource is initialized directly from its
    resource definition and asked, via ``has_asset``, whether the output already
    exists. Each step with at least one missing output is selected exactly once,
    in the order it is first encountered.

    Args:
        execution_plan: The plan to filter. This function reads its
            ``pipeline`` and ``environment_config`` attributes and calls its
            ``resolve_step_output_versions``, ``get_manager_key`` and
            ``build_subset_plan`` methods.

    Returns:
        ExecutionPlan: Execution plan configured to only run unmemoized steps.

    Raises:
        DagsterInvariantViolationError: If every resolved step output version
            is ``None`` (this includes the case of no step outputs at all).
    """
    # pylint: disable=comparison-with-callable

    step_output_versions = execution_plan.resolve_step_output_versions()
    if all(version is None for version in step_output_versions.values()):
        raise DagsterInvariantViolationError(
            "While creating a memoized pipeline run, no steps have versions. At least one step "
            "must have a version.")

    environment_config = execution_plan.environment_config
    pipeline_def = execution_plan.pipeline.get_definition()
    mode_def = pipeline_def.get_mode_definition(environment_config.mode)

    # The list keeps first-seen order for the subset plan; the set provides the
    # membership test so a step with several missing outputs is not listed twice.
    step_keys_to_execute = []
    selected_step_keys = set()

    for step_output_handle in step_output_versions.keys():
        step_key = step_output_handle.step_key
        if step_key in selected_step_keys:
            # Already selected: checking further outputs cannot change the result.
            continue

        manager_key = execution_plan.get_manager_key(step_output_handle)
        # TODO: https://github.com/dagster-io/dagster/issues/3302
        # The following code block is HIGHLY experimental. It initializes an asset store outside of
        # the resource initialization context, and will ignore any exit hooks defined for the asset
        # store.
        resource_config = (
            environment_config.resources[manager_key]["config"]
            if "config" in environment_config.resources[manager_key] else {})
        resource_def = mode_def.resource_defs[manager_key]
        resource_context = InitResourceContext(resource_config, pipeline_def,
                                               resource_def, "")
        object_manager = resource_def.resource_fn(resource_context)
        context = get_output_context(execution_plan, environment_config,
                                     step_output_handle, None)
        if not object_manager.has_asset(
                AssetStoreContext.from_output_context(context)):
            selected_step_keys.add(step_key)
            step_keys_to_execute.append(step_key)

    return execution_plan.build_subset_plan(step_keys_to_execute)
Beispiel #4
0
def resolve_memoized_execution_plan(execution_plan, run_config):
    """Build a subset plan containing only the steps that have a missing output.

    The resources of the plan's mode are initialized once through
    ``build_resources``. For every step output, the IO manager stored under that
    output's manager key is asked, via ``has_output``, whether the output
    already exists. Steps with at least one missing output are selected.

    Args:
        execution_plan: The plan to filter. This function reads its
            ``pipeline``, ``environment_config`` and ``steps`` attributes and
            calls its ``get_manager_key`` and ``build_subset_plan`` methods.
        run_config: Mapping whose ``"resources"`` entry (default ``{}``) is
            forwarded to ``build_resources``.

    Returns:
        ExecutionPlan: Execution plan configured to only run unmemoized steps.
    """
    from .build_resources import build_resources

    environment_config = execution_plan.environment_config

    step_keys_to_execute = set()

    output_handles = [
        StepOutputHandle(step.key, output_name)
        for step in execution_plan.steps
        for output_name in step.step_output_dict.keys()
    ]

    # With no outputs to check, skip definition lookup and resource
    # initialization entirely (an empty plan triggers neither).
    if output_handles:
        # The pipeline/mode definitions and the resource set do not depend on the
        # output being checked, so resolve and initialize them once.
        pipeline_def = execution_plan.pipeline.get_definition()
        mode_def = pipeline_def.get_mode_definition(environment_config.mode)

        # We can do better here by only initializing the io manager and the resources it
        # depends on.
        with build_resources(
            resource_defs=mode_def.resource_defs,
            run_config=run_config.get("resources", {}),
        ) as scoped_resources:
            for step_output_handle in output_handles:
                # A step that is already selected stays selected; skip the
                # redundant existence check for its other outputs.
                if step_output_handle.step_key in step_keys_to_execute:
                    continue

                io_manager_key = execution_plan.get_manager_key(step_output_handle)
                io_manager = scoped_resources.resource_instance_dict[io_manager_key]
                context = get_output_context(
                    execution_plan, environment_config, step_output_handle, None
                )
                if not io_manager.has_output(context):
                    step_keys_to_execute.add(step_output_handle.step_key)

    return execution_plan.build_subset_plan(list(step_keys_to_execute))