Example #1
File: api.py Project: prezi/dagster
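Resolves the step selection for re-executing part of a run: the pipeline is first subset to the parent run's solid selection, the parent execution plan is rebuilt with state derived from the parent run's event logs, the selection is parsed against that plan's step dependencies, and the new plan is created with a KnownExecutionState.for_reexecution built from the same logs.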
def _resolve_reexecute_step_selection(
    instance: DagsterInstance,
    pipeline: IPipeline,
    mode: Optional[str],
    run_config: Optional[dict],
    parent_pipeline_run: PipelineRun,
    step_selection: List[str],
) -> ExecutionPlan:
    if parent_pipeline_run.solid_selection:
        pipeline = pipeline.subset_for_execution(
            parent_pipeline_run.solid_selection)

    parent_logs = instance.all_logs(parent_pipeline_run.run_id)
    parent_plan = create_execution_plan(
        pipeline,
        parent_pipeline_run.run_config,
        mode,
        known_state=KnownExecutionState.derive_from_logs(parent_logs),
    )
    step_keys_to_execute = parse_step_selection(
        parent_plan.get_all_step_deps(), step_selection)
    execution_plan = create_execution_plan(
        pipeline,
        run_config,
        mode,
        step_keys_to_execute=list(step_keys_to_execute),
        known_state=KnownExecutionState.for_reexecution(
            parent_logs, step_keys_to_execute),
    )
    return execution_plan
Example #2
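Computes the step keys for a GraphQL execution request. A resume/retry derives its steps from the parent run's external execution plan; otherwise, when a parent run id is present, a KnownExecutionState.for_reexecution is built from the parent run's logs and returned alongside the requested step keys.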
def compute_step_keys_to_execute(graphene_info, external_pipeline, execution_params):
    check.inst_param(graphene_info, "graphene_info", ResolveInfo)
    check.inst_param(external_pipeline, "external_pipeline", ExternalPipeline)
    check.inst_param(execution_params, "execution_params", ExecutionParams)

    instance = graphene_info.context.instance

    if not execution_params.step_keys and is_resume_retry(execution_params):
        # Get step keys from parent_run_id if it's a resume/retry
        external_execution_plan = get_external_execution_plan_or_raise(
            graphene_info=graphene_info,
            external_pipeline=external_pipeline,
            mode=execution_params.mode,
            run_config=execution_params.run_config,
            step_keys_to_execute=None,
            known_state=None,
        )
        return get_retry_steps_from_execution_plan(
            instance, external_execution_plan, execution_params.execution_metadata.parent_run_id
        )
    else:
        known_state = None
        if execution_params.execution_metadata.parent_run_id:
            known_state = KnownExecutionState.for_reexecution(
                instance.all_logs(execution_params.execution_metadata.parent_run_id),
                execution_params.step_keys,
            )

        return execution_params.step_keys, known_state
Example #3
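A CLI test: verify_step is mocked to raise, and the test asserts that the execute_step command exits non-zero and writes a framework-error event for the fake step to the event log. The serialized ExecuteStepArgs carries a KnownExecutionState with a dynamic-output mapping.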
def test_execute_step_verify_step_framework_error(mock_verify_step):
    with get_foo_pipeline_handle() as pipeline_handle:
        runner = CliRunner()

        mock_verify_step.side_effect = Exception(
            "Unexpected framework error text")

        with instance_for_test(
            overrides={
                "compute_logs": {
                    "module": "dagster.core.storage.noop_compute_log_manager",
                    "class": "NoOpComputeLogManager",
                }
            }
        ) as instance:
            run = create_run_for_test(
                instance,
                pipeline_name="foo",
                run_id="new_run",
            )

            input_json = serialize_dagster_namedtuple(
                ExecuteStepArgs(
                    pipeline_origin=pipeline_handle.get_python_origin(),
                    pipeline_run_id=run.run_id,
                    step_keys_to_execute=["fake_step"],
                    instance_ref=instance.get_ref(),
                    should_verify_step=True,
                    known_state=KnownExecutionState(
                        {},
                        {"blah": {"result": ["0", "1", "2"]}},
                    ),
                ))
            result = runner.invoke(api.execute_step_command, [input_json])

            assert result.exit_code != 0

            # Framework error logged to event log
            logs = instance.all_logs(run.run_id)

            log_entry = logs[0]
            assert (
                log_entry.message ==
                "An exception was thrown during step execution that is likely a framework error, rather than an error in user code."
            )
            assert log_entry.step_key == "fake_step"

            assert "Unexpected framework error text" in str(
                log_entry.dagster_event.event_specific_data.error)
Example #4
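A later variant of Example #2 that delegates resume/retry handling to get_retry_steps_from_parent_run and only builds a KnownExecutionState when both a parent run id and explicit step keys are supplied.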
def compute_step_keys_to_execute(graphene_info, execution_params):
    check.inst_param(graphene_info, "graphene_info", ResolveInfo)
    check.inst_param(execution_params, "execution_params", ExecutionParams)

    instance = graphene_info.context.instance

    if not execution_params.step_keys and is_resume_retry(execution_params):
        # Get step keys from parent_run_id if it's a resume/retry
        return get_retry_steps_from_parent_run(
            instance, execution_params.execution_metadata.parent_run_id
        )
    else:
        known_state = None
        if execution_params.execution_metadata.parent_run_id and execution_params.step_keys:
            known_state = KnownExecutionState.for_reexecution(
                instance.all_logs(execution_params.execution_metadata.parent_run_id),
                execution_params.step_keys,
            )

        return execution_params.step_keys, known_state
Example #5
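Verifies that solid tags survive dynamic fan-out: seeding KnownExecutionState with three mapping keys for emit's "result" output lets the plan resolve the mapped steps, whose tags are then checked. A sketch of the definitions the test assumes follows the listing.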
def test_tags():
    known_state = KnownExecutionState(
        {},
        {emit.name: {"result": ["0", "1", "2"]}},
    )
    plan = create_execution_plan(dynamic_pipeline, known_state=known_state)

    assert plan.get_step_by_key(emit.name).tags == {"first": "1"}

    for mapping_key in range(3):
        assert plan.get_step_by_key(f"{multiply_inputs.name}[{mapping_key}]").tags == {"second": "2"}
        assert plan.get_step_by_key(f"{multiply_by_two.name}[{mapping_key}]").tags == {"third": "3"}
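The test references dynamic_pipeline, emit, multiply_inputs, and multiply_by_two without defining them. Below is a minimal sketch of what those definitions plausibly look like, modeled on the pattern in Example #9 and on the tags the test asserts; the wiring is an assumption, not the project's actual code, and import paths varied across dagster releases (dynamic outputs started out in dagster.experimental).

from dagster import DynamicOutput, DynamicOutputDefinition, pipeline, solid


@solid(tags={"first": "1"}, output_defs=[DynamicOutputDefinition()])
def emit(_):
    # Fan out three dynamic outputs with mapping keys "0", "1", "2".
    for i in range(3):
        yield DynamicOutput(value=i, mapping_key=str(i))


@solid(tags={"second": "2"})
def multiply_inputs(_, x):
    return 2 * x


@solid(tags={"third": "3"})
def multiply_by_two(_, y):
    return 2 * y


@pipeline
def dynamic_pipeline():
    # Each mapped step inherits its solid's tags, which is what test_tags checks.
    emit().map(multiply_inputs).map(multiply_by_two)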
Example #6
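Creates a single run for a backfill. A plain backfill uses the full execution plan; a from-failure backfill retries the failed steps of the partition's last run via get_retry_steps_from_execution_plan; explicit reexecution steps get a KnownExecutionState.for_reexecution when that last run succeeded.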
def create_backfill_run(instance, repo_location, external_pipeline,
                        external_partition_set, backfill_job, partition_data):
    check.inst_param(instance, "instance", DagsterInstance)
    check.inst_param(repo_location, "repo_location", RepositoryLocation)
    check.inst_param(external_pipeline, "external_pipeline", ExternalPipeline)
    check.inst_param(external_partition_set, "external_partition_set",
                     ExternalPartitionSet)
    check.inst_param(backfill_job, "backfill_job", PartitionBackfill)
    check.inst_param(partition_data, "partition_data",
                     ExternalPartitionExecutionParamData)

    full_external_execution_plan = repo_location.get_external_execution_plan(
        external_pipeline,
        partition_data.run_config,
        external_partition_set.mode,
        step_keys_to_execute=None,
        known_state=None,
    )

    tags = merge_dicts(
        external_pipeline.tags,
        partition_data.tags,
        PipelineRun.tags_for_backfill_id(backfill_job.backfill_id),
        backfill_job.tags,
    )

    if not backfill_job.from_failure and not backfill_job.reexecution_steps:
        step_keys_to_execute = None
        parent_run_id = None
        root_run_id = None
        known_state = None

    elif backfill_job.from_failure:
        last_run = _fetch_last_run(instance, external_partition_set,
                                   partition_data.name)
        if not last_run or last_run.status != PipelineRunStatus.FAILURE:
            return None

        parent_run_id = last_run.run_id
        root_run_id = last_run.root_run_id or last_run.run_id
        tags = merge_dicts(
            tags,
            {
                RESUME_RETRY_TAG: "true",
                PARENT_RUN_ID_TAG: parent_run_id,
                ROOT_RUN_ID_TAG: root_run_id,
            },
        )
        step_keys_to_execute, known_state = get_retry_steps_from_execution_plan(
            instance, full_external_execution_plan, parent_run_id)
    elif backfill_job.reexecution_steps:
        last_run = _fetch_last_run(instance, external_partition_set,
                                   partition_data.name)
        parent_run_id = last_run.run_id if last_run else None
        root_run_id = (last_run.root_run_id
                       or last_run.run_id) if last_run else None
        if parent_run_id and root_run_id:
            tags = merge_dicts(tags, {
                PARENT_RUN_ID_TAG: parent_run_id,
                ROOT_RUN_ID_TAG: root_run_id
            })
        step_keys_to_execute = backfill_job.reexecution_steps
        if last_run and last_run.status == PipelineRunStatus.SUCCESS:
            known_state = KnownExecutionState.for_reexecution(
                instance.all_logs(parent_run_id),
                step_keys_to_execute,
            )
        else:
            known_state = None

    if step_keys_to_execute:
        external_execution_plan = repo_location.get_external_execution_plan(
            external_pipeline,
            partition_data.run_config,
            external_partition_set.mode,
            step_keys_to_execute=step_keys_to_execute,
            known_state=known_state,
        )
    else:
        external_execution_plan = full_external_execution_plan

    return instance.create_run(
        pipeline_snapshot=external_pipeline.pipeline_snapshot,
        execution_plan_snapshot=external_execution_plan.execution_plan_snapshot,
        parent_pipeline_snapshot=external_pipeline.parent_pipeline_snapshot,
        pipeline_name=external_pipeline.name,
        run_id=make_new_run_id(),
        solids_to_execute=(
            frozenset(external_partition_set.solid_selection)
            if external_partition_set.solid_selection
            else None
        ),
        run_config=partition_data.run_config,
        mode=external_partition_set.mode,
        step_keys_to_execute=step_keys_to_execute,
        tags=tags,
        root_run_id=root_run_id,
        parent_run_id=parent_run_id,
        status=PipelineRunStatus.NOT_STARTED,
        external_pipeline_origin=external_pipeline.get_external_origin(),
    )
Example #7
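Snapshots the in-flight execution into a KnownExecutionState, capturing previous retry attempts and the dynamic outputs recorded so far.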
def get_known_state(self):
    return KnownExecutionState(
        previous_retry_attempts=self._retry_state.snapshot_attempts(),
        dynamic_mappings=dict(self._successful_dynamic_outputs),
    )
Example #8
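A newer create_backfill_run that also logs telemetry, threads the partition set's solid selection through to the run, and delegates from-failure backfills to instance.create_reexecuted_run_from_failure.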
def create_backfill_run(instance, repo_location, external_pipeline,
                        external_partition_set, backfill_job, partition_data):
    from dagster.daemon.daemon import get_telemetry_daemon_session_id

    check.inst_param(instance, "instance", DagsterInstance)
    check.inst_param(repo_location, "repo_location", RepositoryLocation)
    check.inst_param(external_pipeline, "external_pipeline", ExternalPipeline)
    check.inst_param(external_partition_set, "external_partition_set",
                     ExternalPartitionSet)
    check.inst_param(backfill_job, "backfill_job", PartitionBackfill)
    check.inst_param(partition_data, "partition_data",
                     ExternalPartitionExecutionParamData)

    log_action(
        instance,
        BACKFILL_RUN_CREATED,
        metadata={
            "DAEMON_SESSION_ID": get_telemetry_daemon_session_id(),
            "repo_hash": hash_name(repo_location.name),
            "pipeline_name_hash": hash_name(external_pipeline.name),
        },
    )

    tags = merge_dicts(
        external_pipeline.tags,
        partition_data.tags,
        PipelineRun.tags_for_backfill_id(backfill_job.backfill_id),
        backfill_job.tags,
    )

    solids_to_execute = None
    solid_selection = None
    if not backfill_job.from_failure and not backfill_job.reexecution_steps:
        step_keys_to_execute = None
        parent_run_id = None
        root_run_id = None
        known_state = None
        if external_partition_set.solid_selection:
            solids_to_execute = frozenset(
                external_partition_set.solid_selection)
            solid_selection = external_partition_set.solid_selection

    elif backfill_job.from_failure:
        last_run = _fetch_last_run(instance, external_partition_set,
                                   partition_data.name)
        if not last_run or last_run.status != PipelineRunStatus.FAILURE:
            return None
        return instance.create_reexecuted_run_from_failure(
            last_run,
            repo_location,
            external_pipeline,
            tags=tags,
            run_config=partition_data.run_config,
            mode=external_partition_set.mode,
        )

    elif backfill_job.reexecution_steps:
        last_run = _fetch_last_run(instance, external_partition_set,
                                   partition_data.name)
        parent_run_id = last_run.run_id if last_run else None
        root_run_id = (last_run.root_run_id
                       or last_run.run_id) if last_run else None
        if parent_run_id and root_run_id:
            tags = merge_dicts(tags, {
                PARENT_RUN_ID_TAG: parent_run_id,
                ROOT_RUN_ID_TAG: root_run_id
            })
        step_keys_to_execute = backfill_job.reexecution_steps
        if last_run and last_run.status == PipelineRunStatus.SUCCESS:
            known_state = KnownExecutionState.for_reexecution(
                instance.all_logs(parent_run_id),
                step_keys_to_execute,
            )
        else:
            known_state = None

        if external_partition_set.solid_selection:
            solids_to_execute = frozenset(
                external_partition_set.solid_selection)
            solid_selection = external_partition_set.solid_selection

    external_execution_plan = repo_location.get_external_execution_plan(
        external_pipeline,
        partition_data.run_config,
        external_partition_set.mode,
        step_keys_to_execute=step_keys_to_execute,
        known_state=known_state,
        instance=instance,
    )

    return instance.create_run(
        pipeline_snapshot=external_pipeline.pipeline_snapshot,
        execution_plan_snapshot=external_execution_plan.execution_plan_snapshot,
        parent_pipeline_snapshot=external_pipeline.parent_pipeline_snapshot,
        pipeline_name=external_pipeline.name,
        run_id=make_new_run_id(),
        solids_to_execute=solids_to_execute,
        run_config=partition_data.run_config,
        mode=external_partition_set.mode,
        step_keys_to_execute=step_keys_to_execute,
        tags=tags,
        root_run_id=root_run_id,
        parent_run_id=parent_run_id,
        status=PipelineRunStatus.NOT_STARTED,
        external_pipeline_origin=external_pipeline.get_external_origin(),
        pipeline_code_origin=external_pipeline.get_python_origin(),
        solid_selection=solid_selection,
    )
Example #9
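Checks that user-defined Kubernetes config in solid tags propagates into the execution plan, both for the dynamic-output step and for each mapped step resolved via a pre-seeded KnownExecutionState.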
def test_tags_to_dynamic_plan():
    @solid(
        tags={
            USER_DEFINED_K8S_CONFIG_KEY: {
                "container_config": {
                    "resources": {
                        "requests": {"cpu": "500m", "memory": "128Mi"},
                        "limits": {"cpu": "1000m", "memory": "1Gi"},
                    }
                }
            }
        }
    )
    def multiply_inputs(_, x):
        return 2 * x

    @solid(
        tags={
            USER_DEFINED_K8S_CONFIG_KEY: {
                "container_config": {
                    "resources": {
                        "requests": {"cpu": "250m", "memory": "64Mi"},
                        "limits": {"cpu": "500m", "memory": "2560Mi"},
                    }
                }
            }
        },
        output_defs=[DynamicOutputDefinition()],
    )
    def emit(_):
        for i in range(3):
            yield DynamicOutput(value=i, mapping_key=str(i))

    @pipeline
    def k8s_ready():
        return emit().map(multiply_inputs)

    known_state = KnownExecutionState(
        {},
        {emit.name: {"result": ["0", "1", "2"]}},
    )
    plan = create_execution_plan(k8s_ready, known_state=known_state)

    emit_step = plan.get_step_by_key(emit.name)
    user_defined_k8s_config = get_user_defined_k8s_config(emit_step.tags)

    assert user_defined_k8s_config.container_config
    assert user_defined_k8s_config.container_config["resources"]

    resources = user_defined_k8s_config.container_config["resources"]

    assert resources["requests"]["cpu"] == "250m"
    assert resources["requests"]["memory"] == "64Mi"
    assert resources["limits"]["cpu"] == "500m"
    assert resources["limits"]["memory"] == "2560Mi"

    for mapping_key in range(3):
        multiply_inputs_step = plan.get_step_by_key(f"{multiply_inputs.name}[{mapping_key}]")
        dynamic_step_user_defined_k8s_config = get_user_defined_k8s_config(multiply_inputs_step.tags)

        assert dynamic_step_user_defined_k8s_config.container_config
        assert dynamic_step_user_defined_k8s_config.container_config["resources"]

        resources = dynamic_step_user_defined_k8s_config.container_config["resources"]

        assert resources["requests"]["cpu"] == "500m"
        assert resources["requests"]["memory"] == "128Mi"
        assert resources["limits"]["cpu"] == "1000m"
        assert resources["limits"]["memory"] == "1Gi"
Example #10
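An earlier, single-field variant of Example #7 that only snapshots retry attempts.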
def get_known_state(self):
    return KnownExecutionState(
        previous_retry_attempts=self._retry_state.snapshot_attempts(),
    )
Example #11
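Reconstructs which steps of a parent run should be retried: step handles from the event log are classified as failed, successful, skipped, or interrupted; the plan is then walked topologically so that failed, interrupted, and never-executed steps, plus everything downstream of them (with dynamic mapping keys resolved), end up in the retry set. The step keys are returned together with a KnownExecutionState.for_reexecution. The two tracking helpers it calls are sketched after the listing.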
def get_retry_steps_from_parent_run(
    instance, parent_run_id
) -> Tuple[List[str], Optional[KnownExecutionState]]:
    check.inst_param(instance, "instance", DagsterInstance)
    check.str_param(parent_run_id, "parent_run_id")

    parent_run = instance.get_run_by_id(parent_run_id)
    parent_run_logs = instance.all_logs(parent_run_id)

    execution_plan_snapshot = instance.get_execution_plan_snapshot(
        parent_run.execution_plan_snapshot_id
    )

    if not execution_plan_snapshot:
        raise DagsterExecutionPlanSnapshotNotFoundError(
            f"Could not load execution plan snapshot for run {parent_run_id}"
        )

    execution_plan = ExternalExecutionPlan(execution_plan_snapshot=execution_plan_snapshot)

    # keep track of steps with dicts that point:
    # * step_key -> set(step_handle) in the normal case
    # * unresolved_step_key -> set(resolved_step_handle, ...) for dynamic outputs
    all_steps_in_parent_run_logs: Dict[str, set] = defaultdict(set)
    failed_steps_in_parent_run_logs: Dict[str, set] = defaultdict(set)
    successful_steps_in_parent_run_logs: Dict[str, set] = defaultdict(set)
    interrupted_steps_in_parent_run_logs: Dict[str, set] = defaultdict(set)
    skipped_steps_in_parent_run_logs: Dict[str, set] = defaultdict(set)

    for record in parent_run_logs:
        if record.dagster_event and record.dagster_event.step_handle:
            step_handle = record.dagster_event.step_handle
            _update_tracking_dict(all_steps_in_parent_run_logs, step_handle)

            if record.dagster_event_type == DagsterEventType.STEP_FAILURE:
                _update_tracking_dict(failed_steps_in_parent_run_logs, step_handle)

            if record.dagster_event_type == DagsterEventType.STEP_SUCCESS:
                _update_tracking_dict(successful_steps_in_parent_run_logs, step_handle)

            if record.dagster_event_type == DagsterEventType.STEP_SKIPPED:
                _update_tracking_dict(skipped_steps_in_parent_run_logs, step_handle)

    for step_set in all_steps_in_parent_run_logs.values():
        for step_handle in step_set:
            if (
                not _in_tracking_dict(step_handle, failed_steps_in_parent_run_logs)
                and not _in_tracking_dict(step_handle, successful_steps_in_parent_run_logs)
                and not _in_tracking_dict(step_handle, skipped_steps_in_parent_run_logs)
            ):
                _update_tracking_dict(interrupted_steps_in_parent_run_logs, step_handle)

    to_retry = defaultdict(set)

    execution_deps = execution_plan.execution_deps()
    for step_snap in execution_plan.topological_steps():
        step_key = step_snap.key
        step_handle = StepHandle.parse_from_key(step_snap.key)

        if parent_run.step_keys_to_execute and step_snap.key not in parent_run.step_keys_to_execute:
            continue

        if step_snap.key in failed_steps_in_parent_run_logs:
            to_retry[step_key].update(failed_steps_in_parent_run_logs[step_key])

        # Interrupted steps can occur when graceful cleanup from a step failure fails to run,
        # and a step failure event is not generated
        if step_key in interrupted_steps_in_parent_run_logs:
            to_retry[step_key].update(interrupted_steps_in_parent_run_logs[step_key])

        # Missing steps did not execute, e.g. when a run was terminated
        if step_key not in all_steps_in_parent_run_logs:
            to_retry[step_key].add(step_handle)

        step_dep_keys = execution_deps[step_key]
        retrying_dep_keys = step_dep_keys.intersection(to_retry.keys())

        # this step is downstream of a step we are about to retry
        if retrying_dep_keys:
            for retrying_key in retrying_dep_keys:
                # If this step and its ancestor are both downstream of a dynamic output,
                # add resolved instances of this step for the retrying mapping keys
                if isinstance(step_handle, UnresolvedStepHandle) and all(
                    map(
                        lambda handle: isinstance(handle, ResolvedFromDynamicStepHandle),
                        to_retry[retrying_key],
                    )
                ):
                    for resolved_handle in to_retry[retrying_key]:
                        to_retry[step_key].add(step_handle.resolve(resolved_handle.mapping_key))

                else:
                    to_retry[step_key].add(step_handle)

    steps_to_retry = [
        step_handle.to_key() for step_set in to_retry.values() for step_handle in step_set
    ]

    return steps_to_retry, KnownExecutionState.for_reexecution(parent_run_logs, steps_to_retry)
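get_retry_steps_from_parent_run leans on two helpers that the excerpt does not include. Here is a plausible sketch, consistent with the tracking-dict comment in the listing (resolved dynamic step handles are keyed by their unresolved form, everything else by its own key); this is an assumption about the real implementation, not a verbatim copy, including the unresolved_form attribute.

def _update_tracking_dict(tracking, handle):
    # Track dynamic steps under their unresolved key, so the dict shape is
    # "unresolved_step_key -> {resolved_step_handle, ...}".
    if isinstance(handle, ResolvedFromDynamicStepHandle):
        tracking[handle.unresolved_form.to_key()].add(handle)
    else:
        tracking[handle.to_key()].add(handle)


def _in_tracking_dict(handle, tracking):
    if isinstance(handle, ResolvedFromDynamicStepHandle):
        unresolved_key = handle.unresolved_form.to_key()
        return unresolved_key in tracking and handle in tracking[unresolved_key]
    return handle.to_key() in tracking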