Example #1
File: api.py  Project: prezi/dagster
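Resolves a step selection against a parent run for re-execution. The pipeline is first subset to the parent run's solid selection, the parent execution plan is rebuilt with known state derived from the parent run's event logs, the selection is parsed against that plan's step dependencies, and the resulting step keys feed a new plan whose known state comes from KnownExecutionState.for_reexecution.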
def _resolve_reexecute_step_selection(
    instance: DagsterInstance,
    pipeline: IPipeline,
    mode: Optional[str],
    run_config: Optional[dict],
    parent_pipeline_run: PipelineRun,
    step_selection: List[str],
) -> ExecutionPlan:
    if parent_pipeline_run.solid_selection:
        pipeline = pipeline.subset_for_execution(
            parent_pipeline_run.solid_selection)

    parent_logs = instance.all_logs(parent_pipeline_run.run_id)
    parent_plan = create_execution_plan(
        pipeline,
        parent_pipeline_run.run_config,
        mode,
        known_state=KnownExecutionState.derive_from_logs(parent_logs),
    )
    step_keys_to_execute = parse_step_selection(
        parent_plan.get_all_step_deps(), step_selection)
    execution_plan = create_execution_plan(
        pipeline,
        run_config,
        mode,
        step_keys_to_execute=list(step_keys_to_execute),
        known_state=KnownExecutionState.for_reexecution(
            parent_logs, step_keys_to_execute),
    )
    return execution_plan
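
For context, a minimal sketch of how a step selection typically reaches this helper in the pre-1.0 pipeline API, via reexecute_pipeline. The pipeline my_pipeline and the step key are hypothetical, and the exact call path is an assumption:

# Sketch only: assumes Dagster's pre-1.0 Python API and a hypothetical
# @pipeline named my_pipeline.
from dagster import DagsterInstance, execute_pipeline, reexecute_pipeline

instance = DagsterInstance.ephemeral()

# Execute the full pipeline once so there is a parent run to re-execute from.
parent_result = execute_pipeline(my_pipeline, instance=instance)

# Re-execute a subset of steps; the step selection is resolved against the
# parent run's execution plan by _resolve_reexecute_step_selection.
result = reexecute_pipeline(
    my_pipeline,
    parent_run_id=parent_result.run_id,
    step_selection=["my_solid"],  # hypothetical step key
    instance=instance,
)
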
Example #2
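Computes which step keys a new run should execute. If no step keys were supplied and the request is a resume/retry, the retry steps are derived from an external execution plan for the parent run; otherwise the supplied step keys are returned, with a KnownExecutionState built from the parent run's logs whenever a parent run id is present.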
def compute_step_keys_to_execute(graphene_info, external_pipeline, execution_params):
    check.inst_param(graphene_info, "graphene_info", ResolveInfo)
    check.inst_param(external_pipeline, "external_pipeline", ExternalPipeline)
    check.inst_param(execution_params, "execution_params", ExecutionParams)

    instance = graphene_info.context.instance

    if not execution_params.step_keys and is_resume_retry(execution_params):
        # Get step keys from parent_run_id if it's a resume/retry
        external_execution_plan = get_external_execution_plan_or_raise(
            graphene_info=graphene_info,
            external_pipeline=external_pipeline,
            mode=execution_params.mode,
            run_config=execution_params.run_config,
            step_keys_to_execute=None,
            known_state=None,
        )
        return get_retry_steps_from_execution_plan(
            instance, external_execution_plan, execution_params.execution_metadata.parent_run_id
        )
    else:
        known_state = None
        if execution_params.execution_metadata.parent_run_id:
            known_state = KnownExecutionState.for_reexecution(
                instance.all_logs(execution_params.execution_metadata.parent_run_id),
                execution_params.step_keys,
            )

        return execution_params.step_keys, known_state
Example #3
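A later revision of the same resolver. The resume/retry path now reads the retry steps directly from the parent run via get_retry_steps_from_parent_run, and known state is built only when both a parent run id and explicit step keys are present.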
def compute_step_keys_to_execute(graphene_info, execution_params):
    check.inst_param(graphene_info, "graphene_info", ResolveInfo)
    check.inst_param(execution_params, "execution_params", ExecutionParams)

    instance = graphene_info.context.instance

    if not execution_params.step_keys and is_resume_retry(execution_params):
        # Get step keys from parent_run_id if it's a resume/retry
        return get_retry_steps_from_parent_run(
            instance, execution_params.execution_metadata.parent_run_id
        )
    else:
        known_state = None
        if execution_params.execution_metadata.parent_run_id and execution_params.step_keys:
            known_state = KnownExecutionState.for_reexecution(
                instance.all_logs(execution_params.execution_metadata.parent_run_id),
                execution_params.step_keys,
            )

        return execution_params.step_keys, known_state
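
Compared with example #2, this version no longer requests a fresh external execution plan through get_external_execution_plan_or_raise on the resume/retry path; get_retry_steps_from_parent_run (shown in example #6) recovers the steps from the execution plan snapshot stored with the parent run instead.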
Example #4
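Creates a single run for one backfill partition, distinguishing three cases: a fresh run (no parent run, full plan), a from-failure retry (retry steps and known state recovered from the last failed run for the partition), and explicit re-execution steps (known state built from the parent run's logs only if that run succeeded). Parent and root run ids are recorded as tags on the new run, and a subset execution plan is requested only when specific step keys are set.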
def create_backfill_run(instance, repo_location, external_pipeline,
                        external_partition_set, backfill_job, partition_data):
    check.inst_param(instance, "instance", DagsterInstance)
    check.inst_param(repo_location, "repo_location", RepositoryLocation)
    check.inst_param(external_pipeline, "external_pipeline", ExternalPipeline)
    check.inst_param(external_partition_set, "external_partition_set",
                     ExternalPartitionSet)
    check.inst_param(backfill_job, "backfill_job", PartitionBackfill)
    check.inst_param(partition_data, "partition_data",
                     ExternalPartitionExecutionParamData)

    full_external_execution_plan = repo_location.get_external_execution_plan(
        external_pipeline,
        partition_data.run_config,
        external_partition_set.mode,
        step_keys_to_execute=None,
        known_state=None,
    )

    tags = merge_dicts(
        external_pipeline.tags,
        partition_data.tags,
        PipelineRun.tags_for_backfill_id(backfill_job.backfill_id),
        backfill_job.tags,
    )

    if not backfill_job.from_failure and not backfill_job.reexecution_steps:
        step_keys_to_execute = None
        parent_run_id = None
        root_run_id = None
        known_state = None

    elif backfill_job.from_failure:
        last_run = _fetch_last_run(instance, external_partition_set,
                                   partition_data.name)
        if not last_run or last_run.status != PipelineRunStatus.FAILURE:
            return None

        parent_run_id = last_run.run_id
        root_run_id = last_run.root_run_id or last_run.run_id
        tags = merge_dicts(
            tags,
            {
                RESUME_RETRY_TAG: "true",
                PARENT_RUN_ID_TAG: parent_run_id,
                ROOT_RUN_ID_TAG: root_run_id,
            },
        )
        step_keys_to_execute, known_state = get_retry_steps_from_execution_plan(
            instance, full_external_execution_plan, parent_run_id)
    elif backfill_job.reexecution_steps:
        last_run = _fetch_last_run(instance, external_partition_set,
                                   partition_data.name)
        parent_run_id = last_run.run_id if last_run else None
        root_run_id = (last_run.root_run_id or last_run.run_id) if last_run else None
        if parent_run_id and root_run_id:
            tags = merge_dicts(tags, {
                PARENT_RUN_ID_TAG: parent_run_id,
                ROOT_RUN_ID_TAG: root_run_id
            })
        step_keys_to_execute = backfill_job.reexecution_steps
        if last_run and last_run.status == PipelineRunStatus.SUCCESS:
            known_state = KnownExecutionState.for_reexecution(
                instance.all_logs(parent_run_id),
                step_keys_to_execute,
            )
        else:
            known_state = None

    if step_keys_to_execute:
        external_execution_plan = repo_location.get_external_execution_plan(
            external_pipeline,
            partition_data.run_config,
            external_partition_set.mode,
            step_keys_to_execute=step_keys_to_execute,
            known_state=known_state,
        )
    else:
        external_execution_plan = full_external_execution_plan

    return instance.create_run(
        pipeline_snapshot=external_pipeline.pipeline_snapshot,
        execution_plan_snapshot=external_execution_plan.execution_plan_snapshot,
        parent_pipeline_snapshot=external_pipeline.parent_pipeline_snapshot,
        pipeline_name=external_pipeline.name,
        run_id=make_new_run_id(),
        solids_to_execute=(
            frozenset(external_partition_set.solid_selection)
            if external_partition_set.solid_selection
            else None
        ),
        run_config=partition_data.run_config,
        mode=external_partition_set.mode,
        step_keys_to_execute=step_keys_to_execute,
        tags=tags,
        root_run_id=root_run_id,
        parent_run_id=parent_run_id,
        status=PipelineRunStatus.NOT_STARTED,
        external_pipeline_origin=external_pipeline.get_external_origin(),
    )
Example #5
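A later revision of create_backfill_run. It logs a telemetry event when the run is created, delegates the from-failure case to instance.create_reexecuted_run_from_failure instead of assembling the retry state inline, and threads the partition set's solid selection through to the created run.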
def create_backfill_run(instance, repo_location, external_pipeline,
                        external_partition_set, backfill_job, partition_data):
    from dagster.daemon.daemon import get_telemetry_daemon_session_id

    check.inst_param(instance, "instance", DagsterInstance)
    check.inst_param(repo_location, "repo_location", RepositoryLocation)
    check.inst_param(external_pipeline, "external_pipeline", ExternalPipeline)
    check.inst_param(external_partition_set, "external_partition_set",
                     ExternalPartitionSet)
    check.inst_param(backfill_job, "backfill_job", PartitionBackfill)
    check.inst_param(partition_data, "partition_data",
                     ExternalPartitionExecutionParamData)

    log_action(
        instance,
        BACKFILL_RUN_CREATED,
        metadata={
            "DAEMON_SESSION_ID": get_telemetry_daemon_session_id(),
            "repo_hash": hash_name(repo_location.name),
            "pipeline_name_hash": hash_name(external_pipeline.name),
        },
    )

    tags = merge_dicts(
        external_pipeline.tags,
        partition_data.tags,
        PipelineRun.tags_for_backfill_id(backfill_job.backfill_id),
        backfill_job.tags,
    )

    solids_to_execute = None
    solid_selection = None
    if not backfill_job.from_failure and not backfill_job.reexecution_steps:
        step_keys_to_execute = None
        parent_run_id = None
        root_run_id = None
        known_state = None
        if external_partition_set.solid_selection:
            solids_to_execute = frozenset(
                external_partition_set.solid_selection)
            solid_selection = external_partition_set.solid_selection

    elif backfill_job.from_failure:
        last_run = _fetch_last_run(instance, external_partition_set,
                                   partition_data.name)
        if not last_run or last_run.status != PipelineRunStatus.FAILURE:
            return None
        return instance.create_reexecuted_run_from_failure(
            last_run,
            repo_location,
            external_pipeline,
            tags=tags,
            run_config=partition_data.run_config,
            mode=external_partition_set.mode,
        )

    elif backfill_job.reexecution_steps:
        last_run = _fetch_last_run(instance, external_partition_set,
                                   partition_data.name)
        parent_run_id = last_run.run_id if last_run else None
        root_run_id = (last_run.root_run_id or last_run.run_id) if last_run else None
        if parent_run_id and root_run_id:
            tags = merge_dicts(tags, {
                PARENT_RUN_ID_TAG: parent_run_id,
                ROOT_RUN_ID_TAG: root_run_id
            })
        step_keys_to_execute = backfill_job.reexecution_steps
        if last_run and last_run.status == PipelineRunStatus.SUCCESS:
            known_state = KnownExecutionState.for_reexecution(
                instance.all_logs(parent_run_id),
                step_keys_to_execute,
            )
        else:
            known_state = None

        if external_partition_set.solid_selection:
            solids_to_execute = frozenset(
                external_partition_set.solid_selection)
            solid_selection = external_partition_set.solid_selection

    external_execution_plan = repo_location.get_external_execution_plan(
        external_pipeline,
        partition_data.run_config,
        external_partition_set.mode,
        step_keys_to_execute=step_keys_to_execute,
        known_state=known_state,
        instance=instance,
    )

    return instance.create_run(
        pipeline_snapshot=external_pipeline.pipeline_snapshot,
        execution_plan_snapshot=external_execution_plan.execution_plan_snapshot,
        parent_pipeline_snapshot=external_pipeline.parent_pipeline_snapshot,
        pipeline_name=external_pipeline.name,
        run_id=make_new_run_id(),
        solids_to_execute=solids_to_execute,
        run_config=partition_data.run_config,
        mode=external_partition_set.mode,
        step_keys_to_execute=step_keys_to_execute,
        tags=tags,
        root_run_id=root_run_id,
        parent_run_id=parent_run_id,
        status=PipelineRunStatus.NOT_STARTED,
        external_pipeline_origin=external_pipeline.get_external_origin(),
        pipeline_code_origin=external_pipeline.get_python_origin(),
        solid_selection=solid_selection,
    )
Example #6
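Recovers the steps to retry from a parent run without re-planning it in user code. The parent run's event log is classified into failed, successful, skipped, and interrupted steps (a step that started but emitted no terminal event); steps missing from the logs are treated as never executed; and every step downstream of a retried step is added as well, with dynamic-output mapping keys resolved where needed. Returns the retry step keys along with a KnownExecutionState for re-execution.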
def get_retry_steps_from_parent_run(
    instance, parent_run_id
) -> Tuple[List[str], Optional[KnownExecutionState]]:
    check.inst_param(instance, "instance", DagsterInstance)
    check.str_param(parent_run_id, "parent_run_id")

    parent_run = instance.get_run_by_id(parent_run_id)
    parent_run_logs = instance.all_logs(parent_run_id)

    execution_plan_snapshot = instance.get_execution_plan_snapshot(
        parent_run.execution_plan_snapshot_id
    )

    if not execution_plan_snapshot:
        raise DagsterExecutionPlanSnapshotNotFoundError(
            f"Could not load execution plan snapshot for run {parent_run_id}"
        )

    execution_plan = ExternalExecutionPlan(execution_plan_snapshot=execution_plan_snapshot)

    # keep track of steps in dicts that map:
    # * step_key -> set(step_handle) in the normal case
    # * unresolved_step_key -> set(resolved_step_handle, ...) for dynamic outputs
    all_steps_in_parent_run_logs: Dict[str, set] = defaultdict(set)
    failed_steps_in_parent_run_logs: Dict[str, set] = defaultdict(set)
    successful_steps_in_parent_run_logs: Dict[str, set] = defaultdict(set)
    interrupted_steps_in_parent_run_logs: Dict[str, set] = defaultdict(set)
    skipped_steps_in_parent_run_logs: Dict[str, set] = defaultdict(set)

    for record in parent_run_logs:
        if record.dagster_event and record.dagster_event.step_handle:
            step_handle = record.dagster_event.step_handle
            _update_tracking_dict(all_steps_in_parent_run_logs, step_handle)

            if record.dagster_event_type == DagsterEventType.STEP_FAILURE:
                _update_tracking_dict(failed_steps_in_parent_run_logs, step_handle)

            if record.dagster_event_type == DagsterEventType.STEP_SUCCESS:
                _update_tracking_dict(successful_steps_in_parent_run_logs, step_handle)

            if record.dagster_event_type == DagsterEventType.STEP_SKIPPED:
                _update_tracking_dict(skipped_steps_in_parent_run_logs, step_handle)

    for step_set in all_steps_in_parent_run_logs.values():
        for step_handle in step_set:
            if (
                not _in_tracking_dict(step_handle, failed_steps_in_parent_run_logs)
                and not _in_tracking_dict(step_handle, successful_steps_in_parent_run_logs)
                and not _in_tracking_dict(step_handle, skipped_steps_in_parent_run_logs)
            ):
                _update_tracking_dict(interrupted_steps_in_parent_run_logs, step_handle)

    to_retry = defaultdict(set)

    execution_deps = execution_plan.execution_deps()
    for step_snap in execution_plan.topological_steps():
        step_key = step_snap.key
        step_handle = StepHandle.parse_from_key(step_snap.key)

        if parent_run.step_keys_to_execute and step_snap.key not in parent_run.step_keys_to_execute:
            continue

        if step_key in failed_steps_in_parent_run_logs:
            to_retry[step_key].update(failed_steps_in_parent_run_logs[step_key])

        # Interrupted steps can occur when graceful cleanup from a step failure fails to run,
        # and a step failure event is not generated
        if step_key in interrupted_steps_in_parent_run_logs:
            to_retry[step_key].update(interrupted_steps_in_parent_run_logs[step_key])

        # Missing steps did not execute, e.g. when a run was terminated
        if step_key not in all_steps_in_parent_run_logs:
            to_retry[step_key].add(step_handle)

        step_dep_keys = execution_deps[step_key]
        retrying_dep_keys = step_dep_keys.intersection(to_retry.keys())

        # this step is downstream of a step we are about to retry
        if retrying_dep_keys:
            for retrying_key in retrying_dep_keys:
                # If this step and its ancestor are both downstream of a dynamic output,
                # add resolved instances of this step for the retrying mapping keys
                if isinstance(step_handle, UnresolvedStepHandle) and all(
                    isinstance(handle, ResolvedFromDynamicStepHandle)
                    for handle in to_retry[retrying_key]
                ):
                    for resolved_handle in to_retry[retrying_key]:
                        to_retry[step_key].add(step_handle.resolve(resolved_handle.mapping_key))

                else:
                    to_retry[step_key].add(step_handle)

    steps_to_retry = [
        step_handle.to_key() for step_set in to_retry.values() for step_handle in step_set
    ]

    return steps_to_retry, KnownExecutionState.for_reexecution(parent_run_logs, steps_to_retry)
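
The helpers _update_tracking_dict and _in_tracking_dict are not included in this listing. A plausible sketch under the key scheme described in the comment above (plain step keys, with dynamic-output steps grouped under the key of their unresolved form); that ResolvedFromDynamicStepHandle exposes unresolved_form is an assumption here:

from typing import Dict

# Assumes the handle types already imported by this module
# (StepHandle, UnresolvedStepHandle, ResolvedFromDynamicStepHandle).

def _update_tracking_dict(tracking: Dict[str, set], handle) -> None:
    # Steps resolved from a dynamic output are grouped under the key of
    # their unresolved form, so one unresolved step key maps to every
    # mapped instance of that step.
    if isinstance(handle, ResolvedFromDynamicStepHandle):
        tracking[handle.unresolved_form.to_key()].add(handle)
    else:
        tracking[handle.to_key()].add(handle)


def _in_tracking_dict(handle, tracking: Dict[str, set]) -> bool:
    if isinstance(handle, ResolvedFromDynamicStepHandle):
        return handle in tracking.get(handle.unresolved_form.to_key(), set())
    return handle.to_key() in tracking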