예제 #1
0
    def get_external_execution_plan(
        self, external_pipeline, run_config, mode, step_keys_to_execute
    ):
        """Request an execution plan snapshot for ``external_pipeline`` over gRPC.

        Args:
            external_pipeline (ExternalPipeline): The pipeline to build a plan for.
            run_config (dict): Run configuration forwarded to the gRPC call.
            mode (str): Mode name forwarded to the gRPC call.
            step_keys_to_execute (Optional[List[str]]): Optional list of step keys
                forwarded to the gRPC call.

        Returns:
            The ``ExecutionPlanSnapshotErrorData`` instance when the gRPC helper
            returns one; otherwise an ``ExternalExecutionPlan`` wrapping the
            returned snapshot and ``external_pipeline``.
        """
        check.inst_param(external_pipeline, "external_pipeline", ExternalPipeline)
        check.dict_param(run_config, "run_config")
        check.str_param(mode, "mode")
        check.opt_list_param(step_keys_to_execute, "step_keys_to_execute", of_type=str)

        # Gather the call arguments in the same order the keyword arguments
        # were originally evaluated.
        client = self._handle.client
        origin = external_pipeline.get_external_origin()
        snapshot_id = external_pipeline.identifying_pipeline_snapshot_id
        selection = external_pipeline.solid_selection

        result = sync_get_external_execution_plan_grpc(
            api_client=client,
            pipeline_origin=origin,
            run_config=run_config,
            mode=mode,
            pipeline_snapshot_id=snapshot_id,
            solid_selection=selection,
            step_keys_to_execute=step_keys_to_execute,
        )

        if not isinstance(result, ExecutionPlanSnapshotErrorData):
            return ExternalExecutionPlan(
                execution_plan_snapshot=result,
                represented_pipeline=external_pipeline,
            )

        # Error payloads are handed back to the caller untouched.
        return result
예제 #2
0
def test_run_created_in_0_7_9_snapshot_id_change():
    """Check that snapshots stored by 0.7.9 still load after the snapshot id changes.

    The stored pipeline and execution plan snapshots are loaded by their old
    ids, their recomputed ids are asserted to differ from the stored ones, and
    an ``ExternalExecutionPlan`` is built from them.
    """
    sqlite_dir = file_relative_path(
        __file__, 'snapshot_0_7_9_shapshot_id_creation_change/sqlite'
    )
    with restore_directory(sqlite_dir):
        instance = DagsterInstance.from_ref(InstanceRef.from_dir(sqlite_dir))
        # run_id = 'e297fa70-49e8-43f8-abfe-1634f02644f6'

        old_pipeline_snapshot_id = '88528edde2ed64da3c39cca0da8ba2f7586c1a5d'
        old_execution_plan_snapshot_id = '2246f8e5a10d21e15fbfa3773d7b2d0bc1fa9d3d'

        # Loading the historical pipeline is expected to emit this deprecation warning.
        deprecation_pattern = re.escape(
            '"input_hydration_schema_key" is deprecated and will be removed in 0.10.0, use '
            '"loader_schema_key" instead.'
        )
        with pytest.warns(UserWarning, match=deprecation_pattern):
            historical_pipeline = instance.get_historical_pipeline(old_pipeline_snapshot_id)

        pipeline_snapshot = historical_pipeline.pipeline_snapshot
        ep_snapshot = instance.get_execution_plan_snapshot(old_execution_plan_snapshot_id)

        # It is the pipeline snapshot that changed
        # Verify that snapshot ids are not equal. This changed in 0.7.10
        recomputed_pipeline_id = create_pipeline_snapshot_id(pipeline_snapshot)
        assert recomputed_pipeline_id != old_pipeline_snapshot_id

        # We also changed execution plan schema in 0.7.11.post1
        recomputed_plan_id = create_execution_plan_snapshot_id(ep_snapshot)
        assert recomputed_plan_id != old_execution_plan_snapshot_id

        # This previously failed with a check error
        assert ExternalExecutionPlan(ep_snapshot, historical_pipeline)
예제 #3
0
파일: runs.py 프로젝트: crazy32571/dagster
    def resolve_executionPlan(self, graphene_info):
        """Resolve the execution plan recorded for this run.

        Args:
            graphene_info: Resolver info; ``graphene_info.context.instance`` is
                used to load the stored snapshots.

        Returns:
            A ``DauphinExecutionPlan`` wrapping an ``ExternalExecutionPlan``, or
            ``None`` when the run lacks either snapshot id or when either
            snapshot lookup returns a falsy value.
        """
        if (
            not self._pipeline_run.execution_plan_snapshot_id
            or not self._pipeline_run.pipeline_snapshot_id
        ):
            return None

        from .execution import DauphinExecutionPlan

        instance = graphene_info.context.instance
        historical_pipeline = instance.get_historical_pipeline(
            self._pipeline_run.pipeline_snapshot_id
        )
        execution_plan_snapshot = instance.get_execution_plan_snapshot(
            self._pipeline_run.execution_plan_snapshot_id
        )

        # Both pieces must be available before a plan can be represented.
        if not (execution_plan_snapshot and historical_pipeline):
            return None

        external_plan = ExternalExecutionPlan(
            execution_plan_snapshot=execution_plan_snapshot,
            represented_pipeline=historical_pipeline,
        )
        return DauphinExecutionPlan(external_plan)
예제 #4
0
    def get_external_execution_plan(self, external_pipeline, run_config, mode,
                                    step_keys_to_execute):
        """Request an execution plan snapshot for ``external_pipeline`` over gRPC.

        Args:
            external_pipeline (ExternalPipeline): The pipeline to build a plan for.
            run_config (dict): Run configuration forwarded to the gRPC call.
            mode (str): Mode name forwarded to the gRPC call.
            step_keys_to_execute (Optional[List[str]]): Optional list of step keys
                forwarded to the gRPC call.

        Returns:
            ExternalExecutionPlan: Wraps the value returned by the gRPC helper
            together with ``external_pipeline``.
        """
        check.inst_param(external_pipeline, 'external_pipeline', ExternalPipeline)
        check.dict_param(run_config, 'run_config')
        check.str_param(mode, 'mode')
        check.opt_list_param(step_keys_to_execute, 'step_keys_to_execute', of_type=str)

        # Gather the call arguments in the same order the keyword arguments
        # were originally evaluated.
        client = self._handle.client
        origin = external_pipeline.get_origin()
        snapshot_id = external_pipeline.identifying_pipeline_snapshot_id
        selection = external_pipeline.solid_selection

        plan_snapshot = sync_get_external_execution_plan_grpc(
            api_client=client,
            pipeline_origin=origin,
            run_config=run_config,
            mode=mode,
            pipeline_snapshot_id=snapshot_id,
            solid_selection=selection,
            step_keys_to_execute=step_keys_to_execute,
        )

        return ExternalExecutionPlan(
            execution_plan_snapshot=plan_snapshot,
            represented_pipeline=external_pipeline,
        )
예제 #5
0
    def get_external_execution_plan(self, external_pipeline, run_config, mode,
                                    step_keys_to_execute):
        """Request an execution plan snapshot via ``sync_get_external_execution_plan``.

        Args:
            external_pipeline (ExternalPipeline): The pipeline to build a plan for.
            run_config (dict): Run configuration forwarded to the helper.
            mode (str): Mode name forwarded to the helper.
            step_keys_to_execute (Optional[List[str]]): Optional list of step keys
                forwarded to the helper.

        Returns:
            The ``ExecutionPlanSnapshotErrorData`` instance when the helper
            returns one; otherwise an ``ExternalExecutionPlan`` wrapping the
            returned snapshot and ``external_pipeline``.
        """
        # Imported at call time, as in the original implementation.
        from dagster.api.snapshot_execution_plan import sync_get_external_execution_plan

        check.inst_param(external_pipeline, 'external_pipeline', ExternalPipeline)
        check.dict_param(run_config, 'run_config')
        check.str_param(mode, 'mode')
        check.opt_list_param(step_keys_to_execute, 'step_keys_to_execute', of_type=str)

        origin = external_pipeline.get_origin()
        selection = external_pipeline.solid_selection
        snapshot_id = external_pipeline.identifying_pipeline_snapshot_id

        result = sync_get_external_execution_plan(
            pipeline_origin=origin,
            solid_selection=selection,
            run_config=run_config,
            mode=mode,
            step_keys_to_execute=step_keys_to_execute,
            pipeline_snapshot_id=snapshot_id,
        )

        if not isinstance(result, ExecutionPlanSnapshotErrorData):
            return ExternalExecutionPlan(
                execution_plan_snapshot=result,
                represented_pipeline=external_pipeline,
            )

        # Error payloads are handed back to the caller untouched.
        return result
예제 #6
0
    def get_external_execution_plan(self, external_pipeline, run_config, mode,
                                    step_keys_to_execute):
        """Build an execution plan in-process and wrap its snapshot.

        Args:
            external_pipeline (ExternalPipeline): The pipeline to build a plan for.
            run_config (dict): Run configuration passed to ``create_execution_plan``.
            mode (str): Mode name passed to ``create_execution_plan``.
            step_keys_to_execute (Optional[List[str]]): Optional list of step keys
                passed to ``create_execution_plan``.

        Returns:
            ExternalExecutionPlan: Wraps the snapshot of the created plan
            together with ``external_pipeline``.
        """
        check.inst_param(external_pipeline, 'external_pipeline', ExternalPipeline)
        check.dict_param(run_config, 'run_config')
        check.str_param(mode, 'mode')
        check.opt_list_param(step_keys_to_execute, 'step_keys_to_execute', of_type=str)

        # Reconstruct the pipeline by name, then narrow it to the solids the
        # external pipeline says should execute.
        reconstructable = self.get_reconstructable_pipeline(external_pipeline.name)
        subset_pipeline = reconstructable.subset_for_execution_from_existing_pipeline(
            external_pipeline.solids_to_execute
        )

        plan = create_execution_plan(
            pipeline=subset_pipeline,
            run_config=run_config,
            mode=mode,
            step_keys_to_execute=step_keys_to_execute,
        )
        plan_snapshot = snapshot_from_execution_plan(
            plan, external_pipeline.identifying_pipeline_snapshot_id
        )

        return ExternalExecutionPlan(
            execution_plan_snapshot=plan_snapshot,
            represented_pipeline=external_pipeline,
        )
예제 #7
0
    def get_external_execution_plan(self, external_pipeline, environment_dict,
                                    mode, step_keys_to_execute):
        """Request an execution plan snapshot via ``sync_get_external_execution_plan``.

        Args:
            external_pipeline (ExternalPipeline): The pipeline to build a plan for.
            environment_dict (dict): Environment configuration forwarded to the helper.
            mode (str): Mode name forwarded to the helper.
            step_keys_to_execute (Optional[List[str]]): Optional list of step keys
                forwarded to the helper.

        Returns:
            ExternalExecutionPlan: Wraps the value returned by the helper
            together with ``external_pipeline``.
        """
        # Imported at call time, as in the original implementation.
        from dagster.api.snapshot_execution_plan import sync_get_external_execution_plan

        check.inst_param(external_pipeline, 'external_pipeline', ExternalPipeline)
        check.dict_param(environment_dict, 'environment_dict')
        check.str_param(mode, 'mode')
        check.opt_list_param(step_keys_to_execute, 'step_keys_to_execute', of_type=str)

        origin = external_pipeline.get_origin()
        selection = external_pipeline.solid_selection
        snapshot_id = external_pipeline.identifying_pipeline_snapshot_id

        plan_snapshot = sync_get_external_execution_plan(
            pipeline_origin=origin,
            solid_selection=selection,
            environment_dict=environment_dict,
            mode=mode,
            step_keys_to_execute=step_keys_to_execute,
            snapshot_id=snapshot_id,
        )

        return ExternalExecutionPlan(
            execution_plan_snapshot=plan_snapshot,
            represented_pipeline=external_pipeline,
        )
예제 #8
0
def test_run_created_in_0_7_9_snapshot_id_change():
    """Check that snapshots stored by 0.7.9 still load after the snapshot id changes.

    Works on a copy of the stored sqlite directory. The recomputed snapshot ids
    are asserted to differ from the stored ones, both ids are asserted to be
    exposed on the historical pipeline, and an ``ExternalExecutionPlan`` is
    built from the stored plan snapshot.
    """
    sqlite_src = file_relative_path(
        __file__, "snapshot_0_7_9_shapshot_id_creation_change/sqlite"
    )
    with copy_directory(sqlite_src) as test_dir:
        instance = DagsterInstance.from_ref(InstanceRef.from_dir(test_dir))
        # run_id = 'e297fa70-49e8-43f8-abfe-1634f02644f6'

        old_pipeline_snapshot_id = "88528edde2ed64da3c39cca0da8ba2f7586c1a5d"
        old_execution_plan_snapshot_id = "2246f8e5a10d21e15fbfa3773d7b2d0bc1fa9d3d"

        historical_pipeline = instance.get_historical_pipeline(old_pipeline_snapshot_id)
        pipeline_snapshot = historical_pipeline.pipeline_snapshot
        ep_snapshot = instance.get_execution_plan_snapshot(old_execution_plan_snapshot_id)

        # It is the pipeline snapshot that changed
        # Verify that snapshot ids are not equal. This changed in 0.7.10
        recomputed_pipeline_id = create_pipeline_snapshot_id(pipeline_snapshot)
        assert recomputed_pipeline_id != old_pipeline_snapshot_id

        # verify that both are accessible off of the historical pipeline
        assert historical_pipeline.computed_pipeline_snapshot_id == recomputed_pipeline_id
        assert historical_pipeline.identifying_pipeline_snapshot_id == old_pipeline_snapshot_id

        # We also changed execution plan schema in 0.7.11.post1
        recomputed_plan_id = create_execution_plan_snapshot_id(ep_snapshot)
        assert recomputed_plan_id != old_execution_plan_snapshot_id

        # This previously failed with a check error
        assert ExternalExecutionPlan(ep_snapshot)
예제 #9
0
def get_pipeline_run_observable(graphene_info, run_id, after=None):
    """Create an observable that streams log messages for a pipeline run.

    Args:
        graphene_info (ResolveInfo): Resolver info; provides the instance via
            ``graphene_info.context.instance`` and the GraphQL schema.
        run_id (str): Id of the run to observe.
        after (Optional[int]): Cursor passed to ``PipelineRunObservableSubscribe``
            as ``after_cursor``.

    Returns:
        An observable. When the run cannot be loaded it emits a single
        ``PipelineRunLogsSubscriptionFailure``; otherwise each batch of events
        is mapped to a ``PipelineRunLogsSubscriptionSuccess``.
    """
    check.inst_param(graphene_info, 'graphene_info', ResolveInfo)
    check.str_param(run_id, 'run_id')
    check.opt_int_param(after, 'after')

    instance = graphene_info.context.instance
    run = instance.get_run_by_id(run_id)

    if not run:

        def _emit_missing_run(observer):
            failure = graphene_info.schema.type_named('PipelineRunLogsSubscriptionFailure')(
                missingRunId=run_id, message='Could not load run with id {}'.format(run_id)
            )
            observer.on_next(failure)

        return Observable.create(_emit_missing_run)  # pylint: disable=E1101

    # The plan is only reconstructed when the run recorded both snapshot ids.
    external_execution_plan = None
    if run.pipeline_snapshot_id and run.execution_plan_snapshot_id:
        plan_snapshot = instance.get_execution_plan_snapshot(run.execution_plan_snapshot_id)
        historical_pipeline = instance.get_historical_pipeline(run.pipeline_snapshot_id)
        external_execution_plan = ExternalExecutionPlan(
            execution_plan_snapshot=plan_snapshot,
            represented_pipeline=historical_pipeline,
        )

    def _to_success_payload(events):
        success_type = graphene_info.schema.type_named('PipelineRunLogsSubscriptionSuccess')
        return success_type(
            run=graphene_info.schema.type_named('PipelineRun')(run),
            messages=[
                from_event_record(event, run.pipeline_name, external_execution_plan)
                for event in events
            ],
        )

    # pylint: disable=E1101
    subscription = PipelineRunObservableSubscribe(instance, run_id, after_cursor=after)
    return Observable.create(subscription).map(_to_success_payload)
예제 #10
0
def get_retry_steps_from_parent_run(
    instance: DagsterInstance, parent_run_id: str
) -> Tuple[List[str], Optional[KnownExecutionState]]:
    """Compute which step keys to re-execute, based on the logs of a parent run.

    The parent run's execution plan snapshot is walked in topological order. A
    step is selected for retry when it failed, when it appears in the logs
    without a failure/success/skip event (treated as interrupted), when it does
    not appear in the logs at all, or when it depends on a step already
    selected for retry.

    Args:
        instance: Instance used to load the parent run, its logs and its
            execution plan snapshot.
        parent_run_id: Id of the run whose steps are being retried.

    Returns:
        A tuple of (list of step keys to retry, the value returned by
        ``KnownExecutionState.for_reexecution`` for the parent logs and those keys).

    Raises:
        DagsterExecutionPlanSnapshotNotFoundError: If no execution plan snapshot
            can be loaded for the parent run.
    """
    check.inst_param(instance, "instance", DagsterInstance)
    check.str_param(parent_run_id, "parent_run_id")

    # NOTE(review): parent_run is dereferenced below without a None check; this
    # assumes the run exists -- confirm against callers.
    parent_run = instance.get_run_by_id(parent_run_id)
    parent_run_logs = instance.all_logs(parent_run_id)

    execution_plan_snapshot = instance.get_execution_plan_snapshot(
        parent_run.execution_plan_snapshot_id
    )

    if not execution_plan_snapshot:
        raise DagsterExecutionPlanSnapshotNotFoundError(
            f"Could not load execution plan snapshot for run {parent_run_id}"
        )

    execution_plan = ExternalExecutionPlan(execution_plan_snapshot=execution_plan_snapshot)

    # keep track of steps with dicts that point:
    # * step_key -> set(step_handle) in the normal case
    # * unresolved_step_key -> set(resolved_step_handle, ...) for dynamic outputs
    # (the keying itself is done by _update_tracking_dict, defined elsewhere)
    all_steps_in_parent_run_logs: Dict[str, set] = defaultdict(set)
    failed_steps_in_parent_run_logs: Dict[str, set] = defaultdict(set)
    successful_steps_in_parent_run_logs: Dict[str, set] = defaultdict(set)
    interrupted_steps_in_parent_run_logs: Dict[str, set] = defaultdict(set)
    skipped_steps_in_parent_run_logs: Dict[str, set] = defaultdict(set)

    # First pass: bucket every step handle seen in the logs by its outcome.
    for record in parent_run_logs:
        # Only events that carry a step handle are relevant here.
        if record.dagster_event and record.dagster_event.step_handle:
            step_handle = record.dagster_event.step_handle
            _update_tracking_dict(all_steps_in_parent_run_logs, step_handle)

            if record.dagster_event_type == DagsterEventType.STEP_FAILURE:
                _update_tracking_dict(failed_steps_in_parent_run_logs, step_handle)

            if record.dagster_event_type == DagsterEventType.STEP_SUCCESS:
                _update_tracking_dict(successful_steps_in_parent_run_logs, step_handle)

            if record.dagster_event_type == DagsterEventType.STEP_SKIPPED:
                _update_tracking_dict(skipped_steps_in_parent_run_logs, step_handle)

    # Second pass: a handle that was seen but has no failure, success or skip
    # event is classified as interrupted.
    for step_set in all_steps_in_parent_run_logs.values():
        for step_handle in step_set:
            if (
                not _in_tracking_dict(step_handle, failed_steps_in_parent_run_logs)
                and not _in_tracking_dict(step_handle, successful_steps_in_parent_run_logs)
                and not _in_tracking_dict(step_handle, skipped_steps_in_parent_run_logs)
            ):
                _update_tracking_dict(interrupted_steps_in_parent_run_logs, step_handle)

    # step_key -> set of step handles that should be re-executed
    to_retry = defaultdict(set)

    execution_deps = execution_plan.execution_deps()
    # Topological order means a step's dependencies have been considered for
    # retry before the step itself is examined.
    for step_snap in execution_plan.topological_steps():
        step_key = step_snap.key
        step_handle = StepHandle.parse_from_key(step_snap.key)

        # Steps outside the parent run's explicit step selection are ignored.
        if parent_run.step_keys_to_execute and step_snap.key not in parent_run.step_keys_to_execute:
            continue

        if step_snap.key in failed_steps_in_parent_run_logs:
            to_retry[step_key].update(failed_steps_in_parent_run_logs[step_key])

        # Interrupted steps can occur when graceful cleanup from a step failure fails to run,
        # and a step failure event is not generated
        if step_key in interrupted_steps_in_parent_run_logs:
            to_retry[step_key].update(interrupted_steps_in_parent_run_logs[step_key])

        # Missing steps did not execute, e.g. when a run was terminated
        if step_key not in all_steps_in_parent_run_logs:
            to_retry[step_key].add(step_handle)

        # presumably a set of upstream step keys (it must support .intersection) -- verify
        step_dep_keys = execution_deps[step_key]
        retrying_dep_keys = step_dep_keys.intersection(to_retry.keys())

        # this step is downstream of a step we are about to retry
        if retrying_dep_keys:
            for retrying_key in retrying_dep_keys:
                # If this step and its ancestor are both downstream of a dynamic output,
                # add resolved instances of this step for the retrying mapping keys
                if isinstance(step_handle, UnresolvedStepHandle) and all(
                    map(
                        lambda handle: isinstance(handle, ResolvedFromDynamicStepHandle),
                        to_retry[retrying_key],
                    )
                ):
                    for resolved_handle in to_retry[retrying_key]:
                        to_retry[step_key].add(step_handle.resolve(resolved_handle.mapping_key))

                else:
                    # Otherwise retry the step under the handle parsed from its key.
                    to_retry[step_key].add(step_handle)

    # Flatten the per-key handle sets into a single list of step keys.
    steps_to_retry = [
        step_handle.to_key() for step_set in to_retry.values() for step_handle in step_set
    ]

    return steps_to_retry, KnownExecutionState.for_reexecution(parent_run_logs, steps_to_retry)