Example #1
0
 def add_execution_plan_snapshot(self, execution_plan_snapshot):
     """Store an execution plan snapshot under its computed snapshot id.

     Args:
         execution_plan_snapshot: The ``ExecutionPlanSnapshot`` to store;
             type-checked with ``check.inst_param``.

     Returns:
         Whatever ``self._add_snapshot`` returns for the stored snapshot.
     """
     check.inst_param(
         execution_plan_snapshot,
         "execution_plan_snapshot",
         ExecutionPlanSnapshot,
     )

     # The id is always derived from the snapshot itself.
     computed_id = create_execution_plan_snapshot_id(execution_plan_snapshot)

     stored = self._add_snapshot(
         snapshot_id=computed_id,
         snapshot_obj=execution_plan_snapshot,
         snapshot_type=SnapshotType.EXECUTION_PLAN,
     )
     return stored
Example #2
0
 def add_execution_plan_snapshot(
     self,
     execution_plan_snapshot: ExecutionPlanSnapshot,
     snapshot_id: Optional[str] = None,
 ) -> str:
     """Record an execution plan snapshot in ``self._ep_snapshots``.

     Args:
         execution_plan_snapshot: The snapshot to record; type-checked with
             ``check.inst_param``.
         snapshot_id: Optional key to store the snapshot under. When it is
             ``None`` or empty, the key is computed with
             ``create_execution_plan_snapshot_id``.

     Returns:
         The key under which the snapshot was stored.
     """
     check.inst_param(
         execution_plan_snapshot,
         "execution_plan_snapshot",
         ExecutionPlanSnapshot,
     )
     check.opt_str_param(snapshot_id, "snapshot_id")

     # Any falsy id (None or "") falls back to the computed one.
     storage_key = snapshot_id or create_execution_plan_snapshot_id(
         execution_plan_snapshot
     )
     self._ep_snapshots[storage_key] = execution_plan_snapshot
     return storage_key
Example #3
0
    def add_execution_plan_snapshot(
        self,
        execution_plan_snapshot: ExecutionPlanSnapshot,
        snapshot_id: Optional[str] = None,
    ) -> str:
        """Store an execution plan snapshot via ``self._add_snapshot``.

        Args:
            execution_plan_snapshot: The snapshot to store; type-checked with
                ``check.inst_param``.
            snapshot_id: Optional id to store the snapshot under. When it is
                ``None`` or empty, the id is computed with
                ``create_execution_plan_snapshot_id``.

        Returns:
            The value returned by ``self._add_snapshot``.
        """
        check.inst_param(
            execution_plan_snapshot,
            "execution_plan_snapshot",
            ExecutionPlanSnapshot,
        )
        check.opt_str_param(snapshot_id, "snapshot_id")

        # Any falsy id (None or "") falls back to the computed one.
        resolved_id = snapshot_id or create_execution_plan_snapshot_id(
            execution_plan_snapshot
        )

        stored = self._add_snapshot(
            snapshot_id=resolved_id,
            snapshot_obj=execution_plan_snapshot,
            snapshot_type=SnapshotType.EXECUTION_PLAN,
        )
        return stored
Example #4
0
def test_create_execution_plan_snapshot():
    """Check that an executed run records the expected execution plan snapshot id.

    The expected id is computed independently from a plan built for the same
    pipeline, then compared with the id stored on the run fetched back from
    the test instance.
    """

    @solid
    def noop_solid(_):
        pass

    @pipeline
    def noop_pipeline():
        noop_solid()

    with instance_for_test() as instance:
        # Build the expected snapshot (and its id) without going through a run.
        plan = create_execution_plan(noop_pipeline)
        pipeline_snapshot_id = noop_pipeline.get_pipeline_snapshot_id()
        expected_snapshot = snapshot_from_execution_plan(plan, pipeline_snapshot_id)
        expected_id = create_execution_plan_snapshot_id(expected_snapshot)

        outcome = execute_pipeline(noop_pipeline, instance=instance)
        assert outcome.success

        stored_run = instance.get_run_by_id(outcome.run_id)

        assert stored_run.execution_plan_snapshot_id == expected_id
        # Recomputing the id from the same snapshot must give the same answer.
        recomputed_id = create_execution_plan_snapshot_id(expected_snapshot)
        assert stored_run.execution_plan_snapshot_id == recomputed_id
Example #5
0
    def _construct_run_with_snapshots(
        self,
        pipeline_name=None,
        run_id=None,
        environment_dict=None,
        mode=None,
        solid_subset=None,
        step_keys_to_execute=None,
        status=None,
        tags=None,
        root_run_id=None,
        parent_run_id=None,
        pipeline_snapshot=None,
        execution_plan_snapshot=None,
    ):
        """Build a ``PipelineRun`` and attach the ids of any supplied snapshots.

        Snapshots that run storage does not already hold are added to it, and
        the id returned by storage is checked against the locally computed id.

        Args:
            pipeline_name, run_id, environment_dict, mode, solid_subset,
            step_keys_to_execute, status, tags, root_run_id, parent_run_id:
                Forwarded unchanged to the ``PipelineRun`` constructor (apart
                from the tag defaulting described below).
            pipeline_snapshot: Optional pipeline snapshot to persist and link.
            execution_plan_snapshot: Optional execution plan snapshot to
                persist and link. Its ``pipeline_snapshot_id`` must equal the
                id computed for ``pipeline_snapshot``.

        Returns:
            The constructed ``PipelineRun``, with snapshot ids attached for
            each snapshot that was passed.

        Raises:
            A ``check.invariant`` failure when the snapshot ids are
            inconsistent, including when ``execution_plan_snapshot`` refers to
            a pipeline snapshot but no ``pipeline_snapshot`` was passed.
        """
        if tags and IS_AIRFLOW_INGEST_PIPELINE_STR in tags:
            if AIRFLOW_EXECUTION_DATE_STR not in tags:
                # NOTE: this writes the default execution date into the
                # caller's ``tags`` dict, not a copy.
                tags[AIRFLOW_EXECUTION_DATE_STR] = get_current_datetime_in_utc(
                ).isoformat()

        pipeline_run = PipelineRun(
            pipeline_name=pipeline_name,
            run_id=run_id,
            environment_dict=environment_dict,
            mode=mode,
            solid_subset=solid_subset,
            step_keys_to_execute=step_keys_to_execute,
            status=status,
            tags=tags,
            root_run_id=root_run_id,
            parent_run_id=parent_run_id,
        )

        # Bound up front so the execution-plan branch can always compare
        # against it; previously it was unbound (UnboundLocalError) whenever
        # an execution plan snapshot arrived without a pipeline snapshot.
        pipeline_snapshot_id = None

        if pipeline_snapshot is not None:
            from dagster.core.snap import create_pipeline_snapshot_id

            pipeline_snapshot_id = create_pipeline_snapshot_id(
                pipeline_snapshot)

            if not self._run_storage.has_pipeline_snapshot(
                    pipeline_snapshot_id):
                returned_pipeline_snapshot_id = self._run_storage.add_pipeline_snapshot(
                    pipeline_snapshot)

                # Storage must key the snapshot by the same id we computed.
                check.invariant(
                    pipeline_snapshot_id == returned_pipeline_snapshot_id)

            pipeline_run = pipeline_run.with_pipeline_snapshot_id(
                pipeline_snapshot_id)

        if execution_plan_snapshot is not None:
            from dagster.core.snap import create_execution_plan_snapshot_id

            check.invariant(
                execution_plan_snapshot.pipeline_snapshot_id ==
                pipeline_snapshot_id,
                'execution_plan_snapshot.pipeline_snapshot_id does not match the id of '
                'the passed pipeline_snapshot',
            )

            execution_plan_snapshot_id = create_execution_plan_snapshot_id(
                execution_plan_snapshot)

            if not self._run_storage.has_execution_plan_snapshot(
                    execution_plan_snapshot_id):
                returned_execution_plan_snapshot_id = self._run_storage.add_execution_plan_snapshot(
                    execution_plan_snapshot)

                # Storage must key the snapshot by the same id we computed.
                check.invariant(execution_plan_snapshot_id ==
                                returned_execution_plan_snapshot_id)

            pipeline_run = pipeline_run.with_execution_plan_snapshot_id(
                execution_plan_snapshot_id)

        return pipeline_run
Example #6
0
 def add_execution_plan_snapshot(self, execution_plan_snapshot):
     """Record an execution plan snapshot in ``self._ep_snapshots``.

     Args:
         execution_plan_snapshot: The ``ExecutionPlanSnapshot`` to record;
             type-checked with ``check.inst_param``.

     Returns:
         The computed snapshot id under which the snapshot was stored.
     """
     check.inst_param(
         execution_plan_snapshot,
         "execution_plan_snapshot",
         ExecutionPlanSnapshot,
     )
     storage_key = create_execution_plan_snapshot_id(execution_plan_snapshot)
     self._ep_snapshots[storage_key] = execution_plan_snapshot
     return storage_key
Example #7
0
    def _construct_run_with_snapshots(
        self,
        pipeline_name,
        run_id,
        environment_dict,
        mode,
        solid_subset,
        step_keys_to_execute,
        status,
        tags,
        root_run_id,
        parent_run_id,
        pipeline_snapshot,
        execution_plan_snapshot,
        parent_pipeline_snapshot,
    ):
        """Build a ``PipelineRun`` and attach the ids of any supplied snapshots.

        Snapshots that run storage does not already hold are added to it
        (including the parent pipeline snapshot named by the pipeline
        snapshot's lineage), and each id returned by storage is checked
        against the locally computed id.

        Args:
            pipeline_name, run_id, environment_dict, mode, solid_subset,
            step_keys_to_execute, status, tags, root_run_id, parent_run_id:
                Forwarded unchanged to the ``PipelineRun`` constructor (apart
                from the tag defaulting described below).
            pipeline_snapshot: Pipeline snapshot to persist and link, or
                ``None``.
            execution_plan_snapshot: Execution plan snapshot to persist and
                link, or ``None``. Its ``pipeline_snapshot_id`` must equal the
                id computed for ``pipeline_snapshot``.
            parent_pipeline_snapshot: Snapshot whose id must equal
                ``pipeline_snapshot.lineage_snapshot.parent_snapshot_id``;
                only consulted when that parent id is not yet in run storage.

        Returns:
            The constructed ``PipelineRun``, with snapshot ids attached for
            each snapshot that was passed.

        Raises:
            A ``check.invariant`` failure when snapshot ids or step keys are
            inconsistent, including when ``execution_plan_snapshot`` refers to
            a pipeline snapshot but ``pipeline_snapshot`` is ``None``.
        """

        # https://github.com/dagster-io/dagster/issues/2403
        if tags and IS_AIRFLOW_INGEST_PIPELINE_STR in tags:
            if AIRFLOW_EXECUTION_DATE_STR not in tags:
                # NOTE: this writes the default execution date into the
                # caller's ``tags`` dict, not a copy.
                tags[AIRFLOW_EXECUTION_DATE_STR] = get_current_datetime_in_utc(
                ).isoformat()

        pipeline_run = PipelineRun(
            pipeline_name=pipeline_name,
            run_id=run_id,
            environment_dict=environment_dict,
            mode=mode,
            solid_subset=solid_subset,
            step_keys_to_execute=step_keys_to_execute,
            status=status,
            tags=tags,
            root_run_id=root_run_id,
            parent_run_id=parent_run_id,
        )

        # Bound up front so the execution-plan branch can always compare
        # against it; previously it was unbound (UnboundLocalError) whenever
        # an execution plan snapshot arrived without a pipeline snapshot.
        pipeline_snapshot_id = None

        if pipeline_snapshot is not None:
            from dagster.core.snap import create_pipeline_snapshot_id

            if pipeline_snapshot.lineage_snapshot:
                # Make sure the parent snapshot is stored before the child.
                if not self._run_storage.has_pipeline_snapshot(
                        pipeline_snapshot.lineage_snapshot.parent_snapshot_id):
                    check.invariant(
                        create_pipeline_snapshot_id(
                            parent_pipeline_snapshot) ==
                        pipeline_snapshot.lineage_snapshot.parent_snapshot_id,
                        'Parent pipeline snapshot id out of sync with passed parent pipeline snapshot',
                    )

                    returned_pipeline_snapshot_id = self._run_storage.add_pipeline_snapshot(
                        parent_pipeline_snapshot)
                    check.invariant(
                        pipeline_snapshot.lineage_snapshot.parent_snapshot_id
                        == returned_pipeline_snapshot_id)

            pipeline_snapshot_id = create_pipeline_snapshot_id(
                pipeline_snapshot)
            if not self._run_storage.has_pipeline_snapshot(
                    pipeline_snapshot_id):
                returned_pipeline_snapshot_id = self._run_storage.add_pipeline_snapshot(
                    pipeline_snapshot)
                # Storage must key the snapshot by the same id we computed.
                check.invariant(
                    pipeline_snapshot_id == returned_pipeline_snapshot_id)

            pipeline_run = pipeline_run.with_pipeline_snapshot_id(
                pipeline_snapshot_id)

        if execution_plan_snapshot is not None:
            from dagster.core.snap import create_execution_plan_snapshot_id

            check.invariant(
                execution_plan_snapshot.pipeline_snapshot_id ==
                pipeline_snapshot_id,
                'execution_plan_snapshot.pipeline_snapshot_id does not match the id of '
                'the passed pipeline_snapshot',
            )

            plan_step_keys = set(execution_plan_snapshot.step_keys_to_execute)
            if step_keys_to_execute:
                # A subset was requested: the plan must target exactly it.
                step_keys_consistent = set(
                    step_keys_to_execute) == plan_step_keys
            else:
                # No subset requested: the plan must cover every step.
                step_keys_consistent = plan_step_keys == {
                    step.key for step in execution_plan_snapshot.steps
                }

            check.invariant(
                step_keys_consistent,
                'We encode step_keys_to_execute twice in our stack, unfortunately. This check '
                'ensures that they are consistent. We check that step_keys_to_execute in the plan '
                'matches the step_keys_to_execute params if it is set. If it is not, this indicates '
                'a full execution plan, and so we verify that.',
            )

            execution_plan_snapshot_id = create_execution_plan_snapshot_id(
                execution_plan_snapshot)

            if not self._run_storage.has_execution_plan_snapshot(
                    execution_plan_snapshot_id):
                returned_execution_plan_snapshot_id = self._run_storage.add_execution_plan_snapshot(
                    execution_plan_snapshot)

                # Storage must key the snapshot by the same id we computed.
                check.invariant(execution_plan_snapshot_id ==
                                returned_execution_plan_snapshot_id)

            pipeline_run = pipeline_run.with_execution_plan_snapshot_id(
                execution_plan_snapshot_id)

        return pipeline_run