def test_create_execution_plan_snapshot():
    """Executing a pipeline records the id of its execution plan snapshot on the run."""

    @solid
    def noop_solid(_):
        pass

    @pipeline
    def noop_pipeline():
        noop_solid()

    instance = DagsterInstance.local_temp()

    # Build the snapshot by hand so its id can be compared with what the run stores.
    plan = create_execution_plan(noop_pipeline)
    plan_snapshot = snapshot_from_execution_plan(
        plan, noop_pipeline.get_pipeline_snapshot_id()
    )
    expected_snapshot_id = create_execution_plan_snapshot_id(plan_snapshot)

    result = execute_pipeline(noop_pipeline, instance=instance)
    assert result.success

    stored_run = instance.get_run_by_id(result.run_id)
    assert stored_run.execution_plan_snapshot_id == expected_snapshot_id
    # Recompute the id from the same snapshot and compare again.
    assert stored_run.execution_plan_snapshot_id == create_execution_plan_snapshot_id(
        plan_snapshot
    )
def add_execution_plan_snapshot(self, execution_plan_snapshot):
    """Keep an execution plan snapshot in ``self._ep_snapshots`` and return its id.

    The snapshot is stored under the id computed by
    ``create_execution_plan_snapshot_id``; an existing entry under the same id
    is overwritten.
    """
    check.inst_param(
        execution_plan_snapshot, 'execution_plan_snapshot', ExecutionPlanSnapshot
    )
    snapshot_id = create_execution_plan_snapshot_id(execution_plan_snapshot)
    self._ep_snapshots[snapshot_id] = execution_plan_snapshot
    return snapshot_id
def add_execution_plan_snapshot(self, execution_plan_snapshot):
    """Store an execution plan snapshot through ``self._add_snapshot``.

    The snapshot id is computed with ``create_execution_plan_snapshot_id`` and
    the snapshot is tagged as ``SnapshotType.EXECUTION_PLAN``. Whatever
    ``_add_snapshot`` returns is returned to the caller.
    """
    check.inst_param(
        execution_plan_snapshot, 'execution_plan_snapshot', ExecutionPlanSnapshot
    )
    snapshot_id = create_execution_plan_snapshot_id(execution_plan_snapshot)
    stored = self._add_snapshot(
        snapshot_id=snapshot_id,
        snapshot_obj=execution_plan_snapshot,
        snapshot_type=SnapshotType.EXECUTION_PLAN,
    )
    return stored
def _ensure_persisted_execution_plan_snapshot(
    self, execution_plan_snapshot, pipeline_snapshot_id, step_keys_to_execute
):
    """Validate an execution plan snapshot, store it if missing, and return its id.

    Checks performed before touching storage:

    * the snapshot's ``pipeline_snapshot_id`` equals ``pipeline_snapshot_id``;
    * if ``step_keys_to_execute`` is non-empty, it matches the snapshot's
      ``step_keys_to_execute`` as a set; otherwise the snapshot's
      ``step_keys_to_execute`` must cover every step in the snapshot.

    The snapshot is added to ``self._run_storage`` only when the storage does
    not already report having it.
    """
    from dagster.core.snap.execution_plan_snapshot import (
        ExecutionPlanSnapshot,
        create_execution_plan_snapshot_id,
    )

    check.inst_param(
        execution_plan_snapshot, 'execution_plan_snapshot', ExecutionPlanSnapshot
    )
    check.str_param(pipeline_snapshot_id, 'pipeline_snapshot_id')
    check.opt_list_param(step_keys_to_execute, 'step_keys_to_execute', of_type=str)

    mismatch_message = (
        'Snapshot mismatch: Snapshot ID in execution plan snapshot is '
        '"{ep_pipeline_snapshot_id}" and snapshot_id created in memory is '
        '"{pipeline_snapshot_id}"'
    ).format(
        ep_pipeline_snapshot_id=execution_plan_snapshot.pipeline_snapshot_id,
        pipeline_snapshot_id=pipeline_snapshot_id,
    )
    check.invariant(
        execution_plan_snapshot.pipeline_snapshot_id == pipeline_snapshot_id,
        mismatch_message,
    )

    plan_step_keys = set(execution_plan_snapshot.step_keys_to_execute)
    if step_keys_to_execute:
        # An explicit subset was requested: the plan must encode the same subset.
        step_keys_consistent = set(step_keys_to_execute) == plan_step_keys
    else:
        # No subset requested: the plan must be a full plan over all its steps.
        all_step_keys = {step.key for step in execution_plan_snapshot.steps}
        step_keys_consistent = plan_step_keys == all_step_keys

    check.invariant(
        step_keys_consistent,
        'We encode step_keys_to_execute twice in our stack, unfortunately. This check '
        'ensures that they are consistent. We check that step_keys_to_execute in the plan '
        'matches the step_keys_to_execute params if it is set. If it is not, this indicates '
        'a full execution plan, and so we verify that.',
    )

    snapshot_id = create_execution_plan_snapshot_id(execution_plan_snapshot)
    if self._run_storage.has_execution_plan_snapshot(snapshot_id):
        return snapshot_id

    # The storage computes its own id on insert; it must agree with ours.
    stored_id = self._run_storage.add_execution_plan_snapshot(execution_plan_snapshot)
    check.invariant(snapshot_id == stored_id)
    return snapshot_id
def get_or_create_run(
    self,
    pipeline_name=None,
    run_id=None,
    environment_dict=None,
    mode=None,
    selector=None,
    step_keys_to_execute=None,
    status=None,
    tags=None,
    root_run_id=None,
    parent_run_id=None,
    pipeline_snapshot=None,
    execution_plan_snapshot=None,
):
    """Return the stored run with the same run id, or add a new run to storage.

    A ``PipelineRun`` is built from the arguments. If ``pipeline_snapshot``
    and/or ``execution_plan_snapshot`` are given, each is added to
    ``self._run_storage`` when not already present and its id is attached to
    the run. An ``execution_plan_snapshot`` requires the ``pipeline_snapshot``
    it refers to; the two are checked against each other.

    If a run with the same id already exists it is compared with the newly
    built run: the stored run is returned when they are equal, otherwise
    ``DagsterRunConflict`` is raised listing the differing fields. When no
    such run exists, the result of ``self._run_storage.add_run`` is returned.

    Note: when ``tags`` contains ``IS_AIRFLOW_INGEST_PIPELINE_STR`` but not
    ``AIRFLOW_EXECUTION_DATE_STR``, the latter is written into the caller's
    ``tags`` dict in place.
    """
    if tags and IS_AIRFLOW_INGEST_PIPELINE_STR in tags:
        if AIRFLOW_EXECUTION_DATE_STR not in tags:
            tags[AIRFLOW_EXECUTION_DATE_STR] = get_current_datetime_in_utc().isoformat()

    pipeline_run = PipelineRun(
        pipeline_name=pipeline_name,
        run_id=run_id,
        environment_dict=environment_dict,
        mode=mode,
        selector=selector,
        step_keys_to_execute=step_keys_to_execute,
        status=status,
        tags=tags,
        root_run_id=root_run_id,
        parent_run_id=parent_run_id,
    )

    # Bound up front so the execution-plan branch below can never hit an
    # UnboundLocalError when no pipeline snapshot was supplied.
    pipeline_snapshot_id = None

    if pipeline_snapshot is not None:
        from dagster.core.snap.pipeline_snapshot import create_pipeline_snapshot_id

        pipeline_snapshot_id = create_pipeline_snapshot_id(pipeline_snapshot)

        if not self._run_storage.has_pipeline_snapshot(pipeline_snapshot_id):
            returned_pipeline_snapshot_id = self._run_storage.add_pipeline_snapshot(
                pipeline_snapshot
            )
            # The storage computes its own id on insert; it must agree with ours.
            check.invariant(pipeline_snapshot_id == returned_pipeline_snapshot_id)

        pipeline_run = pipeline_run.with_pipeline_snapshot_id(pipeline_snapshot_id)

    if execution_plan_snapshot is not None:
        from dagster.core.snap.execution_plan_snapshot import create_execution_plan_snapshot_id

        check.invariant(
            pipeline_snapshot is not None,
            'An execution_plan_snapshot was provided without a pipeline_snapshot; '
            'the pipeline snapshot is required to validate the execution plan snapshot.',
        )
        check.invariant(
            execution_plan_snapshot.pipeline_snapshot_id == pipeline_snapshot_id,
            (
                'Snapshot mismatch: Snapshot ID in execution plan snapshot is '
                '"{ep_pipeline_snapshot_id}" and snapshot_id created in memory is '
                '"{pipeline_snapshot_id}"'
            ).format(
                ep_pipeline_snapshot_id=execution_plan_snapshot.pipeline_snapshot_id,
                pipeline_snapshot_id=pipeline_snapshot_id,
            ),
        )

        execution_plan_snapshot_id = create_execution_plan_snapshot_id(execution_plan_snapshot)

        if not self._run_storage.has_execution_plan_snapshot(execution_plan_snapshot_id):
            returned_execution_plan_snapshot_id = self._run_storage.add_execution_plan_snapshot(
                execution_plan_snapshot
            )
            check.invariant(execution_plan_snapshot_id == returned_execution_plan_snapshot_id)

        pipeline_run = pipeline_run.with_execution_plan_snapshot_id(execution_plan_snapshot_id)

    if not self.has_run(pipeline_run.run_id):
        return self._run_storage.add_run(pipeline_run)

    # A run with this id already exists: it must match what was requested.
    candidate_run = self.get_run_by_id(pipeline_run.run_id)
    field_diff = _check_run_equality(pipeline_run, candidate_run)
    if field_diff:
        raise DagsterRunConflict(
            'Found conflicting existing run with same id {run_id}. Runs differ in:'
            '\n{field_diff}'.format(
                run_id=pipeline_run.run_id,
                field_diff=_format_field_diff(field_diff),
            ),
        )
    return candidate_run