def test_debug_snapshot_import(self, storage):
    """Store snapshots under explicitly supplied ids and check lookups use those ids.

    A pipeline snapshot and an execution plan snapshot are each added via
    ``storage.add_snapshot`` with a ``snapshot_id`` that differs from the id
    computed from the snapshot itself. Only the supplied id should be found.
    """
    from dagster.core.execution.api import create_execution_plan
    from dagster.core.snap import (
        create_execution_plan_snapshot_id,
        snapshot_from_execution_plan,
    )

    # Register a run first, as the original test does.
    new_run_id = make_new_run_id()
    storage.add_run(
        TestRunStorage.build_run(pipeline_name="pipeline_name", run_id=new_run_id)
    )

    # Pipeline snapshot stored under an overridden id.
    pipeline_def = PipelineDefinition(name="some_pipeline", solid_defs=[])
    pipe_snap = pipeline_def.get_pipeline_snapshot()
    computed_pipe_id = create_pipeline_snapshot_id(pipe_snap)
    overridden_pipe_id = f"{computed_pipe_id}-new-snapshot"
    storage.add_snapshot(pipe_snap, snapshot_id=overridden_pipe_id)

    assert not storage.has_snapshot(computed_pipe_id)
    assert storage.has_snapshot(overridden_pipe_id)

    # Execution plan snapshot stored under an overridden id.
    plan = create_execution_plan(pipeline_def)
    plan_snap = snapshot_from_execution_plan(plan, overridden_pipe_id)
    computed_plan_id = create_execution_plan_snapshot_id(plan_snap)
    overridden_plan_id = f"{computed_plan_id}-new-snapshot"
    storage.add_snapshot(plan_snap, snapshot_id=overridden_plan_id)

    assert not storage.has_snapshot(computed_plan_id)
    assert storage.has_snapshot(overridden_plan_id)
def test_single_write_read_with_snapshot(self, storage):
    """Write a pipeline snapshot and a run referencing it, read both back, then wipe.

    After ``storage.wipe()`` neither the snapshot nor the run should be present.
    """
    pipeline_def = PipelineDefinition(name="some_pipeline", solid_defs=[])
    snapshot = pipeline_def.get_pipeline_snapshot()
    snapshot_id = create_pipeline_snapshot_id(snapshot)

    run_id = "lkasjdflkjasdf"
    run = PipelineRun(
        run_id=run_id,
        pipeline_name=pipeline_def.name,
        pipeline_snapshot_id=snapshot_id,
    )

    # The snapshot is absent until it is explicitly added.
    assert not storage.has_pipeline_snapshot(snapshot_id)
    assert storage.add_pipeline_snapshot(snapshot) == snapshot_id

    # Compare serialized forms of the fetched and original snapshots.
    fetched = storage.get_pipeline_snapshot(snapshot_id)
    assert serialize_pp(fetched) == serialize_pp(snapshot)

    storage.add_run(run)
    assert storage.get_run_by_id(run_id) == run

    storage.wipe()

    assert not storage.has_pipeline_snapshot(snapshot_id)
    assert not storage.has_run(run_id)
def test_single_write_read_with_snapshot(self, storage):
    """Round-trip a pipeline snapshot and a run that references it, then wipe.

    Skipped unless ``storage`` is an ``InMemoryRunStorage``.
    """
    if not isinstance(storage, InMemoryRunStorage):
        pytest.skip()

    pipeline_def = PipelineDefinition(name='some_pipeline', solid_defs=[])
    snapshot = pipeline_def.get_pipeline_snapshot()
    snapshot_id = create_pipeline_snapshot_id(snapshot)

    run_id = 'lkasjdflkjasdf'
    run = PipelineRun.create_empty_run(
        run_id=run_id,
        pipeline_name=pipeline_def.name,
        pipeline_snapshot_id=snapshot_id,
    )

    # The snapshot is absent until it is explicitly added.
    assert not storage.has_pipeline_snapshot(snapshot_id)
    assert storage.add_pipeline_snapshot(snapshot) == snapshot_id

    fetched = storage.get_pipeline_snapshot(snapshot_id)
    assert fetched == snapshot

    storage.add_run(run)
    assert storage.get_run_by_id(run_id) == run

    storage.wipe()

    assert not storage.has_pipeline_snapshot(snapshot_id)
    assert not storage.has_run(run_id)
def external_pipeline_data_from_def(
    pipeline_def: PipelineDefinition,
) -> ExternalPipelineData:
    """Build an ``ExternalPipelineData`` describing ``pipeline_def``.

    Args:
        pipeline_def: The pipeline definition to describe.

    Returns:
        ExternalPipelineData carrying the pipeline's name, its snapshot and
        parent snapshot, its presets ordered by name, and whether the
        definition is a ``JobDefinition``.
    """
    check.inst_param(pipeline_def, "pipeline_def", PipelineDefinition)

    name = pipeline_def.name
    snapshot = pipeline_def.get_pipeline_snapshot()
    parent_snapshot = pipeline_def.get_parent_pipeline_snapshot()

    # Convert each preset definition, then order by preset name.
    presets = [
        external_preset_data_from_def(preset_def)
        for preset_def in pipeline_def.preset_defs
    ]
    presets.sort(key=lambda preset: preset.name)

    return ExternalPipelineData(
        name=name,
        pipeline_snapshot=snapshot,
        parent_pipeline_snapshot=parent_snapshot,
        active_presets=presets,
        is_job=isinstance(pipeline_def, JobDefinition),
    )
def test_add_get_snapshot(self, storage):
    """Add a pipeline snapshot, fetch it back by id, and confirm wipe removes it."""
    pipeline_def = PipelineDefinition(name="some_pipeline", solid_defs=[])
    snapshot = pipeline_def.get_pipeline_snapshot()
    expected_id = create_pipeline_snapshot_id(snapshot)

    # Adding returns the same id that is computed from the snapshot.
    assert storage.add_pipeline_snapshot(snapshot) == expected_id

    fetched = storage.get_pipeline_snapshot(expected_id)
    assert fetched
    assert serialize_pp(fetched) == serialize_pp(snapshot)

    assert storage.has_pipeline_snapshot(expected_id)
    assert not storage.has_pipeline_snapshot("nope")

    storage.wipe()

    assert not storage.has_pipeline_snapshot(expected_id)
def validate_run_config(
    pipeline_def: PipelineDefinition,
    run_config: Optional[Dict[str, Any]] = None,
    mode: Optional[str] = None,
) -> Dict[str, Any]:
    """Validate a run config blob against a pipeline and mode.

    On success, the validated config is returned in dictionary form, as
    produced by ``ResolvedRunConfig.build(...).to_dict()``.

    Args:
        pipeline_def (PipelineDefinition): The pipeline definition to validate
            run config against.
        run_config (Optional[Dict[str, Any]]): The run config to validate.
        mode (Optional[str]): The mode of the pipeline to validate against.
            When omitted, the pipeline's default mode name is used.

    Returns:
        Dict[str, Any]: A dictionary representation of the validated config.
    """
    checked_def = check.inst_param(pipeline_def, "pipeline_def", PipelineDefinition)
    checked_config = check.opt_dict_param(run_config, "run_config", key_type=str)
    # Fall back to the pipeline's default mode when none was given.
    checked_mode = check.opt_str_param(
        mode, "mode", default=checked_def.get_default_mode_name()
    )

    resolved = ResolvedRunConfig.build(checked_def, checked_config, mode=checked_mode)
    return resolved.to_dict()
def test_write_conflicting_run_id(self, storage):
    """Adding the same run twice must raise ``DagsterRunAlreadyExists``."""
    pipeline_def = PipelineDefinition(name="some_pipeline", solid_defs=[])
    duplicate_run = DagsterRun(run_id="double_run_id", pipeline_name=pipeline_def.name)

    # First insert succeeds and returns a truthy value.
    assert storage.add_run(duplicate_run)

    # Second insert with the same run id is rejected.
    with pytest.raises(DagsterRunAlreadyExists):
        storage.add_run(duplicate_run)
def test_add_get_execution_snapshot(self, storage):
    """Add an execution plan snapshot, fetch it back, and confirm wipe removes it."""
    from dagster.core.execution.api import create_execution_plan
    from dagster.core.snap import snapshot_from_execution_plan

    pipeline_def = PipelineDefinition(name="some_pipeline", solid_defs=[])
    plan = create_execution_plan(pipeline_def)
    plan_snapshot = snapshot_from_execution_plan(
        plan, pipeline_def.get_pipeline_snapshot_id()
    )

    stored_id = storage.add_execution_plan_snapshot(plan_snapshot)

    fetched = storage.get_execution_plan_snapshot(stored_id)
    assert fetched
    assert serialize_pp(fetched) == serialize_pp(plan_snapshot)

    assert storage.has_execution_plan_snapshot(stored_id)
    assert not storage.has_execution_plan_snapshot("nope")

    storage.wipe()

    assert not storage.has_execution_plan_snapshot(stored_id)
def get_associated_input_def(self, pipeline_def: PipelineDefinition) -> InputDefinition:
    """
    Returns the InputDefinition along the potential composition InputMapping chain
    that the config was provided at.

    When ``self.solid_handle`` is falsy the input is looked up on the pipeline's
    graph; otherwise it is looked up on the solid that the handle resolves to.
    """
    if not self.solid_handle:
        return pipeline_def.graph.input_def_named(self.input_name)

    solid = pipeline_def.get_solid(self.solid_handle)
    return solid.input_def_named(self.input_name)
def compute_version(
    self,
    step_versions: Dict[str, Optional[str]],
    pipeline_def: PipelineDefinition,
    resolved_run_config: ResolvedRunConfig,
) -> Optional[str]:
    """Compute the version of this input as loaded from config.

    The config for this input (or ``None`` when the solid has no config entry)
    is handed to the loader of the input's dagster type, which produces the
    version. ``step_versions`` is accepted but not used here.
    """
    solid_config = resolved_run_config.solids.get(str(self.solid_handle))
    if solid_config:
        input_config = solid_config.inputs.get(self.input_name)
    else:
        input_config = None

    input_def = pipeline_def.get_solid(self.solid_handle).input_def_named(self.input_name)
    loader = input_def.dagster_type.loader
    return loader.compute_loaded_input_version(input_config)
def test_single_write_with_missing_snapshot(self, storage):
    """Adding a run whose snapshot id was never stored must raise."""
    pipeline_def = PipelineDefinition(name="some_pipeline", solid_defs=[])
    orphan_run = PipelineRun(
        run_id="lkasjdflkjasdf",
        pipeline_name=pipeline_def.name,
        # No snapshot with this id has been added to storage.
        pipeline_snapshot_id="nope",
    )

    with pytest.raises(DagsterSnapshotDoesNotExist):
        storage.add_run(orphan_run)
def do_test_single_write_with_missing_snapshot(self, storage):
    """Adding a run whose snapshot id was never stored must raise.

    Skipped unless ``storage`` is an ``InMemoryRunStorage``.
    """
    if not isinstance(storage, InMemoryRunStorage):
        pytest.skip()

    pipeline_def = PipelineDefinition(name='some_pipeline', solid_defs=[])
    orphan_run = PipelineRun.create_empty_run(
        run_id='lkasjdflkjasdf',
        pipeline_name=pipeline_def.name,
        # No snapshot with this id has been added to storage.
        pipeline_snapshot_id='nope',
    )

    with pytest.raises(DagsterSnapshotDoesNotExist):
        storage.add_run(orphan_run)
def test_fetch_by_snapshot_id(self, storage):
    """Filtering runs by snapshot id returns only the run tied to that snapshot."""
    assert storage

    def_a = PipelineDefinition(name="some_pipeline", solid_defs=[])
    def_b = PipelineDefinition(name="some_other_pipeline", solid_defs=[])

    snap_a = def_a.get_pipeline_snapshot()
    snap_b = def_b.get_pipeline_snapshot()
    snap_a_id = create_pipeline_snapshot_id(snap_a)
    snap_b_id = create_pipeline_snapshot_id(snap_b)

    assert storage.add_pipeline_snapshot(snap_a) == snap_a_id
    assert storage.add_pipeline_snapshot(snap_b) == snap_b_id

    # One run per snapshot.
    run_id_a = make_new_run_id()
    run_id_b = make_new_run_id()
    run_a = TestRunStorage.build_run(
        run_id=run_id_a,
        pipeline_name="some_pipeline",
        pipeline_snapshot_id=snap_a_id,
    )
    storage.add_run(run_a)
    run_b = TestRunStorage.build_run(
        run_id=run_id_b,
        pipeline_name="some_other_pipeline",
        pipeline_snapshot_id=snap_b_id,
    )
    storage.add_run(run_b)

    assert len(storage.get_runs()) == 2

    matches_a = storage.get_runs(PipelineRunsFilter(snapshot_id=snap_a_id))
    assert len(matches_a) == 1
    assert matches_a[0].run_id == run_id_a

    matches_b = storage.get_runs(PipelineRunsFilter(snapshot_id=snap_b_id))
    assert len(matches_b) == 1
    assert matches_b[0].run_id == run_id_b
def get_input_def(self, pipeline_def: PipelineDefinition) -> InputDefinition:
    """Return the input definition named ``self.input_name`` on this source's solid."""
    solid = pipeline_def.get_solid(self.solid_handle)
    return solid.input_def_named(self.input_name)
def execute_in_process(
    node: NodeDefinition,
    run_config: Optional[dict] = None,
    resources: Optional[Dict[str, ResourceDefinition]] = None,
    loggers: Optional[Dict[str, LoggerDefinition]] = None,
    input_values: Optional[Dict[str, Any]] = None,
    instance: Optional[DagsterInstance] = None,
    output_capturing_enabled: Optional[bool] = True,
) -> NodeExecutionResult:
    """Execute a single node in process by wrapping it in an ephemeral pipeline.

    Args:
        node: The node definition to execute.
        run_config: Run config passed to plan creation, run creation and the
            execution context manager.
        resources: Resource definitions for the single generated mode. They are
            merged with ``mem_io_manager`` under ``EPHEMERAL_IO_MANAGER_KEY``.
        loggers: Logger definitions for the generated mode.
        input_values: Mapping of input name to value. Each entry becomes a
            value solid wired to the node's input of the same name.
        instance: Instance to run against; passed to
            ``ephemeral_instance_if_missing``.
        output_capturing_enabled: When truthy, a recorder dict is handed to the
            execution context manager as ``output_capture``.

    Returns:
        ``InProcessSolidResult`` when ``node`` is a ``SolidDefinition``,
        otherwise ``InProcessGraphResult``. Only events whose solid handle is,
        or descends from, the top-level node are included.
    """
    node = check.inst_param(node, "node", NodeDefinition)
    resources = check.opt_dict_param(
        resources, "resources", key_type=str, value_type=ResourceDefinition
    )
    # Report the real parameter name ("loggers") in check failures.
    loggers = check.opt_dict_param(
        loggers, "loggers", key_type=str, value_type=LoggerDefinition
    )
    run_config = check.opt_dict_param(run_config, "run_config", key_type=str)
    input_values = check.opt_dict_param(input_values, "input_values", key_type=str)

    # Each provided input value gets its own value solid feeding the node's input.
    node_defs = [node]
    dependencies: Dict[str, Dict[str, DependencyDefinition]] = defaultdict(dict)
    for input_name, input_value in input_values.items():
        dependencies[node.name][input_name] = DependencyDefinition(input_name)
        node_defs.append(_create_value_solid(input_name, input_value))

    mode_def = ModeDefinition(
        "created",
        resource_defs=merge_dicts(resources, {EPHEMERAL_IO_MANAGER_KEY: mem_io_manager}),
        logger_defs=loggers,
    )

    pipeline_def = PipelineDefinition(
        node_defs,
        name=f"ephemeral_{node.name}_node_pipeline",
        mode_defs=[mode_def],
        dependencies=dependencies,
    )

    pipeline = InMemoryPipeline(pipeline_def)

    execution_plan = create_execution_plan(
        pipeline, run_config=run_config, mode=mode_def.name
    )

    recorder: Dict[StepOutputHandle, Any] = {}

    with ephemeral_instance_if_missing(instance) as execute_instance:
        pipeline_run = execute_instance.create_run_for_pipeline(
            pipeline_def=pipeline_def,
            run_config=run_config,
            mode=mode_def.name,
        )

        _execute_run_iterable = ExecuteRunWithPlanIterable(
            execution_plan=execution_plan,
            iterator=pipeline_execution_iterator,
            execution_context_manager=PipelineExecutionContextManager(
                pipeline=pipeline,
                execution_plan=execution_plan,
                pipeline_run=pipeline_run,
                instance=execute_instance,
                run_config=run_config,
                output_capture=recorder if output_capturing_enabled else None,
            ),
        )
        # Drain the iterable while the instance is still open.
        event_list = list(_execute_run_iterable)

    top_level_node_handle = SolidHandle.from_string(node.name)

    # Drop events from the generated value solids; keep the node's own subtree.
    event_list_for_top_lvl_node = [
        event
        for event in event_list
        if event.solid_handle
        and event.solid_handle.is_or_descends_from(top_level_node_handle)
    ]

    if isinstance(node, SolidDefinition):
        return InProcessSolidResult(
            node, SolidHandle(node.name, None), event_list_for_top_lvl_node, recorder
        )
    else:
        return InProcessGraphResult(
            node, SolidHandle(node.name, None), event_list_for_top_lvl_node, recorder
        )
def execute_in_process(
    node: NodeDefinition,
    run_config: Optional[dict] = None,
    resources: Optional[Dict[str, ResourceDefinition]] = None,
    loggers: Optional[Dict[str, LoggerDefinition]] = None,
    instance: Optional[DagsterInstance] = None,
) -> ExecutionResult:
    """Execute a single node in process by wrapping it in an ephemeral pipeline.

    Args:
        node: The node definition to execute.
        run_config: Run config passed to plan creation, run creation and the
            execution context manager.
        resources: Resource definitions for the single generated mode.
        loggers: Logger definitions for the generated mode.
        instance: Instance to run against; passed to
            ``ephemeral_instance_if_missing``.

    Returns:
        An ``ExecutionResult`` for ``node`` holding only the events whose solid
        handle is, or descends from, the top-level node.
    """
    node = check.inst_param(node, "node", NodeDefinition)
    resources = check.opt_dict_param(
        resources, "resources", key_type=str, value_type=ResourceDefinition
    )
    # Report the real parameter name ("loggers") in check failures.
    loggers = check.opt_dict_param(
        loggers, "loggers", key_type=str, value_type=LoggerDefinition
    )
    run_config = check.opt_dict_param(run_config, "run_config", key_type=str)

    node_defs = [node]

    mode_def = ModeDefinition(
        "created",
        resource_defs=resources,
        logger_defs=loggers,
    )

    pipeline_def = PipelineDefinition(
        node_defs,
        name=f"ephemeral_{node.name}_node_pipeline",
        mode_defs=[mode_def],
    )

    pipeline = InMemoryPipeline(pipeline_def)

    execution_plan = create_execution_plan(
        pipeline, run_config=run_config, mode=mode_def.name
    )

    with ephemeral_instance_if_missing(instance) as execute_instance:
        pipeline_run = execute_instance.create_run_for_pipeline(
            pipeline_def=pipeline_def,
            run_config=run_config,
            mode=mode_def.name,
        )

        _execute_run_iterable = ExecuteRunWithPlanIterable(
            execution_plan=execution_plan,
            iterator=pipeline_execution_iterator,
            execution_context_manager=PipelineExecutionContextManager(
                execution_plan=execution_plan,
                pipeline_run=pipeline_run,
                instance=execute_instance,
                run_config=run_config,
            ),
        )
        # Drain the iterable while the instance is still open.
        event_list = list(_execute_run_iterable)

    top_level_node_handle = SolidHandle.from_string(node.name)

    event_list_for_top_lvl_node = [
        event
        for event in event_list
        if event.solid_handle
        and event.solid_handle.is_or_descends_from(top_level_node_handle)
    ]

    return ExecutionResult(node, event_list_for_top_lvl_node)
def required_resource_keys(self, pipeline_def: PipelineDefinition) -> Set[str]:
    """Return a one-element set holding the root manager key of this source's input."""
    solid = pipeline_def.get_solid(self.solid_handle)
    manager_key = solid.input_def_named(self.input_name).root_manager_key
    return {manager_key}
def define_foo_pipeline():
    """Return a pipeline named ``foo`` containing one lambda solid that returns 1."""

    @lambda_solid
    def do_something():
        return 1

    foo_pipeline = PipelineDefinition(name='foo', solid_defs=[do_something])
    return foo_pipeline
def _load_value(self, pipeline_def: PipelineDefinition):
    """Return the default value declared for ``self.input_name`` on this source's solid definition."""
    solid_definition = pipeline_def.get_solid(self.solid_handle).definition
    return solid_definition.default_value_for_input(self.input_name)
def execute_in_process(
    node: NodeDefinition,
    run_config: Optional[dict] = None,
    resources: Optional[Dict[str, Any]] = None,
    loggers: Optional[Dict[str, LoggerDefinition]] = None,
    input_values: Optional[Dict[str, Any]] = None,
    instance: Optional[DagsterInstance] = None,
    output_capturing_enabled: bool = True,
) -> NodeExecutionResult:
    """Execute a single node in process by wrapping it in an ephemeral pipeline.

    Args:
        node: The node definition to execute.
        run_config: Run config forwarded to ``core_execute_in_process``.
        resources: Mapping of resource key to either a ``ResourceDefinition``
            or an already-instantiated resource value. Plain values are wrapped
            in hardcoded definitions (see the loop below).
        loggers: Logger definitions for the single generated mode.
        input_values: Mapping of input name to value. Each entry becomes a
            value solid wired to the node's input of the same name.
        instance: Instance forwarded to ``core_execute_in_process``.
        output_capturing_enabled: Forwarded to ``core_execute_in_process``.

    Returns:
        Whatever ``core_execute_in_process`` returns for the ephemeral pipeline.
    """
    node = check.inst_param(node, "node", NodeDefinition)
    # Report the real parameter name ("loggers") in check failures.
    loggers = check.opt_dict_param(
        loggers, "loggers", key_type=str, value_type=LoggerDefinition
    )
    run_config = check.opt_dict_param(run_config, "run_config", key_type=str)
    input_values = check.opt_dict_param(input_values, "input_values", key_type=str)
    resources = check.opt_dict_param(resources, "resources", key_type=str)

    resource_defs: Dict[str, Any] = {}
    # Wrap instantiated resource values in a resource definition.
    # If an instantiated IO manager is provided, wrap it in an IO manager definition.
    for resource_key, resource in resources.items():
        if isinstance(resource, ResourceDefinition):
            resource_defs[resource_key] = resource
        elif isinstance(resource, IOManager):
            resource_defs[resource_key] = IOManagerDefinition.hardcoded_io_manager(resource)
        else:
            resource_defs[resource_key] = ResourceDefinition.hardcoded_resource(resource)

    # Each provided input value gets its own value solid feeding the node's input.
    node_defs = [node]
    dependencies: Dict[
        Union[str, SolidInvocation], Dict[str, IDependencyDefinition]
    ] = defaultdict(dict)
    for input_name, input_value in input_values.items():
        dependencies[node.name][input_name] = DependencyDefinition(input_name)
        node_defs.append(_create_value_solid(input_name, input_value))

    mode_def = ModeDefinition(
        "created",
        resource_defs=merge_dicts(resource_defs, {EPHEMERAL_IO_MANAGER_KEY: mem_io_manager}),
        logger_defs=loggers,
    )

    pipeline_def = PipelineDefinition(
        node_defs,
        name=f"ephemeral_{node.name}_node_pipeline",
        mode_defs=[mode_def],
        dependencies=dependencies,
    )

    return core_execute_in_process(
        node=node,
        ephemeral_pipeline=pipeline_def,
        run_config=run_config,
        instance=instance,
        output_capturing_enabled=output_capturing_enabled,
    )