def step_context_to_step_run_ref(step_context, prior_attempts_count, package_dir=None):
    '''Build a ``StepRunRef`` describing the step held by ``step_context``.

    Args:
        step_context (SystemStepExecutionContext): The context of the step to reference.
        prior_attempts_count (int): How many times this step has already been tried in
            the same pipeline run.
        package_dir (Optional[str]): If set, a file code pointer on the reconstructable
            pipeline (or on its repository) is replaced by a module code pointer that is
            relative to this package root. This enables executing steps in remote setups
            where the package containing the pipeline lives at a different filesystem
            location than in the environment executing the plan process.

    Returns (StepRunRef): A reference to the step.
    '''
    pipeline = step_context.pipeline

    if package_dir:
        if isinstance(pipeline, ReconstructablePipeline) and isinstance(
            pipeline.pointer, FileCodePointer
        ):
            # Pipeline is addressed directly by a file pointer: swap in a module pointer.
            file_pointer = pipeline.pointer
            module_pointer = ModuleCodePointer(
                _module_in_package_dir(file_pointer.python_file, package_dir),
                file_pointer.fn_name,
            )
            pipeline = ReconstructablePipeline(
                pointer=module_pointer,
                frozen_solid_subset=pipeline.frozen_solid_subset,
            )
        elif isinstance(pipeline, ReconstructablePipelineFromRepo) and isinstance(
            pipeline.repository.pointer, FileCodePointer
        ):
            # Pipeline is addressed through a repository whose pointer is file based.
            file_pointer = pipeline.repository.pointer
            module_pointer = ModuleCodePointer(
                _module_in_package_dir(file_pointer.python_file, package_dir),
                file_pointer.fn_name,
            )
            pipeline = ReconstructablePipelineFromRepo(
                repository=ReconstructableRepository(pointer=module_pointer),
                pipeline_name=pipeline.pipeline_name,
                frozen_solid_subset=pipeline.frozen_solid_subset,
            )

    return StepRunRef(
        environment_dict=step_context.environment_dict,
        pipeline_run=step_context.pipeline_run,
        run_id=step_context.pipeline_run.run_id,
        step_key=step_context.step.key,
        executor_config=step_context.executor_config,
        recon_pipeline=pipeline,
        prior_attempts_count=prior_attempts_count,
    )
def for_module(cls, module, fn_name, working_directory=None, container_image=None):
    """Construct ``cls`` from a ``ModuleCodePointer``.

    The pointer is built from ``module``, ``fn_name`` and ``working_directory``
    (in that positional order) and handed to ``cls`` together with
    ``container_image``.
    """
    module_pointer = ModuleCodePointer(module, fn_name, working_directory)
    return cls(module_pointer, container_image)
def test_yaml_file():
    '''A repository yaml resolves to a module pointer; incomplete CLI args raise UsageError.'''
    cli_args = {
        'module_name': None,
        'pipeline_name': 'foobar',
        'python_file': None,
        'fn_name': None,
        'repository_yaml': file_relative_path(__file__, 'repository_module.yaml'),
    }
    recon_pipeline = recon_pipeline_for_cli_args(cli_args)

    assert isinstance(recon_pipeline, ReconstructablePipeline)
    assert recon_pipeline.repository.pointer == ModuleCodePointer(
        'dagster_examples.intro_tutorial.repos', 'define_repo'
    )

    # Each of these argument sets is expected to be rejected with a UsageError.
    rejected_cli_args = (
        {'module_name': 'kdjfdk', 'pipeline_name': 'foobar'},
        {'fn_name': 'kjdfkd', 'pipeline_name': 'foobar'},
        {'pipeline_name': 'foobar', 'python_file': 'kjdfkdj'},
    )
    for bad_args in rejected_cli_args:
        with pytest.raises(UsageError):
            assert recon_pipeline_for_cli_args(bad_args)
def step_context_to_step_run_ref(
    step_context: StepExecutionContext,
    prior_attempts_count: int,
    package_dir: Optional[str] = None,
) -> StepRunRef:
    """Build a ``StepRunRef`` describing the step held by ``step_context``.

    Args:
        step_context (StepExecutionContext): The context of the step to reference.
        prior_attempts_count (int): How many times this step has already been tried in
            the same pipeline run.
        package_dir (Optional[str]): If set, a file code pointer on the pipeline's
            repository is replaced by a module code pointer that is relative to this
            package root. This enables executing steps in remote setups where the
            package containing the pipeline lives at a different filesystem location
            than in the environment executing the plan process.

    Returns (StepRunRef): A reference to the step.
    """
    check.inst_param(step_context, "step_context", StepExecutionContext)
    check.int_param(prior_attempts_count, "prior_attempts_count")

    retry_mode = step_context.retry_mode
    recon_pipeline = step_context.pipeline

    if (
        package_dir
        and isinstance(recon_pipeline, ReconstructablePipeline)
        and isinstance(recon_pipeline.repository.pointer, FileCodePointer)
    ):
        original_repo = recon_pipeline.repository
        file_pointer = original_repo.pointer
        module_pointer = ModuleCodePointer(
            _module_in_package_dir(file_pointer.python_file, package_dir),
            file_pointer.fn_name,
            working_directory=os.getcwd(),
        )
        # Carry the remaining repository settings over unchanged; only the pointer differs.
        relocated_repo = ReconstructableRepository(
            pointer=module_pointer,
            container_image=original_repo.container_image,
            executable_path=original_repo.executable_path,
            entry_point=original_repo.entry_point,
        )
        recon_pipeline = ReconstructablePipeline(
            repository=relocated_repo,
            pipeline_name=recon_pipeline.pipeline_name,
            solids_to_execute=recon_pipeline.solids_to_execute,
        )

    # The parent run is only looked up on the instance when the run has a parent id.
    parent_run_id = step_context.pipeline_run.parent_run_id
    if parent_run_id:
        parent_run = step_context.instance.get_run_by_id(parent_run_id)
    else:
        parent_run = None

    return StepRunRef(
        run_config=step_context.run_config,
        pipeline_run=step_context.pipeline_run,
        run_id=step_context.pipeline_run.run_id,
        step_key=step_context.step.key,
        retry_mode=retry_mode,
        recon_pipeline=recon_pipeline,
        prior_attempts_count=prior_attempts_count,
        known_state=step_context.execution_plan.known_state,
        parent_run=parent_run,
    )
def test_queued_pipeline_origin_check():
    """A QUEUED PipelineRun built with both origins succeeds; without them a CheckError is expected."""
    code_pointer = ModuleCodePointer("fake", "fake")

    location_origin = InProcessRepositoryLocationOrigin(ReconstructableRepository(code_pointer))
    external_repo_origin = ExternalRepositoryOrigin(location_origin, "foo_repo")
    fake_pipeline_origin = ExternalPipelineOrigin(external_repo_origin, "foo")

    python_repo_origin = RepositoryPythonOrigin(sys.executable, code_pointer)
    fake_code_origin = PipelinePythonOrigin(
        pipeline_name="foo",
        repository_origin=python_repo_origin,
    )

    # Constructing a queued run with both origins must not raise.
    PipelineRun(
        status=PipelineRunStatus.QUEUED,
        external_pipeline_origin=fake_pipeline_origin,
        pipeline_code_origin=fake_code_origin,
    )

    # Queued status without origins: directly at construction ...
    with pytest.raises(check.CheckError):
        PipelineRun(status=PipelineRunStatus.QUEUED)

    # ... and when switching an existing run to QUEUED.
    with pytest.raises(check.CheckError):
        PipelineRun().with_status(PipelineRunStatus.QUEUED)
def test_repository_module():
    '''Module name plus function name yields a pipeline whose repository uses a module pointer.'''
    cli_args = {
        'module_name': 'dagster',
        'pipeline_name': 'foo',
        'python_file': None,
        'fn_name': 'define_bar_repo',
        'repository_yaml': None,
    }
    recon_pipeline = recon_pipeline_for_cli_args(cli_args)

    assert isinstance(recon_pipeline, ReconstructablePipeline)

    repo_pointer = recon_pipeline.repository.pointer
    assert repo_pointer == ModuleCodePointer('dagster', 'define_bar_repo')
    assert recon_pipeline.pipeline_name == 'foo'
def test_pipeline_module():
    '''Resolving CLI args that name a missing module function raises an invariant violation.

    The call to ``recon_pipeline_for_cli_args`` is expected to raise
    ``DagsterInvariantViolationError``. Assertions placed after the raising call
    inside the ``pytest.raises`` block could never execute, so the test checks
    only the exception.
    '''
    from dagster.core.errors import DagsterInvariantViolationError

    cli_args = {
        'module_name': 'dagster',
        'fn_name': 'define_pipeline',
        'pipeline_name': None,
        'python_file': None,
        'repository_yaml': None,
    }

    with pytest.raises(DagsterInvariantViolationError):
        # can't find it so it pukes
        recon_pipeline_for_cli_args(cli_args)
def create_invalid_run(instance, **kwargs):
    """Create a test run on ``instance`` that points at a fake repository.

    The run's external pipeline origin and ``pipeline_name`` both use
    "wrong-pipeline"; any extra ``kwargs`` are forwarded to
    ``create_run_for_test``. Nothing is returned.
    """
    fake_repository = ReconstructableRepository(ModuleCodePointer("fake", "fake"))
    repository_origin = ExternalRepositoryOrigin(
        InProcessRepositoryLocationOrigin(fake_repository),
        "foo",
    )
    pipeline_origin = ExternalPipelineOrigin(repository_origin, "wrong-pipeline")

    create_run_for_test(
        instance,
        external_pipeline_origin=pipeline_origin,
        pipeline_name="wrong-pipeline",
        **kwargs,
    )
def from_yaml(cls, file_path):
    '''Construct ``cls`` from the ``repository`` section of a yaml file.

    The section must provide ``fn`` and may provide ``module`` or ``file``.
    When ``module`` is set a ``ModuleCodePointer`` is used; otherwise ``file``
    is resolved relative to the directory of the yaml file and a
    ``FileCodePointer`` is used.

    Args:
        file_path (str): Path to the yaml file.

    Returns: ``cls(pointer=..., yaml_path=file_path)``.
    '''
    check.str_param(file_path, 'file_path')

    yaml_contents = load_yaml_from_path(file_path)
    repository_config = check.dict_elem(yaml_contents, 'repository')

    module_name = check.opt_str_elem(repository_config, 'module')
    file_name = check.opt_str_elem(repository_config, 'file')
    fn_name = check.str_elem(repository_config, 'fn')

    if module_name:
        return cls(pointer=ModuleCodePointer(module_name, fn_name), yaml_path=file_path)

    # rebase file in config off of the path in the config file
    yaml_dir = os.path.dirname(os.path.abspath(file_path))
    rebased_file = os.path.join(yaml_dir, file_name)
    return cls(pointer=FileCodePointer(rebased_file, fn_name), yaml_path=file_path)
def test_queued_pipeline_origin_check():
    """A QUEUED PipelineRun built with an external origin succeeds; without one a CheckError is expected."""
    fake_repository = ReconstructableRepository(ModuleCodePointer("fake", "fake"))
    repository_origin = ExternalRepositoryOrigin(
        InProcessRepositoryLocationOrigin(fake_repository),
        "foo_repo",
    )
    fake_pipeline_origin = ExternalPipelineOrigin(repository_origin, "foo")

    # Constructing a queued run with an external pipeline origin must not raise.
    PipelineRun(
        status=PipelineRunStatus.QUEUED,
        external_pipeline_origin=fake_pipeline_origin,
    )

    # Queued status without an origin: directly at construction ...
    with pytest.raises(check.CheckError):
        PipelineRun(status=PipelineRunStatus.QUEUED)

    # ... and when switching an existing run to QUEUED.
    with pytest.raises(check.CheckError):
        PipelineRun().with_status(PipelineRunStatus.QUEUED)
def for_module(cls, module, fn_name):
    """Construct ``cls`` from a ``ModuleCodePointer`` built from ``module`` and ``fn_name``."""
    module_pointer = ModuleCodePointer(module, fn_name)
    return cls(module_pointer)
def for_module(module, fn_name):
    """Return the result of ``bootstrap_standalone_recon_pipeline`` for a module code pointer.

    The pointer is a ``ModuleCodePointer`` built from ``module`` and ``fn_name``.
    """
    module_pointer = ModuleCodePointer(module, fn_name)
    return bootstrap_standalone_recon_pipeline(module_pointer)
def build_reconstructable_target(
    reconstructor_module_name,
    reconstructor_function_name,
    reconstructable_args=None,
    reconstructable_kwargs=None,
    reconstructor_working_directory=None,
):
    """
    Create a :py:class:`dagster.core.definitions.reconstructable.ReconstructablePipeline`.

    When your pipeline must cross process boundaries, e.g., for execution on multiple nodes or in
    different systems (like ``dagstermill``), Dagster must know how to reconstruct the pipeline
    on the other side of the process boundary.

    This function allows you to use the strategy of your choice for reconstructing pipelines, so
    that you can reconstruct certain kinds of pipelines that are not supported by
    :py:func:`~dagster.reconstructable`, such as those defined by lambdas, in nested scopes (e.g.,
    dynamically within a method call), or in interactive environments such as the Python REPL or
    Jupyter notebooks.

    If you need to reconstruct pipelines constructed in these ways, use this function instead of
    :py:func:`~dagster.reconstructable`.

    Args:
        reconstructor_module_name (str): The name of the module containing the function to use to
            reconstruct the pipeline.
        reconstructor_function_name (str): The name of the function to use to reconstruct the
            pipeline.
        reconstructable_args (Tuple): Args to the function to use to reconstruct the pipeline.
            Values of the tuple must be JSON serializable.
        reconstructable_kwargs (Dict[str, Any]): Kwargs to the function to use to reconstruct the
            pipeline. Values of the dict must be JSON serializable.
        reconstructor_working_directory (Optional[str]): Working directory recorded on the
            module code pointer for the reconstructor function. Defaults to the current
            working directory of the calling process.

    Returns (ReconstructablePipeline): A reconstructable pipeline backed by a repository built
        from the custom pointer, named after the pipeline definition the pointer resolves to.

    Examples:

    .. code-block:: python

        # module: mymodule

        from dagster import PipelineDefinition, pipeline, build_reconstructable_pipeline

        class PipelineFactory:
            def make_pipeline(*args, **kwargs):

                @pipeline
                def _pipeline(...):
                    ...

                return _pipeline

        def reconstruct_pipeline(*args):
            factory = PipelineFactory()
            return factory.make_pipeline(*args)

        factory = PipelineFactory()

        foo_pipeline_args = (...,...)

        foo_pipeline_kwargs = {...:...}

        foo_pipeline = factory.make_pipeline(*foo_pipeline_args, **foo_pipeline_kwargs)

        reconstructable_foo_pipeline = build_reconstructable_pipeline(
            'mymodule',
            'reconstruct_pipeline',
            foo_pipeline_args,
            foo_pipeline_kwargs,
        )
    """
    check.str_param(reconstructor_module_name, "reconstructor_module_name")
    check.str_param(reconstructor_function_name, "reconstructor_function_name")
    # Keep the checked value: the cwd default is only applied through the return value,
    # so discarding it would leave the working directory as None.
    reconstructor_working_directory = check.opt_str_param(
        reconstructor_working_directory, "reconstructor_working_directory", os.getcwd()
    )

    reconstructable_args = list(check.opt_tuple_param(reconstructable_args, "reconstructable_args"))
    # Kwargs are stored as a list of [key, value] pairs rather than as a dict.
    reconstructable_kwargs = [
        [key, value]
        for key, value in check.opt_dict_param(
            reconstructable_kwargs, "reconstructable_kwargs", key_type=str
        ).items()
    ]

    reconstructor_pointer = ModuleCodePointer(
        reconstructor_module_name,
        reconstructor_function_name,
        working_directory=reconstructor_working_directory,
    )

    pointer = CustomPointer(reconstructor_pointer, reconstructable_args, reconstructable_kwargs)

    # Resolve the definition now so the pipeline name can be recorded on the result.
    pipeline_def = pipeline_def_from_pointer(pointer)

    return ReconstructablePipeline(
        repository=ReconstructableRepository(pointer),  # creates ephemeral repo
        pipeline_name=pipeline_def.name,
    )
def step_context_to_step_run_ref(step_context, prior_attempts_count, package_dir=None):
    '''Build a ``StepRunRef`` describing the step held by ``step_context``.

    Args:
        step_context (SystemStepExecutionContext): The context of the step to reference.
            Its executor must expose a ``retries`` attribute of type ``Retries``.
        prior_attempts_count (int): How many times this step has already been tried in
            the same pipeline run.
        package_dir (Optional[str]): If set, a file code pointer on the pipeline's
            repository is replaced by a module code pointer that is relative to this
            package root. This enables executing steps in remote setups where the
            package containing the pipeline lives at a different filesystem location
            than in the environment executing the plan process.

    Returns (StepRunRef): A reference to the step.
    '''
    check.inst_param(step_context, 'step_context', SystemStepExecutionContext)

    # This is a bit goofy right now, as the relationship between
    # step launcher and the executor is a little weird.
    #
    # As far as I can tell, the only goal of interacting with the executor here
    # is for the step launcher to inherit the retry policy. The step launcher
    # actually assumes that a retry policy exists (hence the invariants below).
    # However this isn't a formal requirement, so this can break. E.g. right
    # now using a dask executor and the step launcher would fail.
    #
    # Once migration is complete, I think we should add a retries property
    # to the Executor abc and make it optional. Then change this to handle
    # the "no retry" case
    has_retries = hasattr(step_context.executor, 'retries')
    check.param_invariant(
        has_retries,
        'step_context',
        'Executor must have retries property',
    )

    retries_is_valid = isinstance(step_context.executor.retries, Retries)
    check.param_invariant(
        retries_is_valid,
        'step_context',
        'Executor retries property must be of type Retries. Object was {}'.format(
            step_context.executor
        ),
    )

    check.int_param(prior_attempts_count, 'prior_attempts_count')

    retries = step_context.executor.retries
    recon_pipeline = step_context.pipeline

    if (
        package_dir
        and isinstance(recon_pipeline, ReconstructablePipeline)
        and isinstance(recon_pipeline.repository.pointer, FileCodePointer)
    ):
        file_pointer = recon_pipeline.repository.pointer
        module_pointer = ModuleCodePointer(
            _module_in_package_dir(file_pointer.python_file, package_dir),
            file_pointer.fn_name,
        )
        recon_pipeline = ReconstructablePipeline(
            repository=ReconstructableRepository(pointer=module_pointer),
            pipeline_name=recon_pipeline.pipeline_name,
            solids_to_execute=recon_pipeline.solids_to_execute,
        )

    return StepRunRef(
        run_config=step_context.run_config,
        pipeline_run=step_context.pipeline_run,
        run_id=step_context.pipeline_run.run_id,
        step_key=step_context.step.key,
        retries=retries,
        recon_pipeline=recon_pipeline,
        prior_attempts_count=prior_attempts_count,
    )
def for_module(cls, module, fn_name, container_image=None):
    """Construct ``cls`` from a ``ModuleCodePointer``.

    The pointer is built from ``module`` and ``fn_name`` and handed to ``cls``
    together with ``container_image``.
    """
    module_pointer = ModuleCodePointer(module, fn_name)
    return cls(module_pointer, container_image)
def fake_repo_target():
    """Return a ``RepositoryPythonOrigin`` for the current interpreter and a fake module pointer."""
    fake_pointer = ModuleCodePointer("fake", "fake")
    return RepositoryPythonOrigin(sys.executable, fake_pointer)