def get_test_external_repo():
    """Return ``test_repository`` from this file via an out-of-process location."""
    code_pointers = {
        'test_repository': FileCodePointer(__file__, 'test_repository'),
    }
    handle = RepositoryLocationHandle.create_out_of_process_location(
        location_name='test_location',
        repository_code_pointer_dict=code_pointers,
    )
    location = PythonEnvRepositoryLocation(handle)
    return location.get_repository('test_repository')
def get_bar_repo_repository_location_handle():
    """Return an out-of-process location handle for ``bar_repo``.

    The repository is pointed at by file: ``api_tests_repo.py``, resolved
    relative to this file.
    """
    repo_file = file_relative_path(__file__, 'api_tests_repo.py')
    bar_repo_pointer = FileCodePointer(repo_file, 'bar_repo')
    return RepositoryLocationHandle.create_out_of_process_location(
        location_name='bar_repo_location',
        repository_code_pointer_dict={'bar_repo': bar_repo_pointer},
    )
def for_file(cls, file, fn_name, working_directory=None, container_image=None):
    """Construct ``cls`` from a file-based code pointer.

    Args:
        file: Path of the python file holding the target.
        fn_name: Name of the target inside ``file``.
        working_directory: Directory recorded on the pointer. Any falsy value
            (``None``, ``''``) is replaced by the current working directory.
        container_image: Passed through unchanged as the second argument of
            ``cls``.
    """
    resolved_directory = working_directory or os.getcwd()
    pointer = FileCodePointer(file, fn_name, resolved_directory)
    return cls(pointer, container_image)
def execute_eagerly_on_celery(tempdir, pipeline_name, tags=None):
    """Execute a pipeline from this file with celery's ``task_always_eager`` set.

    Filesystem storage and the local temp instance are both rooted at
    ``tempdir``; ``tags`` is forwarded to ``execute_pipeline`` unchanged.
    """
    recon_pipeline = ReconstructablePipeline(FileCodePointer(__file__, pipeline_name))
    storage_config = {'filesystem': {'config': {'base_dir': tempdir}}}
    execution_config = {
        'celery': {'config': {'config_source': {'task_always_eager': True}}},
    }
    return execute_pipeline(
        recon_pipeline,
        environment_dict={
            'storage': storage_config,
            'execution': execution_config,
        },
        instance=DagsterInstance.local_temp(tempdir=tempdir),
        tags=tags,
    )
def test_repository_snapshot_api():
    """The sync snapshot API returns an ``ExternalRepository`` named ``bar``."""
    repo_file = file_relative_path(__file__, 'api_tests_repo.py')
    handle = LocationHandle('test', FileCodePointer(repo_file, 'bar_repo'))

    repo = sync_get_external_repository(handle)

    assert isinstance(repo, ExternalRepository)
    assert repo.name == 'bar'
def test_dagster_out_of_process_location():
    """An out-of-process location can hand back ``test_repo`` from ``setup.py``."""
    setup_file = file_relative_path(__file__, 'setup.py')
    handle = RepositoryLocationHandle.create_out_of_process_location(
        location_name='test_location',
        repository_code_pointer_dict={
            'test_repo': FileCodePointer(setup_file, 'test_repo'),
        },
    )
    location = PythonEnvRepositoryLocation(handle)
    assert location.get_repository('test_repo')
def define_out_of_process_context(python_file, fn_name, instance):
    """Build a ``DagsterGraphQLContext`` with one out-of-process location.

    The location is named ``test_location`` and maps ``fn_name`` to a file
    code pointer for ``fn_name`` in ``python_file``. ``instance`` must be a
    ``DagsterInstance``.
    """
    check.inst_param(instance, 'instance', DagsterInstance)

    handle = RepositoryLocationHandle.create_out_of_process_location(
        'test_location',
        {fn_name: FileCodePointer(python_file, fn_name)},
    )
    location = PythonEnvRepositoryLocation(handle)
    return DagsterGraphQLContext(locations=[location], instance=instance)
def execute_pipeline_on_celery(tempdir, pipeline_name, tags=None):
    """Execute a pipeline from this file with the default celery executor config.

    Filesystem storage and the local temp instance are both rooted at
    ``tempdir``; ``tags`` is forwarded to ``execute_pipeline`` unchanged.
    """
    recon_pipeline = ReconstructablePipeline(FileCodePointer(__file__, pipeline_name))
    local_instance = DagsterInstance.local_temp(tempdir=tempdir)
    run_config = {
        'storage': {'filesystem': {'config': {'base_dir': tempdir}}},
        'execution': {'celery': {}},
    }
    return execute_pipeline(
        recon_pipeline,
        environment_dict=run_config,
        instance=local_instance,
        tags=tags,
    )
def test_pipeline_python_file():
    """CLI args naming a python file and function yield a matching file pointer."""
    python_file = file_relative_path(__file__, 'foo_pipeline.py')
    cli_args = {
        'module_name': None,
        'fn_name': 'define_pipeline',
        'pipeline_name': None,
        'python_file': python_file,
        'repository_yaml': None,
    }

    recon_pipeline = recon_pipeline_for_cli_args(cli_args)

    assert isinstance(recon_pipeline, ReconstructablePipeline)
    expected_pointer = FileCodePointer(python_file, 'define_pipeline')
    assert recon_pipeline.repository.pointer == expected_pointer
def execute_pipeline_on_celery(pipeline_name):
    """Yield the result of running a pipeline from this file on celery.

    The run uses a temporary directory for both filesystem storage and the
    local temp instance. The result is yielded while that directory still
    exists; the directory is cleaned up once the generator is resumed.
    """
    with seven.TemporaryDirectory() as tempdir:
        recon_pipeline = ReconstructablePipeline(FileCodePointer(__file__, pipeline_name))
        local_instance = DagsterInstance.local_temp(tempdir=tempdir)
        run_config = {
            'storage': {'filesystem': {'config': {'base_dir': tempdir}}},
            'execution': {'celery': {}},
        }
        yield execute_pipeline(
            recon_pipeline,
            environment_dict=run_config,
            instance=local_instance,
        )
def from_yaml(cls, file_path):
    """Construct ``cls`` from a repository YAML file.

    The document must hold a ``repository`` mapping containing ``fn`` and
    either ``module`` or ``file``. A non-empty ``module`` takes precedence
    and produces a ``ModuleCodePointer``. Otherwise ``file`` is required and
    is resolved relative to the directory containing ``file_path``.

    Args:
        file_path (str): Path to the YAML file.

    Returns:
        ``cls(pointer=..., yaml_path=file_path)``.

    Raises:
        A ``check`` failure when ``file_path`` or one of the config entries
        has the wrong type, or when neither ``module`` nor ``file`` is set.
    """
    check.str_param(file_path, 'file_path')

    config = load_yaml_from_path(file_path)
    repository_config = check.dict_elem(config, 'repository')
    module_name = check.opt_str_elem(repository_config, 'module')
    file_name = check.opt_str_elem(repository_config, 'file')
    fn_name = check.str_elem(repository_config, 'fn')

    if module_name:
        pointer = ModuleCodePointer(module_name, fn_name)
    else:
        # Without a module the file entry is mandatory. Validate it here so a
        # missing entry fails with a config error naming 'file' instead of an
        # opaque TypeError from os.path.join(dirname, None).
        file_name = check.str_elem(repository_config, 'file')
        # rebase file in config off of the path in the config file
        config_dir = os.path.dirname(os.path.abspath(file_path))
        pointer = FileCodePointer(os.path.join(config_dir, file_name), fn_name)

    return cls(pointer=pointer, yaml_path=file_path)
def get_origin(self):
    """
    Hack! Return a hand-built origin that matches the k8s images.

    The BK image lays the project out under
    /workdir/python_modules/dagster-test/dagster_test/test_project, while the
    images inside the kind cluster use /dagster_test/test_project. The normal
    origin therefore points at a path that does not exist in the cluster, so
    this one is injected instead.
    """
    code_pointer = FileCodePointer(
        "/dagster_test/test_project/test_pipelines/repo.py",
        "define_demo_execution_repo",
    )
    repository_origin = RepositoryPythonOrigin(
        executable_path="python",
        code_pointer=code_pointer,
    )
    return PipelinePythonOrigin(self._pipeline_index.name, repository_origin)
def get_python_origin(self):
    """
    Hack! Return a hand-built origin that matches the docker-celery images.

    The BK image lays the project out under
    /workdir/python_modules/dagster-test/dagster_test/test_project, which
    differs from the layout seen by the test that creates the
    ReconstructablePipeline. The normal origin therefore won't work, so this
    one is injected instead. The repository's container image is carried
    over onto the injected origin.
    """
    code_pointer = FileCodePointer(
        "/dagster_test/test_project/test_pipelines/repo.py",
        "define_demo_execution_repo",
    )
    repository_origin = RepositoryPythonOrigin(
        executable_path="python",
        code_pointer=code_pointer,
        container_image=self.repository.container_image,
    )
    return PipelinePythonOrigin(self.pipeline_name, repository_origin)
def get_external_origin(self):
    """
    Hack! Return a hand-built external origin that matches the k8s images.

    The BK image lays the project out under
    /workdir/python_modules/dagster-test/dagster_test/test_project, while the
    images inside the kind cluster use /dagster_test/test_project. The normal
    origin therefore points at a path that does not exist in the cluster, so
    this one is injected instead.
    """
    pointer = FileCodePointer(
        python_file="/dagster_test/test_project/test_pipelines/repo.py",
        fn_name="define_demo_execution_repo",
    )
    location_origin = InProcessRepositoryLocationOrigin(
        recon_repo=ReconstructableRepository(pointer=pointer),
    )
    repository_origin = ExternalRepositoryOrigin(
        repository_location_origin=location_origin,
        repository_name="demo_execution_repo",
    )
    return ExternalJobOrigin(
        external_repository_origin=repository_origin,
        job_name=self.name,
    )
def test_reconstruct_from_origin():
    """A pipeline rebuilt from an origin carries over every origin field."""
    code_pointer = FileCodePointer(
        python_file="foo.py",
        fn_name="bar",
        working_directory="/",
    )
    repository_origin = RepositoryPythonOrigin(
        executable_path="my_python",
        code_pointer=code_pointer,
        container_image="my_image",
        entry_point=DEFAULT_DAGSTER_ENTRY_POINT,
    )
    origin = PipelinePythonOrigin(
        pipeline_name="foo_pipe",
        repository_origin=repository_origin,
    )

    recon_pipeline = recon_pipeline_from_origin(origin)
    recon_repo = recon_pipeline.repository

    assert recon_pipeline.pipeline_name == origin.pipeline_name
    assert recon_repo.pointer == origin.repository_origin.code_pointer
    assert recon_repo.container_image == origin.repository_origin.container_image
    assert recon_repo.executable_path == origin.repository_origin.executable_path
def test_repository_python_file():
    """A python file plus function resolves; adding a module or yaml is rejected."""
    python_file = file_relative_path(__file__, 'bar_repo.py')

    recon_pipeline = recon_pipeline_for_cli_args(
        {
            'pipeline_name': 'foo',
            'python_file': python_file,
            'fn_name': 'define_bar_repo',
        }
    )
    assert isinstance(recon_pipeline, ReconstructablePipeline)
    expected_pointer = FileCodePointer(python_file, 'define_bar_repo')
    assert recon_pipeline.repository.pointer == expected_pointer
    assert recon_pipeline.pipeline_name == 'foo'

    # A module name alongside a python file is a usage error.
    module_and_file_args = {
        'module_name': 'kdjfkd',
        'pipeline_name': 'foo',
        'python_file': file_relative_path(__file__, 'bar_repo.py'),
        'fn_name': 'define_bar_repo',
        'repository_yaml': None,
    }
    with pytest.raises(UsageError):
        recon_pipeline_for_cli_args(module_and_file_args)

    # A repository yaml alongside a python file is a usage error too.
    yaml_and_file_args = {
        'module_name': None,
        'pipeline_name': 'foo',
        'python_file': file_relative_path(__file__, 'bar_repo.py'),
        'fn_name': 'define_bar_repo',
        'repository_yaml': 'kjdfkdjf',
    }
    with pytest.raises(UsageError):
        recon_pipeline_for_cli_args(yaml_and_file_args)
def reconstructable(target):
    '''
    Build a ReconstructablePipeline for ``target``.

    ``target`` must be a function returning a PipelineDefinition or a
    @pipeline decorated function, defined at module scope in a real file.
    Lambdas, targets whose ``__qualname__`` differs from ``__name__``, and
    callers running from <stdin> are rejected with
    DagsterInvariantViolationError.
    '''
    from dagster.core.definitions import PipelineDefinition

    if not seven.is_function_or_decorator_instance_of(target, PipelineDefinition):
        wrong_type_message = (
            'Reconstructable target should be a function or definition produced '
            'by a decorated function, got {type}.'
        ).format(type=type(target))
        raise DagsterInvariantViolationError(wrong_type_message)

    if seven.is_lambda(target):
        lambda_message = (
            'Reconstructable target can not be a lambda. Use a function or '
            'decorated function defined at module scope instead.'
        )
        raise DagsterInvariantViolationError(lambda_message)

    if seven.qualname_differs(target):
        nested_message = (
            'Reconstructable target "{target.__name__}" has a different '
            '__qualname__ "{target.__qualname__}" indicating it is not '
            'defined at module scope. Use a function or decorated function '
            'defined at module scope instead.'
        ).format(target=target)
        raise DagsterInvariantViolationError(nested_message)

    # Must be called from this frame: the helper inspects the call stack to
    # find the file of whoever called reconstructable().
    python_file = get_python_file_from_previous_stack_frame()
    if python_file.endswith('<stdin>'):
        stdin_message = (
            'reconstructable() can not reconstruct pipelines from <stdin>, unable to target file {}. '
        ).format(python_file)
        raise DagsterInvariantViolationError(stdin_message)

    return bootstrap_standalone_recon_pipeline(
        FileCodePointer(python_file=python_file, fn_name=target.__name__)
    )
def for_file(cls, file, fn_name, working_directory=None):
    """Construct ``cls`` from a file-based code pointer.

    A truthy ``working_directory`` selects ``FileInDirectoryCodePointer``;
    otherwise a plain ``FileCodePointer`` is used.
    """
    if not working_directory:
        return cls(FileCodePointer(file, fn_name))
    return cls(FileInDirectoryCodePointer(file, fn_name, working_directory))
def for_file(python_file, fn_name):
    """Bootstrap a standalone reconstructable pipeline from ``fn_name`` in ``python_file``."""
    pointer = FileCodePointer(python_file, fn_name)
    return bootstrap_standalone_recon_pipeline(pointer)
def reconstructable(target):
    """
    Create a :py:class:`~dagster.core.definitions.reconstructable.ReconstructablePipeline` from a
    function that returns a :py:class:`~dagster.PipelineDefinition`/:py:class:`~dagster.JobDefinition`,
    or a function decorated with :py:func:`@pipeline <dagster.pipeline>`/:py:func:`@job <dagster.job>`.

    When your pipeline/job must cross process boundaries, e.g., for execution on multiple nodes or
    in different systems (like ``dagstermill``), Dagster must know how to reconstruct the
    pipeline/job on the other side of the process boundary.

    Passing a job created with ``~dagster.GraphDefinition.to_job`` to ``reconstructable()``,
    requires you to wrap that job's definition in a module-scoped function, and pass that function
    instead:

    .. code-block:: python

        from dagster import graph, reconstructable

        @graph
        def my_graph():
            ...

        def define_my_job():
            return my_graph.to_job()

        reconstructable(define_my_job)

    This function implements a very conservative strategy for reconstruction, so that its behavior
    is easy to predict, but as a consequence it is not able to reconstruct certain kinds of
    pipelines or jobs, such as those defined by lambdas, in nested scopes (e.g., dynamically within
    a method call), or in interactive environments such as the Python REPL or Jupyter notebooks.

    If you need to reconstruct objects constructed in these ways, you should use
    :py:func:`~dagster.core.definitions.reconstructable.build_reconstructable_pipeline` instead,
    which allows you to specify your own reconstruction strategy.

    Resolution order: the target is first addressed by module and name when its module can be
    determined and is not ``__main__``; if that attempt fails for any reason, it is addressed by
    the file it was defined in together with the current working directory.

    Raises:
        DagsterInvariantViolationError: if ``target`` is not a function or decorated definition,
            is a lambda, is not defined at module scope, or was defined in an interactive
            environment with no backing file.

    Examples:

    .. code-block:: python

        from dagster import graph, job, reconstructable

        @job
        def foo_job():
            ...

        reconstructable_foo_job = reconstructable(foo_job)


        @graph
        def foo():
            ...

        def make_bar_job():
            return foo.to_job()

        reconstructable_bar_job = reconstructable(make_bar_job)
    """
    from dagster.core.definitions import PipelineDefinition, JobDefinition

    if not seven.is_function_or_decorator_instance_of(target, PipelineDefinition):
        if isinstance(target, JobDefinition):
            raise DagsterInvariantViolationError(
                "Reconstructable target was not a function returning a job definition, or a job "
                "definition produced by a decorated function. If your job was constructed using "
                "``GraphDefinition.to_job``, you must wrap the ``to_job`` call in a function at "
                "module scope, ie not within any other functions. "
                "To learn more, check out the docs on ``reconstructable``: "
                "https://docs.dagster.io/_apidocs/execution#dagster.reconstructable"
            )
        raise DagsterInvariantViolationError(
            "Reconstructable target should be a function or definition produced "
            "by a decorated function, got {type}.".format(type=type(target)),
        )

    if seven.is_lambda(target):
        raise DagsterInvariantViolationError(
            "Reconstructable target can not be a lambda. Use a function or "
            "decorated function defined at module scope instead, or use "
            "build_reconstructable_target."
        )

    if seven.qualname_differs(target):
        raise DagsterInvariantViolationError(
            'Reconstructable target "{target.__name__}" has a different '
            '__qualname__ "{target.__qualname__}" indicating it is not '
            "defined at module scope. Use a function or decorated function "
            "defined at module scope instead, or use build_reconstructable_pipeline.".format(
                target=target
            )
        )

    # Best effort: prefer a module-based pointer. Any ordinary failure (e.g.
    # inspect.getmodule returning None) falls through to the file-based path.
    # Only Exception is caught so KeyboardInterrupt/SystemExit still propagate.
    try:
        if (
            hasattr(target, "__module__")
            and hasattr(target, "__name__")
            and inspect.getmodule(target).__name__ != "__main__"
        ):
            return ReconstructablePipeline.for_module(target.__module__, target.__name__)
    except Exception:  # pylint: disable=broad-except
        pass

    python_file = get_python_file_from_target(target)
    if not python_file:
        raise DagsterInvariantViolationError(
            "reconstructable() can not reconstruct jobs or pipelines defined in interactive environments "
            "like <stdin>, IPython, or Jupyter notebooks. "
            "Use a pipeline defined in a module or file instead, or "
            "use build_reconstructable_target."
        )

    pointer = FileCodePointer(
        python_file=python_file, fn_name=target.__name__, working_directory=os.getcwd()
    )

    return bootstrap_standalone_recon_pipeline(pointer)
def reconstructable(target):
    """
    Create a ReconstructablePipeline from a function that returns a PipelineDefinition, or a
    function decorated with :py:func:`@pipeline <dagster.pipeline>`

    When your pipeline must cross process boundaries, e.g., for execution on multiple nodes or
    in different systems (like dagstermill), Dagster must know how to reconstruct the pipeline
    on the other side of the process boundary.

    This function implements a very conservative strategy for reconstructing pipelines, so that
    its behavior is easy to predict, but as a consequence it is not able to reconstruct certain
    kinds of pipelines, such as those defined by lambdas, in nested scopes (e.g., dynamically
    within a method call), or in interactive environments such as the Python REPL or Jupyter
    notebooks.

    If you need to reconstruct pipelines constructed in these ways, you should use
    :py:func:`build_reconstructable_pipeline` instead, which allows you to specify your own
    strategy for reconstructing a pipeline.

    Resolution order: the target is first addressed by module and name when its module can be
    determined and is not ``__main__``; if that attempt fails for any reason, it is addressed by
    the file it was defined in together with the current working directory.

    Raises:
        DagsterInvariantViolationError: if ``target`` is not a function or decorated definition,
            is a lambda, is not defined at module scope, or was defined in an interactive
            environment with no backing file.

    Examples:

    .. code-block:: python

        from dagster import PipelineDefinition, pipeline, reconstructable

        @pipeline
        def foo_pipeline():
            ...

        reconstructable_foo_pipeline = reconstructable(foo_pipeline)


        def make_bar_pipeline():
            return PipelineDefinition(...)

        reconstructable_bar_pipeline = reconstructable(make_bar_pipeline)
    """
    from dagster.core.definitions import PipelineDefinition

    if not seven.is_function_or_decorator_instance_of(target, PipelineDefinition):
        raise DagsterInvariantViolationError(
            "Reconstructable target should be a function or definition produced "
            "by a decorated function, got {type}.".format(type=type(target)),
        )

    if seven.is_lambda(target):
        raise DagsterInvariantViolationError(
            "Reconstructable target can not be a lambda. Use a function or "
            "decorated function defined at module scope instead, or use "
            "build_reconstructable_pipeline."
        )

    if seven.qualname_differs(target):
        raise DagsterInvariantViolationError(
            'Reconstructable target "{target.__name__}" has a different '
            '__qualname__ "{target.__qualname__}" indicating it is not '
            "defined at module scope. Use a function or decorated function "
            "defined at module scope instead, or use build_reconstructable_pipeline.".format(
                target=target
            )
        )

    # Best effort: prefer a module-based pointer. Any ordinary failure (e.g.
    # inspect.getmodule returning None) falls through to the file-based path.
    # Only Exception is caught so KeyboardInterrupt/SystemExit still propagate.
    try:
        if (
            hasattr(target, "__module__")
            and hasattr(target, "__name__")
            and inspect.getmodule(target).__name__ != "__main__"
        ):
            return ReconstructablePipeline.for_module(target.__module__, target.__name__)
    except Exception:  # pylint: disable=broad-except
        pass

    python_file = get_python_file_from_target(target)
    if not python_file:
        raise DagsterInvariantViolationError(
            "reconstructable() can not reconstruct pipelines defined in interactive environments "
            "like <stdin>, IPython, or Jupyter notebooks. "
            "Use a pipeline defined in a module or file instead, or "
            "use build_reconstructable_pipeline."
        )

    pointer = FileCodePointer(
        python_file=python_file, fn_name=target.__name__, working_directory=os.getcwd()
    )

    return bootstrap_standalone_recon_pipeline(pointer)
def for_file(cls, file, fn_name):
    """Construct ``cls`` from a ``FileCodePointer`` to ``fn_name`` in ``file``."""
    pointer = FileCodePointer(file, fn_name)
    return cls(pointer)
def reconstructable(target):
    """
    Create a ReconstructablePipeline from a function that returns a PipelineDefinition, or a
    function decorated with :py:func:`@pipeline <dagster.pipeline>`

    Dagster needs to be able to rebuild a pipeline on the far side of a process boundary, e.g.
    when executing on multiple nodes or in a different system (like dagstermill).

    The strategy used here is deliberately conservative so that it stays predictable. It cannot
    handle pipelines defined by lambdas, in nested scopes (e.g., dynamically within a method
    call), or in interactive environments such as the Python REPL or Jupyter notebooks. For those
    cases use :py:func:`build_reconstructable_pipeline`, which lets you supply your own
    reconstruction strategy.

    The resulting pointer records the caller's python file, the target's ``__name__`` and the
    current working directory.

    Examples:

    .. code-block:: python

        from dagster import PipelineDefinition, pipeline, reconstructable

        @pipeline
        def foo_pipeline():
            ...

        reconstructable_foo_pipeline = reconstructable(foo_pipeline)


        def make_bar_pipeline():
            return PipelineDefinition(...)

        reconstructable_bar_pipeline = reconstructable(make_bar_pipeline)
    """
    from dagster.core.definitions import PipelineDefinition

    if not seven.is_function_or_decorator_instance_of(target, PipelineDefinition):
        wrong_type_message = (
            "Reconstructable target should be a function or definition produced "
            "by a decorated function, got {type}."
        ).format(type=type(target))
        raise DagsterInvariantViolationError(wrong_type_message)

    if seven.is_lambda(target):
        lambda_message = (
            "Reconstructable target can not be a lambda. Use a function or "
            "decorated function defined at module scope instead, or use "
            "build_reconstructable_pipeline."
        )
        raise DagsterInvariantViolationError(lambda_message)

    if seven.qualname_differs(target):
        nested_message = (
            'Reconstructable target "{target.__name__}" has a different '
            '__qualname__ "{target.__qualname__}" indicating it is not '
            "defined at module scope. Use a function or decorated function "
            "defined at module scope instead, or use build_reconstructable_pipeline."
        ).format(target=target)
        raise DagsterInvariantViolationError(nested_message)

    # Must be called from this frame: the helper inspects the call stack to
    # find the file of whoever called reconstructable().
    python_file = get_python_file_from_previous_stack_frame()
    if python_file.endswith("<stdin>"):
        stdin_message = (
            "reconstructable() can not reconstruct pipelines from <stdin>, unable to "
            "target file {}. Use a pipeline defined in a module or file instead, or "
            "use build_reconstructable_pipeline."
        ).format(python_file)
        raise DagsterInvariantViolationError(stdin_message)

    pointer = FileCodePointer(
        python_file=python_file,
        fn_name=target.__name__,
        working_directory=os.getcwd(),
    )

    # NOTE: failures previously observed for targets defined under ipython:
    # Exception: Can not import module <ipython-input-3-70f55f9e97d2> from path /Users/max/Desktop/richard_brady_repro/<ipython-input-3-70f55f9e97d2>, unable to load spec.
    # Exception: Can not import module  from path /private/var/folders/zc/zyv5jx615157j4mypwcx_kxr0000gn/T/b3edec1e-b4c5-4ea4-a4ae-24a01e566aba/, unable to load spec.
    return bootstrap_standalone_recon_pipeline(pointer)
def test_dagster_out_of_process_environment():
    """An out-of-process location built from ``setup.py`` returns a repository."""
    setup_file = file_relative_path(__file__, 'setup.py')
    pointer = FileCodePointer(setup_file, 'define_repository')
    location = OutOfProcessRepositoryLocation('test', pointer)
    assert location.get_repository('test')