Example #1
def get_test_external_repo():
    return PythonEnvRepositoryLocation(
        RepositoryLocationHandle.create_out_of_process_location(
            location_name='test_location',
            repository_code_pointer_dict={
                'test_repository': FileCodePointer(__file__,
                                                   'test_repository'),
            },
        )).get_repository('test_repository')
Example #2
def get_bar_repo_repository_location_handle():
    return RepositoryLocationHandle.create_out_of_process_location(
        location_name='bar_repo_location',
        repository_code_pointer_dict={
            'bar_repo':
            FileCodePointer(file_relative_path(__file__, 'api_tests_repo.py'),
                            'bar_repo')
        },
    )
Example #3
    def for_file(cls, file, fn_name, working_directory=None, container_image=None):
        if not working_directory:
            working_directory = os.getcwd()
        return cls(FileCodePointer(file, fn_name, working_directory), container_image)
Example #4
def execute_eagerly_on_celery(tempdir, pipeline_name, tags=None):
    return execute_pipeline(
        ReconstructablePipeline(FileCodePointer(__file__, pipeline_name)),
        environment_dict={
            'storage': {'filesystem': {'config': {'base_dir': tempdir}}},
            'execution': {'celery': {'config': {'config_source': {'task_always_eager': True}}}},
        },
        instance=DagsterInstance.local_temp(tempdir=tempdir),
        tags=tags,
    )
Example #5
def test_repository_snapshot_api():
    location_handle = LocationHandle(
        'test',
        FileCodePointer(file_relative_path(__file__, 'api_tests_repo.py'),
                        'bar_repo'),
    )
    external_repository = sync_get_external_repository(location_handle)

    assert isinstance(external_repository, ExternalRepository)
    assert external_repository.name == 'bar'
Example #6
def test_dagster_out_of_process_location():
    env = PythonEnvRepositoryLocation(
        RepositoryLocationHandle.create_out_of_process_location(
            location_name='test_location',
            repository_code_pointer_dict={
                'test_repo':
                FileCodePointer(file_relative_path(__file__, 'setup.py'),
                                'test_repo'),
            },
        ))
    assert env.get_repository('test_repo')
Example #7
def define_out_of_process_context(python_file, fn_name, instance):
    check.inst_param(instance, 'instance', DagsterInstance)

    return DagsterGraphQLContext(
        locations=[
            PythonEnvRepositoryLocation(
                RepositoryLocationHandle.create_out_of_process_location(
                    'test_location',
                    {fn_name: FileCodePointer(python_file, fn_name)}))
        ],
        instance=instance,
    )
Example #8
def execute_pipeline_on_celery(tempdir, pipeline_name, tags=None):
    pipe = ReconstructablePipeline(FileCodePointer(__file__, pipeline_name))
    instance = DagsterInstance.local_temp(tempdir=tempdir)
    return execute_pipeline(
        pipe,
        environment_dict={
            'storage': {'filesystem': {'config': {'base_dir': tempdir}}},
            'execution': {'celery': {}},
        },
        instance=instance,
        tags=tags,
    )
Example #9
def test_pipeline_python_file():
    python_file = file_relative_path(__file__, 'foo_pipeline.py')

    recon_pipeline = recon_pipeline_for_cli_args({
        'module_name': None,
        'fn_name': 'define_pipeline',
        'pipeline_name': None,
        'python_file': python_file,
        'repository_yaml': None,
    })
    assert isinstance(recon_pipeline, ReconstructablePipeline)
    assert recon_pipeline.repository.pointer == FileCodePointer(
        python_file, 'define_pipeline')
Example #10
def execute_pipeline_on_celery(pipeline_name):
    with seven.TemporaryDirectory() as tempdir:
        pipeline_def = ReconstructablePipeline(FileCodePointer(__file__, pipeline_name))
        instance = DagsterInstance.local_temp(tempdir=tempdir)
        result = execute_pipeline(
            pipeline_def,
            environment_dict={
                'storage': {'filesystem': {'config': {'base_dir': tempdir}}},
                'execution': {'celery': {}},
            },
            instance=instance,
        )
        yield result
Example #11
    def from_yaml(cls, file_path):
        check.str_param(file_path, 'file_path')

        config = load_yaml_from_path(file_path)
        repository_config = check.dict_elem(config, 'repository')
        module_name = check.opt_str_elem(repository_config, 'module')
        file_name = check.opt_str_elem(repository_config, 'file')
        fn_name = check.str_elem(repository_config, 'fn')

        if module_name:
            pointer = ModuleCodePointer(module_name, fn_name)
        else:
            # rebase file in config off of the path in the config file
            file_name = os.path.join(os.path.dirname(os.path.abspath(file_path)), file_name)
            pointer = FileCodePointer(file_name, fn_name)

        return cls(pointer=pointer, yaml_path=file_path,)
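The from_yaml classmethod above reads only a top-level 'repository' block with a 'module' or 'file' key plus a required 'fn' key, then rebases any relative file path against the directory that contains the YAML file. Below is a minimal, self-contained sketch of that parsing logic; the YAML contents, file names, and yaml_path are hypothetical, not taken from the examples above.

import os

import yaml  # PyYAML, standing in here for dagster's load_yaml_from_path helper

# Hypothetical repository.yaml contents; the keys mirror what from_yaml() reads.
EXAMPLE_YAML = """
repository:
  file: repos.py
  fn: define_my_repository
"""

config = yaml.safe_load(EXAMPLE_YAML)
repository_config = config['repository']
module_name = repository_config.get('module')
file_name = repository_config.get('file')
fn_name = repository_config['fn']

if module_name:
    print('would build ModuleCodePointer(%s, %s)' % (module_name, fn_name))
else:
    # Same rebasing step as in from_yaml: resolve the file relative to the
    # directory containing the YAML file (a hypothetical path here).
    yaml_path = '/path/to/repository.yaml'
    file_name = os.path.join(os.path.dirname(os.path.abspath(yaml_path)), file_name)
    print('would build FileCodePointer(%s, %s)' % (file_name, fn_name))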
Example #12
    def get_origin(self):
        """
        Hack! Inject the origin that the k8s images will use. The BK image uses a different directory
        structure (/workdir/python_modules/dagster-test/dagster_test/test_project) than the images
        inside the kind cluster (/dagster_test/test_project). As a result, the normal origin won't
        work, so we need to inject this one.
        """

        return PipelinePythonOrigin(
            self._pipeline_index.name,
            RepositoryPythonOrigin(
                executable_path="python",
                code_pointer=FileCodePointer(
                    "/dagster_test/test_project/test_pipelines/repo.py",
                    "define_demo_execution_repo",
                ),
            ),
        )
Example #13
    def get_python_origin(self):
        """
        Hack! Inject the origin that the docker-celery images will use. The BK image uses a different
        directory structure (/workdir/python_modules/dagster-test/dagster_test/test_project) than
        the test that creates the ReconstructablePipeline. As a result, the normal origin won't
        work, so we need to inject this one.
        """

        return PipelinePythonOrigin(
            self.pipeline_name,
            RepositoryPythonOrigin(
                executable_path="python",
                code_pointer=FileCodePointer(
                    "/dagster_test/test_project/test_pipelines/repo.py",
                    "define_demo_execution_repo",
                ),
                container_image=self.repository.container_image,
            ),
        )
Example #14
    def get_external_origin(self):
        """
        Hack! Inject the origin that the k8s images will use. The BK image uses a different directory
        structure (/workdir/python_modules/dagster-test/dagster_test/test_project) than the images
        inside the kind cluster (/dagster_test/test_project). As a result, the normal origin won't
        work, so we need to inject this one.
        """

        return ExternalJobOrigin(
            external_repository_origin=ExternalRepositoryOrigin(
                repository_location_origin=InProcessRepositoryLocationOrigin(
                    recon_repo=ReconstructableRepository(
                        pointer=FileCodePointer(
                            python_file="/dagster_test/test_project/test_pipelines/repo.py",
                            fn_name="define_demo_execution_repo",
                        ))),
                repository_name="demo_execution_repo",
            ),
            job_name=self.name,
        )
Example #15
def test_reconstruct_from_origin():
    origin = PipelinePythonOrigin(
        pipeline_name="foo_pipe",
        repository_origin=RepositoryPythonOrigin(
            executable_path="my_python",
            code_pointer=FileCodePointer(
                python_file="foo.py",
                fn_name="bar",
                working_directory="/",
            ),
            container_image="my_image",
            entry_point=DEFAULT_DAGSTER_ENTRY_POINT,
        ),
    )

    recon_pipeline = recon_pipeline_from_origin(origin)

    assert recon_pipeline.pipeline_name == origin.pipeline_name
    assert recon_pipeline.repository.pointer == origin.repository_origin.code_pointer
    assert recon_pipeline.repository.container_image == origin.repository_origin.container_image
    assert recon_pipeline.repository.executable_path == origin.repository_origin.executable_path
Example #16
def test_repository_python_file():
    python_file = file_relative_path(__file__, 'bar_repo.py')

    recon_pipeline = recon_pipeline_for_cli_args({
        'pipeline_name': 'foo',
        'python_file': python_file,
        'fn_name': 'define_bar_repo'
    })
    assert isinstance(recon_pipeline, ReconstructablePipeline)
    assert recon_pipeline.repository.pointer == FileCodePointer(
        python_file, 'define_bar_repo')
    assert recon_pipeline.pipeline_name == 'foo'

    with pytest.raises(UsageError):
        recon_pipeline_for_cli_args({
            'module_name': 'kdjfkd',
            'pipeline_name': 'foo',
            'python_file': file_relative_path(__file__, 'bar_repo.py'),
            'fn_name': 'define_bar_repo',
            'repository_yaml': None,
        })

    with pytest.raises(UsageError):
        recon_pipeline_for_cli_args({
            'module_name': None,
            'pipeline_name': 'foo',
            'python_file': file_relative_path(__file__, 'bar_repo.py'),
            'fn_name': 'define_bar_repo',
            'repository_yaml': 'kjdfkdjf',
        })
Example #17
def reconstructable(target):
    '''
    Create a ReconstructablePipeline from a function that returns a PipelineDefinition
    or a @pipeline decorated function.
    '''
    from dagster.core.definitions import PipelineDefinition

    if not seven.is_function_or_decorator_instance_of(target, PipelineDefinition):
        raise DagsterInvariantViolationError(
            'Reconstructable target should be a function or definition produced '
            'by a decorated function, got {type}.'.format(type=type(target)),
        )

    if seven.is_lambda(target):
        raise DagsterInvariantViolationError(
            'Reconstructable target can not be a lambda. Use a function or '
            'decorated function defined at module scope instead.'
        )

    if seven.qualname_differs(target):
        raise DagsterInvariantViolationError(
            'Reconstructable target "{target.__name__}" has a different '
            '__qualname__ "{target.__qualname__}" indicating it is not '
            'defined at module scope. Use a function or decorated function '
            'defined at module scope instead.'.format(target=target)
        )

    python_file = get_python_file_from_previous_stack_frame()
    if python_file.endswith('<stdin>'):
        raise DagsterInvariantViolationError(
            'reconstructable() can not reconstruct pipelines from <stdin>, unable to target file {}. '.format(
                python_file
            )
        )
    pointer = FileCodePointer(python_file=python_file, fn_name=target.__name__,)

    return bootstrap_standalone_recon_pipeline(pointer)
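The docstring in this example states what reconstructable() accepts but does not show a call. A minimal usage sketch, using a hypothetical @pipeline-decorated function defined at module scope (the same pattern the fuller docstrings later in this listing document):

from dagster import pipeline, reconstructable


@pipeline
def my_pipeline():
    ...


# reconstructable() must be given a module-scope function or decorated
# definition; lambdas and definitions created in a REPL or notebook raise
# DagsterInvariantViolationError, per the checks above.
reconstructable_my_pipeline = reconstructable(my_pipeline)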
Example #18
    def for_file(cls, file, fn_name, working_directory=None):
        if working_directory:
            return cls(FileInDirectoryCodePointer(file, fn_name, working_directory))
        return cls(FileCodePointer(file, fn_name))
Example #19
def for_file(python_file, fn_name):
    return bootstrap_standalone_recon_pipeline(FileCodePointer(python_file, fn_name))
Example #20
def reconstructable(target):
    """
    Create a :py:class:`~dagster.core.definitions.reconstructable.ReconstructablePipeline` from a
    function that returns a :py:class:`~dagster.PipelineDefinition`/:py:class:`~dagster.JobDefinition`,
    or a function decorated with :py:func:`@pipeline <dagster.pipeline>`/:py:func:`@job <dagster.job>`.

    When your pipeline/job must cross process boundaries, e.g., for execution on multiple nodes or
    in different systems (like ``dagstermill``), Dagster must know how to reconstruct the pipeline/job
    on the other side of the process boundary.

    Passing a job created with ``~dagster.GraphDefinition.to_job`` to ``reconstructable()``
    requires you to wrap that job's definition in a module-scoped function and pass that function
    instead:

    .. code-block:: python

        from dagster import graph, reconstructable

        @graph
        def my_graph():
            ...

        def define_my_job():
            return my_graph.to_job()

        reconstructable(define_my_job)

    This function implements a very conservative strategy for reconstruction, so that its behavior
    is easy to predict, but as a consequence it is not able to reconstruct certain kinds of pipelines
    or jobs, such as those defined by lambdas, in nested scopes (e.g., dynamically within a method
    call), or in interactive environments such as the Python REPL or Jupyter notebooks.

    If you need to reconstruct objects constructed in these ways, you should use
    :py:func:`~dagster.core.definitions.reconstructable.build_reconstructable_pipeline` instead,
    which allows you to specify your own reconstruction strategy.

    Examples:

    .. code-block:: python

        from dagster import graph, job, reconstructable

        @job
        def foo_job():
            ...

        reconstructable_foo_job = reconstructable(foo_job)


        @graph
        def foo():
            ...

        def make_bar_job():
            return foo.to_job()

        reconstructable_bar_job = reconstructable(make_bar_job)
    """
    from dagster.core.definitions import PipelineDefinition, JobDefinition

    if not seven.is_function_or_decorator_instance_of(target, PipelineDefinition):
        if isinstance(target, JobDefinition):
            raise DagsterInvariantViolationError(
                "Reconstructable target was not a function returning a job definition, or a job "
                "definition produced by a decorated function. If your job was constructed using "
                "``GraphDefinition.to_job``, you must wrap the ``to_job`` call in a function at "
                "module scope, ie not within any other functions. "
                "To learn more, check out the docs on ``reconstructable``: "
                "https://docs.dagster.io/_apidocs/execution#dagster.reconstructable"
            )
        raise DagsterInvariantViolationError(
            "Reconstructable target should be a function or definition produced "
            "by a decorated function, got {type}.".format(type=type(target)),
        )

    if seven.is_lambda(target):
        raise DagsterInvariantViolationError(
            "Reconstructable target can not be a lambda. Use a function or "
            "decorated function defined at module scope instead, or use "
            "build_reconstructable_target."
        )

    if seven.qualname_differs(target):
        raise DagsterInvariantViolationError(
            'Reconstructable target "{target.__name__}" has a different '
            '__qualname__ "{target.__qualname__}" indicating it is not '
            "defined at module scope. Use a function or decorated function "
            "defined at module scope instead, or use build_reconstructable_pipeline.".format(
                target=target
            )
        )

    try:
        if (
            hasattr(target, "__module__")
            and hasattr(target, "__name__")
            and inspect.getmodule(target).__name__ != "__main__"
        ):
            return ReconstructablePipeline.for_module(target.__module__, target.__name__)
    except:
        pass

    python_file = get_python_file_from_target(target)
    if not python_file:
        raise DagsterInvariantViolationError(
            "reconstructable() can not reconstruct jobs or pipelines defined in interactive environments "
            "like <stdin>, IPython, or Jupyter notebooks. "
            "Use a pipeline defined in a module or file instead, or "
            "use build_reconstructable_target."
        )

    pointer = FileCodePointer(
        python_file=python_file, fn_name=target.__name__, working_directory=os.getcwd()
    )

    return bootstrap_standalone_recon_pipeline(pointer)
Example #21
def reconstructable(target):
    """
    Create a ReconstructablePipeline from a function that returns a PipelineDefinition, or a
    function decorated with :py:func:`@pipeline <dagster.pipeline>`

    When your pipeline must cross process boundaries, e.g., for execution on multiple nodes or
    in different systems (like dagstermill), Dagster must know how to reconstruct the pipeline
    on the other side of the process boundary.

    This function implements a very conservative strategy for reconstructing pipelines, so that
    its behavior is easy to predict, but as a consequence it is not able to reconstruct certain
    kinds of pipelines, such as those defined by lambdas, in nested scopes (e.g., dynamically
    within a method call), or in interactive environments such as the Python REPL or Jupyter
    notebooks.

    If you need to reconstruct pipelines constructed in these ways, you should use
    :py:func:`build_reconstructable_pipeline` instead, which allows you to specify your own
    strategy for reconstructing a pipeline.

    Examples:

    .. code-block:: python

        from dagster import PipelineDefinition, pipeline, reconstructable

        @pipeline
        def foo_pipeline():
            ...

        reconstructable_foo_pipeline = reconstructable(foo_pipeline)


        def make_bar_pipeline():
            return PipelineDefinition(...)

        reconstructable_bar_pipeline = reconstructable(make_bar_pipeline)
    """
    from dagster.core.definitions import PipelineDefinition

    if not seven.is_function_or_decorator_instance_of(target,
                                                      PipelineDefinition):
        raise DagsterInvariantViolationError(
            "Reconstructable target should be a function or definition produced "
            "by a decorated function, got {type}.".format(type=type(target)), )

    if seven.is_lambda(target):
        raise DagsterInvariantViolationError(
            "Reconstructable target can not be a lambda. Use a function or "
            "decorated function defined at module scope instead, or use "
            "build_reconstructable_pipeline.")

    if seven.qualname_differs(target):
        raise DagsterInvariantViolationError(
            'Reconstructable target "{target.__name__}" has a different '
            '__qualname__ "{target.__qualname__}" indicating it is not '
            "defined at module scope. Use a function or decorated function "
            "defined at module scope instead, or use build_reconstructable_pipeline."
            .format(target=target))

    try:
        if (hasattr(target, "__module__") and hasattr(target, "__name__")
                and inspect.getmodule(target).__name__ != "__main__"):
            return ReconstructablePipeline.for_module(target.__module__,
                                                      target.__name__)
    except:  # pylint: disable=bare-except
        pass

    python_file = get_python_file_from_target(target)
    if not python_file:
        raise DagsterInvariantViolationError(
            "reconstructable() can not reconstruct pipelines defined in interactive environments "
            "like <stdin>, IPython, or Jupyter notebooks. "
            "Use a pipeline defined in a module or file instead, or "
            "use build_reconstructable_pipeline.")

    pointer = FileCodePointer(python_file=python_file,
                              fn_name=target.__name__,
                              working_directory=os.getcwd())

    return bootstrap_standalone_recon_pipeline(pointer)
Example #22
    def for_file(cls, file, fn_name):
        return cls(FileCodePointer(file, fn_name))
Example #23
def reconstructable(target):
    """
    Create a ReconstructablePipeline from a function that returns a PipelineDefinition, or a
    function decorated with :py:func:`@pipeline <dagster.pipeline>`

    When your pipeline must cross process boundaries, e.g., for execution on multiple nodes or
    in different systems (like dagstermill), Dagster must know how to reconstruct the pipeline
    on the other side of the process boundary.

    This function implements a very conservative strategy for reconstructing pipelines, so that
    its behavior is easy to predict, but as a consequence it is not able to reconstruct certain
    kinds of pipelines, such as those defined by lambdas, in nested scopes (e.g., dynamically
    within a method call), or in interactive environments such as the Python REPL or Jupyter
    notebooks.

    If you need to reconstruct pipelines constructed in these ways, you should use
    :py:func:`build_reconstructable_pipeline` instead, which allows you to specify your own
    strategy for reconstructing a pipeline.

    Examples:

    .. code-block:: python

        from dagster import PipelineDefinition, pipeline, reconstructable

        @pipeline
        def foo_pipeline():
            ...

        reconstructable_foo_pipeline = reconstructable(foo_pipeline)


        def make_bar_pipeline():
            return PipelineDefinition(...)

        reconstructable_bar_pipeline = reconstructable(make_bar_pipeline)
    """
    from dagster.core.definitions import PipelineDefinition

    if not seven.is_function_or_decorator_instance_of(target,
                                                      PipelineDefinition):
        raise DagsterInvariantViolationError(
            "Reconstructable target should be a function or definition produced "
            "by a decorated function, got {type}.".format(type=type(target)), )

    if seven.is_lambda(target):
        raise DagsterInvariantViolationError(
            "Reconstructable target can not be a lambda. Use a function or "
            "decorated function defined at module scope instead, or use "
            "build_reconstructable_pipeline.")

    if seven.qualname_differs(target):
        raise DagsterInvariantViolationError(
            'Reconstructable target "{target.__name__}" has a different '
            '__qualname__ "{target.__qualname__}" indicating it is not '
            "defined at module scope. Use a function or decorated function "
            "defined at module scope instead, or use build_reconstructable_pipeline."
            .format(target=target))

    python_file = get_python_file_from_previous_stack_frame()
    if python_file.endswith("<stdin>"):
        raise DagsterInvariantViolationError(
            "reconstructable() can not reconstruct pipelines from <stdin>, unable to "
            "target file {}. Use a pipeline defined in a module or file instead, or "
            "use build_reconstructable_pipeline.".format(python_file))
    pointer = FileCodePointer(python_file=python_file,
                              fn_name=target.__name__,
                              working_directory=os.getcwd())

    # ipython:
    # Exception: Can not import module <ipython-input-3-70f55f9e97d2> from path /Users/max/Desktop/richard_brady_repro/<ipython-input-3-70f55f9e97d2>, unable to load spec.
    # Exception: Can not import module  from path /private/var/folders/zc/zyv5jx615157j4mypwcx_kxr0000gn/T/b3edec1e-b4c5-4ea4-a4ae-24a01e566aba/, unable to load spec.
    return bootstrap_standalone_recon_pipeline(pointer)
Example #24
def test_dagster_out_of_process_environment():
    env = OutOfProcessRepositoryLocation(
        'test', FileCodePointer(file_relative_path(__file__, 'setup.py'), 'define_repository'),
    )
    assert env.get_repository('test')