Exemple #1
0
def step_context_to_step_run_ref(step_context, prior_attempts_count, package_dir=None):
    '''Build a ``StepRunRef`` for the step held by ``step_context``.

    Args:
        step_context (SystemStepExecutionContext): The step context.
        prior_attempts_count (int): The number of times this step has been tried before in the
            same pipeline run.
        package_dir (Optional[str]): If set, a file-based reconstruction code pointer is
            converted into a module pointer relative to the package root.  This enables
            executing steps in remote setups where the package containing the pipeline resides
            at a different location on the filesystem in the remote environment than in the
            environment executing the plan process.

    Returns (StepRunRef):
        A reference to the step.
    '''
    pipeline = step_context.pipeline

    if package_dir:
        if isinstance(pipeline, ReconstructablePipeline) and isinstance(
            pipeline.pointer, FileCodePointer
        ):
            # Pipeline is loaded straight from a file: swap in a module-based pointer.
            file_pointer = pipeline.pointer
            module_name = _module_in_package_dir(file_pointer.python_file, package_dir)
            pipeline = ReconstructablePipeline(
                pointer=ModuleCodePointer(module_name, file_pointer.fn_name),
                frozen_solid_subset=pipeline.frozen_solid_subset,
            )
        elif isinstance(pipeline, ReconstructablePipelineFromRepo) and isinstance(
            pipeline.repository.pointer, FileCodePointer
        ):
            # Pipeline is loaded through a file-based repository: rewrite the repository pointer.
            repo_pointer = pipeline.repository.pointer
            module_name = _module_in_package_dir(repo_pointer.python_file, package_dir)
            module_repo = ReconstructableRepository(
                pointer=ModuleCodePointer(module_name, repo_pointer.fn_name),
            )
            pipeline = ReconstructablePipelineFromRepo(
                repository=module_repo,
                pipeline_name=pipeline.pipeline_name,
                frozen_solid_subset=pipeline.frozen_solid_subset,
            )

    pipeline_run = step_context.pipeline_run
    return StepRunRef(
        environment_dict=step_context.environment_dict,
        pipeline_run=pipeline_run,
        run_id=pipeline_run.run_id,
        step_key=step_context.step.key,
        executor_config=step_context.executor_config,
        recon_pipeline=pipeline,
        prior_attempts_count=prior_attempts_count,
    )
Exemple #2
0
 def for_module(cls, module, fn_name, working_directory=None, container_image=None):
     """Construct ``cls`` from a ``ModuleCodePointer`` to ``fn_name`` in ``module``.

     Args:
         module: Forwarded to ``ModuleCodePointer`` as its first argument.
         fn_name: Forwarded to ``ModuleCodePointer`` as its second argument.
         working_directory: Forwarded to ``ModuleCodePointer`` as its third argument.
         container_image: Passed to ``cls`` as its second positional argument.

     Returns:
         Whatever ``cls(pointer, container_image)`` produces.
     """
     code_pointer = ModuleCodePointer(module, fn_name, working_directory)
     return cls(code_pointer, container_image)
Exemple #3
0
def test_yaml_file():
    '''CLI args naming a repository yaml yield a module pointer; partial args are rejected.'''
    yaml_cli_args = {
        'module_name': None,
        'pipeline_name': 'foobar',
        'python_file': None,
        'fn_name': None,
        'repository_yaml': file_relative_path(__file__, 'repository_module.yaml'),
    }
    recon_pipeline = recon_pipeline_for_cli_args(yaml_cli_args)
    assert isinstance(recon_pipeline, ReconstructablePipeline)

    expected_pointer = ModuleCodePointer('dagster_examples.intro_tutorial.repos', 'define_repo')
    assert recon_pipeline.repository.pointer == expected_pointer

    # Every one of these argument sets is expected to be refused with a UsageError.
    rejected_arg_sets = (
        {'module_name': 'kdjfdk', 'pipeline_name': 'foobar'},
        {'fn_name': 'kjdfkd', 'pipeline_name': 'foobar'},
        {'pipeline_name': 'foobar', 'python_file': 'kjdfkdj'},
    )
    for bad_args in rejected_arg_sets:
        with pytest.raises(UsageError):
            assert recon_pipeline_for_cli_args(bad_args)
Exemple #4
0
def step_context_to_step_run_ref(
    step_context: StepExecutionContext,
    prior_attempts_count: int,
    package_dir: Optional[str] = None,
) -> StepRunRef:
    """Build a ``StepRunRef`` for the step held by ``step_context``.

    Args:
        step_context (StepExecutionContext): The step context.
        prior_attempts_count (int): The number of times this step has been tried before in the
            same pipeline run.
        package_dir (Optional[str]): If set, a file-based reconstruction code pointer is
            converted into a module pointer relative to the package root.  This enables
            executing steps in remote setups where the package containing the pipeline resides
            at a different location on the filesystem in the remote environment than in the
            environment executing the plan process.

    Returns (StepRunRef):
        A reference to the step.
    """
    check.inst_param(step_context, "step_context", StepExecutionContext)
    check.int_param(prior_attempts_count, "prior_attempts_count")

    retry_mode = step_context.retry_mode
    pipeline = step_context.pipeline

    if (
        package_dir
        and isinstance(pipeline, ReconstructablePipeline)
        and isinstance(pipeline.repository.pointer, FileCodePointer)
    ):
        # Rebuild the repository around a module pointer, carrying every other
        # repository attribute over unchanged.
        source_repo = pipeline.repository
        file_pointer = source_repo.pointer
        module_pointer = ModuleCodePointer(
            _module_in_package_dir(file_pointer.python_file, package_dir),
            file_pointer.fn_name,
            working_directory=os.getcwd(),
        )
        module_repo = ReconstructableRepository(
            pointer=module_pointer,
            container_image=source_repo.container_image,
            executable_path=source_repo.executable_path,
            entry_point=source_repo.entry_point,
        )
        pipeline = ReconstructablePipeline(
            repository=module_repo,
            pipeline_name=pipeline.pipeline_name,
            solids_to_execute=pipeline.solids_to_execute,
        )

    # Only look the parent run up on the instance when the run actually has a parent id.
    parent_run_id = step_context.pipeline_run.parent_run_id
    if parent_run_id:
        parent_run = step_context.instance.get_run_by_id(parent_run_id)
    else:
        parent_run = None

    return StepRunRef(
        run_config=step_context.run_config,
        pipeline_run=step_context.pipeline_run,
        run_id=step_context.pipeline_run.run_id,
        step_key=step_context.step.key,
        retry_mode=retry_mode,
        recon_pipeline=pipeline,
        prior_attempts_count=prior_attempts_count,
        known_state=step_context.execution_plan.known_state,
        parent_run=parent_run,
    )
Exemple #5
0
def test_queued_pipeline_origin_check():
    """A QUEUED ``PipelineRun`` is accepted with both origins and rejected without them."""
    code_pointer = ModuleCodePointer("fake", "fake")

    location_origin = InProcessRepositoryLocationOrigin(ReconstructableRepository(code_pointer))
    external_repo_origin = ExternalRepositoryOrigin(location_origin, "foo_repo")
    fake_pipeline_origin = ExternalPipelineOrigin(external_repo_origin, "foo")

    python_repo_origin = RepositoryPythonOrigin(sys.executable, code_pointer)
    fake_code_origin = PipelinePythonOrigin(
        pipeline_name="foo",
        repository_origin=python_repo_origin,
    )

    # With both origins supplied, construction is expected to succeed.
    PipelineRun(
        status=PipelineRunStatus.QUEUED,
        external_pipeline_origin=fake_pipeline_origin,
        pipeline_code_origin=fake_code_origin,
    )

    # Without origins, both constructing as QUEUED and switching to QUEUED must fail the check.
    with pytest.raises(check.CheckError):
        PipelineRun(status=PipelineRunStatus.QUEUED)

    with pytest.raises(check.CheckError):
        PipelineRun().with_status(PipelineRunStatus.QUEUED)
Exemple #6
0
def test_repository_module():
    '''Module + function CLI args produce a pipeline whose repository uses a module pointer.'''
    cli_args = {
        'module_name': 'dagster',
        'pipeline_name': 'foo',
        'python_file': None,
        'fn_name': 'define_bar_repo',
        'repository_yaml': None,
    }
    recon_pipeline = recon_pipeline_for_cli_args(cli_args)

    assert isinstance(recon_pipeline, ReconstructablePipeline)

    expected_pointer = ModuleCodePointer('dagster', 'define_bar_repo')
    assert recon_pipeline.repository.pointer == expected_pointer
    assert recon_pipeline.pipeline_name == 'foo'
Exemple #7
0
def test_pipeline_module():
    '''Module + function CLI args without a pipeline name raise an invariant violation.'''
    from dagster.core.errors import DagsterInvariantViolationError

    cli_args = {
        'module_name': 'dagster',
        'fn_name': 'define_pipeline',
        'pipeline_name': None,
        'python_file': None,
        'repository_yaml': None,
    }

    with pytest.raises(DagsterInvariantViolationError):
        # can't find it so it pukes
        recon_pipeline = recon_pipeline_for_cli_args(cli_args)
        # NOTE(review): if the call above is the statement that raises, the two
        # assertions below never execute - confirm whether they are still wanted.
        assert isinstance(recon_pipeline, ReconstructablePipeline)
        expected_pointer = ModuleCodePointer('dagster', 'define_pipeline')
        assert recon_pipeline.repository.pointer == expected_pointer
Exemple #8
0
def create_invalid_run(instance, **kwargs):
    """Create a test run on ``instance`` that targets the pipeline name "wrong-pipeline".

    The run's external origin is built on a fake module code pointer. Extra keyword
    arguments are forwarded to ``create_run_for_test``. Nothing is returned.
    """
    fake_repository = ReconstructableRepository(ModuleCodePointer("fake", "fake"))
    repository_origin = ExternalRepositoryOrigin(
        InProcessRepositoryLocationOrigin(fake_repository),
        "foo",
    )
    pipeline_origin = ExternalPipelineOrigin(repository_origin, "wrong-pipeline")

    create_run_for_test(
        instance,
        external_pipeline_origin=pipeline_origin,
        pipeline_name="wrong-pipeline",
        **kwargs,
    )
Exemple #9
0
    def from_yaml(cls, file_path):
        '''Build an instance of ``cls`` from the ``repository`` section of a YAML file.

        The ``repository`` mapping must contain ``fn`` and either ``module`` or ``file``.
        When ``module`` is set it takes precedence and a ``ModuleCodePointer`` is used.
        Otherwise ``file`` is resolved relative to the directory containing ``file_path``
        and a ``FileCodePointer`` is used.

        Args:
            file_path (str): Path to the YAML file to load.

        Returns:
            The result of ``cls(pointer=..., yaml_path=file_path)``.

        Raises:
            CheckError: If an argument or config element fails its type check, or if the
                config specifies neither ``module`` nor ``file``.
        '''
        check.str_param(file_path, 'file_path')

        config = load_yaml_from_path(file_path)
        repository_config = check.dict_elem(config, 'repository')
        module_name = check.opt_str_elem(repository_config, 'module')
        file_name = check.opt_str_elem(repository_config, 'file')
        fn_name = check.str_elem(repository_config, 'fn')

        if module_name:
            pointer = ModuleCodePointer(module_name, fn_name)
        else:
            # Without this guard a config that sets neither key would reach
            # os.path.join(..., None) and fail with an opaque TypeError.
            check.invariant(
                file_name is not None,
                'Repository config in {file_path} must specify either "module" or "file".'.format(
                    file_path=file_path
                ),
            )
            # rebase file in config off of the path in the config file
            config_dir = os.path.dirname(os.path.abspath(file_path))
            pointer = FileCodePointer(os.path.join(config_dir, file_name), fn_name)

        return cls(pointer=pointer, yaml_path=file_path)
Exemple #10
0
def test_queued_pipeline_origin_check():
    """A QUEUED ``PipelineRun`` is accepted with an external origin and rejected without one."""
    fake_repository = ReconstructableRepository(ModuleCodePointer("fake", "fake"))
    repository_origin = ExternalRepositoryOrigin(
        InProcessRepositoryLocationOrigin(fake_repository),
        "foo_repo",
    )
    fake_pipeline_origin = ExternalPipelineOrigin(repository_origin, "foo")

    # With the external origin supplied, construction is expected to succeed.
    PipelineRun(
        status=PipelineRunStatus.QUEUED,
        external_pipeline_origin=fake_pipeline_origin,
    )

    # Without it, both constructing as QUEUED and switching to QUEUED must fail the check.
    with pytest.raises(check.CheckError):
        PipelineRun(status=PipelineRunStatus.QUEUED)

    with pytest.raises(check.CheckError):
        PipelineRun().with_status(PipelineRunStatus.QUEUED)
Exemple #11
0
 def for_module(cls, module, fn_name):
     """Construct ``cls`` from a ``ModuleCodePointer`` built with ``module`` and ``fn_name``."""
     code_pointer = ModuleCodePointer(module, fn_name)
     return cls(code_pointer)
Exemple #12
0
 def for_module(module, fn_name):
     """Bootstrap a standalone recon pipeline from ``ModuleCodePointer(module, fn_name)``."""
     code_pointer = ModuleCodePointer(module, fn_name)
     return bootstrap_standalone_recon_pipeline(code_pointer)
Exemple #13
0
def build_reconstructable_target(
    reconstructor_module_name,
    reconstructor_function_name,
    reconstructable_args=None,
    reconstructable_kwargs=None,
    reconstructor_working_directory=None,
):
    """
    Create a :py:class:`dagster.core.definitions.reconstructable.ReconstructablePipeline`.

    When your pipeline must cross process boundaries, e.g., for execution on multiple nodes or
    in different systems (like ``dagstermill``), Dagster must know how to reconstruct the pipeline
    on the other side of the process boundary.

    This function allows you to use the strategy of your choice for reconstructing pipelines, so
    that you can reconstruct certain kinds of pipelines that are not supported by
    :py:func:`~dagster.reconstructable`, such as those defined by lambdas, in nested scopes (e.g.,
    dynamically within a method call), or in interactive environments such as the Python REPL or
    Jupyter notebooks.

    If you need to reconstruct pipelines constructed in these ways, use this function instead of
    :py:func:`~dagster.reconstructable`.

    Args:
        reconstructor_module_name (str): The name of the module containing the function to use to
            reconstruct the pipeline.
        reconstructor_function_name (str): The name of the function to use to reconstruct the
            pipeline.
        reconstructable_args (Tuple): Args to the function to use to reconstruct the pipeline.
            Values of the tuple must be JSON serializable.
        reconstructable_kwargs (Dict[str, Any]): Kwargs to the function to use to reconstruct the
            pipeline. Values of the dict must be JSON serializable.
        reconstructor_working_directory (Optional[str]): Working directory recorded on the
            module code pointer for the reconstructor function. Defaults to the current working
            directory (``os.getcwd()``) at call time.

    Examples:

    .. code-block:: python

        # module: mymodule

        from dagster import PipelineDefinition, pipeline, build_reconstructable_pipeline

        class PipelineFactory:
            def make_pipeline(*args, **kwargs):

                @pipeline
                def _pipeline(...):
                    ...

                return _pipeline

        def reconstruct_pipeline(*args):
            factory = PipelineFactory()
            return factory.make_pipeline(*args)

        factory = PipelineFactory()

        foo_pipeline_args = (...,...)

        foo_pipeline_kwargs = {...:...}

        foo_pipeline = factory.make_pipeline(*foo_pipeline_args, **foo_pipeline_kwargs)

        reconstructable_foo_pipeline = build_reconstructable_pipeline(
            'mymodule',
            'reconstruct_pipeline',
            foo_pipeline_args,
            foo_pipeline_kwargs,
        )
    """
    check.str_param(reconstructor_module_name, "reconstructor_module_name")
    check.str_param(reconstructor_function_name, "reconstructor_function_name")
    # Keep the checked value: opt_str_param is what substitutes the cwd default for None.
    # Discarding its result would leave the working directory as None.
    reconstructor_working_directory = check.opt_str_param(
        reconstructor_working_directory, "reconstructor_working_directory", os.getcwd()
    )

    # Args and kwargs are stored as plain lists (kwargs as [key, value] pairs).
    reconstructable_args = list(check.opt_tuple_param(reconstructable_args, "reconstructable_args"))
    reconstructable_kwargs = [
        [key, value]
        for key, value in check.opt_dict_param(
            reconstructable_kwargs, "reconstructable_kwargs", key_type=str
        ).items()
    ]

    reconstructor_pointer = ModuleCodePointer(
        reconstructor_module_name,
        reconstructor_function_name,
        working_directory=reconstructor_working_directory,
    )

    pointer = CustomPointer(reconstructor_pointer, reconstructable_args, reconstructable_kwargs)

    # Load the definition once up front so the pipeline name can be recorded.
    pipeline_def = pipeline_def_from_pointer(pointer)

    return ReconstructablePipeline(
        repository=ReconstructableRepository(pointer),  # creates ephemeral repo
        pipeline_name=pipeline_def.name,
    )
Exemple #14
0
def step_context_to_step_run_ref(step_context, prior_attempts_count, package_dir=None):
    '''Build a ``StepRunRef`` for the step held by ``step_context``.

    Args:
        step_context (SystemStepExecutionContext): The step context.
        prior_attempts_count (int): The number of times this step has been tried before in the
            same pipeline run.
        package_dir (Optional[str]): If set, a file-based reconstruction code pointer is
            converted into a module pointer relative to the package root.  This enables
            executing steps in remote setups where the package containing the pipeline resides
            at a different location on the filesystem in the remote environment than in the
            environment executing the plan process.

    Returns (StepRunRef):
        A reference to the step.
    '''
    check.inst_param(step_context, 'step_context', SystemStepExecutionContext)

    # The coupling between the step launcher and the executor is awkward here.
    #
    # The executor is consulted only so the step launcher can inherit its retry
    # policy. The launcher assumes such a policy exists (hence the invariants
    # below), but nothing formally requires it, so this can break - e.g. combining
    # a dask executor with the step launcher would currently fail.
    #
    # Once migration is complete, the Executor abc should probably gain an optional
    # retries property, and this code should then handle the "no retry" case.
    executor = step_context.executor
    check.param_invariant(
        hasattr(executor, 'retries'),
        'step_context',
        'Executor must have retries property',
    )
    check.param_invariant(
        isinstance(executor.retries, Retries),
        'step_context',
        'Executor retries property must be of type Retries. Object was {}'.format(executor),
    )
    check.int_param(prior_attempts_count, 'prior_attempts_count')

    retries = executor.retries
    pipeline = step_context.pipeline

    if (
        package_dir
        and isinstance(pipeline, ReconstructablePipeline)
        and isinstance(pipeline.repository.pointer, FileCodePointer)
    ):
        # Swap the file-based repository pointer for a module-based one.
        file_pointer = pipeline.repository.pointer
        module_pointer = ModuleCodePointer(
            _module_in_package_dir(file_pointer.python_file, package_dir),
            file_pointer.fn_name,
        )
        pipeline = ReconstructablePipeline(
            repository=ReconstructableRepository(pointer=module_pointer),
            pipeline_name=pipeline.pipeline_name,
            solids_to_execute=pipeline.solids_to_execute,
        )

    return StepRunRef(
        run_config=step_context.run_config,
        pipeline_run=step_context.pipeline_run,
        run_id=step_context.pipeline_run.run_id,
        step_key=step_context.step.key,
        retries=retries,
        recon_pipeline=pipeline,
        prior_attempts_count=prior_attempts_count,
    )
 def for_module(cls, module, fn_name, container_image=None):
     """Construct ``cls`` from ``ModuleCodePointer(module, fn_name)`` and ``container_image``."""
     code_pointer = ModuleCodePointer(module, fn_name)
     return cls(code_pointer, container_image)
Exemple #16
0
 def fake_repo_target():
     """Return a ``RepositoryPythonOrigin`` for this interpreter and a fake module pointer."""
     fake_pointer = ModuleCodePointer("fake", "fake")
     return RepositoryPythonOrigin(sys.executable, fake_pointer)