Exemple #1
0
def execute_execute_command(instance, kwargs):
    """Execute the pipeline described by the parsed CLI arguments.

    Args:
        instance (DagsterInstance): Instance the execution runs against.
        kwargs (dict): Parsed CLI options. ``config``, ``preset`` and ``mode`` are read
            directly; the whole mapping is also handed to the tag, origin and
            solid-selection helpers.

    Returns:
        The result object produced by ``do_execute_command``.

    Raises:
        click.UsageError: If both ``--preset`` and ``--config`` were supplied.
        click.ClickException: If the result reports that the run did not succeed.
    """
    check.inst_param(instance, "instance", DagsterInstance)

    config_files = check.opt_tuple_param(
        kwargs.get("config"), "config", default=(), of_type=str
    )
    config = list(config_files)
    preset = kwargs.get("preset")
    mode = kwargs.get("mode")

    # The two options are mutually exclusive.
    if config and preset:
        raise click.UsageError("Can not use --preset with --config.")

    tags = get_tags_from_args(kwargs)
    pipeline = recon_pipeline_from_origin(get_pipeline_python_origin_from_kwargs(kwargs))
    solid_selection = get_solid_selection_from_args(kwargs)

    result = do_execute_command(
        pipeline, instance, config, mode, tags, solid_selection, preset
    )

    if result.success:
        return result

    raise click.ClickException(
        "Pipeline run {} resulted in failure.".format(result.run_id)
    )
Exemple #2
0
def get_config_from_args(kwargs):
    """Build the run config from either ``--config`` files or a ``--config-json`` string.

    Args:
        kwargs (dict): Parsed CLI options; only ``config`` and ``config_json`` are read.

    Returns:
        An empty dict when neither option is set, the value produced by
        ``get_run_config_from_file_list`` for config files, or the decoded JSON value.

    Raises:
        click.UsageError: If both options are given, or if the JSON string cannot be decoded.
    """
    files = kwargs.get("config")
    raw_json = kwargs.get("config_json")

    if files and raw_json:
        raise click.UsageError(
            "Cannot specify both -c / --config and --config-json")

    if files:
        validated = check.opt_tuple_param(files, "config", default=(), of_type=str)
        return get_run_config_from_file_list(list(validated))

    if not raw_json:
        # Neither source of config was provided.
        return {}

    try:
        return json.loads(raw_json)
    except JSONDecodeError:
        # Include the decoder's own error so the user can see where parsing failed.
        error_text = serializable_error_info_from_exc_info(sys.exc_info()).to_string()
        raise click.UsageError(
            "Invalid JSON-string given for `--config-json`: {}\n\n{}".format(
                raw_json, error_text
            )
        )
Exemple #3
0
def test_opt_tuple_param():
    """Check that ``check.opt_tuple_param`` accepts tuples and ``None`` and rejects the rest."""
    assert check.opt_tuple_param((1, 2), 'something')
    assert check.opt_tuple_param(None, 'something') is None
    # NOTE(review): ``(2)`` is the int 2, not a one-element tuple, so this only shows that
    # the default is handed back when the value is None.
    assert check.opt_tuple_param(None, 'something', (2)) == (2)

    # Exercise the optional variant itself (not ``tuple_param``); no ``assert`` is needed
    # inside ``pytest.raises`` because the call is expected to raise before returning.
    for not_a_tuple in (1, [1], {1: 2}, 'kdjfkd'):
        with pytest.raises(CheckError):
            check.opt_tuple_param(not_a_tuple, 'something')
Exemple #4
0
def pipeline_launch_command(env, preset_name, mode, **kwargs):
    """Create a run for the pipeline selected on the command line and launch it.

    Args:
        env (Optional[Tuple[str]]): Environment YAML globs, loaded as the environment dict
            when no preset is used.
        preset_name (Optional[str]): Preset supplying solid subset, environment dict and mode.
        mode (Optional[str]): Mode to run in; ``'default'`` is used when nothing else sets it.
        **kwargs: Remaining CLI options, forwarded to the pipeline and tag helpers.

    Returns:
        Whatever ``instance.launch_run`` returns for the created run.

    Raises:
        click.UsageError: If ``--preset`` is combined with ``--env`` or ``--mode``.
    """
    env_globs = check.opt_tuple_param(env, 'env', default=(), of_type=str)
    env = list(env_globs)

    pipeline = recon_pipeline_for_cli_args(kwargs)

    instance = DagsterInstance.get()
    log_repo_stats(instance=instance, pipeline=pipeline)

    preset = None
    if preset_name:
        if env:
            raise click.UsageError('Can not use --preset with --env.')
        if mode:
            raise click.UsageError('Can not use --preset with --mode.')
        preset = pipeline.get_preset(preset_name)

    run_tags = get_tags_from_args(kwargs)

    pipeline_def = pipeline.get_definition()
    if preset:
        solid_subset = preset.solid_subset
        environment_dict = preset.environment_dict
        selected_mode = preset.mode
    else:
        solid_subset = None
        environment_dict = load_yaml_from_glob_list(env)
        selected_mode = mode

    # FIXME need to check the env against environment_dict
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline_def,
        solid_subset=solid_subset,
        environment_dict=environment_dict,
        mode=selected_mode or 'default',
        tags=run_tags,
    )

    return instance.launch_run(pipeline_run.run_id)
Exemple #5
0
def execute_list_versions_command(instance, kwargs):
    """Build a memoized execution plan for the selected pipeline and tabulate its steps.

    Args:
        instance (DagsterInstance): Instance whose ref is passed to the plan builder.
        kwargs (dict): Parsed CLI options; ``config``, ``preset`` and ``mode`` are read here.

    Raises:
        click.UsageError: If both ``--preset`` and ``--config`` were supplied.
    """
    check.inst_param(instance, "instance", DagsterInstance)

    config_files = check.opt_tuple_param(
        kwargs.get("config"), "config", default=(), of_type=str
    )
    config = list(config_files)
    preset = kwargs.get("preset")
    mode = kwargs.get("mode")

    # The two options are mutually exclusive.
    if config and preset:
        raise click.UsageError("Can not use --preset with --config.")

    pipeline = recon_pipeline_from_origin(
        get_pipeline_or_job_python_origin_from_kwargs(kwargs)
    )

    plan_options = dict(
        run_config=get_run_config_from_file_list(config),
        mode=mode,
        instance_ref=instance.get_ref(),
        tags={MEMOIZED_RUN_TAG: "true"},
    )
    add_step_to_table(create_execution_plan(pipeline, **plan_options))
Exemple #6
0
def pipeline_launch_command(env, preset_name, mode, **kwargs):
    """Create a run for the pipeline selected on the command line and launch it.

    Args:
        env (Optional[Tuple[str]]): Environment YAML globs, loaded as the environment dict
            when no preset is used.
        preset_name (Optional[str]): Preset supplying environment dict, mode and, optionally,
            a solid selection.
        mode (Optional[str]): Mode to run in; ``'default'`` is used when nothing else sets it.
        **kwargs: Remaining CLI options, forwarded to the pipeline, tag and solid-selection
            helpers.

    Returns:
        Whatever ``instance.launch_run`` returns for the created run.

    Raises:
        click.UsageError: If ``--preset`` is combined with ``--env`` or ``--mode``.
    """
    env = list(check.opt_tuple_param(env, 'env', default=(), of_type=str))
    pipeline = recon_pipeline_for_cli_args(kwargs)

    instance = DagsterInstance.get()
    log_repo_stats(instance=instance, pipeline=pipeline, source='pipeline_launch_command')

    if preset_name:
        if env:
            raise click.UsageError('Can not use --preset with --env.')

        if mode:
            raise click.UsageError('Can not use --preset with --mode.')

        preset = pipeline.get_preset(preset_name)
    else:
        preset = None

    run_tags = get_tags_from_args(kwargs)

    solid_selection = get_solid_selection_from_args(kwargs)

    # A preset's own solid selection wins, but only if it does not contradict the CLI one.
    if preset and preset.solid_selection is not None:
        check.invariant(
            solid_selection is None or solid_selection == preset.solid_selection,
            'The solid_selection set in preset \'{preset}\', {preset_subset}, does not agree with '
            'the `solid_selection` argument: {solid_selection}'.format(
                preset=preset,
                preset_subset=preset.solid_selection,
                solid_selection=solid_selection,
            ),
        )
        solid_selection = preset.solid_selection

    # generate pipeline subset from the given solid_selection
    if solid_selection:
        pipeline = pipeline.subset_for_execution(solid_selection)

    # FIXME need to check the env against environment_dict
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline.get_definition(),
        solid_selection=solid_selection,
        solids_to_execute=pipeline.solids_to_execute,
        environment_dict=preset.environment_dict if preset else load_yaml_from_glob_list(env),
        mode=(preset.mode if preset else mode) or 'default',
        tags=run_tags,
    )

    recon_repo = pipeline.get_reconstructable_repository()

    repo_location = InProcessRepositoryLocation(recon_repo)
    # No trailing comma after the call: a trailing comma inside the parentheses would wrap
    # the external pipeline in a one-element tuple before it reaches ``launch_run``.
    external_pipeline = repo_location.get_repository(
        recon_repo.get_definition().name
    ).get_full_external_pipeline(pipeline.get_definition().name)

    return instance.launch_run(pipeline_run.run_id, external_pipeline)
Exemple #7
0
def _logged_pipeline_launch_command(config, preset, mode, instance, kwargs):
    """Resolve the external pipeline named by the CLI arguments, create a run and launch it.

    Args:
        config (Optional[Tuple[str]]): Values given via ``--config``.
        preset (Optional[str]): Name of a preset to look up on the external pipeline.
        mode (Optional[str]): Mode passed through to run creation.
        instance (DagsterInstance): Instance used to create and launch the run.
        kwargs (dict): Remaining CLI options (``env``, ``repository``, ``pipeline``, ...).

    Returns:
        Whatever ``instance.launch_run`` returns for the created run.

    Raises:
        click.UsageError: If a preset is combined with config files.
    """
    check.inst_param(instance, 'instance', DagsterInstance)

    # ``--config`` and the older ``--env`` are reconciled by the backcompat helper; empty
    # values are normalised to None on the way in and back to an empty tuple on the way out.
    canonical_env = canonicalize_backcompat_args(
        config or None,
        '--config',
        kwargs.get('env') or None,
        '--env',
        '0.9.0',
        stacklevel=2,
    )
    env = list(check.opt_tuple_param(canonical_env or tuple(), 'env', default=(), of_type=str))

    repo_location = get_repository_location_from_kwargs(kwargs, instance)
    external_repo = get_external_repository_from_repo_location(
        repo_location, kwargs.get('repository')
    )
    external_pipeline = get_external_pipeline_from_external_repo(
        external_repo, kwargs.get('pipeline')
    )

    log_external_repo_stats(
        instance=instance,
        external_pipeline=external_pipeline,
        external_repo=external_repo,
        source='pipeline_launch_command',
    )

    if not preset:
        preset = None
    elif env:
        raise click.UsageError('Can not use --preset with --config.')
    else:
        preset = external_pipeline.get_preset(preset)

    run_tags = get_tags_from_args(kwargs)
    solid_selection = get_solid_selection_from_args(kwargs)

    run_options = dict(
        instance=instance,
        repo_location=repo_location,
        external_repo=external_repo,
        external_pipeline=external_pipeline,
        run_config=get_run_config_from_env_file_list(env),
        mode=mode,
        preset=preset,
        tags=run_tags,
        solid_selection=solid_selection,
    )
    pipeline_run = _create_external_pipeline_run(**run_options)

    return instance.launch_run(pipeline_run.run_id, external_pipeline)
Exemple #8
0
    def __init__(self, *args, **kwargs):
        """Wrap an exception raised by user code.

        Keyword Args:
            user_exception (Exception): The exception raised by user code (required).
            original_exc_info (Optional[tuple]): Should be obtained from a ``sys.exc_info()``
                call at the callsite, inside the exception handler. Keeping it lets consuming
                code *re-raise* the user error in its original format, for cleaner error
                reporting that does not have framework code in it.

        All other positional and keyword arguments are passed on to the parent initializer.
        """
        wrapped_exception = check.inst_param(
            kwargs.pop('user_exception'), 'user_exception', Exception
        )
        exc_info = check.opt_tuple_param(
            kwargs.pop('original_exc_info', None), 'original_exc_info'
        )

        # Both keys are popped above so the parent class never sees them.
        super(DagsterUserCodeExecutionError, self).__init__(*args, **kwargs)

        self.user_exception = check.opt_inst_param(
            wrapped_exception, 'user_exception', Exception
        )
        self.original_exc_info = exc_info
Exemple #9
0
def execute_list_versions_command(instance, kwargs):
    """Print a table of step outputs with their resolved version and stored address.

    Args:
        instance (DagsterInstance): Instance queried for addresses of versioned step outputs.
        kwargs (dict): Parsed CLI options; ``config``, ``preset`` and ``mode`` are read here.

    Raises:
        click.UsageError: If both ``--preset`` and ``--config`` were supplied.
    """
    check.inst_param(instance, "instance", DagsterInstance)

    config_files = check.opt_tuple_param(
        kwargs.get("config"), "config", default=(), of_type=str
    )
    config = list(config_files)
    preset = kwargs.get("preset")
    mode = kwargs.get("mode")

    # The two options are mutually exclusive.
    if config and preset:
        raise click.UsageError("Can not use --preset with --config.")

    pipeline = recon_pipeline_from_origin(get_pipeline_python_origin_from_kwargs(kwargs))
    run_config = get_run_config_from_file_list(config)
    pipeline_def = pipeline.get_definition()
    pipeline_name = pipeline_def.name

    execution_plan = create_execution_plan(
        pipeline.get_definition(), run_config=run_config, mode=mode
    )
    environment_config = EnvironmentConfig.build(
        pipeline_def, run_config=run_config, mode=mode
    )
    step_output_versions = resolve_step_output_versions(
        execution_plan,
        environment_config=environment_config,
        mode_def=pipeline_def.get_mode_definition(mode),
    )

    # Only outputs with a truthy version are looked up on the instance.
    versions_to_look_up = {}
    for handle, version in step_output_versions.items():
        if version:
            versions_to_look_up[(pipeline_name, handle)] = version
    step_output_addresses = instance.get_addresses_for_step_output_versions(
        versions_to_look_up
    )

    rows = [
        [
            "{key}.{output}".format(key=handle.step_key, output=handle.output_name),
            version,
            # Outputs without a known address are shown with the literal string "None".
            step_output_addresses.get((pipeline_name, handle), "None"),
        ]
        for handle, version in step_output_versions.items()
    ]

    click.echo(
        tabulate(rows, headers=["Step Output", "Version", "Address"], tablefmt="github")
    )
Exemple #10
0
def execute_list_versions_command(instance, kwargs):
    """Print a table of step outputs with their version and whether each is already stored.

    Args:
        instance (DagsterInstance): Instance passed to the memoized-plan resolver.
        kwargs (dict): Parsed CLI options; ``config``, ``preset`` and ``mode`` are read here.

    Raises:
        click.UsageError: If both ``--preset`` and ``--config`` were supplied.
    """
    check.inst_param(instance, "instance", DagsterInstance)

    config_files = check.opt_tuple_param(
        kwargs.get("config"), "config", default=(), of_type=str
    )
    config = list(config_files)
    preset = kwargs.get("preset")
    mode = kwargs.get("mode")

    # The two options are mutually exclusive.
    if config and preset:
        raise click.UsageError("Can not use --preset with --config.")

    pipeline = recon_pipeline_from_origin(get_pipeline_python_origin_from_kwargs(kwargs))
    run_config = get_run_config_from_file_list(config)

    environment_config = EnvironmentConfig.build(
        pipeline.get_definition(), run_config, mode=mode
    )
    execution_plan = ExecutionPlan.build(pipeline, environment_config)

    step_output_versions = resolve_step_output_versions(
        pipeline.get_definition(), execution_plan, environment_config
    )
    memoized_plan = resolve_memoized_execution_plan(
        execution_plan, pipeline.get_definition(), run_config, instance, environment_config
    )

    # Steps the memoized plan still wants to execute are reported as needing recomputation;
    # every other step is reported as stored.
    pending_step_keys = set(memoized_plan.step_keys_to_execute)

    rows = [
        [
            "{key}.{output}".format(key=handle.step_key, output=handle.output_name),
            version,
            "to-be-recomputed" if handle.step_key in pending_step_keys else "stored",
        ]
        for handle, version in step_output_versions.items()
    ]

    click.echo(
        tabulate(
            rows,
            headers=["Step Output", "Version", "Status of Output"],
            tablefmt="github",
        )
    )
Exemple #11
0
def execute_launch_command(instance, kwargs):
    """Create a run for the external pipeline named by the CLI arguments and launch it.

    Args:
        instance (DagsterInstance): Instance used to create and launch the run.
        kwargs (dict): Parsed CLI options (``preset``, ``mode``, ``config``, ``repository``,
            ``pipeline``, ...).

    Returns:
        Whatever ``instance.launch_run`` returns for the created run.

    Raises:
        click.UsageError: If both ``--preset`` and ``--config`` were supplied.
    """
    preset = kwargs.get("preset")
    mode = kwargs.get("mode")
    check.inst_param(instance, "instance", DagsterInstance)
    config_files = check.opt_tuple_param(
        kwargs.get("config"), "config", default=(), of_type=str
    )
    config = list(config_files)

    # Everything that touches the repository location happens while its context is open.
    with get_repository_location_from_kwargs(kwargs, instance) as repo_location:
        external_repo = get_external_repository_from_repo_location(
            repo_location, kwargs.get("repository")
        )
        external_pipeline = get_external_pipeline_from_external_repo(
            external_repo, kwargs.get("pipeline")
        )

        log_external_repo_stats(
            instance=instance,
            external_pipeline=external_pipeline,
            external_repo=external_repo,
            source="pipeline_launch_command",
        )

        # The two options are mutually exclusive.
        if config and preset:
            raise click.UsageError("Can not use --preset with --config.")

        run_tags = get_tags_from_args(kwargs)
        solid_selection = get_solid_selection_from_args(kwargs)

        run_options = dict(
            instance=instance,
            repo_location=repo_location,
            external_repo=external_repo,
            external_pipeline=external_pipeline,
            run_config=get_run_config_from_file_list(config),
            mode=mode,
            preset=preset,
            tags=run_tags,
            solid_selection=solid_selection,
        )
        pipeline_run = _create_external_pipeline_run(**run_options)

        return instance.launch_run(pipeline_run.run_id, external_pipeline)
Exemple #12
0
def execute_list_versions_command(instance, kwargs):
    """Build a memoized execution plan for the selected job and tabulate its steps.

    Args:
        instance (DagsterInstance): Instance whose ref is passed to the plan builder.
        kwargs (dict): Parsed CLI options; ``config`` is read here and the whole mapping is
            handed to the origin helper.
    """
    check.inst_param(instance, "instance", DagsterInstance)

    config_files = check.opt_tuple_param(
        kwargs.get("config"), "config", default=(), of_type=str
    )
    config = list(config_files)

    job = recon_pipeline_from_origin(
        get_pipeline_or_job_python_origin_from_kwargs(kwargs, True)
    )

    # The plan is always built for the "default" mode and tagged as a memoized run.
    plan_options = dict(
        run_config=get_run_config_from_file_list(config),
        mode="default",
        instance_ref=instance.get_ref(),
        tags={MEMOIZED_RUN_TAG: "true"},
    )
    add_step_to_table(create_execution_plan(job, **plan_options))
Exemple #13
0
def test_opt_tuple_param():
    """Check ``check.opt_tuple_param`` for plain tuples, ``None`` and ``of_type`` validation."""
    assert check.opt_tuple_param((1, 2), "something")
    assert check.opt_tuple_param(None, "something") is None
    # NOTE(review): ``(2)`` is the int 2, not a one-element tuple, so this only shows that
    # the default is handed back when the value is None.
    assert check.opt_tuple_param(None, "something", (2)) == (2)

    # Anything that is neither a tuple nor None is rejected.
    for not_a_tuple in (1, [1], {1: 2}, "kdjfkd"):
        with pytest.raises(CheckError):
            check.opt_tuple_param(not_a_tuple, "something")

    # (value, of_type) pairs that must pass; each case is listed once.
    accepted = [
        ((3, 4), int),
        (("foo", "bar"), str),
        ((3, 4), (int, int)),
        ((3, "bar"), (int, str)),
    ]
    for value, of_type in accepted:
        assert check.opt_tuple_param(value, "something", of_type=of_type)

    # (value, of_type) pairs that must fail: length mismatches and member type mismatches.
    rejected = [
        ((3, 4, 5), (int, int)),
        ((3, 4), (int, int, int)),
        ((3, 4), (int, str)),
        ((3, 4), (str, str)),
    ]
    for value, of_type in rejected:
        with pytest.raises(CheckError):
            check.opt_tuple_param(value, "something", of_type=of_type)
Exemple #14
0
def build_reconstructable_target(
    reconstructor_module_name,
    reconstructor_function_name,
    reconstructable_args=None,
    reconstructable_kwargs=None,
    reconstructor_working_directory=None,
):
    """
    Create a :py:class:`dagster.core.definitions.reconstructable.ReconstructablePipeline`.

    When your pipeline must cross process boundaries, e.g., for execution on multiple nodes or
    in different systems (like ``dagstermill``), Dagster must know how to reconstruct the pipeline
    on the other side of the process boundary.

    This function allows you to use the strategy of your choice for reconstructing pipelines, so
    that you can reconstruct certain kinds of pipelines that are not supported by
    :py:func:`~dagster.reconstructable`, such as those defined by lambdas, in nested scopes (e.g.,
    dynamically within a method call), or in interactive environments such as the Python REPL or
    Jupyter notebooks.

    If you need to reconstruct pipelines constructed in these ways, use this function instead of
    :py:func:`~dagster.reconstructable`.

    Args:
        reconstructor_module_name (str): The name of the module containing the function to use to
            reconstruct the pipeline.
        reconstructor_function_name (str): The name of the function to use to reconstruct the
            pipeline.
        reconstructable_args (Optional[Tuple]): Args to the function to use to reconstruct the
            pipeline. Values of the tuple must be JSON serializable. Defaults to no args.
        reconstructable_kwargs (Optional[Dict[str, Any]]): Kwargs to the function to use to
            reconstruct the pipeline. Values of the dict must be JSON serializable.
        reconstructor_working_directory (Optional[str]): Working directory recorded on the code
            pointer for the reconstructor function. Defaults to the current working directory.

    Returns:
        ReconstructablePipeline: A reconstructable handle for the pipeline, backed by an
        ephemeral repository built from the custom pointer.

    Examples:

    .. code-block:: python

        # module: mymodule

        from dagster import PipelineDefinition, pipeline, build_reconstructable_pipeline

        class PipelineFactory:
            def make_pipeline(*args, **kwargs):

                @pipeline
                def _pipeline(...):
                    ...

                return _pipeline

        def reconstruct_pipeline(*args, **kwargs):
            factory = PipelineFactory()
            return factory.make_pipeline(*args, **kwargs)

        factory = PipelineFactory()

        foo_pipeline_args = (...,...)

        foo_pipeline_kwargs = {...:...}

        foo_pipeline = factory.make_pipeline(*foo_pipeline_args, **foo_pipeline_kwargs)

        reconstructable_foo_pipeline = build_reconstructable_pipeline(
            'mymodule',
            'reconstruct_pipeline',
            foo_pipeline_args,
            foo_pipeline_kwargs,
        )
    """
    check.str_param(reconstructor_module_name, "reconstructor_module_name")
    check.str_param(reconstructor_function_name, "reconstructor_function_name")
    # Keep the checked value so the ``os.getcwd()`` default actually takes effect when no
    # working directory is passed.
    reconstructor_working_directory = check.opt_str_param(
        reconstructor_working_directory, "reconstructor_working_directory", os.getcwd()
    )

    # ``default=()`` matters: without it a missing ``reconstructable_args`` comes back as
    # None and ``list(None)`` raises TypeError.
    reconstructable_args = list(
        check.opt_tuple_param(reconstructable_args, "reconstructable_args", default=())
    )
    # Kwargs are stored as a list of [key, value] pairs.
    reconstructable_kwargs = [
        [key, value]
        for key, value in check.opt_dict_param(
            reconstructable_kwargs, "reconstructable_kwargs", key_type=str
        ).items()
    ]

    reconstructor_pointer = ModuleCodePointer(
        reconstructor_module_name,
        reconstructor_function_name,
        working_directory=reconstructor_working_directory,
    )

    pointer = CustomPointer(reconstructor_pointer, reconstructable_args, reconstructable_kwargs)

    pipeline_def = pipeline_def_from_pointer(pointer)

    return ReconstructablePipeline(
        repository=ReconstructableRepository(pointer),  # creates ephemeral repo
        pipeline_name=pipeline_def.name,
    )
Exemple #15
0
def _logged_pipeline_launch_command(config, preset_name, mode, instance, kwargs):
    """Create a run for the pipeline named by the CLI arguments and launch it.

    Args:
        config (Optional[Tuple[str]]): Values given via ``--config``.
        preset_name (Optional[str]): Preset supplying run config, mode and, optionally, a
            solid selection.
        mode (Optional[str]): Mode to run in; ``'default'`` is used when nothing else sets it.
        instance (DagsterInstance): Instance used to resolve the pipeline and to create and
            launch the run.
        kwargs (dict): Remaining CLI options, forwarded to the pipeline, tag and
            solid-selection helpers.

    Returns:
        Whatever ``instance.launch_run`` returns for the created run.

    Raises:
        click.UsageError: If ``--preset`` is combined with ``--config`` or ``--mode``.
    """
    check.inst_param(instance, 'instance', DagsterInstance)
    env = (
        canonicalize_backcompat_args(
            (config if config else None),
            '--config',
            (kwargs.get('env') if kwargs.get('env') else None),
            '--env',
            '0.9.0',
            stacklevel=2,  # this stacklevel can point the warning to this line
        )
        or tuple()  # back to default empty tuple
    )

    env = list(check.opt_tuple_param(env, 'env', default=(), of_type=str))

    # Use the instance the caller passed in (validated above) rather than replacing it with
    # a freshly fetched one.
    external_pipeline = get_external_pipeline_from_kwargs(kwargs, instance)
    # We should move this to use external pipeline
    # https://github.com/dagster-io/dagster/issues/2556
    pipeline = recon_pipeline_from_origin(external_pipeline.get_origin())

    log_repo_stats(instance=instance, pipeline=pipeline, source='pipeline_launch_command')

    if preset_name:
        if env:
            raise click.UsageError('Can not use --preset with --config.')

        if mode:
            raise click.UsageError('Can not use --preset with --mode.')

        preset = pipeline.get_definition().get_preset(preset_name)
    else:
        preset = None

    run_tags = get_tags_from_args(kwargs)

    solid_selection = get_solid_selection_from_args(kwargs)

    # A preset's own solid selection wins, but only if it does not contradict the CLI one.
    if preset and preset.solid_selection is not None:
        check.invariant(
            solid_selection is None or solid_selection == preset.solid_selection,
            'The solid_selection set in preset \'{preset}\', {preset_subset}, does not agree with '
            'the `solid_selection` argument: {solid_selection}'.format(
                preset=preset,
                preset_subset=preset.solid_selection,
                solid_selection=solid_selection,
            ),
        )
        solid_selection = preset.solid_selection

    # generate pipeline subset from the given solid_selection
    if solid_selection:
        pipeline = pipeline.subset_for_execution(solid_selection)

    # FIXME need to check the env against run_config
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline.get_definition(),
        solid_selection=solid_selection,
        solids_to_execute=pipeline.solids_to_execute,
        run_config=preset.run_config if preset else load_yaml_from_glob_list(env),
        mode=(preset.mode if preset else mode) or 'default',
        tags=run_tags,
    )

    recon_repo = pipeline.get_reconstructable_repository()

    repo_location = InProcessRepositoryLocation(recon_repo)
    # No trailing comma after the call: a trailing comma inside the parentheses would wrap
    # the external pipeline in a one-element tuple before it reaches ``launch_run``.
    external_pipeline = repo_location.get_repository(
        recon_repo.get_definition().name
    ).get_full_external_pipeline(pipeline.get_definition().name)

    return instance.launch_run(pipeline_run.run_id, external_pipeline)
Exemple #16
0
def test_opt_tuple_param():
    """Check ``check.opt_tuple_param`` for plain tuples, ``None`` and ``of_type`` validation."""
    assert check.opt_tuple_param((1, 2), 'something')
    assert check.opt_tuple_param(None, 'something') is None
    # NOTE(review): ``(2)`` is the int 2, not a one-element tuple, so this only shows that
    # the default is handed back when the value is None.
    assert check.opt_tuple_param(None, 'something', (2)) == (2)

    # Anything that is neither a tuple nor None is rejected.
    for not_a_tuple in (1, [1], {1: 2}, 'kdjfkd'):
        with pytest.raises(CheckError):
            check.opt_tuple_param(not_a_tuple, 'something')

    # (value, of_type) pairs that must pass; each case is listed once.
    accepted = [
        ((3, 4), int),
        (('foo', 'bar'), str),
        ((3, 4), (int, int)),
        ((3, 'bar'), (int, str)),
    ]
    for value, of_type in accepted:
        assert check.opt_tuple_param(value, 'something', of_type=of_type)

    # (value, of_type) pairs that must fail: length mismatches and member type mismatches.
    rejected = [
        ((3, 4, 5), (int, int)),
        ((3, 4), (int, int, int)),
        ((3, 4), (int, str)),
        ((3, 4), (str, str)),
    ]
    for value, of_type in rejected:
        with pytest.raises(CheckError):
            check.opt_tuple_param(value, 'something', of_type=of_type)