예제 #1
0
def configurable_class_data(config_field):
    """Build a ``ConfigurableClassData`` from a config mapping.

    Reads the ``"module"`` and ``"class"`` entries of ``config_field`` through
    ``check.str_elem`` and the ``"config"`` entry through
    ``check.opt_dict_elem``; the latter is dumped to YAML with
    ``default_flow_style=False`` before being handed to the constructor.
    """
    module_name = check.str_elem(config_field, "module")
    class_name = check.str_elem(config_field, "class")
    config_yaml = yaml.dump(
        check.opt_dict_elem(config_field, "config"), default_flow_style=False
    )
    return ConfigurableClassData(module_name, class_name, config_yaml)
예제 #2
0
파일: types.py 프로젝트: zuik/dagster
    def from_dict(cls, d: Dict[str, Any]) -> "DbtCliOutput":
        """Create a :class:`DbtCliOutput <dagster_dbt.DbtCliOutput>` from a dictionary.

        Args:
            d (Dict[str, Any]): Mapping holding the CLI output fields
                (``return_code``, ``raw_output``, the optional ``num_*`` counters and
                ``command``) alongside whatever ``DbtResult.from_dict`` reads.

        Returns:
            DbtCliOutput: The instance built from the validated entries of ``d``.
        """
        fields = {
            "return_code": check.int_elem(d, "return_code"),
            "raw_output": check.str_elem(d, "raw_output"),
        }
        # The counters are validated with opt_int_elem rather than int_elem.
        for count_key in ("num_pass", "num_warn", "num_error", "num_skip", "num_total"):
            fields[count_key] = check.opt_int_elem(d, count_key)
        fields["command"] = check.str_elem(d, "command")

        # The nested result is parsed from the same dictionary, after the checks above.
        return cls(result=DbtResult.from_dict(d), **fields)
예제 #3
0
def test_string_elem():
    """``check.str_elem`` returns string values and raises for ``None`` or non-strings."""
    ddict = {"a_str": "a", "a_num": 1, "a_none": None}

    assert check.str_elem(ddict, "a_str") == "a"

    # A key that is present but holds None must be rejected. The call itself is
    # expected to raise, so it is not wrapped in an assert.
    with pytest.raises(ElementCheckError):
        check.str_elem(ddict, "a_none")

    # A non-string value must be rejected as well.
    with pytest.raises(ElementCheckError):
        check.str_elem(ddict, "a_num")
예제 #4
0
def test_string_elem():
    '''``check.str_elem`` returns string values and raises for ``None`` or non-strings.'''
    ddict = {'a_str': 'a', 'a_num': 1, 'a_none': None}

    assert check.str_elem(ddict, 'a_str') == 'a'

    # A key that is present but holds None must be rejected. The call itself is
    # expected to raise, so it is not wrapped in an assert.
    with pytest.raises(ElementCheckError):
        check.str_elem(ddict, 'a_none')

    # A non-string value must be rejected as well.
    with pytest.raises(ElementCheckError):
        check.str_elem(ddict, 'a_num')
예제 #5
0
    def from_dict(cls, d: Dict[str, Any]) -> "DbtResult":
        """Create a :class:`DbtResult <dagster_dbt.DbtResult>` from a dictionary.

        Args:
            d (Dict[str, Any]): Mapping with ``logs`` and ``results`` lists of dicts plus
                ``generated_at`` and ``elapsed_time`` entries.

        Returns:
            DbtResult: The instance built from the validated entries of ``d``.
        """
        check.list_elem(d, "logs")
        log_entries = check.is_list(d["logs"], of_type=Dict)

        check.list_elem(d, "results")
        node_results = []
        # Each raw result dict is converted into a NodeResult, in order.
        for raw_result in check.is_list(d["results"], of_type=Dict):
            node_results.append(NodeResult.from_dict(raw_result))

        return cls(
            logs=log_entries,
            results=node_results,
            generated_at=check.str_elem(d, "generated_at"),
            elapsed_time=check.float_elem(d, "elapsed_time"),
        )
예제 #6
0
    def from_yaml(cls, file_path):
        '''Build an instance from the ``repository`` section of a YAML file.

        The section's ``fn`` entry names the target function. When ``module`` is
        set a ``ModuleCodePointer`` is used; otherwise the ``file`` entry is
        resolved relative to the directory containing ``file_path`` and wrapped
        in a ``FileCodePointer``. Scheduler and partitions pointers come from
        ``_handle_backcompat_pointers``.
        '''
        check.str_param(file_path, 'file_path')

        config = load_yaml_from_path(file_path)
        repo_section = check.dict_elem(config, 'repository')
        module_name = check.opt_str_elem(repo_section, 'module')
        file_name = check.opt_str_elem(repo_section, 'file')
        fn_name = check.str_elem(repo_section, 'fn')

        scheduler_pointer, partitions_pointer = _handle_backcompat_pointers(config, file_path)

        if not module_name:
            # rebase file in config off of the path in the config file
            config_dir = os.path.dirname(os.path.abspath(file_path))
            pointer = FileCodePointer(os.path.join(config_dir, file_name), fn_name)
        else:
            pointer = ModuleCodePointer(module_name, fn_name)

        return cls(
            pointer=pointer,
            yaml_path=file_path,
            scheduler_pointer=scheduler_pointer,
            partitions_pointer=partitions_pointer,
        )
 def _materialization_fn(info, inputs):
     '''Materialize the ``expr`` input as a table via ``CREATE TABLE ... AS``.

     The target table name is read from ``info.config['table_name']`` and the
     statement is executed on a connection obtained from
     ``info.context.resources.sa.engine``.
     '''
     sql_expr = inputs['expr']
     check.inst(sql_expr, DagsterSqlExpression)
     table = check.str_elem(info.config, 'table_name')
     # NOTE(review): the statement is assembled by string interpolation, so the
     # configured table name ends up in the SQL text verbatim.
     statement = 'CREATE TABLE {output_table_name} AS {query_text}'.format(
         output_table_name=table,
         query_text=sql_expr.query_text,
     )
     connection = info.context.resources.sa.engine.connect()
     connection.execute(statement)
예제 #8
0
파일: types.py 프로젝트: zuik/dagster
    def from_dict(cls, d: Dict[str, Any]) -> "DbtRpcOutput":
        """Create a :class:`DbtRpcOutput <dagster_dbt.DbtRpcOutput>` from a dictionary.

        Args:
            d (Dict[str, Any]): Mapping with string ``state``, ``start`` and ``end``
                entries, a float ``elapsed`` entry, and whatever
                ``DbtResult.from_dict`` reads.

        Returns:
            DbtRpcOutput: The instance built from the validated entries of ``d``.
        """
        state, start, end = [check.str_elem(d, key) for key in ("state", "start", "end")]
        elapsed = check.float_elem(d, "elapsed")

        # The nested result is parsed from the same dictionary, after the checks above.
        return cls(
            result=DbtResult.from_dict(d),
            state=state,
            start=start,
            end=end,
            elapsed=elapsed,
        )
예제 #9
0
    def from_dict(cls, d: Dict[str, Any]) -> "NodeResult":
        """Create a :class:`NodeResult <dagster_dbt.NodeResult>` from a dictionary.

        Args:
            d (Dict[str, Any]): Mapping describing one node result. ``unique_id`` is read
                from the top level when present, otherwise from the nested ``node`` dict.

        Returns:
            NodeResult: The instance built from the validated entries of ``d``.
        """
        if "unique_id" not in d:
            # When executing via RPC server or via CLI in 0.18.x or lower, we get unique id
            # within "node" schema
            node = check.dict_elem(d, "node")
            unique_id = check.str_elem(node, "unique_id")
        else:
            # When executing from the CLI in 0.19.x, we get unique_id as a top level attribute
            unique_id = check.str_elem(d, "unique_id")
            node = None

        error = check.opt_str_elem(d, "error")
        execution_time = check.float_elem(d, "execution_time")
        thread_id = check.opt_str_elem(d, "thread_id")

        check.list_elem(d, "timing")
        step_timings = []
        # Each timing entry carries a name plus ISO-formatted start/completion timestamps.
        for timing in check.is_list(d["timing"], of_type=Dict):
            step_timings.append(
                StepTiming(
                    name=timing["name"],
                    started_at=parser.isoparse(timing["started_at"]),
                    completed_at=parser.isoparse(timing["completed_at"]),
                )
            )

        return cls(
            node=node,
            unique_id=unique_id,
            step_timings=step_timings,
            error=error,
            execution_time=execution_time,
            thread_id=thread_id,
            table=check.opt_dict_elem(d, "table"),
        )
예제 #10
0
    def from_legacy_repository_yaml(file_path):
        """Build a ``CodePointer`` from the ``repository`` section of a legacy YAML file.

        A ``module`` entry yields a module-based pointer; otherwise the ``file``
        entry is rebased against ``file_path`` and a python-file pointer is
        returned. ``fn`` names the target function in both cases.
        """
        check.str_param(file_path, "file_path")
        config = load_yaml_from_path(file_path)
        repo_section = check.dict_elem(config, "repository")
        module_name = check.opt_str_elem(repo_section, "module")
        file_name = check.opt_str_elem(repo_section, "file")
        fn_name = check.str_elem(repo_section, "fn")

        if module_name:
            return CodePointer.from_module(module_name, fn_name)

        # rebase file in config off of the path in the config file
        python_file = rebase_file(file_name, file_path)
        return CodePointer.from_python_file(python_file, fn_name, None)
예제 #11
0
def _handle_backcompat_pointers(config, file_path):
    '''Resolve the deprecated ``scheduler``/``partitions`` keys into code pointers.

    Returns a ``(scheduler_pointer, partitions_pointer)`` tuple. Each element is
    ``None`` when its key is absent or falsy in ``config``. A deprecation
    warning is emitted whenever at least one of the two keys is set.
    '''
    check.dict_param(config, 'config')
    partitions = config.get('partitions')
    scheduler = config.get('scheduler')
    if not partitions and not scheduler:
        return None, None

    warnings.warn(
        '"scheduler" and "partitions" keys in repository.yaml are deprecated. '
        'Add definitions directly via RepositoryDefinition')

    def _pointer_from(section):
        # Turn one config section into a module- or file-based pointer.
        module_name = check.opt_str_elem(section, 'module')
        file_name = check.opt_str_elem(section, 'file')
        fn_name = check.str_elem(section, 'fn')

        if module_name:
            return ModuleCodePointer(module_name, fn_name)

        # rebase file in config off of the path in the config file
        config_dir = os.path.dirname(os.path.abspath(file_path))
        return FileCodePointer(os.path.join(config_dir, file_name), fn_name)

    scheduler_pointer = _pointer_from(scheduler) if scheduler else None
    partitions_pointer = _pointer_from(partitions) if partitions else None

    return (scheduler_pointer, partitions_pointer)
예제 #12
0
def _handle_backcompat_loaders(config, file_path):
    '''Resolve the deprecated ``scheduler``/``partitions`` keys into loader entrypoints.

    Args:
        config: The parsed repository yaml, as a dict.
        file_path: Path of the yaml file; ``file`` entries are rebased against
            its directory.

    Returns:
        ``None`` when neither key is set. Otherwise a dict that maps
        ``'scheduler'`` and/or ``'partitions'`` to a ``LoaderEntrypoint``, with
        one entry per key that is set in ``config``.
    '''
    check.dict_param(config, 'config')
    partitions = config.get('partitions')
    scheduler = config.get('scheduler')
    if not (partitions or scheduler):
        return None

    warnings.warn(
        '"scheduler" and "partitions" keys in repository.yaml are deprecated. '
        'Add definitions directly via RepositoryDefinition'
    )

    def _entrypoint_from(section):
        # Turn one config section into a module- or file-based entrypoint.
        module_name = check.opt_str_elem(section, 'module')
        file_name = check.opt_str_elem(section, 'file')
        fn_name = check.str_elem(section, 'fn')

        if module_name:
            return LoaderEntrypoint.from_module_target(module_name, fn_name)

        # rebase file in config off of the path in the config file
        file_name = os.path.join(os.path.dirname(os.path.abspath(file_path)), file_name)
        return LoaderEntrypoint.from_file_target(file_name, fn_name)

    backcompat_loaders = {}
    if scheduler:
        backcompat_loaders['scheduler'] = _entrypoint_from(scheduler)

    if partitions:
        # The module-based partitions entrypoint is stored in the dict like every
        # other entry, so the scheduler entry is kept and the return type stays a dict.
        backcompat_loaders['partitions'] = _entrypoint_from(partitions)

    return backcompat_loaders
예제 #13
0
    def from_dict(cls, d: Dict[str, Any]) -> "DbtCliOutput":
        """Constructs an instance of :class:`DbtCliOutput <dagster_dbt.DbtCliOutput>` from a
        dictionary.

        The input dictionary is left untouched: the parsed ``result`` is added to a shallow
        copy, and that copy supplies the constructor keyword arguments.

        Args:
            d (Dict[str, Any]): A dictionary with key-values to construct a :class:`DbtCliOutput
                <dagster_dbt.DbtCliOutput>`.

        Returns:
            DbtCliOutput: An instance of :class:`DbtCliOutput <dagster_dbt.DbtCliOutput>`.
        """
        # Validation only; the values themselves are passed through via the copy below.
        check.int_elem(d, "return_code")
        check.str_elem(d, "raw_output")
        check.opt_int_elem(d, "num_pass")
        check.opt_int_elem(d, "num_warn")
        check.opt_int_elem(d, "num_error")
        check.opt_int_elem(d, "num_skip")
        check.opt_int_elem(d, "num_total")

        # Work on a shallow copy so the caller's dictionary does not gain a "result" key.
        fields = dict(d)
        fields["result"] = DbtResult.from_dict(d)

        return cls(**fields)
예제 #14
0
    def from_yaml(file_path, from_handle=None):
        '''Build a ``LoaderEntrypoint`` from the ``repository`` section of a YAML file.

        A ``module`` entry yields a module-target entrypoint; otherwise the
        ``file`` entry is resolved relative to the directory containing
        ``file_path`` and a file-target entrypoint is returned. ``from_handle``
        is forwarded unchanged in both cases.
        '''
        check.str_param(file_path, 'file_path')

        config = load_yaml_from_path(file_path)
        repo_section = check.dict_elem(config, 'repository')
        module_name = check.opt_str_elem(repo_section, 'module')
        file_name = check.opt_str_elem(repo_section, 'file')
        fn_name = check.str_elem(repo_section, 'fn')

        if module_name:
            return LoaderEntrypoint.from_module_target(module_name, fn_name, from_handle)

        # rebase file in config off of the path in the config file
        config_dir = os.path.dirname(os.path.abspath(file_path))
        rebased_file = os.path.join(config_dir, file_name)
        return LoaderEntrypoint.from_file_target(rebased_file, fn_name, from_handle)
예제 #15
0
    def from_yaml(file_path):
        '''Build a code pointer from the ``repository`` section of a YAML file.

        Returns a ``ModuleCodePointer`` when ``module`` is set; otherwise a
        ``FileCodePointer`` whose file is the ``file`` entry resolved relative
        to the directory containing ``file_path``.
        '''
        check.str_param(file_path, 'file_path')
        config = load_yaml_from_path(file_path)
        repo_section = check.dict_elem(config, 'repository')
        module_name = check.opt_str_elem(repo_section, 'module')
        file_name = check.opt_str_elem(repo_section, 'file')
        fn_name = check.str_elem(repo_section, 'fn')

        if module_name:
            return ModuleCodePointer(module_name, fn_name)

        # rebase file in config off of the path in the config file
        config_dir = os.path.dirname(os.path.abspath(file_path))
        return FileCodePointer(
            python_file=os.path.join(config_dir, file_name),
            fn_name=fn_name,
        )
예제 #16
0
def load_repository_from_file(file_path):
    '''Load the target described by the ``repository`` section of a YAML file.

    A ``module`` entry is loaded through ``load_module_target_function``;
    otherwise the ``file`` entry is resolved relative to the directory
    containing ``file_path`` and loaded through ``load_file_target_function``.
    '''
    check.str_param(file_path, 'file_path')

    config = load_yaml_from_path(file_path)
    repo_section = check.dict_elem(config, 'repository')
    module_name = check.opt_str_elem(repo_section, 'module')
    file_name = check.opt_str_elem(repo_section, 'file')
    fn_name = check.str_elem(repo_section, 'fn')

    if module_name:
        module_target = ModuleTargetFunction(module_name, fn_name)
        return load_module_target_function(module_target)

    # rebase file in config off of the path in the config file
    config_dir = os.path.dirname(os.path.abspath(file_path))
    file_target = FileTargetFunction(os.path.join(config_dir, file_name), fn_name)
    return load_file_target_function(file_target)
예제 #17
0
def get_acceptable_entrypoint(repo_target_info):
    '''Return a module-target entrypoint for ``repo_target_info``, if there is one.

    Returns ``None`` for file-based targets (a repository yaml without a
    ``module`` entry, or a ``python_file`` + ``fn_name`` pair). Raises
    ``InvalidRepositoryLoadingComboError`` when no supported combination of
    fields is set.
    '''
    check.inst_param(repo_target_info, 'repo_target_info', RepositoryTargetInfo)

    if repo_target_info.repository_yaml:
        check.str_param(repo_target_info.repository_yaml, 'repository_yaml')
        config = load_yaml_from_path(repo_target_info.repository_yaml)
        repo_section = check.dict_elem(config, 'repository')
        module_name = check.opt_str_elem(repo_section, 'module')
        fn_name = check.str_elem(repo_section, 'fn')
        if not module_name:
            return None
        return entrypoint_from_module_target(module_name, fn_name)

    if repo_target_info.module_name and repo_target_info.fn_name:
        return entrypoint_from_module_target(
            repo_target_info.module_name, repo_target_info.fn_name
        )

    if repo_target_info.python_file and repo_target_info.fn_name:
        return None

    raise InvalidRepositoryLoadingComboError()
예제 #18
0
파일: cli.py 프로젝트: saket1994/dagster
def get_module_target_function(info):
    '''Return the ``ModuleTargetFunction`` described by ``info``, if there is one.

    Returns ``None`` for file-based targets (a repository yaml without a
    ``module`` entry, or a ``python_file`` + ``fn_name`` pair). Raises
    ``InvalidRepositoryLoadingComboError`` when no supported combination of
    fields is set.
    '''
    check.inst_param(info, 'info', RepositoryTargetInfo)

    if info.repository_yaml:
        # The yaml path is taken from the loading-mode data rather than from
        # info directly.
        file_path = create_repository_loading_mode_data(info).data
        check.str_param(file_path, 'file_path')
        config = load_yaml_from_path(file_path)
        repo_section = check.dict_elem(config, 'repository')
        module_name = check.opt_str_elem(repo_section, 'module')
        fn_name = check.str_elem(repo_section, 'fn')
        if not module_name:
            return None
        return ModuleTargetFunction(module_name=module_name, fn_name=fn_name)

    if info.module_name and info.fn_name:
        return ModuleTargetFunction(module_name=info.module_name, fn_name=info.fn_name)

    if info.python_file and info.fn_name:
        return None

    raise InvalidRepositoryLoadingComboError()
예제 #19
0
    def from_dict(cls, d: Dict[str, Any]) -> "DbtRpcOutput":
        """Constructs an instance of :class:`DbtRpcOutput <dagster_dbt.DbtRpcOutput>` from a
        dictionary.

        The input dictionary is left untouched: the parsed ``result`` is added to a shallow
        copy, and that copy supplies the constructor keyword arguments.

        Args:
            d (Dict[str, Any]): A dictionary with key-values to construct a :class:`DbtRpcOutput
                <dagster_dbt.DbtRpcOutput>`.

        Returns:
            DbtRpcOutput: An instance of :class:`DbtRpcOutput <dagster_dbt.DbtRpcOutput>`.
        """
        # Validation only; the values themselves are passed through via the copy below.
        check.str_elem(d, "state")
        check.str_elem(d, "start")
        check.str_elem(d, "end")
        check.float_elem(d, "elapsed")

        # Work on a shallow copy so the caller's dictionary does not gain a "result" key.
        fields = dict(d)
        fields["result"] = DbtResult.from_dict(d)

        return cls(**fields)
예제 #20
0
def get_workspace_load_target(kwargs: Dict[str, str]):
    """Translate workspace-related CLI keyword arguments into a load target.

    The options are examined in a fixed order and the first one that is set
    decides the result: ``workspace``, ``python_file``, ``module_name``,
    ``package_name``, ``grpc_port``, then ``grpc_socket``. Before building the
    target, each branch passes a list of other option names to
    ``_check_cli_arguments_none`` (presumably rejecting conflicting options;
    confirm against that helper).

    When ``are_all_keys_empty(kwargs, WORKSPACE_CLI_ARGS)`` is true, the result
    is an ``EmptyWorkspaceTarget`` if ``empty_workspace`` is set, otherwise a
    ``WorkspaceFileTarget`` for ``workspace.yaml`` if that path exists.

    Args:
        kwargs: The CLI keyword arguments, keyed by option name.

    Returns:
        One of ``EmptyWorkspaceTarget``, ``WorkspaceFileTarget``,
        ``PythonFileTarget``, ``ModuleTarget``, ``PackageTarget`` or
        ``GrpcServerTarget``.

    Raises:
        click.UsageError: If no arguments were given and ``workspace.yaml``
            does not exist.
    """
    check.dict_param(kwargs, "kwargs")
    # No explicit target options: fall back to an empty workspace or to a
    # workspace.yaml found via a relative path.
    if are_all_keys_empty(kwargs, WORKSPACE_CLI_ARGS):
        if kwargs.get("empty_workspace"):
            return EmptyWorkspaceTarget()
        if os.path.exists("workspace.yaml"):
            return WorkspaceFileTarget(paths=["workspace.yaml"])
        raise click.UsageError(
            "No arguments given and workspace.yaml not found.")

    # Explicit workspace file(s).
    if kwargs.get("workspace"):
        _check_cli_arguments_none(
            kwargs,
            "python_file",
            "working_directory",
            "module_name",
            "package_name",
            "attribute",
            "grpc_host",
            "grpc_port",
            "grpc_socket",
        )
        return WorkspaceFileTarget(
            paths=list(cast(Union[List, Tuple], kwargs.get("workspace"))))
    # Target loaded from a python file.
    if kwargs.get("python_file"):
        _check_cli_arguments_none(
            kwargs,
            "module_name",
            "package_name",
            "grpc_host",
            "grpc_port",
            "grpc_socket",
        )
        working_directory = get_working_directory_from_kwargs(kwargs)
        return PythonFileTarget(
            python_file=check.str_elem(kwargs, "python_file"),
            attribute=check.opt_str_elem(kwargs, "attribute"),
            working_directory=working_directory,
            location_name=None,
        )
    # Target loaded from a module.
    if kwargs.get("module_name"):
        _check_cli_arguments_none(
            kwargs,
            "package_name",
            "grpc_host",
            "grpc_port",
            "grpc_socket",
        )
        working_directory = get_working_directory_from_kwargs(kwargs)
        return ModuleTarget(
            module_name=check.str_elem(kwargs, "module_name"),
            attribute=check.opt_str_elem(kwargs, "attribute"),
            working_directory=working_directory,
            location_name=None,
        )
    # Target loaded from a package.
    if kwargs.get("package_name"):
        _check_cli_arguments_none(
            kwargs,
            "grpc_host",
            "grpc_port",
            "grpc_socket",
        )
        working_directory = get_working_directory_from_kwargs(kwargs)
        return PackageTarget(
            package_name=check.str_elem(kwargs, "package_name"),
            attribute=check.opt_str_elem(kwargs, "attribute"),
            working_directory=working_directory,
            location_name=None,
        )
    # gRPC server addressed by port; the host falls back to "localhost".
    if kwargs.get("grpc_port"):
        _check_cli_arguments_none(
            kwargs,
            "attribute",
            "working_directory",
            "grpc_socket",
        )
        return GrpcServerTarget(
            port=check.int_elem(kwargs, "grpc_port"),
            socket=None,
            host=check.opt_str_elem(kwargs, "grpc_host") or "localhost",
            location_name=None,
        )
    # gRPC server addressed by socket; the host falls back to "localhost".
    elif kwargs.get("grpc_socket"):
        _check_cli_arguments_none(
            kwargs,
            "attribute",
            "working_directory",
        )
        return GrpcServerTarget(
            port=None,
            socket=check.str_elem(kwargs, "grpc_socket"),
            host=check.opt_str_elem(kwargs, "grpc_host") or "localhost",
            location_name=None,
        )
    else:
        # None of the recognised options is set.
        _cli_load_invariant(False)
        # necessary for pyright, does not understand _cli_load_invariant(False) never returns
        assert False
def _table_name_read_fn(_context, arg_dict):
    '''Wrap the ``table_name`` entry of ``arg_dict`` in a ``DagsterSqlTableExpression``.

    ``_context`` is accepted but not used.
    '''
    check.dict_param(arg_dict, 'arg_dict')

    # probably verify that the table name exists?
    return DagsterSqlTableExpression(check.str_elem(arg_dict, 'table_name'))
예제 #22
0
import json

import pandas as pd
import sqlalchemy as sa

from dagster import check

if __name__ == '__main__':
    # Load the settings; the context manager guarantees the file handle is
    # closed even if parsing fails.
    with open('config.json') as config_file:
        config_obj = json.load(config_file)

    # The SQLite database file is named after the configured 'dbname'.
    engine = sa.create_engine('sqlite:///{dbname}.db'.format(
        dbname=check.str_elem(config_obj, 'dbname')))

    # Print each table's name followed by its contents.
    for table in ['num_table', 'sum_table', 'sum_sq_table']:
        table_df = pd.read_sql_table(table, con=engine)
        print(table)
        print(table_df)