def configurable_class_data(config_field):
    return ConfigurableClassData(
        check.str_elem(config_field, "module"),
        check.str_elem(config_field, "class"),
        yaml.dump(check.opt_dict_elem(config_field, "config"), default_flow_style=False),
    )
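# Example usage of configurable_class_data above: a hypothetical config_field that
# satisfies the element checks ("module" and "class" must be strings, "config" is an
# optional dict). The module/class names and config values here are made up.
example_config_field = {
    "module": "my_storage_module",
    "class": "MyRunStorage",
    "config": {"base_dir": "/tmp/storage"},
}
class_data = configurable_class_data(example_config_field)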
def from_dict(cls, d: Dict[str, Any]) -> "DbtCliOutput": """Constructs an instance of :class:`DbtCliOutput <dagster_dbt.DbtCliOutput>` from a dictionary. Args: d (Dict[str, Any]): A dictionary with key-values to construct a :class:`DbtCliOutput <dagster_dbt.DbtCliOutput>`. Returns: DbtCliOutput: An instance of :class:`DbtCliOutput <dagster_dbt.DbtCliOutput>`. """ return_code = check.int_elem(d, "return_code") raw_output = check.str_elem(d, "raw_output") num_pass = check.opt_int_elem(d, "num_pass") num_warn = check.opt_int_elem(d, "num_warn") num_error = check.opt_int_elem(d, "num_error") num_skip = check.opt_int_elem(d, "num_skip") num_total = check.opt_int_elem(d, "num_total") command = check.str_elem(d, "command") return cls( result=DbtResult.from_dict(d), return_code=return_code, raw_output=raw_output, num_pass=num_pass, num_warn=num_warn, num_error=num_error, num_skip=num_skip, num_total=num_total, command=command, )
def test_string_elem(): ddict = {"a_str": "a", "a_num": 1, "a_none": None} assert check.str_elem(ddict, "a_str") == "a" with pytest.raises(ElementCheckError): assert check.str_elem(ddict, "a_none") with pytest.raises(ElementCheckError): check.str_elem(ddict, "a_num")
def from_dict(cls, d: Dict[str, Any]) -> "DbtResult": """Constructs an instance of :class:`DbtResult <dagster_dbt.DbtResult>` from a dictionary. Args: d (Dict[str, Any]): A dictionary with key-values to construct a :class:`DbtResult <dagster_dbt.DbtResult>`. Returns: DbtResult: An instance of :class:`DbtResult <dagster_dbt.DbtResult>`. """ check.list_elem(d, "logs") logs = check.is_list(d["logs"], of_type=Dict) check.list_elem(d, "results") results = [ NodeResult.from_dict(d) for d in check.is_list(d["results"], of_type=Dict) ] generated_at = check.str_elem(d, "generated_at") elapsed_time = check.float_elem(d, "elapsed_time") return cls( logs=logs, results=results, generated_at=generated_at, elapsed_time=elapsed_time, )
def from_yaml(cls, file_path):
    check.str_param(file_path, 'file_path')

    config = load_yaml_from_path(file_path)
    repository_config = check.dict_elem(config, 'repository')
    module_name = check.opt_str_elem(repository_config, 'module')
    file_name = check.opt_str_elem(repository_config, 'file')
    fn_name = check.str_elem(repository_config, 'fn')

    scheduler_pointer, partitions_pointer = _handle_backcompat_pointers(config, file_path)

    if module_name:
        pointer = ModuleCodePointer(module_name, fn_name)
    else:
        # rebase file in config off of the path in the config file
        file_name = os.path.join(os.path.dirname(os.path.abspath(file_path)), file_name)
        pointer = FileCodePointer(file_name, fn_name)

    return cls(
        pointer=pointer,
        yaml_path=file_path,
        scheduler_pointer=scheduler_pointer,
        partitions_pointer=partitions_pointer,
    )
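# For reference, the repository.yaml shape these yaml loaders expect, expressed as the
# dict returned by load_yaml_from_path: either "module" or "file" is set under the
# required "repository" key, plus the required "fn" entry. Names are hypothetical.
example_repository_config = {
    'repository': {
        'module': 'my_package.my_repository',  # or 'file': 'my_repository.py'
        'fn': 'define_repository',
    }
}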
def _materialization_fn(info, inputs):
    sql_expr = inputs['expr']
    check.inst(sql_expr, DagsterSqlExpression)

    output_table_name = check.str_elem(info.config, 'table_name')
    total_sql = '''CREATE TABLE {output_table_name} AS {query_text}'''.format(
        output_table_name=output_table_name, query_text=sql_expr.query_text
    )
    info.context.resources.sa.engine.connect().execute(total_sql)
def from_dict(cls, d: Dict[str, Any]) -> "DbtRpcOutput": """Constructs an instance of :class:`DbtRpcOutput <dagster_dbt.DbtRpcOutput>` from a dictionary. Args: d (Dict[str, Any]): A dictionary with key-values to construct a :class:`DbtRpcOutput <dagster_dbt.DbtRpcOutput>`. Returns: DbtRpcOutput: An instance of :class:`DbtRpcOutput <dagster_dbt.DbtRpcOutput>`. """ state = check.str_elem(d, "state") start = check.str_elem(d, "start") end = check.str_elem(d, "end") elapsed = check.float_elem(d, "elapsed") result = DbtResult.from_dict(d) return cls(result=result, state=state, start=start, end=end, elapsed=elapsed)
def from_dict(cls, d: Dict[str, Any]) -> "NodeResult": """Constructs an instance of :class:`NodeResult <dagster_dbt.NodeResult>` from a dictionary. Args: d (Dict[str, Any]): A dictionary with key-values to construct a :class:`NodeResult <dagster_dbt.NodeResult>`. Returns: NodeResult: An instance of :class:`NodeResult <dagster_dbt.NodeResult>`. """ # When executing from the CLI in 0.19.x, we get unique_id as a top level attribute if "unique_id" in d: unique_id = check.str_elem(d, "unique_id") node = None # When executing via RPC server or via CLI in 0.18.x or lower, we get unique id within # "node" schema else: node = check.dict_elem(d, "node") unique_id = check.str_elem(node, "unique_id") error = check.opt_str_elem(d, "error") execution_time = check.float_elem(d, "execution_time") thread_id = check.opt_str_elem(d, "thread_id") check.list_elem(d, "timing") step_timings = [ StepTiming( name=d["name"], started_at=parser.isoparse(d["started_at"]), completed_at=parser.isoparse(d["completed_at"]), ) for d in check.is_list(d["timing"], of_type=Dict) ] table = check.opt_dict_elem(d, "table") return cls( node=node, unique_id=unique_id, step_timings=step_timings, error=error, execution_time=execution_time, thread_id=thread_id, table=table, )
def from_legacy_repository_yaml(file_path):
    check.str_param(file_path, "file_path")
    config = load_yaml_from_path(file_path)
    repository_config = check.dict_elem(config, "repository")
    module_name = check.opt_str_elem(repository_config, "module")
    file_name = check.opt_str_elem(repository_config, "file")
    fn_name = check.str_elem(repository_config, "fn")

    return (
        CodePointer.from_module(module_name, fn_name)
        if module_name
        # rebase file in config off of the path in the config file
        else CodePointer.from_python_file(rebase_file(file_name, file_path), fn_name, None)
    )
def _handle_backcompat_pointers(config, file_path):
    check.dict_param(config, 'config')
    partitions = config.get('partitions')
    scheduler = config.get('scheduler')
    if not (partitions or scheduler):
        return None, None

    warnings.warn(
        '"scheduler" and "partitions" keys in repository.yaml are deprecated. '
        'Add definitions directly via RepositoryDefinition'
    )

    scheduler_pointer = None
    partitions_pointer = None
    if scheduler:
        module_name = check.opt_str_elem(scheduler, 'module')
        file_name = check.opt_str_elem(scheduler, 'file')
        fn_name = check.str_elem(scheduler, 'fn')

        if module_name:
            scheduler_pointer = ModuleCodePointer(module_name, fn_name)
        else:
            # rebase file in config off of the path in the config file
            file_name = os.path.join(os.path.dirname(os.path.abspath(file_path)), file_name)
            scheduler_pointer = FileCodePointer(file_name, fn_name)

    if partitions:
        module_name = check.opt_str_elem(partitions, 'module')
        file_name = check.opt_str_elem(partitions, 'file')
        fn_name = check.str_elem(partitions, 'fn')

        if module_name:
            partitions_pointer = ModuleCodePointer(module_name, fn_name)
        else:
            # rebase file in config off of the path in the config file
            file_name = os.path.join(os.path.dirname(os.path.abspath(file_path)), file_name)
            partitions_pointer = FileCodePointer(file_name, fn_name)

    return (scheduler_pointer, partitions_pointer)
def _handle_backcompat_loaders(config, file_path):
    check.dict_param(config, 'config')
    partitions = config.get('partitions')
    scheduler = config.get('scheduler')
    if not (partitions or scheduler):
        return None

    warnings.warn(
        '"scheduler" and "partitions" keys in repository.yaml are deprecated. '
        'Add definitions directly via RepositoryDefinition'
    )
    backcompat_loaders = {}
    if scheduler:
        module_name = check.opt_str_elem(scheduler, 'module')
        file_name = check.opt_str_elem(scheduler, 'file')
        fn_name = check.str_elem(scheduler, 'fn')

        if module_name:
            backcompat_loaders['scheduler'] = LoaderEntrypoint.from_module_target(
                module_name, fn_name
            )
        else:
            # rebase file in config off of the path in the config file
            file_name = os.path.join(os.path.dirname(os.path.abspath(file_path)), file_name)
            backcompat_loaders['scheduler'] = LoaderEntrypoint.from_file_target(file_name, fn_name)

    if partitions:
        module_name = check.opt_str_elem(partitions, 'module')
        file_name = check.opt_str_elem(partitions, 'file')
        fn_name = check.str_elem(partitions, 'fn')

        if module_name:
            # store under 'partitions' rather than returning early, so both loaders
            # can be collected into the dict this function is documented to return
            backcompat_loaders['partitions'] = LoaderEntrypoint.from_module_target(
                module_name, fn_name
            )
        else:
            # rebase file in config off of the path in the config file
            file_name = os.path.join(os.path.dirname(os.path.abspath(file_path)), file_name)
            backcompat_loaders['partitions'] = LoaderEntrypoint.from_file_target(file_name, fn_name)

    return backcompat_loaders
def from_dict(cls, d: Dict[str, Any]) -> "DbtCliOutput": """Constructs an instance of :class:`DbtCliOutput <dagster_dbt.DbtCliOutput>` from a dictionary. Args: d (Dict[str, Any]): A dictionary with key-values to construct a :class:`DbtCliOutput <dagster_dbt.DbtCliOutput>`. Returns: DbtCliOutput: An instance of :class:`DbtCliOutput <dagster_dbt.DbtCliOutput>`. """ check.int_elem(d, "return_code") check.str_elem(d, "raw_output") check.opt_int_elem(d, "num_pass") check.opt_int_elem(d, "num_warn") check.opt_int_elem(d, "num_error") check.opt_int_elem(d, "num_skip") check.opt_int_elem(d, "num_total") d["result"] = DbtResult.from_dict(d) return cls(**d)
def from_yaml(file_path, from_handle=None):
    check.str_param(file_path, 'file_path')

    config = load_yaml_from_path(file_path)
    repository_config = check.dict_elem(config, 'repository')
    module_name = check.opt_str_elem(repository_config, 'module')
    file_name = check.opt_str_elem(repository_config, 'file')
    fn_name = check.str_elem(repository_config, 'fn')

    if module_name:
        return LoaderEntrypoint.from_module_target(module_name, fn_name, from_handle)
    else:
        # rebase file in config off of the path in the config file
        file_name = os.path.join(os.path.dirname(os.path.abspath(file_path)), file_name)
        return LoaderEntrypoint.from_file_target(file_name, fn_name, from_handle)
def from_yaml(file_path):
    check.str_param(file_path, 'file_path')

    config = load_yaml_from_path(file_path)
    repository_config = check.dict_elem(config, 'repository')
    module_name = check.opt_str_elem(repository_config, 'module')
    file_name = check.opt_str_elem(repository_config, 'file')
    fn_name = check.str_elem(repository_config, 'fn')

    return (
        ModuleCodePointer(module_name, fn_name)
        if module_name
        else FileCodePointer(
            # rebase file in config off of the path in the config file
            python_file=os.path.join(os.path.dirname(os.path.abspath(file_path)), file_name),
            fn_name=fn_name,
        )
    )
def load_repository_from_file(file_path):
    check.str_param(file_path, 'file_path')

    config = load_yaml_from_path(file_path)
    repository_config = check.dict_elem(config, 'repository')
    module_name = check.opt_str_elem(repository_config, 'module')
    file_name = check.opt_str_elem(repository_config, 'file')
    fn_name = check.str_elem(repository_config, 'fn')

    if module_name:
        return load_module_target_function(ModuleTargetFunction(module_name, fn_name))
    else:
        # rebase file in config off of the path in the config file
        file_name = os.path.join(os.path.dirname(os.path.abspath(file_path)), file_name)
        return load_file_target_function(FileTargetFunction(file_name, fn_name))
def get_acceptable_entrypoint(repo_target_info):
    check.inst_param(repo_target_info, 'repo_target_info', RepositoryTargetInfo)
    if repo_target_info.repository_yaml:
        check.str_param(repo_target_info.repository_yaml, 'repository_yaml')
        config = load_yaml_from_path(repo_target_info.repository_yaml)
        repository_config = check.dict_elem(config, 'repository')
        module_name = check.opt_str_elem(repository_config, 'module')
        fn_name = check.str_elem(repository_config, 'fn')
        if module_name:
            return entrypoint_from_module_target(module_name, fn_name)
        return None
    elif repo_target_info.module_name and repo_target_info.fn_name:
        return entrypoint_from_module_target(
            repo_target_info.module_name, repo_target_info.fn_name
        )
    elif repo_target_info.python_file and repo_target_info.fn_name:
        return None
    else:
        raise InvalidRepositoryLoadingComboError()
def get_module_target_function(info):
    check.inst_param(info, 'info', RepositoryTargetInfo)
    if info.repository_yaml:
        mode_data = create_repository_loading_mode_data(info)
        file_path = mode_data.data
        check.str_param(file_path, 'file_path')
        config = load_yaml_from_path(file_path)
        repository_config = check.dict_elem(config, 'repository')
        module_name = check.opt_str_elem(repository_config, 'module')
        fn_name = check.str_elem(repository_config, 'fn')
        if module_name:
            return ModuleTargetFunction(module_name=module_name, fn_name=fn_name)
        return None
    elif info.module_name and info.fn_name:
        return ModuleTargetFunction(module_name=info.module_name, fn_name=info.fn_name)
    elif info.python_file and info.fn_name:
        return None
    else:
        raise InvalidRepositoryLoadingComboError()
def from_dict(cls, d: Dict[str, Any]) -> "DbtRpcOutput": """Constructs an instance of :class:`DbtRpcOutput <dagster_dbt.DbtRpcOutput>` from a dictionary. Args: d (Dict[str, Any]): A dictionary with key-values to construct a :class:`DbtRpcOutput <dagster_dbt.DbtRpcOutput>`. Returns: DbtRpcOutput: An instance of :class:`DbtRpcOutput <dagster_dbt.DbtRpcOutput>`. """ check.str_elem(d, "state") check.str_elem(d, "start") check.str_elem(d, "end") check.float_elem(d, "elapsed") d["result"] = DbtResult.from_dict(d) return cls(**d)
def get_workspace_load_target(kwargs: Dict[str, str]):
    check.dict_param(kwargs, "kwargs")
    if are_all_keys_empty(kwargs, WORKSPACE_CLI_ARGS):
        if kwargs.get("empty_workspace"):
            return EmptyWorkspaceTarget()
        if os.path.exists("workspace.yaml"):
            return WorkspaceFileTarget(paths=["workspace.yaml"])
        raise click.UsageError("No arguments given and workspace.yaml not found.")

    if kwargs.get("workspace"):
        _check_cli_arguments_none(
            kwargs,
            "python_file",
            "working_directory",
            "module_name",
            "package_name",
            "attribute",
            "grpc_host",
            "grpc_port",
            "grpc_socket",
        )
        return WorkspaceFileTarget(paths=list(cast(Union[List, Tuple], kwargs.get("workspace"))))
    if kwargs.get("python_file"):
        _check_cli_arguments_none(
            kwargs,
            "module_name",
            "package_name",
            "grpc_host",
            "grpc_port",
            "grpc_socket",
        )
        working_directory = get_working_directory_from_kwargs(kwargs)
        return PythonFileTarget(
            python_file=check.str_elem(kwargs, "python_file"),
            attribute=check.opt_str_elem(kwargs, "attribute"),
            working_directory=working_directory,
            location_name=None,
        )
    if kwargs.get("module_name"):
        _check_cli_arguments_none(
            kwargs,
            "package_name",
            "grpc_host",
            "grpc_port",
            "grpc_socket",
        )
        working_directory = get_working_directory_from_kwargs(kwargs)
        return ModuleTarget(
            module_name=check.str_elem(kwargs, "module_name"),
            attribute=check.opt_str_elem(kwargs, "attribute"),
            working_directory=working_directory,
            location_name=None,
        )
    if kwargs.get("package_name"):
        _check_cli_arguments_none(
            kwargs,
            "grpc_host",
            "grpc_port",
            "grpc_socket",
        )
        working_directory = get_working_directory_from_kwargs(kwargs)
        return PackageTarget(
            package_name=check.str_elem(kwargs, "package_name"),
            attribute=check.opt_str_elem(kwargs, "attribute"),
            working_directory=working_directory,
            location_name=None,
        )
    if kwargs.get("grpc_port"):
        _check_cli_arguments_none(
            kwargs,
            "attribute",
            "working_directory",
            "grpc_socket",
        )
        return GrpcServerTarget(
            port=check.int_elem(kwargs, "grpc_port"),
            socket=None,
            host=check.opt_str_elem(kwargs, "grpc_host") or "localhost",
            location_name=None,
        )
    elif kwargs.get("grpc_socket"):
        _check_cli_arguments_none(
            kwargs,
            "attribute",
            "working_directory",
        )
        return GrpcServerTarget(
            port=None,
            socket=check.str_elem(kwargs, "grpc_socket"),
            host=check.opt_str_elem(kwargs, "grpc_host") or "localhost",
            location_name=None,
        )
    else:
        _cli_load_invariant(False)
        # necessary for pyright, which does not understand that
        # _cli_load_invariant(False) never returns
        assert False
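# Hypothetical CLI kwargs exercising one path through get_workspace_load_target above:
# loading from a Python file. Keys not shown are assumed absent, so the
# _check_cli_arguments_none guards pass; all names here are made up.
example_kwargs = {
    "python_file": "repo.py",
    "attribute": "my_repository",
    "working_directory": ".",
}
target = get_workspace_load_target(example_kwargs)  # -> PythonFileTarget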
def _table_name_read_fn(_context, arg_dict):
    check.dict_param(arg_dict, 'arg_dict')
    table_name = check.str_elem(arg_dict, 'table_name')
    # probably verify that the table name exists?
    return DagsterSqlTableExpression(table_name)
import json

import pandas as pd
import sqlalchemy as sa

from dagster import check

if __name__ == '__main__':
    # Read the config file with a context manager so the handle is closed.
    with open('config.json') as config_file:
        config_obj = json.load(config_file)

    engine = sa.create_engine(
        'sqlite:///{dbname}.db'.format(dbname=check.str_elem(config_obj, 'dbname'))
    )

    for table in ['num_table', 'sum_table', 'sum_sq_table']:
        table_df = pd.read_sql_table(table, con=engine)
        print(table)
        print(table_df)
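# For reference, a minimal config.json that satisfies the single str_elem check in the
# script above; the database name is made up.
import json

with open('config.json', 'w') as f:
    json.dump({'dbname': 'example'}, f)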