def __new__(
    cls,
    pipeline_name,
    solids,
    dependency_structure,
    mode_definition,
    logger_defs,
    ignored_solids,
):
    """Validate every field up front, then build the immutable record.

    Args:
        pipeline_name (str): Name of the pipeline this environment describes.
        solids (List[Solid]): Solid invocations included in the environment.
        dependency_structure (DependencyStructure): Inter-solid dependency info.
        mode_definition (ModeDefinition): Mode the environment is built for.
        logger_defs (Dict[str, LoggerDefinition]): Available loggers by name.
        ignored_solids (List[Solid]): Solids excluded from execution but still
            present for config purposes.
    """
    pipeline_name = check.str_param(pipeline_name, "pipeline_name")
    solids = check.list_param(solids, "solids", of_type=Solid)
    dependency_structure = check.inst_param(
        dependency_structure, "dependency_structure", DependencyStructure
    )
    mode_definition = check.inst_param(mode_definition, "mode_definition", ModeDefinition)
    logger_defs = check.dict_param(
        logger_defs, "logger_defs", key_type=str, value_type=LoggerDefinition
    )
    ignored_solids = check.list_param(ignored_solids, "ignored_solids", of_type=Solid)
    return super(EnvironmentClassCreationData, cls).__new__(
        cls,
        pipeline_name=pipeline_name,
        solids=solids,
        dependency_structure=dependency_structure,
        mode_definition=mode_definition,
        logger_defs=logger_defs,
        ignored_solids=ignored_solids,
    )
def construct_config_type_dictionary(solid_defs, environment_type):
    """Index every reachable config type by name and by key.

    Starts from the builtin config types, then folds in all types discovered
    on the solid definitions and the environment type. Raises
    DagsterInvalidDefinitionError when two *different* type instances collide
    on a name or key; re-registering the same type class is a no-op.

    Returns:
        Tuple[dict, dict]: (types by name, types by key).
    """
    check.list_param(solid_defs, 'solid_defs', ISolidDefinition)
    check.inst_param(environment_type, 'environment_type', ConfigType)

    by_name = {builtin.name: builtin for builtin in ALL_CONFIG_BUILTINS}
    by_key = {builtin.key: builtin for builtin in ALL_CONFIG_BUILTINS}

    discovered = list(_gather_all_config_types(solid_defs, environment_type))
    discovered.extend(_gather_all_schemas(solid_defs))

    for config_type in discovered:
        # Some config types are anonymous (no name); those are only keyed.
        name = config_type.name
        if name:
            existing = by_name.get(name)
            if existing is None:
                by_name[name] = config_type
            elif type(existing) is not type(config_type):
                raise DagsterInvalidDefinitionError(
                    (
                        'Type names must be unique. You have constructed two different '
                        'instances of types with the same name "{name}".'
                    ).format(name=name)
                )

        key = config_type.key
        existing_by_key = by_key.get(key)
        if existing_by_key is None:
            by_key[key] = config_type
        elif type(existing_by_key) is not type(config_type):
            raise DagsterInvalidDefinitionError(
                (
                    'Type keys must be unique. You have constructed two different '
                    'instances of types with the same key "{key}".'
                ).format(key=key)
            )

    return by_name, by_key
def construct_config_type_dictionary(node_defs, environment_type):
    """Index every reachable config type by given name and by key.

    Seeds the maps with the builtin config types (only named builtins enter
    the name map), then folds in all types discovered on the node definitions
    and the environment type. Two *different* type instances sharing a name
    raise DagsterInvalidDefinitionError; keys are assigned unconditionally,
    last writer wins.

    Returns:
        Tuple[dict, dict]: (types by given name, types by key).
    """
    check.list_param(node_defs, "node_defs", NodeDefinition)
    check.inst_param(environment_type, "environment_type", ConfigType)

    by_name = {}
    by_key = {}
    for builtin in ALL_CONFIG_BUILTINS:
        if builtin.given_name:
            by_name[builtin.given_name] = builtin
        by_key[builtin.key] = builtin

    discovered = list(_gather_all_config_types(node_defs, environment_type))
    discovered.extend(_gather_all_schemas(node_defs))

    for config_type in discovered:
        name = config_type.given_name
        if name:
            existing = by_name.get(name)
            if existing is None:
                by_name[name] = config_type
            elif type(existing) is not type(config_type):
                raise DagsterInvalidDefinitionError(
                    (
                        "Type names must be unique. You have constructed two different "
                        'instances of types with the same name "{name}".'
                    ).format(name=name)
                )
        by_key[config_type.key] = config_type

    return by_name, by_key
def define_solid_dictionary_cls(
    solids,
    ignored_solids,
    dependency_structure,
    parent_handle=None,
):
    """Build the config Shape holding one field per configurable solid.

    Ignored solids still receive a field (flagged ignored=True) so any
    user-supplied config for them continues to validate.

    Args:
        solids (List[Solid]): Solids to include normally.
        ignored_solids (Optional[List[Solid]]): Solids excluded from execution.
        dependency_structure (DependencyStructure): Dependency info for fields.
        parent_handle (Optional[SolidHandle]): Enclosing composite handle.

    Returns:
        Shape: config fields keyed by solid name.
    """
    check.list_param(solids, "solids", of_type=Solid)
    ignored_solids = check.opt_list_param(ignored_solids, "ignored_solids", of_type=Solid)
    check.inst_param(dependency_structure, "dependency_structure", DependencyStructure)
    check.opt_inst_param(parent_handle, "parent_handle", SolidHandle)

    fields = {}
    # One pass over both lists; only the `ignored` flag differs.
    for solid, ignored in [(s, False) for s in solids] + [(s, True) for s in ignored_solids]:
        if not solid.definition.has_config_entry:
            continue
        fields[solid.name] = define_isolid_field(
            solid,
            SolidHandle(solid.name, parent_handle),
            dependency_structure,
            ignored=ignored,
        )
    return Shape(fields)
def define_solid_dictionary_cls(
    solids,
    ignored_solids,
    dependency_structure,
    resource_defs,
    parent_handle=None,
):
    """Build the config Shape holding one field per configurable solid.

    Ignored solids still receive a field (flagged ignored=True) so any
    user-supplied config for them continues to validate, while execution
    can skip them.

    Args:
        solids (List[Solid]): Solids to include normally.
        ignored_solids (Optional[List[Solid]]): Solids excluded from execution.
        dependency_structure (DependencyStructure): Dependency info for fields.
        resource_defs: Resource definitions forwarded to field construction.
        parent_handle (Optional[SolidHandle]): Enclosing composite handle.

    Returns:
        Shape: config fields keyed by solid name; solids for which
        define_isolid_field returns a falsy field are omitted.
    """
    check.list_param(solids, "solids", of_type=Solid)
    ignored_solids = check.opt_list_param(ignored_solids, "ignored_solids", of_type=Solid)
    check.inst_param(dependency_structure, "dependency_structure", DependencyStructure)
    check.opt_inst_param(parent_handle, "parent_handle", SolidHandle)

    fields = {}
    # The two original loops were byte-identical except for the `ignored`
    # flag, so fold them into a single pass to remove the duplication.
    for solid, ignored in [(s, False) for s in solids] + [(s, True) for s in ignored_solids]:
        solid_field = define_isolid_field(
            solid,
            SolidHandle(solid.name, parent_handle),
            dependency_structure,
            resource_defs,
            ignored=ignored,
        )
        if solid_field:
            fields[solid.name] = solid_field

    return Shape(fields)
def _gather_all_config_types(node_defs, environment_type):
    """Yield every config type reachable from the node defs, then those of
    the environment type itself."""
    check.list_param(node_defs, "node_defs", NodeDefinition)
    check.inst_param(environment_type, "environment_type", ConfigType)

    for node_def in node_defs:
        for config_type in iterate_node_def_config_types(node_def):
            yield config_type

    for config_type in iterate_config_types(environment_type):
        yield config_type
def _gather_all_config_types(solid_defs, environment_type):
    """Yield every config type reachable from the solid defs, then those of
    the environment type itself."""
    check.list_param(solid_defs, 'solid_defs', ISolidDefinition)
    check.inst_param(environment_type, 'environment_type', ConfigType)

    for solid_def in solid_defs:
        yield from iterate_solid_def_types(solid_def)

    yield from iterate_config_types(environment_type)
def _gather_all_config_types(solid_defs, environment_type):
    """Yield every config type reachable from the solid defs, then those of
    the environment type itself."""
    check.list_param(solid_defs, "solid_defs", ISolidDefinition)
    check.inst_param(environment_type, "environment_type", ConfigType)

    for solid_def in solid_defs:
        yield from iterate_solid_def_config_types(solid_def)

    yield from iterate_config_types(environment_type)
def define_solid_dictionary_cls(solids, dependency_structure, parent_handle=None):
    """Build the config Shape holding one field per configurable solid.

    Args:
        solids (List[Solid]): Solid invocations to consider.
        dependency_structure (DependencyStructure): Dependency info for fields.
        parent_handle (Optional[SolidHandle]): Enclosing composite handle.

    Returns:
        Shape: config fields keyed by solid name; solids without a config
        entry are omitted.
    """
    check.list_param(solids, 'solids', of_type=Solid)
    check.inst_param(dependency_structure, 'dependency_structure', DependencyStructure)
    check.opt_inst_param(parent_handle, 'parent_handle', SolidHandle)

    return Shape(
        {
            solid.name: define_isolid_field(
                solid,
                SolidHandle(solid.name, parent_handle),
                dependency_structure,
            )
            for solid in solids
            if solid.definition.has_config_entry
        }
    )
def parse_solid_selection(pipeline_def, solid_selection):
    """Resolve solid selection queries to a set of qualified solid names.

    Takes a pipeline definition and a list of selection queries (including
    names of solid invocations — see syntax examples below) and returns the
    matching solid names. Only top-level solids are currently supported.

    Query syntax examples:
    - "some_solid": select "some_solid" itself
    - "*some_solid": select "some_solid" and all ancestors (upstream dependencies)
    - "some_solid*": select "some_solid" and all descendants (downstream dependencies)
    - "*some_solid*": select "some_solid" and all of its ancestors and descendants
    - "+some_solid": select "some_solid" and its ancestors at 1 level up
    - "some_solid+++": select "some_solid" and its descendants within 3 levels down

    Args:
        pipeline_def (PipelineDefinition): the pipeline to execute.
        solid_selection (List[str]): selection queries (including single
            solid names) to execute.

    Returns:
        FrozenSet[str]: deduplicated qualified solid names.

    Raises:
        DagsterInvalidSubsetError: if any clause matches no solids.
    """
    check.list_param(solid_selection, "solid_selection", of_type=str)

    # Fast path: a lone "*" selects every node without building a dep graph.
    if solid_selection == ["*"]:
        return frozenset(pipeline_def.graph.node_names())

    graph = generate_dep_graph(pipeline_def)
    selected = set()
    for clause in solid_selection:
        subset = clause_to_subset(graph, clause)
        if not subset:
            raise DagsterInvalidSubsetError(
                "No qualified {node_type} to execute found for {selection_type}={requested}".format(
                    requested=solid_selection,
                    node_type="ops" if pipeline_def.is_job else "solids",
                    selection_type="op_selection" if pipeline_def.is_job else "solid_selection",
                )
            )
        selected.update(subset)

    return frozenset(selected)
def __new__(cls, op_selection, resolved_op_selection, ignored_solids, parent_job_def):
    """Validate every field up front, then build the immutable record.

    Args:
        op_selection (List[str]): The raw op selection queries.
        resolved_op_selection (Set[str]): Resolved op names from the queries.
        ignored_solids (List[Node]): Nodes excluded by the selection.
        parent_job_def (JobDefinition): The unsubsetted parent job.
    """
    # Imported here, presumably to avoid a circular import at module load.
    from dagster.core.definitions.job_definition import JobDefinition

    op_selection = check.list_param(op_selection, "op_selection", str)
    resolved_op_selection = check.set_param(
        resolved_op_selection, "resolved_op_selection", str
    )
    ignored_solids = check.list_param(ignored_solids, "ignored_solids", Node)
    parent_job_def = check.inst_param(parent_job_def, "parent_job_def", JobDefinition)
    return super(OpSelectionData, cls).__new__(
        cls,
        op_selection=op_selection,
        resolved_op_selection=resolved_op_selection,
        ignored_solids=ignored_solids,
        parent_job_def=parent_job_def,
    )
def parse_step_selection(step_deps, step_selection):
    """Resolve step-key selection queries to a set of qualified step keys.

    Takes the dependency dictionary generated while building the execution
    plan plus a list of step-key selection queries. Only top-level solids
    are currently supported.

    Args:
        step_deps (Dict[str, Set[str]]): execution step dependencies; each
            key is a step key and the value is the set of its direct
            upstream step keys.
        step_selection (List[str]): step-key selection queries (including
            single step keys) to execute.

    Returns:
        FrozenSet[str]: deduplicated qualified step keys.

    Raises:
        DagsterExecutionStepNotFoundError: if a query names an unknown step.
        DagsterInvalidSubsetError: if any clause matches no steps.
    """
    check.list_param(step_selection, "step_selection", of_type=str)

    # Invert step_deps to obtain downstream edges; pre-seed every step key so
    # steps with no downstream dependencies still appear in the map.
    downstream_deps = defaultdict(set, {key: set() for key in step_deps})
    for downstream_key, upstream_keys in step_deps.items():
        for upstream_key in upstream_keys:
            downstream_deps[upstream_key].add(downstream_key)

    graph = {"upstream": step_deps, "downstream": downstream_deps}

    # Reject unknown step keys before traversing.
    requested_keys = parse_items_from_selection(step_selection)
    invalid_keys = [key for key in requested_keys if key not in step_deps]
    if invalid_keys:
        raise DagsterExecutionStepNotFoundError(
            f"Step selection refers to unknown step{'s' if len(invalid_keys) > 1 else ''}: {', '.join(invalid_keys)}",
            step_keys=invalid_keys,
        )

    selected = set()
    for clause in step_selection:
        subset = clause_to_subset(graph, clause)
        if not subset:
            raise DagsterInvalidSubsetError(
                "No qualified steps to execute found for step_selection={requested}".format(
                    requested=step_selection
                ),
            )
        selected.update(subset)

    return frozenset(selected)
def __new__(cls, pipeline_name, solids, dependency_structure, mode_definition, logger_defs):
    """Validate every field up front, then build the immutable record.

    Args:
        pipeline_name (str): Name of the pipeline this environment describes.
        solids (List[Solid]): Solid invocations included in the environment.
        dependency_structure (DependencyStructure): Inter-solid dependency info.
        mode_definition (ModeDefinition): Mode the environment is built for.
        logger_defs (Dict[str, LoggerDefinition]): Available loggers by name.
    """
    pipeline_name = check.str_param(pipeline_name, 'pipeline_name')
    solids = check.list_param(solids, 'solids', of_type=Solid)
    dependency_structure = check.inst_param(
        dependency_structure, 'dependency_structure', DependencyStructure
    )
    mode_definition = check.inst_param(mode_definition, 'mode_definition', ModeDefinition)
    logger_defs = check.dict_param(
        logger_defs, 'logger_defs', key_type=str, value_type=LoggerDefinition
    )
    return super(EnvironmentClassCreationData, cls).__new__(
        cls,
        pipeline_name=pipeline_name,
        solids=solids,
        dependency_structure=dependency_structure,
        mode_definition=mode_definition,
        logger_defs=logger_defs,
    )
def parse_solid_selection(pipeline_def, solid_selection):
    """Resolve solid selection queries to a set of qualified solid names.

    Takes a pipeline definition and a list of selection queries (including
    names of solid invocations — see syntax examples below) and returns the
    matching solid names. Only top-level solids are currently supported.

    Query syntax examples:
    - "some_solid": select "some_solid" itself
    - "*some_solid": select "some_solid" and all ancestors (upstream dependencies)
    - "some_solid*": select "some_solid" and all descendants (downstream dependencies)
    - "*some_solid*": select "some_solid" and all of its ancestors and descendants
    - "+some_solid": select "some_solid" and its ancestors at 1 level up
    - "some_solid+++": select "some_solid" and its descendants within 3 levels down

    Note:
    - If one of the query clauses is invalid, it is skipped and parsing
      continues with the remaining valid clauses.

    Args:
        pipeline_def (PipelineDefinition): the pipeline to execute.
        solid_selection (List[str]): selection queries (including single
            solid names) to execute.

    Returns:
        FrozenSet[str]: deduplicated qualified solid names; empty if no
        qualified subset was selected.
    """
    check.list_param(solid_selection, "solid_selection", of_type=str)

    graph = generate_dep_graph(pipeline_def)
    traverser = Traverser(graph=graph)

    selected = set()
    for clause in solid_selection:
        selected.update(clause_to_subset(traverser, graph, clause))

    return frozenset(selected)