Exemple #1
0
 def __new__(cls, pipeline_name, solids, dependency_structure, mode_definition,
             logger_defs, ignored_solids):
     """Run each argument through its ``check`` helper and build the instance.

     The value returned by every ``check.*_param`` call is what gets forwarded
     to the parent class's ``__new__`` under the matching keyword.

     Args:
         pipeline_name: validated with ``check.str_param``.
         solids: validated with ``check.list_param`` (``of_type=Solid``).
         dependency_structure: validated with ``check.inst_param`` against
             ``DependencyStructure``.
         mode_definition: validated with ``check.inst_param`` against
             ``ModeDefinition``.
         logger_defs: validated with ``check.dict_param`` (``str`` keys,
             ``LoggerDefinition`` values).
         ignored_solids: validated with ``check.list_param`` (``of_type=Solid``).

     Returns:
         Whatever the parent ``__new__`` returns for the validated fields.
     """
     build = super(EnvironmentClassCreationData, cls).__new__

     # The checks run in field order, matching the keyword order used below.
     checked_name = check.str_param(pipeline_name, "pipeline_name")
     checked_solids = check.list_param(solids, "solids", of_type=Solid)
     checked_deps = check.inst_param(
         dependency_structure, "dependency_structure", DependencyStructure
     )
     checked_mode = check.inst_param(mode_definition, "mode_definition", ModeDefinition)
     checked_loggers = check.dict_param(
         logger_defs, "logger_defs", key_type=str, value_type=LoggerDefinition
     )
     checked_ignored = check.list_param(ignored_solids, "ignored_solids", of_type=Solid)

     return build(
         cls,
         pipeline_name=checked_name,
         solids=checked_solids,
         dependency_structure=checked_deps,
         mode_definition=checked_mode,
         logger_defs=checked_loggers,
         ignored_solids=checked_ignored,
     )
Exemple #2
0
def construct_config_type_dictionary(solid_defs, environment_type):
    """Index every gathered config type by its ``name`` and by its ``key``.

    Both indexes start out holding the entries of ``ALL_CONFIG_BUILTINS`` and
    are then extended with everything produced by ``_gather_all_config_types``
    followed by ``_gather_all_schemas``.

    Args:
        solid_defs: list checked against ``ISolidDefinition``.
        environment_type: instance checked against ``ConfigType``.

    Returns:
        tuple: ``(type_dict_by_name, type_dict_by_key)``.

    Raises:
        DagsterInvalidDefinitionError: when a name or key is already taken by
            an object whose Python class differs from the new one.
    """
    check.list_param(solid_defs, 'solid_defs', ISolidDefinition)
    check.inst_param(environment_type, 'environment_type', ConfigType)

    by_name = {builtin.name: builtin for builtin in ALL_CONFIG_BUILTINS}
    by_key = {builtin.key: builtin for builtin in ALL_CONFIG_BUILTINS}

    # Drain both sources eagerly: config types first, schemas second.
    discovered = list(_gather_all_config_types(solid_defs, environment_type))
    discovered.extend(_gather_all_schemas(solid_defs))

    for candidate in discovered:
        type_name = candidate.name
        # Types without a (truthy) name are never put in the name index.
        if type_name:
            if type_name not in by_name:
                by_name[type_name] = candidate
            elif type(candidate) is not type(by_name[type_name]):
                raise DagsterInvalidDefinitionError(
                    (
                        'Type names must be unique. You have constructed two different '
                        'instances of types with the same name "{name}".'
                    ).format(name=type_name)
                )

        type_key = candidate.key
        if type_key not in by_key:
            by_key[type_key] = candidate
        elif type(candidate) is not type(by_key[type_key]):
            raise DagsterInvalidDefinitionError(
                (
                    'Type keys must be unique. You have constructed two different '
                    'instances of types with the same key "{key}".'
                ).format(key=type_key)
            )

    return by_name, by_key
Exemple #3
0
def construct_config_type_dictionary(node_defs, environment_type):
    """Index every gathered config type by ``given_name`` and by ``key``.

    The name index only ever holds types with a truthy ``given_name``. The key
    index is overwritten unconditionally, so the last type seen for a key wins.

    Args:
        node_defs: list checked against ``NodeDefinition``.
        environment_type: instance checked against ``ConfigType``.

    Returns:
        tuple: ``(type_dict_by_name, type_dict_by_key)``.

    Raises:
        DagsterInvalidDefinitionError: when a given name is already taken by an
            object whose Python class differs from the new one.
    """
    check.list_param(node_defs, "node_defs", NodeDefinition)
    check.inst_param(environment_type, "environment_type", ConfigType)

    by_name = {}
    for builtin in ALL_CONFIG_BUILTINS:
        if builtin.given_name:
            by_name[builtin.given_name] = builtin
    by_key = {builtin.key: builtin for builtin in ALL_CONFIG_BUILTINS}

    # Drain both sources eagerly: config types first, schemas second.
    discovered = list(_gather_all_config_types(node_defs, environment_type))
    discovered.extend(_gather_all_schemas(node_defs))

    for candidate in discovered:
        given = candidate.given_name
        if given:
            if given not in by_name:
                by_name[given] = candidate
            elif type(candidate) is not type(by_name[given]):
                raise DagsterInvalidDefinitionError(
                    (
                        "Type names must be unique. You have constructed two different "
                        'instances of types with the same name "{name}".'
                    ).format(name=given)
                )

        by_key[candidate.key] = candidate

    return by_name, by_key
def define_solid_dictionary_cls(solids, ignored_solids, dependency_structure, parent_handle=None):
    """Build a ``Shape`` with one field per solid that has a config entry.

    Regular solids are processed first, then ignored solids, so an ignored
    solid that shares a name with a regular one replaces the earlier field.

    Args:
        solids: list checked with ``of_type=Solid``.
        ignored_solids: optional list checked with ``of_type=Solid``; the
            value returned by ``check.opt_list_param`` is what gets iterated.
        dependency_structure: instance checked against ``DependencyStructure``.
        parent_handle: optional ``SolidHandle`` used as the parent of every
            handle created here.

    Returns:
        Shape: wraps a dict mapping solid name to the result of
        ``define_isolid_field``.
    """
    check.list_param(solids, "solids", of_type=Solid)
    ignored_solids = check.opt_list_param(ignored_solids, "ignored_solids", of_type=Solid)
    check.inst_param(dependency_structure, "dependency_structure", DependencyStructure)
    check.opt_inst_param(parent_handle, "parent_handle", SolidHandle)

    fields = {}
    # Same treatment for both groups; only the ``ignored`` flag differs.
    for group, is_ignored in ((solids, False), (ignored_solids, True)):
        for solid in group:
            if not solid.definition.has_config_entry:
                continue
            fields[solid.name] = define_isolid_field(
                solid,
                SolidHandle(solid.name, parent_handle),
                dependency_structure,
                ignored=is_ignored,
            )

    return Shape(fields)
def define_solid_dictionary_cls(
    solids,
    ignored_solids,
    dependency_structure,
    resource_defs,
    parent_handle=None,
):
    """Build a ``Shape`` with one field per solid that yields a truthy field.

    ``define_isolid_field`` is called for every solid; its result is kept only
    when truthy. Regular solids are processed before ignored ones, so a later
    entry with the same solid name replaces an earlier one.

    Args:
        solids: list checked with ``of_type=Solid``.
        ignored_solids: optional list checked with ``of_type=Solid``; the
            value returned by ``check.opt_list_param`` is what gets iterated.
        dependency_structure: instance checked against ``DependencyStructure``.
        resource_defs: forwarded untouched to ``define_isolid_field``.
        parent_handle: optional ``SolidHandle`` used as the parent of every
            handle created here.

    Returns:
        Shape: wraps a dict mapping solid name to its field.
    """
    check.list_param(solids, "solids", of_type=Solid)
    ignored_solids = check.opt_list_param(ignored_solids, "ignored_solids", of_type=Solid)
    check.inst_param(dependency_structure, "dependency_structure", DependencyStructure)
    check.opt_inst_param(parent_handle, "parent_handle", SolidHandle)

    fields = {}
    # Same treatment for both groups; only the ``ignored`` flag differs.
    for group, is_ignored in ((solids, False), (ignored_solids, True)):
        for solid in group:
            handle = SolidHandle(solid.name, parent_handle)
            candidate_field = define_isolid_field(
                solid, handle, dependency_structure, resource_defs, ignored=is_ignored
            )
            if candidate_field:
                fields[solid.name] = candidate_field

    return Shape(fields)
Exemple #6
0
def _gather_all_config_types(node_defs, environment_type):
    """Yield the config types of each node definition, then of the environment type.

    This is a generator, so the parameter checks only run once iteration starts.

    Args:
        node_defs: list checked against ``NodeDefinition``.
        environment_type: instance checked against ``ConfigType``.

    Yields:
        Each item from ``iterate_node_def_config_types`` for every node
        definition in order, followed by each item from
        ``iterate_config_types(environment_type)``.
    """
    check.list_param(node_defs, "node_defs", NodeDefinition)
    check.inst_param(environment_type, "environment_type", ConfigType)

    for node_def in node_defs:
        for node_config_type in iterate_node_def_config_types(node_def):
            yield node_config_type

    for env_config_type in iterate_config_types(environment_type):
        yield env_config_type
Exemple #7
0
def _gather_all_config_types(solid_defs, environment_type):
    """Yield the types of each solid definition, then of the environment type.

    This is a generator, so the parameter checks only run once iteration starts.

    Args:
        solid_defs: list checked against ``ISolidDefinition``.
        environment_type: instance checked against ``ConfigType``.

    Yields:
        Each item from ``iterate_solid_def_types`` for every solid definition
        in order, followed by each item from
        ``iterate_config_types(environment_type)``.
    """
    check.list_param(solid_defs, 'solid_defs', ISolidDefinition)
    check.inst_param(environment_type, 'environment_type', ConfigType)

    for definition in solid_defs:
        yield from iterate_solid_def_types(definition)

    yield from iterate_config_types(environment_type)
Exemple #8
0
def _gather_all_config_types(solid_defs, environment_type):
    """Yield the config types of each solid definition, then of the environment type.

    This is a generator, so the parameter checks only run once iteration starts.

    Args:
        solid_defs: list checked against ``ISolidDefinition``.
        environment_type: instance checked against ``ConfigType``.

    Yields:
        Each item from ``iterate_solid_def_config_types`` for every solid
        definition in order, followed by each item from
        ``iterate_config_types(environment_type)``.
    """
    check.list_param(solid_defs, "solid_defs", ISolidDefinition)
    check.inst_param(environment_type, "environment_type", ConfigType)

    for definition in solid_defs:
        yield from iterate_solid_def_config_types(definition)

    yield from iterate_config_types(environment_type)
def define_solid_dictionary_cls(solids, dependency_structure, parent_handle=None):
    """Build a ``Shape`` with one field per solid that has a config entry.

    Args:
        solids: list checked with ``of_type=Solid``.
        dependency_structure: instance checked against ``DependencyStructure``.
        parent_handle: optional ``SolidHandle`` used as the parent of every
            handle created here.

    Returns:
        Shape: wraps a dict mapping solid name to the result of
        ``define_isolid_field``; solids whose definition reports no config
        entry are left out.
    """
    check.list_param(solids, 'solids', of_type=Solid)
    check.inst_param(dependency_structure, 'dependency_structure', DependencyStructure)
    check.opt_inst_param(parent_handle, 'parent_handle', SolidHandle)

    configurable_fields = {
        solid.name: define_isolid_field(
            solid, SolidHandle(solid.name, parent_handle), dependency_structure
        )
        for solid in solids
        if solid.definition.has_config_entry
    }

    return Shape(configurable_fields)
Exemple #10
0
def parse_solid_selection(pipeline_def, solid_selection):
    """Resolve a list of solid selection queries into a set of solid names.

    Each query (a plain solid invocation name, or a name decorated with the
    operators shown below) is resolved against the pipeline's dependency graph
    and the results are unioned.

    It currently only supports top-level solids.

    Query syntax examples:
    - "some_solid": select "some_solid" itself
    - "*some_solid": select "some_solid" and all ancestors (upstream dependencies)
    - "some_solid*": select "some_solid" and all descendants (downstream dependencies)
    - "*some_solid*": select "some_solid" and all of its ancestors and descendants
    - "+some_solid": select "some_solid" and its ancestors at 1 level up
    - "some_solid+++": select "some_solid" and its descendants within 3 levels down

    Note:
    - A selection consisting of the single query "*" selects every node name of
        ``pipeline_def.graph`` without building the dependency graph.

    Args:
        pipeline_def (PipelineDefinition): the pipeline to execute.
        solid_selection (List[str]): a list of the solid selection queries (including single solid
            names) to execute.

    Returns:
        FrozenSet[str]: a frozenset of qualified deduplicated solid names.

    Raises:
        DagsterInvalidSubsetError: if any single query resolves to an empty subset.
    """
    check.list_param(solid_selection, "solid_selection", of_type=str)

    # special case: select all
    if len(solid_selection) == 1 and solid_selection[0] == "*":
        return frozenset(pipeline_def.graph.node_names())

    graph = generate_dep_graph(pipeline_def)
    selected = set()

    for clause in solid_selection:
        matched = clause_to_subset(graph, clause)
        if len(matched) > 0:
            selected.update(matched)
            continue

        # An empty match is an error; the wording depends on job vs pipeline.
        if pipeline_def.is_job:
            node_type, selection_type = "ops", "op_selection"
        else:
            node_type, selection_type = "solids", "solid_selection"
        raise DagsterInvalidSubsetError(
            "No qualified {node_type} to execute found for {selection_type}={requested}".format(
                requested=solid_selection,
                node_type=node_type,
                selection_type=selection_type,
            )
        )

    return frozenset(selected)
Exemple #11
0
    def __new__(
        cls,
        op_selection,
        resolved_op_selection,
        ignored_solids,
        parent_job_def,
    ):
        """Run each argument through its ``check`` helper and build the instance.

        The value returned by every ``check.*_param`` call is what gets
        forwarded to the parent class's ``__new__`` under the matching keyword.

        Args:
            op_selection: validated with ``check.list_param`` (items of ``str``).
            resolved_op_selection: validated with ``check.set_param`` (items of
                ``str``).
            ignored_solids: validated with ``check.list_param`` (items of
                ``Node``).
            parent_job_def: validated with ``check.inst_param`` against
                ``JobDefinition``.

        Returns:
            Whatever the parent ``__new__`` returns for the validated fields.
        """
        # Imported at call time rather than module level.
        # NOTE(review): presumably to avoid a circular import — confirm.
        from dagster.core.definitions.job_definition import JobDefinition

        build = super(OpSelectionData, cls).__new__

        # The checks run in field order, matching the keyword order used below.
        checked_selection = check.list_param(op_selection, "op_selection", str)
        checked_resolved = check.set_param(
            resolved_op_selection, "resolved_op_selection", str
        )
        checked_ignored = check.list_param(ignored_solids, "ignored_solids", Node)
        checked_parent = check.inst_param(parent_job_def, "parent_job_def", JobDefinition)

        return build(
            cls,
            op_selection=checked_selection,
            resolved_op_selection=checked_resolved,
            ignored_solids=checked_ignored,
            parent_job_def=checked_parent,
        )
Exemple #12
0
def parse_step_selection(step_deps, step_selection):
    """Resolve a list of step key selection queries into a set of step keys.

    The upstream dependency map is inverted to obtain downstream dependencies,
    both directions are handed to ``clause_to_subset`` as a graph, and the
    per-query results are unioned.

    NOTE(review): the previous docstring said "It currently only supports
    top-level solids" — confirm whether that limitation applies to step keys.

    Args:
        step_deps (Dict[str, Set[str]]): a dictionary of execution step dependency where the key is
            a step key and the value is a set of direct upstream dependency of the step.
        step_selection (List[str]): a list of the step key selection queries (including single
            step key) to execute.

    Returns:
        FrozenSet[str]: a frozenset of qualified deduplicated step keys.

    Raises:
        DagsterExecutionStepNotFoundError: if the selection names a step key that is not a key
            of ``step_deps``.
        DagsterInvalidSubsetError: if any single query resolves to an empty subset.
    """
    check.list_param(step_selection, "step_selection", of_type=str)

    # Invert step_deps. Every known step is seeded with an empty set first so
    # that steps nothing depends on still show up as keys.
    downstream_deps = defaultdict(set)
    for known_key in step_deps:
        downstream_deps[known_key] = set()
    for dependent_key, prerequisite_keys in step_deps.items():
        for prerequisite_key in prerequisite_keys:
            downstream_deps[prerequisite_key].add(dependent_key)

    graph = {"upstream": step_deps, "downstream": downstream_deps}

    # Reject unknown step keys before resolving any clause.
    invalid_keys = []
    for requested_key in parse_items_from_selection(step_selection):
        if requested_key not in step_deps:
            invalid_keys.append(requested_key)
    if invalid_keys:
        raise DagsterExecutionStepNotFoundError(
            f"Step selection refers to unknown step{'s' if len(invalid_keys)> 1 else ''}: {', '.join(invalid_keys)}",
            step_keys=invalid_keys,
        )

    selected = set()
    for clause in step_selection:
        matched = clause_to_subset(graph, clause)
        if len(matched) > 0:
            selected.update(matched)
            continue
        raise DagsterInvalidSubsetError(
            "No qualified steps to execute found for step_selection={requested}".format(
                requested=step_selection
            ),
        )

    return frozenset(selected)
 def __new__(
     cls,
     pipeline_name,
     solids,
     dependency_structure,
     mode_definition,
     logger_defs,
 ):
     """Run each argument through its ``check`` helper and build the instance.

     The value returned by every ``check.*_param`` call is what gets forwarded
     to the parent class's ``__new__`` under the matching keyword.

     Args:
         pipeline_name: validated with ``check.str_param``.
         solids: validated with ``check.list_param`` (``of_type=Solid``).
         dependency_structure: validated with ``check.inst_param`` against
             ``DependencyStructure``.
         mode_definition: validated with ``check.inst_param`` against
             ``ModeDefinition``.
         logger_defs: validated with ``check.dict_param`` (``str`` keys,
             ``LoggerDefinition`` values).

     Returns:
         Whatever the parent ``__new__`` returns for the validated fields.
     """
     build = super(EnvironmentClassCreationData, cls).__new__

     # The checks run in field order, matching the keyword order used below.
     checked_name = check.str_param(pipeline_name, 'pipeline_name')
     checked_solids = check.list_param(solids, 'solids', of_type=Solid)
     checked_deps = check.inst_param(
         dependency_structure, 'dependency_structure', DependencyStructure
     )
     checked_mode = check.inst_param(mode_definition, 'mode_definition', ModeDefinition)
     checked_loggers = check.dict_param(
         logger_defs, 'logger_defs', key_type=str, value_type=LoggerDefinition
     )

     return build(
         cls,
         pipeline_name=checked_name,
         solids=checked_solids,
         dependency_structure=checked_deps,
         mode_definition=checked_mode,
         logger_defs=checked_loggers,
     )
Exemple #14
0
def parse_solid_selection(pipeline_def, solid_selection):
    """Resolve a list of solid selection queries into a set of solid names.

    Each query (a plain solid invocation name, or a name decorated with the
    operators shown below) is resolved with ``clause_to_subset`` against the
    pipeline's dependency graph, and the results are unioned.

    It currently only supports top-level solids.

    Query syntax examples:
    - "some_solid": select "some_solid" itself
    - "*some_solid": select "some_solid" and all ancestors (upstream dependencies)
    - "some_solid*": select "some_solid" and all descendants (downstream dependencies)
    - "*some_solid*": select "some_solid" and all of its ancestors and descendants
    - "+some_solid": select "some_solid" and its ancestors at 1 level up
    - "some_solid+++": select "some_solid" and its descendants within 3 levels down

    Note:
    - A query for which ``clause_to_subset`` produces nothing simply adds nothing to the result;
        no error is raised here.

    Args:
        pipeline_def (PipelineDefinition): the pipeline to execute.
        solid_selection (List[str]): a list of the solid selection queries (including single solid
            names) to execute.

    Returns:
        FrozenSet[str]: a frozenset of qualified deduplicated solid names, empty if no qualified
            subset selected.
    """
    check.list_param(solid_selection, "solid_selection", of_type=str)

    graph = generate_dep_graph(pipeline_def)
    traverser = Traverser(graph=graph)

    # Union the per-clause results; the frozenset deduplicates names.
    return frozenset(
        solid_name
        for clause in solid_selection
        for solid_name in clause_to_subset(traverser, graph, clause)
    )