Example #1
def collect_requirements(graph) -> Tuple[iset, iset]:
    """Collect & split datanodes in (possibly overlapping) `needs`/`provides`."""
    operations = list(yield_ops(graph))
    provides = iset(p for op in operations for p in op.provides)
    needs = iset(
        _optionalized(graph, n) for op in operations for n in op.needs)
    return needs, provides
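
A minimal, runnable sketch (assuming only `boltons`) of why `IndexedSet` (aliased `iset` throughout these examples) is used instead of a plain `set`: it dedupes while preserving insertion order, even across set operations::

    from boltons.setutils import IndexedSet as iset

    provides = iset(["model", "stats", "model"])    # dedupes, keeps order
    assert list(provides) == ["model", "stats"]
    assert list(provides - {"stats"}) == ["model"]  # difference keeps order too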
Example #2
    def validate(self, inputs: Items = UNSET, outputs: Items = UNSET):
        """
        Scream on invalid inputs, outputs or no operations in graph.

        :param inputs:
            the inputs that this plan was :term:`compile`\\d for, or MORE;
            will scream if LESS...
        :param outputs:
            the outputs that this plan was :term:`compile`\\d for, or LESS;
            will scream if MORE...

        :raises ValueError:
            *Unsolvable graph...*
                if it cannot produce any `outputs` from the given `inputs`.
            *Plan needs more inputs...*
                if given `inputs` mismatched plan's :attr:`needs`.
            *Unreachable outputs...*
                if net cannot produce asked `outputs`.

        """
        if not self.dag:
            raise ValueError(
                f"Unsolvable graph:\n  +--{self.net}"
                f"\n  +--possible inputs: {list(self.net.needs)}"
                f"\n  +--possible outputs: {list(self.net.provides)}"
            )

        if inputs is UNSET:
            inputs = self.needs
        if outputs is UNSET:
            outputs = self.provides

        # Check plan<-->inputs mismatch.
        #
        missing = iset(self.needs) - set(inputs)
        if missing:
            raise ValueError(
                f"Plan needs more inputs: {list(missing)}"
                f"\n  given inputs: {list(inputs)}\n  {self}"
            )

        if outputs:
            unknown = (
                iset(astuple(outputs, "outputs", allowed_types=abc.Sequence))
                - self.provides
            )
            if unknown:
                raise ValueError(
                    f"Unreachable outputs {list(unknown)}\n  for given inputs {list(unknown)}"
                    f"\n for graph: {self}\n  {self}"
                )
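
A stripped-down sketch of the same validation contract, with plain sets and hypothetical names (`needs`/`provides` stand in for the plan attributes above)::

    def validate_io(needs, provides, inputs, outputs):
        missing = set(needs) - set(inputs)            # plan needs more inputs?
        if missing:
            raise ValueError(f"Plan needs more inputs: {sorted(missing)}")
        unknown = set(outputs or ()) - set(provides)  # unreachable outputs?
        if unknown:
            raise ValueError(f"Unreachable outputs: {sorted(unknown)}")

    validate_io({"a", "b"}, {"c"}, inputs={"a", "b", "x"}, outputs={"c"})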
Example #3
    def _filter_projects_by_pnames(self, projects, version, *pnames):
        """Separate `version` from `pnames`, scream if unknown pnames."""
        if pnames:
            all_pnames = [prj.pname for prj in projects]
            pnames = iset(pnames)
            unknown_projects = (pnames - iset(all_pnames))
            if unknown_projects:
                raise cmdlets.CmdException(
                    "Unknown project(s): %s\n  Choose from existing one(s): %s"
                    % (', '.join(unknown_projects), ', '.join(all_pnames)))

            projects = [p for p in projects if p.pname in pnames]

        return version, projects
Example #4
    def _inherit_parent_cmd(self, change):
        """ Inherit config-related stuff from up the cmd-chain. """
        if self.parent:
            ## Collect parents, ordered like this:
            #    subapp, self, parent1, ...
            #
            cmd_chain = self.my_cmd_chain()

            ## Collect and merge SPECs separately,
            #  so that sub-cmd SPECs are prepended before those of the parents.
            #
            conf_classes = list(
                itz.concat(cmd.conf_classes for cmd in cmd_chain))

            ## Merge aliases/flags reversed.
            #
            cmd_aliases = dtz.merge(cmd.cmd_aliases for cmd in cmd_chain[::-1])
            cmd_flags = dtz.merge(cmd.cmd_flags for cmd in cmd_chain[::-1])
        else:
            ## We are root.

            cmd_chain = [self]
            conf_classes = list(self.conf_classes)
            cmd_aliases = self.cmd_aliases
            cmd_flags = self.cmd_flags

        cmd_classes = [type(cmd) for cmd in cmd_chain]
        self.classes = list(iset(cmd_classes + conf_classes))
        self.aliases.update(cmd_aliases)
        self.flags.update(cmd_flags)
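
Assuming `dtz` aliases `toolz.dicttoolz` (a guess from the alias names), `merge` lets later dicts win, which is why the chain is reversed above: commands nearer the sub-app override their parents::

    from toolz import dicttoolz as dtz

    cmd_chain = [{"v": "verbose"},                 # sub-app (must win)
                 {"v": "version", "q": "quiet"}]   # root parent
    assert dtz.merge(cmd_chain[::-1]) == {"v": "verbose", "q": "quiet"}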
Example #5
    def load_config_files(self):
        """Load default user-specified overrides config files.


        Config-files in descending orders:

        - user-overrides:
          - :envvar:`<APPNAME>_CONFIG_FILE`, or if not set,
          - :attr:`config_file`;

        - default config-files:
            - ~/.<appname>/<appname>_config.{json,py} and
            - <this-file's-folder>/<appname>_config.{json,py}.
        """
        # Load "standard" configs,
        #      path-list in descending priority order.
        #
        paths = list(iset([default_config_dir(), _mydir]))
        self.load_config_file(default_config_fname(), path=paths)

        # Load "user" configs.
        #
        user_conf_fpaths = self.user_config_fpaths
        for fp in user_conf_fpaths[::-1]:
            cdir, cfname = osp.split(fp)
            self.load_config_file(cfname, path=cdir)
Example #6
def _topo_sort_nodes(dag) -> iset:
    """
    Topo-sort dag by execution order & operation-insertion order to break ties.

    This means (probably!?) that the first inserted wins the `needs`, but
    the last one wins the `provides` (and the final solution).

    Inform user in case of cycles.
    """
    node_keys = dict(zip(dag.nodes, count()))
    try:
        return iset(nx.lexicographical_topological_sort(dag,
                                                        key=node_keys.get))
    except nx.NetworkXUnfeasible as ex:
        import sys
        from textwrap import dedent

        tb = sys.exc_info()[2]
        msg = dedent(f"""
            {ex}

            TIP:
                Launch a post-mortem debugger, move 3 frames UP, and
                plot the `graphtik.planning.Network` class in `self`
                to discover the cycle.

                If GRAPHTIK_DEBUG is enabled, this plot will be stored in the tmp-folder
                automatically :-)
            """)
        raise nx.NetworkXUnfeasible(msg).with_traceback(tb)
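
The tie-breaking relies on :func:`networkx.lexicographical_topological_sort` with a `key`; a minimal demo::

    from itertools import count
    import networkx as nx

    dag = nx.DiGraph([("a", "c"), ("b", "c")])
    node_keys = dict(zip(dag.nodes, count()))  # insertion order breaks ties
    assert list(
        nx.lexicographical_topological_sort(dag, key=node_keys.get)
    ) == ["a", "b", "c"]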
Example #7
def _slices_to_ids(slices, thelist):
    from boltons.setutils import IndexedSet as iset

    all_ids = list(range(len(thelist)))
    mask_ids = iset()
    for aslice in slices:
        mask_ids.update(all_ids[aslice])

    return list(mask_ids)
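
Usage sketch: first-seen ids win and insertion order is preserved, even for overlapping or out-of-order slices::

    assert _slices_to_ids([slice(3, 6), slice(0, 2)], list(range(8))) \
        == [3, 4, 5, 0, 1]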
Example #8
    def __call__(self, *operations):
        """
        Composes a collection of operations into a single computation graph,
        obeying the ``merge`` property, if set in the constructor.

        :param operations:
            Each argument should be an operation instance created using
            ``operation``.

        :return:
            Returns a special type of operation class, which represents an
            entire computation graph as a single operation.
        """
        assert len(operations), "no operations provided to compose"

        # If merge is desired, deduplicate operations before building network
        if self.merge:
            merge_set = iset()  # Preserve given node order.
            for op in operations:
                if isinstance(op, NetworkOperation):
                    netop_nodes = nx.topological_sort(op.net.graph)
                    merge_set.update(s for s in netop_nodes
                                     if isinstance(s, Operation))
                else:
                    merge_set.add(op)
            operations = merge_set

        provides = iset(p for op in operations for p in op.provides)
        # Mark them all as optional, now that #18 calmly ignores
        # non-fully satisfied operations.
        needs = iset(optional(n) for op in operations
                     for n in op.needs) - provides

        # Build network
        net = Network()
        for op in operations:
            net.add_op(op)

        return NetworkOperation(name=self.name,
                                needs=needs,
                                provides=provides,
                                params={},
                                net=net)
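
The boundary `needs` of the composed graph are whatever member operations consume but none of them produces; a plain-data sketch of that derivation::

    from boltons.setutils import IndexedSet as iset

    ops = [{"needs": ["a"], "provides": ["b"]},
           {"needs": ["b", "c"], "provides": ["d"]}]
    provides = iset(p for op in ops for p in op["provides"])
    needs = iset(n for op in ops for n in op["needs"]) - provides
    assert list(needs) == ["a", "c"]  # "b" is produced internally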
Example #9
def _glob_find_files(pattern_pairs: Iterable[Tuple[str, str]], mybase: Path):
    from boltons.setutils import IndexedSet as iset

    files = iset()
    notfiles = set()  # type: ignore
    for positive, negative in pattern_pairs:
        if positive:
            new_files = iset(mybase.glob(positive))
            cleared_files = [
                f for f in new_files
                if not any(nf in f.parents for nf in notfiles)
            ]
            files.update(cleared_files)
        elif negative:
            new_notfiles = mybase.glob(negative)
            notfiles.update(new_notfiles)
        else:
            raise AssertionError("Both in (positive, negative) pair are None!")

    return files
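
The negative patterns veto any positive match living under a matched directory, via `pathlib`'s `parents`::

    from pathlib import Path

    assert Path("build") in Path("build/pkg/mod.py").parents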
Example #10
    def collect_fpaths(self, path_list):
        """
        Collects all (``.json|.py``) files present in the `path_list` (descending priority order).

        :param path_list:
            A list of paths (absolute or relative, folders or files).
        :type path_list:
            List[Text]
        :return:
            fully-normalized paths, with ext
        """
        collected_paths = self.collected_paths = iset()
        cfg_exts = self.supported_cfg_extensions

        def try_file_extensions(basepath):
            loaded_any = False
            for ext in cfg_exts:
                f = fu.ensure_file_ext(basepath, ext)
                if f in collected_paths:
                    continue

                loaded = osp.isfile(f)
                self.visit_file(f, loaded=loaded)
                loaded_any |= loaded

            ## Load any files in `conf.d/`, alphabetically-sorted.
            #
            for ext in ('', ) + cfg_exts:
                if basepath.endswith(ext):
                    ## Cut the matched suffix (`rstrip` strips chars, not a suffix!).
                    conf_d = fu.ensure_file_ext(
                        basepath[:len(basepath) - len(ext)], '.d')
                    if os.path.isdir(conf_d):
                        for f in sorted(os.listdir(conf_d)):
                            loaded = f.endswith(cfg_exts)
                            self.visit_file(osp.join(conf_d, f), loaded=loaded)
                            loaded_any |= loaded

            return loaded_any

        def _derive_config_fpaths(
                path):  # -> List[Text]:  TODO: enable cmdlet typing comments
            """Return multiple *existent* fpaths for each config-file path (folder/file)."""

            p = fu.convpath(path)
            loaded_any = try_file_extensions(p)
            ## Do not strip ext if has matched WITH ext.
            if not loaded_any:
                try_file_extensions(osp.splitext(p)[0])

        for cf in path_list:
            _derive_config_fpaths(cf)

        return list(collected_paths)
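
Why the suffix-slicing fix in `try_file_extensions` above matters: `str.rstrip` strips a *character set*, not a suffix::

    assert "happy.py".rstrip(".py") == "ha"     # eats trailing '.', 'p', 'y' chars
    assert "happy.py"[:-len(".py")] == "happy"  # slicing removes just the suffix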
Example #11
def _process_dependencies(
    deps: Collection[str],
) -> Tuple[Collection[str], Collection[str]]:
    """
    Strip or singularize any :term:`implicit`/:term:`sideffects`.

    :return:
        a 2-tuple ``(op_deps, fn_deps)``, where any instances of
        :term:`sideffects` in `deps` are processed like this:

        `op_deps`
            - any :func:`.sfxed` is replaced by a sequence of ":func:`singularized
              <.dep_singularized>`" instances, one for each item in its
              :term:`sfx_list`;
            - any duplicates are discarded;
            - order is irrelevant, since they don't reach the function.

        `fn_deps`
            - the dependencies consumed/produced by underlying functions, in the order
              they are first met.  In particular, it replaces any :func:`.sfxed`
              by the :func:`stripped <.dep_stripped>`, unless ...
            - it had been declared as :term:`implicit`, in which case, it is discarded;
            - any :func:`.sfx` are simply dropped.
    """

    #: For deduping any `sideffected`.
    seen_sideffecteds = set()

    def as_fn_deps(dep):
        """Strip and dedupe any sfxed, drop any sfx and implicit. """
        if is_implicit(dep):  # must ignore also `sfxed`s
            pass
        elif is_sfxed(dep):
            dep = dep_stripped(dep)
            if dep not in seen_sideffecteds:
                seen_sideffecteds.add(dep)
                return (dep, )
        elif not is_sfx(dep):  # must kick after `sfxed`
            return (dep, )
        return ()

    assert deps is not None

    if deps:
        op_deps = iset(nn for n in deps for nn in dep_singularized(n))
        fn_deps = tuple(nn for n in deps for nn in as_fn_deps(n))
        return op_deps, fn_deps
    else:
        return deps, deps
Example #12
    def _autodiscover_project_basepaths(self) -> Dict[str, Path]:
        """
        Invoked when no config exists (or when asked to update it) to guess projects.

        :return:
            a mapping of {pnames: basepaths}
        """
        from . import engrave

        if not self.autodiscover_subproject_projects:
            raise cmdlets.CmdException(
                "No `Polyvers.autodiscover_subproject_projects` param given!")

        fproc = engrave.FileProcessor(parent=self)
        with self.errlogged(doing='discovering project paths',
                            info_log=self.log.info):
            scan_projects = self.autodiscover_subproject_projects
            #: Dict[Path,
            #: List[Tuple[pvproject.Project, Engrave, Graft, List[Match]]]]
            match_map = fproc.scan_projects(scan_projects)

        ## Accept projects only if one, and only one,
        #  pair (pname <--> path) matched.
        #
        pname_path_pairs: List[Tuple[str, Path]] = [
            (match.groupdict()['pname'].decode('utf-8'),
             fpath.parent / (prj.basepath or '.'))
            for fpath, mqruples in match_map.items()
            for prj, _eng, _graft, match in mqruples
        ]
        unique_pname_paths = iset(pname_path_pairs)

        ## check basepath conflicts.
        #
        projects: Dict[str, Path] = {}
        dupe_projects: Dict[str, Set[Path]] = defaultdict(set)
        for pname, basepath in unique_pname_paths:
            dupe_basepath = projects.get(pname)
            if dupe_basepath and dupe_basepath != basepath:
                dupe_projects[pname].add(basepath)
            else:
                projects[pname] = basepath

        if dupe_projects:
            raise cmdlets.CmdException(
                "Discovered conflicting project-basepaths: %s" %
                yu.ydumps(dupe_projects))

        return projects
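
A self-contained sketch of the conflict-detection loop above, with hypothetical pname/basepath data::

    from collections import defaultdict

    pairs = [("prj", "/a"), ("prj", "/b"), ("lib", "/c")]
    projects, dupes = {}, defaultdict(set)
    for pname, basepath in pairs:
        if pname in projects and projects[pname] != basepath:
            dupes[pname].add(basepath)
        else:
            projects[pname] = basepath
    assert dict(dupes) == {"prj": {"/b"}} and projects["lib"] == "/c"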
Example #13
    def operation_executed(self, op, outputs):
        """
        Invoked once per operation, with its results.

        It will update :attr:`executed` with the operation status and
        if `outputs` were partials, it will update :attr:`canceled`
        with the unsatisfied ops downstream of `op`.

        :param op:
            the operation that completed ok
        :param outputs:
            The named values the `op` actually produced,
            which may be a subset of its `provides`.  Sideffects are not considered.

        """

        def collect_canceled_sideffects(dep, val) -> Collection:
            """yield any sfx `dep` with falsy value, singularizing sideffected."""
            if val or not is_sfx(dep):
                return ()
            return dep_singularized(dep)

        self._populate_op_layer_with_outputs(op, outputs)
        if first_solid(self.is_reschedule, getattr(op, "rescheduled", None)):
            ## Find which provides have been broken?
            #
            # OPTIMIZE: could use _fn_provides
            missing_outs = iset(op.provides) - set(outputs)
            sfx = {out for out in missing_outs if is_sfx(out)}
            canceled_sideffects = [
                sf
                for k, v in outputs.items()
                for sf in collect_canceled_sideffects(k, v)
            ]
            outs_to_break = (missing_outs - sfx) | canceled_sideffects
            log.info(
                "... (%s) missing partial outputs %s from rescheduled %s.",
                self.solid,
                list(outs_to_break),
                op,
            )

            if outs_to_break:
                dag = self.dag
                dag.remove_edges_from((op, out) for out in outs_to_break)
                self._reschedule(dag, "rescheduled", op)
                # list used by `check_if_incomplete()`
                self.broken[op] = outs_to_break
Example #14
    def __init__(
        self,
        *,
        excludes: Iterable[_FnKey] = None,
        base_modules: Iterable[Union[ModuleType, str]] = None,
        predicate: Callable[[Any], bool] = None,
        include_methods=False,
        sep=None,
    ):
        super().__init__(sep)
        if include_methods is not None:
            self.include_methods = bool(include_methods)
        self._seen: Set[int] = set()
        self.excludes = set(excludes or ())
        self.base_modules = iset(sys.modules[m] if isinstance(m, str) else m
                                 for m in (base_modules or ()))
        self.predicate = predicate
        self.collected = []
Example #15
    def yield_files(self, *fpaths):
        """
        :return:
            a 2 tuple `(fpath, file_text)`
        """

        import io
        import os
        from boltons.setutils import IndexedSet as iset

        fpaths = iset(fpaths) or ['-']
        for fpath in fpaths:
            if fpath == '-':
                msg = "Reading STDIN."
                if getattr(sys.stdin, 'isatty', lambda: False)():
                    msg += ("..paste text, then [Ctrl+%s] to exit!" %
                            'Z' if sys.platform == 'win32' else 'D')
                self.log.info(msg)
                text = sys.stdin.read()
                yield "<STDIN: %i-chars>" % len(text), text
            else:
                fpath = convpath(fpath, abs_path=False)
                if osp.exists(fpath):
                    afpath = convpath(fpath, abs_path=True)
                    if osp.exists(afpath):
                        fpath = afpath
                else:
                    self.log.error(
                        "File to read '%s' not found!"
                        "\n  CWD: %s", fpath, os.curdir)
                    continue

                try:
                    with io.open(fpath, 'rt') as fin:
                        text = fin.read()

                    yield fpath, text
                except Exception as ex:
                    self.log.error(
                        "Reading file-path '%s' failed due to: %r",
                        fpath,
                        ex,
                        exc_info=self.verbose)  # WARN: from `cmdlets.Spec`
                    continue
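
The parenthesization fix above matters because `%` binds tighter than a conditional expression; without the parentheses the whole message collapses to just `'D'` on non-Windows platforms::

    import sys

    buggy = "[Ctrl+%s]" % 'Z' if sys.platform == 'win32' else 'D'
    fixed = "[Ctrl+%s]" % ('Z' if sys.platform == 'win32' else 'D')
    assert fixed in ("[Ctrl+Z]", "[Ctrl+D]")
    assert buggy in ("[Ctrl+Z]", "D")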
Example #16
    def check_if_incomplete(self) -> Optional[IncompleteExecutionError]:
        """Return an :class:`IncompleteExecutionError` if `pipeline` operations failed/canceled."""
        failures = {
            op: ex for op, ex in self.executed.items() if isinstance(ex, Exception)
        }
        incomplete = iset(chain(self.canceled, failures.keys()))
        if incomplete:
            incomplete = list(yield_node_names(incomplete))
            partial_msgs = {
                f"\n  +--{op.name}: {list(pouts)}" for op, pouts in self.broken.items()
            }
            err_msgs = [
                f"\n  +--{op.name}: {type(ex).__name__}('{ex}')"
                for op, ex in failures.items()
            ]
            msg = (
                f"Not completed x{len(incomplete)} operations {list(incomplete)}"
                f" due to x{len(failures)} failures and x{len(partial_msgs)} partial-ops:"
                f"{''.join(err_msgs)}{''.join(partial_msgs)}"
            )
            return IncompleteExecutionError(msg, self)
Example #17
def collect_gpgs():
    inc_errors = 1
    gpg_kws = {}
    gpg_paths = iset(
        itt.chain.from_iterable(pndlu.where(prog) for prog in ('gpg2', 'gpg')))
    gnupghome = osp.expanduser('~/.gnupg')
    gpg_avail = []
    for gpg_path in gpg_paths:
        try:
            gpg = gnupg.GPG(gpgbinary=gpg_path, **gpg_kws)
            row = _describe_gpg(gpg)
        except Exception as ex:
            if inc_errors:
                row = (gpg_path, '%s: %s' % (type(ex).__name__, str(ex)), None,
                       None)
            else:
                continue
        gpg_avail.append(row)

    cols = ['GnuPG path', 'Version', '#PRIV', '#TOTAL']
    gpg_avail = pd.DataFrame(gpg_avail, columns=cols)
    return gpg_avail
Example #18
def inputs_for_recompute(
    graph,
    inputs: Sequence[str],
    recompute_from: Sequence[str],
    recompute_till: Sequence[str] = None,
) -> Tuple[iset, iset]:
    """
    Clear the inputs between `recompute_from >--<= recompute_till`.

    :param graph:
        MODIFIED, at most 2 helper nodes inserted
    :param inputs:
        a sequence
    :param recompute_from:
        None or a sequence, including any out-of-graph deps (logged)
    :param recompute_till:
        (optional) a sequence, only in-graph deps.

    :return:
        a 2-tuple of the `inputs` reduced by the dependencies that must
        be removed from the graph to recompute, along with those dependencies.

    It works by temporarily adding x2 nodes to find and remove the intersection of::

        strict-descendants(recompute_from) & ancestors(recompute_till)

    FIXME: merge recompute() with traversing unsatisfied (see ``test_recompute_NEEDS_FIX``)
    bc it clears inputs of unsatisfied ops (cannot be replaced later)
    """
    START, STOP = "_TMP.RECOMPUTE_FROM", "_TMP.RECOMPUTE_TILL"

    deps = set(yield_datanodes(graph.nodes))
    recompute_from = iset(recompute_from)  # traversed in logs
    inputs = iset(inputs)  # returned
    bad = recompute_from - deps
    if bad:
        log.info("... ignoring unknown `recompute_from` dependencies: %s",
                 list(bad))
        recompute_from = recompute_from & deps  # avoid sideffect in `recompute_from`
    assert recompute_from, f"Given unknown-only `recompute_from` {locals()}"

    graph.add_edges_from((START, i) for i in recompute_from)

    # strictly-downstreams from START
    between_deps = iset(nx.descendants(graph, START)) & deps - recompute_from

    if recompute_till:
        graph.add_edges_from(
            (i, STOP) for i in recompute_till)  # edge reversed!

        # upstreams from STOP
        upstreams = set(nx.ancestors(graph, STOP)) & deps
        between_deps &= upstreams

    recomputes = between_deps & inputs
    new_inputs = iset(inputs) - recomputes

    if log.isEnabledFor(logging.DEBUG):
        log.debug(
            "... recompute x%i data%s means deleting x%i inputs%s, to arrive from x%i %s -> x%i %s.",
            len(between_deps),
            list(between_deps),
            len(recomputes),
            list(recomputes),
            len(inputs),
            list(inputs),
            len(new_inputs),
            list(new_inputs),
        )

    return new_inputs, recomputes
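
The core of the `START`/`STOP` trick is a descendants/ancestors intersection; a minimal `networkx` demo::

    import networkx as nx

    g = nx.DiGraph([("a", "b"), ("b", "c"), ("c", "d")])
    between = set(nx.descendants(g, "a")) & set(nx.ancestors(g, "d"))
    assert between == {"b", "c"}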
Example #19
def reparse_operation_data(
    name,
    needs,
    provides,
    aliases=(),
    cwd: Sequence[str] = None
) -> Tuple[str, Collection[str], Collection[str], Collection[Tuple[str, str]]]:
    """
    Validate & reparse operation data as lists.

    :return:
        name, needs, provides, aliases

    As a separate function to be reused by client building operations,
    to detect errors early.
    """
    from .jsonpointer import jsonp_path

    if name is not None and not isinstance(name, str):
        raise TypeError(f"Non-str `name` given: {name}")

    cwd_parts = jsonp_path(cwd) if cwd else ()

    # Allow single string-value for needs parameter
    needs = astuple(needs, "needs", allowed_types=cabc.Collection)
    if not all(isinstance(i, str) for i in needs):
        raise TypeError(f"All `needs` must be str, got: {needs!r}")
    needs = jsonp_ize_all(needs, cwd_parts)

    # Allow single value for provides parameter
    provides = astuple(provides, "provides", allowed_types=cabc.Collection)
    if not all(isinstance(i, str) for i in provides):
        raise TypeError(f"All `provides` must be str, got: {provides!r}")
    provides = jsonp_ize_all(provides, cwd_parts)

    aliases = as_renames(aliases, "aliases")
    if aliases:
        ## Sanity checks, or `jsonp_ize_all()` would fail.
        #
        if not all(
                src and isinstance(src, str) and dst and isinstance(dst, str)
                for src, dst in aliases):
            raise TypeError(
                f"All `aliases` must be non-empty str, got: {aliases!r}")

        # XXX: Why jsonp_ize here? (and not everywhere, or nowhere in fnop?)
        aliases = [(prefixed(src, cwd_parts), prefixed(dst, cwd_parts))
                   for src, dst in aliases]

        if any(1 for src, dst in aliases if dst in provides):
            bad = ", ".join(f"{src} -> {dst}" for src, dst in aliases
                            if dst in provides)
            raise ValueError(
                f"The `aliases` [{bad}] clash with existing provides in {list(provides)}!"
            )

        aliases_src = iset(src for src, _dst in aliases)
        all_provides = iset(provides) | (dep_stripped(d) for d in provides)

        if not aliases_src <= all_provides:
            bad_alias_sources = aliases_src - all_provides
            bad_aliases = ", ".join(f"{src!r}-->{dst!r}"
                                    for src, dst in aliases
                                    if src in bad_alias_sources)
            raise ValueError(
                f"The `aliases` [{bad_aliases}] rename non-existent provides in {list(all_provides)}!"
            )
        sfx_aliases = [
            f"{src!r} -> {dst!r}" for src, dst in aliases
            if is_pure_sfx(src) or is_pure_sfx(dst)
        ]
        if sfx_aliases:
            raise ValueError(
                f"The `aliases` must not contain `sideffects` {sfx_aliases}"
                "\n  Simply add any extra `sideffects` in the `provides`.")
        implicit_aliases = [
            f"{'<implicit>' if bad_src else ''}{src!r} -> "
            f"{dst!r}{'<implicit>' if bad_dst else ''}" for src, dst in aliases
            for bad_src in [
                is_implicit(src) or any(
                    is_implicit(i) for i in provides if i == src)
            ] for bad_dst in [is_implicit(dst)] if bad_src or bad_dst
        ]
        if implicit_aliases:
            raise ValueError(
                f"The `aliases` must not contain `implicits`: {implicit_aliases}"
                "\n  Simply add any extra `implicits` in the `provides`.")

    return name, needs, provides, aliases
Example #20
def build_network(
    operations,
    rescheduled=None,
    endured=None,
    parallel=None,
    marshalled=None,
    node_props=None,
    renamer=None,
    excludes=None,
):
    """
    The :term:`network` factory that does :term:`operation merging` before constructing it.

    :param renamer:
        see the same-purposed `nest` param in :func:`.compose`
    """
    kw = {
        k: v
        for k, v in locals().items()
        if v is not None and k not in ("operations", "excludes")
    }

    def proc_op(op, parent=None):
        """clone FuncOperation with certain props changed"""
        ## Convey any node-props specified in the pipeline here
        #  to all sub-operations.
        #
        if kw:
            op_kw = kw.copy()

            if node_props:
                op_kw["node_props"] = {**op.node_props, **node_props}

            if callable(renamer):

                def parent_wrapper(ren_args: RenArgs) -> str:
                    # Provide RenArgs.parent.
                    return renamer(ren_args._replace(parent=parent))

                op_kw["renamer"] = parent_wrapper
            op = op.withset(**op_kw)

        ## Last minute checks, couldn't check earlier due to builder pattern.
        #
        if hasattr(op, "fn"):
            op.validate_fn_name()
        if not op.provides:
            TypeError(f"`provides` must not be empty!")

        return op

    merge_set = iset()  # Preserve given node order.
    for op in operations:
        if isinstance(op, Pipeline):
            merge_set.update(proc_op(s, op) for s in op.ops)
        else:
            merge_set.add(proc_op(op))

    if excludes is not None:
        excludes = {
            op
            for op in merge_set if op in asset(excludes, "excludes")
        }
        if excludes:
            merge_set = [op for op in merge_set if op not in excludes]
            log.info("Compose excluded %i operations %s.", len(excludes),
                     excludes)

    assert all(bool(n) for n in merge_set)

    from .planning import Network  # Imported here not to affect locals() at the top.

    return Network(*merge_set)
Example #21
    def _zip_results_returns_dict(self, results, is_rescheduled) -> dict:
        if hasattr(results, "_asdict"):  # named tuple
            results = results._asdict()
        elif isinstance(results, cabc.Mapping):
            pass
        elif hasattr(results, "__dict__"):  # regular object
            results = vars(results)
        else:
            raise ValueError(
                "Expected results as mapping, named_tuple, object, "
                f"got {type(results).__name__!r}: {results}\n  {self}"
                f"\n  {debug_var_tip}")

        fn_required = self._fn_provides
        if fn_required:
            renames = {get_keyword(i): i
                       for i in fn_required}  # +1 useless key: None
            renames.pop(None, None)
            fn_expected = fn_required = [
                get_keyword(i) or i for i in fn_required
            ]
        else:
            fn_expected = fn_required = renames = ()

        if is_rescheduled:
            # Canceled sfx(ed) are welcomed.
            fn_expected = iset(
                [*fn_expected, *(i for i in self.provides if is_sfx(i))])

        res_names = results.keys()

        ## Clip unknown outputs (handy for reuse).
        #
        unknown = [i for i in (res_names - fn_expected) if not is_pure_sfx(i)]
        if unknown:
            unknown = list(unknown)
            log.warning(
                "Results%s contained +%i unknown provides%s - will DELETE them!\n  %s",
                list(res_names),
                len(unknown),
                list(unknown),
                self,
            )
            # Filter results, don't mutate them.
            # NOTE: too invasive when no-evictions!?
            results = {k: v for k, v in results.items() if k not in unknown}

        missmatched = iset(fn_required) - res_names
        if missmatched:
            if is_rescheduled:
                log.warning("... Op %r did not provide%s", self.name,
                            list(missmatched))
            else:
                raise ValueError(
                    f"Got x{len(results)} results({list(results)}) mismatched "
                    f"-{len(missmatched)} provides({list(fn_expected)}):"
                    f" {list(missmatched)}\n  {self}\n  {debug_var_tip}")

        if renames:
            results = {renames.get(k, k): v for k, v in results.items()}

        return results
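
A sketch of the result-normalization ladder at the top of the method (named tuples, then mappings, then plain objects)::

    from collections import namedtuple

    def as_mapping(results):
        if hasattr(results, "_asdict"):  # named tuple
            return results._asdict()
        if isinstance(results, dict):    # (simplified) mapping check
            return results
        return vars(results)             # regular object

    Point = namedtuple("Point", "x y")
    assert as_mapping(Point(1, 2)) == {"x": 1, "y": 2}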
Example #22
    def _build_execution_steps(self, dag, inputs, outputs):
        """
        Create the list of operation-nodes & *instructions* evaluating all
        operations & instructions needed a) to free memory and b) to avoid
        overwriting given intermediate inputs.

        :param dag:
            The original dag, pruned; not broken.
        :param outputs:
            output names to decide whether to add (and which) evict-instructions

        Instances of :class:`_EvictInstruction` are inserted in `steps` between
        operation nodes to reduce the memory footprint of solutions while
        the computation is running.
        An evict-instruction is inserted whenever a *need* is not used
        by any other *operation* further down the DAG.
        """

        steps = []

        # create an execution order such that each layer's needs are provided.
        ordered_nodes = iset(nx.topological_sort(dag))

        # Add Operations evaluation steps, and instructions to free and "pin"
        # data.
        for i, node in enumerate(ordered_nodes):

            if isinstance(node, _DataNode):
                if node in inputs and dag.pred[node]:
                    # Add a pin-instruction only when there is another operation
                    # generating this data as output.
                    steps.append(_PinInstruction(node))

            elif isinstance(node, Operation):
                steps.append(node)

                # Keep all values in solution if not specific outputs asked.
                if not outputs:
                    continue

                # Add instructions to evict predecessors as possible.  A
                # predecessor may be evicted if it is a data placeholder that
                # is no longer needed by future Operations.
                # It shouldn't make a difference if it were the broken dag
                # bc these are preds of data (provides), and we scan here
                # preds of ops (need).
                for need in dag.pred[node]:
                    log.debug("checking if node %s can be evicted", need)
                    for future_node in ordered_nodes[i + 1 :]:
                        if (
                            isinstance(future_node, Operation)
                            and need in future_node.needs
                        ):
                            break
                    else:
                        if need not in outputs:
                            log.debug("  adding evict-instruction for %s", need)
                            steps.append(_EvictInstruction(need))

            else:
                raise AssertionError("Unrecognized network graph node %r" % node)

        return steps
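
The eviction scan above relies on Python's ``for ... else``: the ``else`` branch (adding the evict-instruction) runs only when the loop over future nodes finishes without a ``break``, i.e. no later operation needs the value::

    evict = False
    for op in ["op2", "op3"]:
        if op == "op9":  # a future consumer of the value?
            break
    else:
        evict = True     # loop ended without break
    assert evict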
Example #23
    def _prune_graph(self,
                     inputs: Items,
                     outputs: Items,
                     predicate: NodePredicate = None
                     ) -> Tuple[nx.DiGraph, Tuple, Tuple, Tuple, OpMap]:
        """
        Determines what graph steps need to run to get to the requested
        outputs from the provided inputs:
        - Eliminate steps that are not on a path arriving to requested outputs;
        - Eliminate unsatisfied operations: partial inputs or no outputs needed;
        - consolidate the list of needs & provides.

        :param inputs:
            The names of all given inputs.
        :param outputs:
            The desired output names.  This can also be ``None``, in which
            case the necessary steps are all graph nodes that are reachable
            from the provided inputs.
        :param predicate:
            the :term:`node predicate` is a 2-argument callable(op, node-data)
            that should return true for nodes to include; if None, all nodes included.

        :return:
            a 5-tuple:

            - the *pruned* :term:`execution dag`,
            - the topologically sorted nodes,
            - net's needs & outputs based on the given inputs/outputs and the net
              (may overlap, see :func:`collect_requirements()`),
            - an {op, prune-explanation} dictionary


            Use the returned `needs/provides` to build a new plan.

        :raises ValueError:
            - if `outputs` asked do not exist in network, with msg:

                *Unknown output nodes: ...*
        """
        # TODO: break cycles based on weights here.
        dag = self.graph

        ##  When `inputs` is None, we have to keep all possible input nodes
        #   and this is achieved with 2 tricky locals:
        #
        #   inputs
        #       it is kept falsy, to disable the edge-breaking, so that
        #       the ascending_from_outputs that follows can reach all input nodes;
        #       including intermediate ones;
        #   satisfied_inputs
        #       it is filled with all possible input nodes, to trick `unsatisfied_operations()`
        #       to assume their operations are satisfied, and keep them.
        #
        if inputs is None and outputs is None:
            satisfied_inputs, outputs = self.needs, self.provides
        else:
            if inputs is None:  # outputs: NOT None
                satisfied_inputs = self.needs - outputs
            else:  # inputs: NOT None, outputs: None
                # Just ignore `inputs` not in the graph.
                satisfied_inputs = inputs = iset(inputs) & dag.nodes

            ## Scream on unknown `outputs`.
            #
            if outputs:
                unknown_outputs = iset(outputs) - dag.nodes
                if unknown_outputs:
                    raise ValueError(
                        f"Unknown output nodes: {list(unknown_outputs)}\n  {self}"
                        "\n  (tip: set GRAPHTIK_DEBUG envvar to view Op details in print-outs)"
                    )

        assert isinstance(satisfied_inputs, abc.Collection)
        assert inputs is None or isinstance(inputs, abc.Collection)
        assert outputs is None or isinstance(outputs, abc.Collection)

        broken_dag = dag.copy()  # preserve net's graph

        if predicate:
            self._apply_graph_predicate(broken_dag, predicate)

        # Break the incoming edges to all given inputs.
        #
        # Nodes producing any given intermediate inputs are unnecessary
        # (unless they are also used elsewhere).
        # To discover which ones to prune, we break their incoming edges
        # and they will drop out while collecting ancestors from the outputs.
        #
        if inputs:
            for n in inputs:
                # Coalesce to a list, to avoid concurrent modification.
                broken_dag.remove_edges_from(
                    list((src, dst) for src, dst, subdoc in
                         broken_dag.in_edges(n, data="subdoc") if not subdoc))

        comments: OpMap = {}

        # Drop stray input values and operations (if any).
        if outputs is not None:
            ## If caller requested specific outputs, we can prune any
            #  unrelated nodes further up the dag.
            #
            ending_in_outputs = set()
            for out in yield_chaindocs(dag, outputs, ending_in_outputs):
                # TODO: speedup prune-by-outs with traversing code
                ending_in_outputs.update(nx.ancestors(broken_dag, out))
                ending_in_outputs.add(out)
            # Clone it, to modify it, or BUG@@ much later (e.g. in eviction planning).
            broken_dag = broken_dag.subgraph(ending_in_outputs).copy()

            irrelevant_ops = [
                op for op in yield_ops(dag) if op not in ending_in_outputs
            ]
            if irrelevant_ops:
                comments.update(
                    (op, "outputs-irrelevant") for op in irrelevant_ops)
                log.info(
                    "... dropping output-irrelevant ops%s.\n    +--outputs: %s",
                    irrelevant_ops,
                    outputs,
                )

        # Prune unsatisfied operations (those with partial inputs or no outputs).
        unsatisfied, sorted_nodes = unsatisfied_operations(
            broken_dag, satisfied_inputs)
        comments.update(unsatisfied)

        # Clone it, to modify it.
        pruned_dag = dag.subgraph(broken_dag.nodes - unsatisfied).copy()
        ## Clean unlinked data-nodes (except those both given & asked).
        #
        unlinked_data = set(nx.isolates(pruned_dag))
        if outputs is not None:
            # FIXME: must cast to simple set due to mahmoud/boltons#252 (boltons < v20.1)
            unlinked_data -= set(satisfied_inputs & outputs)
        pruned_dag.remove_nodes_from(unlinked_data)

        inputs = iset(
            _optionalized(pruned_dag, n) for n in satisfied_inputs
            if n in pruned_dag)
        if outputs is None:
            outputs = iset(
                n for n in self.provides
                if n not in inputs and n in pruned_dag and not is_sfx(n))
        else:
            # filter-out from new `provides` if pruned.
            outputs = iset(n for n in outputs if n in pruned_dag)

        assert isinstance(inputs, abc.Collection)
        assert isinstance(outputs, abc.Collection)

        return pruned_dag, sorted_nodes, tuple(inputs), tuple(
            outputs), comments
Example #24
    def __init__(
        self,
        fn: Callable = None,
        name=None,
        needs: Items = None,
        provides: Items = None,
        aliases: Mapping = None,
        *,
        cwd=None,
        rescheduled=None,
        endured=None,
        parallel=None,
        marshalled=None,
        returns_dict=None,
        node_props: Mapping = None,
    ):
        """
        Build a new operation out of some function and its requirements.

        See :func:`.operation` for the full documentation of parameters,
        study the code for attributes (or read them from  rendered sphinx site).
        """
        from .jsonpointer import jsonp_path

        super().__init__()
        node_props = node_props if node_props else {}

        if fn and not callable(fn):
            raise TypeError(
                f"Operation was provided with a non-callable: {fn}")
        if node_props is not None and not isinstance(node_props, cabc.Mapping):
            raise TypeError(
                f"Operation `node_props` must be a dict, was {type(node_props).__name__!r}: {node_props}"
            )

        if name is None and fn:
            name = func_name(fn, None, mod=0, fqdn=0, human=0, partials=1)
        ## Overwrite reparsed op-data.
        name, needs, provides, aliases = reparse_operation_data(
            name, needs, provides, aliases, cwd)

        user_needs, user_provides = needs, provides
        needs, _fn_needs = _process_dependencies(needs)
        provides, _fn_provides = _process_dependencies(provides)
        alias_dst = aliases and tuple(dst for _src, dst in aliases)
        provides = iset((*provides, *alias_dst))

        # TODO: enact conveyor fn if varargs in the outputs.
        if fn is None and name and len(_fn_needs) == len(_fn_provides):
            log.debug(
                "Auto-setting conveyor identity function on op(%s) for needs(%s) --> provides(%s)",
                name,
                needs,
                provides,
            )
            fn = identity_fn

        #: The :term:`operation`'s underlying function.
        self.fn = fn
        #: a name for the operation (e.g. `'conv1'`, `'sum'`, etc.);
        #: any "parents split by dots(``.``)".
        #: :seealso: :ref:`operation-nesting`
        self.name = name

        #: Fake function attributes.
        #:
        if fn:
            update_wrapper(
                self,
                fn,
                assigned=("__module__", "__doc__", "__annotations__"),
                updated=(),
            )
        self.__name__ = name
        qname = getattr(fn, "__qualname__", None) or name
        if qname:
            # "ab.cd" => "ab.NAME", "ab" => "NAME", "" => "NAME"
            qname = ".".join((*qname.split(".")[:-1], name))
        self.__qualname__ = qname

        #: Dependencies ready to lay the graph for :term:`pruning`
        #: (NO-DUPES, SFX, SINGULAR :term:`sideffected`\s).
        self.needs = needs
        #: The :term:`needs` as given by the user, stored for *builder pattern*
        #: to work.
        self._user_needs = user_needs
        #: Value names the underlying function requires
        #: (DUPES preserved, NO-SFX, STRIPPED :term:`sideffected`).
        self._fn_needs = _fn_needs

        #: Value names ready to lay the graph for :term:`pruning`
        #: (NO DUPES, ALIASES, SFX, SINGULAR sideffecteds, +alias destinations).
        self.provides = provides
        #: The :term:`provides` as given by the user, stored for *builder pattern*
        #: to work.
        self._user_provides = user_provides
        #: Value names the underlying function produces
        #: (DUPES, NO-ALIASES, NO_SFX, STRIPPED :term:`sideffected`).
        self._fn_provides = _fn_provides

        #: an optional mapping of `fn_provides` to additional ones, together
        #: comprising this operation's `provides`.
        #:
        #: You cannot alias an :term:`alias`.
        self.aliases = aliases
        #: The :term:`current-working-document`, when defined, all non-root `dependencies`
        #: become :term:`jsonp` and are prefixed with this.
        self.cwd = cwd
        #: If true, underlying *callable* may produce a subset of `provides`,
        #: and the :term:`plan` must then :term:`reschedule` after the operation
        #: has executed.  In that case, it makes more sense for the *callable*
        #: to `returns_dict`.
        self.rescheduled = rescheduled
        #: If true, even if *callable* fails, solution will :term:`reschedule`;
        #: ignored if :term:`endurance` enabled globally.
        self.endured = endured
        #: execute in (deprecated) :term:`parallel`
        self.parallel = parallel
        #: If true, operation will be :term:`marshalled <marshalling>` while computed,
        #: along with its `inputs` & `outputs`.
        #: (useful when run in (deprecated) `parallel` with a :term:`process pool`).
        self.marshalled = marshalled
        #: If true, it means the underlying function :term:`returns dictionary` ,
        #: and no further processing is done on its results,
        #: i.e. the returned output-values are not zipped with `provides`.
        #:
        #: It does not have to return any :term:`alias` `outputs`.
        #:
        #: Can be changed amidst execution by the operation's function.
        self.returns_dict = returns_dict
        #: Added as-is into NetworkX graph, and you may filter operations by
        #: :meth:`.Pipeline.withset()`.
        #: Also plot-rendering affected if they match `Graphviz` properties,
        #: if they start with :data:`.USER_STYLE_PREFFIX`,
        #: unless they start with underscore(``_``).
        self.node_props = node_props
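
The *fake function attributes* above come from :func:`functools.update_wrapper`, which works on instances too; a minimal demo with hypothetical names::

    from functools import update_wrapper

    class Op:
        pass

    def fn(x):
        "Compute stuff."

    op = Op()
    update_wrapper(op, fn,
                   assigned=("__module__", "__doc__", "__annotations__"),
                   updated=())
    assert op.__doc__ == "Compute stuff."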
Example #25
    def _prune_graph(self, outputs, inputs):
        """
        Determines what graph steps need to run to get to the requested
        outputs from the provided inputs:
        - Eliminate steps that are not on a path arriving to requested outputs.
        - Eliminate unsatisfied operations: partial inputs or no outputs needed.

        :param iterable outputs:
            A list of desired output names.  This can also be ``None``, in which
            case the necessary steps are all graph nodes that are reachable
            from one of the provided inputs.

        :param iterable inputs:
            The names of all given inputs.

        :return:
            the *pruned_dag*
        """
        dag = self.graph

        # Ignore input names that aren't in the graph.
        graph_inputs = set(dag.nodes) & set(inputs)  # unordered, iterated, but ok

        # Scream if some requested outputs aren't in the graph.
        unknown_outputs = iset(outputs) - dag.nodes
        if unknown_outputs:
            raise ValueError(
                "Unknown output node(s) asked: %s" % ", ".join(unknown_outputs)
            )

        broken_dag = dag.copy()  # preserve net's graph

        # Break the incoming edges to all given inputs.
        #
        # Nodes producing any given intermediate inputs are unnecessary
        # (unless they are also used elsewhere).
        # To discover which ones to prune, we break their incoming edges
        # and they will drop out while collecting ancestors from the outputs.
        broken_edges = set()  # unordered, not iterated
        for given in graph_inputs:
            broken_edges.update(broken_dag.in_edges(given))
        broken_dag.remove_edges_from(broken_edges)

        # Drop stray input values and operations (if any).
        broken_dag.remove_nodes_from(list(nx.isolates(broken_dag)))

        if outputs:
            # If caller requested specific outputs, we can prune any
            # unrelated nodes further up the dag.
            ending_in_outputs = set()
            for output_name in outputs:
                ending_in_outputs.add(_DataNode(output_name))
                ending_in_outputs.update(nx.ancestors(dag, output_name))
            broken_dag = broken_dag.subgraph(ending_in_outputs)

        # Prune unsatisfied operations (those with partial inputs or no outputs).
        unsatisfied = self._collect_unsatisfied_operations(broken_dag, inputs)
        # Clone it so that it is picklable.
        pruned_dag = dag.subgraph(broken_dag.nodes - unsatisfied).copy()

        assert all(
            isinstance(n, (Operation, _DataNode)) for n in pruned_dag
        ), pruned_dag

        return pruned_dag, broken_edges