예제 #1
0
class CacheNode(Node):

    """
    Cache the values that pass through this node.

    This is useful if you have a :class:`~.ChangeListenerNode` and only wish to
    process the updated files. You can put a CacheNode after the processing,
    and all of the results will be passed on.

    ..note::
        The CacheNode handles new and changed files, but if you remove a file
        it will still appear in the output of the CacheNode.

    Parameters
    ----------
    cache : str, optional
        Name of the file to cache data in. By default will cache data in
        memory.
    key : str, optional
        Table name to use inside the ``cache`` file. Must be present if
        ``cache`` is non-None.

    """

    name = "cache"
    outputs = "*"

    def __init__(self, cache=None, key=None):
        super(CacheNode, self).__init__()
        if cache is None:
            self.cache = {}
        elif key is None:
            raise ValueError("If cache is provided, must provide a key")
        else:
            self.cache = SqliteDict(cache, key, autocommit=False, synchronous=0)

    def process(self, default=None, **kwargs):
        if default is not None:
            kwargs["default"] = default
        ret = {}
        for stream, items in six.iteritems(kwargs):
            stream_cache = self.cache.setdefault(stream, OrderedDict())
            for item in items:
                stream_cache[item.fullpath] = item
            ret[stream] = []
            for item in six.itervalues(stream_cache):
                clone = copy.copy(item)
                clone.data = FileDataBlob(clone.data.read())
                ret[stream].append(clone)
            if isinstance(self.cache, SqliteDict):
                self.cache[stream] = stream_cache
                self.cache.commit()
        return ret
예제 #2
0
class ChangeListenerNode(Node):

    """
    Filter source files and detect changes.

    It has two outputs, the default and 'all'. The default output contains only
    the changed files. The 'all' edge will contain all files from the source.

    Parameters
    ----------
    stop : bool, optional
        If True, stop processing the graph if no changes are detected at this
        node (default True)
    cache : str, optional
        Name of the file to cache data in. By default will cache data in
        memory.
    key : str, optional
        Table name to use inside the ``cache`` file. Must be present if
        ``cache`` is non-None.
    fingerprint: str or callable
        Function that takes a file and returns a fingerprint. May also be the
        strings 'md5' or 'mtime', which will md5sum the file or check the
        modification time respectively. (default 'md5')

    """

    name = "change_listener"
    outputs = ("default", "all")

    def __init__(self, stop=True, cache=None, key=None, fingerprint="md5"):
        super(ChangeListenerNode, self).__init__()
        self.stop = stop
        if cache is None:
            self.checksums = {}
        elif key is None:
            raise ValueError("If cache is provided, must provide a key")
        else:
            self.checksums = SqliteDict(cache, key, autocommit=False, synchronous=0)
        if fingerprint == "md5":
            self.fingerprint = self._md5
        elif fingerprint == "mtime":
            self.fingerprint = self._mtime
        else:
            self.fingerprint = fingerprint

    def _md5(self, item):
        """ md5sum a file """
        with item.data.open() as filestream:
            return md5stream(filestream)

    def _mtime(self, item):
        """ Get the modification time of a file """
        return os.path.getmtime(item.fullpath)

    def process(self, stream):
        changed = []
        all_items = []
        for item in stream:
            fingerprint = self.fingerprint(item)
            if fingerprint != self.checksums.get(item.fullpath):
                self.checksums[item.fullpath] = fingerprint
                changed.append(item)
            all_items.append(item)
        if not changed and self.stop:
            raise StopProcessing
        if isinstance(self.checksums, SqliteDict):
            self.checksums.commit()
        return {"default": changed, "all": all_items}