Exemple #1
0
class ChangeListenerNode(Node):

    """
    Filter source files and detect changes.

    It has two outputs, the default and 'all'. The default output contains only
    the changed files. The 'all' edge will contain all files from the source.

    Parameters
    ----------
    stop : bool, optional
        If True, stop processing the graph if no changes are detected at this
        node (default True)
    cache : str, optional
        Name of the file to cache data in. By default will cache data in
        memory.
    key : str, optional
        Table name to use inside the ``cache`` file. Must be present if
        ``cache`` is non-None.
    fingerprint : str or callable, optional
        Function that takes a file and returns a fingerprint. May also be the
        strings 'md5' or 'mtime', which will md5sum the file or check the
        modification time respectively. (default 'md5')

    Raises
    ------
    ValueError
        If ``cache`` is provided without ``key``, or if ``fingerprint`` is
        neither 'md5', 'mtime', nor a callable.

    """

    name = "change_listener"
    outputs = ("default", "all")

    def __init__(self, stop=True, cache=None, key=None, fingerprint="md5"):
        super(ChangeListenerNode, self).__init__()
        self.stop = stop

        # Validate every argument before touching the cache so that a bad
        # argument never leaves a freshly opened SqliteDict behind.
        if cache is not None and key is None:
            raise ValueError("If cache is provided, must provide a key")

        if fingerprint == "md5":
            self.fingerprint = self._md5
        elif fingerprint == "mtime":
            self.fingerprint = self._mtime
        elif callable(fingerprint):
            self.fingerprint = fingerprint
        else:
            # Fail here rather than with an opaque "'str' object is not
            # callable" the first time process() runs.
            raise ValueError(
                "fingerprint must be 'md5', 'mtime' or a callable, got %r"
                % (fingerprint,)
            )

        if cache is None:
            # No cache file: fingerprints are kept in a plain in-memory dict.
            self.checksums = {}
        else:
            self.checksums = SqliteDict(cache, key, autocommit=False, synchronous=0)

    def _md5(self, item):
        """ md5sum the stream returned by ``item.data.open()`` """
        with item.data.open() as filestream:
            return md5stream(filestream)

    def _mtime(self, item):
        """ Get the modification time of the file at ``item.fullpath`` """
        return os.path.getmtime(item.fullpath)

    def process(self, stream):
        """
        Split the incoming items into changed items and all items.

        An item counts as changed when its fingerprint differs from the one
        stored under ``item.fullpath``; the stored fingerprint is then updated.

        Parameters
        ----------
        stream : iterable
            Items to inspect. Each item must expose ``fullpath`` and whatever
            the configured fingerprint function reads.

        Returns
        -------
        dict
            ``{"default": changed_items, "all": all_items}``, both lists in
            stream order.

        Raises
        ------
        StopProcessing
            If no item changed and ``stop`` is true.

        """
        changed = []
        all_items = []
        for item in stream:
            fingerprint = self.fingerprint(item)
            if fingerprint != self.checksums.get(item.fullpath):
                self.checksums[item.fullpath] = fingerprint
                changed.append(item)
            all_items.append(item)
        if not changed and self.stop:
            # Nothing was written to the checksums in this case, so skipping
            # the commit below loses no data.
            raise StopProcessing
        if isinstance(self.checksums, SqliteDict):
            # autocommit is disabled, so persist the updated fingerprints once
            # per run.
            self.checksums.commit()
        return {"default": changed, "all": all_items}