Exemple #1
0
    def __init__(self, root):
        """Open the volume stored under ``root``.

        Sets up, in order:
          root       -- the Path the volume was opened at.
          mdd        -- ``_metadata_path(root)``.
          keydb      -- a KeyDB opened at ``_keys_path(root)``.
          udd        -- userdata directory Path, read from keydb key 'udd'.
          bs         -- a FileBlobstore over ``udd``.
          snapdb     -- KeyDBFactory over the "snaps" window of keydb.
          remotedb   -- KeyDBFactory over the "remotes" window of keydb.
          is_ignored -- ``skip_ignored`` partially applied to the ignore list.

        The ignore list always contains the metadata location, plus one
        entry per non-blank line of ``<root>/.farmignore`` when that file
        exists.

        Raises:
          AssertionError: if ``root`` is not a Path.
          IOError: if ``.farmignore`` cannot be read for any reason other
            than the file not existing.
        """
        assert isinstance(root, Path)
        self.root = root
        self.mdd = _metadata_path(root)
        self.keydb = KeyDB(_keys_path(root))
        self.udd = Path(self.keydb.read('udd'))
        self.bs = FileBlobstore(self.udd)
        self.snapdb = KeyDBFactory(KeyDBWindow("snaps", self.keydb),
                                   encode_snapshot,
                                   partial(decode_snapshot, self.bs.reverser))
        self.remotedb = KeyDBFactory(KeyDBWindow("remotes", self.keydb),
                                     encode_volume, decode_volume)

        # The metadata location is always ignored, even without .farmignore.
        ignored = [safetype(self.mdd)]
        exclude_file = Path('.farmignore', self.root)
        try:
            with exclude_file.open('rb') as exclude_fd:
                for raw_pattern in exclude_fd:
                    stripped = raw_pattern.strip()
                    if not stripped:
                        # Blank lines carry no pattern. Joining an empty
                        # pattern onto the root would produce an entry for
                        # the root itself.
                        # NOTE(review): confirm Path('', root) semantics.
                        continue
                    pattern = ingest(stripped)
                    ignored.append(safetype(Path(pattern, root)))
        except IOError as e:
            # A missing .farmignore is fine; anything else is a real error.
            if e.errno != NoSuchFile:
                raise
        self.is_ignored = partial(skip_ignored, ignored)
Exemple #2
0
def test_KeyDBFactory_diff(tmp_Path):
    """Exercise the write / list / read / delete round trip of a KeyDBFactory."""

    def decode(data, name):
        return safetype(data)

    with KeyDBWrapper(tmp_Path) as db:
        factory = KeyDBFactory(KeyDBWindow("window", db), str, decode)

        # Nothing has been written yet.
        assert factory.list() == []

        factory.write("five", 5)
        assert factory.list() == ["five"]
        assert factory.read("five") == safetype(5)

        # Deleting the only key empties the listing again.
        factory.delete("five")
        assert factory.list() == []
Exemple #3
0
def test_KeyDBFactory_copy(tmp_Path):
    """Copy a key from a window on one database into a factory on another."""

    def decode(data, name):
        return safetype(data)

    with KeyDBWrapper(Path("db1", tmp_Path)) as source_db:
        source_window = KeyDBWindow("window", source_db)
        source_factory = KeyDBFactory(source_window, str, decode)
        assert source_factory.list() == []
        source_factory.write("five", 5)

        with KeyDBWrapper(Path("db2", tmp_Path)) as target_db:
            target_window = KeyDBWindow("other", target_db)
            target_factory = KeyDBFactory(target_window, str, decode)

            # Pull "five" out of the source window, then read it back
            # through the target factory.
            target_factory.copy("five", source_window)
            assert target_factory.read("five") == safetype(5)
Exemple #4
0
def test_KeyDBFactory_diff():
  """Exercise the write / list / read / delete round trip of a KeyDBFactory."""
  with KeyDBWrapper("./db") as db:
    factory = KeyDBFactory(KeyDBWindow("window", db), str, str)

    # Nothing has been written yet.
    assert factory.list() == []

    factory.write("five", 5)
    assert factory.list() == ["five"]
    assert factory.read("five") == str(5)

    # Deleting the only key empties the listing again.
    factory.delete("five")
    assert factory.list() == []
Exemple #5
0
class FarmFSVolume:
    """A FarmFS volume: a user directory tree plus its metadata and blobstore.

    Attributes set by ``__init__``:
      root       -- the Path the volume was opened at.
      mdd        -- ``_metadata_path(root)``.
      keydb      -- a KeyDB opened at ``_keys_path(root)``.
      udd        -- userdata directory Path, read from keydb key 'udd'.
      bs         -- a FileBlobstore over ``udd``.
      snapdb     -- KeyDBFactory over the "snaps" window of keydb.
      remotedb   -- KeyDBFactory over the "remotes" window of keydb.
      is_ignored -- ``skip_ignored`` partially applied to the ignore list.
    """

    def __init__(self, root):
        """Open the volume stored under ``root``.

        The ignore list always contains the metadata location, plus one
        entry per non-blank line of ``<root>/.farmignore`` when that file
        exists.

        Raises:
          AssertionError: if ``root`` is not a Path.
          IOError: if ``.farmignore`` cannot be read for any reason other
            than the file not existing.
        """
        assert isinstance(root, Path)
        self.root = root
        self.mdd = _metadata_path(root)
        self.keydb = KeyDB(_keys_path(root))
        self.udd = Path(self.keydb.read('udd'))
        self.bs = FileBlobstore(self.udd)
        self.snapdb = KeyDBFactory(KeyDBWindow("snaps", self.keydb),
                                   encode_snapshot,
                                   partial(decode_snapshot, self.bs.reverser))
        self.remotedb = KeyDBFactory(KeyDBWindow("remotes", self.keydb),
                                     encode_volume, decode_volume)

        # The metadata location is always ignored, even without .farmignore.
        ignored = [safetype(self.mdd)]
        exclude_file = Path('.farmignore', self.root)
        try:
            with exclude_file.open('rb') as exclude_fd:
                for raw_pattern in exclude_fd:
                    stripped = raw_pattern.strip()
                    if not stripped:
                        # Blank lines carry no pattern. Joining an empty
                        # pattern onto the root would produce an entry for
                        # the root itself.
                        # NOTE(review): confirm Path('', root) semantics.
                        continue
                    pattern = ingest(stripped)
                    ignored.append(safetype(Path(pattern, root)))
        except IOError as e:
            # A missing .farmignore is fine; anything else is a real error.
            if e.errno != NoSuchFile:
                raise
        self.is_ignored = partial(skip_ignored, ignored)

    def thawed(self, path):
        """Return the paths under ``path`` whose entry type is FILE.

        These are the files not backed by FarmFS. Entries for which
        ``self.is_ignored`` is passed to ``path.entries`` are subject to the
        volume's ignore rules.
        """
        get_path = fmap(first)
        select_userdata_files = pipeline(ftype_selector([FILE]), get_path)
        return select_userdata_files(path.entries(self.is_ignored))

    def frozen(self, path):
        """Return the paths under ``path`` whose entry type is LINK.

        These are the files backed by FarmFS. The volume's ignore rules are
        applied via ``self.is_ignored``.
        """
        get_path = fmap(first)
        select_userdata_files = pipeline(ftype_selector([LINK]), get_path)
        return select_userdata_files(path.entries(self.is_ignored))

    # NOTE: This assumes a posix storage engine.
    def freeze(self, path):
        """Import ``path`` into the blobstore and link it to its blob.

        Returns a dict with keys:
          "path"    -- the path that was frozen.
          "csum"    -- ``path.checksum()``.
          "was_dup" -- the value returned by ``bs.import_via_link``.
        """
        assert isinstance(path, Path)
        assert isinstance(self.udd, Path)
        csum = path.checksum()
        duplicate = self.bs.import_via_link(path, csum)
        self.bs.link_to_blob(path, csum)
        return {"path": path, "csum": csum, "was_dup": duplicate}

    # NOTE: This assumes a posix storage engine.
    def thaw(self, user_path):
        """Replace the link at ``user_path`` with a copy of its target.

        Returns ``user_path``.
        """
        assert isinstance(user_path, Path)
        csum_path = user_path.readlink()
        # NOTE(review): the link is removed before the copy is made, so a
        # failing copy leaves nothing at user_path.
        user_path.unlink()
        csum_path.copy(user_path)
        return user_path

    def repair_link(self, path):
        """Re-point the link at ``path`` to its blob if the link is broken.

        ``path`` must be a link. If its current target is a file, nothing is
        changed and None is returned. Otherwise the checksum is derived from
        the old target with ``bs.reverser``, and the link is recreated to
        point at ``bs.csum_to_path(csum)``, which is returned.

        Raises:
          ValueError: if the blob the link should point at is not a file.
        """
        assert path.islink()
        oldlink = path.readlink()
        if oldlink.isfile():
            # Link is healthy; nothing to repair.
            return
        csum = self.bs.reverser(oldlink)
        newlink = self.bs.csum_to_path(csum)
        if not newlink.isfile():
            raise ValueError("%s is missing, cannot relink" % newlink)
        path.unlink()
        self.bs.link_to_blob(path, csum)
        return newlink

    def link_checker(self):
        """Return a pipeline which, given SnapshotItems, keeps only the link
        items whose checksum does not exist in the blobstore."""
        select_links = ffilter(lambda x: x.is_link())
        is_broken = lambda x: not self.bs.exists(x.csum())
        select_broken = ffilter(is_broken)
        return pipeline(select_links, select_broken)

    def trees(self):
        """Return an iterator over all trees of the volume.

        The local tree comes first, followed by every snapshot listed in
        ``snapdb``. Snapshots are read lazily as the iterator is consumed.
        """
        tree = self.tree()
        snaps = (self.snapdb.read(name) for name in self.snapdb.list())
        return chain([tree], snaps)

    def items(self):
        """Return an iterator over all SnapshotItems from all local snaps
        plus the working tree."""
        return pipeline(concat)(self.trees())

    def tree(self):
        """Get a snap object which represents the tree of the volume."""
        return TreeSnapshot(self.root,
                            self.is_ignored,
                            reverser=self.bs.reverser)

    def userdata_csums(self):
        """Yield ``bs.reverser(path)`` for every file in the userdata store.

        Directories are skipped.

        Raises:
          ValueError: if an entry is neither a FILE nor a DIR.
        """
        for (path, type_) in self.udd.entries():
            assert isinstance(path, Path)
            if type_ == FILE:
                yield self.bs.reverser(path)
            elif type_ == DIR:
                pass
            else:
                raise ValueError("%s is of invalid type %s" % (path, type_))

    def unused_blobs(self, items):
        """Return the set of blobs not referenced in ``items``.

        Only link items are considered references. Asserts that every
        referenced checksum is present in the userdata store.
        """
        select_links = ffilter(lambda x: x.is_link())
        get_csums = fmap(lambda item: item.csum())
        referenced_hashes = pipeline(select_links, get_csums, uniq, set)(items)
        udd_hashes = set(self.userdata_csums())
        missing_data = referenced_hashes - udd_hashes
        assert not missing_data, "Missing %s\nReferenced %s\nExisting %s\n" % (
            missing_data, referenced_hashes, udd_hashes)
        orphaned_csums = udd_hashes - referenced_hashes
        return orphaned_csums

    def similarity(self):
        """Yield similarity data for every pair of directories.

        Each item is ``(dir_a, count_a, dir_b, count_b, intersection)``:
        the two directories, the size of each one's signature set, and the
        number of signatures they share.
        """
        tree = self.tree()
        dir_sigs = directory_signatures(tree, self.root)
        combos = combinations(dir_sigs.items(), 2)
        for ((dir_a, sigs_a), (dir_b, sigs_b)) in combos:
            intersection = len(sigs_a.intersection(sigs_b))
            count_a = len(sigs_a)
            count_b = len(sigs_b)
            yield (dir_a, count_a, dir_b, count_b, intersection)