def test_KeyDBFactory_diff(tmp_Path):
    with KeyDBWrapper(tmp_Path) as db:
        window = KeyDBWindow("window", db)
        factory = KeyDBFactory(window, str, lambda data, name: safetype(data))
        assert factory.list() == []
        factory.write("five", 5)
        assert factory.list() == ["five"]
        value = factory.read("five")
        assert value == safetype(5)
        factory.delete("five")
        assert factory.list() == []
def test_KeyDBFactory_copy(tmp_Path):
    with KeyDBWrapper(Path("db1", tmp_Path)) as db1:
        window1 = KeyDBWindow("window", db1)
        factory1 = KeyDBFactory(window1, str, lambda data, name: safetype(data))
        assert factory1.list() == []
        factory1.write("five", 5)
        with KeyDBWrapper(Path("db2", tmp_Path)) as db2:
            window2 = KeyDBWindow("other", db2)
            factory2 = KeyDBFactory(window2, str, lambda data, name: safetype(data))
            factory2.copy("five", window1)
            value = factory2.read("five")
            assert value == safetype(5)
# Variant of the fixture-based diff test above, driving a db at a relative
# path with a plain str codec.
def test_KeyDBFactory_diff_local_db():
    with KeyDBWrapper("./db") as db:
        window = KeyDBWindow("window", db)
        factory = KeyDBFactory(window, str, str)
        assert factory.list() == []
        factory.write("five", 5)
        assert factory.list() == ["five"]
        value = factory.read("five")
        assert value == str(5)
        factory.delete("five")
        assert factory.list() == []
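# A minimal sketch (an assumption for illustration, not part of FarmFS)
# of the codec contract the tests above rely on: KeyDBFactory takes an
# encoder that serializes a value for storage, and a decoder that is
# called with the stored data plus the key name (see the two-argument
# lambdas above). The JSON pair and test below are hypothetical.
import json

def encode_json(value):
    return json.dumps(value)

def decode_json(data, name):
    # name is the key being read; unused in this sketch.
    return json.loads(data)

def test_KeyDBFactory_json_roundtrip(tmp_Path):
    with KeyDBWrapper(tmp_Path) as db:
        factory = KeyDBFactory(KeyDBWindow("window", db), encode_json, decode_json)
        factory.write("point", {"x": 1, "y": 2})
        assert factory.read("point") == {"x": 1, "y": 2}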
class FarmFSVolume:
    def __init__(self, root):
        assert isinstance(root, Path)
        self.root = root
        self.mdd = _metadata_path(root)
        self.keydb = KeyDB(_keys_path(root))
        self.udd = Path(self.keydb.read('udd'))
        self.bs = FileBlobstore(self.udd)
        self.snapdb = KeyDBFactory(
            KeyDBWindow("snaps", self.keydb),
            encode_snapshot,
            partial(decode_snapshot, self.bs.reverser))
        self.remotedb = KeyDBFactory(
            KeyDBWindow("remotes", self.keydb),
            encode_volume,
            decode_volume)
        exclude_file = Path('.farmignore', self.root)
        ignored = [safetype(self.mdd)]
        try:
            with exclude_file.open('rb') as exclude_fd:
                for raw_pattern in exclude_fd.readlines():
                    pattern = ingest(raw_pattern.strip())
                    excluded = safetype(Path(pattern, root))
                    ignored.append(excluded)
        except IOError as e:
            if e.errno == NoSuchFile:
                pass
            else:
                raise e
        self.is_ignored = partial(skip_ignored, ignored)

    def thawed(self, path):
        """Yield the set of files under path not backed by FarmFS."""
        get_path = fmap(first)
        select_userdata_files = pipeline(ftype_selector([FILE]), get_path)
        return select_userdata_files(path.entries(self.is_ignored))

    def frozen(self, path):
        """Yield the set of files under path backed by FarmFS."""
        get_path = fmap(first)
        select_userdata_files = pipeline(ftype_selector([LINK]), get_path)
        return select_userdata_files(path.entries(self.is_ignored))

    # NOTE: This assumes a posix storage engine.
    def freeze(self, path):
        assert isinstance(path, Path)
        assert isinstance(self.udd, Path)
        csum = path.checksum()
        duplicate = self.bs.import_via_link(path, csum)
        self.bs.link_to_blob(path, csum)
        return {"path": path, "csum": csum, "was_dup": duplicate}

    # NOTE: This assumes a posix storage engine.
    def thaw(self, user_path):
        assert isinstance(user_path, Path)
        csum_path = user_path.readlink()
        user_path.unlink()
        csum_path.copy(user_path)
        return user_path

    def repair_link(self, path):
        """Find broken links and point them back at the UDD."""
        assert path.islink()
        oldlink = path.readlink()
        if oldlink.isfile():
            return
        csum = self.bs.reverser(oldlink)
        newlink = self.bs.csum_to_path(csum)
        if not newlink.isfile():
            raise ValueError("%s is missing, cannot relink" % newlink)
        else:
            path.unlink()
            self.bs.link_to_blob(path, csum)
            return newlink

    def link_checker(self):
        """Return a pipeline which, given a list of SnapshotItems, returns
        the SnapshotItems whose links to the blobstore are broken."""
        select_links = ffilter(lambda x: x.is_link())
        is_broken = lambda x: not self.bs.exists(x.csum())
        select_broken = ffilter(is_broken)
        return pipeline(select_links, select_broken)

    def trees(self):
        """Return an iterator over all trees in the volume:
        the local tree plus all the snapshots."""
        tree = self.tree()
        snaps = imap(lambda x: self.snapdb.read(x), self.snapdb.list())
        return chain([tree], snaps)

    def items(self):
        """Return an iterator over all SnapshotItems from all local
        snaps plus the working tree."""
        return pipeline(concat)(self.trees())

    def tree(self):
        """Get a snap object which represents the tree of the volume."""
        tree_snap = TreeSnapshot(self.root, self.is_ignored, reverser=self.bs.reverser)
        return tree_snap

    def userdata_csums(self):
        """Yield the csums (safetype) for all the files in the userdata store."""
        # We populate counts with all hash paths from the userdata directory.
        for (path, type_) in self.udd.entries():
            assert isinstance(path, Path)
            if type_ == FILE:
                yield self.bs.reverser(path)
            elif type_ == DIR:
                pass
            else:
                raise ValueError("%s is invalid type %s" % (path, type_))

    def unused_blobs(self, items):
        """Return the set of blobs not referenced in items."""
        select_links = ffilter(lambda x: x.is_link())
        get_csums = fmap(lambda item: item.csum())
        referenced_hashes = pipeline(select_links, get_csums, uniq, set)(items)
        udd_hashes = set(self.userdata_csums())
        missing_data = referenced_hashes - udd_hashes
        assert len(missing_data) == 0, \
            "Missing %s\nReferenced %s\nExisting %s\n" % (
                missing_data, referenced_hashes, udd_hashes)
        orphaned_csums = udd_hashes - referenced_hashes
        return orphaned_csums

    def similarity(self):
        """Yield similarity data for directories."""
        tree = self.tree()
        dir_sigs = directory_signatures(tree, self.root)
        combos = combinations(dir_sigs.items(), 2)
        for ((dir_a, sigs_a), (dir_b, sigs_b)) in combos:
            intersection = len(sigs_a.intersection(sigs_b))
            count_a = len(sigs_a)
            count_b = len(sigs_b)
            yield (dir_a, count_a, dir_b, count_b, intersection)
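# A minimal usage sketch, under assumptions: the root below is hypothetical
# and must already contain an initialized FarmFS volume. freeze() imports a
# file's contents into the blobstore and replaces the file with a link;
# unused_blobs() then reports blobs that no tree or snapshot references.
def example_freeze_and_gc():
    root = Path('/tmp/example_farm')  # hypothetical volume root
    vol = FarmFSVolume(root)
    for user_file in vol.thawed(root):  # files not yet backed by FarmFS
        result = vol.freeze(user_file)
        print(result["csum"], result["was_dup"])
    for csum in vol.unused_blobs(vol.items()):
        print("unreferenced blob:", csum)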