# is_egg is used as a classmethod below (Egg.is_egg); the decorator is elided in this fragment.
def is_egg(cls, pth):
    pth = Path.make(pth)
    if pth.extension.lower() != "egg":
        return False
    if not pth.is_file:
        return False
    return True
def get_bundle(pth = "."): pth = Path.make(pth).real bundle = Bundle(pth) for parent in pth.iter_parents(): if Egg.is_egg(parent): return Egg(parent) if (parent + INIT_FILE).is_file: bundle = Bundle(parent) return bundle
from itertools import combinations  # used by similarity() below
# The remaining names (Path, KeyDB, KeyDBFactory, KeyDBWindow, TreeSnapshot,
# snap_reduce, reverser, ensure_link, ensure_readonly, ensure_symlink, ...)
# come from this project's own modules.


class FarmFSVolume:
    def __init__(self, root):
        assert isinstance(root, Path)
        self.root = root
        self.mdd = _metadata_path(root)
        self.keydb = KeyDB(_keys_path(root))
        self.udd = Path(self.keydb.read('udd'))
        self.snapdb = KeyDBFactory(KeyDBWindow("snaps", self.keydb), encode_snapshot, decode_snapshot)
        self.remotedb = KeyDBFactory(KeyDBWindow("remotes", self.keydb), encode_volume, decode_volume)
        self.reverser = reverser()
        # Build the exclusion list: the metadata dir plus any .farmignore patterns.
        exclude_file = Path('.farmignore', self.root)
        self.exclude = [str(self.mdd)]
        try:
            with exclude_file.open('r') as exclude_fd:
                for pattern in exclude_fd.readlines():
                    pattern = str(Path(pattern.strip(), root))
                    self.exclude.append(pattern)
        except IOError as e:
            # A missing .farmignore is fine; anything else is a real error.
            if e.errno != NoSuchFile:
                raise

    def thawed(self, path):
        """Yield the set of files under path not backed by FarmFS."""
        for (entry, type_) in path.entries(self.exclude):
            if type_ == "file":
                yield entry

    def frozen(self, path):
        """Yield the set of files under path backed by FarmFS."""
        for (entry, type_) in path.entries(self.exclude):
            if type_ == "link":
                yield entry

    def freeze(self, path):
        """Back all files under path with FarmFS."""
        for p in self.thawed(path):
            self._import_file(p)

    # NOTE: This assumes a posix storage engine.
    def _import_file(self, path):
        assert isinstance(path, Path)
        assert isinstance(self.udd, Path)
        csum = path.checksum()
        blob = self.udd.join(_checksum_to_path(csum))
        print "Processing %s with csum %s" % (path, csum)
        if blob.exists():
            print "Found a copy of file already in userdata, skipping copy"
        else:
            print "Putting link at %s" % blob
            ensure_link(blob, path)
            ensure_readonly(blob)
        # Replace the user file with a read-only symlink into the userdata store.
        ensure_symlink(path, blob)
        ensure_readonly(path)

    def thaw(self, path):
        """Thaw all files under path, to allow editing."""
        for p in self.frozen(path):
            self._export_file(p)

    # NOTE: This assumes a posix storage engine.
    def _export_file(self, user_path):
        assert isinstance(user_path, Path)
        csum_path = user_path.readlink()
        user_path.unlink()
        csum_path.copy(user_path)

    def repair_link(self, path):
        """Find broken links and point them back at the userdata directory."""
        assert path.islink()
        oldlink = path.readlink()
        if oldlink.isfile():
            print "Link %s is ok" % path
            return
        csum = self.reverser(oldlink)
        newlink = Path(_checksum_to_path(csum), self.udd)  # Should be part of volume.
        if not newlink.isfile():
            raise ValueError("%s is missing, cannot relink" % newlink)
        else:
            print "Relinking %s from %s to %s" % (path, oldlink, newlink)
            path.unlink()
            path.symlink(newlink)

    def check_userdata_hashes(self):
        """Make sure all backed file hashes match their file contents."""
        link2csum = reverser()
        for (path, type_) in self.udd.entries():
            if type_ == "file":
                if not _validate_checksum(link2csum, path):
                    yield path

    def check_links(self):
        """Make sure that all links in the tree and in all snaps are backed."""
        for (name, count) in self.count().items():
            path = self.udd.join(name)
            if not path.exists():
                yield path

    def fsck(self):
        for bad_link in self.check_links():
            yield "CORRUPTION: broken link in %s" % bad_link
        for bad_hash in self.check_userdata_hashes():
            yield "CORRUPTION: checksum mismatch in %s" % bad_hash

    def tree(self):
        """Get a snap object which represents the tree of the volume."""
        return TreeSnapshot(self.root, self.udd, self.exclude)

    def count(self):
        """Return a checksum_path -> count map for each unique file backed by FarmFS."""
        tree_snap = self.tree()
        key_snaps = []
        for snap_name in self.snapdb.list():
            key_snaps.append(self.snapdb.read(snap_name))
        # Reduce over the live tree plus every stored snapshot.
        return snap_reduce([tree_snap] + key_snaps)

    def reverse(self, udd_name):
        """Yield the set of paths which reference a given checksum_path name."""
        # TODO: scan the snaps for this as well.
        for (path, type_) in self.root.entries(self.exclude):
            if type_ == "link":
                if path.readlink() == udd_name:
                    yield path

    def userdata(self):
        """Yield the relative paths (basestring) for all files in the userdata store."""
        # We populate counts with all hash paths from the userdata directory.
        for (path, type_) in self.udd.entries():
            assert isinstance(path, Path)
            if type_ == "file":
                yield path.relative_to(self.udd)
            elif type_ == "dir":
                pass
            else:
                raise ValueError("%s is of invalid type %s" % (path, type_))

    def gc(self):
        """Yield the names of the blobs being garbage collected, then unlink them."""
        referenced_hashes = set(self.count().keys())
        udd_hashes = set(self.userdata())
        missing_data = referenced_hashes - udd_hashes
        assert len(missing_data) == 0, \
            "Missing %s\nReferenced %s\nExisting %s\n" % (missing_data, referenced_hashes, udd_hashes)
        orphaned_data = udd_hashes - referenced_hashes
        for blob in orphaned_data:
            yield blob
            blob_path = self.udd.join(blob)
            blob_path.unlink(clean=self.udd)

    def similarity(self):
        """Yield Jaccard similarity data for every pair of directories."""
        tree = self.tree()
        dir_sigs = directory_signatures(tree)
        combos = combinations(dir_sigs.items(), 2)
        for ((dir_a, sigs_a), (dir_b, sigs_b)) in combos:
            # Jaccard index: |A intersect B| / |A union B|
            jac_sim = float(len(sigs_a.intersection(sigs_b))) / len(sigs_a.union(sigs_b))
            yield (dir_a, dir_b, jac_sim)
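# A minimal usage sketch of the volume lifecycle. Hedged: the volume root
# "/data/photos" is hypothetical and the volume is assumed to already be
# initialized; freeze/fsck/gc are the methods defined above.
#
#   vol = FarmFSVolume(Path("/data/photos"))
#   vol.freeze(vol.root)            # replace files with read-only symlinks into udd
#   for problem in vol.fsck():      # report broken links and checksum mismatches
#       print problem
#   for blob in vol.gc():           # drop blobs no longer referenced anywhere
#       print "collected %s" % blob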