def __init__(self, storage_path):
    """
    Prepare the on-disk layout and the storage back-ends.

    Ensures that the ``objects`` and ``refs`` sub-directories of
    ``storage_path`` exist, then builds a ``BlobsStorage`` on the
    objects directory and an ``ObjectsStorage`` on top of it.

    :param storage_path: Directory under which ``objects`` and
        ``refs`` are kept.
    :raises OSError: If one of the directories cannot be created, or
        its path already exists but is not a directory.
    """
    self._storage_path = storage_path
    self._objects_path = os.path.join(self._storage_path, 'objects')
    self._refs_path = os.path.join(self._storage_path, 'refs')

    for directory in (self._objects_path, self._refs_path):
        # Create first and inspect afterwards: testing for existence
        # beforehand races with other processes creating the same
        # directory, and would also accept a plain file at that path.
        try:
            os.makedirs(directory)
        except OSError:
            if not os.path.isdir(directory):
                raise

    self._blobs_storage = BlobsStorage(self._objects_path)
    self._objects_storage = ObjectsStorage(self._blobs_storage)
class FileSystemScanner(object):
    """
    Git-like high-level filesystem snapshotting tool.

    Object data is handed to an ``ObjectsStorage`` backed by a
    ``BlobsStorage`` rooted at ``<storage_path>/objects``; named
    references are plain files under ``<storage_path>/refs`` whose
    content is the id of the object they point to.
    """

    def __init__(self, storage_path):
        """
        Prepare the on-disk layout and the storage back-ends.

        :param storage_path: Directory under which ``objects`` and
            ``refs`` are kept.
        :raises OSError: If one of the directories cannot be created,
            or its path already exists but is not a directory.
        """
        self._storage_path = storage_path
        self._objects_path = os.path.join(self._storage_path, 'objects')
        self._refs_path = os.path.join(self._storage_path, 'refs')

        for directory in (self._objects_path, self._refs_path):
            # Create first and inspect afterwards: testing for existence
            # beforehand races with other processes creating the same
            # directory, and would also accept a plain file at that path.
            try:
                os.makedirs(directory)
            except OSError:
                if not os.path.isdir(directory):
                    raise

        self._blobs_storage = BlobsStorage(self._objects_path)
        self._objects_storage = ObjectsStorage(self._blobs_storage)

    @staticmethod
    def _validate_refname(refname):
        """Raise ``ValueError`` unless ``refname`` is a legal ref name."""
        if not re.match(r"^[0-9a-zA-Z_\-]+$", refname):
            raise ValueError("Invalid character in refname")

    def store_ref(self, refname, refhash):
        """
        Point ``refname`` at ``refhash``, replacing any previous value.

        :param refname: Name made only of ASCII letters, digits, ``_``
            and ``-``.
        :param refhash: 40-character object id.
        :raises ValueError: If either argument fails validation.
        """
        self._validate_refname(refname)
        # NOTE(review): this accepts any alphanumeric character, not only
        # hexadecimal digits -- confirm whether that is intended.
        if not re.match(r"^[0-9A-Za-z]{40}$", refhash):
            raise ValueError("Invalid hash")
        with open(os.path.join(self._refs_path, refname), 'w') as f:
            f.write(refhash)

    def list_refs(self):
        """
        Return the names found in the refs directory.

        Entries starting with ``.`` or ending with ``~`` are skipped.
        The order is whatever ``os.listdir`` yields.
        """
        return [f for f in os.listdir(self._refs_path)
                if not (f.startswith('.') or f.endswith('~'))]

    def get_ref(self, refname):
        """Return the full content of the ref file named ``refname``."""
        with open(os.path.join(self._refs_path, refname), 'r') as f:
            return f.read()

    def get_ref_object(self, refname):
        """Return the object retrieved for the id stored in ``refname``."""
        object_id = self.get_ref(refname)
        return self._objects_storage.retrieve_obj(object_id)

    def delete_ref(self, refname):
        """Remove the ref file named ``refname`` (the object is kept)."""
        os.unlink(os.path.join(self._refs_path, refname))

    def backup_directory(self, refname, base_path, exclude=None,
                         exclude_patterns=None):
        """
        Scan a directory recursively and store all the found files + trees.

        **Backup strategy:**

        - Recursively store trees + files while exploring ``base_path``;
          directory entries are visited in sorted order so that the same
          content always produces the same sequence of stored objects
        - Every entry (file or directory, with everything below it)
          whose path relative to ``base_path`` is listed in ``exclude``
          is ignored
        - Every entry whose path relative to ``base_path`` is matched by
          a regular expression in ``exclude_patterns`` is ignored
        - Symbolic links are never followed
        - The hash of the top-level tree is stored as the snapshot
          content
        - The hash of the snapshot is stored as ``refname``

        :param refname:
            Name that will be used to refer to the snapshot object
            resulting from the backup operation, for further retrieval.
        :param base_path:
            The base path from which to start scanning
        :param exclude:
            A list of paths (relative to ``base_path``) to exclude from
            backup
        :param exclude_patterns:
            A list of regular expressions that are searched for
            (``re.search``) in the path of each entry, relative to
            ``base_path``; anchor with ``^`` / ``$`` to match the whole
            path
        :raises ValueError: If ``refname`` is not a legal ref name; this
            is checked before anything is scanned or stored.
        :raises NotImplementedError: If an entry that is not excluded is
            neither a directory nor a regular file (e.g. a symlink).
        """
        # Fail fast: do not walk and store a whole tree only to discover
        # at the very end that the ref cannot be written.
        self._validate_refname(refname)

        base_path = os.path.abspath(base_path)
        excluded_paths = set(
            os.path.normpath(p) for p in (exclude or ()))
        excluded_regexes = [
            re.compile(p) for p in (exclude_patterns or ())]

        def is_excluded(path):
            rel_path = os.path.relpath(path, base_path)
            if rel_path in excluded_paths:
                return True
            return any(rx.search(rel_path) for rx in excluded_regexes)

        def store_file(path):
            file_stat = os.lstat(path)
            st_ifmt = stat.S_IFMT(file_stat.st_mode)
            if st_ifmt != stat.S_IFREG:
                # TODO: store symlinks (and possibly other types) too.
                raise NotImplementedError("Unsupported file type")

            with open(path, 'rb') as f:
                file_blob = self._objects_storage.store_blob(f)

            file_info = {
                'file_name': os.path.basename(path),
                'stat': {
                    'st_ifmt': st_ifmt,
                    'st_mode': file_stat.st_mode,
                    'st_ino': file_stat.st_ino,
                    'st_dev': file_stat.st_dev,
                    'st_nlink': file_stat.st_nlink,
                    'st_uid': file_stat.st_uid,
                    'st_gid': file_stat.st_gid,
                    'st_size': file_stat.st_size,
                    'st_atime': file_stat.st_atime,
                    'st_ctime': file_stat.st_ctime,
                    'st_mtime': file_stat.st_mtime,
                },
                # Link the file object to the blob holding its data.
                'content': file_blob,
            }
            return self._objects_storage.store_file(file_info)

        def store_directory(path):
            hashes = []
            for file_name in sorted(os.listdir(path)):
                file_path = os.path.join(path, file_name)
                if is_excluded(file_path):
                    continue
                # ``isdir`` follows symlinks; refusing links here keeps
                # the walk inside ``base_path`` and free of loops.
                if (os.path.isdir(file_path)
                        and not os.path.islink(file_path)):
                    hashes.append(store_directory(file_path))
                else:
                    hashes.append(store_file(file_path))
            # NOTE(review): assumes ``ObjectsStorage.store_tree`` accepts
            # the list of child ids and returns the tree id -- confirm
            # against the storage class.  Also note that sub-directory
            # names are not recorded anywhere yet.
            return self._objects_storage.store_tree(hashes)

        tree_hash = store_directory(base_path)
        snapshot_hash = self._objects_storage.store_snapshot({
            'content': tree_hash,
        })
        self.store_ref(refname, snapshot_hash)

    def restore_files(self, refname, destination, file_name=None):
        """
        Restore files from a backup.

        TODO: not implemented yet -- calling this currently does nothing.
        """
        pass