def truncate(self, path, length, fh=None):
    '''Resize the shelf at path to length bytes.

    truncate(2) calls arrive with fh == None and are resolved by path
    (access must still be checked); ftruncate passes in the open handle.
    Raises TmfsOSError(EINVAL) if the shelf is still smaller than length
    after the resize request.  Returns whatever the shadow's truncate()
    returns.
    '''
    wants_zeroing = self.shadow.zero_on_unlink

    # ALWAYS get the shelf by name, even if fh is valid.
    # FIXME: Compare self.shadow[fh] to returned shelf.
    # IMPLICIT ASSUMPTION: without tenants this will never EPERM
    current = self.librarian(self.lcp('get_shelf', path=path))
    resize_req = self.lcp(
        'resize_shelf',
        path=path,
        size_bytes=length,
        id=current['id'],
        zero_enabled=wants_zeroing)
    resized = self.librarian(resize_req)

    # If books were removed from the shelf and added to a zeroing
    # shelf, start a thread to zero the books on that shelf.
    if resized['z_shelf_path'] is not None:
        z_req = self.lcp('get_shelf', path=resized['z_shelf_path'])
        z_shelf = TMShelf(self.librarian(z_req))
        zeroer = threading.Thread(target=self._zero, args=(z_shelf, ))
        zeroer.start()

    # Refresh shelf info and make sure the resize really took effect.
    shelf = TMShelf(self.librarian(self.lcp('get_shelf', path=path)))
    if shelf.size_bytes < length:
        raise TmfsOSError(errno.EINVAL)
    self.get_bos(shelf)
    return self.shadow.truncate(shelf, length, fh)
def unlink(self, path, *args, **kwargs):
    '''Remove the shelf at path, scheduling its books for zeroing if needed.

    Extra positional or keyword arguments are rejected with an
    AssertionError.  Raises TmfsOSError(EBUSY) when the shadow cache
    still records open handles for the shelf.  Returns 0 on success.

    A shelf is destroyed immediately when it is empty, when it already
    carries the zeroing prefix (second trip through unlink), or when the
    shadow backend does not ask for zeroing.  Otherwise it is renamed to
    the zeroing name and handed to self._zero(); a second entry to
    unlink() is expected on the shelf under its new name.
    '''
    assert not args and not kwargs, 'unlink: unexpected args'
    shelf = TMShelf(self.librarian(self.lcp('get_shelf', path=path)))

    # Paranoia check for (dangling) opens.  Does VFS catch these first?
    # open_handle has been seen to be an empty dict rather than None, so
    # test truthiness instead of comparing against None.
    cached = self.shadow[(shelf.id, None)]
    if cached is not None and cached.open_handle:
        # Definitely a bad sign.  (Handles used to be force-closed here;
        # that loop sat after this raise and could never run, so it is gone.)
        raise TmfsOSError(errno.EBUSY)

    self.shadow.unlink(shelf)  # empty cache INCLUDING dangling opens

    # Early exit: no explicit zeroing required if:
    # 1. it's zero bytes long
    # 2. it's passed through zeroing, meaning this is the second trip to
    #    unlink().  Even if length is non-zero, remove it, assuming the
    #    _zero subprocess failed.
    # 3. The shadow subclass doesn't need it
    if ((not shelf.size_bytes) or
            shelf.name.startswith(self._ZERO_PREFIX) or
            (not self.shadow.zero_on_unlink)):
        self.librarian(self.lcp('destroy_shelf', path=path))
        return 0

    # Schedule for zeroing; a second entry to unlink() will occur on
    # this shelf with the new name.
    zeroname = '%s%d' % (self._ZERO_PREFIX, shelf.id)
    if zeroname != shelf.name:
        self.rename(self.get_shelf_path(shelf), zeroname)
        shelf.name = zeroname
    shelf.mode = stat.S_IFREG   # un-block it as was done on server
    if self.verbose <= 3:
        threading.Thread(target=self._zero, args=(shelf, )).start()
    else:
        # High verbosity is a debugging mode: drop into the debugger and
        # zero synchronously.
        set_trace()
        self._zero(shelf)  # this will block: lfs_fuse is single-threaded
    return 0
def cmd_create_shelf(self, cmdict):
    """Create a new shelf, or open it if it already exists.

    In (dict)---
        path
    Out (dict) ---
        shelf data, as returned by cmd_open_shelf()

    cmdict is updated in place with 'name', 'parent_id' and 'link_count'.
    """
    components = self._path2list(cmdict['path'])
    cmdict['name'] = components[-1]
    parent = self._path2shelf(components[:-1])
    cmdict['parent_id'] = parent.id
    cmdict['link_count'] = 1    # normal files get one

    # POSIX: if extant, open it; else create and then open
    try:
        return self.cmd_open_shelf(cmdict)
    except Exception:
        pass    # could not open it: fall through and create it

    self.errno = errno.EINVAL
    new_shelf = TMShelf(cmdict)
    self.db.create_shelf(new_shelf)

    # The IG request attributes will be ignored until AllocationPolicy is
    # set to RequestIG.  They are created anyway so that they show up in
    # a full xattr dump (getfattr -d /lfs/xxxx).
    initial_xattrs = (
        (BookPolicy.XATTR_ALLOCATION_POLICY,
         BookPolicy.DEFAULT_ALLOCATION_POLICY),
        (BookPolicy.XATTR_IG_REQ, ''),
        (BookPolicy.XATTR_IG_REQ_POS, ''),
    )
    for xattr_name, xattr_value in initial_xattrs:
        self.db.create_xattr(new_shelf, xattr_name, xattr_value)
    return self.cmd_open_shelf(cmdict)      # Does the handle thang
def cmd_symlink(self, cmdict):
    """Create a symlink at path pointing to target.

    Input(dict)---
        path - path to create symlink at
        target - path where symlink is supposed to point to
    Output(shelf)---
        the symlink shelf that was created, or the shelf already found at
        path (in which case self.errno is left at EEXIST)
    """
    cmdict['mode'] = self._MODE_DEFAULT_LNK
    components = self._path2list(cmdict['path'])
    cmdict['name'] = components[-1]
    parent = self._path2shelf(components[:-1])
    cmdict['parent_id'] = parent.id
    cmdict['link_count'] = 1    # I think?

    # Something already lives at this path: hand it back, flagged EEXIST.
    try:
        existing = self.cmd_get_shelf(cmdict)
        self.errno = errno.EEXIST
        return existing
    except Exception:
        pass    # lookup failed, so the name is free

    self.errno = errno.EINVAL
    link_shelf = TMShelf(cmdict)
    self.db.create_shelf(link_shelf)
    self.db.create_symlink(link_shelf, cmdict['target'])
    return link_shelf
def cmd_mkdir(self, cmdict):
    """Create a directory shelf at cmdict['path'].

    Currently similar to cmd_create_shelf with slight modifications.
    Returns the new shelf, or the shelf already found at that path (in
    which case self.errno is left at EEXIST).  The parent's link count is
    incremented when a directory is actually created.
    """
    components = self._path2list(cmdict['path'])
    cmdict['name'] = components[-1]
    parent = self._path2shelf(components[:-1])
    cmdict['parent_id'] = parent.id
    cmdict['link_count'] = 2    # . and .. for directories

    try:
        existing = self.cmd_get_shelf(cmdict)
        self.errno = errno.EEXIST
        return existing
    except Exception:
        pass    # lookup failed, so the name is free

    self.errno = errno.EINVAL
    new_dir = TMShelf(cmdict)
    self.db.create_shelf(new_dir)

    # parent directory shelf has to also have link count incremented
    parent.link_count += 1
    parent.matchfields = ('link_count', )
    self.db.modify_shelf(parent, commit=True)
    return new_dir  # man 2 mkdir: 0 on success
def open(self, path, flags, mode=None):
    '''Open the shelf at path and return the shadow's handle for it.'''
    # looking for filehandles?  See FUSE docs.  Librarian will do
    # all access calculations so call it first.
    open_req = self.lcp('open_shelf', path=path)
    shelf = TMShelf(self.librarian(open_req))
    self.get_bos(shelf)

    # File handle is a proxy for FuSE to refer to "real" file descriptor.
    # Different shadow types may return different things for kernel.
    return self.shadow.open(shelf, flags, mode)
def cmd_close_shelf(self, cmdict):
    """Close a shelf against access by a node.

    In (dict)---
        shelf_id, handle
    Out (dict) ---
        TMShelf object, as returned by the 'put' on the opened-shelves
        bookkeeping
    """
    closing = TMShelf(cmdict)
    return self.db.modify_opened_shelves(closing, 'put', cmdict['context'])
def write(self, path, buf, offset, fh):
    '''Write buf at offset into the shelf at path, growing it if needed.'''
    # FIXME: see read comment
    shelf = TMShelf(self.librarian(self.lcp('get_shelf', path=path)))

    # Resize shelf "on the fly" for writes past EOF
    # BUG: what if shelf was resized elsewhere?  And what about read?
    end_of_write = offset + len(buf)
    cached = self.shadow[(shelf.id, None)]
    if cached.size_bytes < end_of_write:
        self.truncate(path, end_of_write, fh)  # updates the cache

    return self.shadow.write(shelf, buf, offset, fh)
def create(self, path, mode, fh=None, supermode=None):
    '''Create (and open) a shelf at path; return the shadow's value for it.

    Without supermode the shelf is a regular file with the permission
    bits of mode.  With supermode, that value is sent to the librarian
    unchanged and only its permission bits go to the shadow.
    Raises TmfsOSError(ENOSYS) if fh is supplied.
    '''
    if fh is not None:
        # createat(2), methinks, but I never saw this in 8 months
        raise TmfsOSError(errno.ENOSYS)

    if supermode is not None:
        mode = supermode & 0o777
        shelf_mode = supermode
    else:
        mode &= 0o777
        shelf_mode = stat.S_IFREG + mode

    create_req = self.lcp('create_shelf', path=path, mode=shelf_mode)
    shelf = TMShelf(self.librarian(create_req))     # This is an open shelf...
    return self.shadow.create(shelf, mode)          # ...added to the cache.
def rename(self, old, new):
    '''Rename shelf old to new, replacing any shelf already at new.

    Returns 0 or raises.
    '''
    # A successful lookup of the destination (no 'errmsg' in the reply)
    # means something is already there; remove it first.
    destination = self.librarian(
        self.lcp('get_shelf', path=new), errorOK=True)
    if 'errmsg' not in destination:
        self.unlink(new)

    shelf = TMShelf(self.librarian(self.lcp('get_shelf', path=old)))

    # one of the only places path2name still exists
    # renamed from old path2shelf
    new_name = self._legacy_path2name(new)
    old_name = self._legacy_path2name(old)
    self.shadow.rename(shelf, old_name, new_name)

    rename_req = self.lcp(
        'rename_shelf', path=old, id=shelf.id, newpath=new)
    self.librarian(rename_req)  # None or raise
    return 0
def getattr(self, path, fh=None):
    '''Return a stat-style dict for the shelf at path.

    Raises TmfsOSError(ENOENT) if fh is supplied.  Shelves whose name
    starts with 'block' are reported with the default block-device mode.
    '''
    # ROSS remove some root hardcoding now that a real root exists
    if fh is not None:
        raise TmfsOSError(errno.ENOENT)     # never saw this in 8 months

    shelf = TMShelf(self.librarian(self.lcp('get_shelf', path=path)))
    attrs = dict(
        st_ctime=shelf.ctime,
        st_mtime=shelf.mtime,
        st_uid=42,
        st_gid=42,
        st_mode=shelf.mode,
        st_nlink=shelf.link_count,
        st_size=shelf.size_bytes,
    )
    if shelf.name.startswith('block'):
        attrs['st_mode'] = self._MODE_DEFAULT_BLK
    return attrs
def fallocate(self, path, mode, offset, length, fh=None):
    '''Make sure the shelf holds at least offset + length bytes.

    Only mode == 0 is accepted; anything else raises
    TmfsOSError(EOPNOTSUPP).  Raises TmfsOSError(ESTALE) when fh is given
    but the shadow cache has no shelf for it.  Returns 0 if the shelf is
    already big enough, otherwise the result of truncate().
    '''
    # LFS doesn't support sparse files or hole punching (lazy allocation).
    # mode == 0 is essentially posix_fallocate and that's all there is.
    # Case in point: mkfs on a shelf (or a loopback device to a shelf)
    # wants mode == 3 == FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE
    # for quick-and-dirty zeroed blocks.  See it in the e2fsprogs source
    # file lib/ext2fs/unix_io.c::unix_discard()
    if mode:
        raise TmfsOSError(errno.EOPNOTSUPP)

    if fh is not None:
        shelf = self.shadow[(None, fh)]
        if not shelf:
            raise TmfsOSError(errno.ESTALE)
    else:
        shelf = TMShelf(self.librarian(self.lcp('get_shelf', path=path)))

    if shelf.size_bytes < offset + length:
        return self.truncate(path, offset + length, None)
    return 0
def _path2shelf(self, path):
    '''Returns shelf object corresponding to given path'''
    # accepts a string, or a list.  either is handled in path2list
    components = self._path2list(path)

    # FIXME: not sure if this will work, attempting to start at root and
    # move from there.  If things break, this would be a reasonable place
    # to look.
    probe = TMShelf(id=2)
    probe.matchfields = ('id', )
    here = self.db.get_shelf(probe)

    # Descend one component at a time, matching on (parent, name).
    for component in components:
        probe = TMShelf(parent_id=here.id, name=component)
        probe.matchfields = ('parent_id', 'name')
        here = self.db.get_shelf(probe)
    return here
def getxattr(self, path, xattr, position=0):
    """Return the value of extended attribute xattr on the shelf at path.

    Called with a specific namespace.name xattr.  Can return either a
    bytes array OR an int.

    Raises TmfsOSError(ENOSYS) for a non-zero position, and
    TmfsOSError(ENODATA) when the attribute cannot be fetched, has no
    value, or cannot be converted.
    """
    if position:
        raise TmfsOSError(errno.ENOSYS)     # never saw this in 8 months

    rsp = self.librarian(self.lcp('get_shelf', path=path))
    shelf = TMShelf(rsp)
    # Does this also need changed to support path instead of name?

    # Piggy back for queries by kernel (globals & fault handling).
    if xattr.startswith('_obtain_'):
        # this will need some work
        data = self.shadow.getxattr(shelf, xattr)
        try:
            return bytes(data.encode())
        except AttributeError:  # probably the "encode()"
            self._ret_is_string = False
            return bytes(data)

    # "ls" starts with simple getattr but then comes here for
    # security.selinux, system.posix_acl_access, and posix_acl_default.
    # ls -l can also do the same thing on '/'.  Save the round trips.
    if xattr.startswith('security.'):   # path == '/' is legal now
        return bytes(0)

    try:
        rsp = self.librarian(self.lcp('get_xattr', path=path, xattr=xattr))
        value = rsp['value']
        if value is None:               # 'No such attribute'
            raise LookupError(xattr)    # turned into ENODATA just below
        if isinstance(value, int):
            return value
        if isinstance(value, str):
            # http://stackoverflow.com/questions/606191/convert-bytes-to-a-python-string
            return bytes(value.encode('cp437'))
        # Any other type: encode and actually hand the result back.  The
        # conversion used to be computed and then dropped, so the caller
        # received None.
        return bytes(value.encode())
    except Exception:
        raise TmfsOSError(errno.ENODATA)    # syn for ENOATTR
def cmd_get_shelf_path(self, cmdict):
    '''Retrieve the absolute path of a shelf.

    The shelf is first looked up by the name and parent_id carried in
    cmdict; its ancestors are then fetched by id until one whose
    parent_id is 2 is reached.  cmdict['id'] is overwritten along the way.
    '''
    probe = TMShelf(cmdict)         # incomplete shelf built from cmdict
    probe.matchfields = ('name', 'parent_id')
    node = self.db.get_shelf(probe)     # complete shelf fetched from db

    # Collect names leaf-first while climbing towards the root.
    names = [node.name]
    while node.parent_id != 2:
        cmdict['id'] = node.parent_id   # modify passing command dict
        probe = TMShelf(cmdict)
        probe.matchfields = ('id', )
        node = self.db.get_shelf(probe)
        names.append(node.name)

    names.reverse()
    return '/' + '/'.join(names)
def cmd_get_shelf(self, cmdict, match_id=False, match_parent_id=True):
    """List a given shelf.

    In (dict)---
        path
        optional flag to force a match on id (ie, already open)
        optional flag to force a match on parent_id, defaulted to True
    Out (TMShelf object) ---
        TMShelf object

    cmdict is updated in place with 'name' and 'parent_id'.  Failures are
    reported as AssertionError, with self.errno set beforehand.
    """
    self.errno = errno.EINVAL
    components = self._path2list(cmdict['path'])
    try:
        cmdict['name'] = components[-1]
        cmdict['parent_id'] = self._path2shelf(components[:-1]).id
    except IndexError:      # case of getting root
        cmdict['name'] = '.'
        cmdict['parent_id'] = 2

    wanted = TMShelf(cmdict)
    assert wanted.name, 'Missing shelf name'
    if match_parent_id:
        assert wanted.parent_id, 'Missing parent id'

    # Always match on name; add id and/or parent_id as requested.
    fields = ['name']
    if match_id:
        fields.append('id')
    if match_parent_id:
        fields.append('parent_id')
    wanted.matchfields = tuple(fields)

    found = self.db.get_shelf(wanted)
    if found is None:
        self.errno = errno.ENOENT   # FIXME: raise OSError instead?
        raise AssertionError('no such shelf %s' % cmdict['name'])

    # consistency checks
    self.errno = errno.EBADF
    assert self._nbooks(found.size_bytes) == found.book_count, (
        '%s size metadata mismatch' % found.name)

    self.errno = errno.ESTALE
    return found
def cmd_resize_shelf(self, cmdict):
    """ Resize given shelf to new size in bytes.
        In (dict)---
            path
            id
            size_bytes
            zero_enabled
            (cmdict['context'] is also read; its 'physloc' entry is
            concatenated into the zeroing shelf's name)
        Out (dict) ---
            z_shelf_path: None, or the path of the zeroing shelf that
            received the books removed by a shrink

        Failed checks raise AssertionError with self.errno set just
        beforehand (EINVAL, EBADFD, EMFILE, ENOSPC, EREMOTEIO).  A failure
        while removing books rolls the database back and raises
        RuntimeError with self.errno = EREMOTEIO.
    """
    shelf = self.cmd_get_shelf(cmdict, match_id=True)
    bos = self._list_shelf_books(shelf)

    new_size_bytes = int(cmdict['size_bytes'])
    self.errno = errno.EINVAL
    assert new_size_bytes >= 0, 'Bad size'
    new_book_count = self._nbooks(new_size_bytes)

    # Default reply: no zeroing shelf was created.
    out_buf = {'z_shelf_path': None}

    # The existing books must be numbered exactly 1..book_count.
    if bos:
        seqs = [ b.seq_num for b in bos ]
        self.errno = errno.EBADFD
        assert set(seqs) == set(range(1, shelf.book_count + 1)), (
            'Corrupt BOS sequence progression for %s' % shelf.name)

    # Can I leave real early?
    if new_size_bytes == shelf.size_bytes:
        return out_buf

    # Can this call be rejected?  Shrinking is refused while anyone other
    # than the caller has the shelf open (include_me=False).
    openers = self.db.get_shelf_openers(
        shelf, cmdict['context'], include_me=False)
    self.errno = errno.EMFILE
    assert not openers or new_size_bytes > shelf.size_bytes, \
        'Cannot shrink multiply-opened shelf'

    # Go for it
    shelf.size_bytes = new_size_bytes

    # How about a little early?  Same number of books: only the byte size
    # needs to be stored.
    if new_book_count == shelf.book_count:
        # NOTE(review): a bare string here, where other call sites assign a
        # tuple; presumably the matchfields setter accepts both -- confirm.
        shelf.matchfields = 'size_bytes'
        shelf = self.db.modify_shelf(shelf, commit=True)
        return out_buf

    books_needed = new_book_count - shelf.book_count
    if books_needed > 0:
        # Growing: obtain free books from the allocation policy and append
        # them after the current last sequence number.
        policy = BookPolicy(self, shelf, cmdict['context'])
        freebooks = policy(books_needed)
        self.errno = errno.ENOSPC
        assert len(freebooks) == books_needed, \
            'out of space for "%s"' % shelf.name
        seq_num = shelf.book_count
        for book in freebooks:
            # Mark book in use and create BOS entry
            book = self._set_book_alloc(book, TMBook.ALLOC_INUSE)
            seq_num += 1
            thisbos = TMBos(
                shelf_id=shelf.id, book_id=book.id, seq_num=seq_num)
            thisbos = self.db.create_bos(thisbos)
    elif books_needed < 0:
        books_2bdel = -books_needed     # it all reads so much better
        self.errno = errno.EREMOTEIO
        assert len(bos) >= books_2bdel, 'Book removal problem'

        # The unlink workflow has one step which zero truncates
        # a file with a certain name.  IOW not all shelves are USED,
        # some may be full of ZOMBIES
        freeing = shelf.name.startswith(
            _ZERO_PREFIX) and not new_book_count

        zero_enabled = cmdict['zero_enabled']

        if not freeing and zero_enabled:
            # Create a zeroing shelf for the books being removed
            z_shelf_name = (_ZERO_PREFIX + shelf.name + '_' +
                            str(shelf.parent_id) + '_' + str(time.time()) +
                            '_' + cmdict['context']['physloc'])
            # all are placed in root, so path is easy
            z_shelf_path = '/' + z_shelf_name
            z_shelf_data = {}
            z_shelf_data.update({'context': cmdict['context']})
            z_shelf_data.update({'name': z_shelf_name})
            # place all zeroing shelves at root (/lfs) with parent_id = 2
            # this makes sure that they are not placed in a directory that
            # is removed before they can be zeroed and deleted themselves
            z_shelf_data.update({'parent_id': 2})
            self.errno = errno.EINVAL
            z_shelf = TMShelf(z_shelf_data)
            self.db.create_shelf(z_shelf)
            z_seq_num = 1

        # Remove books from the tail of the shelf, one per iteration.
        while books_2bdel > 0:
            try:
                thisbos = bos.pop()
                self.db.delete_bos(thisbos)     # Orphans the book
                # Removed books become ZOMBIE; a book that is already
                # ZOMBIE on a shelf being freed goes to FREE instead.
                new_state = TMBook.ALLOC_ZOMBIE
                book = self.db.get_book_by_id(thisbos.book_id)
                if freeing and book.allocated == TMBook.ALLOC_ZOMBIE:
                    new_state = TMBook.ALLOC_FREE
                if new_state != book.allocated:  # staying ZOMBIE
                    _ = self._set_book_alloc(book, new_state)
                books_2bdel -= 1
                if not freeing and zero_enabled:
                    # Add removed book to zeroing shelf
                    z_thisbos = TMBos(shelf_id=z_shelf.id,
                                      book_id=book.id,
                                      seq_num=z_seq_num)
                    z_thisbos = self.db.create_bos(z_thisbos)
                    z_shelf.size_bytes += self.book_size_bytes
                    z_shelf.book_count += 1
                    z_seq_num += 1
            except Exception as e:
                self.db.rollback()
                self.errno = errno.EREMOTEIO
                raise RuntimeError(
                    'Resizing shelf smaller failed: %s' % str(e))

        if not freeing and zero_enabled:
            # Persist the zeroing shelf's totals and report its path.
            z_shelf.matchfields = ('size_bytes', 'book_count')
            z_shelf = self.db.modify_shelf(z_shelf, commit=True)
            out_buf = {'z_shelf_path': z_shelf_path}
    else:
        # books_needed == 0 returned earlier, so this should not happen.
        self.db.rollback()
        self.errno = errno.EREMOTEIO
        raise RuntimeError('Bad code path in cmd_resize_shelf()')

    # Store the shelf's new size and book count.
    shelf.book_count = new_book_count
    shelf.matchfields = ('size_bytes', 'book_count')
    shelf = self.db.modify_shelf(shelf, commit=True)
    return out_buf
def ioctl(self, path, cmd, arg, fh, flags, data):
    '''Look up the shelf at path and pass the ioctl on to the shadow.'''
    lookup = self.lcp('get_shelf', path=path)
    shelf = TMShelf(self.librarian(lookup))
    return self.shadow.ioctl(shelf, cmd, arg, fh, flags, data)
def create_empty_db(cur):
    """Create every table and index of an empty database.

    cur must provide execute(), commit() and schema(); presumably it is
    the project's database cursor wrapper -- confirm against the caller.

    Any exception raised while creating tables is converted to SystemExit,
    reporting the line number taken from the traceback.  Afterwards the
    created schemas for books, shelves, books_on_shelves and
    opened_shelves are asserted to match the corresponding TM* objects.
    """
    try:
        # The first four tables are executed without an intervening
        # commit; the first commit comes after the books table.
        table_create = """ CREATE TABLE globals ( schema_version TEXT, book_size_bytes INT, nvm_bytes_total INT, books_total INT, nodes_total INT ) """
        cur.execute(table_create)

        # FRD: max 80.
        # index == quadruple.
        table_create = """ CREATE TABLE FRDnodes ( node_id, rack INT, enc INT, node INT, coordinate TEXT, serialNumber TEXT ) """
        cur.execute(table_create)

        # SOC
        table_create = """ CREATE TABLE SOCs ( node_id, MAC TEXT, status INT, coordinate TEXT, tlsPublicCertificate TEXT, heartbeat INT, cpu_percent INT, rootfs_percent INT, network_in INT, network_out INT, mem_percent INT ) """
        cur.execute(table_create)

        # FRD: 4 per node.  CID is raw descriptor table encoded value
        table_create = """ CREATE TABLE FAModules ( node_id INT, IG INT, module_size_books INT, rawCID INT, status INT, coordinate TEXT, memorySize INT ) """
        cur.execute(table_create)

        # Book numbers are now relative to an interleave group.
        # intlv_group 1-128 is less than eight bits.  For 990 usage there
        # is now a tag field above bits 15-0 (plenty of room for legacy IGs).
        table_create = """ CREATE TABLE books ( id INTEGER PRIMARY KEY, intlv_group INT, book_num INT, allocated INT, attributes INT ) """
        cur.execute(table_create)
        cur.commit()

        table_create = """ CREATE TABLE shelves ( id INTEGER PRIMARY KEY, creator_id INT, size_bytes INT, book_count INT, ctime INT, mtime INT, name TEXT, mode INT, parent_id INT, link_count INT ) """
        cur.execute(table_create)
        cur.commit()

        # Unique index on shelf name, left disabled:
        # cur.execute('CREATE UNIQUE INDEX IDX_shelves ON shelves (name)')
        # cur.commit()

        table_create = """ CREATE TABLE books_on_shelves ( shelf_id INT, book_id INT, seq_num INT ) """
        cur.execute(table_create)
        cur.commit()

        # The id will be used as the open_handle.  It's called id because
        # it matches some internal operations hardcoded to that name.
        table_create = """ CREATE TABLE opened_shelves ( id INTEGER PRIMARY KEY, shelf_id INT, node_id INT, pid INT ) """
        cur.execute(table_create)
        cur.commit()

        table_create = """ CREATE TABLE shelf_xattrs ( shelf_id INT, xattr TEXT, value TEXT ) """
        cur.execute(table_create)
        cur.commit()

        # creating linking table.  Will currently house shelf id for
        # symbolic link shelves, the path for the file being linked to,
        # and another "magic" field for whatever else needs to be put in
        # that I can't think of now.  Thought about adding a primary key
        # id for possible debugging purposes, but can't think of what I
        # would do with it.  Will just be for linking other stuff together
        table_create = """ CREATE TABLE links ( shelf_id INT, target TEXT, other TEXT ) """
        cur.execute(table_create)
        cur.commit()

        # At most one row per (shelf, attribute name).
        cur.execute('''CREATE UNIQUE INDEX IDX_xattrs ON shelf_xattrs (shelf_id, xattr)''')
        cur.commit()
    except Exception as e:
        # tb_lineno identifies which statement above failed.
        raise SystemExit('DB operation failed at line %d: %s' % (
            sys.exc_info()[2].tb_lineno, str(e)))

    # Idiot checks: the tables just created must agree with the schemas
    # the in-memory objects expect.
    book = TMBook()
    assert book.schema == cur.schema('books'), 'Bad schema: books'
    shelf = TMShelf()
    assert shelf.schema == cur.schema('shelves'), 'Bad schema: shelves'
    bos = TMBos()
    assert bos.schema == cur.schema('books_on_shelves'), 'Bad schema: BOS'
    opened_shelves = TMOpenedShelves()
    assert opened_shelves.schema == cur.schema(
        'opened_shelves'), 'Bad schema: opened_shelves'
def read(self, path, length, offset, fh):
    '''Read length bytes at offset from the shelf at path via the shadow.'''
    # FIXME: this might break shadow directories and shadow files
    # but those have not been made to work with subs anyway
    lookup = self.lcp('get_shelf', path=path)
    shelf = TMShelf(self.librarian(lookup))
    return self.shadow.read(shelf, length, offset, fh)