def _parse_header(self):
    '''
    Read and parse the next blob header from self._bio.

    Returns True when a header was read and parsed, False when the
    stream is exhausted (zero bytes read == clean EOF).

    Raises TargetObjectError on a short (partial) read, a bad magic
    number, or an unrecognized handle type.

    Side effects: sets self._last_read_length, self._magic,
    self._blobCount, self._realCount and self._objHandleType.
    '''
    header_bytes = self._bio.read(ClfsObjParseBase.OBTYPE_HEADER_BYTES)
    self._last_read_length = len(header_bytes)
    if not header_bytes:
        # Nothing at all read: clean end-of-stream, not an error.
        return False
    if self._last_read_length != ClfsObjParseBase.OBTYPE_HEADER_BYTES:
        # Partial header: the blob is truncated/corrupt.
        raise TargetObjectError(
            self._tobj,
            "short read %d != %d" % (len(header_bytes), ClfsObjParseBase.OBTYPE_HEADER_BYTES))
    self._magic, self._blobCount, self._realCount, handleType, _ = struct.unpack(
        _HEADER_PACK1, header_bytes)
    if self._magic != ClfsObjParseBase.OBTYPE_MAGIC_20:
        raise TargetObjectError(
            self._tobj,
            "header magic %s != %s" % (oct(self._magic), oct(ClfsObjParseBase.OBTYPE_MAGIC_20)))
    try:
        self._objHandleType = CLFSObjHandleType(handleType)
    except ValueError as exc:
        # Fix: chain the original ValueError so the root cause is kept
        # in the traceback instead of the misleading implicit
        # "during handling of the above exception" context.
        raise TargetObjectError(
            self._tobj, "invalid handle type '%s'" % handleType) from exc
    return True
def _obj_reconcile__repack_dirents(wrock, tobj, oblob, blob_name):
    '''
    Helper for obj_reconcile(). Invoked on OBTYPE_DIRENTS.
    oblob is the CLFSObjectBlob.
    The purpose here is to replace the '..' entry with one
    that is correct for the current tobj.first_backpointer.
    Returns a tuple of (unparsed_data_for_blob, owner_id).
    '''
    # Re-parse the dirent payload from the blob bytes so it can be
    # edited and re-serialized.
    ps = ParseState(tobj, oblob.data)
    ps.resume_from_oblob(oblob)
    dirent_list = ps.parse_dirents()
    # A well-formed directory has at least '.' and '..' entries.
    if len(dirent_list) < 2:
        raise TargetObjectError(tobj, "directory entry list short (length=%d)" % len(dirent_list), blob_name=blob_name)
    # dotdot is the second entry; entries appear to be mutable
    # [name, filehandle] pairs (entry [1] is assigned below).
    dotdot = dirent_list[1]
    if dotdot[0] != '..':
        raise TargetObjectError(tobj, "second entry is not dotdot ('%s')" % dotdot[0], blob_name=blob_name)
    if tobj.first_backpointer == FILEHANDLE_NULL:
        if tobj.filehandle == FILEHANDLE_ROOT:
            # The root directory is its own parent.
            dotdot[1] = tobj.filehandle
        else:
            # No parent known and not the root: orphaned directory.
            wrock.logger.warning("%s reconcile %s appears to be orphaned", wrock, tobj.describe())
            dotdot[1] = FILEHANDLE_ORPHAN
    else:
        # Normal case: point '..' at the current first backpointer.
        dotdot[1] = tobj.first_backpointer
    ba = unparse_dirents_list(dirent_list)
    # The repacked dirents must still fit in the first segment.
    assert len(ba) <= CLFSSegment.FIRST_SEGMENT_BYTES
    return ba, dotdot[1]
def obj_reconcile(wrock, tobj, content, desc, blob_name):
    '''
    Do the work to reconcile a target.
    tobj is the current database content with the backpointer map fully populated.
    content is the data payload of the object.
    desc is a description of the backing - eg 'blob' for Azure
    Returns a tuple of (new_data_payload, owner_id) for the object.
    '''
    try:
        # Decompress and shallow-parse the existing object into its blobs.
        ba = strip_compression_header_and_decompress(content, tobj, blob_name)
        parse_state = ParseState(tobj, ba)
        oblobs = parse_state.parse_shallow()
    except (NamedObjectError, TerminalError):
        # These are already well-formed errors; propagate untouched.
        raise
    except Exception:
        exc_log(wrock.logger, logging.ERROR, "inode %s parse failure", desc)
        raise TargetObjectError(
            tobj, "inode %s parse failure: %s" % (desc, exc_info_err()))
    # Pre-serialize the replacement VATTR and BACK payloads from tobj.
    vattr_bytes = bytearray()
    unparse_attr(wrock.run_options, vattr_bytes, tobj)
    backpointers = tobj.backpointer_list_generate(
        include_null_firstbackpointer=True)
    backpointer_bytes = bytearray()
    unparse_back(backpointer_bytes, backpointers)
    # Rebuild the object blob-by-blob, substituting refreshed payloads.
    ba = bytearray()
    owner_id = None
    for oblob in oblobs:
        obtype = oblob.obtype
        if obtype == CLFSObjHandleType.OBTYPE_VATTR:
            data = vattr_bytes
        elif obtype == CLFSObjHandleType.OBTYPE_BACK:
            data = backpointer_bytes
        elif obtype == CLFSObjHandleType.OBTYPE_DIRENTS:
            data, owner_id = _obj_reconcile__repack_dirents(
                wrock, tobj, oblob, blob_name)
        else:
            # All other blob types are carried over unchanged.
            data = oblob.data
        realCount = len(data)
        # Round the on-disk blob size up to the required alignment.
        blobCountRaw = realCount + get_byte_count_header()
        blobCount = (blobCountRaw + ClfsObjParseBase.OBTYPE_ROUNDUP_SIZE - 1) & ClfsObjParseBase.OBTYPE_ROUNDUP_MASK
        padding = blobCount - blobCountRaw
        unparse_header(ba, obtype.value, realCount, blobCount)
        ba.extend(data)
        ba.extend(bytes(padding))
    if tobj.ftype == Ftype.DIR:
        # A directory must have produced an owner_id via its dirents.
        if owner_id is None:
            raise TargetObjectError(tobj, "no dirents (internal error)", blob_name=blob_name)
    else:
        owner_id = tobj.first_backpointer
        if owner_id == FILEHANDLE_NULL:
            wrock.logger.warning(
                "%s reconcile %s appears to be orphaned with nlink_effective=%s",
                wrock, tobj.describe(), tobj.nlink_effective())
    return ba, owner_id
def strip_compression_header_and_decompress(ba, tobj, blob_name):
    '''
    Strip the 4-byte compression/encryption-type header from ba and
    return the payload, decompressing it when the header says LZ4.
    Raises TargetObjectError for any encryption mode other than
    DISABLED or any unrecognized compression mode.
    '''
    emode, cmode, payload = get_compress_mode(ba)
    # Encrypted payloads are not handled here at all.
    if emode != CLFSEncryptionType.DISABLED:
        raise TargetObjectError(
            tobj,
            "unexpected emode %s %s" % (emode.__class__.__name__, emode),
            blob_name=blob_name)
    if cmode == CLFSCompressionType.LZ4:
        # LZ4HC is not supported by armada_main
        return lz4.frame.decompress(payload)
    if cmode != CLFSCompressionType.DISABLED:
        raise TargetObjectError(
            tobj,
            "unexpected cmode %s %s" % (cmode.__class__.__name__, cmode),
            blob_name=blob_name)
    return payload
def add_block(self, blockIndex, blockId, allDone=False):
    '''Given an ObCacheId blockId at a blockIndex in the map, add it to list of blocks.
    If number of block pointers reaches limit _POINTERS_PER_BLOCK
    Flush indirect block and update _current_depth, _current_depth_blocks,
    and _current_block_list.
    Returns a deque of flushed indirect-block items (empty for direct blocks).
    '''
    # Direct blocks bypass the indirect-tree machinery entirely.
    if blockIndex < CLFSSegment.DIRECT_BLOCKS:
        self._direct_blocks.append(blockId)
        return list()
    if len(self._direct_blocks) > CLFSSegment.DIRECT_BLOCKS:
        self._wrock.logger.error(
            "too many directblocks for %s seen=%d max=%d",
            self._tobj.filehandle, len(self._direct_blocks),
            CLFSSegment.DIRECT_BLOCKS)
        raise TargetObjectError(self._tobj, "Too Many Direct Blocks")
    # starting on a new indirect tree. initialize IndirectBlockItems for all
    # non-leaf indir tree nodes.
    if not self._current_depth_blockitems:
        for d in range(0, self._current_depth + 1):
            self._current_depth_blockitems.append(IndirectBlockItem(0, d))
        assert len(self._current_depth_blockitems) == self._current_depth + 1
    # Fresh deque per call: the caller consumes whatever was flushed
    # by this particular add.
    self._flush_deque = collections.deque()
    self._treeblockcount += 1
    # if adding a new block results in one or more full indirect blocks,
    # self._flush_deque keeps track of the contents of these indirect blocks.
    if self._maybe_flush_block(newBlockId=blockId, allBlocksDone=allDone):
        # we move onto next tree
        assert self._treeblockcount == CLFSSegment.INDIR_TREE_ITEMS[
            self._current_depth]
        self._treeblockcount = 0
        self._current_depth += 1
        self._current_depth_blockitems = list()
        if self._current_depth >= CLFSSegment.MAX_INDIR_DEPTH:
            raise TargetObjectError(self._tobj, "Indirect tree too deep. File too large")
    return self._flush_deque
def _parse_bmap(self):
    '''
    Parse an OBTYPE_BMAP blob: segment sizes, the direct-block list
    and the indirect-block-tree roots, storing results in self.parseDict.
    '''
    pd = self.parseDict
    first_seg, other_seg, direct_count = struct.unpack(
        _PARSE_BMAP_PACK1, self._read(12))
    pd['FirstSegmentBytes'] = first_seg
    pd['OtherSegmentBytes'] = other_seg
    pd['DirectBlocks'] = [
        self._read_with_length16() for _ in range(direct_count)]
    indir_count = struct.unpack(STRUCT_LE_U32, self._read(4))[0]
    # sanity check -- making indir size > 4 is not supported.
    if indir_count != 4:
        raise TargetObjectError(
            self._tobj,
            "indirListSize has unexpected value %s" % indir_count)
    pd['IndirectBlockTrees'] = [
        self._read_with_length16() for _ in range(indir_count)]
def _read(self, length): ''' Read and return up to length bytes. Returns zero-length bytes when there is nothing left. ''' ret = self._bio.read(length) self._last_read_length = len(ret) if self._last_read_length != length: raise TargetObjectError(self._tobj, "short read %d != %d" % (len(ret), length)) return ret
def parse(self):
    '''
    Iterate through the CLFS blobs, loading the logical contents
    into self.parseDict.
    '''
    while self._parse_header():
        handle_type = self._objHandleType
        try:
            handler = self._PARSE_DICT[handle_type]
        except KeyError:
            raise TargetObjectError(
                self._tobj,
                "unrecognized objHandleType %s" % handle_type)
        handler(self)
        # Skip the pad bytes between this blob's payload and the
        # next header (blobCount is the rounded-up on-disk size).
        pad = self._blobCount - self._realCount - ClfsObjParseBase.OBTYPE_HEADER_BYTES
        self._bio.seek(pad, io.SEEK_CUR)
def _do_read(wrock, read_obj, tobj, length, timer_name, expect_exact=False, zero_ok=True): ''' Wrap read_obj.read(length) with appropriate exception handling. ''' try: with wrock.timers.start(timer_name): ret = read_obj.read(length) except (NamedObjectError, TerminalError): raise except Exception as e: txt = e.__class__.__name__ tmp = str(e) if tmp: txt += ' ' txt += tmp raise TargetObjectError(tobj, txt) from e if zero_ok: return ret if expect_exact: if len(ret) != length: msg = "%s=%s expected=%d read %d bytes instead" % (read_obj.__class__.__name__, read_obj, length, len(ret)) raise TargetObjectError(tobj, msg) return ret
def _maybe_flush_block(self, newBlockId=None, allBlocksDone=False):
    '''
    When all blocks of a file have been flushed and remaining data blocks
    need to be flushed, this method flushes any unflushed indirect
    blocks all the way to root of indirect block tree.
    Returns True if we need to move to next indirect tree; False otherwise
    '''
    # Walk bottom-up from the deepest level toward the root (d == 0),
    # propagating the id of each flushed indirect block up one level.
    for d in range(self._current_depth, -1, -1):
        flushitem = self._current_depth_blockitems[d]
        flushneeded = False
        if isinstance(newBlockId, ObCacheId):
            # add_block_to_list returns True when this level's block
            # filled up and therefore must be flushed.
            flushneeded = flushitem.add_block_to_list(newBlockId)
        if not flushneeded and not allBlocksDone:
            # Nothing full at this level and not finalizing: stop here.
            return False
        bdepth, blist = flushitem.get_info()
        # flush indirect block
        if d != bdepth:
            self._wrock.logger.error(
                "afh %s depth mismatch: depth=%d flushitem_depth=%d current_depth=%d blocklist_size=%d",
                self._tobj.filehandle, d, bdepth, self._current_depth,
                len(blist))
            raise TargetObjectError(self._tobj, "depth mismatch in flush item")
        ibid, ibdata = unparse_indirect(self._run_options, self._tobj,
                                        blist, self._current_depth, bdepth)
        flushitem.set_block_id_and_data(ibid, ibdata)
        self._flush_deque.append(flushitem)
        if flushneeded and not allBlocksDone:
            # replace blockItem since the previous one is done and added to _flush_deque
            # number of block ids committed to storage is returned by flushitem.get_blocks_committed()
            # and is set to the _blocks_committed value for the newly created block.
            self._current_depth_blockitems[d] = IndirectBlockItem(
                flushitem.get_blocks_committed(), d)
        # The flushed block's id becomes the entry to add at the next
        # (shallower) level on the following iteration.
        newBlockId = ibid
        if d == 0:
            # Reached the root: record it and report that the caller
            # should advance to the next indirect tree.
            self._treeroots[self._current_depth] = newBlockId
            # stash block counts in current tree (for testing/debugging)
            counts = list()
            for dpt in range(0, self._current_depth + 1):
                counts.append(
                    self._current_depth_blockitems[dpt].get_blocks_committed())
            self._tree_block_counts.append(counts)
            return True
    return False
def unparse_obj_handles(run_options, tobj, ba, afh, objBtypeList=None, targetObj=None, ownerFh=None, dataOffset=None, dataBa=None, direntDataBa=None, direntList=None, directBlockList=None, indirectBlockList=None, backPointerList=None, extattrDict=None, targetName=None):
    '''
    Return a byte array of data that can be put in container and is a
    valid clfs inode object. Note that the bytearray returned does not
    have the 4-byte compression+encryption header.
    objBtype: type of object to be unparsed
    tObj: targetObj
    ownerfh: owning fh if datablock or parent dir if directory inode
    dataOffset: offset of data in file
    direntDataba: bytearray of dirent data for dir segments. len(bytes) is number of bytes to be written
    databa: bytearray of data. len(bytes) is number of bytes to be written
    direntList: list of (name,fh) tuples
    directBlockList: list of direct blocks in indices 0 to 1023
    indirectBlockList: list of indirect block pointer to the root of the indirect block trees
    backPointerList: list of parent points for a non-directory object
    extattrDict: dictionary of extended attributes and their values
    '''
    # Two-phase emit per handle type: first compute realCount (payload
    # byte count), then write the header followed by the payload.
    for objHandleType in objBtypeList:
        # obdata stays None for types whose payload is emitted by a
        # dedicated unparse_* helper rather than appended directly.
        obdata = None
        if objHandleType == CLFSObjHandleType.OBTYPE_DATA:
            realCount = get_byte_count_data(dataBa)
        elif objHandleType == CLFSObjHandleType.OBTYPE_DIRENTS:
            if direntDataBa is not None:
                # Caller supplied pre-serialized dirent bytes.
                obdata = direntDataBa
            else:
                assert direntList
                obdata = unparse_dirents_list(direntList)
            realCount = len(obdata)
        elif objHandleType == CLFSObjHandleType.OBTYPE_VATTR:
            realCount = get_byte_count_attr()
        elif objHandleType == CLFSObjHandleType.OBTYPE_BMAP:
            if len(directBlockList) > CLFSSegment.DIRECT_BLOCKS:
                raise TargetObjectError(
                    tobj,
                    "DirectBlockList Too Large: %d limit: %d" % (len(directBlockList), CLFSSegment.DIRECT_BLOCKS))
            realCount = get_byte_count_bmap(directBlockList, indirectBlockList)
        elif objHandleType == CLFSObjHandleType.OBTYPE_INDIR:
            realCount = get_byte_count_indir(indirectBlockList)
        elif objHandleType == CLFSObjHandleType.OBTYPE_BACK:
            realCount = get_byte_count_back(backPointerList)
        elif objHandleType == CLFSObjHandleType.OBTYPE_DATABACK:
            realCount = get_byte_count_databack(ownerFh)
        elif objHandleType == CLFSObjHandleType.OBTYPE_EXTATTRS:
            obdata = unparse_extattrs(extattrDict)
            realCount = len(obdata)
        elif objHandleType == CLFSObjHandleType.OBTYPE_NAME:
            obdata = unparse_name(targetName)
            realCount = len(obdata)
        else:
            raise TargetObjectError(
                tobj,
                "blob byteCount not implemented for objHandleType %s %s" % (objHandleType.__class__.__name__, objHandleType))
        # Round the on-disk blob size up to the alignment boundary;
        # padding is the number of zero bytes appended after the payload.
        blobCountRaw = realCount + get_byte_count_header()
        blobCount = (blobCountRaw + ClfsObjParseBase.OBTYPE_ROUNDUP_SIZE - 1) & ClfsObjParseBase.OBTYPE_ROUNDUP_MASK
        padding = blobCount - blobCountRaw
        unparse_header(ba, objHandleType.value, realCount, blobCount)
        # Phase two: emit the payload for this handle type.
        if objHandleType == CLFSObjHandleType.OBTYPE_DATA:
            unparse_data(ba, dataBa)
        elif objHandleType == CLFSObjHandleType.OBTYPE_VATTR:
            unparse_attr(run_options, ba, targetObj)
        elif objHandleType == CLFSObjHandleType.OBTYPE_BMAP:
            # Directories use a different "other segment" size.
            if targetObj.ftype == Ftype.DIR:
                otherSegBytes = CLFSSegment.DIR_OTHER_SEGMENT_BYTES
            else:
                otherSegBytes = CLFSSegment.OTHER_SEGMENT_BYTES
            assert afh == targetObj.filehandle
            unparse_bmap(ba, CLFSSegment.FIRST_SEGMENT_BYTES, otherSegBytes, directBlockList, indirectBlockList)
        elif objHandleType == CLFSObjHandleType.OBTYPE_INDIR:
            unparse_indir(ba, indirectBlockList)
        elif objHandleType == CLFSObjHandleType.OBTYPE_BACK:
            unparse_back(ba, backPointerList)
        elif objHandleType == CLFSObjHandleType.OBTYPE_DATABACK:
            unparse_databack(ba, int(tobj.ctime), dataOffset, ownerFh)
        else:
            # DIRENTS / EXTATTRS / NAME: payload was serialized above.
            ba.extend(obdata)
        if padding > 0:
            ba.extend(bytes(padding))
    return ba