def _parse_header(self):
    '''
    Read and parse the next header. Returns whether or not
    a header is found.
    '''
    header_bytes = self._bio.read(ClfsObjParseBase.OBTYPE_HEADER_BYTES)
    self._last_read_length = len(header_bytes)
    if not header_bytes:
        return False
    if self._last_read_length != ClfsObjParseBase.OBTYPE_HEADER_BYTES:
        raise TargetObjectError(
            self._tobj, "short read %d != %d" %
            (len(header_bytes), ClfsObjParseBase.OBTYPE_HEADER_BYTES))
    self._magic, self._blobCount, self._realCount, handleType, _ = struct.unpack(
        _HEADER_PACK1, header_bytes)
    if self._magic != ClfsObjParseBase.OBTYPE_MAGIC_20:
        raise TargetObjectError(
            self._tobj, "header magic %s != %s" %
            (oct(self._magic), oct(ClfsObjParseBase.OBTYPE_MAGIC_20)))
    try:
        self._objHandleType = CLFSObjHandleType(handleType)
    except ValueError:
        raise TargetObjectError(self._tobj,
                                "invalid handle type '%s'" % handleType)
    return True
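A minimal sketch of the header this parser consumes, assuming a hypothetical _HEADER_PACK1 of five little-endian u32 fields (magic, blobCount, realCount, handleType, reserved); the real pack string and field widths live elsewhere in this module.

import struct

_HEADER_PACK1 = '<5I'  # assumption: five unsigned 32-bit fields
OBTYPE_HEADER_BYTES = struct.calcsize(_HEADER_PACK1)  # 20 bytes under this assumption

def pack_header_example(magic, blobCount, realCount, handleType):
    # Build one header exactly the way _parse_header unpacks it.
    return struct.pack(_HEADER_PACK1, magic, blobCount, realCount, handleType, 0)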
def _obj_reconcile__repack_dirents(wrock, tobj, oblob, blob_name):
    '''
    Helper for obj_reconcile(). Invoked on OBTYPE_DIRENTS.
    oblob is the CLFSObjectBlob.  The purpose here is to replace the
    '..' entry with one that is correct for the current
    tobj.first_backpointer.
    Returns a tuple of (unparsed_data_for_blob, owner_id).
    '''
    ps = ParseState(tobj, oblob.data)
    ps.resume_from_oblob(oblob)
    dirent_list = ps.parse_dirents()
    if len(dirent_list) < 2:
        raise TargetObjectError(tobj,
                                "directory entry list short (length=%d)" %
                                len(dirent_list),
                                blob_name=blob_name)
    dotdot = dirent_list[1]
    if dotdot[0] != '..':
        raise TargetObjectError(tobj,
                                "second entry is not dotdot ('%s')" %
                                dotdot[0],
                                blob_name=blob_name)
    if tobj.first_backpointer == FILEHANDLE_NULL:
        if tobj.filehandle == FILEHANDLE_ROOT:
            dotdot[1] = tobj.filehandle
        else:
            wrock.logger.warning("%s reconcile %s appears to be orphaned",
                                 wrock, tobj.describe())
            dotdot[1] = FILEHANDLE_ORPHAN
    else:
        dotdot[1] = tobj.first_backpointer
    ba = unparse_dirents_list(dirent_list)
    assert len(ba) <= CLFSSegment.FIRST_SEGMENT_BYTES
    return ba, dotdot[1]
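The in-place dotdot[1] assignment above implies dirents are mutable [name, filehandle] pairs; a toy illustration of the rewrite (the filehandle strings here are placeholders):

dirent_list = [['.', 'fh-self'], ['..', 'fh-stale'], ['a.txt', 'fh-a']]
dirent_list[1][1] = 'fh-current-parent'  # what the helper does for '..'
assert dirent_list[1] == ['..', 'fh-current-parent']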
def obj_reconcile(wrock, tobj, content, desc, blob_name):
    '''
    Do the work to reconcile a target. tobj is the current
    database content with the backpointer map fully populated.
    content is the data payload of the object.
    desc is a description of the backing store, e.g. 'blob' for Azure.
    Returns a tuple of (new data payload for the object, owner_id).
    '''
    try:
        ba = strip_compression_header_and_decompress(content, tobj, blob_name)
        parse_state = ParseState(tobj, ba)
        oblobs = parse_state.parse_shallow()
    except (NamedObjectError, TerminalError):
        raise
    except Exception:
        exc_log(wrock.logger, logging.ERROR, "inode %s parse failure", desc)
        raise TargetObjectError(
            tobj, "inode %s parse failure: %s" % (desc, exc_info_err()))
    vattr_bytes = bytearray()
    unparse_attr(wrock.run_options, vattr_bytes, tobj)
    backpointers = tobj.backpointer_list_generate(
        include_null_firstbackpointer=True)
    backpointer_bytes = bytearray()
    unparse_back(backpointer_bytes, backpointers)
    ba = bytearray()
    owner_id = None
    for oblob in oblobs:
        obtype = oblob.obtype
        if obtype == CLFSObjHandleType.OBTYPE_VATTR:
            data = vattr_bytes
        elif obtype == CLFSObjHandleType.OBTYPE_BACK:
            data = backpointer_bytes
        elif obtype == CLFSObjHandleType.OBTYPE_DIRENTS:
            data, owner_id = _obj_reconcile__repack_dirents(
                wrock, tobj, oblob, blob_name)
        else:
            data = oblob.data
        realCount = len(data)
        blobCountRaw = realCount + get_byte_count_header()
        blobCount = (blobCountRaw + ClfsObjParseBase.OBTYPE_ROUNDUP_SIZE -
                     1) & ClfsObjParseBase.OBTYPE_ROUNDUP_MASK
        padding = blobCount - blobCountRaw
        unparse_header(ba, obtype.value, realCount, blobCount)
        ba.extend(data)
        ba.extend(bytes(padding))
    if tobj.ftype == Ftype.DIR:
        if owner_id is None:
            raise TargetObjectError(tobj,
                                    "no dirents (internal error)",
                                    blob_name=blob_name)
    else:
        owner_id = tobj.first_backpointer
        if owner_id == FILEHANDLE_NULL:
            wrock.logger.warning(
                "%s reconcile %s appears to be orphaned with nlink_effective=%s",
                wrock, tobj.describe(), tobj.nlink_effective())
    return ba, owner_id
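Worked example of the blobCount round-up used in the loop above, assuming a hypothetical 64-byte alignment and 20-byte per-blob header (the real values come from ClfsObjParseBase and get_byte_count_header()):

OBTYPE_ROUNDUP_SIZE = 64                      # assumption
OBTYPE_ROUNDUP_MASK = ~(OBTYPE_ROUNDUP_SIZE - 1)
HEADER_BYTES = 20                             # assumption

realCount = 100
blobCountRaw = realCount + HEADER_BYTES       # 120
blobCount = (blobCountRaw + OBTYPE_ROUNDUP_SIZE - 1) & OBTYPE_ROUNDUP_MASK
padding = blobCount - blobCountRaw
assert (blobCount, padding) == (128, 8)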
def strip_compression_header_and_decompress(ba, tobj, blob_name):
    '''
    Strip the 4-byte compression/encryption-type header, and
    decompress the buffer if the header indicates it is compressed.
    '''
    emode, cmode, ba = get_compress_mode(ba)
    if emode != CLFSEncryptionType.DISABLED:
        err = "unexpected emode %s %s" % (emode.__class__.__name__, emode)
        raise TargetObjectError(tobj, err, blob_name=blob_name)
    if cmode == CLFSCompressionType.DISABLED:
        pass
    elif cmode == CLFSCompressionType.LZ4:  # LZ4HC is not supported by armada_main
        ba = lz4.frame.decompress(ba)
    else:
        err = "unexpected cmode %s %s" % (cmode.__class__.__name__, cmode)
        raise TargetObjectError(tobj, err, blob_name=blob_name)
    return ba
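Quick round-trip check of the LZ4 path above; lz4.frame is the frame API from the PyPI lz4 package (the 4-byte mode header itself is assumed already stripped by get_compress_mode before decompression):

import lz4.frame

payload = b'x' * 4096
assert lz4.frame.decompress(lz4.frame.compress(payload)) == payload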
    def add_block(self, blockIndex, blockId, allDone=False):
        '''Given an ObCacheId blockId at a blockIndex in the map, add it to the
        list of blocks. If the number of block pointers reaches the
        _POINTERS_PER_BLOCK limit, flush the indirect block and update
        _current_depth, _current_depth_blocks, and _current_block_list.
        '''
        if blockIndex < CLFSSegment.DIRECT_BLOCKS:
            self._direct_blocks.append(blockId)
            return list()

        if len(self._direct_blocks) > CLFSSegment.DIRECT_BLOCKS:
            self._wrock.logger.error(
                "too many directblocks for %s seen=%d max=%d",
                self._tobj.filehandle, len(self._direct_blocks),
                CLFSSegment.DIRECT_BLOCKS)
            raise TargetObjectError(self._tobj, "Too Many Direct Blocks")

        # starting on a new indirect tree. initialize IndirectBlockItems for all
        # non-leaf indir tree nodes.
        if not self._current_depth_blockitems:
            for d in range(0, self._current_depth + 1):
                self._current_depth_blockitems.append(IndirectBlockItem(0, d))

        assert len(self._current_depth_blockitems) == self._current_depth + 1

        self._flush_deque = collections.deque()
        self._treeblockcount += 1
        # if adding a new block results in one or more full indirect blocks,
        # self._flush_deque keeps track of the contents of these indirect blocks.
        if self._maybe_flush_block(newBlockId=blockId, allBlocksDone=allDone):
            # we move onto next tree
            assert self._treeblockcount == CLFSSegment.INDIR_TREE_ITEMS[
                self._current_depth]
            self._treeblockcount = 0
            self._current_depth += 1
            self._current_depth_blockitems = list()

        if self._current_depth >= CLFSSegment.MAX_INDIR_DEPTH:
            raise TargetObjectError(self._tobj,
                                    "Indirect tree too deep. File too large")

        return self._flush_deque
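Sketch of the capacity arithmetic implied by INDIR_TREE_ITEMS above, under the assumption that a tree rooted at depth d fans out _POINTERS_PER_BLOCK ways per level and therefore holds _POINTERS_PER_BLOCK ** (d + 1) block pointers (the fanout value here is illustrative only):

_POINTERS_PER_BLOCK = 1024  # assumption for illustration
INDIR_TREE_ITEMS = [_POINTERS_PER_BLOCK ** (d + 1) for d in range(4)]
# -> [1024, 1048576, 1073741824, 1099511627776]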
Example #6
def _parse_bmap(self):
    parseDict = self.parseDict
    fsegBytes, otherSegBytes, directListSize = struct.unpack(
        _PARSE_BMAP_PACK1, self._read(12))
    parseDict['FirstSegmentBytes'] = fsegBytes
    parseDict['OtherSegmentBytes'] = otherSegBytes
    parseDict['DirectBlocks'] = [
        self._read_with_length16() for _ in range(directListSize)]
    indirListSize = struct.unpack(STRUCT_LE_U32, self._read(4))[0]
    if indirListSize != 4:  # sanity check -- indir size > 4 is not supported
        raise TargetObjectError(
            self._tobj,
            "indirListSize has unexpected value %s" % indirListSize)
    parseDict['IndirectBlockTrees'] = [
        self._read_with_length16() for _ in range(indirListSize)]
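A minimal reconstruction of the 12-byte bmap prefix parsed above, assuming _PARSE_BMAP_PACK1 is three little-endian u32s and STRUCT_LE_U32 is '<I' (consistent with the 12- and 4-byte reads):

import struct

_PARSE_BMAP_PACK1 = '<III'  # assumption: fsegBytes, otherSegBytes, directListSize
assert struct.calcsize(_PARSE_BMAP_PACK1) == 12
raw = struct.pack(_PARSE_BMAP_PACK1, 4194304, 16777216, 2)
fsegBytes, otherSegBytes, directListSize = struct.unpack(_PARSE_BMAP_PACK1, raw)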
Example #7
def _read(self, length):
    '''
    Read and return exactly length bytes. Raises TargetObjectError
    on a short read, including when nothing is left.
    '''
    ret = self._bio.read(length)
    self._last_read_length = len(ret)
    if self._last_read_length != length:
        raise TargetObjectError(
            self._tobj, "short read %d != %d" % (len(ret), length))
    return ret
Example #8
def parse(self):
    '''
    Iterate through the CLFS blobs, loading the logical
    contents into self.parseDict.
    '''
    while self._parse_header():
        try:
            parseFunc = self._PARSE_DICT[self._objHandleType]
        except KeyError:
            raise TargetObjectError(
                self._tobj,
                "unrecognized objHandleType %s" % self._objHandleType)
        parseFunc(self)
        padding = (self._blobCount - self._realCount -
                   ClfsObjParseBase.OBTYPE_HEADER_BYTES)
        self._bio.seek(padding, io.SEEK_CUR)
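The _PARSE_DICT lookup above is a plain type-to-method dispatch; a toy standalone equivalent (names here are illustrative, not the real table):

import enum

class HandleType(enum.Enum):
    VATTR = 1
    BMAP = 4

PARSE_DICT = {HandleType.VATTR: lambda: 'parsed vattr'}

def dispatch(handle_type):
    try:
        parse_func = PARSE_DICT[handle_type]
    except KeyError:
        raise ValueError('unrecognized objHandleType %s' % handle_type)
    return parse_func()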
Example #9
def _do_read(wrock, read_obj, tobj, length, timer_name,
             expect_exact=False, zero_ok=True):
    '''
    Wrap read_obj.read(length) with appropriate exception handling.
    '''
    try:
        with wrock.timers.start(timer_name):
            ret = read_obj.read(length)
    except (NamedObjectError, TerminalError):
        raise
    except Exception as e:
        txt = e.__class__.__name__
        tmp = str(e)
        if tmp:
            txt += ' '
            txt += tmp
        raise TargetObjectError(tobj, txt) from e
    if zero_ok:
        return ret
    if expect_exact:
        if len(ret) != length:
            msg = "%s=%s expected=%d read %d bytes instead" % (
                read_obj.__class__.__name__, read_obj, length, len(ret))
            raise TargetObjectError(tobj, msg)
    return ret
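Minimal standalone sketch of the wrap-and-chain pattern used above, with a stub standing in for the project's TargetObjectError:

class TargetObjectErrorStub(Exception):
    pass

def read_wrapped(read_obj, length):
    try:
        return read_obj.read(length)
    except Exception as e:
        txt = e.__class__.__name__
        if str(e):
            txt += ' ' + str(e)
        # chain the original exception, as _do_read does with 'from e'
        raise TargetObjectErrorStub(txt) from e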
    def _maybe_flush_block(self, newBlockId=None, allBlocksDone=False):
        '''
        Add newBlockId (when given) to the pending indirect-block lists,
        flushing any lists that fill. When allBlocksDone is set (all data
        blocks of the file have been flushed), flush every remaining
        unflushed indirect block all the way to the root of the indirect
        block tree.

        Returns True if we need to move to the next indirect tree; False otherwise.
        '''
        for d in range(self._current_depth, -1, -1):
            flushitem = self._current_depth_blockitems[d]
            flushneeded = False
            if isinstance(newBlockId, ObCacheId):
                flushneeded = flushitem.add_block_to_list(newBlockId)
            if not flushneeded and not allBlocksDone:
                return False
            bdepth, blist = flushitem.get_info()

            # flush indirect block
            if d != bdepth:
                self._wrock.logger.error(
                    "afh %s depth mismatch: depth=%d flushitem_depth=%d current_depth=%d blocklist_size=%d",
                    self._tobj.filehandle, d, bdepth, self._current_depth,
                    len(blist))
                raise TargetObjectError(self._tobj,
                                        "depth mismatch in flush item")
            ibid, ibdata = unparse_indirect(self._run_options, self._tobj,
                                            blist, self._current_depth, bdepth)
            flushitem.set_block_id_and_data(ibid, ibdata)
            self._flush_deque.append(flushitem)

            if flushneeded and not allBlocksDone:
                # replace blockItem since the previous one is done and added to _flush_deque
                # number of block ids committed to storage is returned by flushitem.get_blocks_committed()
                # and is set to the _blocks_committed value for the newly created block.
                self._current_depth_blockitems[d] = IndirectBlockItem(
                    flushitem.get_blocks_committed(), d)
            newBlockId = ibid
        if d == 0:
            self._treeroots[self._current_depth] = newBlockId
            # stash block counts in  current tree (for testing/debugging)
            counts = list()
            for dpt in range(0, self._current_depth + 1):
                counts.append(
                    self._current_depth_blockitems[dpt].get_blocks_committed())
            self._tree_block_counts.append(counts)
            return True
        return False
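Toy illustration of the bottom-up cascade above: adding a block may fill the deepest pending list, whose freshly written indirect-block id is then added one level up, possibly filling that list too, and so on to the root (the fanout of 3 is illustrative; the real code tracks this state with IndirectBlockItem objects):

FANOUT = 3

def cascade(levels, new_id, make_parent_id):
    # levels[d] is the pending pointer list at depth d (root first, leaf last)
    for d in range(len(levels) - 1, -1, -1):
        levels[d].append(new_id)
        if len(levels[d]) < FANOUT:
            return None                       # nothing full yet; stop
        new_id = make_parent_id(levels[d])    # "flush" the full block upward
        levels[d] = []
    return new_id                             # root id: this tree is full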
def unparse_obj_handles(run_options,
                        tobj,
                        ba,
                        afh,
                        objBtypeList=None,
                        targetObj=None,
                        ownerFh=None,
                        dataOffset=None,
                        dataBa=None,
                        direntDataBa=None,
                        direntList=None,
                        directBlockList=None,
                        indirectBlockList=None,
                        backPointerList=None,
                        extattrDict=None,
                        targetName=None):
    '''
    Return a byte array of data that can be put in a container and is a valid
    clfs inode object. Note that the bytearray returned does not have the
    4-byte compression+encryption header.
    objBtypeList: types of objects to be unparsed
    targetObj: the target object
    ownerFh: owning fh if datablock, or parent dir if directory inode
    dataOffset: offset of data in file
    direntDataBa: bytearray of dirent data for dir segments. len(bytes) is number of bytes to be written
    dataBa: bytearray of data. len(bytes) is number of bytes to be written
    direntList: list of (name, fh) tuples
    directBlockList: list of direct blocks in indices 0 to 1023
    indirectBlockList: list of indirect block pointers to the roots of the indirect block trees
    backPointerList: list of parent pointers for a non-directory object
    extattrDict: dictionary of extended attributes and their values
    targetName: name emitted for OBTYPE_NAME blobs
    '''

    for objHandleType in objBtypeList:
        obdata = None
        if objHandleType == CLFSObjHandleType.OBTYPE_DATA:
            realCount = get_byte_count_data(dataBa)
        elif objHandleType == CLFSObjHandleType.OBTYPE_DIRENTS:
            if direntDataBa is not None:
                obdata = direntDataBa
            else:
                assert direntList
                obdata = unparse_dirents_list(direntList)
            realCount = len(obdata)
        elif objHandleType == CLFSObjHandleType.OBTYPE_VATTR:
            realCount = get_byte_count_attr()
        elif objHandleType == CLFSObjHandleType.OBTYPE_BMAP:
            if len(directBlockList) > CLFSSegment.DIRECT_BLOCKS:
                raise TargetObjectError(
                    tobj, "DirectBlockList Too Large: %d limit: %d" %
                    (len(directBlockList), CLFSSegment.DIRECT_BLOCKS))
            realCount = get_byte_count_bmap(directBlockList, indirectBlockList)
        elif objHandleType == CLFSObjHandleType.OBTYPE_INDIR:
            realCount = get_byte_count_indir(indirectBlockList)
        elif objHandleType == CLFSObjHandleType.OBTYPE_BACK:
            realCount = get_byte_count_back(backPointerList)
        elif objHandleType == CLFSObjHandleType.OBTYPE_DATABACK:
            realCount = get_byte_count_databack(ownerFh)
        elif objHandleType == CLFSObjHandleType.OBTYPE_EXTATTRS:
            obdata = unparse_extattrs(extattrDict)
            realCount = len(obdata)
        elif objHandleType == CLFSObjHandleType.OBTYPE_NAME:
            obdata = unparse_name(targetName)
            realCount = len(obdata)
        else:
            raise TargetObjectError(
                tobj,
                "blob byteCount not implemented for objHandleType %s %s" %
                (objHandleType.__class__.__name__, objHandleType))

        blobCountRaw = realCount + get_byte_count_header()
        blobCount = (blobCountRaw + ClfsObjParseBase.OBTYPE_ROUNDUP_SIZE -
                     1) & ClfsObjParseBase.OBTYPE_ROUNDUP_MASK
        padding = blobCount - blobCountRaw
        unparse_header(ba, objHandleType.value, realCount, blobCount)

        if objHandleType == CLFSObjHandleType.OBTYPE_DATA:
            unparse_data(ba, dataBa)
        elif objHandleType == CLFSObjHandleType.OBTYPE_VATTR:
            unparse_attr(run_options, ba, targetObj)
        elif objHandleType == CLFSObjHandleType.OBTYPE_BMAP:
            if targetObj.ftype == Ftype.DIR:
                otherSegBytes = CLFSSegment.DIR_OTHER_SEGMENT_BYTES
            else:
                otherSegBytes = CLFSSegment.OTHER_SEGMENT_BYTES
            assert afh == targetObj.filehandle
            unparse_bmap(ba, CLFSSegment.FIRST_SEGMENT_BYTES, otherSegBytes,
                         directBlockList, indirectBlockList)
        elif objHandleType == CLFSObjHandleType.OBTYPE_INDIR:
            unparse_indir(ba, indirectBlockList)
        elif objHandleType == CLFSObjHandleType.OBTYPE_BACK:
            unparse_back(ba, backPointerList)
        elif objHandleType == CLFSObjHandleType.OBTYPE_DATABACK:
            unparse_databack(ba, int(tobj.ctime), dataOffset, ownerFh)
        else:
            ba.extend(obdata)

        if padding > 0:
            ba.extend(bytes(padding))

    return ba
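A hypothetical invocation, kept as a comment because it depends on project objects not shown here; it would unparse a directory inode as VATTR + DIRENTS + BMAP blobs into one bytearray:

# ba = unparse_obj_handles(run_options, tobj, bytearray(), afh,
#                          objBtypeList=[CLFSObjHandleType.OBTYPE_VATTR,
#                                        CLFSObjHandleType.OBTYPE_DIRENTS,
#                                        CLFSObjHandleType.OBTYPE_BMAP],
#                          targetObj=tobj,
#                          direntList=[('.', afh), ('..', parent_fh)],
#                          directBlockList=[], indirectBlockList=[])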