Example #1
    # Method of an SSTable metadata parser class (fragment); it relies on the
    # project's Buffer and SSTableMetadata helpers and needs `import os`.
    def parse(self, filename):
        """Parse serialized SSTable metadata from the given file and return
        an SSTableMetadata object."""
        size = os.stat(filename).st_size
        remaining = size
        # Read the whole file into a Buffer; open in binary mode since the
        # fields below are unpacked as binary data.
        with open(filename, 'rb') as f:
            buf = Buffer(f.read())
        rowsizes = SSTableMetadata.unpack_estimated_histogram(buf)
        colcounts = self.unpack_estimated_histogram(buf)
        replaysegid = buf.unpack_longlong()
        replaypos = buf.unpack_int()
        tsmin = buf.unpack_longlong()
        tsmax = buf.unpack_longlong()
        maxlocaldeletiontime = buf.unpack_int()
        bloomfilterfpchance = buf.unpack_double()
        compressionratio = buf.unpack_double()
        partitioner = buf.unpack_utf_string()
        ancestorscount = buf.unpack_int()
        ancestors = []
        for i in range(ancestorscount):
            ancestors.append(buf.unpack_int())
        tombstonehistogram = self.unpack_streaming_histogram(buf)
        # The sstable level is only present if there are bytes left to read.
        sstablelevel = 0
        if buf.available():
            sstablelevel = buf.unpack_int()
        mincolnames = []
        maxcolnames = []
        count = buf.unpack_int()
        for i in range(count):
            mincolnames.append(buf.unpack_utf_string())
        count = buf.unpack_int()
        for i in range(count):
            maxcolnames.append(buf.unpack_utf_string())
        return SSTableMetadata(rowsizes, colcounts, replaysegid, replaypos,
                               tsmin, tsmax, maxlocaldeletiontime,
                               bloomfilterfpchance, compressionratio,
                               partitioner, ancestors, tombstonehistogram,
                               sstablelevel, mincolnames, maxcolnames)
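
For context, a minimal usage sketch of the method above (the parser class name and file path are hypothetical; Buffer and SSTableMetadata come from the surrounding project):

    # Hypothetical usage sketch; "MetadataParser" and the path are placeholders.
    parser = MetadataParser()
    meta = parser.parse('/path/to/Statistics.db')
    # meta is an SSTableMetadata built from the unpacked histogram, timestamp,
    # partitioner, ancestor, and column-name fields.
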
Example #2
# Standard-library imports used by the class below; Buffer, Block, Syncer,
# get_rw_locks, save_superblock, and the DEBUG flag are assumed to come from
# the surrounding project.
import math
import pickle
import random
import sys
import time


class WoOram:
    def __init__(self, backend, sup, drip_rate, drip_time):
        self.backend = backend
        self.vtable = sup.vtable
        self.blocksize = sup.blocksize
        self.headerlen = sup.headerlen
        self.N = sup.total_blocks
        self.K = drip_rate
        self.T = drip_time
        self.fbsize = sup.fbsize
        self.split_maxnum = sup.split_maxnum
        self.split_maxsize = sup.split_maxsize

        self.buf = Buffer()
        self.rlock, self.wlock = get_rw_locks()
        self.syncer = Syncer(self, self.T)

        self.active = False  # is the sync thread running
        self.syncing = False  # is a sync operation in progress
        self.recent = None  # set of (vnode, boff) pairs for what has changed during the sync op

    def start(self):
        if self.T > 0:
            self.active = True
            self.syncer.start()
        else:
            print("NOTE: sync thread not actually started...")

    def finish(self):
        """Waits until the buffer has been cleared, then stops the syncer and returns."""
        if self.active:
            self.active = False
            print("Waiting for the sync thread to finish...", file=sys.stderr)
            self.syncer.join()

    def __enter__(self):
        self.start()
        return self

    def __exit__(self, et, ev, tb):
        self.finish()

    def __len__(self):
        """Returns the number of distinct items stored (i.e., # of vnodes)"""
        return len(self.vtable)

    def size(self):
        """Returns the total size (in bytes) of all items stored. (Warning: slow)"""
        with self.rlock:
            return sum(self.get_size(v) for v in self.vtable)

    def num_blocks(self, vnode):
        """Returns the number of blocks this file occupies."""
        return len(self.vtable.get_info(vnode).inodes)

    def get_size(self, vnode):
        """The number of bytes of data stored for the given object."""
        return self.vtable.get_size(vnode)

    def get_mtime(self, vnode):
        """The last modification time of the given object."""
        return self.vtable.get_mtime(vnode)

    def set_mtime(self, vnode, when=None):
        if when is None:
            when = time.time()
        self.vtable.set_mtime(vnode, when)

    def capacity(self):
        """The total space avaiable (in bytes) in the backend."""
        return self.blocksize * self.N

    def _make_block(self, b1, b2):
        """Creates a new block with the given contents on either side.
        Each should be a Block object.
        The block is padded up to self.blocksize.
        """
        # TODO make more efficiently indexable storage representation?
        block = pickle.dumps((b1.contents, b2.contents))
        assert len(block) <= self.blocksize - self.headerlen
        return block + b'\0' * (self.blocksize - len(block) - self.headerlen)

    def _get_backend(self, ind):
        """Returns a tuple of block objects stored at the given index."""
        res = []
        try:
            raw = self.backend[ind]
        except IndexError:
            # this could be a normal error, just a new repository
            res = None
        except Exception:
            res = None
            print("WARNING: error fetching", ind,
                  "from backend. Maybe wrong key?")
        if res is not None:
            try:
                parts = pickle.loads(raw)
            except Exception:
                parts = None
            if type(parts) is not tuple:
                print("WARNING: error unpickling", ind, "from backend")
                res = None
        if res is not None:
            for contents in parts:
                if contents is None:
                    kind = Block.EMPTY
                elif type(contents) is dict:
                    kind = Block.SPLIT
                elif type(contents) is tuple and len(contents) == 2:
                    kind = Block.FULL
                else:
                    print("WARNING: messed up parts in", ind, "from backend")
                    res = None
                    break
                res.append(Block(self, kind, contents))
        if res is None or len(res) != 2:
            return tuple(Block(self, Block.EMPTY) for _ in range(2))
        else:
            return tuple(res)

    def _get_fresh(self, ind):
        """Gets the pair of Blocks stored at the given index,
        after removing anything that's stale."""
        res = []
        inode0 = 2 * ind
        with self.rlock:
            parts = self._get_backend(ind)
            for j, blk in enumerate(parts):
                inode = inode0 + j
                if blk.kind() == Block.SPLIT:
                    stale = [
                        vnode for vnode in blk.contents
                        if self.vtable.is_stale(vnode, inode)
                    ]
                    for vnode in stale:
                        del blk.contents[vnode]
                    if len(blk.contents) == 0:
                        # all entries in split block are stale, so it's considered an empty block
                        blk = Block(self, Block.EMPTY)
                elif blk.kind() == Block.FULL:
                    if self.vtable.is_stale(blk.contents[0], inode):
                        # full block is stale, so it's actually empty
                        blk = Block(self, Block.EMPTY)
                res.append(blk)
        return res

    def _fetch_block_inode(self, vnode, inode, split):
        """Gets the contents of the given vnode stored in backend at the given
        inode. split is a bool indicating whether it's an sblock."""
        assert 0 <= inode < 2 * self.N
        parts = self._get_backend(inode // 2)
        if split:
            for blk in parts:
                if blk.kind() == Block.SPLIT and vnode in blk.contents:
                    return blk.contents[vnode]
        else:
            blk = parts[inode % 2]
            if blk.kind() == Block.FULL and blk.contents[0] == vnode:
                return blk.contents[1]
        return None

    def _fetch_backend(self, vnode, boff):
        with self.rlock:
            inode, split = self.vtable.get_inodes(vnode)[boff]
            if inode < 0:
                return None
            else:
                return self._fetch_block_inode(vnode, inode, split)

    def get(self, vnode, boff):
        """Returns a bytes object for the specified data fragment.
        KeyError if vnode is invalid.
        IndexError if boff is invalid.
        None if the data is inaccessible for some other reason.
        """
        with self.rlock:
            res = self.buf.get(vnode, boff)
            if res is None:
                res = self._fetch_backend(vnode, boff)
        if DEBUG:
            print("wooram: get: buf[{}:{}]=>len({})".format(
                vnode, boff,
                len(res) if res else None),
                  file=sys.stderr)
        return res

    def set(self, vnode, boff, data):
        if len(data) == 0:
            raise ValueError(
                "can't set fragment to empty. Use resize instead.")

        with self.wlock:
            if self.syncing: self.recent.add((vnode, boff))
            self.vtable.change_inode(vnode, boff, len(data))
            self.buf.set(vnode, boff, data)

        if DEBUG:
            print("wooram: set: buf[{}:{}]<=len({})".format(
                vnode, boff,
                len(data) if data else None),
                  file=sys.stderr)

    def new(self):
        return self.vtable.new()

    def delete(self, vnode):
        with self.wlock:
            size = self.num_blocks(vnode)
            if self.syncing:
                self.recent.update((vnode, boff) for boff in range(size))
            self.buf.pop((vnode, boff) for boff in range(size))
            del self.vtable[vnode]

    def resize(self, vnode, size):
        """sets the length in bytes of vnode to the given value."""
        num = math.ceil(size / self.fbsize)
        lbsize = size - self.fbsize * (num - 1)
        with self.wlock:
            info = self.vtable.get_info(vnode)
            curnum = len(info.inodes)
            curlbs = info.lbsize
            if num < curnum:
                # truncating
                self.vtable.trunc_inodes(vnode, num)
                if lbsize < self.fbsize:
                    data = self.get(vnode, num - 1)[:lbsize]
                    self.set(vnode, num - 1, data)
            elif num > curnum:
                # growing
                if curlbs < self.fbsize:
                    # need to pad last block with null bytes
                    data = self.get(
                        vnode, curnum - 1) + b'\0' * (self.fbsize - curlbs)
                    assert len(data) == self.fbsize
                    self.set(vnode, curnum - 1, data)
                for boff in range(curnum, num - 1):
                    self.set(vnode, boff, b'\0' * self.fbsize)
                self.set(vnode, num - 1, b'\0' * lbsize)
            elif lbsize != curlbs:
                data = self.get(vnode, num - 1)
                if lbsize < curlbs:
                    # truncating last block
                    self.set(vnode, num - 1, data[:lbsize])
                else:
                    # growing last block
                    self.set(vnode, num - 1, data + b'\0' * (lbsize - curlbs))

    def sync(self):
        evict_ind = random.sample(range(1, self.N), self.K)

        with self.wlock:
            if self.syncing:
                print("WARNING: SYNC OVERLAP!!")
                print(
                    "You should decrease the drip_rate or increase the drip_time."
                )
                print(
                    "This sync attempt is aborting. Your privacy may be compromised."
                )
                return
            self.syncing = True
            self.recent = set()

        with self.rlock:
            evict_blocks = [self._get_fresh(ind) for ind in evict_ind]
            avail = self.buf.available()

        # pre-compact pairs of split blocks into one when they fit, freeing up space
        for blist in evict_blocks:
            if all(b.kind() == Block.SPLIT for b in blist):
                # two sblocks. can they fit into one?
                if sum(b.size() for b in blist) <= self.split_maxsize:
                    # yes!
                    blist[0].contents.update(blist[1].contents)
                    blist[1] = Block(self, Block.EMPTY)

        blocks = [b for blist in evict_blocks for b in blist]
        assert len(blocks) == 2 * self.K

        # pack items from the buffer
        to_pop = []
        for vnode, boff, data in avail:
            assert len(data) > 0
            # try every block, smallest available space first (best-fit)
            blocks.sort(key=lambda b: b.space_avail())
            for b in blocks:
                if b.add_if(vnode, boff, data):
                    break

        # write back blocks to backend
        for ind, (b1, b2) in zip(evict_ind, evict_blocks):
            self.backend[ind] = self._make_block(b1, b2)

        with self.wlock:
            # update vtable for what was added
            for i, blist in enumerate(evict_blocks):
                inode0 = 2 * evict_ind[i]
                for j in range(2):
                    for (vnode, boff) in blist[j].added():
                        if (vnode, boff) not in self.recent:
                            self.vtable.set_inode(vnode, boff, inode0 + j)
                            to_pop.append((vnode, boff))

        with self.rlock:
            save_superblock(self.backend, self.vtable, self.blocksize, self.N,
                            self.headerlen)

        with self.wlock:
            # now that all is set, remove added items from buffer
            self.buf.pop(to_pop)
            self.recent = None
            self.syncing = False
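
For orientation, a minimal usage sketch of the class above (backend, sup, and the drip parameters are assumed to be provided by the surrounding project; the values and data shown are placeholders):

    # Hypothetical usage sketch; `backend` and `sup` come from elsewhere in the project.
    oram = WoOram(backend, sup, drip_rate=4, drip_time=10)
    with oram:                        # start() the syncer on entry, finish() on exit
        vnode = oram.new()            # allocate a fresh object id
        oram.set(vnode, 0, b'hello')  # stage fragment 0 in the write buffer
        data = oram.get(vnode, 0)     # served from the buffer until a sync() evicts it
        print(len(oram), oram.capacity())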