Ejemplo n.º 1
0
    async def copyslab(self, dstpath, compact=True):
        '''
        Copy this slab to a new location on disk.

        Args:
            dstpath (str): Destination directory for the copied slab.
            compact (bool): Passed through to the LMDB environment copy.

        Raises:
            s_exc.DataAlreadyExists: If the destination directory already exists.

        Returns:
            True on success.
        '''
        destdir = s_common.genpath(dstpath)

        # refuse to clobber an existing slab directory
        if os.path.isdir(destdir):
            raise s_exc.DataAlreadyExists()

        s_common.gendir(destdir)

        # flush any pending writes so the copy is consistent
        await self.sync()

        self.lenv.copy(str(destdir), compact=compact)

        # best effort: carry the opts sidecar file along when present
        try:
            shutil.copy(self.optspath, s_common.switchext(destdir, ext='.opts.yaml'))
        except FileNotFoundError:  # pragma: no cover
            pass

        return True
Ejemplo n.º 2
0
    def test_switchext(self):
        '''
        Verify s_common.switchext() swaps or appends a file extension.
        '''
        # a regular extension is replaced outright
        self.eq(s_common.switchext('foo.txt', ext='.rdf'),
                s_common.genpath('foo.rdf'))

        # a dotfile with no real extension gets the new one appended
        self.eq(s_common.switchext('.vim', ext='.rdf'),
                s_common.genpath('.vim.rdf'))
Ejemplo n.º 3
0
    async def __anit__(self, path, **kwargs):
        '''
        Async init: open (or create) the LMDB environment at *path* and wire
        up all slab bookkeeping (opts sidecar file, memory locking, sync loop).

        Args:
            path (str): Directory path of the LMDB slab.
            **kwargs: Slab/LMDB options; merged with any persisted .opts.yaml.

        Raises:
            s_exc.SlabAlreadyOpen: If a slab for *path* is already open in-process.
            s_exc.BadArg: If no map_size could be resolved.
        '''

        await s_base.Base.__anit__(self)

        kwargs.setdefault('map_size', self.DEFAULT_MAPSIZE)
        kwargs.setdefault('lockmemory', False)
        kwargs.setdefault('map_async', True)

        opts = kwargs

        self.path = path
        self.optspath = s_common.switchext(path, ext='.opts.yaml')

        # Make sure we don't have this lmdb DB open already.  (This can lead to seg faults)
        if path in self.allslabs:
            raise s_exc.SlabAlreadyOpen(mesg=path)

        # options persisted on disk override caller-supplied kwargs
        if os.path.isfile(self.optspath):
            opts.update(s_common.yamlload(self.optspath))

        initial_mapsize = opts.get('map_size')
        if initial_mapsize is None:
            raise s_exc.BadArg('Slab requires map_size')

        # an existing data file may already be larger than the requested map_size
        mdbpath = s_common.genpath(path, 'data.mdb')
        if os.path.isfile(mdbpath):
            mapsize = max(initial_mapsize, os.path.getsize(mdbpath))
        else:
            mapsize = initial_mapsize

        # save the transaction deltas in case of error...
        self.xactops = []
        self.max_xactops_len = opts.pop('max_replay_log', 10000)
        self.recovering = False

        opts.setdefault('max_dbs', 128)
        opts.setdefault('writemap', True)

        # pop() slab-only options so they are not passed through to lmdb.open()
        self.maxsize = opts.pop('maxsize', None)
        self.growsize = opts.pop('growsize', self.DEFAULT_GROWSIZE)

        self.readonly = opts.get('readonly', False)
        self.lockmemory = opts.pop('lockmemory', False)

        # allow operators to globally disable memory locking via environment variable
        if self.lockmemory:
            lockmem_override = s_common.envbool('SYN_LOCKMEM_DISABLE')
            if lockmem_override:
                logger.info(f'SYN_LOCKMEM_DISABLE envar set, skipping lockmem for {self.path}')
                self.lockmemory = False

        self.mapasync = opts.setdefault('map_async', True)

        # round the map size up, then clamp it to maxsize when one was given
        self.mapsize = _mapsizeround(mapsize)
        if self.maxsize is not None:
            self.mapsize = min(self.mapsize, self.maxsize)

        self._saveOptsFile()

        self.lenv = lmdb.open(str(path), **opts)
        self.allslabs[path] = self

        # active scan cursors tracked so they survive commits/resizes
        self.scans = set()

        self.dirty = False
        if self.readonly:
            # read-only slabs never hold a write transaction open
            self.xact = None
            self.txnrefcount = 0
        else:
            self._initCoXact()

        self.resizeevent = threading.Event()  # triggered when a resize event occurred
        self.lockdoneevent = asyncio.Event()  # triggered when a memory locking finished

        # LMDB layer uses these for status reporting
        self.locking_memory = False
        self.prefaulting = False
        self.memlocktask = None
        self.max_could_lock = 0
        self.lock_progress = 0
        self.lock_goal = 0

        if self.lockmemory:
            # run the memory-lock loop in an executor thread; on fini, wake it
            # via the resize event and wait for it to complete
            async def memlockfini():
                self.resizeevent.set()
                await self.memlocktask
            self.memlocktask = s_coro.executor(self._memorylockloop)
            self.onfini(memlockfini)
        else:
            self.lockdoneevent.set()

        self.dbnames = {None: (None, False)}  # prepopulate the default DB for speed

        self.onfini(self._onSlabFini)

        self.commitstats = collections.deque(maxlen=1000)  # stores Tuple[time, replayloglen, commit time delta]

        if not self.readonly:
            await Slab.initSyncLoop(self)
Ejemplo n.º 4
0
    async def cull(self, offs: int) -> bool:
        '''
        Remove entries up to (and including) the given offset.

        Args:
            offs: The last offset to cull.

        Returns:
            True if the cull was applied, False if the request was rejected
            (offs below the first index, or culling would empty the log).

        Raises:
            s_exc.SlabInUse: If a fully-culled slab is held open by another task.
        '''

        logger.info('Culling %s at offs %d', self.dirn, offs)

        # Note:  we don't bother deleting the rows from inside a partially culled slab.  We just update self.firstindx
        # so nothing will return those rows anymore.  We only delete from disk entire slabs once they are culled.

        if offs < self.firstindx:
            logger.warning('Unable to cull %s; offs (%d) < starting indx (%d)',
                           self.dirn, offs, self.firstindx)
            return False

        # We keep at least one entry;  this avoids offsets possibly going lower after a restart
        if offs >= self.indx - 1:
            logger.warning(
                'Unable to cull %s at offs %d; must keep at least one entry',
                self.dirn, offs)
            return False

        # drop any cached slab handle before we start deleting files
        if self._cacheridx is not None:
            self._cacheridx = None
            assert self._cacheslab
            await self._cacheslab.fini()
            self._cacheslab = self._cacheseqn = None

        # walk every slab except the tail, deleting those entirely below offs
        del_ridx = None
        for ridx in range(len(self._ranges) - 1):
            startidx = self._ranges[ridx]

            if self._openslabs.get(startidx):
                raise s_exc.SlabInUse(
                    mesg='Attempt to cull while another task is still using it'
                )

            fn = self.slabFilename(self.dirn, startidx)
            # stop at the first slab whose highest entry is beyond offs
            if offs < self._ranges[ridx + 1] - 1:
                logger.warning(
                    'Log %s will not be deleted since offs is less than last indx',
                    fn)
                break

            # best effort removal of the opts sidecar file
            optspath = s_common.switchext(fn, ext='.opts.yaml')
            try:
                os.unlink(optspath)
            except FileNotFoundError:  # pragma: no cover
                pass

            logger.info('Removing log %s with startidx %d', fn, startidx)
            shutil.rmtree(fn)
            del_ridx = ridx

            # yield to the event loop between potentially slow deletions
            await asyncio.sleep(0)

        # persist the new first index so culled rows stay hidden after restart
        self.firstindx = offs + 1
        self._setFirstIndx(self.tailslab, offs + 1)

        # forget the ranges for every slab we deleted
        if del_ridx is not None:
            del self._ranges[:del_ridx + 1]

        # Log if there was an attempt to cull into the tailseqn
        if offs >= self._ranges[-1]:
            fn = self.tailslab.path
            logger.warning(
                'Log %s will not be deleted since offs is in the currently active log',
                fn)

        return True