Example no. 1
    def test_common_chunks(self):
        s = '123456789'
        parts = [chunk for chunk in s_common.chunks(s, 2)]
        self.eq(parts, ['12', '34', '56', '78', '9'])

        parts = [chunk for chunk in s_common.chunks(s, 100000)]
        self.eq(parts, [s])

        parts = [chunk for chunk in s_common.chunks(b'', 10000)]
        self.eq(parts, [b''])

        parts = [chunk for chunk in s_common.chunks([], 10000)]
        self.eq(parts, [[]])

        parts = [chunk for chunk in s_common.chunks('', 10000)]
        self.eq(parts, [''])

        parts = [chunk for chunk in s_common.chunks([1, 2, 3, 4, 5], 2)]
        self.eq(parts, [[1, 2], [3, 4], [5]])

        # set is unslicable
        with self.assertRaises(TypeError) as cm:
            parts = [chunk for chunk in s_common.chunks({1, 2, 3}, 10000)]

        # dict is unslicable
        with self.assertRaises(TypeError) as cm:
            parts = [chunk for chunk in s_common.chunks({1: 2}, 10000)]

        # empty dict is caught during the [0:0] slice
        with self.assertRaises(TypeError) as cm:
            parts = [chunk for chunk in s_common.chunks({}, 10000)]
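
Taken together, these assertions pin down the contract of s_common.chunks: slicable sequences come back in pieces of at most size items, an empty slicable yields a single empty slice, and unslicable types such as sets and dicts raise TypeError when the slice is attempted. The sketch below is a minimal illustration written only to match the behavior exercised here (plus the generator support relied on by the csv example further down); the real synapse.common.chunks implementation may differ in its details, and the name chunks_sketch is a placeholder.

import types
import itertools

def chunks_sketch(item, size):
    '''
    Minimal sketch of a chunks() helper matching the behavior tested above.
    '''
    # Generators cannot be sliced, so batch them with islice instead.
    if isinstance(item, types.GeneratorType):
        while True:
            chunk = tuple(itertools.islice(item, size))
            if not chunk:
                return
            yield chunk

    # Empty slicables ('', b'', []) yield a single empty slice; sets and
    # dicts raise TypeError here because they do not support slicing.
    if not item:
        yield item[0:0]
        return

    # Otherwise walk the item with ordinary slices of at most `size` items.
    offs = 0
    while True:
        chunk = item[offs:offs + size]
        if not chunk:
            return
        yield chunk
        offs += size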
Example no. 2
async def addFeedData(core, outp, feedformat, debug=False, *paths, chunksize=1000, offset=0):

    items = getItems(*paths)
    for path, item in items:

        bname = os.path.basename(path)

        tick = time.time()
        outp.printf(f'Adding items from [{path}]')

        foff = 0
        for chunk in s_common.chunks(item, chunksize):

            clen = len(chunk)
            if offset and foff + clen < offset:
                # We have not yet reached the chunk that
                # contains the requested offset.
                foff += clen
                continue

            await core.addFeedData(feedformat, chunk)

            foff += clen
            outp.printf(f'Added [{clen}] items from [{bname}] - offset [{foff}]')

        tock = time.time()

        outp.printf(f'Done consuming from [{bname}]')
        outp.printf(f'Took [{tock - tick}] seconds.')

    if debug:
        await s_cmdr.runItemCmdr(core, outp)
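
The offset handling above lets a partially completed ingest resume: whole chunks are skipped until the running count foff reaches the chunk that contains the requested offset, and feeding then continues chunk by chunk from that point. The snippet below is a standalone illustration of that arithmetic using a plain list in place of the feed items; the helper name and the sample numbers are made up purely for demonstration.

def skip_until_offset(items, chunksize, offset):
    # Mirror the resume logic above: skip whole chunks until the one
    # containing `offset`, then keep every chunk from there on.
    kept = []
    foff = 0
    for start in range(0, len(items), chunksize):
        chunk = items[start:start + chunksize]
        clen = len(chunk)
        if offset and foff + clen < offset:
            foff += clen
            continue
        kept.append(chunk)
        foff += clen
    return kept

# With 10 items, chunksize=3 and offset=5, the first chunk (items 0-2) is
# skipped and ingest resumes with the chunk that contains item index 5.
assert skip_until_offset(list(range(10)), 3, 5) == [[3, 4, 5], [6, 7, 8], [9]]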
Example no. 3
    async def puts(self, items, seqn=None):
        '''
        Add the structured data from items to the CryoTank.

        Args:
            items (list):  A list of objects to store in the CryoTank.
            seqn (iden, offs): An iden / offset pair to record.

        Returns:
            int: The ending offset of the items or seqn.
        '''
        size = 0

        for chunk in s_common.chunks(items, 1000):
            metrics = self._items.save(chunk)
            self._metrics.add(metrics)
            await self.fire('cryotank:puts', numrecords=len(chunk))
            size += len(chunk)
            await asyncio.sleep(0)

        if seqn is not None:
            iden, offs = seqn
            self.setOffset(iden, offs + size)

        return size
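
A short usage sketch for puts: feeding a batch in one call stores it in 1000-item chunks and, when a seqn pair is supplied, records the advanced offset for that iden. Here tank is assumed to be an existing CryoTank instance and iden an arbitrary tracking identifier; both, along with the starting offset of 0, are placeholders for illustration.

async def ingest_batch(tank, iden, records):
    # Store the records and record the new offset for this iden.
    size = await tank.puts(records, seqn=(iden, 0))
    # puts() returns the number of items stored, which with a starting
    # offset of 0 is also the ending offset recorded for the iden.
    return size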
Example no. 4
    def _revModl201711012123(self):
        now = s_common.now()
        forms = sorted(self.core.getTufoForms())
        nforms = len(forms)
        for n, form in enumerate(forms):
            adds = []
            logger.debug('Computing node:ndef rows for [{}]'.format(form))
            for i, p, v, t in self.core.store.getRowsByProp(form):
                # This is quicker than going through the norm process
                nv = s_common.guid((p, v))
                adds.append((i, 'node:ndef', nv, now))

            if adds:
                tot = len(adds)
                logger.debug('Adding {:,d} node:ndef rows for [{}]'.format(
                    tot, form))
                with self.core.getCoreXact() as xact:
                    i = 0
                    nt = 100000
                    for chunk in s_common.chunks(adds, nt):
                        self.core.store.addRows(chunk)
                        i = i + len(chunk)
                        logger.debug(
                            'Loading {:,d} [{}%] rows into transaction'.format(
                                i, int((i / tot) * 100)))
            logger.debug('Processed {:,d} [{}%] forms.'.format(
                n, int((n / nforms) * 100)))
        logger.debug('Finished adding node:ndef rows to the Cortex')
Example no. 5
    async def bulkput(self, files, proxykeeper=None):
        '''
        Save a list of files to the axon.

        Args:
            files ([bytes]): A list of files as bytes blobs.

        Returns:
            int: The number of files saved.
        '''
        if proxykeeper is None:
            proxykeeper = self._proxykeeper
        bsid, blobstor = await proxykeeper.randoproxy()
        count = 0
        async with await blobstor.startput() as uploader:
            for bytz in files:
                hashval = hashlib.sha256(bytz).digest()
                if await self.wants([hashval]) == []:
                    continue
                for chunk in s_common.chunks(bytz, CHUNK_SIZE):
                    await uploader.write(chunk)
                await uploader.finishFile()
            count, hashval = await uploader.finish()
            if count:
                await self._executor_nowait(self._addloc, bsid, hashval)

        await self._executor(self.xact.commit)

        return count
Example no. 6
def addFeedData(core,
                outp,
                feedformat,
                debug=False,
                *paths,
                chunksize=1000,
                offset=0):

    items = getItems(*paths)
    for path, item in items:
        bname = os.path.basename(path)
        tick = time.time()
        outp.printf(f'Adding items from [{path}]')
        foff = 0
        for chunk in s_common.chunks(item, chunksize):

            clen = len(chunk)
            if offset and foff + clen < offset:
                # We have not yet reached the chunk that
                # contains the requested offset.
                foff += clen
                continue

            core.addFeedData(feedformat, chunk)

            foff += clen
            outp.printf(
                f'Added [{clen}] items from [{bname}] - offset [{foff}]')

        tock = time.time()
        outp.printf(f'Done consuming from [{bname}]')
        outp.printf(f'Took [{tock - tick}] seconds.')
    if debug:
        s_cmdr.runItemCmdr(core, outp)
Example no. 7
    def _get(self, hashval, xact):
        with xact.cursor(db=self._blob_bytes) as curs:
            if not _find_hash(curs, hashval):
                return None
            for k, v in curs:
                if not k[:len(hashval)] == hashval:
                    return None
                yield from s_common.chunks(v, CHUNK_SIZE)
Example no. 8
    def _onBlobMetrics(self, chan, mesg):

        offs = mesg[1].get('offs', 0)
        with chan:
            chan.setq()
            chan.txok(True)

            metr = self.blobs.metrics(offs=offs)
            genr = s_common.chunks(metr, 1000)

            chan.txwind(genr, 100, timeout=30)
Example no. 9
    def _onAxonMetrics(self, chan, mesg):
        offs = mesg[1].get('offs', 0)

        chan.setq()
        chan.txok(True)

        with self.lenv.begin() as xact:
            metr = self.metrics.iter(xact, offs)
            genr = s_common.chunks(metr, 1000)
            chan.txwind(genr, 100, timeout=30)
            chan.txfini()
Example no. 10
    def iterrows():
        for path in opts.csvfiles:

            with open(path, 'r', encoding='utf8') as fd:

                if opts.csv_header:
                    fd.readline()

                def genr():

                    for row in csv.reader(fd):
                        yield row

                for rows in s_common.chunks(genr(), 1000):
                    yield rows
Example no. 11
    def copydb(self, sourcedbname, destslab, destdbname=None, progresscb=None):
        '''
        Copy an entire database in this slab to a new database in potentially another slab.

        Args:
            sourcedbname (str): name of the db in the source environment
            destslab (LmdbSlab): which slab to copy rows to
            destdbname (str): the name of the database to copy rows to in destslab
            progresscb (Callable[int]):  if not None, this function will be periodically called with the number of rows
                                         completed

        Returns:
            (int): the number of rows copied

        Note:
            If any rows already exist in the target database, this method raises an exception.  This means that one cannot
            use destdbname=None unless there are no explicit databases in the destination slab.
        '''
        sourcedb, dupsort = self.dbnames[sourcedbname]

        destslab.initdb(destdbname, dupsort)
        destdb, _ = destslab.dbnames[destdbname]

        statdict = destslab.stat(db=destdbname)
        if statdict['entries'] > 0:
            raise s_exc.DataAlreadyExists()

        rowcount = 0

        for chunk in s_common.chunks(self.scanByFull(db=sourcedbname),
                                     COPY_CHUNKSIZE):
            ccount, acount = destslab.putmulti(chunk,
                                               dupdata=True,
                                               append=True,
                                               db=destdbname)
            if ccount != len(chunk) or acount != len(chunk):
                raise s_exc.BadCoreStore(
                    mesg='Unexpected number of values written'
                )  # pragma: no cover

            rowcount += len(chunk)
            if progresscb is not None and 0 == (rowcount % PROGRESS_PERIOD):
                progresscb(rowcount)

        return rowcount
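
A brief usage sketch for copydb, assuming srcslab and destslab are two already-open slabs and 'nodes' is an existing database name in the source; all three names are placeholders for illustration.

def report(rowcount):
    # Called every PROGRESS_PERIOD rows with the running total.
    print(f'copied {rowcount:,d} rows so far')

# Copy every row from the source database into a database of the same name
# in the destination slab, logging progress along the way.
total = srcslab.copydb('nodes', destslab, destdbname='nodes', progresscb=report)
print(f'copy complete: {total:,d} rows')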
Example no. 12
    def _onCryoMetrics(self, chan, mesg):
        name = mesg[1].get('name')
        offs = mesg[1].get('offs')
        size = mesg[1].get('size')

        with chan:

            tank = self.tanks.get(name)
            if tank is None:
                return chan.txfini((False, ('NoSuchName', {'name': name})))

            chan.setq()
            chan.tx((True, True))

            metr = tank.metrics(offs, size=size)

            genr = s_common.chunks(metr, 1000)
            chan.txwind(genr, 100, timeout=30)
Example no. 13
    async def test_axon_uploader(self):
        async with self.getTestDmon(mirror='axondmon') as dmon, \
                await self.agetTestProxy(dmon, 'axon00') as axon:
            abhash = hashlib.sha256(b'ab').digest()
            cdhash = hashlib.sha256(b'cd').digest()

            blobstorurl = f'tcp://{dmon.addr[0]}:{dmon.addr[1]}/blobstor00'
            await axon.addBlobStor(blobstorurl)

            # Test uploader interface
            async with await axon.startput() as uploader:
                await uploader.write(b'a')
                await uploader.write(b'b')
                await uploader.finishFile()
                await uploader.write(b'cd')
                count, hashval = await uploader.finish()
                self.eq(2, count)
                self.eq(cdhash, hashval)

            # Give the clone subscription a chance to catch up
            self.eq([], await axon.wants([abhash, cdhash]))
            foo = await axon.get(cdhash)
            [x async for x in foo]
            self.eq(b'cd', b''.join([x async for x in await axon.get(cdhash)]))
            self.eq(b'ab', b''.join([x async for x in await axon.get(abhash)]))

            # Test deconfliction: upload a large buffer
            async with await axon.startput() as uploader:
                await uploader.write(b'cd')
                await uploader.finishFile()
                await uploader.write(b'c')
                await uploader.write(b'd')
                await uploader.finishFile()
                for chunk in s_common.chunks(bbuf, s_axon.CHUNK_SIZE + 13):
                    await uploader.write(chunk)
                count, hashval = await uploader.finish()
                self.eq(1, count)
                self.eq(bbufhash, hashval)

            await self._wait_for_axon_files(axon, 3)
            self.eq((), await axon.wants([bbufhash]))
            self.eq(bbuf,
                    b''.join([x async for x in await axon.get(bbufhash)]))
Example no. 14
    def puts(self, name, items, timeout=None):
        '''
        Add data to the named remote CryoTank by consuming from items.

        Args:
            name (str): The name of the remote CryoTank.
            items (iter): An iterable of data items to load.
            timeout (float/int): The maximum timeout for an ack.

        Returns:
            False if the remote task is not acknowledged; otherwise the result of the windowed transmission.
        '''
        with self._cryo_sess.task(('cryo:puts', {'name': name})) as chan:

            if not chan.next(timeout=timeout):
                return False

            iitr = s_common.chunks(items, self._chunksize)
            return chan.txwind(iitr, self._chunksize, timeout=timeout)
Example no. 15
    def _onCryoSlice(self, chan, mesg):

        name = mesg[1].get('name')
        offs = mesg[1].get('offs')
        size = mesg[1].get('size')

        with chan:

            tank = self.tanks.get(name)
            if tank is None:
                return chan.tx((False, ('NoSuchName', {'name': name})))

            chan.setq()
            chan.tx((True, True))

            genr = tank.slice(offs, size)
            genr = s_common.chunks(genr, 100)

            # 100 chunks of 100 in flight...
            chan.txwind(genr, 100, timeout=30)
Example no. 16
    def _saveBlobByts(self, todo):

        rows = []
        for buid, sha256, byts in todo:
            for i, ibyts in enumerate(s_common.chunks(byts, blocksize)):
                indx = struct.pack('>Q', i)
                rows.append((buid + indx, ibyts))

        ok, retn = self.blobs.any()
        if not ok:
            return False, retn

        name, cell = retn

        mesg = ('blob:save', {'rows': rows})

        ok, retn = cell.call(mesg, timeout=30)
        if not ok:
            return False, retn

        return True, name
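
The row keys built above append a big-endian 8-byte block index (struct.pack('>Q', i)) to each blob's buid, so the blocks of a single file sort, and therefore stream back, in the order they were written; the _get generator shown earlier relies on the same property when it walks its cursor. A self-contained check of the key ordering, where the 32-byte buid is a made-up placeholder:

import os
import struct

buid = os.urandom(32)  # placeholder identifier for a single stored blob

# Build keys the same way as above: buid plus an 8-byte big-endian index.
keys = [buid + struct.pack('>Q', i) for i in range(300)]

# Big-endian packing keeps lexicographic byte order equal to numeric block
# order, so an ordered key scan returns the blocks in write order.
assert sorted(keys) == keys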
Example no. 17
    def _revModl201710191144(self):
        with self.core.getCoreXact():
            now = s_common.now()
            adds = []
            logger.debug('Lifting tufo:form rows')
            for i, _, v, t in self.core.store.getRowsByProp('tufo:form'):
                adds.append((i, 'node:created', t, now))
            logger.debug('Deleting existing node:created rows')
            self.core.store.delRowsByProp('node:created')
            if adds:
                tot = len(adds)
                logger.debug('Adding {:,d} node:created rows'.format(tot))
                i = 0
                n = 100000
                for chunk in s_common.chunks(adds, n):
                    self.core.store.addRows(chunk)
                    i = i + len(chunk)
                    logger.debug(
                        'Loading {:,d} [{}%] rows into transaction'.format(
                            i, int((i / tot) * 100)))
        logger.debug('Finished adding node:created rows to the Cortex')
Example no. 18
def convertSpliceFd(fpath):
    '''
    Converts an "old" splice log to the new format.

    Args:
        fpath (str): The path to the "old" splice log file.

    Example:
        convertSpliceFd('/stuff/oldsplicelog.mpk')

    Notes:
        This function reads an "old" splice log file, writes to a temporary
        file, and then overwrites the old file with the new data. This function
        only converts old splices to new splices. If any messages are invalid,
        an exception will be raised and the conversion will exit early and not
        overwrite any data.

    Returns:
        None
    '''
    with tempfile.SpooledTemporaryFile() as tmp:
        with open(fpath, 'r+b') as fd:

            for chnk in s_common.chunks(s_msgpack.iterfd(fd), 1000):
                for mesg in chnk:
                    newspl = convertOldSplice(mesg)
                    if newspl:
                        mesg = newspl[1]['mesg']
                    tmp.write(s_msgpack.en(mesg))

            tmp.seek(0)
            fd.seek(0)

            data = tmp.read(_readsz)
            while data:
                fd.write(data)
                data = tmp.read(_readsz)

            fd.truncate()
Example no. 19
    def eatbytes(self, byts):
        '''
        Consume a buffer of bytes into the axon as a blob.

        Example:

            tufo = axon.eatbytes(byts)

        '''
        hset = HashSet()

        hset.update(byts)
        iden, props = hset.guid()
        blob = self.byiden(iden)
        if blob is not None:
            return blob

        sess = self.alloc(props.get('size'))

        for chnk in s_common.chunks(byts, 10000000):
            blob = self.chunk(sess, chnk)

        return blob
Example no. 20
    def copydb(self, sourcedb, destslab, destdbname=None, progresscb=None):
        '''
        Copy an entire database in this slab to a new database in potentially another slab.

        Args:
            sourcedb (LmdbDatabase): which database in this slab to copy rows from
            destslab (LmdbSlab): which slab to copy rows to
            destdbname (str): the name of the database to copy rows to in destslab
            progresscb (Callable[int]):  if not None, this function will be periodically called with the number of rows
                                         completed

        Returns:
            (int): the number of rows copied

        Note:
            If any rows already exist in the target database, this method raises an exception.  This means that one cannot
            use destdbname=None unless there are no explicit databases in the destination slab.
        '''
        destdb = destslab.initdb(destdbname, sourcedb.dupsort)

        statdict = destslab.stat(db=destdb)
        if statdict['entries'] > 0:
            raise s_exc.DataAlreadyExists()

        rowcount = 0

        for chunk in s_common.chunks(self.scanByFull(db=sourcedb), COPY_CHUNKSIZE):
            ccount, acount = destslab.putmulti(chunk, dupdata=True, append=True, db=destdb)
            if ccount != len(chunk) or acount != len(chunk):
                raise s_exc.BadCoreStore(mesg='Unexpected number of values written')  # pragma: no cover

            rowcount += len(chunk)
            if progresscb is not None and 0 == (rowcount % PROGRESS_PERIOD):
                progresscb(rowcount)

        return rowcount
Example no. 21
    async def runAxonTestBase(self, axon):

        tick = s_common.now()

        # asdfhash test

        self.false(await axon.has(asdfhash))

        with self.raises(s_exc.NoSuchFile):
            async for _ in axon.get(asdfhash):
                pass

        with self.raises(s_exc.NoSuchFile):
            await axon.hashset(asdfhash)

        self.len(0, [item async for item in axon.hashes(0)])

        async with await axon.upload() as fd:
            await fd.write(abuf)
            self.eq(asdfretn, await fd.save())

        # do it again to test the short circuit
        async with await axon.upload() as fd:
            await fd.write(abuf)
            self.eq(asdfretn, await fd.save())

        bytz = []
        async for byts in axon.get(asdfhash):
            bytz.append(byts)

        self.eq(b'asdfasdf', b''.join(bytz))

        self.true(await axon.has(asdfhash))
        self.eq(8, await axon.size(asdfhash))

        # bbufhash test

        self.false(await axon.has(bbufhash))

        self.eq((bbufhash,), await axon.wants((bbufhash, asdfhash)))

        async with await axon.upload() as fd:
            await fd.write(bbuf)
            self.eq(bbufretn, await fd.save())

        self.true(await axon.has(asdfhash))
        self.true(await axon.has(bbufhash))
        await self.check_blob(axon, bbufhash)

        self.eq((), await axon.wants((bbufhash, asdfhash)))

        # put() / puts() tests
        # These don't add new data, but exercise APIs to load data
        retn = await axon.put(abuf)
        self.eq(retn, asdfretn)

        retn = await axon.puts([abuf, bbuf])
        self.eq(retn, (asdfretn, bbufretn))

        # History and metrics

        items = [x async for x in axon.hashes(0)]
        self.eq(((0, (asdfhash, 8)), (1, (bbufhash, 33554437))), items)

        items = [x[1] async for x in axon.history(tick)]
        self.eq(((asdfhash, 8), (bbufhash, 33554437)), items)

        items = [x[1] async for x in axon.history(0, tock=1)]
        self.eq((), items)

        info = await axon.metrics()
        self.eq(33554445, info.get('size:bytes'))
        self.eq(2, info.get('file:count'))

        # Empty file test

        async with await axon.upload() as fd:
            await fd.write(b'')
            self.eq(emptyretn, await fd.save())

        info = await axon.metrics()
        self.eq(33554445, info.get('size:bytes'))
        self.eq(3, info.get('file:count'))

        bytz = []
        async for byts in axon.get(emptyhash):
            bytz.append(byts)

        self.eq(b'', b''.join(bytz))

        # Healthcheck test
        snfo = await axon.getHealthCheck()
        self.eq(snfo.get('status'), 'nominal')
        axfo = [comp for comp in snfo.get('components') if comp.get('name') == 'axon'][0]
        self.eq(axfo.get('data'), await axon.metrics())

        # Upload context reuse
        with mock.patch('synapse.axon.MAX_SPOOL_SIZE', s_axon.CHUNK_SIZE * 2):

            very_bigbuf = (s_axon.MAX_SPOOL_SIZE + 2) * b'V'
            vbighash = hashlib.sha256(very_bigbuf).digest()
            vbigretn = (len(very_bigbuf), vbighash)

            async with await axon.upload() as fd:
                # We can reuse the FD _after_ we have called save() on it.
                await fd.write(abuf)
                retn = await fd.save()
                self.eq(retn, asdfretn)

                # Reuse after uploading an existing file
                # Now write a new file
                await fd.write(pbuf)
                retn = await fd.save()
                self.eq(retn, pennretn)
                await self.check_blob(axon, pennhash)

                # Reuse test with large file causing a rollover
                for chunk in s_common.chunks(very_bigbuf, s_axon.CHUNK_SIZE):
                    await fd.write(chunk)
                retn = await fd.save()
                self.eq(retn, vbigretn)
                await self.check_blob(axon, vbighash)

                # Reuse test with small file post rollover
                await fd.write(rbuf)
                retn = await fd.save()
                self.eq(retn, rgryretn)
                await self.check_blob(axon, rgryhash)

        info = await axon.metrics()
        self.eq(67108899, info.get('size:bytes'))
        self.eq(6, info.get('file:count'))

        byts = b''.join([s_msgpack.en('foo'), s_msgpack.en('bar'), s_msgpack.en('baz')])
        size, sha256b = await axon.put(byts)
        sha256 = s_common.ehex(sha256b)
        self.eq(('foo', 'bar', 'baz'), [item async for item in axon.iterMpkFile(sha256)])

        # When testing a local axon, we want to ensure that the FD was in fact fini'd
        if isinstance(fd, s_axon.UpLoad):
            self.true(fd.fd.closed)

        self.true(await axon.del_(bbufhash))
        self.eq((False,), await axon.dels((bbufhash,)))

        info = await axon.metrics()
        self.eq(33554474, info.get('size:bytes'))
        self.eq(6, info.get('file:count'))

        self.notin(bbufretn[::-1], [item[1] async for item in axon.hashes(0)])

        self.false(await axon.del_(bbufhash))

        # deleted file re-added gets returned twice by hashes
        retn = await axon.put(bbuf)
        self.eq(retn, bbufretn)
        self.len(2, [item[1] async for item in axon.hashes(0) if item[1][0] == bbufhash])
        self.len(1, [item[1] async for item in axon.hashes(2) if item[1][0] == bbufhash])

        # readlines / jsonlines
        (lsize, l256) = await axon.put(linesbuf)
        (jsize, j256) = await axon.put(jsonsbuf)
        (bsize, b256) = await axon.put(b'\n'.join((jsonsbuf, linesbuf)))

        lines = [item async for item in axon.readlines(s_common.ehex(l256))]
        self.eq(('asdf', '', 'qwer'), lines)
        jsons = [item async for item in axon.jsonlines(s_common.ehex(j256))]
        self.eq(({'foo': 'bar'}, {'baz': 'faz'}), jsons)
        jsons = []
        with self.raises(s_exc.BadJsonText):
            async for item in axon.jsonlines(s_common.ehex(b256)):
                jsons.append(item)
        self.eq(({'foo': 'bar'}, {'baz': 'faz'}), jsons)
Example no. 22
    def test_axon_cell(self):

        # implement as many tests as possible in this one
        # since it *has* to use a neuron to work correctly

        # put all the things that need fini() into a BusRef...
        with self.getTestDir() as dirn:

            with s_eventbus.BusRef() as bref:

                # neur00 ############################################
                # Set port to zero to allow a port to be automatically assigned during testing
                conf = {'host': 'localhost', 'bind': '127.0.0.1', 'port': 0}
                path = s_common.gendir(dirn, 'neuron')
                logger.debug('Bringing Neuron online')
                neur = s_neuron.Neuron(path, conf)
                bref.put('neur00', neur)

                root = neur.getCellAuth()
                addr = neur.getCellAddr()
                nport = addr[1]  # Save the port for later use

                # blob00 ############################################
                path = s_common.gendir(dirn, 'blob00')
                authblob00 = neur.genCellAuth('blob00')
                s_msgpack.dumpfile(authblob00, os.path.join(path, 'cell.auth'))
                logger.debug('Bringing blob00 online')
                conf = {'host': 'localhost', 'bind': '127.0.0.1'}
                blob00 = s_axon.BlobCell(path, conf)
                bref.put('blob00', blob00)
                self.true(blob00.cellpool.neurwait(timeout=3))

                user = s_cell.CellUser(root)
                blob00sess = user.open(blob00.getCellAddr(), timeout=3)
                bref.put('blob00sess', blob00sess)

                mesg = ('blob:stat', {})
                ok, retn = blob00sess.call(mesg, timeout=3)
                self.true(ok)
                self.eq(retn, {})  # Nothing there yet

                # blob01 ############################################
                path = s_common.gendir(dirn, 'blob01')
                authblob01 = neur.genCellAuth('blob01')
                s_msgpack.dumpfile(authblob01, os.path.join(path, 'cell.auth'))

                blob01conf = dict(conf)
                blob01conf['blob:cloneof'] = 'blob00@localhost'
                logger.debug('Bringing blob01 online')
                blob01 = s_axon.BlobCell(path, blob01conf)
                bref.put('blob01', blob01)
                self.true(blob01.cellpool.neurwait(timeout=3))
                blob01sess = user.open(blob01.getCellAddr(), timeout=3)
                bref.put('blob01sess', blob01sess)
                blob01wait = blob01.waiter(1, 'blob:clone:rows')

                # axon00 ############################################
                path = s_common.gendir(dirn, 'axon00')
                authaxon00 = neur.genCellAuth('axon00')
                s_msgpack.dumpfile(authaxon00, os.path.join(path, 'cell.auth'))
                axonconf = {
                    'host': 'localhost',
                    'bind': '127.0.0.1',
                    'axon:blobs': ('blob00@localhost', ),
                }
                logger.debug('Bringing axon00 online')
                axon00 = s_axon.AxonCell(path, axonconf)
                bref.put('axon00', axon00)
                self.true(axon00.cellpool.neurwait(timeout=3))
                #####################################################

                sess = user.open(axon00.getCellAddr(), timeout=3)
                bref.put('sess', sess)

                # wait for the axon to have blob00
                ready = False

                for i in range(30):

                    if axon00.blobs.items():
                        ready = True
                        break

                    time.sleep(0.1)

                self.true(ready)

                axon = s_axon.AxonClient(sess)
                blob = s_axon.BlobClient(blob00sess)
                blob01c = s_axon.BlobClient(blob01sess)

                self.eq((), tuple(axon.metrics()))
                self.eq((), tuple(blob.metrics()))

                self.len(1, axon.wants([asdfhash]))

                # Asking for bytes prior to the bytes being present raises
                self.genraises(RetnErr, axon.bytes, asdfhash, timeout=3)

                self.eq(1, axon.save([b'asdfasdf'], timeout=3))

                self.eq((), tuple(axon.metrics(offs=999999999)))
                self.eq((), tuple(blob.metrics(offs=99999999, timeout=3)))

                metrics = list(blob.metrics(timeout=3))
                self.len(1, metrics)
                self.eq(8, metrics[0][1].get('size'))
                self.eq(1, metrics[0][1].get('blocks'))

                self.len(0, axon.wants([asdfhash], timeout=3))

                self.eq(b'asdfasdf', b''.join(axon.bytes(asdfhash, timeout=3)))

                stat = axon.stat(timeout=3)
                self.eq(1, stat.get('files'))
                self.eq(8, stat.get('bytes'))

                # let's see if the bytes made it to the blob clone...
                self.nn(blob01wait.wait(timeout=10))

                newp = os.urandom(32)

                def loop():
                    s_common.spin(axon.bytes(newp))

                self.raises(s_exc.RetnErr, loop)

                blob01wait = blob01.waiter(1, 'blob:clone:rows')
                self.eq(qwerhash, axon.upload([b'qwer', b'qwer'], timeout=3))

                self.len(0, axon.wants([qwerhash]))
                self.eq(b'qwerqwer', b''.join(axon.bytes(qwerhash, timeout=3)))
                self.nn(blob01wait.wait(3))

                retn = list(axon.metrics(0, timeout=3))
                self.eq(retn[0][1].get('size'), 8)
                self.eq(retn[0][1].get('cell'), 'blob00@localhost')

                # Try uploading a large file
                logger.debug('Large file test')
                # Monkeypatch axon to a smaller blocksize
                s_axon.blocksize = s_const.kibibyte
                self.raises(RetnErr, axon.locs, bbufhash, timeout=3)
                genr = s_common.chunks(bbuf, s_axon.blocksize)
                blob01wait = blob01.waiter(1, 'blob:clone:rows')
                self.eq(bbufhash, axon.upload(genr, timeout=3))
                self.eq((), axon.wants([bbufhash], timeout=3))

                # Then retrieve it
                size = 0
                gots = []
                testhash = hashlib.sha256()
                for byts in axon.bytes(bbufhash, timeout=3):
                    size += len(byts)
                    gots.append(byts)
                    testhash.update(byts)
                self.eq(bbufhash, testhash.digest())

                try:
                    self.eq(size, len(bbuf))
                    self.eq(bbufhash, testhash.digest())

                except Exception as e:

                    for byts in gots:
                        print(repr(byts))

                    print('SIZE: %d/%d' % (size, len(bbuf)))
                    raise

                self.nn(blob01wait.wait(3))
                locs = axon.locs(bbufhash, timeout=3)
                self.len(1, locs)
                self.isin('blob00', locs[0][0])
                # Use the buid to retrieve the large file from blob01
                buid = locs[0][1]
                testhash = hashlib.sha256()
                for byts in blob01c.bytes(buid, timeout=3):
                    testhash.update(byts)
                self.eq(bbufhash, testhash.digest())

                # Try storing an empty file
                logger.debug('Nullfile test')
                axon.save([b''])
                self.eq((), tuple(axon.wants([nullhash])))
                # Then retrieve it
                parts = []
                for part in axon.bytes(nullhash):
                    parts.append(part)
                self.eq([b''], parts)

                logger.debug('Shutdown / restart blob01 test')
                bref.pop('blob01')
                blob01.fini()
                self.true(blob01.isfini)
                axon.save([b'hehehaha'], timeout=3)
                self.eq((), axon.wants([hehahash], timeout=3))
                # Now bring blob01 back online
                logger.debug('Bringing blob01 back online')
                blob01 = s_axon.BlobCell(path, blob01conf)
                bref.put('blob01', blob01)
                self.true(blob01.cellpool.neurwait(timeout=3))
                blob01wait = blob01.waiter(1, 'blob:clone:rows')
                # Cloning should start up shortly
                self.nn(blob01wait.wait(10))

            # Let everything get shut down by the busref fini
            logger.debug('Bringing everything back up')
            with s_eventbus.BusRef() as bref:
                # neur00 ############################################
                conf = {
                    'host': 'localhost',
                    'bind': '127.0.0.1',
                    'port': nport
                }
                path = s_common.gendir(dirn, 'neuron')
                logger.debug('Bringing Neuron Back online')
                neur = s_neuron.Neuron(path, conf)
                bref.put('neur00', neur)
                root = neur.getCellAuth()
                # blob00 ############################################
                path = s_common.gendir(dirn, 'blob00')
                logger.debug('Bringing blob00 back online')
                conf = {'host': 'localhost', 'bind': '127.0.0.1'}
                blob00 = s_axon.BlobCell(path, conf)
                bref.put('blob00', blob00)
                self.true(blob00.cellpool.neurwait(timeout=3))
                user = s_cell.CellUser(root)
                blob00sess = user.open(blob00.getCellAddr(), timeout=3)
                bref.put('blob00sess', blob00sess)
                # blob01 ############################################
                path = s_common.gendir(dirn, 'blob01')
                blob01conf = dict(conf)
                blob01conf['blob:cloneof'] = 'blob00@localhost'
                logger.debug('Bringing blob01 back online')
                blob01 = s_axon.BlobCell(path, blob01conf)
                bref.put('blob01', blob01)
                self.true(blob01.cellpool.neurwait(timeout=3))
                blob01wait = blob01.waiter(1, 'blob:clone:rows')
                # axon00 ############################################
                path = s_common.gendir(dirn, 'axon00')
                authaxon00 = neur.genCellAuth('axon00')
                s_msgpack.dumpfile(authaxon00, os.path.join(path, 'cell.auth'))
                axonconf = {
                    'host': 'localhost',
                    'bind': '127.0.0.1',
                    'axon:blobs': ('blob00@localhost', ),
                }
                logger.debug('Bringing axon00 online')
                axon00 = s_axon.AxonCell(path, axonconf)
                bref.put('axon00', axon00)
                self.true(axon00.cellpool.neurwait(timeout=3))
                #####################################################
                sess = user.open(axon00.getCellAddr(), timeout=3)
                bref.put('sess', sess)
                # wait for the axon to have blob00
                ready = False
                for i in range(30):
                    if axon00.blobs.items():
                        ready = True
                        break
                    time.sleep(0.1)
                self.true(ready)
                axon = s_axon.AxonClient(sess)

                # Try retrieving a large file
                testhash = hashlib.sha256()
                for byts in axon.bytes(bbufhash, timeout=3):
                    testhash.update(byts)
                self.eq(bbufhash, testhash.digest())

                # Try saving a new file and an existing file to the cluster and ensure it is replicated
                self.eq((ohmyhash, ),
                        axon.wants((ohmyhash, hehahash, nullhash), 3))
                self.eq(1, axon.save([b'ohmyohmyy', b'']))
                self.nn(blob01wait.wait(10))
Example no. 23
    async def runAxonTestBase(self, axon):

        tick = s_common.now()

        logger.info('asdfhash test')

        self.false(await axon.has(asdfhash))

        with self.raises(s_exc.NoSuchFile):
            async for _ in axon.get(asdfhash):
                pass

        async with await axon.upload() as fd:
            await fd.write(abuf)
            self.eq(asdfretn, await fd.save())

        # do it again to test the short circuit
        async with await axon.upload() as fd:
            await fd.write(abuf)
            self.eq(asdfretn, await fd.save())

        bytz = []
        async for byts in axon.get(asdfhash):
            bytz.append(byts)

        self.eq(b'asdfasdf', b''.join(bytz))

        self.true(await axon.has(asdfhash))
        self.eq(8, await axon.size(asdfhash))

        logger.info('bbufhash test')

        self.false(await axon.has(bbufhash))

        self.eq((bbufhash,), await axon.wants((bbufhash, asdfhash)))

        async with await axon.upload() as fd:
            await fd.write(bbuf)
            self.eq(bbufretn, await fd.save())

        self.true(await axon.has(asdfhash))
        self.true(await axon.has(bbufhash))
        await self.check_blob(axon, bbufhash)

        self.eq((), await axon.wants((bbufhash, asdfhash)))

        logger.info('put() / puts() tests')
        # These don't add new data, but exercise APIs to load data
        retn = await axon.put(abuf)
        self.eq(retn, asdfretn)

        retn = await axon.puts([abuf, bbuf])
        self.eq(retn, (asdfretn, bbufretn))

        logger.info('History and metrics')

        items = [x async for x in axon.hashes(0)]
        self.eq(((0, (asdfhash, 8)), (1, (bbufhash, 33554437))), items)

        items = [x[1] async for x in axon.history(tick)]
        self.eq(((asdfhash, 8), (bbufhash, 33554437)), items)

        items = [x[1] async for x in axon.history(0, tock=1)]
        self.eq((), items)

        info = await axon.metrics()
        self.eq(33554445, info.get('size:bytes'))
        self.eq(2, info.get('file:count'))

        logger.info('Empty file test')

        async with await axon.upload() as fd:
            await fd.write(b'')
            self.eq(emptyretn, await fd.save())

        info = await axon.metrics()
        self.eq(33554445, info.get('size:bytes'))
        self.eq(3, info.get('file:count'))

        bytz = []
        async for byts in axon.get(emptyhash):
            bytz.append(byts)

        self.eq(b'', b''.join(bytz))

        logger.info('Healthcheck test')
        snfo = await axon.getHealthCheck()
        self.eq(snfo.get('status'), 'nominal')
        axfo = [comp for comp in snfo.get('components') if comp.get('name') == 'axon'][0]
        self.eq(axfo.get('data'), await axon.metrics())

        logger.info('Upload context reuse')
        with mock.patch('synapse.axon.MAX_SPOOL_SIZE', s_axon.CHUNK_SIZE * 2):

            very_bigbuf = (s_axon.MAX_SPOOL_SIZE + 2) * b'V'
            vbighash = hashlib.sha256(very_bigbuf).digest()
            vbigretn = (len(very_bigbuf), vbighash)

            async with await axon.upload() as fd:
                # We can reuse the FD _after_ we have called save() on it.
                await fd.write(abuf)
                retn = await fd.save()
                self.eq(retn, asdfretn)

                logger.info('Reuse after uploading an existing file')
                # Now write a new file
                await fd.write(pbuf)
                retn = await fd.save()
                self.eq(retn, pennretn)
                await self.check_blob(axon, pennhash)

                logger.info('Reuse test with large file causing a rollover')
                for chunk in s_common.chunks(very_bigbuf, s_axon.CHUNK_SIZE):
                    await fd.write(chunk)
                retn = await fd.save()
                self.eq(retn, vbigretn)
                await self.check_blob(axon, vbighash)

                logger.info('Reuse test with small file post rollover')
                await fd.write(rbuf)
                retn = await fd.save()
                self.eq(retn, rgryretn)
                await self.check_blob(axon, rgryhash)

        info = await axon.metrics()
        self.eq(67108899, info.get('size:bytes'))
        self.eq(6, info.get('file:count'))

        byts = b''.join([s_msgpack.en('foo'), s_msgpack.en('bar'), s_msgpack.en('baz')])
        size, sha256b = await axon.put(byts)
        sha256 = s_common.ehex(sha256b)
        self.eq(('foo', 'bar', 'baz'), [item async for item in axon.iterMpkFile(sha256)])

        # When testing a local axon, we want to ensure that the FD was in fact fini'd
        if isinstance(fd, s_axon.UpLoad):
            self.true(fd.fd.closed)