def test_common_chunks(self):
    s = '123456789'
    parts = [chunk for chunk in s_common.chunks(s, 2)]
    self.eq(parts, ['12', '34', '56', '78', '9'])

    parts = [chunk for chunk in s_common.chunks(s, 100000)]
    self.eq(parts, [s])

    parts = [chunk for chunk in s_common.chunks(b'', 10000)]
    self.eq(parts, [b''])

    parts = [chunk for chunk in s_common.chunks([], 10000)]
    self.eq(parts, [[]])

    parts = [chunk for chunk in s_common.chunks('', 10000)]
    self.eq(parts, [''])

    parts = [chunk for chunk in s_common.chunks([1, 2, 3, 4, 5], 2)]
    self.eq(parts, [[1, 2], [3, 4], [5]])

    # set is unslicable
    with self.assertRaises(TypeError) as cm:
        parts = [chunk for chunk in s_common.chunks({1, 2, 3}, 10000)]

    # dict is unslicable
    with self.assertRaises(TypeError) as cm:
        parts = [chunk for chunk in s_common.chunks({1: 2}, 10000)]

    # empty dict is caught during the [0:0] slice
    with self.assertRaises(TypeError) as cm:
        parts = [chunk for chunk in s_common.chunks({}, 10000)]
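# For reference, a minimal sketch of what s_common.chunks is assumed to do,
# inferred from the tests above and the call sites below (this is NOT the
# synapse implementation): generators fall back to itertools.islice, while
# slicable items are cut with __getitem__, always yielding at least one
# (possibly empty) chunk. The initial [0:0] slice is what rejects unslicable
# types such as set and dict with a TypeError.
import itertools
import types

def chunks(item, size):

    if isinstance(item, types.GeneratorType):
        while True:
            chunk = tuple(itertools.islice(item, size))
            if not chunk:
                return
            yield chunk

    # raises TypeError for unslicable inputs (set, dict, even an empty dict)
    item[0:0]

    offs = 0
    while True:
        chunk = item[offs:offs + size]
        if not chunk:
            if offs == 0:
                # always yield at least one (possibly empty) chunk
                yield chunk
            return
        yield chunk
        offs += size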
async def addFeedData(core, outp, feedformat, debug=False, *paths, chunksize=1000, offset=0):

    items = getItems(*paths)
    for path, item in items:
        bname = os.path.basename(path)
        tick = time.time()
        outp.printf(f'Adding items from [{path}]')

        foff = 0
        for chunk in s_common.chunks(item, chunksize):
            clen = len(chunk)
            if offset and foff + clen < offset:
                # We have not yet encountered a chunk which
                # will include the offset size.
                foff += clen
                continue

            await core.addFeedData(feedformat, chunk)
            foff += clen
            outp.printf(f'Added [{clen}] items from [{bname}] - offset [{foff}]')

        tock = time.time()
        outp.printf(f'Done consuming from [{bname}]')
        outp.printf(f'Took [{tock - tick}] seconds.')

    if debug:
        await s_cmdr.runItemCmdr(core, outp)
async def puts(self, items, seqn=None):
    '''
    Add the structured data from items to the CryoTank.

    Args:
        items (list): A list of objects to store in the CryoTank.
        seqn (iden, offs): An iden / offset pair to record.

    Returns:
        int: The ending offset of the items or seqn.
    '''
    size = 0

    for chunk in s_common.chunks(items, 1000):
        metrics = self._items.save(chunk)
        self._metrics.add(metrics)
        await self.fire('cryotank:puts', numrecords=len(chunk))
        size += len(chunk)
        await asyncio.sleep(0)

    if seqn is not None:
        iden, offs = seqn
        self.setOffset(iden, offs + size)

    return size
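# A hypothetical call against the puts() API above; the tank variable and
# sample records are illustrative only, not from the synapse docs:
#
#   items = [('foo', {'bar': 10}), ('baz', {'faz': 20})]
#   offs = await tank.puts(items, seqn=(s_common.guid(), 0))
#
# puts() persists the items in chunks of 1000 and awaits asyncio.sleep(0)
# between chunks, so a large load cooperatively yields to the event loop.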
def _revModl201711012123(self):
    now = s_common.now()
    forms = sorted(self.core.getTufoForms())
    nforms = len(forms)

    for n, form in enumerate(forms):
        adds = []
        logger.debug('Computing node:ndef rows for [{}]'.format(form))
        for i, p, v, t in self.core.store.getRowsByProp(form):
            # This is quicker than going through the norm process
            nv = s_common.guid((p, v))
            adds.append((i, 'node:ndef', nv, now))

        if adds:
            tot = len(adds)
            logger.debug('Adding {:,d} node:ndef rows for [{}]'.format(tot, form))
            with self.core.getCoreXact() as xact:
                i = 0
                nt = 100000
                for chunk in s_common.chunks(adds, nt):
                    self.core.store.addRows(chunk)
                    i = i + len(chunk)
                    logger.debug('Loading {:,d} [{}%] rows into transaction'.format(
                        i, int((i / tot) * 100)))

        logger.debug('Processed {:,d} [{}%] forms.'.format(
            n, int((n / nforms) * 100)))

    logger.debug('Finished adding node:ndef rows to the Cortex')
async def bulkput(self, files, proxykeeper=None):
    '''
    Save a list of files to the axon.

    Args:
        files ([bytes]): A list of files as bytes blobs.

    Returns:
        int: The number of files saved.
    '''
    if proxykeeper is None:
        proxykeeper = self._proxykeeper

    bsid, blobstor = await proxykeeper.randoproxy()
    count = 0

    async with await blobstor.startput() as uploader:
        for bytz in files:
            hashval = hashlib.sha256(bytz).digest()
            if await self.wants([hashval]) == []:
                continue
            for chunk in s_common.chunks(bytz, CHUNK_SIZE):
                await uploader.write(chunk)
            await uploader.finishFile()

        count, hashval = await uploader.finish()
        if count:
            await self._executor_nowait(self._addloc, bsid, hashval)

    await self._executor(self.xact.commit)

    return count
def addFeedData(core, outp, feedformat, debug=False, *paths, chunksize=1000, offset=0):

    items = getItems(*paths)
    for path, item in items:
        bname = os.path.basename(path)
        tick = time.time()
        outp.printf(f'Adding items from [{path}]')

        foff = 0
        for chunk in s_common.chunks(item, chunksize):
            clen = len(chunk)
            if offset and foff + clen < offset:
                # We have not yet encountered a chunk which
                # will include the offset size.
                foff += clen
                continue

            core.addFeedData(feedformat, chunk)
            foff += clen
            outp.printf(f'Added [{clen}] items from [{bname}] - offset [{foff}]')

        tock = time.time()
        outp.printf(f'Done consuming from [{bname}]')
        outp.printf(f'Took [{tock - tick}] seconds.')

    if debug:
        s_cmdr.runItemCmdr(core, outp)
def _get(self, hashval, xact):
    with xact.cursor(db=self._blob_bytes) as curs:
        if not _find_hash(curs, hashval):
            return None
        for k, v in curs:
            if not k[:len(hashval)] == hashval:
                return None
            yield from s_common.chunks(v, CHUNK_SIZE)
def _onBlobMetrics(self, chan, mesg):
    offs = mesg[1].get('offs', 0)

    with chan:
        chan.setq()
        chan.txok(True)

        metr = self.blobs.metrics(offs=offs)

        genr = s_common.chunks(metr, 1000)
        chan.txwind(genr, 100, timeout=30)
def _onAxonMetrics(self, chan, mesg):
    offs = mesg[1].get('offs', 0)

    chan.setq()
    chan.txok(True)

    with self.lenv.begin() as xact:
        metr = self.metrics.iter(xact, offs)
        genr = s_common.chunks(metr, 1000)
        chan.txwind(genr, 100, timeout=30)

    chan.txfini()
def iterrows():
    for path in opts.csvfiles:
        with open(path, 'r', encoding='utf8') as fd:

            if opts.csv_header:
                fd.readline()

            def genr():
                for row in csv.reader(fd):
                    yield row

            for rows in s_common.chunks(genr(), 1000):
                yield rows
def copydb(self, sourcedbname, destslab, destdbname=None, progresscb=None):
    '''
    Copy an entire database in this slab to a new database in potentially another slab.

    Args:
        sourcedbname (str): name of the db in the source environment
        destslab (LmdbSlab): which slab to copy rows to
        destdbname (str): the name of the database to copy rows to in destslab
        progresscb (Callable[int]): if not None, this function will be periodically called with the number of rows completed

    Returns:
        (int): the number of rows copied

    Note:
        If any rows already exist in the target database, this method raises an exception.  This means that one
        cannot use destdbname=None unless there are no explicit databases in the destination slab.
    '''
    sourcedb, dupsort = self.dbnames[sourcedbname]

    destslab.initdb(destdbname, dupsort)
    destdb, _ = destslab.dbnames[destdbname]

    statdict = destslab.stat(db=destdbname)
    if statdict['entries'] > 0:
        raise s_exc.DataAlreadyExists()

    rowcount = 0

    for chunk in s_common.chunks(self.scanByFull(db=sourcedbname), COPY_CHUNKSIZE):
        ccount, acount = destslab.putmulti(chunk, dupdata=True, append=True, db=destdbname)
        if ccount != len(chunk) or acount != len(chunk):
            raise s_exc.BadCoreStore(mesg='Unexpected number of values written')  # pragma: no cover

        rowcount += len(chunk)
        if progresscb is not None and 0 == (rowcount % PROGRESS_PERIOD):
            progresscb(rowcount)

    return rowcount
def _onCryoMetrics(self, chan, mesg):
    name = mesg[1].get('name')
    offs = mesg[1].get('offs')
    size = mesg[1].get('size')

    with chan:
        tank = self.tanks.get(name)
        if tank is None:
            return chan.txfini((False, ('NoSuchName', {'name': name})))

        chan.setq()
        chan.tx((True, True))

        metr = tank.metrics(offs, size=size)

        genr = s_common.chunks(metr, 1000)
        chan.txwind(genr, 100, timeout=30)
async def test_axon_uploader(self):

    async with self.getTestDmon(mirror='axondmon') as dmon, \
            await self.agetTestProxy(dmon, 'axon00') as axon:

        abhash = hashlib.sha256(b'ab').digest()
        cdhash = hashlib.sha256(b'cd').digest()

        blobstorurl = f'tcp://{dmon.addr[0]}:{dmon.addr[1]}/blobstor00'
        await axon.addBlobStor(blobstorurl)

        # Test uploader interface
        async with await axon.startput() as uploader:
            await uploader.write(b'a')
            await uploader.write(b'b')
            await uploader.finishFile()
            await uploader.write(b'cd')
            count, hashval = await uploader.finish()
            self.eq(2, count)
            self.eq(cdhash, hashval)

        # Give the clone subscription a chance to catch up
        self.eq([], await axon.wants([abhash, cdhash]))
        foo = await axon.get(cdhash)
        [x async for x in foo]
        self.eq(b'cd', b''.join([x async for x in await axon.get(cdhash)]))
        self.eq(b'ab', b''.join([x async for x in await axon.get(abhash)]))

        # Test deconfliction, Upload a big boy
        async with await axon.startput() as uploader:
            await uploader.write(b'cd')
            await uploader.finishFile()
            await uploader.write(b'c')
            await uploader.write(b'd')
            await uploader.finishFile()
            for chunk in s_common.chunks(bbuf, s_axon.CHUNK_SIZE + 13):
                await uploader.write(chunk)
            count, hashval = await uploader.finish()
            self.eq(1, count)
            self.eq(bbufhash, hashval)

        await self._wait_for_axon_files(axon, 3)
        self.eq((), await axon.wants([bbufhash]))
        self.eq(bbuf, b''.join([x async for x in await axon.get(bbufhash)]))
def puts(self, name, items, timeout=None):
    '''
    Add data to the named remote CryoTank by consuming from items.

    Args:
        name (str): The name of the remote CryoTank.
        items (iter): An iterable of data items to load.
        timeout (float/int): The maximum timeout for an ack.

    Returns:
        bool: False if the remote task was not acknowledged, otherwise the
        result of the windowed transmission.
    '''
    with self._cryo_sess.task(('cryo:puts', {'name': name})) as chan:

        if not chan.next(timeout=timeout):
            return False

        iitr = s_common.chunks(items, self._chunksize)
        return chan.txwind(iitr, self._chunksize, timeout=timeout)
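# Hypothetical use of the remote puts() above, assuming a connected client
# instance named cryo (the name and data are illustrative):
#
#   cryo.puts('mytank', rows, timeout=30)
#
# The items are re-chunked via s_common.chunks() and transmitted with
# chan.txwind(), a windowed send, so the full iterable never needs to be
# materialized in memory at once.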
def _onCryoSlice(self, chan, mesg):
    name = mesg[1].get('name')
    offs = mesg[1].get('offs')
    size = mesg[1].get('size')

    with chan:
        tank = self.tanks.get(name)
        if tank is None:
            return chan.tx((False, ('NoSuchName', {'name': name})))

        chan.setq()
        chan.tx((True, True))

        genr = tank.slice(offs, size)

        # 100 chunks of 100 in flight...
        genr = s_common.chunks(genr, 100)
        chan.txwind(genr, 100, timeout=30)
def _saveBlobByts(self, todo):

    rows = []
    for buid, sha256, byts in todo:
        for i, ibyts in enumerate(s_common.chunks(byts, blocksize)):
            indx = struct.pack('>Q', i)
            rows.append((buid + indx, ibyts))

    ok, retn = self.blobs.any()
    if not ok:
        return False, retn

    name, cell = retn

    mesg = ('blob:save', {'rows': rows})
    ok, retn = cell.call(mesg, timeout=30)
    if not ok:
        return False, retn

    return True, name
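# Why _saveBlobByts packs the block index with struct.pack('>Q', i):
# big-endian fixed-width integers sort lexicographically in the same order
# as numerically, so a cursor scanning buid-prefixed keys returns the
# blocks of a blob in write order. A small self-contained check:
import struct

keys = [struct.pack('>Q', i) for i in (0, 1, 255, 256, 65536)]
assert keys == sorted(keys)  # byte ordering matches numeric ordering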
def _revModl201710191144(self):
    with self.core.getCoreXact():
        now = s_common.now()
        adds = []

        logger.debug('Lifting tufo:form rows')
        for i, _, v, t in self.core.store.getRowsByProp('tufo:form'):
            adds.append((i, 'node:created', t, now))

        logger.debug('Deleting existing node:created rows')
        self.core.store.delRowsByProp('node:created')

        if adds:
            tot = len(adds)
            logger.debug('Adding {:,d} node:created rows'.format(tot))
            i = 0
            n = 100000
            for chunk in s_common.chunks(adds, n):
                self.core.store.addRows(chunk)
                i = i + len(chunk)
                logger.debug('Loading {:,d} [{}%] rows into transaction'.format(
                    i, int((i / tot) * 100)))

        logger.debug('Finished adding node:created rows to the Cortex')
def convertSpliceFd(fpath):
    '''
    Converts an "old" splice log to the new format.

    Args:
        fpath (str): The path to the "old" splice log file.

    Example:
        convertSpliceFd('/stuff/oldsplicelog.mpk')

    Notes:
        This function reads an "old" splice log file, writes to a temporary
        file, and then overwrites the old file with the new data. This function
        only converts old splices to new splices. If any messages are invalid,
        an exception will be raised and the conversion will exit early and not
        overwrite any data.

    Returns:
        None
    '''
    with tempfile.SpooledTemporaryFile() as tmp:
        with open(fpath, 'r+b') as fd:

            for chnk in s_common.chunks(s_msgpack.iterfd(fd), 1000):
                for mesg in chnk:
                    newspl = convertOldSplice(mesg)
                    if newspl:
                        mesg = newspl[1]['mesg']
                    tmp.write(s_msgpack.en(mesg))

            tmp.seek(0)
            fd.seek(0)

            data = tmp.read(_readsz)
            while data:
                fd.write(data)
                data = tmp.read(_readsz)

            fd.truncate()
def eatbytes(self, byts):
    '''
    Consume a buffer of bytes into the axon as a blob.

    Example:
        tufo = axon.eatbytes(byts)
    '''
    hset = HashSet()
    hset.update(byts)

    iden, props = hset.guid()
    blob = self.byiden(iden)
    if blob is not None:
        return blob

    sess = self.alloc(props.get('size'))
    for chnk in s_common.chunks(byts, 10000000):
        blob = self.chunk(sess, chnk)

    return blob
def copydb(self, sourcedb, destslab, destdbname=None, progresscb=None):
    '''
    Copy an entire database in this slab to a new database in potentially another slab.

    Args:
        sourcedb (LmdbDatabase): which database in this slab to copy rows from
        destslab (LmdbSlab): which slab to copy rows to
        destdbname (str): the name of the database to copy rows to in destslab
        progresscb (Callable[int]): if not None, this function will be periodically called with the number of rows completed

    Returns:
        (int): the number of rows copied

    Note:
        If any rows already exist in the target database, this method raises an exception.  This means that one
        cannot use destdbname=None unless there are no explicit databases in the destination slab.
    '''
    destdb = destslab.initdb(destdbname, sourcedb.dupsort)

    statdict = destslab.stat(db=destdb)
    if statdict['entries'] > 0:
        raise s_exc.DataAlreadyExists()

    rowcount = 0

    for chunk in s_common.chunks(self.scanByFull(db=sourcedb), COPY_CHUNKSIZE):
        ccount, acount = destslab.putmulti(chunk, dupdata=True, append=True, db=destdb)
        if ccount != len(chunk) or acount != len(chunk):
            raise s_exc.BadCoreStore(mesg='Unexpected number of values written')  # pragma: no cover

        rowcount += len(chunk)
        if progresscb is not None and 0 == (rowcount % PROGRESS_PERIOD):
            progresscb(rowcount)

    return rowcount
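# A hypothetical copydb() call under the signature above; the slab and db
# names are illustrative only:
#
#   def onprogress(rowcount):
#       logger.debug('copydb: %d rows copied so far', rowcount)
#
#   total = oldslab.copydb(seqndb, newslab, destdbname='seqn', progresscb=onprogress)
#
# Rows are copied in COPY_CHUNKSIZE batches via putmulti(append=True), which
# is fast but requires the destination database to start out empty.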
async def runAxonTestBase(self, axon):

    tick = s_common.now()

    # asdfhash test
    self.false(await axon.has(asdfhash))

    with self.raises(s_exc.NoSuchFile):
        async for _ in axon.get(asdfhash):
            pass

    with self.raises(s_exc.NoSuchFile):
        await axon.hashset(asdfhash)

    self.len(0, [item async for item in axon.hashes(0)])

    async with await axon.upload() as fd:
        await fd.write(abuf)
        self.eq(asdfretn, await fd.save())

    # do it again to test the short circuit
    async with await axon.upload() as fd:
        await fd.write(abuf)
        self.eq(asdfretn, await fd.save())

    bytz = []
    async for byts in axon.get(asdfhash):
        bytz.append(byts)

    self.eq(b'asdfasdf', b''.join(bytz))

    self.true(await axon.has(asdfhash))
    self.eq(8, await axon.size(asdfhash))

    # bbufhash test
    self.false(await axon.has(bbufhash))
    self.eq((bbufhash,), await axon.wants((bbufhash, asdfhash)))

    async with await axon.upload() as fd:
        await fd.write(bbuf)
        self.eq(bbufretn, await fd.save())

    self.true(await axon.has(asdfhash))
    self.true(await axon.has(bbufhash))
    await self.check_blob(axon, bbufhash)
    self.eq((), await axon.wants((bbufhash, asdfhash)))

    # put() / puts() tests
    # These don't add new data; but exercise apis to load data
    retn = await axon.put(abuf)
    self.eq(retn, asdfretn)

    retn = await axon.puts([abuf, bbuf])
    self.eq(retn, (asdfretn, bbufretn))

    # History and metrics
    items = [x async for x in axon.hashes(0)]
    self.eq(((0, (asdfhash, 8)), (1, (bbufhash, 33554437))), items)

    items = [x[1] async for x in axon.history(tick)]
    self.eq(((asdfhash, 8), (bbufhash, 33554437)), items)

    items = [x[1] async for x in axon.history(0, tock=1)]
    self.eq((), items)

    info = await axon.metrics()
    self.eq(33554445, info.get('size:bytes'))
    self.eq(2, info.get('file:count'))

    # Empty file test
    async with await axon.upload() as fd:
        await fd.write(b'')
        self.eq(emptyretn, await fd.save())

    info = await axon.metrics()
    self.eq(33554445, info.get('size:bytes'))
    self.eq(3, info.get('file:count'))

    bytz = []
    async for byts in axon.get(emptyhash):
        bytz.append(byts)

    self.eq(b'', b''.join(bytz))

    # Healthcheck test
    snfo = await axon.getHealthCheck()
    self.eq(snfo.get('status'), 'nominal')
    axfo = [comp for comp in snfo.get('components') if comp.get('name') == 'axon'][0]
    self.eq(axfo.get('data'), await axon.metrics())

    # Upload context reuse
    with mock.patch('synapse.axon.MAX_SPOOL_SIZE', s_axon.CHUNK_SIZE * 2):

        very_bigbuf = (s_axon.MAX_SPOOL_SIZE + 2) * b'V'
        vbighash = hashlib.sha256(very_bigbuf).digest()
        vbigretn = (len(very_bigbuf), vbighash)

        async with await axon.upload() as fd:
            # We can reuse the FD _after_ we have called save() on it.
            await fd.write(abuf)
            retn = await fd.save()
            self.eq(retn, asdfretn)

            # Reuse after uploading an existing file
            # Now write a new file
            await fd.write(pbuf)
            retn = await fd.save()
            self.eq(retn, pennretn)
            await self.check_blob(axon, pennhash)

            # Reuse test with large file causing a rollover
            for chunk in s_common.chunks(very_bigbuf, s_axon.CHUNK_SIZE):
                await fd.write(chunk)
            retn = await fd.save()
            self.eq(retn, vbigretn)
            await self.check_blob(axon, vbighash)

            # Reuse test with small file post rollover
            await fd.write(rbuf)
            retn = await fd.save()
            self.eq(retn, rgryretn)
            await self.check_blob(axon, rgryhash)

    info = await axon.metrics()
    self.eq(67108899, info.get('size:bytes'))
    self.eq(6, info.get('file:count'))

    byts = b''.join([s_msgpack.en('foo'), s_msgpack.en('bar'), s_msgpack.en('baz')])
    size, sha256b = await axon.put(byts)

    sha256 = s_common.ehex(sha256b)
    self.eq(('foo', 'bar', 'baz'), [item async for item in axon.iterMpkFile(sha256)])

    # When testing a local axon, we want to ensure that the FD was in fact fini'd
    if isinstance(fd, s_axon.UpLoad):
        self.true(fd.fd.closed)

    self.true(await axon.del_(bbufhash))
    self.eq((False,), await axon.dels((bbufhash,)))

    info = await axon.metrics()
    self.eq(33554474, info.get('size:bytes'))
    self.eq(6, info.get('file:count'))

    self.notin(bbufretn[::-1], [item[1] async for item in axon.hashes(0)])

    self.false(await axon.del_(bbufhash))

    # deleted file re-added gets returned twice by hashes
    retn = await axon.put(bbuf)
    self.eq(retn, bbufretn)
    self.len(2, [item[1] async for item in axon.hashes(0) if item[1][0] == bbufhash])
    self.len(1, [item[1] async for item in axon.hashes(2) if item[1][0] == bbufhash])

    # readlines / jsonlines
    (lsize, l256) = await axon.put(linesbuf)
    (jsize, j256) = await axon.put(jsonsbuf)
    (bsize, b256) = await axon.put(b'\n'.join((jsonsbuf, linesbuf)))

    lines = [item async for item in axon.readlines(s_common.ehex(l256))]
    self.eq(('asdf', '', 'qwer'), lines)

    jsons = [item async for item in axon.jsonlines(s_common.ehex(j256))]
    self.eq(({'foo': 'bar'}, {'baz': 'faz'}), jsons)

    jsons = []
    with self.raises(s_exc.BadJsonText):
        async for item in axon.jsonlines(s_common.ehex(b256)):
            jsons.append(item)
    self.eq(({'foo': 'bar'}, {'baz': 'faz'}), jsons)
def test_axon_cell(self):

    # implement as many tests as possible in this one
    # since it *has* to use a neuron to work correctly

    # put all the things that need fini() into a BusRef...
    with self.getTestDir() as dirn:

        with s_eventbus.BusRef() as bref:

            # neur00 ############################################
            # Set port to zero to allow a port to be automatically assigned during testing
            conf = {'host': 'localhost', 'bind': '127.0.0.1', 'port': 0}
            path = s_common.gendir(dirn, 'neuron')

            logger.debug('Bringing Neuron online')
            neur = s_neuron.Neuron(path, conf)
            bref.put('neur00', neur)

            root = neur.getCellAuth()
            addr = neur.getCellAddr()
            nport = addr[1]  # Save the port for later use

            # blob00 ############################################
            path = s_common.gendir(dirn, 'blob00')
            authblob00 = neur.genCellAuth('blob00')
            s_msgpack.dumpfile(authblob00, os.path.join(path, 'cell.auth'))

            logger.debug('Bringing blob00 online')
            conf = {'host': 'localhost', 'bind': '127.0.0.1'}
            blob00 = s_axon.BlobCell(path, conf)
            bref.put('blob00', blob00)
            self.true(blob00.cellpool.neurwait(timeout=3))

            user = s_cell.CellUser(root)
            blob00sess = user.open(blob00.getCellAddr(), timeout=3)
            bref.put('blob00sess', blob00sess)

            mesg = ('blob:stat', {})
            ok, retn = blob00sess.call(mesg, timeout=3)
            self.true(ok)
            self.eq(retn, {})  # Nothing there yet

            # blob01 ############################################
            path = s_common.gendir(dirn, 'blob01')
            authblob01 = neur.genCellAuth('blob01')
            s_msgpack.dumpfile(authblob01, os.path.join(path, 'cell.auth'))

            blob01conf = dict(conf)
            blob01conf['blob:cloneof'] = 'blob00@localhost'

            logger.debug('Bringing blob01 online')
            blob01 = s_axon.BlobCell(path, blob01conf)
            bref.put('blob01', blob01)
            self.true(blob01.cellpool.neurwait(timeout=3))

            blob01sess = user.open(blob01.getCellAddr(), timeout=3)
            bref.put('blob01sess', blob01sess)
            blob01wait = blob01.waiter(1, 'blob:clone:rows')

            # axon00 ############################################
            path = s_common.gendir(dirn, 'axon00')
            authaxon00 = neur.genCellAuth('axon00')
            s_msgpack.dumpfile(authaxon00, os.path.join(path, 'cell.auth'))

            axonconf = {
                'host': 'localhost',
                'bind': '127.0.0.1',
                'axon:blobs': ('blob00@localhost', ),
            }

            logger.debug('Bringing axon00 online')
            axon00 = s_axon.AxonCell(path, axonconf)
            bref.put('axon00', axon00)
            self.true(axon00.cellpool.neurwait(timeout=3))
            #####################################################

            sess = user.open(axon00.getCellAddr(), timeout=3)
            bref.put('sess', sess)

            # wait for the axon to have blob00
            ready = False
            for i in range(30):
                if axon00.blobs.items():
                    ready = True
                    break
                time.sleep(0.1)

            self.true(ready)

            axon = s_axon.AxonClient(sess)
            blob = s_axon.BlobClient(blob00sess)
            blob01c = s_axon.BlobClient(blob01sess)

            self.eq((), tuple(axon.metrics()))
            self.eq((), tuple(blob.metrics()))

            self.len(1, axon.wants([asdfhash]))

            # Asking for bytes prior to the bytes being present raises
            self.genraises(RetnErr, axon.bytes, asdfhash, timeout=3)

            self.eq(1, axon.save([b'asdfasdf'], timeout=3))

            self.eq((), tuple(axon.metrics(offs=999999999)))
            self.eq((), tuple(blob.metrics(offs=99999999, timeout=3)))

            metrics = list(blob.metrics(timeout=3))
            self.len(1, metrics)
            self.eq(8, metrics[0][1].get('size'))
            self.eq(1, metrics[0][1].get('blocks'))

            self.len(0, axon.wants([asdfhash], timeout=3))

            self.eq(b'asdfasdf', b''.join(axon.bytes(asdfhash, timeout=3)))

            stat = axon.stat(timeout=3)
            self.eq(1, stat.get('files'))
            self.eq(8, stat.get('bytes'))

            # let's see if the bytes made it to the blob clone...
            self.nn(blob01wait.wait(timeout=10))

            newp = os.urandom(32)

            def loop():
                s_common.spin(axon.bytes(newp))

            self.raises(s_exc.RetnErr, loop)

            blob01wait = blob01.waiter(1, 'blob:clone:rows')
            self.eq(qwerhash, axon.upload([b'qwer', b'qwer'], timeout=3))

            self.len(0, axon.wants([qwerhash]))
            self.eq(b'qwerqwer', b''.join(axon.bytes(qwerhash, timeout=3)))

            self.nn(blob01wait.wait(3))

            retn = list(axon.metrics(0, timeout=3))
            self.eq(retn[0][1].get('size'), 8)
            self.eq(retn[0][1].get('cell'), 'blob00@localhost')

            # Try uploading a large file
            logger.debug('Large file test')

            # Monkeypatch axon to a smaller blocksize
            s_axon.blocksize = s_const.kibibyte
            self.raises(RetnErr, axon.locs, bbufhash, timeout=3)

            genr = s_common.chunks(bbuf, s_axon.blocksize)
            blob01wait = blob01.waiter(1, 'blob:clone:rows')
            self.eq(bbufhash, axon.upload(genr, timeout=3))
            self.eq((), axon.wants([bbufhash], timeout=3))

            # Then retrieve it
            size = 0
            gots = []
            testhash = hashlib.sha256()
            for byts in axon.bytes(bbufhash, timeout=3):
                size += len(byts)
                gots.append(byts)
                testhash.update(byts)

            self.eq(bbufhash, testhash.digest())

            try:
                self.eq(size, len(bbuf))
                self.eq(bbufhash, testhash.digest())
            except Exception as e:
                for byts in gots:
                    print(repr(byts))
                print('SIZE: %d/%d' % (size, len(bbuf)))
                raise

            self.nn(blob01wait.wait(3))
            locs = axon.locs(bbufhash, timeout=3)
            self.len(1, locs)
            self.isin('blob00', locs[0][0])

            # Use the buid to retrieve the large file from blob01
            buid = locs[0][1]
            testhash = hashlib.sha256()
            for byts in blob01c.bytes(buid, timeout=3):
                testhash.update(byts)

            self.eq(bbufhash, testhash.digest())

            # Try storing an empty file
            logger.debug('Nullfile test')
            axon.save([b''])
            self.eq((), tuple(axon.wants([nullhash])))

            # Then retrieve it
            parts = []
            for part in axon.bytes(nullhash):
                parts.append(part)
            self.eq([b''], parts)

            logger.debug('Shutdown / restart blob01 test')
            bref.pop('blob01')
            blob01.fini()
            self.true(blob01.isfini)

            axon.save([b'hehehaha'], timeout=3)
            self.eq((), axon.wants([hehahash], timeout=3))

            # Now bring blob01 back online
            logger.debug('Bringing blob01 back online')
            blob01 = s_axon.BlobCell(path, blob01conf)
            bref.put('blob01', blob01)
            self.true(blob01.cellpool.neurwait(timeout=3))
            blob01wait = blob01.waiter(1, 'blob:clone:rows')

            # Cloning should start up shortly
            self.nn(blob01wait.wait(10))

        # Let everything get shut down by the busref fini

        logger.debug('Bringing everything back up')
        with s_eventbus.BusRef() as bref:

            # neur00 ############################################
            conf = {'host': 'localhost', 'bind': '127.0.0.1', 'port': nport}
            path = s_common.gendir(dirn, 'neuron')

            logger.debug('Bringing Neuron Back online')
            neur = s_neuron.Neuron(path, conf)
            bref.put('neur00', neur)
            root = neur.getCellAuth()

            # blob00 ############################################
            path = s_common.gendir(dirn, 'blob00')

            logger.debug('Bringing blob00 back online')
            conf = {'host': 'localhost', 'bind': '127.0.0.1'}
            blob00 = s_axon.BlobCell(path, conf)
            bref.put('blob00', blob00)
            self.true(blob00.cellpool.neurwait(timeout=3))

            user = s_cell.CellUser(root)
            blob00sess = user.open(blob00.getCellAddr(), timeout=3)
            bref.put('blob00sess', blob00sess)

            # blob01 ############################################
            path = s_common.gendir(dirn, 'blob01')
            blob01conf = dict(conf)
            blob01conf['blob:cloneof'] = 'blob00@localhost'

            logger.debug('Bringing blob01 back online')
            blob01 = s_axon.BlobCell(path, blob01conf)
            bref.put('blob01', blob01)
            self.true(blob01.cellpool.neurwait(timeout=3))
            blob01wait = blob01.waiter(1, 'blob:clone:rows')

            # axon00 ############################################
            path = s_common.gendir(dirn, 'axon00')
            authaxon00 = neur.genCellAuth('axon00')
            s_msgpack.dumpfile(authaxon00, os.path.join(path, 'cell.auth'))

            axonconf = {
                'host': 'localhost',
                'bind': '127.0.0.1',
                'axon:blobs': ('blob00@localhost', ),
            }

            logger.debug('Bringing axon00 online')
            axon00 = s_axon.AxonCell(path, axonconf)
            bref.put('axon00', axon00)
            self.true(axon00.cellpool.neurwait(timeout=3))
            #####################################################

            sess = user.open(axon00.getCellAddr(), timeout=3)
            bref.put('sess', sess)

            # wait for the axon to have blob00
            ready = False
            for i in range(30):
                if axon00.blobs.items():
                    ready = True
                    break
                time.sleep(0.1)

            self.true(ready)

            axon = s_axon.AxonClient(sess)

            # Try retrieving a large file
            testhash = hashlib.sha256()
            for byts in axon.bytes(bbufhash, timeout=3):
                testhash.update(byts)

            self.eq(bbufhash, testhash.digest())

            # Try saving a new file and an existing file to the cluster and ensure it is replicated
            self.eq((ohmyhash, ), axon.wants((ohmyhash, hehahash, nullhash), 3))
            self.eq(1, axon.save([b'ohmyohmyy', b'']))

            self.nn(blob01wait.wait(10))
async def runAxonTestBase(self, axon):

    tick = s_common.now()

    logger.info('asdfhash test')

    self.false(await axon.has(asdfhash))

    with self.raises(s_exc.NoSuchFile):
        async for _ in axon.get(asdfhash):
            pass

    async with await axon.upload() as fd:
        await fd.write(abuf)
        self.eq(asdfretn, await fd.save())

    # do it again to test the short circuit
    async with await axon.upload() as fd:
        await fd.write(abuf)
        self.eq(asdfretn, await fd.save())

    bytz = []
    async for byts in axon.get(asdfhash):
        bytz.append(byts)

    self.eq(b'asdfasdf', b''.join(bytz))

    self.true(await axon.has(asdfhash))
    self.eq(8, await axon.size(asdfhash))

    logger.info('bbufhash test')

    self.false(await axon.has(bbufhash))
    self.eq((bbufhash,), await axon.wants((bbufhash, asdfhash)))

    async with await axon.upload() as fd:
        await fd.write(bbuf)
        self.eq(bbufretn, await fd.save())

    self.true(await axon.has(asdfhash))
    self.true(await axon.has(bbufhash))
    await self.check_blob(axon, bbufhash)
    self.eq((), await axon.wants((bbufhash, asdfhash)))

    logger.info('put() / puts() tests')
    # These don't add new data; but exercise apis to load data
    retn = await axon.put(abuf)
    self.eq(retn, asdfretn)

    retn = await axon.puts([abuf, bbuf])
    self.eq(retn, (asdfretn, bbufretn))

    logger.info('History and metrics')
    items = [x async for x in axon.hashes(0)]
    self.eq(((0, (asdfhash, 8)), (1, (bbufhash, 33554437))), items)

    items = [x[1] async for x in axon.history(tick)]
    self.eq(((asdfhash, 8), (bbufhash, 33554437)), items)

    items = [x[1] async for x in axon.history(0, tock=1)]
    self.eq((), items)

    info = await axon.metrics()
    self.eq(33554445, info.get('size:bytes'))
    self.eq(2, info.get('file:count'))

    logger.info('Empty file test')
    async with await axon.upload() as fd:
        await fd.write(b'')
        self.eq(emptyretn, await fd.save())

    info = await axon.metrics()
    self.eq(33554445, info.get('size:bytes'))
    self.eq(3, info.get('file:count'))

    bytz = []
    async for byts in axon.get(emptyhash):
        bytz.append(byts)

    self.eq(b'', b''.join(bytz))

    logger.info('Healthcheck test')
    snfo = await axon.getHealthCheck()
    self.eq(snfo.get('status'), 'nominal')
    axfo = [comp for comp in snfo.get('components') if comp.get('name') == 'axon'][0]
    self.eq(axfo.get('data'), await axon.metrics())

    logger.info('Upload context reuse')
    with mock.patch('synapse.axon.MAX_SPOOL_SIZE', s_axon.CHUNK_SIZE * 2):

        very_bigbuf = (s_axon.MAX_SPOOL_SIZE + 2) * b'V'
        vbighash = hashlib.sha256(very_bigbuf).digest()
        vbigretn = (len(very_bigbuf), vbighash)

        async with await axon.upload() as fd:
            # We can reuse the FD _after_ we have called save() on it.
            await fd.write(abuf)
            retn = await fd.save()
            self.eq(retn, asdfretn)

            logger.info('Reuse after uploading an existing file')
            # Now write a new file
            await fd.write(pbuf)
            retn = await fd.save()
            self.eq(retn, pennretn)
            await self.check_blob(axon, pennhash)

            logger.info('Reuse test with large file causing a rollover')
            for chunk in s_common.chunks(very_bigbuf, s_axon.CHUNK_SIZE):
                await fd.write(chunk)
            retn = await fd.save()
            self.eq(retn, vbigretn)
            await self.check_blob(axon, vbighash)

            logger.info('Reuse test with small file post rollover')
            await fd.write(rbuf)
            retn = await fd.save()
            self.eq(retn, rgryretn)
            await self.check_blob(axon, rgryhash)

    info = await axon.metrics()
    self.eq(67108899, info.get('size:bytes'))
    self.eq(6, info.get('file:count'))

    byts = b''.join([s_msgpack.en('foo'), s_msgpack.en('bar'), s_msgpack.en('baz')])
    size, sha256b = await axon.put(byts)

    sha256 = s_common.ehex(sha256b)
    self.eq(('foo', 'bar', 'baz'), [item async for item in axon.iterMpkFile(sha256)])

    # When testing a local axon, we want to ensure that the FD was in fact fini'd
    if isinstance(fd, s_axon.UpLoad):
        self.true(fd.fd.closed)