async def _storBuidSet(self, oper):
    '''
    Move all bybuid rows for a node from an old buid to a new buid,
    updating the byuniv and byprop indexes to match.

    Args:
        oper: A storage operation tuple of (name, (form, oldbuid, newbuid)).
    '''
    _, (form, oldb, newb) = oper

    fenc = self.encoder[form]

    # byprop/byuniv index values are msgpacked one-tuples of the buid
    pvoldval = s_msgpack.en((oldb, ))
    pvnewval = s_msgpack.en((newb, ))

    for lkey, lval in self.layrslab.scanByPref(oldb, db=self.bybuid):

        # bybuid keys are <32 byte buid><prop utf8>
        penc = lkey[32:]
        valu, indx = s_msgpack.un(lval)

        # NOTE(review): if indx is None, the concatenations below raise
        # TypeError; the migration variant of this method guards with
        # `if indx is not None` — confirm indx is always bytes here.
        if penc[0] in (46, 35):  # ".univ" or "#tag"
            byunivkey = penc + indx
            self.layrslab.put(byunivkey, pvnewval, db=self.byuniv)
            self.layrslab.delete(byunivkey, pvoldval, db=self.byuniv)

        bypropkey = fenc + penc + indx
        self.layrslab.put(bypropkey, pvnewval, db=self.byprop)
        self.layrslab.delete(bypropkey, pvoldval, db=self.byprop)

        # re-home the row under the new buid and drop the old row
        self.layrslab.put(newb + penc, lval, db=self.bybuid)
        self.layrslab.delete(lkey, db=self.bybuid)
def _storPropSetCommon(self, buid, penc, bpkey, pvpref, univ, valu, indx):
    '''
    Store a property row and maintain the byprop/byuniv secondary indexes.

    Args:
        buid (bytes): The node buid.
        penc (bytes): The utf8 encoded property name.
        bpkey (bytes): The bybuid key (<buid><prop>) for the row.
        pvpref (bytes): The byprop key prefix for this form/prop.
        univ (bool): Whether the prop is universal (also indexed in byuniv).
        indx (bytes|None): Index bytes for the value, or None if not indexed.
    '''
    bpval = s_msgpack.en((valu, indx))

    # index rows store a msgpacked one-tuple of the buid
    pvvalu = s_msgpack.en((buid, ))

    # replace() hands back the previous row (if any) so it can be unindexed
    byts = self.layrslab.replace(bpkey, bpval, db=self.bybuid)
    if byts is not None:

        oldv, oldi = s_msgpack.un(byts)
        if oldi is not None:
            self.layrslab.delete(pvpref + oldi, pvvalu, db=self.byprop)

            if univ:
                self.layrslab.delete(penc + oldi, pvvalu, db=self.byuniv)

    if indx is not None:
        self.layrslab.put(pvpref + indx, pvvalu, dupdata=True, db=self.byprop)

        if univ:
            self.layrslab.put(penc + indx, pvvalu, dupdata=True, db=self.byuniv)
def dump_rows(outp, fd, store, compress=False, genrows_kwargs=None):
    '''
    Dump all rows from a storage object to a file descriptor, buffering
    writes into roughly DUMP_MEGS sized chunks.

    Args:
        outp: Output object for progress messages.
        fd: A file-like object opened for binary writing.
        store: The storage object to pull rows from.
        compress (bool): If True, gzip-compress each row payload.
        genrows_kwargs (dict): Extra kwargs for store.genStoreRows().

    Returns:
        None
    '''
    outp.printf('Starting row dump')
    if not genrows_kwargs:
        genrows_kwargs = {}

    i = 0          # total rows dumped
    j = 0          # rows dumped since the last flush
    cur_bytes = 0  # bytes buffered since the last flush
    bufs = []

    kwargs = preset_args.get(store.getStoreType(), {})
    kwargs.update(genrows_kwargs)

    tick = time.time()
    for rows in store.genStoreRows(**kwargs):
        j += len(rows)
        i += len(rows)
        tufo = s_tufo.tufo('core:save:add:rows', rows=rows)
        if compress:
            tufo[1]['rows'] = gzip.compress(s_msgpack.en(rows), 9)
        byts = s_msgpack.en(tufo)
        bufs.append(byts)
        cur_bytes += len(byts)

        if cur_bytes > s_const.mebibyte * DUMP_MEGS:
            # bug fix (idiom): join the buffer list directly instead of
            # copying it through a pointless identity comprehension
            fd.write(b''.join(bufs))
            outp.printf('Stored {} rows, total {} rows'.format(j, i))
            bufs = []
            cur_bytes = 0
            j = 0

    # There still may be rows we need to write out.
    if bufs:
        fd.write(b''.join(bufs))
        outp.printf('Stored {} rows, total {} rows'.format(j, i))
        bufs = []

    tock = time.time()
    outp.printf('Done dumping rows - took {} seconds.'.format(tock - tick))
    outp.printf('Dumped {} rows'.format(i))
def test_lib_crypto_tnfl_break(self):
    '''Missing or tampered envelope fields must make decryption return None.'''
    ekey = s_tinfoil.newkey()
    tinh = s_tinfoil.TinFoilHat(ekey)
    goodbyts = tinh.enc(b'foobar', b'hehe')
    edict = s_msgpack.un(goodbyts)

    # Empty values will fail to decrypt
    for key in ('iv', 'data', 'asscd'):
        bdict = {k: v for k, v in edict.items() if k != key}
        byts = s_msgpack.en(bdict)
        self.none(tinh.dec(byts))

    # Tampered values will fail
    for key in ('iv', 'data', 'asscd'):
        bdict = {k: v for k, v in edict.items()}
        bdict[key] = os.urandom(16)
        byts = s_msgpack.en(bdict)
        self.none(tinh.dec(byts))
def add(self, item, indx=None):
    '''
    Add a single item to the sequence.

    Args:
        item: A msgpack compatible value to store.
        indx (int|None): An explicit index for the item; None appends
            at the next free index.

    Returns:
        int: The index the item was stored at.
    '''
    if indx is not None:

        if indx >= self.indx:
            # at/past the end: append and advance the next-index marker
            self.slab.put(s_common.int64en(indx), s_msgpack.en(item), append=True, db=self.db)
            self.indx = indx + 1
            self.size += 1
            self._wake_waiters()
            return indx

        # overwrite an existing slot; only grow size if it was empty
        # NOTE(review): waiters are not woken on this path — confirm intended.
        oldv = self.slab.replace(s_common.int64en(indx), s_msgpack.en(item), db=self.db)
        if oldv is None:
            self.size += 1

        return indx

    indx = self.indx
    retn = self.slab.put(s_common.int64en(indx), s_msgpack.en(item), append=True, db=self.db)
    assert retn, "Not adding the largest index"

    self.indx += 1
    self.size += 1

    self._wake_waiters()

    return indx
def persist(self, progressonly=False, txn=None):
    '''
    Persists the index info to the database

    Args:
        progressonly (bool): if True, only persists the progress (i.e. more dynamic) information
        txn (Optional[lmdb.Transaction]): if not None, will use that transaction to record data.
            txn is not committed.

    Returns:
        None
    '''
    d = {
        # Bug fix: this key was written as 'delete', but the loader reads
        # 'deleting' — the persisted deleting list was silently dropped
        # on reload.
        'deleting': self.deleting,
        'present': {k: metaentry.en() for k, metaentry in self.indices.items()}
    }

    with contextlib.ExitStack() as stack:
        if txn is None:
            # open (and auto-commit) our own write transaction
            txn = stack.enter_context(
                self._dbenv.begin(db=self._metatbl, buffers=True, write=True))

        if not progressonly:
            txn.put(b'indices', s_msgpack.en(d), db=self._metatbl)

        txn.put(b'progress', s_msgpack.en(self.progresses), db=self._metatbl)
def test_msgpack_bad_types(self):
    '''Unserializable types must raise NotMsgpackSafe from s_msgpack.en().'''
    # sets, exception instances and functions are not msgpack encodable
    self.raises(s_exc.NotMsgpackSafe, s_msgpack.en, {1, 2})
    self.raises(s_exc.NotMsgpackSafe, s_msgpack.en, Exception())
    self.raises(s_exc.NotMsgpackSafe, s_msgpack.en, s_msgpack.en)

    # too long
    with self.raises(s_exc.NotMsgpackSafe) as cm:
        s_msgpack.en({'longlong': 45234928034723904723906})
    self.isin('OverflowError', cm.exception.get('mesg'))
def _addRows(self, rows):
    '''
    Adds a bunch of rows to the database

    Take care: this was written this way for performance, in particular when
    len(rows) is large.

    Args:
        rows: iterable of (iden, prop, valu, time) tuples.

    Raises:
        s_common.HitCoreLimit: On primary key exhaustion or oversized props.
        s_common.BadCoreStore: If a primary key already exists (should never happen).
    '''
    encs = []

    with self._getTxn(write=True) as txn:
        next_pk = self.next_pk

        # First, we encode all the i, p, v, t for all rows
        for i, p, v, t in rows:
            if next_pk > MAX_PK:
                raise s_common.HitCoreLimit(name='MAX_PK', size=MAX_PK, mesg='Out of primary key values')
            if len(p) > MAX_PROP_LEN:
                raise s_common.HitCoreLimit(name='MAX_PROP_LEN', size=MAX_PROP_LEN, mesg='Property length too large')
            i_enc = _encIden(i)
            p_enc = _encProp(p)
            v_key_enc = _encValKey(v)
            t_enc = s_msgpack.en(t)
            pk_enc = _encPk(next_pk)
            row_enc = s_msgpack.en((i, p, v, t))

            # idx          0      1       2       3       4          5
            encs.append((i_enc, p_enc, row_enc, t_enc, v_key_enc, pk_enc))
            next_pk += 1

        # An iterator of what goes into the main table: key=pk_enc, val=encoded(i, p, v, t)
        kvs = ((x[5], x[2]) for x in encs)

        # Shove it all in at once
        consumed, added = txn.cursor(self.rows).putmulti(kvs, overwrite=False, append=True)
        if consumed != added or consumed != len(encs):
            # Will only fail if record already exists, which should never happen
            raise s_common.BadCoreStore(store='lmdb', mesg='unexpected pk in DB')

        # Update the indices for all rows
        kvs = ((x[0] + x[1], x[5]) for x in encs)
        txn.cursor(self.index_ip).putmulti(kvs, dupdata=True)
        kvs = ((x[1] + x[4] + x[3], x[5]) for x in encs)
        txn.cursor(self.index_pvt).putmulti(kvs, dupdata=True)
        kvs = ((x[1] + x[3], x[5]) for x in encs)
        txn.cursor(self.index_pt).putmulti(kvs, dupdata=True)

        # self.next_pk should be protected from multiple writers. Luckily lmdb
        # write lock does that for us.
        self.next_pk = next_pk
def dump_blobs(outp, fd, store):
    '''Serialize every key/value pair from the store blobspace to fd.'''
    outp.printf('Dumping blobstore')

    count = 0
    for key in store.getBlobKeys():
        valu = store.getBlobValu(key)
        tufo = s_tufo.tufo('syn:core:blob:set', key=key, valu=s_msgpack.en(valu))
        fd.write(s_msgpack.en(tufo))
        count += 1

    outp.printf('Done dumping {} keys from blobstore.'.format(count))
def __init__(self, model: s_datamodel.Model, dbenv: lmdb.Environment) -> None:
    '''
    Creates metadata for all the indices.

    Args:
        model (s_datamodel.Model): the data model used to interpret the indices.
        dbenv (lmdb.Environment): the lmdb instance in which to store the metadata.

    Returns:
        None
    '''
    self._dbenv = dbenv
    self.model = model

    # The table in the database file (N.B. in LMDB speak, this is called a database)
    self._metatbl = dbenv.open_db(b'meta')

    is_new_db = False

    with dbenv.begin(db=self._metatbl, buffers=True) as txn:
        indices_enc = txn.get(b'indices')
        progress_enc = txn.get(b'progress')

        if indices_enc is None or progress_enc is None:
            if indices_enc is None and progress_enc is None:
                # brand new database: bootstrap empty metadata
                is_new_db = True
                indices_enc = s_msgpack.en({'present': {}, 'deleting': []})
                progress_enc = s_msgpack.en({})
            else:
                # exactly one of the two keys present: inconsistent state
                raise s_exc.CorruptDatabase(
                    'missing meta information in index meta'
                )  # pragma: no cover

        # decode while the txn buffers are still valid (buffers=True)
        indices = s_msgpack.un(indices_enc)

        # The details about what the indices are actually indexing: the datapath and type.
        self.indices = {
            k: _MetaEntry(model, **s_msgpack.un(v))
            for k, v in indices.get('present', {}).items()
        }
        self.deleting = list(indices.get('deleting', ()))

        # Keeps track (non-persistently) of which indices have been paused
        self.asleep = defaultdict(bool)  # type: ignore

        # How far each index has progressed as well as statistics
        self.progresses = s_msgpack.un(progress_enc)

        if not all(p in self.indices for p in self.deleting):
            raise s_exc.CorruptDatabase(
                'index meta table: deleting entry with unrecognized property name'
            )  # pragma: no cover

        if not all(p in self.indices for p in self.progresses):
            raise s_exc.CorruptDatabase(
                'index meta table: progress entry with unrecognized property name'
            )  # pragma: no cover

    if is_new_db:
        # write the bootstrap metadata to disk
        self.persist()
def puts(self, items, seqn=None):
    '''
    Add the structured data from items to the CryoTank.

    Args:
        items (list): A list of objects to store in the CryoTank.
        seqn (iden, offs): An iden / offset pair to record.

    Returns:
        int: The ending offset of the items or seqn.
    '''
    itembyts = [s_msgpack.en(i) for i in items]

    tick = s_common.now()
    bytesize = sum([len(b) for b in itembyts])

    with self.lenv.begin(db=self.lenv_items, write=True) as xact:

        # build the (key, byts) rows at sequential offsets
        todo = []
        for byts in itembyts:
            todo.append((struct.pack('>Q', self.items_indx), byts))
            self.items_indx += 1

        retn = self.items_indx

        with xact.cursor() as curs:
            curs.putmulti(todo, append=True)

        took = s_common.now() - tick

        # record ingest metrics alongside the items
        with xact.cursor(db=self.lenv_metrics) as curs:

            lkey = struct.pack('>Q', self.metrics_indx)
            self.metrics_indx += 1

            info = {'time': tick, 'count': len(items), 'size': bytesize, 'took': took}
            curs.put(lkey, s_msgpack.en(info), append=True)

        if seqn is not None:
            # record the new offset for the given iden
            iden, offset = seqn
            nextoff = offset + len(items)
            self.offs.xset(xact, iden, nextoff)
            retn = nextoff

    self.schedCoroSafe(self.fire('cryotank:puts', numrecords=len(itembyts)))

    return retn
def test_fmt_mpk(self):
    '''iterdata() must decode consecutive msgpack objects from a .mpk file.'''
    with self.getTestDir() as dirn:

        path = s_common.genpath(dirn, 'woot.mpk')

        with s_common.genfile(path) as fd:
            for valu in ('foo.com', 'bar.com'):
                fd.write(s_msgpack.en(valu))

        with s_common.genfile(path) as fd:
            lines = list(s_encoding.iterdata(fd, close_fd=False, format='mpk'))

        self.len(2, lines)
        self.eq(lines, ['foo.com', 'bar.com'])
def test_msgpack_large_data(self):
    '''A value over 128MiB must round-trip through every unpack path.'''
    # 129 MiB of data — presumably chosen to exceed a 128MiB unpacker
    # limit; confirm against the msgpack Unpacker max_*_len defaults.
    big_string = s_const.mebibyte * 129 * 'V'

    struct = ('test', {'key': big_string})

    buf = s_msgpack.en(struct)

    unpacked_struct = s_msgpack.un(buf)
    self.eq(struct, unpacked_struct)

    # Ensure our use of msgpack.Unpacker can also handle this data
    with self.getTestDir() as dirn:
        with s_common.genfile(dirn, 'test.mpk') as fd:
            fd.write(buf)
        with s_common.genfile(dirn, 'test.mpk') as fd:
            genr = s_msgpack.iterfd(fd)
            objs = list(genr)
            self.len(1, objs)
            self.eq(objs[0], struct)

    # Ensure that our streaming Unpk object can also handle this data
    unpk = s_msgpack.Unpk()
    objs = unpk.feed(buf)
    self.len(1, objs)
    # feed() yields (bytes consumed, object) pairs
    self.eq(objs[0], (135266320, struct))
def test_msgpack_surrogates(self):
    '''Strings containing lone surrogates must survive a msgpack round trip.'''
    bads = '\u01cb\ufffd\ud842\ufffd\u0012'

    obyts = s_msgpack.en(bads)
    self.isinstance(obyts, bytes)

    outs = s_msgpack.un(obyts)
    self.eq(outs, bads)

    with self.getTestDir() as fdir:
        fd = s_common.genfile(fdir, 'test.mpk')
        fd.write(obyts)
        fd.close()

        fd = s_common.genfile(fdir, 'test.mpk')

        gen = s_msgpack.iterfd(fd)

        items = [obj for obj in gen]
        self.len(1, items)
        # NOTE(review): this re-checks outs == bads from above; items[0]
        # looks like the intended subject of the assertion — confirm.
        self.eq(outs, bads)
        fd.close()

    # streaming unpacker path; 13 is the bytes consumed for this value
    unpk = s_msgpack.Unpk()
    ret = unpk.feed(obyts)
    self.len(1, ret)
    self.eq([(13, bads)], ret)
async def test_cryo_cell_indexing(self):
    '''Exercise the index management API of a remote cryo cell tank.'''
    # conf = {'defvals': {'mapsize': s_t_utils.TEST_MAP_SIZE}}
    async with self.getTestDmon(mirror='cryodmon') as dmon, \
            await self.agetTestProxy(dmon, 'cryo00') as ccell, \
            await self.agetTestProxy(dmon, 'cryo00/woot:woot') as tank:

        # Setting the _chunksize to 1 forces iteration on the client
        # side of puts, as well as the server-side.
        tank._chunksize = 1
        await tank.puts(cryodata)

        # Test index operations
        self.eq((), await tank.getIndices())
        await self.asyncraises(s_exc.BadOperArg, tank.addIndex('prop1', 'str', []))
        await tank.addIndex('prop1', 'str', ['0'])
        await tank.delIndex('prop1')
        await self.asyncraises(s_exc.NoSuchIndx, tank.delIndex('noexist'))
        await tank.addIndex('prop1', 'str', ['0'])
        await tank.pauseIndex('prop1')
        await tank.pauseIndex()
        await tank.resumeIndex()

        # query by normalized value, with and without a value prefix
        self.eq([(1, 'baz'), (0, 'foo')], await alist(await tank.queryNormValu('prop1')))
        self.eq([(1, 'baz')], await alist(await tank.queryNormValu('prop1', valu='b')))
        self.eq([], await alist(await tank.queryNormValu('prop1', valu='bz')))
        self.eq([(1, {'prop1': 'baz'})], (await alist(await tank.queryNormRecords('prop1', valu='b'))))
        # raw rows come back msgpack encoded
        self.eq([(1, s_msgpack.en(('baz', {'faz': 20})))],
                await alist(await tank.queryRows('prop1', valu='b')))

        # a tank initialized with noindex must not index anything
        await ccell.init('woot:boring', {'noindex': True})
        async with await self.agetTestProxy(dmon, 'cryo00/woot:boring') as tank2:
            self.eq([], await tank2.getIndices())
def main(argv, outp=None):
    '''Convert newline-delimited JSON files into msgpack (.mpk) files.'''
    if outp is None:  # pragma: no cover
        outp = s_output.OutPut()

    opts = getArgParser().parse_args(argv)

    for path in opts.paths:

        if not path.endswith('.json'):
            outp.printf('skip: %s (not .json extension)' % (path,))
            continue

        if not os.path.isfile(path):
            outp.printf('skip: %s (not a file)' % (path,))
            continue

        newp = path[:-5] + '.mpk'

        outp.printf('converting: %s -> .mpk' % (path,))

        with open(path, 'r', encoding='utf8') as fd:
            with open(newp, 'wb') as pk:
                for line in fd:
                    pk.write(s_msgpack.en(json.loads(line)))

        if opts.rm:
            os.unlink(path)
def hashitem(item):
    '''
    Generate a uniq hash for the JSON compatible primitive data structure.
    '''
    return hashlib.md5(s_msgpack.en(normitem(item))).hexdigest()
def main(argv, outp=None):
    '''
    CLI entry point: dump a cortex storage backend to a msgpack backup file.

    Returns:
        int: 0 on success, 1 if the output file exists and --force was not given.
    '''
    if outp is None:  # pragma: no cover
        outp = s_output.OutPut()

    parser = makeargpaser()
    opts = parser.parse_args(argv)

    if not opts.verbose:
        logging.disable(logging.DEBUG)

    # refuse to clobber an existing backup unless forced
    if os.path.isfile(opts.output) and not opts.force:
        outp.printf('Cannot overwrite a backup.')
        return 1

    genrows_kwargs = {}
    if opts.extra_args:
        # extra row-generation kwargs may be supplied as a JSON file
        with open(opts.extra_args, 'rb') as fd:
            genrows_kwargs = json.loads(fd.read().decode())

    storconf = {'rev:storage': False}
    if opts.revstorage:
        storconf['rev:storage'] = True

    backup_tufo = gen_backup_tufo(opts)

    with open(opts.output, 'wb') as fd:
        # the first record in the backup file describes the backup itself
        fd.write(s_msgpack.en(backup_tufo))
        with s_cortex.openstore(opts.store, storconf=storconf) as store:
            dump_store(outp, fd, store, compress=opts.compress,
                       dump_blobstore=opts.dump_blobstore,
                       genrows_kwargs=genrows_kwargs)

    outp.printf('Fin')
    return 0
def _storPropDel(self, oper):
    '''
    Delete a property row for a node and remove its byprop/byuniv index entries.

    Args:
        oper: A storage operation tuple of (name, (buid, form, prop, info)).
    '''
    _, (buid, form, prop, info) = oper

    fenc = form.encode() + b'\x00'
    penc = prop.encode() + b'\x00'

    if prop:
        bpkey = buid + prop.encode()
    else:
        # an empty prop name means the node value row: <buid>*<form>
        bpkey = buid + b'*' + form.encode()

    univ = info.get('univ')

    # pop returns the previous row (or None if it did not exist)
    byts = self.layrslab.pop(bpkey, db=self.bybuid)
    if byts is None:
        return

    # NOTE(review): assumes buid is present in the cache — confirm the cache
    # object tolerates a missing key (a plain dict would raise KeyError).
    del self.buidcache[buid]

    oldv, oldi = s_msgpack.un(byts)

    pvvalu = s_msgpack.en((buid,))

    if oldi is not None:
        self.layrslab.delete(fenc + penc + oldi, pvvalu, db=self.byprop)

        if univ:
            self.layrslab.delete(penc + oldi, pvvalu, db=self.byuniv)
def _calcFirstLastKeys(prop, valu, mintime, maxtime):
    '''
    Returns the encoded bytes for the start and end keys to the pt or pvt
    index.  Helper function for _{get,del}RowsByProp

    Returns:
        tuple: (first_key, last_key, v_is_hashed, do_fast_compare) where
        last_key is None and do_fast_compare is True when no time bounds
        were given (prefix scan covers everything).
    '''
    p_enc = _encProp(prop)
    v_key_enc = b'' if valu is None else _encValKey(valu)

    # large values are stored hashed; callers must re-check actual values
    v_is_hashed = valu is not None and (v_key_enc[0] == HASH_VAL_MARKER_ENC)

    if mintime is None and maxtime is None:
        # no time bounds: scan the whole prop/valu prefix
        return (p_enc + v_key_enc, None, v_is_hashed, True)

    mintime_enc = b'' if mintime is None else s_msgpack.en(mintime)
    maxtime_enc = MAX_TIME_ENC if maxtime is None else s_msgpack.en(maxtime)

    first_key = p_enc + v_key_enc + mintime_enc
    last_key = p_enc + v_key_enc + maxtime_enc

    return (first_key, last_key, v_is_hashed, False)
def _tx_real(self, mesg):
    '''Encrypt a message and transmit it over the link as an xmit event.'''
    if self.txtinh is None:
        raise s_exc.NotReady()

    byts = self.txtinh.enc(s_msgpack.en(mesg))
    self.link.tx(('xmit', {'data': byts}))
async def puts(self, name, items, reqid=None):
    '''
    Add items to the named queue.

    Args:
        name (str): The queue name.
        items: Iterable of msgpack compatible values to add.
        reqid: Optional request id used to deduplicate retried puts.

    Returns:
        int: The queue offset prior to adding the items.

    Raises:
        s_exc.NoSuchName: If the queue does not exist.
    '''
    if self.queues.get(name) is None:
        mesg = f'No queue named {name}.'
        raise s_exc.NoSuchName(mesg=mesg, name=name)

    abrv = self.abrv.nameToAbrv(name)

    offs = retn = self.offsets.get(name, 0)

    if reqid is not None:
        # a repeated reqid means a retry of a put we already applied
        if reqid == self.lastreqid.get(name):
            return retn

        self.lastreqid.set(name, reqid)

    for item in items:
        putv = self.slab.put(abrv + s_common.int64en(offs), s_msgpack.en(item), db=self.qdata)
        assert putv, 'Put failed'
        self.sizes.inc(name, 1)
        offs = self.offsets.inc(name, 1)

    # wake the sleepers
    evnt = self.waiters.get(name)
    if evnt is not None:
        evnt.set()

    return retn
async def test_modification_persistence(self):
    '''Trigger mods must survive a cortex restart; v0 rules migrate to v1.'''
    with self.getTestDir() as fdir:

        async with self.getTestCore(dirn=fdir) as core:
            rootiden = core.auth.getUserByName('root').iden

            core.triggers.add('root', 'node:add', '[inet:user=1] | testcmd', info={'form': 'inet:ipv4'})
            triggers = core.triggers.list()
            self.eq(triggers[0][1].get('storm'), '[inet:user=1] | testcmd')

            iden = triggers[0][0]
            core.triggers.mod(iden, '[inet:user=2 .test:univ=4] | testcmd')
            triggers = core.triggers.list()
            self.eq(triggers[0][1].get('storm'), '[inet:user=2 .test:univ=4] | testcmd')

            # Sad case
            self.raises(s_exc.BadSyntax, core.triggers.mod, iden, ' | | badstorm ')
            self.raises(s_exc.NoSuchIden, core.triggers.mod, 'deadb33f', 'inet:user')

            # Manually store a v0 trigger
            ruledict = {'ver': 0, 'cond': 'node:add', 'form': 'inet:ipv4',
                        'user': '******', 'storm': 'testcmd'}
            iden = b'\xff' * 16
            core.slab.put(iden, s_msgpack.en(ruledict), db=core.triggers.trigdb)

        # restart the cortex from the same directory
        async with self.getTestCore(dirn=fdir) as core:
            triggers = core.triggers.list()
            self.len(2, triggers)
            self.eq(triggers[0][1].get('storm'), '[inet:user=2 .test:univ=4] | testcmd')

            # Verify that the v0 trigger was migrated correctly
            iden2, trig2 = triggers[1]
            self.eq(iden2, 'ff' * 16)
            self.eq(trig2['useriden'], rootiden)
            self.eq(trig2['ver'], 1)
            self.eq(trig2['storm'], 'testcmd')
async def puts(self, name, items):
    '''Append items to the named queue and wake any waiting consumers.'''
    if self.queues.get(name) is None:
        mesg = f'No queue named {name}.'
        raise s_exc.NoSuchName(mesg=mesg, name=name)

    abrv = self.abrv.nameToAbrv(name)

    offs = retn = self.offsets.get(name, 0)

    for item in items:
        lkey = abrv + s_common.int64en(offs)
        self.slab.put(lkey, s_msgpack.en(item), db=self.qdata)
        self.sizes.inc(name, 1)
        offs = self.offsets.inc(name, 1)

    # wake the sleepers
    waiter = self.waiters.get(name)
    if waiter is not None:
        waiter.set()

    return retn
def norm(self, valu):
    '''
    Normalize a JSON-safe value by round-tripping it through msgpack.

    Raises:
        s_exc.BadTypeValu: If the value is not JSON safe.
    '''
    try:
        s_common.reqjsonsafe(valu)
    except s_exc.MustBeJsonSafe as e:
        raise s_exc.BadTypeValu(name=self.name, valu=valu, mesg=str(e)) from None

    return s_msgpack.un(s_msgpack.en(valu)), {}
def addRole(self, name):
    '''
    Add a new role to the auth system.

    Args:
        name (str): The role name.

    Returns:
        Role: The newly created role.

    Raises:
        s_exc.DupRoleName: If the role already exists.
    '''
    with self.lenv.begin(write=True) as xact:

        if self.roles.get(name) is not None:
            raise s_exc.DupRoleName(name=name)

        # Role() does rdef validation
        role = Role(self, name)
        self.roles[name] = role

        renc = name.encode('utf8')

        data = role._getAuthData()
        data['vers'] = authver

        # Bug fix: persist the dict that carries the 'vers' key.  The
        # original serialized a fresh role._getAuthData() result, silently
        # dropping 'vers' (addUser persists `data` correctly).
        byts = s_msgpack.en(data)
        xact.put(renc, byts, db=self._db_roles)

        return role
def delRole(self, name):
    '''
    Delete a role from the auth system.

    Args:
        name (str): The user name to delete.

    Returns:
        True: True if the operation succeeded.

    Raises:
        s_exc.NoSuchRole: If the role does not exist.
    '''
    with self.lenv.begin(write=True) as xact:

        role = self.roles.pop(name, None)
        if role is None:
            raise s_exc.NoSuchRole(name=name)

        nenc = name.encode('utf8')
        xact.delete(nenc, db=self._db_roles)

        # strip the role from any user that has it and re-persist that user
        for user in self.users.values():

            role = user.roles.pop(name, None)
            if role is not None:

                nenc = user.name.encode('utf8')

                data = user._getAuthData()
                data['vers'] = authver

                byts = s_msgpack.en(data)
                xact.put(nenc, byts, db=self._db_users)

        return True
async def test_synsplice_remote(self):
    '''Feeding a splice file to a remote cortex must create the node.'''
    async with self.getTestCore() as core:

        await self.addCreatorDeleterRoles(core)

        host, port = await core.dmon.listen('tcp://127.0.0.1:0/')
        curl = f'tcp://*****:*****@{host}:{port}/'

        # write a single node:add splice to a .mpk file
        mesg = ('node:add', {'ndef': ('test:str', 'foo')})
        splicefp = s_common.genpath(core.dirn, 'splice.mpk')
        with s_common.genfile(splicefp) as fd:
            fd.write(s_msgpack.en(mesg))

        argv = ['--cortex', curl, '--format', 'syn.splice',
                '--modules', 'synapse.tests.utils.TestModule', splicefp]

        outp = self.getTestOutp()
        self.eq(await s_feed.main(argv, outp=outp), 0)

        # the splice should have created the node
        nodes = await core.eval('test:str=foo').list()
        self.len(1, nodes)
def migrate_v0_rules(self):
    '''
    Remove any v0 (i.e. pre-010) rules from storage and replace them with v1 rules.

    Notes:
        v0 had two differences
        user was a username. Replaced with iden of user as 'iden' field.
        Also 'iden' was storage as binary. Now it is stored as hex string.
    '''
    for iden, valu in self.core.slab.scanByFull(db=self.trigdb):
        ruledict = s_msgpack.un(valu)
        ver = ruledict.get('ver')
        if ver != 0:
            continue

        username = ruledict.pop('user')
        if username is None:
            logger.warning('Username missing in stored trigger rule %r', iden)
            continue

        # In v0, stored user was username, in >0 user is useriden
        # Bug fix: check the lookup result *before* dereferencing .iden;
        # the original read .iden first, so an unknown username raised
        # AttributeError instead of being logged and skipped.
        user = self.core.auth.getUserByName(username)
        if user is None:
            logger.warning('Unrecognized username in stored trigger rule %r', iden)
            continue

        ruledict['ver'] = 1
        ruledict['useriden'] = user.iden

        # re-key the rule from binary iden to hex string iden
        newiden = s_common.ehex(iden)
        self.core.slab.pop(iden, db=self.trigdb)
        self.core.slab.put(newiden.encode(), s_msgpack.en(ruledict), db=self.trigdb)
def save(self, xact, items):
    '''
    Save a series of items to a sequence.

    Args:
        xact (lmdb.Transaction): An LMDB write transaction.
        items (tuple): The series of items to save into the sequence.

    Returns:
        None
    '''
    base = self.indx

    # encode each item under the next sequential 8-byte big-endian key
    rows = [(struct.pack('>Q', base + i), s_msgpack.en(item))
            for (i, item) in enumerate(items)]

    with xact.cursor(db=self.db) as curs:
        curs.putmulti(rows, append=True)

    self.indx = base + len(rows)
async def test_synnodes_offset(self):
    '''The feed tool must honor --offset, and reject offsets with multiple files.'''
    async with self.getTestCore() as core:

        await self.addCreatorDeleterRoles(core)

        host, port = await core.dmon.listen('tcp://127.0.0.1:0/')
        curl = f'tcp://*****:*****@{host}:{port}/'

        with self.getTestDir() as dirn:

            # write 20 packed nodes to a .mpk file
            mpkfp = s_common.genpath(dirn, 'podes.mpk')
            with s_common.genfile(mpkfp) as fd:
                for i in range(20):
                    pode = (('test:int', i), {})
                    fd.write(s_msgpack.en(pode))

            argv = ['--cortex', curl, '--format', 'syn.nodes',
                    '--modules', 'synapse.tests.utils.TestModule',
                    '--chunksize', '4', '--offset', '15', mpkfp]

            outp = self.getTestOutp()
            self.eq(await s_feed.main(argv, outp=outp), 0)

            # Sad path catch: passing a second file with --offset must fail
            outp = self.getTestOutp()
            argv.append(mpkfp)
            self.eq(await s_feed.main(argv, outp=outp), 1)
            self.true(outp.expect('Cannot start from a arbitrary offset for more than 1 file.'))

            # offset 15 with chunksize 4 rewinds to 12, so 8 nodes land
            nodes = await core.eval('test:int').list()
            self.len(8, nodes)
def save(self, items):
    '''
    Save a series of items to a sequence.

    Args:
        items (tuple): The series of items to save into the sequence.

    Returns:
        dict: Statistics about the save: next index ('indx'), encoded byte
        total ('size'), item count ('count'), start time ('time'),
        duration ('took'), and the index of the first item ('orig').
    '''
    rows = []
    indx = self.indx

    size = 0
    tick = s_common.now()

    for item in items:

        byts = s_msgpack.en(item)

        size += len(byts)

        lkey = s_common.int64en(indx)
        indx += 1

        rows.append((lkey, byts))

    self.slab.putmulti(rows, append=True, db=self.db)
    took = s_common.now() - tick

    origindx = self.indx
    self.indx = indx

    return {'indx': indx, 'size': size, 'count': len(items),
            'time': tick, 'took': took, 'orig': origindx}
async def runCmdOpts(self, opts):
    '''
    Export nodes matching a storm query to a msgpack file.

    Args:
        opts: Parsed command options (query, filepath, include_tags, no_tags).
    '''
    self.printf('exporting nodes')

    queryopts = {}
    if opts.include_tags:
        queryopts['scrub'] = {'include': {'tags': opts.include_tags}}

    if opts.no_tags:
        queryopts['scrub'] = {'include': {'tags': []}}

    try:
        # strip the wrapping characters from the raw query argument
        query = opts.query[1:-1]

        with s_common.genfile(opts.filepath) as fd:
            cnt = 0
            async for pode in self._cmd_cli.item.exportStorm(query, opts=queryopts):
                # cleanup: the original bound fd.write()'s return to an
                # unused variable
                fd.write(s_msgpack.en(pode))
                cnt += 1

        self.printf(f'saved {cnt} nodes to: {opts.filepath}')

    except asyncio.CancelledError:
        # cleanup: removed unused `as e` binding; cancellation must propagate
        raise

    except s_exc.SynErr as e:
        self.printf(e.errinfo.get('mesg', str(e)))
def addUser(self, name):
    '''
    Add a new user to the auth system.

    Args:
        name (str): The user name.

    Returns:
        User: The newly created user.

    Raises:
        s_exc.DupUserName: If the user already exists.
    '''
    with self.lenv.begin(write=True) as xact:

        if self.users.get(name) is not None:
            raise s_exc.DupUserName(name=name)

        user = User(self, name)
        self.users[name] = user

        uenc = name.encode('utf8')

        # stamp the auth data with the current auth version before storing
        data = user._getAuthData()
        data['vers'] = authver

        byts = s_msgpack.en(data)
        xact.put(uenc, byts, db=self._db_users)

        return user
async def test_prov(self):
    '''Verify provenance stacks are recorded, deduplicated, and retrievable.'''
    s_provenance.reset()

    async with self.getTestCore() as real, real.getLocalProxy() as core:

        # Non-existent iden
        self.none(await core.getProvStack('abcd'))

        await core.addTrigger('node:add', '[ test:int=1 ]', info={'form': 'test:str'})
        await s_common.aspin(core.eval('[ test:str=foo ]'))
        await self.agenlen(1, core.eval('test:int'))
        await self.agenlen(0, core.eval('test:int | delnode'))

        splices = await alist(core.splices(0, 1000))
        self.len(9, splices)

        idens = [splice[1]['prov'] for splice in splices]

        # splices from the same cause share a provenance iden
        self.eq(idens[0], idens[1])
        self.eq(idens[0], idens[2])
        self.eq(idens[3], idens[4])
        self.eq(idens[7], idens[8])

        # node:add and prop:set
        self.eq(idens[5], idens[6])

        # The source splices
        prov1 = await core.getProvStack(idens[0])
        self.eq(({}, ()), prov1)

        # The test:str splices
        prov2 = await core.getProvStack(idens[3])
        rootiden = prov2[1][0][1]['user']
        s2 = ('storm', {'q': '[ test:str=foo ]', 'user': rootiden})
        self.eq((s2, ), prov2[1])

        # Validate that the iden calc itself is correct
        rawprov = ({}, [('storm', (('q', '[ test:str=foo ]'), ('user', rootiden)))])
        hash = hashlib.md5(s_msgpack.en(rawprov)).hexdigest()
        self.eq(hash, idens[3])

        # The trigger splices
        prov3 = await core.getProvStack(idens[5])
        s3 = ('trig', {'cond': 'node:add', 'form': 'test:str', 'tag': None, 'prop': None})
        s4 = ('storm', {'q': '[ test:int=1 ]', 'user': rootiden})
        self.eq((s2, s3, s4), prov3[1])

        # prop:del/node:del
        prov4 = await core.getProvStack(idens[7])
        ds2 = ('storm', {'q': 'test:int | delnode', 'user': rootiden})
        ds3 = ('stormcmd', {'name': 'delnode', 'argv': ()})
        self.eq((ds2, ds3), prov4[1])

        # Test the streaming API
        provstacks = await alist(core.provStacks(0, 1000))
        correct = [(idens[0], prov1), (idens[3], prov2), (idens[5], prov3), (idens[7], prov4)]
        self.eq(provstacks, correct)
def test_msgpack_loadfile(self):
    '''loadfile() returns a single object and rejects files with extra data.'''
    t0 = ('5678', {'key': 1})
    t1 = ('1234', {'key': 'haha'})

    with self.getTestDir() as fdir:

        with s_common.genfile(fdir, 'oneobj.mpk') as fd:
            fd.write(s_msgpack.en(t0))

        with s_common.genfile(fdir, 'twoobjs.mpk') as fd:
            fd.write(s_msgpack.en(t0))
            fd.write(s_msgpack.en(t1))

        data = s_msgpack.loadfile(s_common.genpath(fdir, 'oneobj.mpk'))
        self.eq(data, ('5678', {'key': 1}))

        # Files containing multiple objects are not supported
        self.raises(msgpack.exceptions.ExtraData, s_msgpack.loadfile,
                    s_common.genpath(fdir, 'twoobjs.mpk'))
async def test_datamodel_getModelDef(self):
    '''getModelDef() output must be msgpack safe and loadable by ModelInfo.'''
    async with self.getTestCore() as core:
        modeldef = core.model.getModelDef()

        # Verify it doesn't have any unmarshallable elements
        s_msgpack.en(modeldef)

        # each model def must carry non-empty ctors/types/forms/univs
        for field in ('ctors', 'types', 'forms', 'univs'):
            self.isin(field, modeldef[0][1])
            self.lt(0, len(modeldef[0][1][field]))

        # a fresh ModelInfo must be able to load the def and answer queries
        modelinfo = s_datamodel.ModelInfo()
        modelinfo.addDataModels(modeldef)
        self.true(modelinfo.isform('test:str'))
        self.true(modelinfo.isuniv('.seen'))
        self.false(modelinfo.isuniv('seen'))
        self.true(modelinfo.isprop('test:type10:intprop'))
        self.true(modelinfo.isprop('test:type10.seen'))
def encrypt(self, mesg):
    '''
    Wrap a message with a sequence number and encrypt it.

    Args:
        mesg: The mesg to encrypt.

    Returns:
        bytes: The encrypted message.
    '''
    envl = (next(self._tx_sn), mesg)
    return self._tx_tinh.enc(s_msgpack.en(envl))
def _storPropSetCommon(self, buid, penc, bpkey, pvpref, univ, valu, indx):
    '''
    Store a property row and maintain the byprop/byuniv secondary indexes.

    Args:
        buid (bytes): The node buid.
        penc (bytes): The utf8 encoded property name.
        bpkey (bytes): The bybuid key (<buid><prop>) for the row.
        pvpref (bytes): The byprop key prefix for this form/prop.
        univ (bool): Whether the prop is universal (also indexed in byuniv).
        indx (bytes|None): Index bytes for the value, or None if not indexed.
    '''
    bpval = s_msgpack.en((valu, indx))

    # index rows store a msgpacked one-tuple of the buid
    pvvalu = s_msgpack.en((buid,))

    # replace() hands back the previous row (if any) so it can be unindexed
    byts = self.layrslab.replace(bpkey, bpval, db=self.bybuid)
    if byts is not None:

        oldv, oldi = s_msgpack.un(byts)
        if oldi is not None:
            self.layrslab.delete(pvpref + oldi, pvvalu, db=self.byprop)

            if univ:
                self.layrslab.delete(penc + oldi, pvvalu, db=self.byuniv)

    if indx is not None:
        self.layrslab.put(pvpref + indx, pvvalu, dupdata=True, db=self.byprop)

        if univ:
            self.layrslab.put(penc + indx, pvvalu, dupdata=True, db=self.byuniv)
def main(argv, outp=s_output.stdout):
    '''
    CLI entry point for cryo.cat: display items from (or ingest items into)
    a remote cryotank.

    Returns:
        int: 0 on success, 1 on argument errors.
    '''
    pars = argparse.ArgumentParser(prog='cryo.cat', description='display data items from a cryo cell')
    pars.add_argument('cryotank', help='The telepath URL for the remote cryotank.')
    pars.add_argument('--offset', default=0, type=int, help='Begin at offset index')
    pars.add_argument('--size', default=10, type=int, help='How many items to display')
    pars.add_argument('--omit-offset', default=False, action='store_true',
                      help='Output raw items with no offsets.')

    # TODO: synapse.tools.cryo.list <cryocell>
    #pars.add_argument('--list', default=False, action='store_true', help='List tanks in the remote cell and return')

    group = pars.add_mutually_exclusive_group()
    group.add_argument('--jsonl', action='store_true', help='Input/Output items in jsonl format')
    group.add_argument('--msgpack', action='store_true', help='Input/Output items in msgpack format')

    pars.add_argument('--verbose', '-v', default=False, action='store_true', help='Verbose output')
    pars.add_argument('--ingest', '-i', default=False, action='store_true',
                      help='Reverses direction: feeds cryotank from stdin in msgpack or jsonl format')

    opts = pars.parse_args(argv)

    if opts.verbose:
        logger.setLevel(logging.INFO)

    # ingest requires an explicit wire format
    if opts.ingest and not opts.jsonl and not opts.msgpack:
        outp.printf('Must specify exactly one of --jsonl or --msgpack if --ingest is specified')
        return 1

    logger.info(f'connecting to: {opts.cryotank}')

    with s_telepath.openurl(opts.cryotank) as tank:

        if opts.ingest:

            if opts.msgpack:
                items = list(s_msgpack.iterfd(sys.stdin.buffer))
                tank.puts(items)
                return 0

            items = [json.loads(l) for l in sys.stdin]
            tank.puts(items)
            return 0

        # display mode: one of jsonl / raw msgpack / pretty-printed tuples
        for item in tank.slice(opts.offset, opts.size):

            if opts.jsonl:
                outp.printf(json.dumps(item[1], sort_keys=True))

            elif opts.msgpack:
                sys.stdout.buffer.write(s_msgpack.en(item[1]))

            else:
                outp.printf(pprint.pformat(item))

    return 0
def getProvIden(self, provstack):
    '''
    Returns the iden corresponding to a provenance stack and stores if it hasn't seen it before
    '''
    iden = _providen(provstack)

    misc, frames = provstack

    # Convert each frame back from (k, v) tuples to a dict
    dictframes = [(typ, dict(info)) for (typ, info) in frames]
    bytz = s_msgpack.en((misc, dictframes))

    # only record the stack in the sequence the first time we see it
    if self.slab.put(iden, bytz, overwrite=False, db=self.db):
        self.provseq.save([iden])

    return iden
async def _storBuidSet(self, oper):
    '''
    Migration-only method

    Notes:
        Precondition: buid cache must be disabled
    '''
    assert self.buidcache.disabled

    _, (form, oldb, newb) = oper

    fenc = form.encode() + b'\x00'

    # byprop/byuniv index values are msgpacked one-tuples of the buid
    pvoldval = s_msgpack.en((oldb,))
    pvnewval = s_msgpack.en((newb,))

    for lkey, lval in self.layrslab.scanByPref(oldb, db=self.bybuid):

        # bybuid keys are <32 byte buid><prop utf8>
        proputf8 = lkey[32:]
        valu, indx = s_msgpack.un(lval)

        if indx is not None:

            # <prop><00><indx>
            propindx = proputf8 + b'\x00' + indx

            if proputf8[0] in (46, 35):  # ".univ" or "#tag"
                self.layrslab.put(propindx, pvnewval, dupdata=True, db=self.byuniv)
                self.layrslab.delete(propindx, pvoldval, db=self.byuniv)

            bypropkey = fenc + propindx

            self.layrslab.put(bypropkey, pvnewval, db=self.byprop)
            self.layrslab.delete(bypropkey, pvoldval, db=self.byprop)

        # re-home the row under the new buid and drop the old row
        self.layrslab.put(newb + proputf8, lval, db=self.bybuid)
        self.layrslab.delete(lkey, db=self.bybuid)
def encodeMsg(self, mesg):
    '''Get byts for a message'''
    fmt = self.locs.get('log:fmt')

    if fmt == 'jsonl':
        return (json.dumps(mesg, sort_keys=True) + '\n').encode()

    if fmt == 'mpk':
        return s_msgpack.en(mesg)

    raise s_exc.SynErr(mesg=f'Unknown encoding format: {fmt}')
def set(self, name, valu):
    '''
    Set a name in the SlabDict.

    Args:
        name (str): The key name.
        valu (obj): A msgpack compatible value.

    Returns:
        None
    '''
    lkey = self.pref + name.encode('utf8')
    self.slab.put(lkey, s_msgpack.en(valu), db=self.db)
    self.info[name] = valu
def test_lib_crypto_tnfl_vector(self):
    '''Decrypt a static known-good test vector and verify the plaintext digest.'''
    key = binascii.unhexlify(b'fc066c018159a674c13ae1fb7c5c6548a4e05a11d742a0ebed35d28724b767b0')

    # hex-encoded envelope fields captured from a known-good encryption
    edict = {'data': b'02f9f72c9164e231f0e6795fd1d1fb21db6e8b0c049ef611ea6'
                     b'432ed8ec6d54b245d66864b06cc6cbdc52ebf5f0dbe1382b42e'
                     b'94a67411f7042d0562f3fd9b1a6961aacff69292aa596382c9f'
                     b'869e2957269191c5f916f56889188db03eb60d2caf7f7dd7388'
                     b'a5a9ef13494aaeb905f08e658fbb907afd7169b879b0313d065'
                     b'c1045e844c039b43296f44d6bc5',
             'hmac': b'fb4b53fb2b94d4ef91b5a094ab786b879ba6274384e23da15f7990609df5ab88',
             'iv': b'ecf8ed3d7932834fc76b7323d6ab73ce',
             'asscd': b''
             }

    msg = s_msgpack.en({k: binascii.unhexlify(v) for k, v in edict.items()})

    tinh = s_tinfoil.TinFoilHat(key)
    # compare the md5 of the decrypted plaintext against the known digest
    self.eq(hashlib.md5(tinh.dec(msg)).digest(),
            binascii.unhexlify(b'3303e226461e38f0f36988e441825e19'))
def enc(self, byts, asscd=None):
    '''
    Encrypt the given bytes and return an envelope dict in msgpack form.

    Args:
        byts (bytes): The message to be encrypted.
        asscd (bytes): Extra data that needs to be authenticated (but not encrypted).

    Returns:
        bytes: The encrypted message. This is a msgpacked dictionary
        containing the IV, ciphertext, and associated data.
    '''
    # fresh random 16 byte IV per message
    iv = os.urandom(16)
    data = AESGCM(self.ekey).encrypt(iv, byts, asscd)
    return s_msgpack.en({'iv': iv, 'data': data, 'asscd': asscd})
def test_msgpack_iterfile(self):
    '''
    Verify s_msgpack.iterfile() yields every object written to a file.
    '''
    t0 = ('5678', {'key': 1})
    t1 = ('1234', {'key': 'haha'})

    with self.getTestDir() as fdir:

        fd = s_common.genfile(fdir, 'test.mpk')
        for obj in (t0, t1):
            fd.write(s_msgpack.en(obj))
        fd.close()

        gen = s_msgpack.iterfile(s_common.genpath(fdir, 'test.mpk'))

        items = [obj for obj in gen]
        self.len(2, items)
        self.sorteq(items, [t0, t1])
        # NOTE: removed a trailing fd.close() which re-closed the already
        # closed file handle (a harmless but dead statement).
async def getFormTodo(self, name):
    '''
    Produce a deconflicted list of form values across layers as a *copy*
    to avoid iter vs edit issues in the indexes.
    '''
    logger.warning(f'MIGRATION: calculating form todo: {name}')

    size = 0
    async with self.getTempSlab() as slab:

        # overwrite=False keeps the first value seen for a given buid,
        # deconflicting across layers; size counts rows visited.
        for layr in self.layers:
            async for buid, valu in layr.iterFormRows(name):
                slab.put(buid, s_msgpack.en(valu), overwrite=False)
                size += 1

        logger.warning(f'MIGRATION: {name} todo size: {size}')

        # yield the deconflicted copy back out of the temp slab
        for buid, lval in slab.scanByFull():
            yield buid, s_msgpack.un(lval)
async def tx(self, mesg):
    '''
    Async transmit routine which will wait for writer drain().
    '''
    if self.isfini:
        raise s_exc.IsFini()

    data = s_msgpack.en(mesg)

    try:
        self.writer.write(data)
        # Avoid Python bug. See https://bugs.python.org/issue29930
        async with self._drain_lock:
            await self.writer.drain()

    except Exception as exc:
        # tear the link down, log the normalized error, and re-raise
        await self.fini()
        einfo = s_common.retnexc(exc)
        logger.debug('link.tx connection trouble %s', einfo)
        raise
async def editNodeNdef(self, oldv, newv):
    '''
    Migration-only method: move a node from one ndef (form, valu) to
    another, re-keying its bybuid rows and byprop/byuniv index entries.

    Notes:
        Precondition: buid cache must be disabled

    Args:
        oldv: the old ndef tuple (formname, valu)
        newv: the new ndef tuple (formname, valu)
    '''
    assert self.buidcache.disabled

    # buids are derived from the ndef tuples
    oldb = s_common.buid(oldv)
    newb = s_common.buid(newv)

    # byprop/byuniv values are msgpack'd 1-tuples of the buid
    pvoldval = s_msgpack.en((oldb,))
    pvnewval = s_msgpack.en((newb,))

    oldfenc = oldv[0].encode() + b'\x00'
    newfenc = newv[0].encode() + b'\x00'

    # bybuid prop key for the node's own form value row: *<form>
    newprel = b'*' + newv[0].encode()
    # index bytes for the new form value, per the model's type
    newnindx = self.core.model.prop(newv[0]).type.indx(newv[1])

    # avoid any potential iter/edit issues...
    todo = list(self.layrslab.scanByPref(oldb, db=self.bybuid))

    for lkey, lval in todo:

        proputf8 = lkey[32:]
        valu, indx = s_msgpack.un(lval)

        # for the *<form> prop, the byprop index has <form><00><00><indx>
        if proputf8[0] == 42:

            newpropkey = newfenc + b'\x00' + newnindx

            if indx is not None:
                oldpropkey = oldfenc + b'\x00' + indx
                if not self.layrslab.delete(oldpropkey, pvoldval, db=self.byprop): # pragma: no cover
                    logger.warning(f'editNodeNdef del byprop missing for {repr(oldv)} {repr(oldpropkey)}')

            self.layrslab.put(newpropkey, pvnewval, dupdata=True, db=self.byprop)

            # rewrite the form-value row with the new valu/indx
            byts = s_msgpack.en((newv[1], newnindx))
            self.layrslab.put(newb + newprel, byts, db=self.bybuid)

        else:
            # <prop><00><indx>
            propindx = proputf8 + b'\x00' + indx

            if proputf8[0] in (46, 35): # ".univ" or "#tag"
                # universal props and tags are also tracked in byuniv
                self.layrslab.put(propindx, pvnewval, dupdata=True, db=self.byuniv)
                self.layrslab.delete(propindx, pvoldval, db=self.byuniv)

            oldpropkey = oldfenc + propindx
            newpropkey = newfenc + propindx

            if not self.layrslab.delete(oldpropkey, pvoldval, db=self.byprop): # pragma: no cover
                logger.warning(f'editNodeNdef del byprop missing for {repr(oldv)} {repr(oldpropkey)}')

            self.layrslab.put(newpropkey, pvnewval, dupdata=True, db=self.byprop)

            # secondary prop rows carry over unchanged under the new buid
            self.layrslab.put(newb + proputf8, lval, db=self.bybuid)

        # remove the old bybuid row in all cases
        self.layrslab.delete(lkey, db=self.bybuid)
async def storNodeValu(self, full, valu):
    '''
    Store a value under a null-separated key built from the full path tuple.
    Returns the value unchanged.
    '''
    lkey = '\x00'.join(full).encode('utf8')
    self.slab.put(lkey, s_msgpack.en(valu), db=self.db)
    return valu
def en(self):
    '''Return the msgpack encoding of this dataclass instance's fields.'''
    info = dataclasses.asdict(self)
    return s_msgpack.en(info)
def _providen(prov):
    '''
    Calculates a provenance iden from a provenance stack
    '''
    # iden is the md5 of the msgpack'd stack (stable for equal stacks)
    byts = s_msgpack.en(prov)
    return hashlib.md5(byts).digest()
def add(self, item):
    # key each entry by its millisecond timestamp, big-endian so the
    # slab iterates in time order; dupdata allows same-tick entries
    when = s_common.now()
    lkey = when.to_bytes(8, 'big')
    self.slab.put(lkey, s_msgpack.en(item), dupdata=True, db=self.db)
def test_lib_crypto_tnfl_base(self):
    ekey = s_tinfoil.newkey()
    self.len(32, ekey)
    self.isinstance(ekey, bytes)
    # Keys are random from s_tinfoil.newkey
    self.ne(ekey, s_tinfoil.newkey())
    self.ne(ekey, s_tinfoil.newkey())

    tinh = s_tinfoil.TinFoilHat(ekey)
    self.true(tinh.bend is default_backend())

    byts = tinh.enc(b'foobar')

    # Ensure the envelope is shaped as we expect it to be
    edict = s_msgpack.un(byts)
    self.isinstance(edict, dict)
    self.len(3, edict)

    data = edict.get('data')
    self.isinstance(data, bytes)
    # ciphertext is plaintext length plus a 16 byte auth tag
    self.len(6 + 16, data)

    iv = edict.get('iv')
    self.isinstance(iv, bytes)
    self.len(16, iv)

    asscd = edict.get('asscd')
    self.eq(asscd, None)

    # We can decrypt and get our original message back
    self.eq(tinh.dec(byts), b'foobar')

    # There isn't anything special about the tinfoilhat object
    # We can make a new one to decrypt our existing message with
    # the known key
    self.eq(s_tinfoil.TinFoilHat(ekey).dec(byts), b'foobar')

    # We can encrypt/decrypt null messages
    byts = tinh.enc(b'')
    self.eq(tinh.dec(byts), b'')

    # Attempting to decrypt with the wrong key fails
    self.none(s_tinfoil.TinFoilHat(s_tinfoil.newkey()).dec(byts))

    # Messages are stream encoded so the length is 1 to 1
    for msize in [0, 1, 2, 15, 16, 17, 31, 32, 33, 63, 65]:
        mesg = msize * b'!'
        byts = tinh.enc(mesg)
        edict = s_msgpack.un(byts)
        self.len(16, edict.get('iv'))
        data = edict.get('data')
        self.len(len(mesg) + 16, data)
        self.eq(tinh.dec(byts), mesg)

    # We can pass in additional data that we want authed too
    byts = tinh.enc(b'robert grey', b'pennywise')
    edict = s_msgpack.un(byts)
    self.eq(edict.get('asscd'), b'pennywise')
    self.eq(tinh.dec(byts), b'robert grey')

    # A malformed edict with a bad asscd won't decrypt
    edict['asscd'] = b'georgey'
    self.none(tinh.dec(s_msgpack.en(edict)))
async def test_cryocat(self):

    async with self.getTestCryo() as cryo:

        cryourl = cryo.getLocalUrl(share='cryotank/hehe')

        # Ingest with no stdin format flag fails
        argv = ['--ingest', cryourl]
        retn, outp = await self.execToolMain(s_cryocat.main, argv)
        self.eq(1, retn)

        # Happy path jsonl ingest
        outp = self.getTestOutp()
        argv = ['--ingest', '--jsonl', cryourl]
        inp = io.StringIO('{"foo": "bar"}\n[]\n')
        with self.redirectStdin(inp):
            retn, outp = await self.execToolMain(s_cryocat.main, argv)
        self.eq(0, retn)

        # Sad path jsonl ingest
        argv = ['--ingest', '--jsonl', cryourl]
        inp = io.StringIO('{"foo: "bar"}\n[]\n')
        with self.redirectStdin(inp):
            with self.raises(json.decoder.JSONDecodeError):
                retn, outp = await self.execToolMain(s_cryocat.main, argv)

        # Happy path msgpack ingest
        argv = ['--ingest', '--msgpack', cryourl]
        to_ingest1 = s_msgpack.en({'foo': 'bar'})
        to_ingest2 = s_msgpack.en(['lol', 'brb'])
        inp = Mock()
        inp.buffer = io.BytesIO(to_ingest1 + to_ingest2)
        with self.redirectStdin(inp):
            retn, outp = await self.execToolMain(s_cryocat.main, argv)
        self.eq(0, retn)

        # Sad path msgpack ingest: corrupt a byte in a valid encoding
        argv = ['--ingest', '--msgpack', cryourl]
        good_encoding = s_msgpack.en({'foo': 'bar'})
        bad_encoding = bytearray(good_encoding)
        bad_encoding[2] = 0xff
        inp = Mock()
        inp.buffer = io.BytesIO(bad_encoding)
        with self.redirectStdin(inp):
            with self.raises(msgpack.UnpackValueError):
                retn, outp = await self.execToolMain(s_cryocat.main, argv)

        # Read back a slice of what was ingested
        argv = ['--offset', '0', '--size', '1', cryourl]
        retn, outp = await self.execToolMain(s_cryocat.main, argv)
        self.eq(0, retn)
        self.true(outp.expect("(0, {'foo': 'bar'})"))

    async with self.getTestCryo() as cryo:

        cryourl = cryo.getLocalUrl(share='cryotank/hehe')
        items = [(None, {'key': i}) for i in range(20)]
        tank = await cryo.init('hehe')
        await tank.puts(items)

        # jsonl output with offsets omitted
        argv = ['--offset', '0', '--jsonl', '--size', '2', '--omit-offset', cryourl]
        retn, outp = await self.execToolMain(s_cryocat.main, argv)
        self.true(outp.expect('[null, {"key": 0}]\n[null, {"key": 1}]\n'))

        # default output includes (offset, item) tuples
        argv = ['--offset', '0', '--size', '20', cryourl]
        retn, outp = await self.execToolMain(s_cryocat.main, argv)
        self.eq(0, retn)
        self.true(outp.expect("(0, (None, {'key': 0}))"))
        self.true(outp.expect("(9, (None, {'key': 9}))"))

        # starting at offset 10 skips the earlier records
        argv = ['--offset', '10', '--size', '20', cryourl]
        retn, outp = await self.execToolMain(s_cryocat.main, argv)
        self.eq(0, retn)
        self.false(outp.expect("(9, (None, {'key': 9}))", throw=False))