def addIndex(self, prop, syntype, datapaths):
    '''
    Add an index to the cryotank

    Args:
        prop (str): the name of the property this will be stored as in
            the normalized record
        syntype (str): the synapse type this will be interpreted as
        datapaths (Iterable[str]): datapaths that will be tried in order.

    Returns:
        None

    Note:
        Additional datapaths will only be tried if prior datapaths are
        not present, and *not* if the normalization fails.
    '''
    if self.iidFromProp(prop) is not None:
        raise s_exc.DupIndx(mesg='Index already exists', index=prop)

    if not len(datapaths):
        raise s_exc.BadOperArg(mesg='datapaths must have at least one entry')

    if self.model.type(syntype) is None:
        raise s_exc.BadOperArg(mesg=f'unknown synapse type {syntype}')

    iid = int.from_bytes(os.urandom(8), 'little')
    self.indices[iid] = _MetaEntry(self.model, propname=prop, syntype=syntype, datapaths=datapaths)
    self.progresses[iid] = {'nextoffset': 0, 'ngood': 0, 'nnormfail': 0}
    self.persist()
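# Usage sketch (hedged): 'indexer' stands in for an object exposing the
# method above; the property name, type, and datapaths are illustrative.
# Per the Note, the second datapath is consulted only when the first is
# missing from a record, not when its normalization fails.
indexer.addIndex('fqdn', 'inet:fqdn', ('foo/domain', 'backup/domain'))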
async def execStormCmd(self, runt, genr):
    self.snap = runt.snap

    self.omit_traversal_forms = set(self.opts.omit_traversal_form)
    self.omit_traversal_tags = set(self.opts.omit_traversal_tag)
    self.omit_forms = set(self.opts.omit_form)
    self.omit_tags = set(self.opts.omit_tag)

    self.ndef_props = [prop for prop in self.snap.model.props.values()
                       if isinstance(prop.type, s_types.Ndef)]

    if self.opts.degrees < 1:
        raise s_exc.BadOperArg(mesg='degrees must be greater than or equal to 1', arg='degrees')

    visited = set()

    async for node, path in genr:
        if self.opts.join:
            yield node, path

        if self.opts.unique is False:
            visited = set()

        # Don't revisit the inbound node from genr
        visited.add(node.buid)

        async for nnode, npath in self.doRefs(node, path, visited):
            yield nnode, npath
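# A rough standalone analogue of the traversal pattern above, not the
# actual doRefs() implementation: with unique=False the visited set is
# rebuilt per inbound node, so a branch reachable from several inbound
# nodes is yielded once per inbound node instead of once overall.
def _traverse_sketch(start_nodes, neighbors, degrees=1, unique=True):
    visited = set()
    for node in start_nodes:
        if not unique:
            visited = set()  # restart the dedup scope for each inbound node
        visited.add(node)
        todo = [(node, 0)]
        while todo:
            cur, depth = todo.pop()
            if depth >= degrees:
                continue
            for nxt in neighbors(cur):
                if nxt in visited:
                    continue
                visited.add(nxt)
                yield nxt
                todo.append((nxt, depth + 1))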
def queryNormValu(self, prop: str, valu: Optional[Union[int, str]] = None, exact=False):
    '''
    Query for normalized individual property values

    Args:
        prop: The name of the indexed property
        valu: The normalized value.  If not present, all records with
            prop present, sorted by prop will be returned.  It will be
            considered a prefix if exact is False.
        exact (bool): Indicates that the result must match exactly.
            Conversely, if False, indicates a prefix match.

    Returns:
        A generator of offset, normalized value tuples
    '''
    if not exact and valu is not None and isinstance(valu, str) and len(valu) >= s_lmdb.LARGE_STRING_SIZE:
        raise s_exc.BadOperArg(mesg='prefix search valu cannot exceed 128 characters')

    for (offset, offset_enc, iidenc, txn) in self._iterrows(prop, valu, exact):
        rv = txn.get(bytes(offset_enc) + iidenc, None, db=self._normtbl)
        if rv is None:
            raise s_exc.CorruptDatabase('Missing normalized record')  # pragma: no cover
        yield offset, s_msgpack.un(rv)
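# Usage sketch (same hypothetical 'indexer' as above):
# exact match on a normalized value
for offset, valu in indexer.queryNormValu('fqdn', valu='woot.com', exact=True):
    print(offset, valu)
# prefix match (exact defaults to False)
for offset, valu in indexer.queryNormValu('fqdn', valu='woot'):
    print(offset, valu)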
def _iterrows(self, prop, valu, exact=False):
    '''
    Query against an index.

    Args:
        prop (str): The name of the indexed property
        valu (Optional[Union[int, str]]): The normalized value.  If not
            present, all records with prop present, sorted by prop will
            be returned.  It will be considered a prefix if exact is
            False.
        exact (bool): Indicates that the result must match exactly.
            Conversely, if False, indicates a prefix match.

    Returns:
        Iterable[Tuple[int, bytes, bytes, lmdb.Transaction]]: a generator
        of a tuple of the offset, the encoded offset, the encoded index
        ID, and the LMDB read transaction.

    Note:
        Ordering of tuples disregards everything after the first 128
        bytes of a property.
    '''
    iid = self._meta.iidFromProp(prop)
    if iid is None:
        raise s_exc.NoSuchIndx(mesg='No such index', index=prop)

    iidenc = _iid_en(iid)

    islarge = valu is not None and isinstance(valu, str) and len(valu) >= s_lmdb.LARGE_STRING_SIZE
    if islarge and not exact:
        raise s_exc.BadOperArg(mesg='prefix search valu cannot exceed 128 characters')

    if islarge and exact:
        key = iidenc + s_lmdb.encodeValAsKey(valu)
    elif valu is None:
        key = iidenc
    else:
        key = iidenc + s_lmdb.encodeValAsKey(valu, isprefix=not exact)

    with self._dbenv.begin(db=self._idxtbl, buffers=True) as txn, txn.cursor() as curs:
        if exact:
            rv = curs.set_key(key)
        else:
            rv = curs.set_range(key)

        if not rv:
            return

        while True:
            curkey, offset_enc = curs.item()
            if (not exact and not curkey[:len(key)] == key) or (exact and curkey != key):
                return

            offset = _Int64be.unpack(offset_enc)[0]
            yield (offset, offset_enc, iidenc, txn)

            if not curs.next():
                return
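# A standalone sketch of the prefix-scan pattern used above, written
# against the lmdb bindings directly. The path, db name, and keys are
# illustrative, not from the source.
import lmdb

env = lmdb.open('/tmp/example-lmdb', max_dbs=1)
idxtbl = env.open_db(b'idx')

with env.begin(db=idxtbl, write=True) as txn:
    for key in (b'\x01aaa', b'\x01aab', b'\x02zzz'):
        txn.put(key, b'offset')

prefix = b'\x01'
with env.begin(db=idxtbl, buffers=True) as txn, txn.cursor() as curs:
    if curs.set_range(prefix):  # seek to the first key >= prefix
        while True:
            key, valu = curs.item()
            if bytes(key[:len(prefix)]) != prefix:
                break  # walked past the end of the prefix range
            print(bytes(key), bytes(valu))
            if not curs.next():
                break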
async def aget(self, key):

    if not self.iscorocall:
        raise s_exc.BadOperArg('cache was initialized with non coroutine. Must use get')

    valu = self.cache.get(key, s_common.novalu)
    if valu is not s_common.novalu:
        return valu

    valu = await self.callback(key)
    if valu is s_common.novalu:
        return valu

    self.put(key, valu)
    return valu
async def aget(self, key):

    if not self.iscorocall:
        raise s_exc.BadOperArg('cache was initialized with non coroutine. Must use get')

    valu = self.cache.get(key, s_common.novalu)
    if valu is not s_common.novalu:
        return valu

    valu = await self.callback(key)
    if valu is s_common.novalu:
        return valu

    self.cache[key] = valu
    self.fifo.append(key)

    # Evict the oldest entries once the cache exceeds its fixed size
    while len(self.fifo) > self.size:
        key = self.fifo.popleft()
        self.cache.pop(key, None)

    return valu
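# Standalone sketch of the fixed-size FIFO cache pattern above, with no
# synapse dependencies. It simplifies by using None as the miss sentinel
# instead of a novalu marker, so None results are re-fetched each call.
import collections

class FifoCache:
    def __init__(self, callback, size=3):
        self.size = size
        self.cache = {}
        self.fifo = collections.deque()
        self.callback = callback

    def get(self, key):
        valu = self.cache.get(key)
        if valu is not None:
            return valu
        valu = self.callback(key)
        self.cache[key] = valu
        self.fifo.append(key)
        while len(self.fifo) > self.size:
            old = self.fifo.popleft()
            self.cache.pop(old, None)
        return valu

cache = FifoCache(lambda key: key * 2)
for k in (1, 2, 3, 4):
    cache.get(k)
assert 1 not in cache.cache  # the oldest entry beyond size 3 was evicted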
def queryRows(self, prop: str, valu: Optional[Union[int, str]] = None, exact=False) -> Iterable[Tuple[int, bytes]]:
    '''
    Query for raw (i.e. from the cryotank itself) records

    Args:
        prop: The name of the indexed property
        valu: The normalized value.  If not present, all records with
            prop present, sorted by prop will be returned.  It will be
            considered a prefix if exact is False.
        exact: Indicates that the result must match exactly.
            Conversely, if False, indicates a prefix match.

    Returns:
        Iterable[Tuple[int, bytes]]: A generator of tuples of (offset,
        msgpack-encoded) raw records
    '''
    if not exact and valu is not None and isinstance(valu, str) and len(valu) >= s_lmdb.LARGE_STRING_SIZE:
        raise s_exc.BadOperArg(mesg='prefix search valu cannot exceed 128 characters')

    for offset, _, _, txn in self._iterrows(prop, valu, exact):
        yield next(self.cryotank.rows(offset, 1))
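# Usage sketch: each yielded tuple pairs a tank offset with the raw
# msgpack bytes, and s_msgpack.un() recovers the original record
# (same hypothetical 'indexer' as above).
for offset, byts in indexer.queryRows('fqdn', valu='woot'):
    record = s_msgpack.un(byts)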
def queryNormRecords(self, prop: str, valu: Optional[Union[int, str]] = None, exact=False) -> \
        Iterable[Tuple[int, Dict[str, Union[str, int]]]]:
    '''
    Query for normalized property values grouped together in dicts

    Args:
        prop: The name of the indexed property
        valu: The normalized value.  If not present, all records with
            prop present, sorted by prop will be returned.  It will be
            considered a prefix if exact is False.
        exact: Indicates that the result must match exactly.
            Conversely, if False, indicates a prefix match.

    Returns:
        A generator of offset, dictionary tuples
    '''
    if not exact and valu is not None and isinstance(valu, str) and len(valu) >= s_lmdb.LARGE_STRING_SIZE:
        raise s_exc.BadOperArg(mesg='prefix search valu cannot exceed 128 characters')

    for offset, offset_enc, _, txn in self._iterrows(prop, valu, exact):
        norm = {}
        olen = len(offset_enc)
        with txn.cursor(db=self._normtbl) as curs:
            if not curs.set_range(offset_enc):
                raise s_exc.CorruptDatabase('Missing normalized record')  # pragma: no cover
            while True:
                curkey, norm_enc = curs.item()
                if curkey[:olen] != offset_enc:
                    break
                iid = _iid_un(curkey[olen:])

                # this is racy with the worker, but it is still safe
                idx = self._meta.indices.get(iid)
                if idx is not None:
                    norm[idx.propname] = s_msgpack.un(norm_enc)

                if not curs.next():
                    break
        yield offset, norm
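# Usage sketch: unlike queryNormValu, each hit returns every indexed
# property for the record, grouped in one dict (hypothetical 'indexer').
for offset, norm in indexer.queryNormRecords('fqdn', valu='woot'):
    print(offset, norm.get('fqdn'))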
async def execStormCmd(self, runt, genr):
    snap = runt.snap

    nodes = [node async for node in snap.getNodesBy('syn:tag', self.opts.oldtag)]
    if not nodes:
        raise s_exc.BadOperArg(mesg='Cannot move a tag which does not exist.',
                               oldtag=self.opts.oldtag)
    oldt = nodes[0]
    oldstr = oldt.ndef[1]
    oldsize = len(oldstr)
    oldparts = oldstr.split('.')
    noldparts = len(oldparts)

    newt = await snap.addNode('syn:tag', self.opts.newtag)
    newstr = newt.ndef[1]

    if oldstr == newstr:
        raise s_exc.BadOperArg(mesg='Cannot retag a tag to the same valu.',
                               newtag=newstr, oldtag=oldstr)

    retag = {oldstr: newstr}

    # first we set all the syn:tag:isnow props
    async for node in snap.getNodesBy('syn:tag', self.opts.oldtag, cmpr='^='):

        tagstr = node.ndef[1]
        tagparts = tagstr.split('.')
        # Are we in the same tree?
        if tagparts[:noldparts] != oldparts:
            continue

        newtag = newstr + tagstr[oldsize:]

        newnode = await snap.addNode('syn:tag', newtag)

        olddoc = node.get('doc')
        if olddoc is not None:
            await newnode.set('doc', olddoc)

        oldtitle = node.get('title')
        if oldtitle is not None:
            await newnode.set('title', oldtitle)

        # Copy any tags over to the newnode if any are present.
        for k, v in node.tags.items():
            await newnode.addTag(k, v)

        retag[tagstr] = newtag
        await node.set('isnow', newtag)

    # now we re-tag all the nodes...
    count = 0
    async for node in snap.getNodesBy(f'#{oldstr}'):
        count += 1

        tags = list(node.tags.items())
        tags.sort(reverse=True)

        for name, valu in tags:
            newt = retag.get(name)
            if newt is None:
                continue

            await node.delTag(name)
            await node.addTag(newt, valu=valu)

    await snap.printf(f'moved tags on {count} nodes.')

    async for node, path in genr:
        yield node, path
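# Standalone sketch of the subtag rewrite above (tag names illustrative).
# The tree check matters: 'foo.barbaz' shares the string prefix 'foo.bar'
# but is not in the foo.bar tag tree, so it must not be moved.
oldstr, newstr = 'foo.bar', 'baz.faz'
oldparts = oldstr.split('.')
for tagstr in ('foo.bar', 'foo.bar.baz', 'foo.barbaz'):
    if tagstr.split('.')[:len(oldparts)] != oldparts:
        continue  # skips 'foo.barbaz'
    print(tagstr, '->', newstr + tagstr[len(oldstr):])
# prints: foo.bar -> baz.faz, then foo.bar.baz -> baz.faz.baz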