Exemple #1
0
    def addIndex(self, prop, syntype, datapaths):
        '''
        Register a new index and persist the updated metadata.

        Args:
            prop (str):  the property name the value is stored under in the normalized record
            syntype (str):  the synapse type name used to interpret the value
            datapaths (Iterable[str]):  candidate datapaths, tried in order.
        Returns:
            None
        Raises:
            s_exc.DupIndx: if an index is already registered for prop.
            s_exc.BadOperArg: if datapaths is empty or syntype is unknown to the model.
        Note:
            A later datapath is only consulted when the earlier ones are not present; it is
            *not* used as a fallback when normalization fails.
        '''
        existing = self.iidFromProp(prop)
        if existing is not None:
            raise s_exc.DupIndx(mesg='Index already exists', index=prop)

        if len(datapaths) == 0:
            raise s_exc.BadOperArg(mesg='datapaths must have at least one entry')

        modeltype = self.model.type(syntype)
        if modeltype is None:
            raise s_exc.BadOperArg(mesg=f'unknown synapse type {syntype}')

        # The index id is a random 64-bit integer.
        randbytes = os.urandom(8)
        iid = int.from_bytes(randbytes, 'little')

        entry = _MetaEntry(self.model, propname=prop, syntype=syntype, datapaths=datapaths)
        self.indices[iid] = entry

        # Every new index starts with zeroed progress counters.
        self.progresses[iid] = dict(nextoffset=0, ngood=0, nnormfail=0)
        self.persist()
Exemple #2
0
    async def execStormCmd(self, runt, genr):
        '''
        Yield the nodes produced by self.doRefs() for every inbound node.

        Args:
            runt: the runtime; its snap attribute is stored on self.snap.
            genr: async generator of (node, path) tuples.

        Yields:
            (node, path) tuples.  The inbound tuple itself is yielded first when the
            join option is set, followed by whatever self.doRefs() yields for it.

        Raises:
            s_exc.BadOperArg: if the degrees option is less than 1.
        '''
        self.snap = runt.snap
        opts = self.opts

        self.omit_traversal_forms = set(opts.omit_traversal_form)
        self.omit_traversal_tags = set(opts.omit_traversal_tag)
        self.omit_forms = set(opts.omit_form)
        self.omit_tags = set(opts.omit_tag)

        # Collect every model property whose type is an Ndef.
        ndef_props = []
        for prop in self.snap.model.props.values():
            if isinstance(prop.type, s_types.Ndef):
                ndef_props.append(prop)
        self.ndef_props = ndef_props

        if opts.degrees < 1:
            raise s_exc.BadOperArg(mesg='degrees must be greater than or equal to 1', arg='degrees')

        seen = set()

        async for node, path in genr:

            if opts.join:
                yield node, path

            # Without the unique option each inbound node gets a fresh visited set.
            if opts.unique is False:
                seen = set()

            # The inbound node itself must not be revisited.
            seen.add(node.buid)

            async for refnode, refpath in self.doRefs(node, path, seen):
                yield refnode, refpath
Exemple #3
0
    def queryNormValu(self,
                      prop: str,
                      valu: Optional[Union[int, str]] = None,
                      exact=False):
        '''
        Query for normalized individual property values

        Args:
            prop:  The name of the indexed property
            valu:  The normalized value.  If not present, all records with prop present, sorted by prop will be
                returned.  It will be considered a prefix if exact is False.
            exact (bool): Indicates that the result must match exactly.  Conversely, if False, indicates a prefix match.

        Returns:
            A generator of offset, normalized value tuples

        Raises:
            s_exc.BadOperArg: on a prefix search whose string valu is LARGE_STRING_SIZE characters or longer.
            s_exc.CorruptDatabase: if an index row has no matching normalized record.
        '''
        toolong = isinstance(valu, str) and len(valu) >= s_lmdb.LARGE_STRING_SIZE
        if toolong and not exact:
            raise s_exc.BadOperArg(mesg='prefix search valu cannot exceed 128 characters')

        for row in self._iterrows(prop, valu, exact):
            offset, offset_enc, iidenc, txn = row

            # The normalized table is keyed by encoded offset followed by encoded index id.
            normkey = bytes(offset_enc) + iidenc
            packed = txn.get(normkey, None, db=self._normtbl)
            if packed is None:
                raise s_exc.CorruptDatabase('Missing normalized record')  # pragma: no cover

            yield offset, s_msgpack.un(packed)
Exemple #4
0
    def _iterrows(self, prop, valu, exact=False):
        '''
        Query against an index.

        Args:
            prop (str):  The name of the indexed property
            valu (Optional[Union[int, str]]):  The normalized value.  If not present, all records with prop present,
                sorted by prop will be returned.  It will be considered a prefix if exact is False.
            exact (bool): Indicates that the result must match exactly.  Conversely, if False, indicates a prefix match.

        Returns:
            Iterable[Tuple[int, bytes, bytes, lmdb.Transaction]]: a generator of a Tuple of the offset, the encoded
            offset, the encoded index ID, and the LMDB read transaction.

        Raises:
            s_exc.NoSuchIndx: if there is no index for prop.
            s_exc.BadOperArg: on a prefix search whose string valu is LARGE_STRING_SIZE characters or longer.

        Note:
            Ordering of Tuples disregards everything after the first 128 bytes of a property.

            This is a generator, so the argument checks above only run once iteration starts.  The
            yielded transaction is the one opened by this generator's ``with`` block, so it is
            released when the generator finishes or is closed.
        '''
        iid = self._meta.iidFromProp(prop)
        if iid is None:
            raise s_exc.NoSuchIndx(mesg='No such index', index=prop)
        iidenc = _iid_en(iid)

        islarge = valu is not None and isinstance(
            valu, str) and len(valu) >= s_lmdb.LARGE_STRING_SIZE
        if islarge and not exact:
            raise s_exc.BadOperArg(
                mesg='prefix search valu cannot exceed 128 characters')

        # Every index key starts with the encoded index id; the encoded value (if any) follows.
        if islarge and exact:
            key = iidenc + s_lmdb.encodeValAsKey(valu)
        elif valu is None:
            key = iidenc
        else:
            key = iidenc + s_lmdb.encodeValAsKey(valu, isprefix=not exact)

        keylen = len(key)

        with self._dbenv.begin(db=self._idxtbl,
                               buffers=True) as txn, txn.cursor() as curs:
            # Exact searches need the key itself; prefix searches start at the first key >= key.
            if exact:
                positioned = curs.set_key(key)
            else:
                positioned = curs.set_range(key)

            while positioned:
                curkey, offset_enc = curs.item()

                # Stop as soon as the cursor walks off the requested key / key prefix.
                if exact:
                    if curkey != key:
                        return
                elif not curkey[:keylen] == key:
                    return

                offset = _Int64be.unpack(offset_enc)[0]
                yield (offset, offset_enc, iidenc, txn)

                positioned = curs.next()
Exemple #5
0
    async def aget(self, key):
        '''
        Return the cached value for key, awaiting the callback on a cache miss.

        A value produced by the callback is stored via self.put() unless it is the
        s_common.novalu sentinel, which is returned without being cached.

        Raises:
            s_exc.BadOperArg: if self.iscorocall is falsy.
        '''
        if not self.iscorocall:
            raise s_exc.BadOperArg('cache was initialized with non coroutine.  Must use get')

        cached = self.cache.get(key, s_common.novalu)
        if cached is not s_common.novalu:
            return cached

        fresh = await self.callback(key)
        if fresh is not s_common.novalu:
            self.put(key, fresh)

        return fresh
Exemple #6
0
    async def aget(self, key):
        '''
        Return the cached value for key, awaiting the callback on a cache miss.

        A value produced by the callback is stored in self.cache and its key appended to
        self.fifo; the oldest keys are then dropped until the fifo holds at most self.size
        entries.  The s_common.novalu sentinel is returned without being cached.

        Raises:
            s_exc.BadOperArg: if self.iscorocall is falsy.
        '''
        if not self.iscorocall:
            raise s_exc.BadOperArg('cache was initialized with non coroutine.  Must use get')

        cached = self.cache.get(key, s_common.novalu)
        if cached is not s_common.novalu:
            return cached

        fresh = await self.callback(key)
        if fresh is s_common.novalu:
            return fresh

        self.cache[key] = fresh
        self.fifo.append(key)

        # Evict from the front of the fifo until we are back within the size limit.
        while len(self.fifo) > self.size:
            self.cache.pop(self.fifo.popleft(), None)

        return fresh
Exemple #7
0
    def queryRows(self,
                  prop: str,
                  valu: Optional[Union[int, str]] = None,
                  exact=False) -> Iterable[Tuple[int, bytes]]:
        '''
        Query for raw (i.e. from the cryotank itself) records

        Args:
            prop:  The name of the indexed property
            valu:  The normalized value.  If not present, all records with prop present,
                sorted by prop will be returned.  It will be considered a prefix if exact is False.
            exact: Indicates that the result must match exactly.  Conversely, if False, indicates a prefix match.

        Returns:
            Iterable[Tuple[int, bytes]]: A generator of tuple (offset, messagepack encoded) raw records

        Raises:
            s_exc.BadOperArg: on a prefix search whose string valu is LARGE_STRING_SIZE characters or longer.
            s_exc.CorruptDatabase: if the cryotank returns no row for an indexed offset.
        '''
        if not exact and valu is not None and isinstance(
                valu, str) and len(valu) >= s_lmdb.LARGE_STRING_SIZE:
            raise s_exc.BadOperArg(
                mesg='prefix search valu cannot exceed 128 characters')

        missing = object()
        for offset, _, _, _ in self._iterrows(prop, valu, exact):
            # A bare next() here would leak StopIteration into this generator, which Python
            # turns into an opaque RuntimeError (PEP 479); report the missing row explicitly.
            row = next(self.cryotank.rows(offset, 1), missing)
            if row is missing:
                raise s_exc.CorruptDatabase('Missing raw record')
            yield row
Exemple #8
0
    def queryNormRecords(self, prop: str, valu: Optional[Union[int, str]] = None, exact=False) -> \
            Iterable[Tuple[int, Dict[str, Union[str, int]]]]:
        '''
        Query for normalized property values grouped together in dicts

        Args:
            prop:  The name of the indexed property
            valu:  The normalized value.  If not present, all records with prop present, sorted by prop will be
                returned.  It will be considered a prefix if exact is False.
            exact: Indicates that the result must match exactly.  Conversely, if False, indicates a prefix match.

        Returns:
            A generator of offset, dictionary tuples

        Raises:
            s_exc.BadOperArg: on a prefix search whose string valu is LARGE_STRING_SIZE characters or longer.
            s_exc.CorruptDatabase: if the normalized table has no key at or after an indexed offset.
        '''
        toolong = isinstance(valu, str) and len(valu) >= s_lmdb.LARGE_STRING_SIZE
        if toolong and not exact:
            raise s_exc.BadOperArg(mesg='prefix search valu cannot exceed 128 characters')

        for offset, offset_enc, _, txn in self._iterrows(prop, valu, exact):
            record = {}
            prefixlen = len(offset_enc)

            with txn.cursor(db=self._normtbl) as curs:
                positioned = curs.set_range(offset_enc)
                if not positioned:
                    raise s_exc.CorruptDatabase('Missing normalized record')  # pragma: no cover

                # Walk every normalized entry whose key begins with this encoded offset.
                while positioned:
                    curkey, norm_enc = curs.item()
                    if curkey[:prefixlen] != offset_enc:
                        break

                    # The rest of the key, after the offset, is the encoded index id.
                    iid = _iid_un(curkey[prefixlen:])

                    # this is racy with the worker, but it is still safe
                    idx = self._meta.indices.get(iid)
                    if idx is not None:
                        record[idx.propname] = s_msgpack.un(norm_enc)

                    positioned = curs.next()

            yield offset, record
Exemple #9
0
    async def execStormCmd(self, runt, genr):
        '''
        Move the oldtag option's tag tree to the newtag option, then pass genr through.

        Every syn:tag node in the old tree gets a counterpart under the new tag (doc,
        title and tags copied over) and has its isnow prop set to that counterpart.
        Each node carrying the old tag then has its affected tags replaced by the new
        names.  The inbound genr is only consumed after all of that has completed.

        Args:
            runt: the runtime; its snap attribute is used for all node operations.
            genr: async generator of (node, path) tuples, yielded unchanged.

        Raises:
            s_exc.BadOperArg: if no syn:tag node exists for oldtag, or if the old and
                new tag values are the same.
        '''
        snap = runt.snap

        matches = []
        async for tagnode in snap.getNodesBy('syn:tag', self.opts.oldtag):
            matches.append(tagnode)

        if len(matches) == 0:
            raise s_exc.BadOperArg(mesg='Cannot move a tag which does not exist.', oldtag=self.opts.oldtag)

        oldstr = matches[0].ndef[1]
        oldsize = len(oldstr)
        oldparts = oldstr.split('.')
        noldparts = len(oldparts)

        newroot = await snap.addNode('syn:tag', self.opts.newtag)
        newstr = newroot.ndef[1]

        if oldstr == newstr:
            raise s_exc.BadOperArg(mesg='Cannot retag a tag to the same valu.', newtag=newstr, oldtag=oldstr)

        # Maps each old tag name to the name replacing it.
        retag = {oldstr: newstr}

        # Pass one: create the new syn:tag nodes and point the old ones at them via isnow.
        async for oldnode in snap.getNodesBy('syn:tag', self.opts.oldtag, cmpr='^='):

            tagstr = oldnode.ndef[1]

            # A string prefix match is not enough; the leading tag parts must match too.
            if tagstr.split('.')[:noldparts] != oldparts:
                continue

            newtag = f'{newstr}{tagstr[oldsize:]}'
            newnode = await snap.addNode('syn:tag', newtag)

            for propname in ('doc', 'title'):
                propvalu = oldnode.get(propname)
                if propvalu is not None:
                    await newnode.set(propname, propvalu)

            # Carry over any tags present on the old syn:tag node.
            for tagname, tagvalu in oldnode.tags.items():
                await newnode.addTag(tagname, tagvalu)

            retag[tagstr] = newtag
            await oldnode.set('isnow', newtag)

        # Pass two: swap the tags on every node carrying the old tag.
        count = 0
        async for node in snap.getNodesBy(f'#{oldstr}'):

            count += 1

            for name, valu in sorted(node.tags.items(), reverse=True):

                target = retag.get(name)
                if target is None:
                    continue

                await node.delTag(name)
                await node.addTag(target, valu=valu)

        await snap.printf(f'moved tags on {count} nodes.')

        async for item, path in genr:
            yield item, path