Exemple #1
0
def test_pack_unpack():
    """Check that ``found.unpack`` gives back what ``found.pack`` was given.

    The sample mixes a UUID, ``None``, a ``SingleFloat``, bytes, signed
    integers, signed floats and nested tuples.
    """
    mixed = (uuid4(), None, SingleFloat(3.1415), b"x42", 1, -1, 3.1415, -3.1415, ("abc",))  # noqa
    letters = ("d", "e", "f")
    original = (mixed, letters, 2.718281828459045)

    packed = found.pack(original)
    roundtripped = found.unpack(packed)

    assert roundtripped == original
Exemple #2
0
async def test_query():
    """Check that a range query returns the expected key/value pairs.

    Ten ``(number,) -> (str(number),)`` pairs are written, the range from
    ``(1,)`` to ``(8,)`` is queried, and every returned pair is compared
    with the consecutive integers starting at 1.
    """
    db = await open()

    # Write the keys 0..9, each mapped to its string representation.
    async def populate(tx):
        for n in range(10):
            key = found.pack((n,))
            value = found.pack((str(n),))
            found.set(tx, key, value)

    await found.transactional(db, populate)

    # Collect everything the query yields between (1,) and (8,).
    async def fetch(tx):
        lower = found.pack((1,))
        upper = found.pack((8,))
        pairs = found.query(tx, lower, upper)
        return await aiolist(pairs)

    pairs = await found.transactional(db, fetch)

    # zip() stops at the shorter input, so only returned pairs are checked.
    for (key, value), expected in zip(pairs, range(1, 9)):
        assert found.unpack(key)[0] == expected
        assert found.unpack(value)[0] == str(expected)
Exemple #3
0
 async def uuid(self, tr):
     """Return a fresh ``uuid4`` that is not yet used as a subject.

     The range of ``PREFIX_SPO`` keys starting with the candidate identifier
     is probed through ``tr``.  The probe is bounded with ``found.strinc`` so
     that only keys carrying the candidate as a prefix can be returned;
     scanning further would hand back unrelated keys whose shape this
     method cannot know.

     Raises ``AssertionError`` if the candidate is already in use.  It is
     raised explicitly so that the check is kept when Python runs with
     ``-O``.
     """
     uid = uuid4()
     start = found.pack((self._prefix, PREFIX_SPO, uid))
     # Every key that begins with ``start`` sorts before ``strinc(start)``.
     end = found.strinc(start)
     items = await tr.get_range(start, end, limit=1)
     if items:
         # A key already starts with this identifier: collision.
         raise AssertionError("Unlikely Error!")
     return uid
Exemple #4
0
 async def _lookup_pos_subjects(self, tr, predicate, object):
     """Return the subjects stored under ``predicate`` and ``object``.

     ``predicate`` is a key of ``self._predicates``.  ``object`` is encoded
     with that predicate's ``pack`` before the ``PREFIX_POS`` range is
     built.  The result is a list with the last element of every key read
     from that range.
     """
     spec = self._predicates[predicate]
     packed = spec.pack(object)
     lower = found.pack((self._prefix, PREFIX_POS, spec.name, packed))
     upper = found.strinc(lower)
     rows = await tr.get_range(lower, upper)
     subjects = []
     for key, _value in rows:
         # Keys unpack to exactly five elements; the subject comes last.
         _prefix, _index, _name, _object, subject = found.unpack(key)
         subjects.append(subject)
     return subjects
def _score(args):
    """Score one candidate against the requested keywords.

    ``args`` is a ``(candidate, keywords, counter)`` triple.  ``counter``
    is decompressed with ``zstd``, unpacked with ``found`` and turned into
    a dictionary.

    Returns ``(candidate, score)`` where ``score`` is the sum of the counts
    of all keywords, or ``None`` as soon as one keyword is missing from the
    counter.
    """
    candidate, keywords, packed = args
    counts = dict(found.unpack(zstd.decompress(packed)))
    total = 0
    for keyword in keywords:
        if keyword not in counts:
            # One missing keyword disqualifies the candidate.
            return None
        total += counts[keyword]
    return (candidate, total)
Exemple #6
0
 async def all(self, tr):
     """Return every triple found under the ``PREFIX_SPO`` prefix.

     Each entry of the returned list is ``(subject, predicate name,
     object)``, the object being decoded with the ``unpack`` of the
     predicate registered in ``self._predicates``.
     """
     lower = found.pack((self._prefix, PREFIX_SPO))
     upper = found.strinc(lower)
     log.debug("fetching everything between start=%r and end=%r", lower, upper)
     rows = await tr.get_range(lower, upper)
     triples = []
     for key, _ in rows:  # the value part is always empty
         _, _, subject, name, packed = found.unpack(key)
         spec = self._predicates[name]
         value = spec.unpack(packed)
         triples.append((subject, spec.name, value))
     return triples
async def massage(tx, store, candidate, keywords, hits):
    """Score ``candidate`` and record the score in ``hits``.

    The counter stored under ``(store.prefix_counters, candidate)`` is
    fetched, decompressed with ``zstd`` and unpacked into a dictionary.
    When every keyword is present, the sum of their counts is stored in
    ``hits[candidate]``.  When a keyword is missing, ``hits`` is left
    untouched.  The coroutine returns ``None`` in both cases.
    """
    key = found.pack((store.prefix_counters, candidate))
    blob = await found.get(tx, key)
    # TODO: replace the dictionary and the loop below with a single
    # iteration over the counter, using zigzag algorithm.
    counts = dict(found.unpack(zstd.decompress(blob)))
    total = 0
    for keyword in keywords:
        if keyword not in counts:
            # The candidate lacks one of the keywords: do not record it.
            return None
        total += counts[keyword]
    hits[candidate] = total
async def search(tx, store, keywords, limit=13):
    """Return the best-scoring candidates that contain every keyword.

    Steps:

    1. Each keyword is translated with ``_keywords_to_token``.
    2. The token with the smallest ``_token_to_size`` is used as seed.
    3. The identifiers indexed under the seed become the candidates; they
       are randomly sampled down when there are too many.
    4. ``massage`` scores every candidate into a ``Counter``.

    Returns a list of ``(candidate, score)`` pairs, at most ``limit`` long,
    as produced by ``Counter.most_common``.  An empty list is returned when
    ``keywords`` is empty or when one keyword has no token.
    """
    coroutines = (_keywords_to_token(tx, store.tokens, keyword)
                  for keyword in keywords)
    tokens = await asyncio.gather(*coroutines)

    # Without any keyword there is nothing to match, and selecting the seed
    # below would call min() on an empty sequence.
    if not tokens:
        return []

    # If a keyword is not present in store.tokens, then there is no
    # document associated with it, hence no document that has all the
    # requested keywords.
    if any(token is None for token in tokens):
        return []

    # Select seed token: the one with the smallest size.
    coroutines = (_token_to_size(tx, store.prefix_index, token)
                  for token in tokens)
    sizes = await asyncio.gather(*coroutines)
    _, seed = min(zip(sizes, tokens), key=itemgetter(0))

    # Select candidates: every identifier indexed under the seed token.
    candidates = []
    key = found.pack((store.prefix_index, seed))
    query = found.query(tx, key, found.next_prefix(key))

    async for key, _ in query:
        _, _, uid = found.unpack(key)
        candidates.append(uid)

    # XXX: FOUND_PSTORE_SAMPLE_COUNT caps the number of scored candidates.
    #      The original note says 500 was empirically discovered, to make
    #      it so that the search takes less than 1 second or so.
    if len(candidates) >= FOUND_PSTORE_SAMPLE_COUNT:
        candidates = random.sample(candidates, FOUND_PSTORE_SAMPLE_COUNT)

    # score, filter and construct hits aka. massage
    hits = Counter()

    coroutines = (massage(tx, store, c, tokens, hits) for c in candidates)
    await asyncio.gather(*coroutines)

    return hits.most_common(limit)
Exemple #9
0
    async def where(self, tr, pattern, *patterns):
        """Return the bindings that satisfy ``pattern`` and all ``patterns``.

        Every pattern is a ``(subject, predicate, object)`` triple whose
        items are either concrete values or ``var`` instances.  The first
        pattern seeds a list of ``Map`` bindings.  Each following pattern is
        bound with every current binding through ``pattern_bind`` and then
        either filters that binding or extends it with the values it
        matches.

        Values stored in a binding are always decoded with the predicate's
        ``unpack``, in the seed step as well as in the following steps.

        Raises ``PatternException`` when a pattern has a combination of
        variables that is not handled.
        """
        # seed bindings
        shape = tuple(isinstance(item, var) for item in pattern)
        if shape == (True, False, False):
            subject, predicate, target = pattern
            subjects = await self._lookup_pos_subjects(tr, predicate, target)
            name = subject.name
            bindings = [Map().set(name, match) for match in subjects]
        elif shape == (False, True, True):
            # TODO: extract to a method
            subject = pattern[0]
            start = found.pack((self._prefix, PREFIX_SPO, subject))
            end = found.strinc(start)
            items = await tr.get_range(start, end)
            bindings = []
            for key, _ in items:
                _, _, _, name, packed = found.unpack(key)
                spec = self._predicates[name]
                value = spec.unpack(packed)
                binding = Map()
                binding = binding.set(pattern[1].name, spec.name)
                binding = binding.set(pattern[2].name, value)
                bindings.append(binding)
        elif shape == (False, False, True):
            # TODO: extract to a method
            subject = pattern[0]
            predicate = pattern[1]
            start = found.pack((self._prefix, PREFIX_SPO, subject, predicate))
            end = found.strinc(start)
            items = await tr.get_range(start, end)
            bindings = []
            for key, _ in items:
                _, _, _, _, packed = found.unpack(key)
                # Keep ``predicate`` untouched: rebinding it to the looked-up
                # predicate made this lookup fail from the second key on.
                spec = self._predicates[predicate]
                value = spec.unpack(packed)
                binding = Map()
                binding = binding.set(pattern[2].name, value)
                bindings.append(binding)
        else:
            raise PatternException(pattern)

        log.debug("seed bindings: %r", bindings)
        # continue matching other patterns, if any.
        for pattern in patterns:
            log.debug("matching pattern: %r", pattern)
            next_bindings = []
            for binding in bindings:
                bound_pattern = pattern_bind(pattern, binding)
                log.debug("bound pattern: %r", bound_pattern)
                shape = tuple(isinstance(item, var) for item in bound_pattern)
                if shape == (False, False, False):
                    log.debug("clause: False, False, False")
                    ok = await self.exists(tr, *bound_pattern)
                    if ok:
                        # this binding is valid against this bound_pattern,
                        # proceed with this binding and continue with
                        # the next pattern.
                        next_bindings.append(binding)
                elif shape == (False, False, True):
                    # TODO: extract to a method
                    log.debug("clause: False, False, True")
                    subject, predicate, variable = bound_pattern
                    spec = self._predicates[predicate]
                    start = found.pack(
                        (self._prefix, PREFIX_SPO, subject, spec.name))
                    end = found.strinc(start)
                    items = await tr.get_range(start, end)
                    for key, _ in items:
                        _, _, _, _, packed = found.unpack(key)
                        # Keys hold the packed form, so decode it here as the
                        # seed branches do.
                        value = spec.unpack(packed)
                        next_bindings.append(binding.set(variable.name, value))
                elif shape == (True, False, False):
                    log.debug("clause: True, False, False")
                    variable, predicate, target = bound_pattern
                    spec = self._predicates[predicate]
                    # Pass the raw object: _lookup_pos_subjects packs it
                    # itself, packing here too would encode it twice.
                    subjects = await self._lookup_pos_subjects(
                        tr, spec.name, target)
                    for match in subjects:
                        next_bindings.append(binding.set(variable.name, match))
                else:
                    raise PatternException(pattern)
            bindings = next_bindings
        return bindings