def test_pack_unpack():
    """Check that ``found.unpack`` reverses ``found.pack`` on a nested tuple.

    The sample mixes a UUID, ``None``, a ``SingleFloat``, bytes, positive
    and negative integers and floats, text, and nested tuples.
    """
    nested = (
        uuid4(),
        None,
        SingleFloat(3.1415),
        b"x42",
        1,
        -1,
        3.1415,
        -3.1415,
        ("abc",),
    )
    value = (nested, ("d", "e", "f"), 2.718281828459045)

    packed = found.pack(value)
    assert found.unpack(packed) == value
async def test_query():
    """Store ten numbered pairs, then check a range query over them.

    Keys are the packed integers 0..9 and values the packed decimal
    strings "0".."9". The query is issued between the packed keys for 1
    and 8, and each returned pair is compared, in order, against the
    integers starting at 1.
    """
    # prepare
    db = await open()

    async def populate(tx):
        for number in range(10):
            key = found.pack((number,))
            value = found.pack((str(number),))
            found.set(tx, key, value)

    await found.transactional(db, populate)

    async def fetch(tx):
        lower = found.pack((1,))
        upper = found.pack((8,))
        return await aiolist(found.query(tx, lower, upper))

    rows = await found.transactional(db, fetch)

    # range(1, 9) is the same sequence as range(10)[1:-1].
    for (key, value), expected in zip(rows, range(1, 9)):
        assert found.unpack(key)[0] == expected
        assert found.unpack(value)[0] == str(expected)
async def uuid(self, tr):
    """Return a freshly generated UUID that is not used as a subject yet.

    A random UUID is generated, and the store is probed for any key that
    starts with ``(self._prefix, PREFIX_SPO, uid)``.

    Args:
        tr: transaction object exposing an awaitable ``get_range``.

    Returns:
        The generated UUID.

    Raises:
        AssertionError: if a key for the generated UUID already exists.
    """
    uid = uuid4()
    start = found.pack((self._prefix, PREFIX_SPO, uid))
    # Restrict the probe to keys prefixed by this exact subject. Scanning
    # up to b"\xFF" would return the first key of *any* later subject or
    # subspace, which then could not be safely unpacked into five fields.
    end = found.strinc(start)
    items = await tr.get_range(start, end, limit=1)
    if items:
        # Raised explicitly (not via ``assert``) so the check survives
        # ``python -O``; the exception type is unchanged for callers.
        raise AssertionError("Unlikely Error!")
    return uid
async def _lookup_pos_subjects(self, tr, predicate, object):
    """Return the subjects stored under ``(predicate, object)``.

    ``predicate`` is the key used to look the predicate up in
    ``self._predicates``; ``object`` is the raw value, which is packed
    with that predicate before the key prefix is built. The subject is
    the fifth and last field of every matching key.
    """
    spec = self._predicates[predicate]
    packed = spec.pack(object)

    lower = found.pack((self._prefix, PREFIX_POS, spec.name, packed))
    upper = found.strinc(lower)
    rows = await tr.get_range(lower, upper)

    subjects = []
    for key, _ in rows:
        _, _, _, _, subject = found.unpack(key)
        subjects.append(subject)
    return subjects
def _score(args):
    """Score one candidate against a list of keywords.

    ``args`` is a ``(candidate, keywords, counter)`` triple. ``counter``
    is decoded with ``zstd.decompress`` followed by ``found.unpack`` and
    turned into a dictionary.

    Returns ``(candidate, score)`` where ``score`` is the sum of the
    counts of every keyword, or ``None`` as soon as one keyword is
    missing from the counter.
    """
    candidate, keywords, counter = args
    counts = dict(found.unpack(zstd.decompress(counter)))

    total = 0
    for keyword in keywords:
        if keyword not in counts:
            return None
        total += counts[keyword]
    return (candidate, total)
async def all(self, tr):
    """Return every stored triple as ``(subject, predicate name, object)``.

    Reads the whole ``(self._prefix, PREFIX_SPO)`` key range and decodes
    each object with the ``unpack`` method of its predicate.
    """
    lower = found.pack((self._prefix, PREFIX_SPO))
    upper = found.strinc(lower)
    log.debug(
        "fetching everything between start=%r and end=%r", lower, upper
    )

    rows = await tr.get_range(lower, upper)
    triples = []
    for key, _ in rows:  # value is always empty
        _, _, subject, name, packed = found.unpack(key)
        spec = self._predicates[name]
        value = spec.unpack(packed)
        triples.append((subject, spec.name, value))
    return triples
async def massage(tx, store, candidate, keywords, hits):
    """Score ``candidate`` against ``keywords`` and record it in ``hits``.

    The candidate's counter is read from the key
    ``(store.prefix_counters, candidate)``, then decoded with
    ``zstd.decompress`` followed by ``found.unpack``. When every keyword
    is present, ``hits[candidate]`` is set to the sum of their counts;
    otherwise the function returns early and ``hits`` is left untouched.
    """
    key = found.pack((store.prefix_counters, candidate))
    blob = await found.get(tx, key)

    # TODO: replace the dictionary and the following for loop with
    # a single iteration over the counter, using zigzag algorithm.
    counts = dict(found.unpack(zstd.decompress(blob)))

    total = 0
    for keyword in keywords:
        if keyword not in counts:
            return None
        total += counts[keyword]

    hits[candidate] = total
async def search(tx, store, keywords, limit=13):
    """Return the best-scoring documents that contain all ``keywords``.

    Args:
        tx: transaction passed through to every ``found`` call.
        store: object providing ``tokens``, ``prefix_index`` and (via
            ``massage``) ``prefix_counters``.
        keywords: iterable of keywords to look for.
        limit: maximum number of hits to return.

    Returns:
        A list of ``(uid, score)`` pairs as produced by
        ``Counter.most_common(limit)``. The list is empty when no keyword
        is given or when at least one keyword is unknown to the store.
    """
    coroutines = (
        _keywords_to_token(tx, store.tokens, keyword) for keyword in keywords
    )
    tokens = await asyncio.gather(*coroutines)

    # Without any keyword there is nothing to seed the search with (and
    # ``min`` below would raise ValueError on an empty sequence).
    #
    # If a keyword is not present in store.tokens, then there is no
    # document associated with it, hence there is no document that
    # match that keyword, hence no document that has all the requested
    # keywords. Return an empty list in both cases.
    if not tokens or any(token is None for token in tokens):
        return []

    # Select seed token: the one with the smallest size.
    coroutines = (
        _token_to_size(tx, store.prefix_index, token) for token in tokens
    )
    sizes = await asyncio.gather(*coroutines)
    _, seed = min(zip(sizes, tokens), key=itemgetter(0))

    # Select candidates: every uid indexed under the seed token.
    candidates = []
    prefix = found.pack((store.prefix_index, seed))
    query = found.query(tx, prefix, found.next_prefix(prefix))
    async for key, _ in query:
        _, _, uid = found.unpack(key)
        candidates.append(uid)

    # XXX: the sample size (originally 500) was empirically discovered,
    # to make it so that the search takes less than 1 second or so.
    if len(candidates) >= FOUND_PSTORE_SAMPLE_COUNT:
        candidates = random.sample(candidates, FOUND_PSTORE_SAMPLE_COUNT)

    # score, filter and construct hits aka. massage
    hits = Counter()
    coroutines = (massage(tx, store, c, tokens, hits) for c in candidates)
    await asyncio.gather(*coroutines)

    return hits.most_common(limit)
async def where(self, tr, pattern, *patterns):
    """Match triple patterns against the store and return the bindings.

    ``pattern`` seeds the list of bindings. Every pattern in ``patterns``
    is then bound against each surviving binding (``pattern_bind``) and
    either filters that binding out or extends it with new variables.

    A pattern is a ``(subject, predicate, object)`` triple whose items
    are either concrete values or ``var`` instances. Supported shapes,
    written as "is this item a ``var``?":

    * seed pattern: ``(True, False, False)``, ``(False, True, True)``,
      ``(False, False, True)``;
    * following patterns, once bound: ``(False, False, False)``,
      ``(False, False, True)``, ``(True, False, False)``.

    Args:
        tr: transaction object exposing an awaitable ``get_range``.
        pattern: the seed pattern.
        *patterns: further patterns, applied in order.

    Returns:
        The list of bindings that satisfy every pattern.

    Raises:
        PatternException: if a pattern has an unsupported shape.
    """
    # seed bindings
    shape = tuple(isinstance(item, var) for item in pattern)
    if shape == (True, False, False):
        subject, predicate, target = pattern
        subjects = await self._lookup_pos_subjects(tr, predicate, target)
        name = subject.name
        bindings = [Map().set(name, found_subject) for found_subject in subjects]
    elif shape == (False, True, True):
        # TODO: extract to a method
        subject = pattern[0]
        start = found.pack((self._prefix, PREFIX_SPO, subject))
        end = found.strinc(start)
        items = await tr.get_range(start, end)
        bindings = []
        for key, _ in items:
            _, _, _, predicate_name, packed = found.unpack(key)
            spec = self._predicates[predicate_name]
            value = spec.unpack(packed)
            binding = Map()
            binding = binding.set(pattern[1].name, spec.name)
            binding = binding.set(pattern[2].name, value)
            bindings.append(binding)
    elif shape == (False, False, True):
        # TODO: extract to a method
        subject = pattern[0]
        predicate = pattern[1]
        start = found.pack((self._prefix, PREFIX_SPO, subject, predicate))
        end = found.strinc(start)
        items = await tr.get_range(start, end)
        bindings = []
        for key, _ in items:
            _, _, _, _, packed = found.unpack(key)
            # Use a separate name: rebinding ``predicate`` here would turn
            # the lookup key into the predicate object and break the
            # lookup from the second row onwards.
            spec = self._predicates[predicate]
            value = spec.unpack(packed)
            binding = Map()
            binding = binding.set(pattern[2].name, value)
            bindings.append(binding)
    else:
        raise PatternException(pattern)

    log.debug("seed bindings: %r", bindings)

    # continue matching other patterns, if any.
    for pattern in patterns:
        log.debug("matching pattern: %r", pattern)
        next_bindings = []
        for binding in bindings:
            bound_pattern = pattern_bind(pattern, binding)
            log.debug("bound pattern: %r", bound_pattern)
            shape = tuple(isinstance(item, var) for item in bound_pattern)
            if shape == (False, False, False):
                log.debug("clause: False, False, False")
                ok = await self.exists(tr, *bound_pattern)
                if ok:
                    # this binding is valid against this bound_pattern,
                    # proceed with this binding and continue with
                    # the next pattern.
                    next_bindings.append(binding)
            elif shape == (False, False, True):
                # TODO: extract to a method
                log.debug("clause: False, False, True")
                subject, predicate, target = bound_pattern
                spec = self._predicates[predicate]
                start = found.pack(
                    (self._prefix, PREFIX_SPO, subject, spec.name))
                end = found.strinc(start)
                items = await tr.get_range(start, end)
                for key, _ in items:
                    _, _, _, _, packed = found.unpack(key)
                    # Values read back from a key are in packed form, so
                    # they must be unpacked (as in the seed branches).
                    value = spec.unpack(packed)
                    next_bindings.append(binding.set(target.name, value))
            elif shape == (True, False, False):
                log.debug("clause: True, False, False")
                subject, predicate, target = bound_pattern
                spec = self._predicates[predicate]
                # Pass the raw object: _lookup_pos_subjects packs it
                # itself, packing here as well would pack it twice.
                subjects = await self._lookup_pos_subjects(
                    tr, spec.name, target)
                for found_subject in subjects:
                    next_bindings.append(
                        binding.set(subject.name, found_subject))
            else:
                raise PatternException(pattern)
        bindings = next_bindings
    return bindings