def reset(args):
    if args.force or input('Type "yes" to delete ALL data: ') == 'yes':
        DB.flushdb()
        DS.flushdb()
        print('All data has been deleted.')
    else:
        print('Nothing has been deleted.')

def only_commons(helper):
    if len(helper.tokens) == len(helper.common):
        # Only common terms, shortcut to search
        keys = [t.db_key for t in helper.tokens]
        if helper.geohash_key:
            keys.append(helper.geohash_key)
            helper.debug('Adding geohash %s', helper.geohash_key)
        if len(keys) == 1 or helper.geohash_key:
            helper.add_to_bucket(keys)
        if helper.bucket_dry and len(keys) > 1:
            count = 0
            # Scan the least frequent token.
            helper.tokens.sort(key=lambda t: t.frequency)
            first = helper.tokens[0]
            if first.frequency < config.INTERSECT_LIMIT:
                helper.debug('Under INTERSECT_LIMIT, brute force.')
                keys = [t.db_key for t in helper.tokens]
                helper.add_to_bucket(keys)
            else:
                helper.debug('INTERSECT_LIMIT hit, manual scan on %s', first)
                others = [t.db_key for t in helper.tokens[1:]]
                ids = DB.zrevrange(first.db_key, 0, 500)
                for id_ in ids:
                    count += 1
                    # zrank returns None for missing members; compare
                    # explicitly so rank 0 is not treated as "missing".
                    if all(DB.sismember(f, id_) for f in helper.filters) \
                       and all(DB.zrank(k, id_) is not None for k in others):
                        helper.bucket.add(id_)
                    if helper.bucket_full:
                        break
                helper.debug('%s results after scan (%s loops)',
                             len(helper.bucket), count)

def test_index_edge_ngrams():
    before = count_keys()
    index_edge_ngrams(DB, 'street')
    after = count_keys()
    assert after - before == 3
    assert DB.smembers('n|str') == set([b'street'])
    assert DB.smembers('n|stre') == set([b'street'])
    assert DB.smembers('n|stree') == set([b'street'])

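# The three edge n-gram keys asserted above come from simple prefix expansion
# of the token. A minimal standalone sketch of that expansion, assuming edge
# n-grams run from a minimum length (3 by default) up to one character short
# of the full token; the helper name below is illustrative, not addok's API.
def _edge_ngrams_sketch(token, min_length=3):
    for length in range(min_length, len(token)):
        yield token[:length]

assert list(_edge_ngrams_sketch('street')) == ['str', 'stre', 'stree']
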
def test_force_reset(factory):
    class Args:
        force = True
    factory(name="rue de l'avoine")
    assert DB.keys()
    reset(Args())
    assert not DB.keys()

def test_index_document_with_skip_digit_false(config):
    from addok.helpers.index import _CACHE
    _CACHE.clear()  # Do this in addok.pytest teardown?
    config.TRIGRAM_SKIP_DIGIT = False
    index_document(DOC.copy())
    assert DB.exists('w|123')
    assert DB.exists('w|234')
    assert DB.exists('w|345')
    assert len(DB.keys()) == 17

def test_allow_list_values():
    doc = {
        'id': 'xxxx',
        'type': 'street',
        'name': ['Vernou-la-Celle-sur-Seine', 'Vernou'],
        'city': 'Paris',
        'lat': '49.32545',
        'lon': '4.2565'
    }
    index_document(doc)
    assert DB.zscore('w|vernou', 'd|xxxx') == 4
    assert DB.zscore('w|celle', 'd|xxxx') == 4 / 5

def test_reset(factory, monkeypatch):
    class Args:
        force = False
    factory(name="rue de l'avoine")
    assert DB.keys()
    monkeypatch.setitem(__builtins__, 'input', lambda *args, **kwargs: 'no')
    reset(Args())
    assert DB.keys()
    monkeypatch.setitem(__builtins__, 'input', lambda *args, **kwargs: 'yes')
    reset(Args())
    assert not DB.keys()

def test_deindex_document_should_deindex_list_values():
    doc = {
        'id': 'xxxx',
        'type': 'street',
        'name': ['Vernou-la-Celle-sur-Seine', 'Vernou'],
        'city': 'Paris',
        'lat': '49.32545',
        'lon': '4.2565'
    }
    index_document(doc)
    deindex_document(doc['id'])
    assert not ds._DB.exists('d|xxxx')
    assert not DB.exists('w|vernou')
    assert not DB.exists('w|celle')
    assert len(DB.keys()) == 0

def test_deindex_document_should_deindex_list_values():
    doc = {
        'id': 'xxxx',
        'type': 'street',
        'name': ['Vernou-la-Celle-sur-Seine', 'Vernou'],
        'city': 'Paris',
        'lat': '49.32545',
        'lon': '4.2565'
    }
    index_document(doc)
    deindex_document(doc['id'])
    assert not DB.exists('d|xxxx')
    assert not DB.exists('w|vernou')
    assert not DB.exists('w|celle')
    assert len(DB.keys()) == 0

def only_commons(helper):
    if len(helper.tokens) == len(helper.common):
        # Only common terms, shortcut to search
        keys = [t.db_key for t in helper.tokens]
        if helper.geohash_key:
            keys.append(helper.geohash_key)
            helper.debug('Adding geohash %s', helper.geohash_key)
        if len(keys) == 1 or helper.geohash_key:
            helper.add_to_bucket(keys)
        if helper.bucket_dry and len(keys) > 1:
            # Scan the least frequent token.
            helper.tokens.sort(key=lambda t: t.frequency)
            keys = [t.db_key for t in helper.tokens]
            first = helper.tokens[0]
            if first.frequency < config.INTERSECT_LIMIT:
                helper.debug('Under INTERSECT_LIMIT, force intersect.')
                helper.add_to_bucket(keys)
            else:
                helper.debug('INTERSECT_LIMIT hit, manual scan')
                if helper.filters:
                    # Always consider filters when doing manual intersect.
                    keys = keys + helper.filters
                    # But, hey, can we brute force again?
                    if any(DB.scard(k) < config.INTERSECT_LIMIT
                           for k in helper.filters):
                        helper.debug('Filters under INTERSECT_LIMIT, force')
                        helper.add_to_bucket(keys)
                        return
                helper.debug('manual scan on "%s"', first)
                ids = scripts.manual_scan(keys=keys, args=[helper.min])
                helper.bucket.update(ids)
                helper.debug('%s results after scan', len(helper.bucket))

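# A standalone redis-py sketch of the trade-off only_commons makes around
# INTERSECT_LIMIT, using plain sets and hypothetical demo keys rather than
# addok's actual data model: when the smallest term set is cheap enough, let
# Redis intersect everything server-side; otherwise walk the smallest set and
# test membership in the others.
import redis

r = redis.Redis(db=15)  # hypothetical test database
r.sadd('demo:w|rue', 'd|1', 'd|2', 'd|3')
r.sadd('demo:w|paris', 'd|2', 'd|3', 'd|4')

INTERSECT_LIMIT = 100000
sizes = {key: r.scard(key) for key in ('demo:w|rue', 'demo:w|paris')}
smallest = min(sizes, key=sizes.get)

if sizes[smallest] < INTERSECT_LIMIT:
    # Cheap enough: full server-side intersection.
    ids = r.sinter(*sizes)
else:
    # Too big: scan the smallest set and filter against the others.
    others = [k for k in sizes if k != smallest]
    ids = {m for m in r.sscan_iter(smallest)
           if all(r.sismember(k, m) for k in others)}

print(sorted(ids))
r.delete('demo:w|rue', 'demo:w|paris')  # clean up the demo keys
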
def test_index_document_without_explicit_id():
    doc = DOC.copy()
    del doc['_id']
    index_document(doc)
    assert ds._DB.exists('d|jR')
    assert ds._DB.type('d|jR') == b'string'
    assert DB.exists('w|rue')

def test_should_be_possible_to_override_boost_with_callable(config):
    config.FIELDS = [
        {'key': 'name', 'boost': lambda doc: 5},
        {'key': 'city'},
    ]
    doc = {
        'id': 'xxxx',
        'lat': '49.32545',
        'lon': '4.2565',
        'name': 'Lilas',
        'city': 'Cergy'
    }
    index_document(doc)
    assert DB.exists('d|xxxx')
    assert DB.zscore('w|lilas', 'd|xxxx') == 5
    assert DB.zscore('w|cergy', 'd|xxxx') == 1

def deindex_document(id_, **kwargs):
    key = document_key(id_)
    doc = DB.hgetall(key)
    if not doc:
        return
    tokens = []
    for indexer in config.DEINDEXERS:
        indexer(DB, key, doc, tokens, **kwargs)

def deindex_document(id_, **kwargs):
    key = keys.document_key(id_)
    doc = DB.hgetall(key)
    if not doc:
        return
    tokens = []
    for indexer in config.DEINDEXERS:
        indexer(DB, key, doc, tokens, **kwargs)

def pair(word):
    """See all tokens associated with a given token.

    PAIR lilas"""
    word = list(preprocess_query(word))[0]
    key = pair_key(word)
    tokens = [t.decode() for t in DB.smembers(key)]
    tokens.sort()
    print(white(tokens))
    print(magenta('(Total: {})'.format(len(tokens))))

def test_should_be_possible_to_define_fields_from_config(config):
    config.FIELDS = [
        {'key': 'custom'},
        {'key': 'special'},
    ]
    doc = {
        'id': 'xxxx',
        'lat': '49.32545',
        'lon': '4.2565',
        'custom': 'rue',
        'special': 'Lilas',
        'thisone': 'is not indexed',
    }
    index_document(doc)
    assert DB.exists('d|xxxx')
    assert DB.exists('w|lilas')
    assert DB.exists('w|rue')
    assert not DB.exists('w|indexed')

def pair(cmd, word):
    """See all tokens associated with a given token.

    PAIR lilas"""
    word = list(preprocess_query(word))[0]
    key = pair_key(word)
    tokens = [t.decode() for t in DB.smembers(key)]
    tokens.sort()
    print(white(tokens))
    print(magenta('(Total: {})'.format(len(tokens))))

def test_null_value_should_not_be_index(config):
    doc = {
        'id': 'xxxx',
        'lat': '49.32545',
        'lon': '4.2565',
        'name': 'Port-Cergy',
        'city': ''
    }
    index_document(doc)
    # hgetall returns bytes keys, so compare against a bytes field name.
    assert b'city' not in DB.hgetall('d|xxxx')

def test_field_with_only_non_alphanumeric_chars_is_not_indexed():
    doc = {
        'id': 'xxxx',
        'lat': '49.32545',
        'lon': '4.2565',
        'name': 'Lilas',
        'city': '//'
    }
    index_document(doc)
    # hgetall returns bytes keys, so compare against a bytes field name.
    assert b'city' not in DB.hgetall('d|xxxx')

def _compute_onetomany_relations(tokens):
    relations = defaultdict(list)
    for token in tokens:
        for other in tokens:
            if other == token:
                continue
            if (token in relations[other]
                    or DB.sismember(pair_key(token), other)):
                relations[token].append(other)
    return relations

def index_document(doc, **kwargs):
    key = keys.document_key(doc['id'])
    pipe = DB.pipeline()
    tokens = {}
    for indexer in config.INDEXERS:
        try:
            indexer(pipe, key, doc, tokens, **kwargs)
        except ValueError as e:
            print(e)
            return  # Do not index.
    pipe.execute()

def index_document(doc, **kwargs):
    key = document_key(doc['id'])
    pipe = DB.pipeline()
    tokens = {}
    for indexer in config.INDEXERS:
        try:
            indexer(pipe, key, doc, tokens, **kwargs)
        except ValueError as e:
            print(e)
            return  # Do not index.
    pipe.execute()

def test_create_edge_ngrams(config):
    config.MIN_EDGE_NGRAMS = 2
    doc = {
        'id': 'xxxx',
        'lat': '49.32545',
        'lon': '4.2565',
        'name': '28 Lilas',  # 28 should not appear in ngrams.
        'city': 'Paris'
    }
    index_document(doc, update_ngrams=False)
    assert not DB.exists('n|li')
    assert not DB.exists('n|lil')
    assert not DB.exists('n|pa')
    assert not DB.exists('n|par')
    create_edge_ngrams()
    assert DB.exists('n|li')
    assert DB.exists('n|lil')
    assert DB.exists('n|pa')
    assert DB.exists('n|par')
    assert not DB.exists('n|28')
    assert len(DB.keys()) == 12

def do_fuzzyindex(self, word):
    """Compute fuzzy extensions of word that exist in index.

    FUZZYINDEX lilas"""
    word = list(preprocess_query(word))[0]
    token = Token(word)
    token.make_fuzzy()
    neighbors = [(n, DB.zcard(dbkeys.token_key(n))) for n in token.neighbors]
    neighbors.sort(key=lambda n: n[1], reverse=True)
    for token, freq in neighbors:
        if freq == 0:
            break
        print(white(token), blue(freq))

def test_should_be_possible_to_override_boost_with_callable(config):
    config.FIELDS = [
        {'key': 'name', 'boost': lambda doc: 5},
        {'key': 'city'},
    ]
    doc = {
        'id': 'xxxx',
        'lat': '49.32545',
        'lon': '4.2565',
        'name': 'Lilas',
        'city': 'Cergy'
    }
    index_document(doc)
    assert ds._DB.exists('d|xxxx')
    assert DB.zscore('w|lilas', 'd|xxxx') == 5
    assert DB.zscore('w|cergy', 'd|xxxx') == 1

def do_fuzzyindex(self, word):
    """Compute fuzzy extensions of word that exist in index.

    FUZZYINDEX lilas"""
    word = list(preprocess_query(word))[0]
    token = Token(word)
    neighbors = make_fuzzy(token)
    neighbors = [(n, DB.zcard(dbkeys.token_key(n))) for n in neighbors]
    neighbors.sort(key=lambda n: n[1], reverse=True)
    for token, freq in neighbors:
        if freq == 0:
            break
        print(white(token), blue(freq))

def test_should_be_possible_to_override_boost_from_config(config):
    config.FIELDS = [
        {'key': 'name', 'boost': 5},
        {'key': 'city'},
    ]
    doc = {
        'id': 'xxxx',
        'lat': '49.32545',
        'lon': '4.2565',
        'name': 'Lilas',
        'city': 'Cergy'
    }
    index_document(doc)
    assert DB.exists('d|xxxx')
    assert DB.zscore('w|lila', 'd|xxxx') == 5
    assert DB.zscore('w|serji', 'd|xxxx') == 1

def test_index_housenumber_uses_housenumber_preprocessors(config):
    doc = {
        "id": "xxxx",
        "type": "street",
        "name": "rue des Lilas",
        "city": "Paris",
        "lat": "49.32545",
        "lon": "4.2565",
        "housenumbers": {"1 bis": {"lat": "48.325451", "lon": "2.25651"}},
    }
    index_document(doc)
    index = DB.hgetall("d|xxxx")
    assert index[b"h|1b"] == b"1 bis|48.325451|2.25651"

def test_should_be_possible_to_define_fields_from_config(config):
    config.FIELDS = [
        {'key': 'custom'},
        {'key': 'special'},
    ]
    doc = {
        'id': 'xxxx',
        'lat': '49.32545',
        'lon': '4.2565',
        'custom': 'rue',
        'special': 'Lilas',
        'thisone': 'is not indexed',
    }
    index_document(doc)
    assert ds._DB.exists('d|xxxx')
    assert DB.exists('w|lilas')
    assert DB.exists('w|rue')
    assert not DB.exists('w|indexed')

def test_doc_with_null_value_should_not_be_index_if_not_allowed(config):
    config.FIELDS = [
        {'key': 'name', 'null': False},
        {'key': 'city'},
    ]
    doc = {
        'id': 'xxxx',
        'lat': '49.32545',
        'lon': '4.2565',
        'name': '',
        'city': 'Cergy'
    }
    index_document(doc)
    assert not DB.exists('d|xxxx')

def index_ngram_keys(*keys):
    pipe = DB.pipeline(transaction=False)
    for key in keys:
        key = key.decode()
        _, token = key.split('|')
        if token.isdigit():
            continue
        index_edge_ngrams(pipe, token)
    try:
        pipe.execute()
    except redis.RedisError as e:
        msg = 'Error while generating ngrams:\n{}'.format(str(e))
        raise ValueError(msg)
    return keys

def index_document(doc, **kwargs):
    key = keys.document_key(doc['id'])
    pipe = DB.pipeline()
    tokens = {}
    for indexer in config.INDEXERS:
        try:
            indexer(pipe, key, doc, tokens, **kwargs)
        except ValueError as e:
            print(e)
            return  # Do not index.
    try:
        pipe.execute()
    except redis.RedisError as e:
        msg = 'Error while importing document:\n{}\n{}'.format(doc, str(e))
        raise ValueError(msg)

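# What pipe.execute() buys in the indexer above, as a minimal redis-py sketch
# with illustrative demo keys (not addok's actual write set): every write
# queued by the indexers is sent to Redis in a single round trip, and nothing
# is sent at all if an indexer raised before execute() was reached.
import redis

r = redis.Redis(db=15)  # hypothetical test database
pipe = r.pipeline()
pipe.hset('demo:d|xxxx', mapping={'name': 'rue des Lilas', 'city': 'Paris'})
pipe.zadd('demo:w|lilas', {'demo:d|xxxx': 4})
pipe.sadd('demo:p|lilas', 'rue', 'des')
pipe.execute()  # one round trip for the three queued commands
r.delete('demo:d|xxxx', 'demo:w|lilas', 'demo:p|lilas')  # clean up
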
def create_edge_ngrams():
    start = time.time()
    pool = Pool()
    count = 0
    chunk = []
    for key in DB.scan_iter(match='w|*'):
        count += 1
        chunk.append(key)
        if count % 10000 == 0:
            pool.map(index_ngram_key, chunk)
            print("Done", count, time.time() - start)
            chunk = []
    if chunk:
        pool.map(index_ngram_key, chunk)
    pool.close()
    pool.join()
    print('Done', count, 'in', time.time() - start)

def create_edge_ngrams(*args):
    start = time.time()
    pool = Pool()
    count = 0
    chunk = []
    for key in DB.scan_iter(match='w|*'):
        count += 1
        chunk.append(key)
        if count % 10000 == 0:
            pool.map(index_ngram_key, chunk)
            print("Done", count, time.time() - start)
            chunk = []
    if chunk:
        pool.map(index_ngram_key, chunk)
    pool.close()
    pool.join()
    print('Done', count, 'in', time.time() - start)

def index_documents(docs):
    pipe = DB.pipeline(transaction=False)
    for doc in docs:
        if not doc:
            continue
        if doc.get('_action') in ['delete', 'update']:
            key = keys.document_key(doc['_id']).encode()
            known_doc = get_document(key)
            if known_doc:
                deindex_document(known_doc)
        if doc.get('_action') in ['index', 'update', None]:
            index_document(pipe, doc)
        yield doc
    try:
        pipe.execute()
    except redis.RedisError as e:
        msg = 'Error while importing document:\n{}\n{}'.format(doc, str(e))
        raise ValueError(msg)

def store_documents(docs):
    to_upsert = []
    to_remove = []
    for doc in docs:
        if not doc:
            continue
        if '_id' not in doc:
            doc['_id'] = DB.next_id()
        key = keys.document_key(doc['_id'])
        if doc.get('_action') in ['delete', 'update']:
            to_remove.append(key)
        if doc.get('_action') in ['index', 'update', None]:
            to_upsert.append((key, config.DOCUMENT_SERIALIZER.dumps(doc)))
        yield doc
    if to_remove:
        DS.remove(*to_remove)
    if to_upsert:
        DS.upsert(*to_upsert)

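# Both store_documents and index_documents above are lazy generators that
# yield each document they handle, so nothing happens until the chain is
# consumed. A hedged sketch of driving them over a batch; the helper name is
# illustrative and the real wiring in addok may chain more stages, or chain
# them in a different order.
from collections import deque

def _run_batch_sketch(docs):
    # Exhaust the chained generators; maxlen=0 discards the yielded docs.
    deque(index_documents(store_documents(docs)), maxlen=0)
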
def test_index_housenumber_uses_housenumber_preprocessors():
    # By default it glues ordinal to number.
    doc = {
        'id': 'xxxx',
        'type': 'street',
        'name': 'rue des Lilas',
        'city': 'Paris',
        'lat': '49.32545',
        'lon': '4.2565',
        'housenumbers': {
            '1 bis': {
                'lat': '48.325451',
                'lon': '2.25651'
            }
        }
    }
    index_document(doc)
    index = DB.hgetall('d|xxxx')
    assert index[b'h|1b'] == b'1 bis|48.325451|2.25651'

def autocomplete(helper, tokens, skip_commons=False, use_geohash=False):
    helper.debug('Autocompleting %s', helper.last_token)
    keys = [t.db_key for t in tokens if not t.is_last]
    pair_keys = [pair_key(t) for t in tokens if not t.is_last]
    key = edge_ngram_key(helper.last_token)
    autocomplete_tokens = DB.sinter(pair_keys + [key])
    helper.debug('Found tokens to autocomplete %s', autocomplete_tokens)
    for token in autocomplete_tokens:
        key = dbkeys.token_key(token.decode())
        if skip_commons \
                and token_key_frequency(key) > config.COMMON_THRESHOLD:
            helper.debug('Skip common token to autocomplete %s', key)
            continue
        if not helper.bucket_overflow or helper.last_token in helper.not_found:
            helper.debug('Trying to extend bucket. Autocomplete %s', key)
            extra_keys = [key]
            if use_geohash and helper.geohash_key:
                extra_keys.append(helper.geohash_key)
            helper.add_to_bucket(keys + extra_keys)

def test_doc_with_null_value_should_not_be_index_if_not_allowed(config):
    config.FIELDS = [
        {'key': 'name', 'null': False},
        {'key': 'city'},
    ]
    doc = {
        'id': 'xxxx',
        'lat': '49.32545',
        'lon': '4.2565',
        'name': '',
        'city': 'Cergy'
    }
    index_document(doc)
    assert not DB.exists('w|cergy')

def autocomplete(helper, tokens, skip_commons=False, use_geohash=False):
    helper.debug('Autocompleting %s', helper.last_token)
    # helper.last_token.autocomplete()
    keys = [t.db_key for t in tokens if not t.is_last]
    pair_keys = [pair_key(t) for t in tokens if not t.is_last]
    key = edge_ngram_key(helper.last_token)
    autocomplete_tokens = DB.sinter(pair_keys + [key])
    helper.debug('Found tokens to autocomplete %s', autocomplete_tokens)
    for token in autocomplete_tokens:
        key = dbkeys.token_key(token.decode())
        if skip_commons \
                and token_key_frequency(key) > config.COMMON_THRESHOLD:
            helper.debug('Skip common token to autocomplete %s', key)
            continue
        if not helper.bucket_overflow or helper.last_token in helper.not_found:
            helper.debug('Trying to extend bucket. Autocomplete %s', key)
            extra_keys = [key]
            if use_geohash and helper.geohash_key:
                extra_keys.append(helper.geohash_key)
            helper.add_to_bucket(keys + extra_keys)

def try_fuzzy(helper, tokens, include_common=True):
    if not helper.bucket_dry or not tokens:
        return
    helper.debug('Fuzzy on. Trying with %s.', tokens)
    tokens.sort(key=lambda t: len(t), reverse=True)
    allkeys = helper.keys[:]
    if include_common:
        # As we are in fuzzy, try to narrow as much as possible by adding
        # unused common tokens.
        allkeys.extend(
            [t.db_key for t in helper.common if t.db_key not in helper.keys])
    for try_one in tokens:
        if helper.bucket_full:
            break
        keys = allkeys[:]
        if try_one.db_key in keys:
            keys.remove(try_one.db_key)
        if try_one.isdigit():
            continue
        helper.debug('Going fuzzy with %s and %s', try_one, keys)
        neighbors = make_fuzzy(try_one, max=helper.fuzzy)
        if len(keys):
            # Only retain tokens that have been seen in the index at least
            # once with the other tokens.
            DB.sadd(helper.pid, *neighbors)
            interkeys = [pair_key(k[2:]) for k in keys]
            interkeys.append(helper.pid)
            fuzzy_words = DB.sinter(interkeys)
            DB.delete(helper.pid)
            # Keep the priority we gave in building fuzzy terms (inversion
            # first, then substitution, etc.).
            fuzzy_words = [w.decode() for w in fuzzy_words]
            fuzzy_words.sort(key=lambda x: neighbors.index(x))
        else:
            # The token we are considering is alone.
            fuzzy_words = []
            for neighbor in neighbors:
                key = dbkeys.token_key(neighbor)
                count = DB.zcard(key)
                if count:
                    fuzzy_words.append(neighbor)
        if fuzzy_words:
            helper.debug('Found fuzzy candidates %s', fuzzy_words)
            fuzzy_keys = [dbkeys.token_key(w) for w in fuzzy_words]
            for key in fuzzy_keys:
                if helper.bucket_dry:
                    helper.add_to_bucket(keys + [key])

def test_index_should_join_housenumbers_payload_fields(config):
    config.HOUSENUMBERS_PAYLOAD_FIELDS = ['key', 'one']
    doc = {
        'id': 'xxxx',
        'type': 'street',
        'name': 'rue des Lilas',
        'city': 'Paris',
        'lat': '49.32545',
        'lon': '4.2565',
        'housenumbers': {
            '1 bis': {
                'lat': '48.325451',
                'lon': '2.25651',
                'key': 'myvalue',
                'thisone': 'no',
                'one': 'two',
            }
        }
    }
    index_document(doc)
    index = DB.hgetall('d|xxxx')
    assert index[b'h|1bis'] == b'1 bis|48.325451|2.25651|myvalue|two'

def try_fuzzy(helper, tokens, include_common=True):
    if not helper.bucket_dry or not tokens:
        return
    helper.debug('Fuzzy on. Trying with %s.', tokens)
    tokens.sort(key=lambda t: len(t), reverse=True)
    allkeys = helper.keys[:]
    if include_common:
        # As we are in fuzzy, try to narrow as much as possible by adding
        # unused common tokens.
        common = [t for t in helper.common if t.db_key not in helper.keys]
        allkeys.extend([t.db_key for t in common])
    for try_one in tokens:
        if helper.bucket_full:
            break
        keys = allkeys[:]
        if try_one.db_key in keys:
            keys.remove(try_one.db_key)
        if try_one.isdigit():
            continue
        helper.debug('Going fuzzy with %s', try_one)
        neighbors = make_fuzzy(try_one, max=helper.fuzzy)
        if len(keys):
            # Only retain tokens that have been seen in the index at least
            # once with the other tokens.
            DB.sadd(helper.query, *neighbors)
            interkeys = [pair_key(k[2:]) for k in keys]
            interkeys.append(helper.query)
            fuzzy_words = DB.sinter(interkeys)
            DB.delete(helper.query)
            # Keep the priority we gave in building fuzzy terms (inversion
            # first, then substitution, etc.).
            fuzzy_words = [w.decode() for w in fuzzy_words]
            fuzzy_words.sort(key=lambda x: neighbors.index(x))
        else:
            # The token we are considering is alone.
            fuzzy_words = []
            for neighbor in neighbors:
                key = dbkeys.token_key(neighbor)
                count = DB.zcard(key)
                if count:
                    fuzzy_words.append(neighbor)
        helper.debug('Found fuzzy candidates %s', fuzzy_words)
        fuzzy_keys = [dbkeys.token_key(w) for w in fuzzy_words]
        for key in fuzzy_keys:
            if helper.bucket_dry:
                helper.add_to_bucket(keys + [key])

def pytest_runtest_teardown(item, nextitem):
    from addok.db import DB
    assert DB.connection_pool.connection_kwargs['db'] == 15
    DB.flushdb()

def edge_ngram_deindexer(db, key, doc, tokens, **kwargs):
    if config.INDEX_EDGE_NGRAMS:
        for token in tokens:
            tkey = dbkeys.token_key(token)
            if not DB.exists(tkey):
                deindex_edge_ngrams(token)

def test_index_document():
    index_document(DOC.copy())
    assert DB.exists('d|xxxx')
    assert DB.type('d|xxxx') == b'hash'
    assert DB.exists('w|rue')
    assert b'd|xxxx' in DB.zrange('w|rue', 0, -1)
    assert DB.exists('w|des')
    assert DB.exists('w|lilas')
    assert DB.exists('w|andresy')
    assert DB.exists('w|un')  # Housenumber.
    assert DB.exists('p|rue')
    assert DB.exists('p|des')
    assert DB.exists('p|lilas')
    assert DB.exists('p|andresy')
    assert b'lilas' in DB.smembers('p|andresy')
    assert b'andresy' in DB.smembers('p|lilas')
    assert DB.exists('p|un')
    assert DB.exists('g|u09dgm7')
    assert b'd|xxxx' in DB.smembers('g|u09dgm7')
    assert DB.exists('n|lil')
    assert DB.exists('n|lila')
    assert DB.exists('n|and')
    assert b'andresy' in DB.smembers('n|and')
    assert DB.exists('n|andr')
    assert b'andresy' in DB.smembers('n|andr')
    assert DB.exists('n|andre')
    assert b'andresy' in DB.smembers('n|andre')
    assert DB.exists('n|andres')
    assert b'andresy' in DB.smembers('n|andres')
    assert b'lilas' in DB.smembers('n|lil')
    assert DB.exists('f|type|street')
    assert b'd|xxxx' in DB.smembers('f|type|street')
    assert DB.exists('f|type|housenumber')
    assert b'd|xxxx' in DB.smembers('f|type|housenumber')
    assert len(DB.keys()) == 20

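# Key prefixes exercised by the assertions in the tests above and below, as
# read from the Redis calls they use: d| document hash (hgetall/type), w|
# token sorted set scored per document (zrange/zscore), p| token pair set
# (smembers), n| edge n-gram set (smembers), g| geohash set (smembers), f|
# filter set (smembers), and h| housenumber fields stored inside the d| hash.
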
def count_keys():
    """Helper method to return the number of keys in the test database."""
    try:
        return DB.info()['db15']['keys']
    except KeyError:
        return 0

def test_deindex_document_should_deindex():
    index_document(DOC.copy())
    deindex_document(DOC['id'])
    assert not DB.exists('d|xxxx')
    assert not DB.exists('w|de')
    assert not DB.exists('w|lilas')
    assert not DB.exists('w|un')  # Housenumber.
    assert not DB.exists('p|rue')
    assert not DB.exists('p|des')
    assert not DB.exists('p|lilas')
    assert not DB.exists('p|un')
    assert not DB.exists('g|u09dgm7')
    assert not DB.exists('n|lil')
    assert not DB.exists('n|and')
    assert not DB.exists('n|andr')
    assert not DB.exists('n|andre')
    assert not DB.exists('n|andres')
    assert not DB.exists('f|type|street')
    assert len(DB.keys()) == 0

def test_deindex_document_should_not_affect_other_docs():
    DOC2 = {
        'id': 'xxxx2',
        'type': 'street',
        'name': 'rue des Lilas',
        'city': 'Paris',
        'lat': '49.32545',
        'lon': '4.2565',
        'housenumbers': {
            '1': {
                'lat': '48.325451',  # Same geohash as DOC.
                'lon': '2.25651'
            }
        }
    }
    index_document(DOC.copy())
    index_document(DOC2)
    deindex_document(DOC['id'])
    assert not DB.exists('d|xxxx')
    assert b'd|xxxx' not in DB.zrange('w|rue', 0, -1)
    assert b'd|xxxx' not in DB.zrange('w|des', 0, -1)
    assert b'd|xxxx' not in DB.zrange('w|lilas', 0, -1)
    assert b'd|xxxx' not in DB.zrange('w|un', 0, -1)
    assert DB.exists('g|u09dgm7')
    assert b'd|xxxx' not in DB.smembers('g|u09dgm7')
    assert DB.exists('w|des')
    assert DB.exists('w|lilas')
    assert DB.exists('w|un')  # Housenumber.
    assert DB.exists('p|rue')
    assert b'd|xxxx2' in DB.zrange('w|rue', 0, -1)
    assert b'd|xxxx2' in DB.zrange('w|des', 0, -1)
    assert b'd|xxxx2' in DB.zrange('w|lilas', 0, -1)
    assert b'd|xxxx2' in DB.zrange('w|un', 0, -1)
    assert b'd|xxxx2' in DB.smembers('g|u09dgm7')
    assert b'd|xxxx2' in DB.smembers('g|u0g08g7')
    assert DB.exists('p|des')
    assert DB.exists('p|lilas')
    assert DB.exists('p|un')
    assert not DB.exists('n|and')
    assert not DB.exists('n|andr')
    assert not DB.exists('n|andre')
    assert not DB.exists('n|andres')
    assert DB.exists('n|par')
    assert DB.exists('n|pari')
    assert DB.exists('n|lil')
    assert DB.exists('n|lila')
    assert b'lilas' in DB.smembers('n|lil')
    assert b'lilas' in DB.smembers('n|lila')
    assert DB.exists('f|type|street')
    assert b'd|xxxx2' in DB.smembers('f|type|street')
    assert DB.exists('f|type|housenumber')
    assert b'd|xxxx2' in DB.smembers('f|type|housenumber')
    assert len(DB.keys()) == 19

def token_key_frequency(key):
    return DB.zcard(key)

def deindex_token(key, token):
    tkey = keys.token_key(token)
    DB.zrem(tkey, key)

def deindex_geohash(key, lat, lon):
    lat = float(lat)
    lon = float(lon)
    geoh = geohash.encode(lat, lon, config.GEOHASH_PRECISION)
    geok = keys.geohash_key(geoh)
    DB.srem(geok, key)

def search(self):
    if DB.exists(self.key):
        self.db_key = self.key