Ejemplo n.º 1
0
def test_index_document_with_skip_digit_false(config):
    """Check the digit keys exist once ``TRIGRAM_SKIP_DIGIT`` is set to False.

    After indexing a copy of ``DOC`` the 'w|123', 'w|234' and 'w|345' keys
    must be present and the database must hold 17 keys in total.
    """
    from addok.helpers.index import _CACHE

    # Start from an empty helper cache before changing the setting.
    # Do this in addok.pytest teardown?
    _CACHE.clear()
    config.TRIGRAM_SKIP_DIGIT = False
    index_document(DOC.copy())
    for digit_key in ('w|123', 'w|234', 'w|345'):
        assert DB.exists(digit_key)
    assert len(DB.keys()) == 17
Ejemplo n.º 2
0
def test_deindex_document_should_deindex_list_values():
    """Deindexing a document whose ``name`` is a list must leave no key."""
    street = {
        'id': 'xxxx',
        'type': 'street',
        'name': ['Vernou-la-Celle-sur-Seine', 'Vernou'],
        'city': 'Paris',
        'lat': '49.32545',
        'lon': '4.2565',
    }
    index_document(street)
    deindex_document(street['id'])
    # Neither the document key nor the words from the list may survive.
    for stale_key in ('d|xxxx', 'w|vernou', 'w|celle'):
        assert not DB.exists(stale_key)
    assert len(DB.keys()) == 0
Ejemplo n.º 3
0
def test_deindex_document_should_deindex_list_values():
    """Deindexing a document whose ``name`` is a list must leave no key.

    The document key is looked up through ``ds._DB``; the word keys are
    looked up through ``DB``.
    """
    street = {
        'id': 'xxxx',
        'type': 'street',
        'name': ['Vernou-la-Celle-sur-Seine', 'Vernou'],
        'city': 'Paris',
        'lat': '49.32545',
        'lon': '4.2565',
    }
    index_document(street)
    deindex_document(street['id'])
    assert not ds._DB.exists('d|xxxx')
    for stale_word in ('w|vernou', 'w|celle'):
        assert not DB.exists(stale_word)
    assert len(DB.keys()) == 0
Ejemplo n.º 4
0
def test_index_document_without_explicit_id():
    """Index a copy of ``DOC`` stripped of ``_id`` and expect key 'd|jR'."""
    payload = DOC.copy()
    payload.pop('_id')
    index_document(payload)

    stored_key = 'd|jR'
    assert ds._DB.exists(stored_key)
    assert ds._DB.type(stored_key) == b'string'
    assert DB.exists('w|rue')
Ejemplo n.º 5
0
def test_should_be_possible_to_define_fields_from_config(config):
    """Only values of the fields listed in ``config.FIELDS`` get word keys."""
    config.FIELDS = [{'key': field_name} for field_name in ('custom', 'special')]
    record = {
        'id': 'xxxx',
        'lat': '49.32545',
        'lon': '4.2565',
        'custom': 'rue',
        'special': 'Lilas',
        'thisone': 'is not indexed',
    }
    index_document(record)
    for present_key in ('d|xxxx', 'w|lilas', 'w|rue'):
        assert DB.exists(present_key)
    # 'thisone' is not declared in FIELDS, so its words must be absent.
    assert not DB.exists('w|indexed')
Ejemplo n.º 6
0
def test_should_be_possible_to_define_fields_from_config(config):
    """Only values of the fields listed in ``config.FIELDS`` get word keys.

    The document key is checked through ``ds._DB``, word keys through ``DB``.
    """
    config.FIELDS = [{'key': 'custom'}, {'key': 'special'}]
    record = {
        'id': 'xxxx',
        'lat': '49.32545',
        'lon': '4.2565',
        'custom': 'rue',
        'special': 'Lilas',
        'thisone': 'is not indexed',
    }
    index_document(record)
    assert ds._DB.exists('d|xxxx')
    for word_key in ('w|lilas', 'w|rue'):
        assert DB.exists(word_key)
    # 'thisone' is not declared in FIELDS, so its words must be absent.
    assert not DB.exists('w|indexed')
Ejemplo n.º 7
0
def test_doc_with_null_value_should_not_be_index_if_not_allowed(config):
    """An empty ``name`` with ``'null': False`` must yield no 'd|xxxx' key."""
    name_field = {'key': 'name', 'null': False}
    city_field = {'key': 'city'}
    config.FIELDS = [name_field, city_field]

    nameless = {
        'id': 'xxxx',
        'lat': '49.32545',
        'lon': '4.2565',
        'name': '',
        'city': 'Cergy',
    }
    index_document(nameless)
    assert not DB.exists('d|xxxx')
Ejemplo n.º 8
0
def test_create_edge_ngrams(config):
    """The 'n|…' keys must only appear after ``create_edge_ngrams`` runs."""
    config.MIN_EDGE_NGRAMS = 2
    place = {
        'id': 'xxxx',
        'lat': '49.32545',
        'lon': '4.2565',
        'name': '28 Lilas',  # 28 should not appear in ngrams
        'city': 'Paris',
    }
    expected_ngrams = ('n|li', 'n|lil', 'n|pa', 'n|par')

    # Indexed with update_ngrams=False: none of the ngram keys exist yet.
    index_document(place, update_ngrams=False)
    for ngram_key in expected_ngrams:
        assert not DB.exists(ngram_key)

    create_edge_ngrams()
    for ngram_key in expected_ngrams:
        assert DB.exists(ngram_key)
    assert not DB.exists('n|28')
    assert len(DB.keys()) == 12
Ejemplo n.º 9
0
def test_create_edge_ngrams(config):
    """Check ngram keys are absent before, and present after, the batch run."""
    config.MIN_EDGE_NGRAMS = 2
    sample = {
        'id': 'xxxx',
        'lat': '49.32545',
        'lon': '4.2565',
        'name': '28 Lilas',  # 28 should not appear in ngrams
        'city': 'Paris',
    }
    prefixes = ('n|li', 'n|lil', 'n|pa', 'n|par')

    index_document(sample, update_ngrams=False)
    for prefix_key in prefixes:
        assert not DB.exists(prefix_key)

    # Build the ngrams in a separate pass and look again.
    create_edge_ngrams()
    for prefix_key in prefixes:
        assert DB.exists(prefix_key)
    assert not DB.exists('n|28')
    assert len(DB.keys()) == 12
Ejemplo n.º 10
0
def test_should_be_possible_to_override_boost_with_callable(config):
    """A callable ``boost`` returning 5 must give 'w|lilas' a score of 5."""
    boosted_name = {'key': 'name', 'boost': lambda doc: 5}
    config.FIELDS = [boosted_name, {'key': 'city'}]

    place = {
        'id': 'xxxx',
        'lat': '49.32545',
        'lon': '4.2565',
        'name': 'Lilas',
        'city': 'Cergy',
    }
    index_document(place)
    assert DB.exists('d|xxxx')
    # The field without an explicit boost is expected to score 1.
    for word_key, expected_score in (('w|lilas', 5), ('w|cergy', 1)):
        assert DB.zscore(word_key, 'd|xxxx') == expected_score
Ejemplo n.º 11
0
def test_doc_with_null_value_should_not_be_index_if_not_allowed(config):
    """An empty ``name`` with ``'null': False`` must yield no 'w|cergy' key."""
    required_name = {'key': 'name', 'null': False}
    optional_city = {'key': 'city'}
    config.FIELDS = [required_name, optional_city]

    nameless = {
        'id': 'xxxx',
        'lat': '49.32545',
        'lon': '4.2565',
        'name': '',
        'city': 'Cergy',
    }
    index_document(nameless)
    # The city word must not be indexed either.
    assert not DB.exists('w|cergy')
Ejemplo n.º 12
0
def test_deindex_document_should_not_affect_other_docs():
    """Deindexing ``DOC`` must keep every key still used by a second doc."""
    other_doc = {
        'id': 'xxxx2',
        'type': 'street',
        'name': 'rue des Lilas',
        'city': 'Paris',
        'lat': '49.32545',
        'lon': '4.2565',
        'housenumbers': {
            '1': {
                'lat': '48.325451',  # Same geohash as DOC.
                'lon': '2.25651',
            },
        },
    }
    removed, kept = b'd|xxxx', b'd|xxxx2'
    shared_words = ('w|rue', 'w|des', 'w|lil', 'w|un')  # 'w|un': housenumber.

    index_document(DOC.copy())
    index_document(other_doc)
    deindex_document(DOC['id'])

    assert not DB.exists('d|xxxx')
    # Shared word keys remain, but no longer reference the removed doc.
    for word_key in shared_words:
        assert DB.exists(word_key)
    for word_key in shared_words:
        assert removed not in DB.zrange(word_key, 0, -1)
    assert DB.exists('g|u09dgm7')
    assert removed not in DB.smembers('g|u09dgm7')

    # The second document is still referenced everywhere.
    for word_key in shared_words:
        assert kept in DB.zrange(word_key, 0, -1)
    for geohash_key in ('g|u09dgm7', 'g|u0g08g7'):
        assert kept in DB.smembers(geohash_key)
    for filter_key in ('f|type|street', 'f|type|housenumber'):
        assert DB.exists(filter_key)
        assert kept in DB.smembers(filter_key)
    assert len(DB.keys()) == 14
Ejemplo n.º 13
0
def test_should_be_possible_to_override_boost_with_callable(config):
    """A callable ``boost`` returning 5 must give 'w|lilas' a score of 5."""
    config.FIELDS = [
        {'key': 'name', 'boost': lambda doc: 5},
        {'key': 'city'},
    ]
    place = {
        'id': 'xxxx',
        'lat': '49.32545',
        'lon': '4.2565',
        'name': 'Lilas',
        'city': 'Cergy',
    }
    index_document(place)
    assert DB.exists('d|xxxx')

    document_key = 'd|xxxx'
    expected_scores = (('w|lilas', 5), ('w|cergy', 1))
    for word_key, score in expected_scores:
        assert DB.zscore(word_key, document_key) == score
Ejemplo n.º 14
0
def test_deindex_document_should_not_affect_other_docs():
    """Deindexing ``DOC`` must keep every key still used by a second doc.

    The document key is checked through ``ds._DB``; index keys through ``DB``.
    """
    other_doc = {
        'id': 'xxxx2',
        '_id': 'xxxx2',
        'type': 'street',
        'name': 'rue des Lilas',
        'city': 'Paris',
        'lat': '49.32545',
        'lon': '4.2565',
        'housenumbers': {
            '1': {
                'lat': '48.325451',  # Same geohash as DOC.
                'lon': '2.25651',
            },
        },
    }
    removed, kept = b'd|xxxx', b'd|xxxx2'
    shared_words = ('w|rue', 'w|des', 'w|lil')

    index_document(DOC.copy())
    index_document(other_doc)
    deindex_document(DOC['id'])

    assert not ds._DB.exists('d|xxxx')
    for word_key in shared_words:
        assert DB.exists(word_key)
    # 'w|un' is only checked for the absence of the removed document.
    for word_key in shared_words + ('w|un',):
        assert removed not in DB.zrange(word_key, 0, -1)
    assert DB.exists('g|u09dgm7')
    assert removed not in DB.smembers('g|u09dgm7')

    for word_key in shared_words:
        assert kept in DB.zrange(word_key, 0, -1)
    for geohash_key in ('g|u09dgm7', 'g|u0g08g7'):
        assert kept in DB.smembers(geohash_key)
    for filter_key in ('f|type|street', 'f|type|housenumber'):
        assert DB.exists(filter_key)
        assert kept in DB.smembers(filter_key)
    assert len(DB.keys()) == 12
Ejemplo n.º 15
0
def deindex_token(key, token):
    """Remove ``key`` from the token's set and clean up orphaned edge ngrams.

    ``key`` is removed with ``DB.zrem`` from the key built by ``token_key``.
    If that key no longer exists afterwards, ``deindex_edge_ngrams`` is called
    for the token.
    """
    token_db_key = token_key(token)
    DB.zrem(token_db_key, key)
    if DB.exists(token_db_key):
        # The token key is still present: keep its edge ngrams.
        return
    deindex_edge_ngrams(token)
Ejemplo n.º 16
0
def test_deindex_document_should_deindex():
    """Indexing then deindexing ``DOC`` must leave ``DB`` without any key."""
    index_document(DOC.copy())
    deindex_document(DOC['id'])
    assert not ds._DB.exists('d|xxxx')

    removed_keys = (
        'w|des', 'w|lil', 'w|ila', 'w|las',
        'w|and', 'w|ndr', 'w|dre', 'w|res', 'w|esy',
        'g|u09dgm7',
        'f|type|street', 'f|type|housenumber',
    )
    for removed_key in removed_keys:
        assert not DB.exists(removed_key)
    assert len(DB.keys()) == 0
Ejemplo n.º 17
0
def deindex_token(key, token):
    """Drop ``key`` from the token's entry, then its ngrams if none is left.

    The entry is addressed by ``token_key(token)`` and ``key`` is removed with
    ``DB.zrem``. ``deindex_edge_ngrams`` runs only when the entry no longer
    exists after the removal.
    """
    entry = token_key(token)
    DB.zrem(entry, key)
    still_used = DB.exists(entry)
    if not still_used:
        deindex_edge_ngrams(token)
Ejemplo n.º 18
0
 def deindex(db, key, doc, tokens, **kwargs):
     """Deindex edge ngrams of every token whose token key no longer exists.

     Does nothing unless ``config.INDEX_EDGE_NGRAMS`` is truthy.

     NOTE(review): ``db``, ``key``, ``doc`` and ``kwargs`` are not used here;
     existence is checked on the module-level ``DB`` — confirm this is intended.
     """
     if not config.INDEX_EDGE_NGRAMS:
         return
     for token in tokens:
         if DB.exists(dbkeys.token_key(token)):
             # Token key still present: leave its ngrams alone.
             continue
         deindex_edge_ngrams(token)
Ejemplo n.º 19
0
 def search(self):
     """Set ``self.db_key`` to ``self.key`` when that key exists in ``DB``.

     NOTE(review): this looks like an excerpt of a longer method; only the
     statements visible here are described — confirm against the full source.
     """
     if DB.exists(self.key):
         self.db_key = self.key
Ejemplo n.º 20
0
def test_deindex_document_should_deindex():
    """Indexing then deindexing ``DOC`` must leave ``DB`` without any key."""
    index_document(DOC.copy())
    deindex_document(DOC['id'])

    # NOTE(review): 'w|de' sits next to 'p|des' below — possibly meant to be
    # 'w|des'; confirm against the indexing test.
    removed_keys = (
        'd|xxxx',
        'w|de', 'w|lilas',
        'w|un',  # Housenumber.
        'p|rue', 'p|des', 'p|lilas', 'p|un',
        'g|u09dgm7',
        'n|lil', 'n|and', 'n|andr', 'n|andre', 'n|andres',
        'f|type|street',
    )
    for removed_key in removed_keys:
        assert not DB.exists(removed_key)
    assert len(DB.keys()) == 0
Ejemplo n.º 21
0
def test_index_document():
    """Index a copy of ``DOC`` and check every expected key and membership."""
    document_ref = b'd|xxxx'
    index_document(DOC.copy())

    # Document itself.
    assert DB.exists('d|xxxx')
    assert DB.type('d|xxxx') == b'hash'

    # Word and pair keys ('w|un' is the housenumber).
    assert DB.exists('w|rue')
    assert document_ref in DB.zrange('w|rue', 0, -1)
    for present_key in ('w|des', 'w|lilas', 'w|andresy', 'w|un',
                        'p|rue', 'p|des', 'p|lilas', 'p|andresy'):
        assert DB.exists(present_key)
    assert b'lilas' in DB.smembers('p|andresy')
    assert b'andresy' in DB.smembers('p|lilas')
    for present_key in ('p|un', 'g|u09dgm7'):
        assert DB.exists(present_key)
    assert document_ref in DB.smembers('g|u09dgm7')

    # Edge ngrams.
    for present_key in ('n|lil', 'n|lila'):
        assert DB.exists(present_key)
    for ngram_key in ('n|and', 'n|andr', 'n|andre', 'n|andres'):
        assert DB.exists(ngram_key)
        assert b'andresy' in DB.smembers(ngram_key)
    assert b'lilas' in DB.smembers('n|lil')

    # Filters.
    for filter_key in ('f|type|street', 'f|type|housenumber'):
        assert DB.exists(filter_key)
        assert document_ref in DB.smembers(filter_key)
    assert len(DB.keys()) == 20
Ejemplo n.º 22
0
def test_index_document():
    """Index a copy of ``DOC`` and check every expected key and membership."""
    document_ref = b'd|xxxx'
    index_document(DOC.copy())

    # Document itself.
    assert DB.exists('d|xxxx')
    assert DB.type('d|xxxx') == b'hash'

    # Word and pair keys ('w|un' is the housenumber).
    assert DB.exists('w|ru')
    assert document_ref in DB.zrange('w|ru', 0, -1)
    for present_key in ('w|de', 'w|lila', 'w|andrezi', 'w|un',
                        'p|ru', 'p|de', 'p|lila', 'p|andrezi'):
        assert DB.exists(present_key)
    assert b'lila' in DB.smembers('p|andrezi')
    assert b'andrezi' in DB.smembers('p|lila')
    for present_key in ('p|un', 'g|u09dgm7'):
        assert DB.exists(present_key)
    assert document_ref in DB.smembers('g|u09dgm7')

    # Edge ngrams.
    assert DB.exists('n|lil')
    for ngram_key in ('n|and', 'n|andr', 'n|andre', 'n|andrez'):
        assert DB.exists(ngram_key)
        assert b'andrezi' in DB.smembers(ngram_key)
    assert b'lila' in DB.smembers('n|lil')

    # Filters.
    for filter_key in ('f|type|street', 'f|type|housenumber'):
        assert DB.exists(filter_key)
        assert document_ref in DB.smembers(filter_key)
    assert len(DB.keys()) == 19
Ejemplo n.º 23
0
def edge_ngram_deindexer(db, key, doc, tokens, **kwargs):
    """Deindex edge ngrams of every token whose token key no longer exists.

    Does nothing unless ``config.INDEX_EDGE_NGRAMS`` is truthy.

    NOTE(review): ``db``, ``key``, ``doc`` and ``kwargs`` are not used here;
    existence is checked on the module-level ``DB`` — confirm this is intended.
    """
    if not config.INDEX_EDGE_NGRAMS:
        return
    for token in tokens:
        if DB.exists(dbkeys.token_key(token)):
            # Token key still present: leave its ngrams alone.
            continue
        deindex_edge_ngrams(token)
Ejemplo n.º 24
0
def test_index_document():
    """Index a copy of ``DOC`` and check every expected key and membership.

    The document is looked up through ``ds._DB``; index keys through ``DB``.
    """
    document_ref = b'd|xxxx'
    index_document(DOC.copy())

    # Document itself.
    assert ds._DB.exists('d|xxxx')
    assert ds._DB.type('d|xxxx') == b'string'

    # Word and pair keys ('w|1' is the housenumber).
    assert DB.exists('w|rue')
    assert document_ref in DB.zrange('w|rue', 0, -1)
    for present_key in ('w|des', 'w|lilas', 'w|andresy', 'w|1',
                        'p|rue', 'p|des', 'p|lilas', 'p|andresy'):
        assert DB.exists(present_key)
    assert b'lilas' in DB.smembers('p|andresy')
    assert b'andresy' in DB.smembers('p|lilas')
    for present_key in ('p|1', 'g|u09dgm7'):
        assert DB.exists(present_key)
    assert document_ref in DB.smembers('g|u09dgm7')

    # Edge ngrams.
    for present_key in ('n|lil', 'n|lila'):
        assert DB.exists(present_key)
    for ngram_key in ('n|and', 'n|andr', 'n|andre', 'n|andres'):
        assert DB.exists(ngram_key)
        assert b'andresy' in DB.smembers(ngram_key)
    assert b'lilas' in DB.smembers('n|lil')

    # Filters.
    for filter_key in ('f|type|street', 'f|type|housenumber'):
        assert DB.exists(filter_key)
        assert document_ref in DB.smembers(filter_key)

    # Key counts for both stores.
    assert len(DB.keys()) == 19
    assert len(ds._DB.keys()) == 1
Ejemplo n.º 25
0
 def search(self):
     """Set ``self.db_key`` to ``self.key`` when that key exists in ``DB``.

     NOTE(review): this looks like an excerpt of a longer method; only the
     statements visible here are described — confirm against the full source.
     """
     if DB.exists(self.key):
         self.db_key = self.key
Ejemplo n.º 26
0
def test_deindex_document_should_deindex():
    """Indexing then deindexing ``DOC`` must empty both ``DB`` and ``ds._DB``."""
    index_document(DOC.copy())
    deindex_document(DOC['id'])
    assert not ds._DB.exists('d|xxxx')

    # NOTE(review): 'w|de' sits next to 'p|des' below — possibly meant to be
    # 'w|des'; confirm against the indexing test.
    removed_keys = (
        'w|de', 'w|lilas',
        'w|1',  # Housenumber.
        'p|rue', 'p|des', 'p|lilas', 'p|1',
        'g|u09dgm7',
        'n|lil', 'n|and', 'n|andr', 'n|andre', 'n|andres',
        'f|type|street',
    )
    for removed_key in removed_keys:
        assert not DB.exists(removed_key)
    assert len(DB.keys()) == 0
    assert len(ds._DB.keys()) == 0
Ejemplo n.º 27
0
def test_deindex_document_should_not_affect_other_docs():
    """Deindexing one document must keep every key the other one still uses.

    A deep copy of ``DOC`` (with an extra housenumber) and a second document
    are indexed; the first is then deindexed by its ``_id``.
    """
    second_doc = {
        'id': 'xxxx2',
        '_id': 'yyyy2',
        'type': 'street',
        'name': 'rue des Lilas',
        'city': 'Paris',
        'lat': '49.32545',
        'lon': '4.2565',
        'housenumbers': {
            '1': {
                'lat': '48.325451',  # Same geohash as DOC.
                'lon': '2.25651',
            },
        },
    }
    first_doc = json.loads(json.dumps(DOC))  # deepcopy.
    # Add new housenumber so we can check it's deindexed.
    first_doc['housenumbers']['2'] = {'lat': '48.325459', 'lon': '2.25659'}
    removed, kept = b'd|yyyy', b'd|yyyy2'
    shared_words = ('w|rue', 'w|des', 'w|lilas')

    index_document(first_doc)
    index_document(second_doc)
    deindex_document(first_doc['_id'])

    # The removed document is gone from every shared entry.
    assert not ds._DB.exists('d|yyyy')
    for word_key in shared_words:
        assert removed not in DB.zrange(word_key, 0, -1)
    assert DB.exists('g|u09dgm7')
    assert removed not in DB.smembers('g|u09dgm7')

    # The remaining document is still fully indexed.
    for present_key in ('w|des', 'w|lilas', 'p|rue'):
        assert DB.exists(present_key)
    for word_key in shared_words:
        assert kept in DB.zrange(word_key, 0, -1)
    for geohash_key in ('g|u09dgm7', 'g|u0g08g7'):
        assert kept in DB.smembers(geohash_key)
    for present_key in ('p|des', 'p|lilas'):
        assert DB.exists(present_key)

    # Ngrams only used by the removed document disappear, the others stay.
    for orphan_key in ('n|and', 'n|andr', 'n|andre', 'n|andres'):
        assert not DB.exists(orphan_key)
    for present_key in ('n|par', 'n|pari', 'n|lil', 'n|lila'):
        assert DB.exists(present_key)
    for ngram_key in ('n|lil', 'n|lila'):
        assert b'lilas' in DB.smembers(ngram_key)

    # Filters.
    for filter_key in ('f|type|street', 'f|type|housenumber'):
        assert DB.exists(filter_key)
        assert kept in DB.smembers(filter_key)

    # Key counts for both stores.
    assert len(DB.keys()) == 16
    assert len(ds._DB.keys()) == 1