def only_commons(helper):
    """Fill the helper's bucket when the query holds only common tokens.

    Nothing happens unless ``helper.tokens`` and ``helper.common`` have the
    same length. Otherwise the steps are, in order:

    1. Build the key list from every token's ``db_key`` (plus the geohash
       key when the helper has one) and hand it to
       ``helper.add_to_bucket`` if there is a single key or a geohash key.
    2. If the bucket is still dry and more than one key was involved, sort
       ``helper.tokens`` **in place** by ascending frequency and rebuild the
       key list from the tokens alone (the geohash key is not re-added).
    3. If the least frequent token is under ``config.INTERSECT_LIMIT``,
       intersect directly through ``helper.add_to_bucket``.
    4. Otherwise append ``helper.filters`` to the keys; if any filter set is
       smaller than ``config.INTERSECT_LIMIT`` intersect directly, else
       fall back to ``scripts.manual_scan`` and merge its ids into
       ``helper.bucket``.

    :param helper: search helper exposing ``tokens``, ``common``,
        ``geohash_key``, ``bucket_dry``, ``filters``, ``min``, ``bucket``,
        ``debug()`` and ``add_to_bucket()``.
    :returns: ``None``; results are accumulated on ``helper``.
    """
    # Guard: this collector only applies when every token is a common one.
    if len(helper.tokens) != len(helper.common):
        return

    keys = [token.db_key for token in helper.tokens]
    if helper.geohash_key:
        keys.append(helper.geohash_key)
        helper.debug('Adding geohash %s', helper.geohash_key)

    # Shortcut to search: a lone key, or keys narrowed by the geohash.
    if len(keys) == 1 or helper.geohash_key:
        helper.add_to_bucket(keys)

    # Nothing more to do if we already got results or had a single key.
    if not helper.bucket_dry or len(keys) <= 1:
        return

    # Start from the least frequent token.
    helper.tokens.sort(key=lambda token: token.frequency)
    keys = [token.db_key for token in helper.tokens]
    rarest = helper.tokens[0]

    if rarest.frequency < config.INTERSECT_LIMIT:
        helper.debug('Under INTERSECT_LIMIT, force intersect.')
        helper.add_to_bucket(keys)
        return

    helper.debug('INTERSECT_LIMIT hit, manual scan')
    if helper.filters:
        # Filters always take part in a manual intersect.
        keys = keys + helper.filters
        # A small enough filter set lets us brute force the intersect
        # instead of scanning.
        for filter_key in helper.filters:
            if DB.scard(filter_key) < config.INTERSECT_LIMIT:
                helper.debug('Filters under INTERSECT_LIMIT, force')
                helper.add_to_bucket(keys)
                return

    helper.debug('manual scan on "%s"', rarest)
    scanned_ids = scripts.manual_scan(keys=keys, args=[helper.min])
    helper.bucket.update(scanned_ids)
    helper.debug('%s results after scan', len(helper.bucket))
def test_manual_scan_with_filter(factory):
    """Exercise ``scripts.manual_scan`` with a filter key among ``keys``.

    Three scans are checked: a street filter returning both indexed streets
    (Paris listed before Condom), an unknown filter value returning nothing,
    and a city filter returning only the "Vitry" city document.
    """
    # NOTE(review): this test reads doc['_id'] while test_manual_scan reads
    # doc['id'] -- confirm which key the factory documents are stored under.

    def doc_key(doc):
        # Bytes key expected back from the scan for ``doc``.
        return 'd|{}'.format(doc['_id']).encode()

    city = factory(name="Vitry", type="city")
    factory(name="La monnaye", city="Saint-Loup-Cammas")
    paris_street = factory(
        name="rue de la monnaie", city="Paris", importance=1)
    condom_street = factory(
        name="rue de la monnaie", city="Condom", importance=0.9)

    # Street filter: both streets come back, higher importance first.
    found = scripts.manual_scan(
        keys=['w|rue', 'w|de', 'f|type|street'], args=[2])
    assert found == [doc_key(paris_street), doc_key(condom_street)]

    # A filter value nothing is indexed under yields an empty result.
    found = scripts.manual_scan(
        keys=['w|rue', 'w|de', 'f|type|whatever'], args=[2])
    assert found == []

    # City filter: only the city document matches.
    found = scripts.manual_scan(keys=['w|vitry', 'f|type|city'], args=[2])
    assert found == [doc_key(city)]
def test_manual_scan(factory):
    """Exercise ``scripts.manual_scan`` on word keys only.

    Three documents named "rue de la monnaie" are indexed, yet only the
    Paris and Condom ones are expected back, in that order.
    """
    # NOTE(review): presumably args=[2] caps the number of results, which
    # would explain why the Vitry street is absent -- TODO confirm.
    # NOTE(review): this test reads doc['id'] while
    # test_manual_scan_with_filter reads doc['_id'] -- confirm which key the
    # factory documents are stored under.

    def doc_key(doc):
        # Bytes key expected back from the scan for ``doc``.
        return 'd|{}'.format(doc['id']).encode()

    factory(name="rue de la monnaie", city="Vitry")
    factory(name="La monnaye", city="Saint-Loup-Cammas")
    paris_street = factory(
        name="rue de la monnaie", city="Paris", importance=1)
    condom_street = factory(
        name="rue de la monnaie", city="Condom", importance=0.9)

    found = scripts.manual_scan(
        keys=['w|monnaie', 'w|rue', 'w|de'], args=[2])
    assert found == [doc_key(paris_street), doc_key(condom_street)]