def only_commons(helper):
    """Collect results for a query made only of common tokens.

    Does nothing unless every token of ``helper.tokens`` is also counted in
    ``helper.common``. Otherwise:

    1. If there is a single key, or a geohash key is available, delegate
       to ``helper.add_to_bucket`` with the token keys (plus the geohash
       key when set).
    2. If the bucket is still dry and there is more than one key, pick the
       least frequent token. Below ``config.INTERSECT_LIMIT`` all token
       keys are passed to ``helper.add_to_bucket``; otherwise the top
       entries of that token's sorted set are scanned manually and kept
       when they match every filter and every other token.

    Side effect: ``helper.tokens`` is sorted in place by frequency when the
    second step runs.

    :param helper: search helper exposing ``tokens``, ``common``,
        ``geohash_key``, ``filters``, ``bucket``, ``bucket_dry``,
        ``bucket_full``, ``add_to_bucket`` and ``debug``.
    :return: None; results are accumulated on ``helper``.
    """
    if len(helper.tokens) != len(helper.common):
        return

    # Only common terms, shortcut to search.
    keys = [t.db_key for t in helper.tokens]
    if helper.geohash_key:
        keys.append(helper.geohash_key)
        helper.debug('Adding geohash %s', helper.geohash_key)
    if len(keys) == 1 or helper.geohash_key:
        helper.add_to_bucket(keys)

    if not (helper.bucket_dry and len(keys) > 1):
        return

    count = 0
    # Scan the less frequent token.
    helper.tokens.sort(key=lambda t: t.frequency)
    first = helper.tokens[0]
    if first.frequency < config.INTERSECT_LIMIT:
        helper.debug('Under INTERSECT_LIMIT, brut force.')
        keys = [t.db_key for t in helper.tokens]
        helper.add_to_bucket(keys)
        return

    helper.debug('INTERSECT_LIMIT hit, manual scan on %s', first)
    others = [t.db_key for t in helper.tokens[1:]]
    # Assumes DB is a Redis client: bounds are inclusive, so this reads at
    # most 501 ids, highest score first.
    ids = DB.zrevrange(first.db_key, 0, 500)
    for id_ in ids:
        count += 1
        # ZRANK is 0-based and yields None only for a missing member, so
        # the presence test must be ``is not None``: a plain truthiness
        # check would wrongly reject the member ranked first (rank 0).
        if all(DB.sismember(f, id_) for f in helper.filters) \
           and all(DB.zrank(k, id_) is not None for k in others):
            helper.bucket.add(id_)
        if helper.bucket_full:
            break
    # NOTE(review): reported for the manual scan only; the original nesting
    # of this call could not be recovered from the collapsed source.
    helper.debug('%s results after scan (%s loops)',
                 len(helper.bucket), count)
def only_commons(helper):
    """Collect results for a query made only of common tokens.

    Does nothing unless every token of ``helper.tokens`` is also counted in
    ``helper.common``. Otherwise:

    1. If there is a single key, or a geohash key is available, delegate
       to ``helper.add_to_bucket`` with the token keys (plus the geohash
       key when set).
    2. If the bucket is still dry and there is more than one key, pick the
       least frequent token. Below ``config.INTERSECT_LIMIT`` all token
       keys are passed to ``helper.add_to_bucket``; otherwise the top
       entries of that token's sorted set are scanned manually and kept
       when they match every filter and every other token.

    Side effect: ``helper.tokens`` is sorted in place by frequency when the
    second step runs.

    :param helper: search helper exposing ``tokens``, ``common``,
        ``geohash_key``, ``filters``, ``bucket``, ``bucket_dry``,
        ``bucket_full``, ``add_to_bucket`` and ``debug``.
    :return: None; results are accumulated on ``helper``.
    """
    if len(helper.tokens) != len(helper.common):
        return

    # Only common terms, shortcut to search.
    keys = [t.db_key for t in helper.tokens]
    if helper.geohash_key:
        keys.append(helper.geohash_key)
        helper.debug('Adding geohash %s', helper.geohash_key)
    if len(keys) == 1 or helper.geohash_key:
        helper.add_to_bucket(keys)

    if not (helper.bucket_dry and len(keys) > 1):
        return

    count = 0
    # Scan the less frequent token.
    helper.tokens.sort(key=lambda t: t.frequency)
    first = helper.tokens[0]
    if first.frequency < config.INTERSECT_LIMIT:
        helper.debug('Under INTERSECT_LIMIT, brut force.')
        keys = [t.db_key for t in helper.tokens]
        helper.add_to_bucket(keys)
        return

    helper.debug('INTERSECT_LIMIT hit, manual scan on %s', first)
    others = [t.db_key for t in helper.tokens[1:]]
    # Assumes DB is a Redis client: bounds are inclusive, so this reads at
    # most 501 ids, highest score first.
    ids = DB.zrevrange(first.db_key, 0, 500)
    for id_ in ids:
        count += 1
        # ZRANK is 0-based and yields None only for a missing member, so
        # the presence test must be ``is not None``: a plain truthiness
        # check would wrongly reject the member ranked first (rank 0).
        if all(DB.sismember(f, id_) for f in helper.filters) \
           and all(DB.zrank(k, id_) is not None for k in others):
            helper.bucket.add(id_)
        if helper.bucket_full:
            break
    # NOTE(review): reported for the manual scan only; the original nesting
    # of this call could not be recovered from the collapsed source.
    helper.debug('%s results after scan (%s loops)',
                 len(helper.bucket), count)
def _compute_onetomany_relations(tokens): relations = defaultdict(list) for token in tokens: for other in tokens: if other == token: continue if (token in relations[other] or DB.sismember(pair_key(token), other)): relations[token].append(other) return relations