Ejemplo n.º 1
0
def only_commons(helper):
    if len(helper.tokens) == len(helper.common):
        # Only common terms, shortcut to search
        keys = [t.db_key for t in helper.tokens]
        if helper.geohash_key:
            keys.append(helper.geohash_key)
            helper.debug('Adding geohash %s', helper.geohash_key)
        if len(keys) == 1 or helper.geohash_key:
            helper.add_to_bucket(keys)
        if helper.bucket_dry and len(keys) > 1:
            count = 0
            # Scan the less frequent token.
            helper.tokens.sort(key=lambda t: t.frequency)
            first = helper.tokens[0]
            if first.frequency < config.INTERSECT_LIMIT:
                helper.debug('Under INTERSECT_LIMIT, brut force.')
                keys = [t.db_key for t in helper.tokens]
                helper.add_to_bucket(keys)
            else:
                helper.debug('INTERSECT_LIMIT hit, manual scan on %s', first)
                others = [t.db_key for t in helper.tokens[1:]]
                ids = DB.zrevrange(first.db_key, 0, 500)
                for id_ in ids:
                    count += 1
                    if all(DB.sismember(f, id_) for f in helper.filters) \
                       and all(DB.zrank(k, id_) for k in others):
                        helper.bucket.add(id_)
                    if helper.bucket_full:
                        break
                helper.debug('%s results after scan (%s loops)',
                             len(helper.bucket), count)
Ejemplo n.º 2
0
def only_commons(helper):
    if len(helper.tokens) == len(helper.common):
        # Only common terms, shortcut to search
        keys = [t.db_key for t in helper.tokens]
        if helper.geohash_key:
            keys.append(helper.geohash_key)
            helper.debug('Adding geohash %s', helper.geohash_key)
        if len(keys) == 1 or helper.geohash_key:
            helper.add_to_bucket(keys)
        if helper.bucket_dry and len(keys) > 1:
            count = 0
            # Scan the less frequent token.
            helper.tokens.sort(key=lambda t: t.frequency)
            first = helper.tokens[0]
            if first.frequency < config.INTERSECT_LIMIT:
                helper.debug('Under INTERSECT_LIMIT, brut force.')
                keys = [t.db_key for t in helper.tokens]
                helper.add_to_bucket(keys)
            else:
                helper.debug('INTERSECT_LIMIT hit, manual scan on %s', first)
                others = [t.db_key for t in helper.tokens[1:]]
                ids = DB.zrevrange(first.db_key, 0, 500)
                for id_ in ids:
                    count += 1
                    if all(DB.sismember(f, id_) for f in helper.filters) \
                       and all(DB.zrank(k, id_) for k in others):
                        helper.bucket.add(id_)
                    if helper.bucket_full:
                        break
                helper.debug('%s results after scan (%s loops)',
                             len(helper.bucket), count)
Ejemplo n.º 3
0
def _compute_onetomany_relations(tokens):
    relations = defaultdict(list)
    for token in tokens:
        for other in tokens:
            if other == token:
                continue
            if (token in relations[other]
                    or DB.sismember(pair_key(token), other)):
                relations[token].append(other)
    return relations