def summarize_ranks(collection, labels, min_prop=None):
    """
    Build a per-article rank summary for the documents tagged with ``labels``.

    Only documents matching the module-level ``verified_mongo`` filter are
    considered. For each of them the tag positions are obtained with
    ``compute_positions`` and, when that result is non-empty, turned into
    ranks with ``compute_ranks``.

    :param Collection collection: object exposing ``find(query[, projection])``
        whose cursors support ``count()`` (presumably a PyMongo collection --
        confirm against the caller)
    :param list labels: label values matched against the ``tags.label`` field
    :param float min_prop: optional minimum proportion; when truthy, a label is
        kept only if the number of verified documents carrying it is strictly
        greater than ``min_prop`` times the total number of verified documents
    :return tuple: ``(summary, labels)`` where ``summary`` maps each article
        ``_id`` to its ``compute_ranks`` result and ``labels`` is the list of
        labels actually used (filtered when ``min_prop`` is truthy)
    """

    def verified(criteria):
        # Restrict ``criteria`` to verified documents without mutating either
        # the caller's dict or the shared ``verified_mongo`` filter.
        restricted = dict(criteria)
        restricted.update(verified_mongo)
        return restricted

    if min_prop:
        # NOTE(review): Cursor.count() is deprecated since PyMongo 3.7 and
        # removed in 4.0; move to Collection.count_documents() when upgrading.
        total_count = collection.find(verified_mongo).count()
        min_count = min_prop * float(total_count)
        # The threshold is derived from the verified total, so each label must
        # be counted among verified documents as well; counting over the whole
        # collection would compare two different populations.
        labels = [
            label for label in labels
            if collection.find(verified({'tags.label': label})).count() > min_count
        ]

    # Only the tags are needed to compute positions (``_id`` comes back by
    # default), so project the rest away.
    cursor = collection.find(
        verified({'tags.label': {'$in': labels}}),
        {'tags': True},
    )
    selectors = {label: {'label': label} for label in labels}

    summary = {}
    for article in cursor:
        positions, _ = compute_positions(article['tags'], selectors)
        if not positions:
            # Nothing was located for this article; leave it out of the summary.
            continue
        summary[article['_id']] = compute_ranks(positions)
    return summary, labels
def _filter_first(tags):
    """
    Tell whether the ``'filter'`` entry comes before the ``'motreg'`` entry.

    :param tags: tags handed to ``compute_positions`` together with the
        module-level ``_filter_motreg_selectors``
    :return: result of comparing the ``'filter'`` position with the
        ``'motreg'`` position using ``<`` (presumably a bool -- depends on
        what ``compute_positions`` yields)
    """
    located, _ignored = compute_positions(tags, _filter_motreg_selectors)
    filter_position = located['filter']
    motreg_position = located['motreg']
    return filter_position < motreg_position