def get_collections(data):
    """Resolve the collection references in *data* to Collection objects.

    Entries under the ``'collections'`` key may be raw ids or dicts
    carrying an ``'id'`` key.  Returns ``Collection.all_by_ids(...).all()``.
    """
    collections = []
    # ``or []`` guards against a missing 'collections' key: iterating the
    # ``None`` returned by ``dict.get`` would raise a TypeError.  The
    # sibling get_collections() variant in this codebase has the same guard.
    for coll_id in data.get('collections') or []:
        if isinstance(coll_id, dict):
            coll_id = coll_id.get('id')
        collections.append(coll_id)
    return Collection.all_by_ids(collections).all()
def peek_query(args):
    """Summarise which collections *outside* the user's read scope match a
    given search, aggregated per collection creator.

    Returns a formatted dict with ``roles`` (per-creator match counts,
    sorted by total, descending), ``active`` and ``total``.
    """
    if not isinstance(args, MultiDict):
        args = MultiDict(args)
    text = args.get('q', '').strip()
    q = text_query(text)
    filters = parse_filters(args)
    for entity in args.getlist('entity'):
        filters.append(('entities.id', entity))
    q = filter_query(q, filters, [])
    # Restrict to collections the current user can NOT read -- this is a
    # "what would I see with more access" query.
    q = add_filter(q, {
        'not': {
            'terms': {
                'collection_id': authz.collections(authz.READ)
            }
        }
    })
    q = {
        'query': q,
        'size': 0,
        'aggregations': {
            'collections': {
                'terms': {'field': 'collection_id', 'size': 30}
            }
        },
        '_source': False
    }
    result = get_es().search(index=get_es_index(), body=q,
                             doc_type=TYPE_DOCUMENT)
    aggs = result.get('aggregations', {}).get('collections', {})
    buckets = aggs.get('buckets', [])
    q = Collection.all_by_ids([b['key'] for b in buckets])
    # Only collections with a known creator can be attributed to a role.
    q = q.filter(Collection.creator_id != None)  # noqa
    objs = {o.id: o for o in q.all()}
    roles = {}
    for bucket in buckets:
        collection = objs.get(bucket.get('key'))
        if collection is None or collection.private:
            continue
        if collection.creator_id in roles:
            roles[collection.creator_id]['total'] += bucket.get('doc_count')
        else:
            roles[collection.creator_id] = {
                'name': collection.creator.name,
                'email': collection.creator.email,
                'total': bucket.get('doc_count')
            }
    roles = sorted(roles.values(), key=lambda r: r['total'], reverse=True)
    roles = [format_total(r) for r in roles]
    # ``hits.total`` may be absent from the ES response; default to 0 so
    # the ``total > 0`` comparison cannot raise TypeError on None (Py3).
    total = result.get('hits', {}).get('total') or 0
    return format_total({'roles': roles, 'active': total > 0, 'total': total})
def _iter_match_batch(batch, authz):
    """Yield one export row per resolvable cross-match in *batch*.

    Matches whose entity, counterpart or counterpart collection cannot be
    resolved (e.g. not visible under *authz*) are silently skipped.
    """
    entity_ids = set()
    collection_ids = set()
    for match in batch:
        entity_ids.add(match.entity_id)
        entity_ids.add(match.match_id)
        collection_ids.add(match.match_collection_id)
    # Resolve collection ids to their human-readable labels.
    labels = {
        coll.id: coll.label
        for coll in Collection.all_by_ids(collection_ids, authz=authz)
    }
    # Resolve all referenced entity ids in a single lookup.
    resolved = {
        ent.get('id'): ent
        for ent in iter_entities_by_ids(list(entity_ids), authz=authz)
    }
    for obj in batch:
        entity = resolved.get(str(obj.entity_id))
        counterpart = resolved.get(str(obj.match_id))
        label = labels.get(obj.match_collection_id)
        if entity is None or counterpart is None or label is None:
            continue
        eproxy = model.get_proxy(entity)
        mproxy = model.get_proxy(counterpart)
        yield (
            int(obj.score * 100),
            eproxy.caption,
            _format_date(eproxy),
            _format_country(eproxy),
            label,
            mproxy.caption,
            _format_date(mproxy),
            _format_country(mproxy),
            entity_url(eproxy.id),
            entity_url(mproxy.id),
        )
def peek_query(args):
    """Summarise which collections *outside* the user's read scope match a
    given search, aggregated per collection creator.

    Returns a formatted dict with ``roles`` (per-creator match counts,
    sorted by total, descending), ``active`` and ``total``.
    """
    if not isinstance(args, MultiDict):
        args = MultiDict(args)
    text = args.get('q', '').strip()
    q = text_query(text)
    filters = parse_filters(args)
    for entity in args.getlist('entity'):
        filters.append(('entities.id', entity))
    q = filter_query(q, filters, [])
    # Restrict to collections the current user can NOT read -- this is a
    # "what would I see with more access" query.
    q = add_filter(q, {
        'not': {
            'terms': {
                'collection_id': authz.collections(authz.READ)
            }
        }
    })
    q = {
        'query': q,
        'size': 0,
        'aggregations': {
            'collections': {
                'terms': {'field': 'collection_id', 'size': 30}
            }
        },
        '_source': False
    }
    result = get_es().search(index=get_es_index(), body=q,
                             doc_type=TYPE_DOCUMENT)
    aggs = result.get('aggregations', {}).get('collections', {})
    buckets = aggs.get('buckets', [])
    q = Collection.all_by_ids([b['key'] for b in buckets])
    # Only collections with a known creator can be attributed to a role.
    q = q.filter(Collection.creator_id != None)  # noqa
    objs = {o.id: o for o in q.all()}
    roles = {}
    for bucket in buckets:
        collection = objs.get(bucket.get('key'))
        if collection is None or collection.private:
            continue
        if collection.creator_id in roles:
            roles[collection.creator_id]['total'] += bucket.get('doc_count')
        else:
            roles[collection.creator_id] = {
                'name': collection.creator.name,
                'email': collection.creator.email,
                'total': bucket.get('doc_count')
            }
    roles = sorted(roles.values(), key=lambda r: r['total'], reverse=True)
    roles = [format_total(r) for r in roles]
    # ``hits.total`` may be absent from the ES response; default to 0 so
    # the ``total > 0`` comparison cannot raise TypeError on None (Py3).
    total = result.get('hits', {}).get('total') or 0
    return format_total({
        'roles': roles,
        'active': total > 0,
        'total': total
    })
def _resolve_collections(self, cache):
    """Populate *cache* with Collection objects for every pending
    ``(Collection, id)`` key it contains."""
    wanted = {id_ for (type_, id_) in cache.keys() if type_ == Collection}
    if not wanted:
        return
    # Include deleted collections so stale references still resolve.
    for collection in Collection.all_by_ids(wanted, deleted=True):
        cache[(Collection, str(collection.id))] = collection
def expand(self, keys):
    """Map each collection id in *keys* to a small metadata dict
    (label, category, and whether it is publicly visible)."""
    authz = self.state.authz
    return {
        six.text_type(coll.id): {
            'label': coll.label,
            'category': coll.category,
            'public': authz.collection_public(coll.id)
        }
        for coll in Collection.all_by_ids(keys).all()
    }
def get_collections(data):
    """Resolve the collection references in *data* to Collection objects.

    ``'collection_id'`` may be a single value or a list/set/tuple; each
    entry may be a raw id or a dict carrying an ``'id'`` key.
    """
    raw = data.get('collection_id') or []
    # Normalise a scalar value into a one-element list.
    if not isinstance(raw, (list, set, tuple)):
        raw = [raw]
    ids = [
        item.get('id') if isinstance(item, dict) else item
        for item in raw
    ]
    return Collection.all_by_ids(ids).all()
def convert_collections(facet):
    """Convert an ES collections aggregation *facet* into a
    ``{'values': [...]}`` structure with labels and document counts.

    Buckets whose key does not correspond to a known collection are
    dropped.
    """
    output = {'values': []}
    buckets = facet.get('buckets', [])
    ids = [b.get('key') for b in buckets]
    if not ids:
        return output
    # Index collections by id once instead of re-scanning the full list
    # for every bucket (the original was O(buckets * collections)).
    by_id = {c.id: c for c in Collection.all_by_ids(ids).all()}
    for bucket in buckets:
        key = bucket.get('key')
        collection = by_id.get(key)
        if collection is None:
            continue
        output['values'].append({
            'id': key,
            'label': collection.label,
            'count': bucket.get('doc_count')
        })
    return output
def convert_collections(facet):
    """Convert an ES collections aggregation *facet* into a
    ``{'values': [...]}`` structure including each collection's category.

    Buckets whose key does not correspond to a known collection are
    dropped.
    """
    buckets = facet.get('buckets', [])
    ids = [b.get('key') for b in buckets]
    if not ids:
        return {'values': []}
    # Index collections by id once instead of re-scanning the full list
    # for every bucket (the original was O(buckets * collections)).
    by_id = {c.id: c for c in Collection.all_by_ids(ids).all()}
    results = []
    for bucket in buckets:
        key = bucket.get('key')
        collection = by_id.get(key)
        if collection is None:
            continue
        results.append({
            'id': key,
            'label': collection.label,
            'category': collection.category,
            'count': bucket.get('doc_count')
        })
    return {'values': results}
def status():
    """
    ---
    get:
      summary: Get an overview of collections and exports being processed
      description: >
        List collections being processed currently and pending task counts
      responses:
        '200':
          description: OK
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/SystemStatusResponse'
      tags:
      - System
    """
    require(request.authz.logged_in)
    request.rate_limit = None
    active = get_active_dataset_status()
    datasets = active.pop("datasets", {})
    # Resolve dataset names to collection ids, then load the matching
    # collections (including deleted ones) in a single query.
    coll_ids = (get_dataset_collection_id(name) for name in datasets.keys())
    coll_ids = (id_ for id_ in coll_ids if id_ is not None)
    by_id = {
        coll.id: coll
        for coll in Collection.all_by_ids(coll_ids, deleted=True).all()
    }
    serializer = CollectionSerializer(reference=True)
    results = []
    for dataset, info in sorted(datasets.items()):
        collection_id = get_dataset_collection_id(dataset)
        if not request.authz.can(collection_id, request.authz.READ):
            continue
        collection = by_id.get(collection_id)
        if collection is not None:
            info["collection"] = serializer.serialize(collection.to_dict())
        results.append(info)
    return jsonify({"results": results, "total": len(results)})
def index():
    """List the collections readable by the current user, ordered by label."""
    readable = authz.collections(authz.READ)
    # Cache varies per user since the readable set differs between users.
    enable_cache(vary_user=True, vary=readable)
    query = Collection.all_by_ids(readable)
    query = query.order_by(Collection.label.asc())
    return jsonify(Pager(query))
def expand(self, keys):
    """Load the collections for *keys* (visible under the parser's authz)
    and cache them on this instance."""
    matches = Collection.all_by_ids(keys, authz=self.parser.authz)
    self.collections = matches.all()