def status():
    enable_cache(vary_user=True)
    return jsonify({
        'logged_in': authz.logged_in(),
        'api_key': request.auth_role.api_key if authz.logged_in() else None,
        'role': request.auth_role,
        'roles': list(request.auth_roles),
        'permissions': {
            'read': authz.collections(authz.READ),
            'write': authz.collections(authz.WRITE)
        },
        'logout': url_for('.logout')
    })
def query(self):
    args = {
        'acl': authz.collections(authz.READ),
        'limit': self.limit,
        'offset': self.offset,
        'text': self.text(),
        'context': self.context(),
        'node_id': self.node_id(),
        'collection_id': self.collection_id()
    }
    filters = []
    filters.append('coll.alephCollection IN {collection_id}')
    if args['text'] is not None:
        filters.append('node.name =~ {text}')
    if len(args['context']):
        filters.append('NOT (node.id IN {context})')
    if len(args['node_id']):
        filters.append('node.id IN {node_id}')
    q = "MATCH (node:Aleph)-[:PART_OF]->(coll:Collection) " \
        "WHERE %s " \
        "RETURN node SKIP {offset} LIMIT {limit} "
    q = q % ' AND '.join(filters)
    # print args, q
    return q, args
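# Usage sketch (illustrative, not from the original code): query() returns a
# Cypher string plus its parameter dict, so a caller is expected to hand both
# to the graph driver. The `builder` and `graph` names below are hypothetical.
# With a text filter set, the WHERE clause joins to
# "coll.alephCollection IN {collection_id} AND node.name =~ {text}".
#
#   q, args = builder.query()
#   for row in graph.run(q, **args):
#       node = row.get('node')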
def peek_query(args):
    if not isinstance(args, MultiDict):
        args = MultiDict(args)
    text = args.get('q', '').strip()
    q = text_query(text)
    filters = parse_filters(args)
    for entity in args.getlist('entity'):
        filters.append(('entities.id', entity))
    q = filter_query(q, filters, [])
    q = add_filter(q, {
        'not': {
            'terms': {
                'collection_id': authz.collections(authz.READ)
            }
        }
    })
    q = {
        'query': q,
        'size': 0,
        'aggregations': {
            'collections': {
                'terms': {'field': 'collection_id', 'size': 30}
            }
        },
        '_source': False
    }
    # import json
    # print json.dumps(q, indent=2)
    result = get_es().search(index=get_es_index(), body=q,
                             doc_type=TYPE_DOCUMENT)
    aggs = result.get('aggregations', {}).get('collections', {})
    buckets = aggs.get('buckets', [])
    q = Collection.all_by_ids([b['key'] for b in buckets])
    q = q.filter(Collection.creator_id != None)  # noqa
    objs = {o.id: o for o in q.all()}
    roles = {}
    for bucket in buckets:
        collection = objs.get(bucket.get('key'))
        if collection is None or collection.private:
            continue
        if collection.creator_id in roles:
            roles[collection.creator_id]['total'] += bucket.get('doc_count')
        else:
            roles[collection.creator_id] = {
                'name': collection.creator.name,
                'email': collection.creator.email,
                'total': bucket.get('doc_count')
            }
    roles = sorted(roles.values(), key=lambda r: r['total'], reverse=True)
    roles = [format_total(r) for r in roles]
    total = result.get('hits', {}).get('total')
    return format_total({
        'roles': roles,
        'active': total > 0,
        'total': total
    })
def suggest_nodes(graph, collection_id, prefix, limit, offset):
    """Suggest nodes whose names match the given prefix.

    Returns the result sorted by the visible degree count of each node.
    """
    collections = authz.collections(authz.READ)
    collection_id = collection_id if len(collection_id) else collections
    q = "MATCH (n)-[:PART_OF]->(c1:Collection) " \
        "MATCH (n)-[r]-(p) " \
        "MATCH (p)-[:PART_OF]->(c2:Collection) " \
        "WHERE c1.alephCollection IN {collection_id} " \
        "AND c2.alephCollection IN {acl} " \
        "AND n.name =~ {regex} " \
        "WITH n, count(r) AS deg " \
        "ORDER BY deg DESC " \
        "SKIP {offset} LIMIT {limit} " \
        "RETURN n, deg "
    regex = '(?i).*%s.*' % prefix
    cursor = graph.run(q, regex=regex, acl=collections,
                       collection_id=collection_id, limit=limit,
                       offset=offset)
    nodes = []
    for row in cursor:
        node = NodeType.dict(row.get('n'))
        node['$degree'] = row.get('deg')
        nodes.append(node)
    return _make_response(nodes, [], limit=limit, offset=offset)
def facet_entities(aggs, args):
    """Filter entities, facet for collections."""
    entities = args.getlist('entity')
    collections = authz.collections(authz.READ)
    # This limits the entity facet collections to the same collections
    # which apply to the document part of the query. It is used by the
    # collections view to show only entity facets from the currently
    # selected collection.
    if 'collection' == args.get('scope'):
        filters = args.getlist('filter:collection_id')
        collections = [c for c in collections if str(c) in filters]
    flt = {
        'bool': {
            'must': [{'terms': {'entities.collection_id': collections}}]
        }
    }
    if len(entities):
        flt['bool']['must'].append({'terms': {'entities.id': entities}})
    aggs['entities'] = {
        'nested': {
            'path': 'entities'
        },
        'aggs': {
            'inner': {
                'filter': flt,
                'aggs': {
                    'entities': {
                        'terms': {'field': 'entities.id', 'size': FACET_SIZE}
                    }
                }
            }
        }
    }
    return aggs
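# Reading the facet back (illustrative sketch): with a nested aggregation like
# the one above, Elasticsearch returns the term buckets underneath the inner
# filter aggregation. The `result` variable below is a hypothetical search
# response, not part of the original code.
#
#   buckets = result.get('aggregations', {}) \
#                   .get('entities', {}) \
#                   .get('inner', {}) \
#                   .get('entities', {}) \
#                   .get('buckets', [])
#   for bucket in buckets:
#       entity_id, count = bucket['key'], bucket['doc_count']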
def all():
    q = Entity.all()
    q = q.filter(Entity.state == Entity.STATE_ACTIVE)
    clause = Collection.id.in_(authz.collections(authz.READ))
    q = q.filter(Entity.collections.any(clause))
    q = q.order_by(Entity.id.asc())
    return jsonify(Pager(q, limit=100))
def query(self):
    args = {
        'acl': authz.collections(authz.READ),
        'limit': self.limit,
        'offset': self.offset,
        'text': self.text(),
        'ignore': self.ignore(),
        'collection_id': self.collection_id()
    }
    filters = []
    filters.append('ncoll.alephCollection IN {collection_id}')
    filters.append('ocoll.alephCollection IN {acl}')
    if args['text'] is not None:
        filters.append('node.name =~ {text}')
    if len(args['ignore']):
        filters.append('NOT (node.id IN {ignore})')
    q = "MATCH (node)-[:PART_OF]->(ncoll:Collection) " \
        "MATCH (node)-[r]-(other) " \
        "MATCH (other)-[:PART_OF]->(ocoll:Collection) " \
        "WHERE %s " \
        "WITH node, count(r) AS degree " \
        "ORDER BY degree DESC " \
        "SKIP {offset} LIMIT {limit} " \
        "RETURN node, degree "
    q = q % ' AND '.join(filters)
    # print args, q
    return q, args
def query(self):
    args = {
        'acl': authz.collections(authz.READ),
        'limit': self.limit,
        'offset': self.offset,
        'ignore': self.ignore(),
        'source_collection_id': self.source_collection_id(),
        'target_collection_id': self.target_collection_id(),
        'source_id': self.source_id(),
        'target_id': self.target_id()
    }
    # With 'directed' set, the pattern becomes (source)-[rel]->(target);
    # otherwise the relationship is matched in either direction.
    directed = '>' if self._bool('directed') else ''
    filters = []
    filters.append('sourcecoll.alephCollection IN {source_collection_id}')
    filters.append('targetcoll.alephCollection IN {target_collection_id}')
    if len(args['ignore']):
        filters.append('NOT (rel.id IN {ignore})')
    if len(args['source_id']):
        filters.append('source.id IN {source_id}')
    if len(args['target_id']):
        filters.append('target.id IN {target_id}')
    q = "MATCH (source)-[rel]-%s(target) " \
        "MATCH (source)-[:PART_OF]->(sourcecoll:Collection) " \
        "MATCH (target)-[:PART_OF]->(targetcoll:Collection) " \
        "WHERE %s " \
        "RETURN source.id AS source, rel, target.id AS target " \
        "SKIP {offset} LIMIT {limit} "
    filters = ' AND '.join(filters)
    q = q % (directed, filters)
    return q, args
def alert_query(alert):
    """Execute the query and return a set of results."""
    q = text_query(alert.query_text)
    q = authz_filter(q)
    if alert.entity_id:
        q = filter_query(q, [('entities.id', alert.entity_id)], OR_FIELDS)
    if alert.notified_at:
        q = add_filter(q, {"range": {"created_at": {"gt": alert.notified_at}}})
    q = {'query': q, 'size': 150}
    result, hits, output = execute_basic(TYPE_DOCUMENT, q)
    collections = {}
    for doc in hits.get('hits', []):
        document = doc.get('_source')
        document['id'] = int(doc.get('_id'))
        document['collections'] = []
        for coll in document['collection_id']:
            if coll not in authz.collections(authz.READ):
                continue
            if coll not in collections:
                collections[coll] = Collection.by_id(coll)
            if collections[coll] is None:
                continue
            document['collections'].append(collections[coll])
        document['records'] = {'results': [], 'total': 0}
        output['results'].append(document)
    return output
def statistics():
    collections = authz.collections(authz.READ)
    enable_cache(vary=collections)
    query = documents_query(MultiDict())
    query["size"] = 0
    result = execute_documents_query(MultiDict(), query)
    # collections = Collection.category_statistics(collections)
    return jsonify({
        "document_count": result["total"],
        "collection_count": len(collections)
    })
def index():
    collection_ids = match_ids('collection', authz.collections(authz.READ))
    q = Document.all()
    clause = Collection.id.in_(collection_ids)
    q = q.filter(Document.collections.any(clause))
    hashes = request.args.getlist('content_hash')
    if len(hashes):
        q = q.filter(Document.content_hash.in_(hashes))
    return jsonify(Pager(q))
def references(document_id):
    doc = get_document(document_id)
    q = db.session.query(Reference)
    q = q.filter(Reference.document_id == doc.id)
    q = q.join(Entity)
    q = q.filter(Entity.state == Entity.STATE_ACTIVE)
    clause = Collection.id.in_(authz.collections(authz.READ))
    q = q.filter(Entity.collections.any(clause))
    q = q.order_by(Reference.weight.desc())
    return jsonify({'results': q.all()})
def query():
    enable_cache(vary_user=True, vary=authz.collections(authz.READ))
    query = documents_query(request.args)
    query['size'] = get_limit(default=100)
    query['from'] = get_offset()
    result = execute_documents_query(request.args, query)
    params = next_params(request.args, result)
    if params is not None:
        result['next'] = url_for('search_api.query', **params)
    return jsonify(result)
def update(document_id):
    document = get_document(document_id)
    # This is a special requirement for documents, so
    # they cannot escalate privs:
    authz.require(authz.collection_write(document.source_collection_id))
    data = request_data()
    document.update(data, writeable=authz.collections(authz.WRITE))
    db.session.commit()
    log_event(request, document_id=document.id)
    update_document(document)
    return view(document_id)
def update_collections(document_id):
    document = get_document(document_id)
    data = request_data()
    if not isinstance(data, list) or \
            not all(isinstance(d, int) for d in data):
        raise BadRequest()
    document.update_collections(data,
                                writeable=authz.collections(authz.WRITE))
    db.session.commit()
    log_event(request, document_id=document.id)
    update_document(document)
    return view_collections(document_id)
def query():
    enable_cache(vary_user=True, vary=authz.collections(authz.READ))
    query = documents_query(request.args)
    query["size"] = get_limit(default=100)
    query["from"] = get_offset()
    # import json
    # print json.dumps(query, indent=2)
    result = execute_documents_query(request.args, query)
    params = next_params(request.args, result)
    log_event(request)
    if params is not None:
        result["next"] = url_for("search_api.query", **params)
    return jsonify(result)
def update(id):
    entity = obj_or_404(Entity.by_id(id))
    check_authz(entity, authz.WRITE)
    data = request_data()
    data['id'] = entity.id
    possible_collections = authz.collections(authz.WRITE)
    possible_collections.extend([c.id for c in entity.collections])
    data['collections'] = [c for c in get_collections(data)
                           if c.id in possible_collections]
    entity = Entity.save(data, merge=arg_bool('merge'))
    db.session.commit()
    update_entity(entity)
    return view(entity.id)
def query():
    creds = authz.collections(authz.READ), authz.sources(authz.READ)
    enable_cache(vary_user=True, vary=creds)
    query = documents_query(request.args)
    query['size'] = get_limit(default=100)
    query['from'] = get_offset()
    result = execute_documents_query(request.args, query)
    result['alert'] = None
    if authz.logged_in():
        result['alert'] = Alert.exists(request.args, request.auth_role)
    params = next_params(request.args, result)
    if params is not None:
        result['next'] = url_for('search_api.query', **params)
    return jsonify(result)
def index():
    try:
        authorized = authz.collections(authz.READ)
        collection_ids = [int(f) for f in request.args.getlist('collection')]
        collection_ids = collection_ids or authorized
        collection_ids = [c for c in collection_ids if c in authorized]
    except ValueError:
        raise BadRequest()
    q = Document.all()
    clause = Collection.id.in_(collection_ids)
    q = q.filter(Document.collections.any(clause))
    hashes = request.args.getlist('content_hash')
    if len(hashes):
        q = q.filter(Document.content_hash.in_(hashes))
    return jsonify(Pager(q))
def pending():
    q = db.session.query(Entity)
    q = q.filter(Entity.state == Entity.STATE_PENDING)
    clause = Collection.id.in_(authz.collections(authz.READ))
    q = q.filter(Entity.collections.any(clause))
    ref = aliased(Reference)
    q = q.join(ref)
    q = q.group_by(Entity)
    q = q.order_by(func.sum(ref.weight).desc())
    entity = q.first()
    if entity is None:
        return jsonify({'empty': True})
    data = entity.to_dict()
    data['name_latin'] = latinize_text(data['name'], lowercase=False)
    return jsonify(data)
def _query(escape=False):
    """Everything here should be applicable both to the internal
    and to the public API."""
    creds = authz.collections(authz.READ), authz.sources(authz.READ)
    enable_cache(vary_user=True, vary=creds)
    query = documents_query(request.args, escape=escape)
    query['size'] = get_limit(default=100)
    query['from'] = get_offset()
    result = execute_documents_query(request.args, query)
    params = next_params(request.args, result)
    if params is not None:
        result['next'] = url_for('search_api.query', **params)
    return result
def paths(id):
    collection = obj_or_404(Collection.by_id(id))
    authz.require(authz.collection_read(collection.id))
    start_entity_id = request.args.get('entity_id')
    labels = request.args.getlist('label')
    types = request.args.getlist('type')
    collection_id = request.args.getlist('collection_id')
    end_collection_id = authz.collections_intersect(authz.READ, collection_id)
    q = Path.find(collection,
                  start_entity_id=start_entity_id,
                  labels=labels,
                  types=types,
                  end_collection_id=end_collection_id)
    data = Pager(q, id=collection.id).to_dict()
    data['facets'] = Path.facets(collection,
                                 start_entity_id=start_entity_id,
                                 labels=labels,
                                 types=types,
                                 end_collection_id=end_collection_id,
                                 collection_id=authz.collections(authz.READ))
    return jsonify(data)
def entity_collections(q, aggs, args, filters):
    """Filter entities, facet for collections."""
    entities = args.getlist('entity')
    collections = []
    readable = authz.collections(authz.READ)
    requested = args.getlist('collection') or readable
    for collection_id in requested:
        collection_id = int(collection_id)
        if authz.collection_read(collection_id):
            collections.append(collection_id)
    flt = {
        'or': [
            {
                'terms': {'entities.collection_id': collections}
            },
            {
                # Each term filter needs its own dict; merging both into a
                # single dict would silently drop the first 'terms' key.
                'and': [
                    {'terms': {'entities.collection_id': readable}},
                    {'terms': {'entities.uuid': entities}}
                ]
            }
        ]
    }
    aggs['entities'] = {
        'nested': {
            'path': 'entities'
        },
        'aggs': {
            'inner': {
                'filter': flt,
                'aggs': {
                    'entities': {
                        'terms': {'field': 'entities.uuid', 'size': FACET_SIZE}
                    }
                }
            }
        }
    }
    return q
def update(id):
    entity = obj_or_404(Entity.by_id(id))
    check_authz(entity, authz.WRITE)
    data = request_data()
    data["id"] = entity.id
    possible_collections = authz.collections(authz.WRITE)
    possible_collections.extend([c.id for c in entity.collections])
    collections = [c for c in get_collections(data)
                   if c.id in possible_collections]
    try:
        entity = Entity.save(data, collections, merge=arg_bool("merge"))
    except ValueError as ve:
        raise BadRequest(ve.message)
    for collection in entity.collections:
        collection.touch()
    db.session.commit()
    log_event(request, entity_id=entity.id)
    update_entity(entity)
    return view(entity.id)
def alert_query(alert):
    """Execute the query and return a set of results."""
    q = text_query(alert.query_text)
    q = authz_filter(q)
    if alert.entity_id:
        q = filter_query(q, [('entities.id', alert.entity_id)], OR_FIELDS)
    if alert.notified_at:
        q = add_filter(q, {
            "range": {
                "created_at": {
                    "gt": alert.notified_at
                }
            }
        })
    q = {
        'query': q,
        'size': 150
    }
    result, hits, output = execute_basic(TYPE_DOCUMENT, q)
    sub_queries = []
    collections = {}
    for doc in hits.get('hits', []):
        document = doc.get('_source')
        document['id'] = int(doc.get('_id'))
        document['collections'] = []
        for coll in document['collection_id']:
            if coll not in authz.collections(authz.READ):
                continue
            if coll not in collections:
                collections[coll] = Collection.by_id(coll)
            if collections[coll] is None:
                continue
            document['collections'].append(collections[coll])
        document['records'] = {'results': [], 'total': 0}
        sq = records_query(document['id'], alert.to_query(), size=1)
        if sq is not None:
            # msearch-style pairing: an empty header object followed by the
            # query body for each record query.
            sub_queries.append(json.dumps({}))
            sub_queries.append(json.dumps(sq))
        output['results'].append(document)
    run_sub_queries(output, sub_queries)
    return output
def pending():
    q = db.session.query(Entity)
    skip_entities = request.args.getlist('skip')
    if len(skip_entities):
        q = q.filter(not_(Entity.id.in_(skip_entities)))
    q = q.filter(Entity.state == Entity.STATE_PENDING)
    clause = Collection.id.in_(authz.collections(authz.READ))
    q = q.filter(Entity.collections.any(clause))
    # this was too slow to actually work:
    # ref = aliased(Reference)
    # q = q.join(ref)
    # q = q.group_by(Entity)
    # q = q.order_by(func.count(ref.id).desc())
    q = q.order_by(func.random())
    q = q.limit(30)
    entities = []
    for entity in q.all():
        data = entity.to_dict()
        data['name_latin'] = latinize_text(entity.name, lowercase=False)
        entities.append(data)
    return jsonify({'results': entities, 'total': len(entities)})
def facet_entities(aggs, args):
    """Filter entities, facet for collections."""
    entities = args.getlist('entity')
    collections = authz.collections(authz.READ)
    flt = {
        'or': [
            {
                'terms': {'entities.collection_id': collections}
            },
            {
                # Each term filter needs its own dict; merging both into a
                # single dict would silently drop the first 'terms' key.
                'and': [
                    {'terms': {'entities.collection_id': collections}},
                    {'terms': {'entities.id': entities}}
                ]
            }
        ]
    }
    aggs['entities'] = {
        'nested': {
            'path': 'entities'
        },
        'aggs': {
            'inner': {
                'filter': flt,
                'aggs': {
                    'entities': {
                        'terms': {'field': 'entities.id', 'size': FACET_SIZE}
                    }
                }
            }
        }
    }
    return aggs
def load_nodes(graph, node_ids, labels, depth, limit, offset):
    collections = authz.collections(authz.READ)
def index():
    collections = authz.collections(authz.READ)
    enable_cache(vary_user=True, vary=collections)
    q = Collection.all_by_ids(collections)
    q = q.order_by(Collection.label.asc())
    return jsonify(Pager(q))
def suggest():
    collections = authz.collections(authz.READ)
    enable_cache(vary=collections, server_side=False)
    prefix = request.args.get('prefix')
    results = Entity.suggest_prefix(prefix, collections)
    return jsonify({'results': results})
def index():
    collection_ids = match_ids('collection', authz.collections(authz.READ))
    q = Entity.all()
    q = q.filter(Entity.collection_id.in_(collection_ids))
    return jsonify(Pager(q))
def peek():
    enable_cache(vary_user=True, vary=authz.collections(authz.READ))
    response = peek_query(request.args)
    if not authz.logged_in():
        response.pop('roles', None)
    return jsonify(response)
def similar(id):
    entity = obj_or_404(Entity.by_id(id))
    check_authz(entity, authz.READ)
    action = authz.WRITE if arg_bool('writeable') else authz.READ
    collections = authz.collections(action)
    return jsonify(similar_entities(entity, request.args, collections))
def suggest():
    collections = authz.collections(authz.READ)
    enable_cache(vary=collections, server_side=False)
    prefix = request.args.get('prefix')
    min_count = int(request.args.get('min_count', 0))
    return jsonify(suggest_entities(prefix, min_count))
def check_authz(entity, permission):
    # Allow access if any of the entity's collections carries the requested
    # permission; otherwise fail the authorization check.
    permissions = authz.collections(permission)
    for collection in entity.collections:
        if collection.id in permissions:
            return
    authz.require(False)
def authz_filter(q):
    """Restrict a search query to collections readable by the current role."""
    return add_filter(q, {
        "terms": {"collection_id": list(authz.collections(authz.READ))}
    })
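# Usage sketch (illustrative, not from the original code): every search body
# sent to Elasticsearch is first wrapped by authz_filter(), so hits are
# limited to readable collections. `_demo_restricted_search` and its `text`
# argument are hypothetical; text_query, get_es, get_es_index and
# TYPE_DOCUMENT are the same helpers used by the functions above.
def _demo_restricted_search(text):
    q = text_query(text)
    # Adds {"terms": {"collection_id": [...readable ids...]}} to the query.
    q = authz_filter(q)
    body = {'query': q, 'size': 10}
    return get_es().search(index=get_es_index(), body=body,
                           doc_type=TYPE_DOCUMENT)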