def peek_query(args):
    """Summarise the matches a query would have in collections the
    requesting user is not authorised to read, grouped by collection
    creator."""
    if not isinstance(args, MultiDict):
        args = MultiDict(args)
    text = args.get('q', '').strip()
    q = text_query(text)
    filters = parse_filters(args)
    for entity in args.getlist('entity'):
        filters.append(('entities.id', entity))
    q = filter_query(q, filters, [])
    q = add_filter(q, {
        'not': {
            'terms': {
                'collection_id': authz.collections(authz.READ)
            }
        }
    })
    q = {
        'query': q,
        'size': 0,
        'aggregations': {
            'collections': {
                'terms': {'field': 'collection_id', 'size': 30}
            }
        },
        '_source': False
    }
    # import json
    # print json.dumps(q, indent=2)
    result = get_es().search(index=get_es_index(), body=q,
                             doc_type=TYPE_DOCUMENT)
    aggs = result.get('aggregations', {}).get('collections', {})
    buckets = aggs.get('buckets', [])
    q = Collection.all_by_ids([b['key'] for b in buckets])
    q = q.filter(Collection.creator_id != None)  # noqa
    objs = {o.id: o for o in q.all()}
    roles = {}
    for bucket in buckets:
        collection = objs.get(bucket.get('key'))
        if collection is None or collection.private:
            continue
        if collection.creator_id in roles:
            roles[collection.creator_id]['total'] += bucket.get('doc_count')
        else:
            roles[collection.creator_id] = {
                'name': collection.creator.name,
                'email': collection.creator.email,
                'total': bucket.get('doc_count')
            }
    roles = sorted(roles.values(), key=lambda r: r['total'], reverse=True)
    roles = [format_total(r) for r in roles]
    total = result.get('hits', {}).get('total')
    return format_total({
        'roles': roles,
        'active': total > 0,
        'total': total
    })
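
# Illustrative usage sketch, not part of the original module: peek_query()
# expects a werkzeug MultiDict such as Flask's request.args and returns a
# JSON-serialisable summary. The query text and entity id below are made-up
# values for demonstration only.
def _peek_query_example():
    from werkzeug.datastructures import MultiDict
    args = MultiDict([('q', 'fracking'), ('entity', 'some-entity-id')])
    summary = peek_query(args)
    # `summary` has the shape {'roles': [...], 'active': True, 'total': N}
    return summary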
def entities_query(args, fields=None, facets=True):
    """Parse a user query string and compose an entity search query."""
    if not isinstance(args, MultiDict):
        args = MultiDict(args)
    text = args.get('q', '').strip()
    if not len(text):
        q = match_all()
    else:
        q = {
            "query_string": {
                "query": text,
                "fields": ['name^15', 'name_latin^5',
                           'terms^12', 'terms_latin^3',
                           'summary^10', 'summary_latin^7',
                           'description^5', 'description_latin^3'],
                "default_operator": "AND",
                "use_dis_max": True
            }
        }

    q = authz_filter(q)
    filters = parse_filters(args)
    aggs = {'scoped': {'global': {}, 'aggs': {}}}
    if facets:
        facets = args.getlist('facet')
        if 'collections' in facets:
            aggs = facet_collections(q, aggs, filters)
            facets.remove('collections')
        aggs = aggregate(q, aggs, facets)

    sort_mode = args.get('sort', '').strip().lower()
    default_sort = 'score' if len(text) else 'doc_count'
    sort_mode = sort_mode or default_sort
    if sort_mode == 'doc_count':
        sort = [{'doc_count': 'desc'}, '_score']
    elif sort_mode == 'alphabet':
        sort = [{'name': 'asc'}, '_score']
    else:
        # 'score' and any unrecognised sort value fall back to relevance,
        # so an unexpected ?sort= parameter cannot leave `sort` unassigned.
        sort = ['_score']

    return {
        'sort': sort,
        'query': filter_query(q, filters, OR_FIELDS),
        'aggregations': aggs,
        '_source': fields or DEFAULT_FIELDS
    }
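
# Illustrative sketch, not part of the original module: entities_query() only
# composes a request body, so running it mirrors the get_es() call made in
# peek_query() above. The doc type name and paging values are assumptions for
# demonstration; this excerpt only defines TYPE_DOCUMENT.
def _run_entities_query_example(args, doc_type='entity', limit=30, offset=0):
    body = entities_query(args)
    body['size'] = limit
    body['from'] = offset
    return get_es().search(index=get_es_index(), body=body,
                           doc_type=doc_type)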
def documents_query(args, fields=None, facets=True, escape=True):
    """Parse a user query string and compose a document search query.

    escape -- filter the input so it can be safely used in an ES query
    string.
    """
    if not isinstance(args, MultiDict):
        args = MultiDict(args)
    text = args.get('q', '').strip()
    if escape:
        text = escape_query_string(text)
    q = text_query(text)
    q = authz_sources_filter(q)

    # Sorting -- should this be passed into search directly, instead of
    # these aliases?
    sort_mode = args.get('sort', '').strip().lower()
    if sort_mode == 'score':
        sort = ['_score']
    elif sort_mode == 'newest_filed':
        sort = [{'filing_date': 'desc'}]
    elif sort_mode == 'oldest_filed':
        sort = [{'filing_date': 'asc'}]
    elif sort_mode == 'newest_added':
        sort = [{'dates': 'desc'}, {'created_at': 'desc'}, '_score']
    elif sort_mode == 'oldest_added':
        sort = [{'dates': 'asc'}, {'created_at': 'asc'}, '_score']
    else:
        sort = [{'updated_at': 'desc'}, {'created_at': 'desc'}, '_score']

    filters = parse_filters(args)
    for entity in args.getlist('entity'):
        filters.append(('entities.uuid', entity))

    aggs = {}
    if facets:
        aggs = aggregate(q, args)
        aggs = facet_source(q, aggs, filters)
        # aggs = facet_sector(q, aggs, filters)

    q = entity_collections(q, aggs, args, filters)
    # XXX this is where I should be hooking in openoil aggregations
    signals.document_query_process.send(q=q, args=args)
    return {
        'sort': sort,
        'query': filter_query(q, filters, OR_FIELDS),
        'aggregations': aggs,
        '_source': fields or DEFAULT_FIELDS
        # 'fields': fields or DEFAULT_FIELDS
    }
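
# Hypothetical helper sketch: escape_query_string() is referenced above but
# not defined in this excerpt. One plausible implementation backslash-escapes
# the characters that Elasticsearch's query_string syntax treats as operators;
# the real helper may differ (e.g. in how it handles the '&&' and '||' pairs).
import re

_QUERY_STRING_RESERVED = re.compile(r'([+\-=&|><!(){}\[\]^"~*?:\\/])')


def _escape_query_string_sketch(text):
    # Prefix each reserved character with a backslash so user input cannot
    # inject query_string operators such as grouping or wildcards.
    if not text:
        return text
    return _QUERY_STRING_RESERVED.sub(r'\\\1', text)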
def documents_query(args, fields=None, facets=True):
    """Parse a user query string and compose a document search query."""
    if not isinstance(args, MultiDict):
        args = MultiDict(args)
    text = args.get('q', '').strip()
    q = text_query(text)
    q = authz_filter(q)

    # Sorting -- should this be passed into search directly, instead of
    # these aliases?
    sort_mode = args.get('sort', '').strip().lower()
    if text or sort_mode == 'score':
        sort = ['_score']
    elif sort_mode == 'newest':
        sort = [{'dates': 'desc'}, {'created_at': 'desc'}, '_score']
    elif sort_mode == 'oldest':
        sort = [{'dates': 'asc'}, {'created_at': 'asc'}, '_score']
    else:
        sort = [{'updated_at': 'desc'}, {'created_at': 'desc'}, '_score']

    filters = parse_filters(args)
    for entity in args.getlist('entity'):
        filters.append(('entities.id', entity))

    aggs = {'scoped': {'global': {}, 'aggs': {}}}
    if facets:
        facets = args.getlist('facet')
        if 'collections' in facets:
            aggs = facet_collections(q, aggs, filters)
            facets.remove('collections')
        if 'entities' in facets:
            aggs = facet_entities(aggs, args)
            facets.remove('entities')
        aggs = aggregate(q, aggs, facets)

    signals.document_query_process.send(q=q, args=args)
    return {
        'sort': sort,
        'query': filter_query(q, filters, OR_FIELDS),
        'aggregations': aggs,
        '_source': fields or DEFAULT_FIELDS
    }
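
# Illustrative usage sketch, not part of the original module: compose a
# document query with a collections facet and an entity filter, then execute
# it with the same get_es() helpers used in peek_query() above. The query
# values and paging numbers are made up for demonstration.
def _run_documents_query_example():
    from werkzeug.datastructures import MultiDict
    args = MultiDict([
        ('q', 'offshore drilling'),
        ('facet', 'collections'),
        ('entity', 'some-entity-id'),  # repeatable, like ?entity=... in a URL
    ])
    body = documents_query(args)
    body['size'] = 25
    body['from'] = 0
    return get_es().search(index=get_es_index(), body=body,
                           doc_type=TYPE_DOCUMENT)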