Example #1
0
File: peek.py Project: tomjie/aleph
def peek_query(args):
    """Summarise query matches that sit outside the readable collections.

    A count-only search (``size`` 0, no ``_source``) is run against documents
    whose ``collection_id`` is *not* in the list returned by
    ``authz.collections(authz.READ)`` (presumably the collections the current
    user may read -- confirm against ``authz``).  Hits are bucketed by
    collection (at most 30 buckets) and the bucket counts are summed per
    collection creator.

    :param args: request arguments; wrapped in a ``MultiDict`` when another
        mapping type is passed.  ``q`` holds the search text and every
        ``entity`` value is added as an ``entities.id`` filter.
    :returns: ``format_total`` applied to a dict with the keys ``roles``
        (one formatted summary per creator, largest total first),
        ``active`` (whether there is at least one hit) and ``total``.
    """
    if not isinstance(args, MultiDict):
        args = MultiDict(args)
    text = args.get('q', '').strip()
    query = text_query(text)

    filters = parse_filters(args)
    for entity in args.getlist('entity'):
        filters.append(('entities.id', entity))

    query = filter_query(query, filters, [])
    # Exclude everything in the collections returned for authz.READ.
    query = add_filter(query, {
        'not': {
            'terms': {
                'collection_id': authz.collections(authz.READ)
            }
        }
    })
    body = {
        'query': query,
        'size': 0,
        'aggregations': {
            'collections': {
                'terms': {
                    'field': 'collection_id',
                    'size': 30
                }
            }
        },
        '_source': False
    }
    result = get_es().search(index=get_es_index(),
                             body=body,
                             doc_type=TYPE_DOCUMENT)

    aggs = result.get('aggregations', {}).get('collections', {})
    buckets = aggs.get('buckets', [])
    collections_q = Collection.all_by_ids([b['key'] for b in buckets])
    collections_q = collections_q.filter(Collection.creator_id != None)  # noqa
    collections = {c.id: c for c in collections_q.all()}

    roles = {}
    for bucket in buckets:
        collection = collections.get(bucket.get('key'))
        # Buckets whose collection was not loaded, or is private, are
        # left out of the summary.
        if collection is None or collection.private:
            continue
        if collection.creator_id in roles:
            roles[collection.creator_id]['total'] += bucket.get('doc_count')
        else:
            roles[collection.creator_id] = {
                'name': collection.creator.name,
                'email': collection.creator.email,
                'total': bucket.get('doc_count')
            }

    roles = sorted(roles.values(), key=lambda r: r['total'], reverse=True)
    roles = [format_total(r) for r in roles]
    total = result.get('hits', {}).get('total')
    # ``total`` is None when the response carries no hit count; comparing
    # None with 0 raises TypeError on Python 3, so guard it explicitly.
    active = total is not None and total > 0
    return format_total({'roles': roles, 'active': active, 'total': total})
Example #2
0
def peek_query(args):
    """Count query hits per collection creator outside the READ collections.

    The search is restricted to documents whose ``collection_id`` is not in
    ``authz.collections(authz.READ)`` and returns no documents (``size`` 0),
    only a ``terms`` aggregation over ``collection_id`` (30 buckets at
    most).  The bucket counts of non-private collections that have a creator
    are then added up per creator.

    :param args: request arguments, converted to a ``MultiDict`` if they are
        not one already.  Reads ``q`` (search text) and all ``entity``
        values (each becomes an ``entities.id`` filter).
    :returns: the value of ``format_total`` for a dict holding ``roles``
        (formatted per-creator entries sorted by descending total),
        ``active`` and ``total``.
    """
    if not isinstance(args, MultiDict):
        args = MultiDict(args)

    filters = parse_filters(args)
    for entity_id in args.getlist('entity'):
        filters.append(('entities.id', entity_id))

    es_query = text_query(args.get('q', '').strip())
    es_query = filter_query(es_query, filters, [])
    unreadable = {
        'not': {
            'terms': {'collection_id': authz.collections(authz.READ)}
        }
    }
    es_query = add_filter(es_query, unreadable)

    request = {
        'query': es_query,
        'size': 0,
        'aggregations': {
            'collections': {
                'terms': {'field': 'collection_id', 'size': 30}
            }
        },
        '_source': False
    }
    result = get_es().search(index=get_es_index(), body=request,
                             doc_type=TYPE_DOCUMENT)

    buckets = result.get('aggregations', {}) \
                    .get('collections', {}) \
                    .get('buckets', [])
    found = Collection.all_by_ids([b['key'] for b in buckets])
    found = found.filter(Collection.creator_id != None)  # noqa
    by_id = {obj.id: obj for obj in found.all()}

    per_creator = {}
    for bucket in buckets:
        collection = by_id.get(bucket.get('key'))
        if collection is None or collection.private:
            # Not loaded (e.g. no creator) or private: skip the bucket.
            continue
        count = bucket.get('doc_count')
        creator_id = collection.creator_id
        if creator_id in per_creator:
            per_creator[creator_id]['total'] += count
        else:
            per_creator[creator_id] = {
                'name': collection.creator.name,
                'email': collection.creator.email,
                'total': count
            }

    ranked = sorted(per_creator.values(), key=lambda r: r['total'],
                    reverse=True)
    total = result.get('hits', {}).get('total')
    return format_total({
        'roles': [format_total(entry) for entry in ranked],
        # A missing hit count (None) must not be compared with 0: that
        # raises TypeError on Python 3.
        'active': total is not None and total > 0,
        'total': total
    })
Example #3
0
def entities_query(args, fields=None, facets=True):
    """Parse user arguments and build the request body for an entity search.

    The function only composes the body; it does not run the search.

    :param args: request arguments, wrapped in a ``MultiDict`` if needed.
        Reads ``q`` (query text), ``facet`` (repeatable) and ``sort``
        (``doc_count``, ``alphabet`` or ``score``).
    :param fields: value for ``_source``; ``DEFAULT_FIELDS`` when falsy.
    :param facets: when true, aggregations are built for the requested
        ``facet`` values.
    :returns: dict with the keys ``sort``, ``query``, ``aggregations`` and
        ``_source``.
    """
    if not isinstance(args, MultiDict):
        args = MultiDict(args)
    text = args.get('q', '').strip()
    if text:
        q = {
            "query_string": {
                "query": text,
                "fields": [
                    'name^15', 'name_latin^5', 'terms^12', 'terms_latin^3',
                    'summary^10', 'summary_latin^7', 'description^5',
                    'description_latin^3'
                ],
                "default_operator": "AND",
                "use_dis_max": True
            }
        }
    else:
        q = match_all()

    q = authz_filter(q)
    filters = parse_filters(args)
    aggs = {'scoped': {'global': {}, 'aggs': {}}}
    if facets:
        facet_names = args.getlist('facet')
        # 'collections' gets its own aggregation builder; the remaining
        # names go through the generic ``aggregate`` helper.
        if 'collections' in facet_names:
            aggs = facet_collections(q, aggs, filters)
            facet_names.remove('collections')
        aggs = aggregate(q, aggs, facet_names)

    sort_options = {
        'doc_count': [{'doc_count': 'desc'}, '_score'],
        'alphabet': [{'name': 'asc'}, '_score'],
        'score': ['_score'],
    }
    # Relevance ordering when there is query text, document count otherwise.
    default_sort = 'score' if text else 'doc_count'
    sort_mode = args.get('sort', '').strip().lower()
    # An unrecognised ``sort`` value used to leave ``sort`` unbound and crash
    # at the return statement; fall back to the default ordering instead.
    sort = sort_options.get(sort_mode) or sort_options[default_sort]

    return {
        'sort': sort,
        'query': filter_query(q, filters, OR_FIELDS),
        'aggregations': aggs,
        '_source': fields or DEFAULT_FIELDS
    }
Example #4
0
def documents_query(args, fields=None, facets=True, escape=True):
    """Parse user arguments and build the request body for a document search.

    The function composes the body and emits the
    ``document_query_process`` signal; it does not run the search itself.

    :param args: request arguments, wrapped in a ``MultiDict`` if needed.
        Reads ``q``, ``sort`` and every ``entity`` value (each becomes an
        ``entities.uuid`` filter).
    :param fields: value for ``_source``; ``DEFAULT_FIELDS`` when falsy.
    :param facets: when true, aggregations are built and the query is passed
        through ``entity_collections``.
    :param escape: when true, the query text goes through
        ``escape_query_string`` before it is used in the query string.
    :returns: dict with the keys ``sort``, ``query``, ``aggregations`` and
        ``_source``.
    """
    if not isinstance(args, MultiDict):
        args = MultiDict(args)

    query_text = args.get('q', '').strip()
    if escape:
        query_text = escape_query_string(query_text)
    q = authz_sources_filter(text_query(query_text))

    # Sort aliases accepted in the ``sort`` argument; anything else gets
    # the most-recently-updated ordering.
    sort_aliases = {
        'score': ['_score'],
        'newest_filed': [{'filing_date': 'desc'}],
        'oldest_filed': [{'filing_date': 'asc'}],
        'newest_added': [{'dates': 'desc'}, {'created_at': 'desc'}, '_score'],
        'oldest_added': [{'dates': 'asc'}, {'created_at': 'asc'}, '_score'],
    }
    requested = args.get('sort', '').strip().lower()
    if requested in sort_aliases:
        sort = sort_aliases[requested]
    else:
        sort = [{'updated_at': 'desc'}, {'created_at': 'desc'}, '_score']

    filters = parse_filters(args)
    for entity_uuid in args.getlist('entity'):
        filters.append(('entities.uuid', entity_uuid))

    aggs = {}
    if facets:
        aggs = facet_source(q, aggregate(q, args), filters)
        q = entity_collections(q, aggs, args, filters)

    # XXX (kept from the original author): openoil aggregations were meant
    # to be hooked in around this point.
    signals.document_query_process.send(q=q, args=args)

    body = {}
    body['sort'] = sort
    body['query'] = filter_query(q, filters, OR_FIELDS)
    body['aggregations'] = aggs
    body['_source'] = fields or DEFAULT_FIELDS
    return body
Example #5
0
def entities_query(args, fields=None, facets=True):
    """Compose the search request body for entities from user arguments.

    Nothing is executed here; the caller receives the body to send.

    :param args: request arguments (converted to a ``MultiDict`` when they
        are not one).  ``q`` is the query text, ``facet`` may repeat, and
        ``sort`` selects ``doc_count``, ``alphabet`` or ``score``.
    :param fields: ``_source`` selection, ``DEFAULT_FIELDS`` if falsy.
    :param facets: build aggregations for the requested facets when true.
    :returns: dict with ``sort``, ``query``, ``aggregations``, ``_source``.
    """
    if not isinstance(args, MultiDict):
        args = MultiDict(args)
    text = args.get('q', '').strip()
    if not text:
        q = match_all()
    else:
        q = {
            "query_string": {
                "query": text,
                "fields": ['name^15', 'name_latin^5',
                           'terms^12', 'terms_latin^3',
                           'summary^10', 'summary_latin^7',
                           'description^5', 'description_latin^3'],
                "default_operator": "AND",
                "use_dis_max": True
            }
        }

    q = authz_filter(q)
    filters = parse_filters(args)
    aggs = {'scoped': {'global': {}, 'aggs': {}}}
    if facets:
        requested_facets = args.getlist('facet')
        if 'collections' in requested_facets:
            aggs = facet_collections(q, aggs, filters)
            requested_facets.remove('collections')
        aggs = aggregate(q, aggs, requested_facets)

    sort_mode = args.get('sort', '').strip().lower()
    if sort_mode not in ('doc_count', 'alphabet', 'score'):
        # Missing or unknown mode: rank by relevance when there is query
        # text, by document count otherwise.  Previously an unknown mode
        # left ``sort`` unassigned and the return statement raised.
        sort_mode = 'score' if text else 'doc_count'

    if sort_mode == 'doc_count':
        sort = [{'doc_count': 'desc'}, '_score']
    elif sort_mode == 'alphabet':
        sort = [{'name': 'asc'}, '_score']
    else:
        sort = ['_score']

    return {
        'sort': sort,
        'query': filter_query(q, filters, OR_FIELDS),
        'aggregations': aggs,
        '_source': fields or DEFAULT_FIELDS
    }
Example #6
0
def documents_query(args, fields=None, facets=True):
    """Parse user arguments and build the request body for a document search.

    The body is only composed here (and the ``document_query_process``
    signal is sent); running the search is left to the caller.

    :param args: request arguments, wrapped in a ``MultiDict`` if needed.
        Reads ``q``, ``sort``, ``facet`` and ``entity``.
    :param fields: value for ``_source``; ``DEFAULT_FIELDS`` when falsy.
    :param facets: when true, aggregations are built for the requested
        ``facet`` values.
    :returns: dict with the keys ``sort``, ``query``, ``aggregations`` and
        ``_source``.
    """
    if not isinstance(args, MultiDict):
        args = MultiDict(args)
    text = args.get('q', '').strip()
    q = authz_filter(text_query(text))

    # Any query text forces relevance ordering, whatever ``sort`` says.
    mode = args.get('sort', '').strip().lower()
    if text or mode == 'score':
        sort = ['_score']
    else:
        direction = {'newest': 'desc', 'oldest': 'asc'}.get(mode)
        if direction is None:
            sort = [{'updated_at': 'desc'}, {'created_at': 'desc'}, '_score']
        else:
            sort = [{'dates': direction}, {'created_at': direction}, '_score']

    filters = parse_filters(args)
    for entity_id in args.getlist('entity'):
        filters.append(('entities.id', entity_id))

    aggs = {'scoped': {'global': {}, 'aggs': {}}}
    if facets:
        wanted = args.getlist('facet')
        # The two special facets have dedicated builders; what is left is
        # handed to the generic ``aggregate`` helper.
        if 'collections' in wanted:
            aggs = facet_collections(q, aggs, filters)
            wanted.remove('collections')
        if 'entities' in wanted:
            aggs = facet_entities(aggs, args)
            wanted.remove('entities')
        aggs = aggregate(q, aggs, wanted)

    signals.document_query_process.send(q=q, args=args)

    final_query = filter_query(q, filters, OR_FIELDS)
    return {
        'sort': sort,
        'query': final_query,
        'aggregations': aggs,
        '_source': fields or DEFAULT_FIELDS
    }
Example #7
0
def documents_query(args, fields=None, facets=True):
    """Compose the search request body for documents from user arguments.

    The search is not run here: the function returns the body and sends the
    ``document_query_process`` signal with the authz-filtered query.

    :param args: request arguments (converted to a ``MultiDict`` when they
        are not one).  Reads ``q``, ``sort``, ``facet`` and ``entity``.
    :param fields: ``_source`` selection, ``DEFAULT_FIELDS`` if falsy.
    :param facets: build aggregations for the requested facets when true.
    :returns: dict with ``sort``, ``query``, ``aggregations``, ``_source``.
    """
    if not isinstance(args, MultiDict):
        args = MultiDict(args)

    text = args.get('q', '').strip()
    q = text_query(text)
    q = authz_filter(q)

    sort_mode = args.get('sort', '').strip().lower()
    by_date = {
        'newest': [{'dates': 'desc'}, {'created_at': 'desc'}, '_score'],
        'oldest': [{'dates': 'asc'}, {'created_at': 'asc'}, '_score'],
    }
    recently_updated = [{'updated_at': 'desc'}, {'created_at': 'desc'},
                        '_score']
    # With query text present the ordering is always by score; the date
    # aliases only apply to text-less queries.
    if text or sort_mode == 'score':
        sort = ['_score']
    else:
        sort = by_date.get(sort_mode, recently_updated)

    filters = parse_filters(args)
    for ent in args.getlist('entity'):
        filters.append(('entities.id', ent))

    aggs = {'scoped': {'global': {}, 'aggs': {}}}
    if facets:
        facet_list = args.getlist('facet')
        if 'collections' in facet_list:
            facet_list.remove('collections')
            aggs = facet_collections(q, aggs, filters)
        if 'entities' in facet_list:
            facet_list.remove('entities')
            aggs = facet_entities(aggs, args)
        aggs = aggregate(q, aggs, facet_list)

    signals.document_query_process.send(q=q, args=args)

    result = {'sort': sort}
    result['query'] = filter_query(q, filters, OR_FIELDS)
    result['aggregations'] = aggs
    result['_source'] = fields or DEFAULT_FIELDS
    return result