Example #1
0
def alert_query(alert):
    """Execute the query and return a set of results."""
    q = text_query(alert.query_text)
    q = authz_filter(q)
    if alert.entity_id:
        q = filter_query(q, [('entities.id', alert.entity_id)], OR_FIELDS)
    if alert.notified_at:
        q = add_filter(q, {"range": {"created_at": {"gt": alert.notified_at}}})
    q = {'query': q, 'size': 150}

    result, hits, output = execute_basic(TYPE_DOCUMENT, q)
    collections = {}
    for doc in hits.get('hits', []):
        document = doc.get('_source')
        document['id'] = int(doc.get('_id'))
        document['collections'] = []
        for coll in document['collection_id']:
            if coll not in authz.collections(authz.READ):
                continue
            if coll not in collections:
                collections[coll] = Collection.by_id(coll)
            if collections[coll] is None:
                continue
            document['collections'].append(collections[coll])
        document['records'] = {'results': [], 'total': 0}
        output['results'].append(document)
    return output
Example #2
0
def execute_entities_query(args, query, doc_counts=False):
    """Execute the query and return a set of results."""
    result, hits, output = execute_basic(TYPE_ENTITY, query)
    convert_entity_aggregations(result, output, args)
    sub_queries = []
    for doc in hits.get('hits', []):
        entity = doc.get('_source')
        entity['id'] = doc.get('_id')
        entity['score'] = doc.get('_score')
        entity['api_url'] = url_for('entities_api.view', id=doc.get('_id'))
        output['results'].append(entity)

        sq = {'term': {'entities.id': entity['id']}}
        sq = authz_filter(sq)
        sq = {'size': 0, 'query': sq}
        sub_queries.append(json.dumps({}))
        sub_queries.append(json.dumps(sq))

    if doc_counts and len(sub_queries):
        res = get_es().msearch(index=get_es_index(),
                               doc_type=TYPE_DOCUMENT,
                               body='\n'.join(sub_queries))
        for (entity, res) in zip(output['results'], res.get('responses')):
            entity['doc_count'] = res.get('hits', {}).get('total')
    return output
Example #3
0
def suggest_entities(args):
    """Auto-complete API."""
    text = args.get('prefix')
    min_count = int(args.get('min_count', 0))
    options = []
    if text is not None and len(text.strip()):
        q = {
            'bool': {
                'must': [
                    {'match_phrase_prefix': {'terms': text.strip()}},
                    {'range': {'doc_count': {'gte': min_count}}}
                ]
            }
        }
        q = {
            'size': 5,
            'sort': [{'doc_count': 'desc'}, '_score'],
            'query': authz_filter(q),
            '_source': ['name', '$schema', 'terms', 'doc_count']
        }
        ref = latinize_text(text)
        result = get_es().search(index=get_es_index(), doc_type=TYPE_ENTITY,
                                 body=q)
        for res in result.get('hits', {}).get('hits', []):
            ent = res.get('_source')
            terms = [latinize_text(t) for t in ent.pop('terms', [])]
            ent['match'] = ref in terms
            ent['id'] = res.get('_id')
            options.append(ent)
    return {
        'text': text,
        'results': options
    }
Example #4
0
def execute_entities_query(args, query, doc_counts=False):
    """Execute the query and return a set of results."""
    result, hits, output = execute_basic(TYPE_ENTITY, query)
    convert_entity_aggregations(result, output, args)
    sub_queries = []
    for doc in hits.get('hits', []):
        entity = doc.get('_source')
        entity['id'] = doc.get('_id')
        entity['score'] = doc.get('_score')
        entity['api_url'] = url_for('entities_api.view', id=doc.get('_id'))
        output['results'].append(entity)

        sq = {'term': {'entities.id': entity['id']}}
        sq = authz_filter(sq)
        sq = {'size': 0, 'query': sq}
        sub_queries.append(json.dumps({}))
        sub_queries.append(json.dumps(sq))

    if doc_counts and len(sub_queries):
        res = get_es().msearch(index=get_es_index(),
                               doc_type=TYPE_DOCUMENT,
                               body='\n'.join(sub_queries))
        for (entity, res) in zip(output['results'], res.get('responses')):
            entity['doc_count'] = res.get('hits', {}).get('total')
    return output
Example #5
0
def suggest_entities(prefix, min_count=0, schemas=None, size=5):
    """Auto-complete API."""
    options = []
    if prefix is not None and len(prefix.strip()):
        q = {'match_phrase_prefix': {'terms': prefix.strip()}}
        if min_count > 0:
            q = add_filter(q, {'range': {'doc_count': {'gte': min_count}}})
        if schemas is not None and len(schemas):
            q = add_filter(q, {'terms': {'$schema': schemas}})
        q = {
            'size': size,
            'sort': [{
                'doc_count': 'desc'
            }, '_score'],
            'query': authz_filter(q),
            '_source': ['name', '$schema', 'terms', 'doc_count']
        }
        ref = latinize_text(prefix)
        result = get_es().search(index=get_es_index(),
                                 doc_type=TYPE_ENTITY,
                                 body=q)
        for res in result.get('hits', {}).get('hits', []):
            ent = res.get('_source')
            terms = [latinize_text(t) for t in ent.pop('terms', [])]
            ent['match'] = ref in terms
            ent['score'] = res.get('_score')
            ent['id'] = res.get('_id')
            options.append(ent)
    return {'prefix': prefix, 'results': options}
Example #6
0
def suggest_entities(prefix, min_count=0, schemas=None, size=5):
    """Auto-complete API."""
    options = []
    if prefix is not None and len(prefix.strip()):
        q = {
            'match_phrase_prefix': {'terms': prefix.strip()}
        }
        if min_count > 0:
            q = add_filter(q, {'range': {'doc_count': {'gte': min_count}}})
        if schemas is not None and len(schemas):
            q = add_filter(q, {'terms': {'$schema': schemas}})
        q = {
            'size': size,
            'sort': [{'doc_count': 'desc'}, '_score'],
            'query': authz_filter(q),
            '_source': ['name', '$schema', 'terms', 'doc_count']
        }
        ref = latinize_text(prefix)
        result = get_es().search(index=get_es_index(), doc_type=TYPE_ENTITY,
                                 body=q)
        for res in result.get('hits', {}).get('hits', []):
            ent = res.get('_source')
            terms = [latinize_text(t) for t in ent.pop('terms', [])]
            ent['match'] = ref in terms
            ent['score'] = res.get('_score')
            ent['id'] = res.get('_id')
            options.append(ent)
    return {
        'prefix': prefix,
        'results': options
    }
Example #7
0
def documents_query(args, fields=None, facets=True, newer_than=None):
    """Parse a user query string, compose and execute a query."""
    if not isinstance(args, MultiDict):
        args = MultiDict(args)
    text = args.get('q', '').strip()
    q = text_query(text)
    q = authz_filter(q)

    if newer_than is not None:
        q = add_filter(q, {
            "range": {
                "created_at": {
                    "gt": newer_than
                }
            }
        })

    # Sorting -- should this be passed into search directly, instead of
    # these aliases?
    sort_mode = args.get('sort', '').strip().lower()
    if text or sort_mode == 'score':
        sort = ['_score']
    elif sort_mode == 'newest':
        sort = [{'dates': 'desc'}, {'created_at': 'desc'}, '_score']
    elif sort_mode == 'oldest':
        sort = [{'dates': 'asc'}, {'created_at': 'asc'}, '_score']
    else:
        sort = [{'updated_at': 'desc'}, {'created_at': 'desc'}, '_score']

    # Extract filters, given in the form: &filter:foo_field=bla_value
    filters = []
    for key in args.keys():
        for value in args.getlist(key):
            if not key.startswith('filter:'):
                continue
            _, field = key.split(':', 1)
            filters.append((field, value))

    for entity in args.getlist('entity'):
        filters.append(('entities.uuid', entity))

    aggs = {}
    if facets:
        aggs = aggregate(q, args, filters)
        aggs = facet_source(q, aggs, filters)
        q = entity_collections(q, aggs, args, filters)

    return {
        'sort': sort,
        'query': filter_query(q, filters),
        'aggregations': aggs,
        '_source': fields or DEFAULT_FIELDS
    }
Example #8
0
def similar_entities(entity, args, collections):
    """Merge suggestions API."""
    shoulds = []
    for term in entity.terms:
        shoulds.append({
            'multi_match': {
                "fields": ["name^50", "terms^25", "summary^5"],
                "query": term,
                "fuzziness": 2
            }
        })
        shoulds.append({
            'multi_match': {
                "fields": ["name_latin^10", "terms_latin^5", "summary_latin"],
                "query": latinize_text(term),
                "fuzziness": 2
            }
        })

    q = {
        "bool": {
            "should": shoulds,
            "must_not": {
                "ids": {
                    "values": [entity.id]
                }
            },
            "must": {
                "terms": {
                    "collection_id": collections
                }
            },
            "minimum_should_match": 1
        }
    }
    q = {
        'size': 10,
        'query': authz_filter(q),
        '_source': DEFAULT_FIELDS
    }
    options = []
    result = get_es().search(index=get_es_index(), doc_type=TYPE_ENTITY,
                             body=q)
    for res in result.get('hits', {}).get('hits', []):
        entity = res.get('_source')
        entity['id'] = res.get('_id')
        entity['score'] = res.get('_score')
        entity['api_url'] = url_for('entities_api.view', id=res.get('_id'))
        options.append(entity)
    return {
        'results': options
    }
Example #9
0
def entities_query(args, fields=None, facets=True):
    """Parse a user query string, compose and execute a query."""
    if not isinstance(args, MultiDict):
        args = MultiDict(args)
    text = args.get('q', '').strip()
    if text is None or not len(text):
        q = match_all()
    else:
        q = {
            "query_string": {
                "query":
                text,
                "fields": [
                    'name^15', 'name_latin^5', 'terms^12', 'terms_latin^3',
                    'summary^10', 'summary_latin^7', 'description^5',
                    'description_latin^3'
                ],
                "default_operator":
                "AND",
                "use_dis_max":
                True
            }
        }

    q = authz_filter(q)
    filters = parse_filters(args)
    aggs = {'scoped': {'global': {}, 'aggs': {}}}
    if facets:
        facets = args.getlist('facet')
        if 'collections' in facets:
            aggs = facet_collections(q, aggs, filters)
            facets.remove('collections')
        aggs = aggregate(q, aggs, facets)

    sort_mode = args.get('sort', '').strip().lower()
    default_sort = 'score' if len(text) else 'doc_count'
    sort_mode = sort_mode or default_sort
    if sort_mode == 'doc_count':
        sort = [{'doc_count': 'desc'}, '_score']
    elif sort_mode == 'alphabet':
        sort = [{'name': 'asc'}, '_score']
    elif sort_mode == 'score':
        sort = ['_score']

    return {
        'sort': sort,
        'query': filter_query(q, filters, OR_FIELDS),
        'aggregations': aggs,
        '_source': fields or DEFAULT_FIELDS
    }
Example #10
0
def similar_entities(entity, args, collections):
    """Merge suggestions API."""
    shoulds = []
    for term in entity.terms:
        shoulds.append({
            'multi_match': {
                "fields": ["name^50", "terms^25", "summary^5"],
                "query": term,
                "fuzziness": 2
            }
        })
        shoulds.append({
            'multi_match': {
                "fields": ["name_latin^10", "terms_latin^5", "summary_latin"],
                "query": latinize_text(term),
                "fuzziness": 2
            }
        })

    q = {
        "bool": {
            "should": shoulds,
            "must_not": {
                "ids": {
                    "values": [entity.id]
                }
            },
            "must": {
                "terms": {
                    "collection_id": collections
                }
            },
            "minimum_should_match": 1
        }
    }
    q = {'size': 10, 'query': authz_filter(q), '_source': DEFAULT_FIELDS}
    options = []
    result = get_es().search(index=get_es_index(),
                             doc_type=TYPE_ENTITY,
                             body=q)
    for res in result.get('hits', {}).get('hits', []):
        entity = res.get('_source')
        entity['id'] = res.get('_id')
        entity['score'] = res.get('_score')
        entity['api_url'] = url_for('entities_api.view', id=res.get('_id'))
        options.append(entity)
    return {'results': options}
Example #11
0
def entities_query(args, fields=None, facets=True):
    """Parse a user query string, compose and execute a query."""
    if not isinstance(args, MultiDict):
        args = MultiDict(args)
    text = args.get('q', '').strip()
    if text is None or not len(text):
        q = match_all()
    else:
        q = {
            "query_string": {
                "query": text,
                "fields": ['name^15', 'name_latin^5',
                           'terms^12', 'terms_latin^3',
                           'summary^10', 'summary_latin^7',
                           'description^5', 'description_latin^3'],
                "default_operator": "AND",
                "use_dis_max": True
            }
        }

    q = authz_filter(q)
    filters = parse_filters(args)
    aggs = {'scoped': {'global': {}, 'aggs': {}}}
    if facets:
        facets = args.getlist('facet')
        if 'collections' in facets:
            aggs = facet_collections(q, aggs, filters)
            facets.remove('collections')
        aggs = aggregate(q, aggs, facets)

    sort_mode = args.get('sort', '').strip().lower()
    default_sort = 'score' if len(text) else 'doc_count'
    sort_mode = sort_mode or default_sort
    if sort_mode == 'doc_count':
        sort = [{'doc_count': 'desc'}, '_score']
    elif sort_mode == 'alphabet':
        sort = [{'name': 'asc'}, '_score']
    elif sort_mode == 'score':
        sort = ['_score']

    return {
        'sort': sort,
        'query': filter_query(q, filters, OR_FIELDS),
        'aggregations': aggs,
        '_source': fields or DEFAULT_FIELDS
    }
Example #12
0
def alert_query(alert):
    """Execute the query and return a set of results."""
    q = text_query(alert.query_text)
    q = authz_filter(q)
    if alert.entity_id:
        q = filter_query(q, [('entities.id', alert.entity_id)], OR_FIELDS)
    if alert.notified_at:
        q = add_filter(q, {
            "range": {
                "created_at": {
                    "gt": alert.notified_at
                }
            }
        })
    q = {
        'query': q,
        'size': 150
    }

    result, hits, output = execute_basic(TYPE_DOCUMENT, q)
    sub_queries = []
    collections = {}
    for doc in hits.get('hits', []):
        document = doc.get('_source')
        document['id'] = int(doc.get('_id'))
        document['collections'] = []
        for coll in document['collection_id']:
            if coll not in authz.collections(authz.READ):
                continue
            if coll not in collections:
                collections[coll] = Collection.by_id(coll)
            if collections[coll] is None:
                continue
            document['collections'].append(collections[coll])
        document['records'] = {'results': [], 'total': 0}

        sq = records_query(document['id'], alert.to_query(), size=1)
        if sq is not None:
            sub_queries.append(json.dumps({}))
            sub_queries.append(json.dumps(sq))
        output['results'].append(document)

    run_sub_queries(output, sub_queries)
    return output
Example #13
0
def documents_query(args, fields=None, facets=True):
    """Parse a user query string, compose and execute a query."""
    if not isinstance(args, MultiDict):
        args = MultiDict(args)
    text = args.get('q', '').strip()
    q = text_query(text)
    q = authz_filter(q)

    # Sorting -- should this be passed into search directly, instead of
    # these aliases?
    sort_mode = args.get('sort', '').strip().lower()
    if text or sort_mode == 'score':
        sort = ['_score']
    elif sort_mode == 'newest':
        sort = [{'dates': 'desc'}, {'created_at': 'desc'}, '_score']
    elif sort_mode == 'oldest':
        sort = [{'dates': 'asc'}, {'created_at': 'asc'}, '_score']
    else:
        sort = [{'updated_at': 'desc'}, {'created_at': 'desc'}, '_score']

    filters = parse_filters(args)
    for entity in args.getlist('entity'):
        filters.append(('entities.id', entity))

    aggs = {'scoped': {'global': {}, 'aggs': {}}}
    if facets:
        facets = args.getlist('facet')
        if 'collections' in facets:
            aggs = facet_collections(q, aggs, filters)
            facets.remove('collections')
        if 'entities' in facets:
            aggs = facet_entities(aggs, args)
            facets.remove('entities')
        aggs = aggregate(q, aggs, facets)

    signals.document_query_process.send(q=q, args=args)
    return {
        'sort': sort,
        'query': filter_query(q, filters, OR_FIELDS),
        'aggregations': aggs,
        '_source': fields or DEFAULT_FIELDS
    }
Example #14
0
def documents_query(args, fields=None, facets=True):
    """Parse a user query string, compose and execute a query."""
    if not isinstance(args, MultiDict):
        args = MultiDict(args)
    text = args.get('q', '').strip()
    q = text_query(text)
    q = authz_filter(q)

    # Sorting -- should this be passed into search directly, instead of
    # these aliases?
    sort_mode = args.get('sort', '').strip().lower()
    if text or sort_mode == 'score':
        sort = ['_score']
    elif sort_mode == 'newest':
        sort = [{'dates': 'desc'}, {'created_at': 'desc'}, '_score']
    elif sort_mode == 'oldest':
        sort = [{'dates': 'asc'}, {'created_at': 'asc'}, '_score']
    else:
        sort = [{'updated_at': 'desc'}, {'created_at': 'desc'}, '_score']

    filters = parse_filters(args)
    for entity in args.getlist('entity'):
        filters.append(('entities.id', entity))

    aggs = {'scoped': {'global': {}, 'aggs': {}}}
    if facets:
        facets = args.getlist('facet')
        if 'collections' in facets:
            aggs = facet_collections(q, aggs, filters)
            facets.remove('collections')
        if 'entities' in facets:
            aggs = facet_entities(aggs, args)
            facets.remove('entities')
        aggs = aggregate(q, aggs, facets)

    signals.document_query_process.send(q=q, args=args)
    return {
        'sort': sort,
        'query': filter_query(q, filters, OR_FIELDS),
        'aggregations': aggs,
        '_source': fields or DEFAULT_FIELDS
    }
Example #15
0
def suggest_entities(args):
    """Auto-complete API."""
    text = args.get('prefix')
    min_count = int(args.get('min_count', 0))
    options = []
    if text is not None and len(text.strip()):
        q = {
            'bool': {
                'must': [{
                    'match_phrase_prefix': {
                        'terms': text.strip()
                    }
                }, {
                    'range': {
                        'doc_count': {
                            'gte': min_count
                        }
                    }
                }]
            }
        }
        q = {
            'size': 5,
            'sort': [{
                'doc_count': 'desc'
            }, '_score'],
            'query': authz_filter(q),
            '_source': ['name', '$schema', 'terms', 'doc_count']
        }
        ref = latinize_text(text)
        result = get_es().search(index=get_es_index(),
                                 doc_type=TYPE_ENTITY,
                                 body=q)
        for res in result.get('hits', {}).get('hits', []):
            ent = res.get('_source')
            terms = [latinize_text(t) for t in ent.pop('terms', [])]
            ent['match'] = ref in terms
            ent['id'] = res.get('_id')
            options.append(ent)
    return {'text': text, 'results': options}