def alert_query(alert):
    """Run the alert's saved search and return the matching documents."""
    query = text_query(alert.query_text)
    query = authz_filter(query)
    if alert.entity_id:
        query = filter_query(query, [('entities.id', alert.entity_id)],
                             OR_FIELDS)
    if alert.notified_at:
        # Only surface documents created since the last notification.
        query = add_filter(query, {
            "range": {"created_at": {"gt": alert.notified_at}}
        })
    query = {'query': query, 'size': 150}
    _, hits, output = execute_basic(TYPE_DOCUMENT, query)
    # Cache collection lookups so each collection is fetched at most once.
    collection_cache = {}
    for hit in hits.get('hits', []):
        document = hit.get('_source')
        document['id'] = int(hit.get('_id'))
        document['collections'] = []
        for coll_id in document['collection_id']:
            if coll_id not in authz.collections(authz.READ):
                continue
            if coll_id not in collection_cache:
                collection_cache[coll_id] = Collection.by_id(coll_id)
            collection = collection_cache[coll_id]
            if collection is not None:
                document['collections'].append(collection)
        document['records'] = {'results': [], 'total': 0}
        output['results'].append(document)
    return output
def execute_entities_query(args, query, doc_counts=False):
    """Execute the query and return a set of results.

    When ``doc_counts`` is true, an additional msearch round-trip annotates
    each returned entity with the number of documents referencing it
    (``doc_count``).
    """
    result, hits, output = execute_basic(TYPE_ENTITY, query)
    convert_entity_aggregations(result, output, args)
    sub_queries = []
    for doc in hits.get('hits', []):
        entity = doc.get('_source')
        entity['id'] = doc.get('_id')
        entity['score'] = doc.get('_score')
        entity['api_url'] = url_for('entities_api.view', id=doc.get('_id'))
        output['results'].append(entity)
        # Count documents mentioning this entity, subject to authz.
        sq = {'term': {'entities.id': entity['id']}}
        sq = authz_filter(sq)
        sq = {'size': 0, 'query': sq}
        # msearch bodies alternate header and query lines; header is empty.
        sub_queries.append(json.dumps({}))
        sub_queries.append(json.dumps(sq))
    if doc_counts and len(sub_queries):
        resp = get_es().msearch(index=get_es_index(),
                                doc_type=TYPE_DOCUMENT,
                                body='\n'.join(sub_queries))
        # Use a distinct loop variable: the original rebound ``res`` (the
        # msearch result) inside the loop, which only worked because
        # ``res.get('responses')`` was evaluated before rebinding.
        for entity, sub_resp in zip(output['results'],
                                    resp.get('responses')):
            entity['doc_count'] = sub_resp.get('hits', {}).get('total')
    return output
def suggest_entities(args):
    """Auto-complete API."""
    text = args.get('prefix')
    min_count = int(args.get('min_count', 0))
    options = []
    if text is not None and len(text.strip()):
        # Match the typed prefix and require a minimum popularity.
        prefix_clause = {'match_phrase_prefix': {'terms': text.strip()}}
        count_clause = {'range': {'doc_count': {'gte': min_count}}}
        q = {'bool': {'must': [prefix_clause, count_clause]}}
        body = {
            'size': 5,
            'sort': [{'doc_count': 'desc'}, '_score'],
            'query': authz_filter(q),
            '_source': ['name', '$schema', 'terms', 'doc_count']
        }
        ref = latinize_text(text)
        result = get_es().search(index=get_es_index(),
                                 doc_type=TYPE_ENTITY,
                                 body=body)
        for hit in result.get('hits', {}).get('hits', []):
            ent = hit.get('_source')
            latin_terms = [latinize_text(t) for t in ent.pop('terms', [])]
            ent['match'] = ref in latin_terms
            ent['id'] = hit.get('_id')
            options.append(ent)
    return {'text': text, 'results': options}
def suggest_entities(prefix, min_count=0, schemas=None, size=5):
    """Auto-complete API."""
    options = []
    if prefix is not None and len(prefix.strip()):
        q = {'match_phrase_prefix': {'terms': prefix.strip()}}
        if min_count > 0:
            # Only suggest entities above the popularity threshold.
            q = add_filter(q, {'range': {'doc_count': {'gte': min_count}}})
        if schemas is not None and len(schemas):
            # Restrict suggestions to the requested schema types.
            q = add_filter(q, {'terms': {'$schema': schemas}})
        body = {
            'size': size,
            'sort': [{'doc_count': 'desc'}, '_score'],
            'query': authz_filter(q),
            '_source': ['name', '$schema', 'terms', 'doc_count']
        }
        ref = latinize_text(prefix)
        search = get_es().search(index=get_es_index(),
                                 doc_type=TYPE_ENTITY,
                                 body=body)
        for hit in search.get('hits', {}).get('hits', []):
            ent = hit.get('_source')
            normalized = [latinize_text(t) for t in ent.pop('terms', [])]
            ent['match'] = ref in normalized
            ent['score'] = hit.get('_score')
            ent['id'] = hit.get('_id')
            options.append(ent)
    return {'prefix': prefix, 'results': options}
def suggest_entities(prefix, min_count=0, schemas=None, size=5):
    """Auto-complete API."""
    options = []
    # Guard clause: nothing to suggest without a non-empty prefix.
    if prefix is None or not len(prefix.strip()):
        return {'prefix': prefix, 'results': options}
    query = {'match_phrase_prefix': {'terms': prefix.strip()}}
    if min_count > 0:
        query = add_filter(query,
                           {'range': {'doc_count': {'gte': min_count}}})
    if schemas is not None and len(schemas):
        query = add_filter(query, {'terms': {'$schema': schemas}})
    body = {
        'size': size,
        'sort': [{'doc_count': 'desc'}, '_score'],
        'query': authz_filter(query),
        '_source': ['name', '$schema', 'terms', 'doc_count']
    }
    reference = latinize_text(prefix)
    response = get_es().search(index=get_es_index(),
                               doc_type=TYPE_ENTITY,
                               body=body)
    for hit in response.get('hits', {}).get('hits', []):
        entity = hit.get('_source')
        latinized = [latinize_text(t) for t in entity.pop('terms', [])]
        entity['match'] = reference in latinized
        entity['score'] = hit.get('_score')
        entity['id'] = hit.get('_id')
        options.append(entity)
    return {'prefix': prefix, 'results': options}
def documents_query(args, fields=None, facets=True, newer_than=None):
    """Parse a user query string, compose and execute a query."""
    if not isinstance(args, MultiDict):
        args = MultiDict(args)
    text = args.get('q', '').strip()
    q = text_query(text)
    q = authz_filter(q)
    if newer_than is not None:
        # Restrict results to documents indexed after the given timestamp.
        q = add_filter(q, {
            "range": {"created_at": {"gt": newer_than}}
        })

    # Sorting -- should this be passed into search directly, instead of
    # these aliases?
    sort_mode = args.get('sort', '').strip().lower()
    if text or sort_mode == 'score':
        sort = ['_score']
    elif sort_mode == 'newest':
        sort = [{'dates': 'desc'}, {'created_at': 'desc'}, '_score']
    elif sort_mode == 'oldest':
        sort = [{'dates': 'asc'}, {'created_at': 'asc'}, '_score']
    else:
        sort = [{'updated_at': 'desc'}, {'created_at': 'desc'}, '_score']

    # Extract filters, given in the form: &filter:foo_field=bla_value
    # The prefix check is hoisted out of the inner loop: the original
    # re-tested the key for every value of that key.
    filters = []
    for key in args.keys():
        if not key.startswith('filter:'):
            continue
        _, field = key.split(':', 1)
        for value in args.getlist(key):
            filters.append((field, value))
    for entity in args.getlist('entity'):
        filters.append(('entities.uuid', entity))

    aggs = {}
    if facets:
        aggs = aggregate(q, args, filters)
        aggs = facet_source(q, aggs, filters)
        q = entity_collections(q, aggs, args, filters)
    return {
        'sort': sort,
        'query': filter_query(q, filters),
        'aggregations': aggs,
        '_source': fields or DEFAULT_FIELDS
    }
def similar_entities(entity, args, collections):
    """Merge suggestions API.

    Finds entities within the given collections that fuzzily resemble
    ``entity`` by name, terms or summary.
    """
    shoulds = []
    for term in entity.terms:
        # Match the raw term and a latinized variant against weighted fields.
        shoulds.append({
            'multi_match': {
                "fields": ["name^50", "terms^25", "summary^5"],
                "query": term,
                "fuzziness": 2
            }
        })
        shoulds.append({
            'multi_match': {
                "fields": ["name_latin^10", "terms_latin^5", "summary_latin"],
                "query": latinize_text(term),
                "fuzziness": 2
            }
        })
    q = {
        "bool": {
            "should": shoulds,
            # Never suggest the entity as a merge candidate for itself.
            "must_not": {"ids": {"values": [entity.id]}},
            "must": {"terms": {"collection_id": collections}},
            "minimum_should_match": 1
        }
    }
    q = {
        'size': 10,
        'query': authz_filter(q),
        '_source': DEFAULT_FIELDS
    }
    options = []
    result = get_es().search(index=get_es_index(),
                             doc_type=TYPE_ENTITY,
                             body=q)
    # Use a distinct name for result rows: the original rebound the
    # ``entity`` parameter inside this loop, shadowing the input.
    for res in result.get('hits', {}).get('hits', []):
        candidate = res.get('_source')
        candidate['id'] = res.get('_id')
        candidate['score'] = res.get('_score')
        candidate['api_url'] = url_for('entities_api.view',
                                       id=res.get('_id'))
        options.append(candidate)
    return {'results': options}
def entities_query(args, fields=None, facets=True):
    """Parse a user query string, compose and execute a query."""
    if not isinstance(args, MultiDict):
        args = MultiDict(args)
    text = args.get('q', '').strip()
    if not len(text):
        q = match_all()
    else:
        q = {
            "query_string": {
                "query": text,
                "fields": ['name^15', 'name_latin^5',
                           'terms^12', 'terms_latin^3',
                           'summary^10', 'summary_latin^7',
                           'description^5', 'description_latin^3'],
                "default_operator": "AND",
                "use_dis_max": True
            }
        }
    q = authz_filter(q)
    filters = parse_filters(args)
    aggs = {'scoped': {'global': {}, 'aggs': {}}}
    if facets:
        facets = args.getlist('facet')
        if 'collections' in facets:
            aggs = facet_collections(q, aggs, filters)
            facets.remove('collections')
        aggs = aggregate(q, aggs, facets)
    sort_mode = args.get('sort', '').strip().lower()
    default_sort = 'score' if len(text) else 'doc_count'
    sort_mode = sort_mode or default_sort
    if sort_mode == 'doc_count':
        sort = [{'doc_count': 'desc'}, '_score']
    elif sort_mode == 'alphabet':
        sort = [{'name': 'asc'}, '_score']
    else:
        # Fall back to relevance; previously an unrecognised sort mode
        # left ``sort`` unbound and raised a NameError.
        sort = ['_score']
    return {
        'sort': sort,
        'query': filter_query(q, filters, OR_FIELDS),
        'aggregations': aggs,
        '_source': fields or DEFAULT_FIELDS
    }
def similar_entities(entity, args, collections):
    """Merge suggestions API.

    Suggests entities from the given collections that fuzzily match
    ``entity`` on name, terms or summary.
    """
    shoulds = []
    for term in entity.terms:
        # One clause for the raw term, one for its latinized form.
        shoulds.append({
            'multi_match': {
                "fields": ["name^50", "terms^25", "summary^5"],
                "query": term,
                "fuzziness": 2
            }
        })
        shoulds.append({
            'multi_match': {
                "fields": ["name_latin^10", "terms_latin^5", "summary_latin"],
                "query": latinize_text(term),
                "fuzziness": 2
            }
        })
    q = {
        "bool": {
            "should": shoulds,
            # Exclude the source entity from its own suggestions.
            "must_not": {"ids": {"values": [entity.id]}},
            "must": {"terms": {"collection_id": collections}},
            "minimum_should_match": 1
        }
    }
    q = {'size': 10, 'query': authz_filter(q), '_source': DEFAULT_FIELDS}
    options = []
    result = get_es().search(index=get_es_index(),
                             doc_type=TYPE_ENTITY,
                             body=q)
    # Distinct loop name: the original rebound the ``entity`` parameter
    # here, shadowing the function input.
    for res in result.get('hits', {}).get('hits', []):
        suggestion = res.get('_source')
        suggestion['id'] = res.get('_id')
        suggestion['score'] = res.get('_score')
        suggestion['api_url'] = url_for('entities_api.view',
                                        id=res.get('_id'))
        options.append(suggestion)
    return {'results': options}
def entities_query(args, fields=None, facets=True):
    """Parse a user query string, compose and execute a query."""
    if not isinstance(args, MultiDict):
        args = MultiDict(args)
    text = args.get('q', '').strip()
    if not len(text):
        q = match_all()
    else:
        q = {
            "query_string": {
                "query": text,
                "fields": ['name^15', 'name_latin^5', 'terms^12',
                           'terms_latin^3', 'summary^10',
                           'summary_latin^7', 'description^5',
                           'description_latin^3'],
                "default_operator": "AND",
                "use_dis_max": True
            }
        }
    q = authz_filter(q)
    filters = parse_filters(args)
    aggs = {'scoped': {'global': {}, 'aggs': {}}}
    if facets:
        facets = args.getlist('facet')
        if 'collections' in facets:
            aggs = facet_collections(q, aggs, filters)
            facets.remove('collections')
        aggs = aggregate(q, aggs, facets)
    sort_mode = args.get('sort', '').strip().lower()
    default_sort = 'score' if len(text) else 'doc_count'
    sort_mode = sort_mode or default_sort
    if sort_mode == 'doc_count':
        sort = [{'doc_count': 'desc'}, '_score']
    elif sort_mode == 'alphabet':
        sort = [{'name': 'asc'}, '_score']
    else:
        # Default to relevance ordering; an unrecognised sort mode used
        # to leave ``sort`` undefined and raise a NameError.
        sort = ['_score']
    return {
        'sort': sort,
        'query': filter_query(q, filters, OR_FIELDS),
        'aggregations': aggs,
        '_source': fields or DEFAULT_FIELDS
    }
def alert_query(alert):
    """Execute the query and return a set of results."""
    q = text_query(alert.query_text)
    q = authz_filter(q)
    if alert.entity_id:
        q = filter_query(q, [('entities.id', alert.entity_id)], OR_FIELDS)
    if alert.notified_at:
        # Only report documents created since the last notification run.
        q = add_filter(q, {
            "range": {"created_at": {"gt": alert.notified_at}}
        })
    q = {'query': q, 'size': 150}
    result, hits, output = execute_basic(TYPE_DOCUMENT, q)
    sub_queries = []
    # Memoize collection lookups across documents.
    known_collections = {}
    for hit in hits.get('hits', []):
        document = hit.get('_source')
        document['id'] = int(hit.get('_id'))
        document['collections'] = []
        for coll_id in document['collection_id']:
            if coll_id not in authz.collections(authz.READ):
                continue
            if coll_id not in known_collections:
                known_collections[coll_id] = Collection.by_id(coll_id)
            if known_collections[coll_id] is None:
                continue
            document['collections'].append(known_collections[coll_id])
        document['records'] = {'results': [], 'total': 0}
        record_query = records_query(document['id'], alert.to_query(),
                                     size=1)
        if record_query is not None:
            # msearch bodies alternate header and query lines.
            sub_queries.append(json.dumps({}))
            sub_queries.append(json.dumps(record_query))
        output['results'].append(document)
    run_sub_queries(output, sub_queries)
    return output
def documents_query(args, fields=None, facets=True):
    """Parse a user query string, compose and execute a query."""
    if not isinstance(args, MultiDict):
        args = MultiDict(args)
    text = args.get('q', '').strip()
    q = authz_filter(text_query(text))

    # Sorting -- should this be passed into search directly, instead of
    # these aliases?
    sort_mode = args.get('sort', '').strip().lower()
    if text or sort_mode == 'score':
        sort = ['_score']
    elif sort_mode == 'newest':
        sort = [{'dates': 'desc'}, {'created_at': 'desc'}, '_score']
    elif sort_mode == 'oldest':
        sort = [{'dates': 'asc'}, {'created_at': 'asc'}, '_score']
    else:
        sort = [{'updated_at': 'desc'}, {'created_at': 'desc'}, '_score']

    filters = parse_filters(args)
    filters.extend(('entities.id', entity)
                   for entity in args.getlist('entity'))

    aggs = {'scoped': {'global': {}, 'aggs': {}}}
    if facets:
        facets = args.getlist('facet')
        if 'collections' in facets:
            aggs = facet_collections(q, aggs, filters)
            facets.remove('collections')
        if 'entities' in facets:
            aggs = facet_entities(aggs, args)
            facets.remove('entities')
        aggs = aggregate(q, aggs, facets)

    # Emit a signal so other components can observe the composed query.
    signals.document_query_process.send(q=q, args=args)
    return {
        'sort': sort,
        'query': filter_query(q, filters, OR_FIELDS),
        'aggregations': aggs,
        '_source': fields or DEFAULT_FIELDS
    }
def suggest_entities(args):
    """Auto-complete API."""
    text = args.get('prefix')
    min_count = int(args.get('min_count', 0))
    options = []
    if text is not None and len(text.strip()):
        # Prefix match on terms, gated by a minimum popularity.
        must_clauses = [
            {'match_phrase_prefix': {'terms': text.strip()}},
            {'range': {'doc_count': {'gte': min_count}}},
        ]
        body = {
            'size': 5,
            'sort': [{'doc_count': 'desc'}, '_score'],
            'query': authz_filter({'bool': {'must': must_clauses}}),
            '_source': ['name', '$schema', 'terms', 'doc_count']
        }
        ref = latinize_text(text)
        response = get_es().search(index=get_es_index(),
                                   doc_type=TYPE_ENTITY,
                                   body=body)
        for hit in response.get('hits', {}).get('hits', []):
            ent = hit.get('_source')
            terms = [latinize_text(t) for t in ent.pop('terms', [])]
            ent['match'] = ref in terms
            ent['id'] = hit.get('_id')
            options.append(ent)
    return {'text': text, 'results': options}