def suggest_entities(prefix, authz, min_count=0, schemas=None, size=5):
    """Auto-complete API.

    Suggest up to ``size`` entities whose name begins with ``prefix``,
    restricted to collections the given authorization context may read.

    :param prefix: user-typed text; ``None`` or blank yields no results.
    :param authz: authorization context exposing ``collections_read``.
    :param min_count: only suggest entities with at least this ``doc_count``.
    :param schemas: optional list of ``$schema`` values to filter on.
    :param size: maximum number of suggestions returned.
    :return: dict with the original ``prefix`` and a ``results`` list.
    """
    options = []
    # Strip once; an empty/whitespace-only prefix short-circuits the search.
    text = prefix.strip() if prefix is not None else ''
    if text:
        q = {'match_phrase_prefix': {'name': text}}
        if min_count > 0:
            q = add_filter(q, {'range': {'doc_count': {'gte': min_count}}})
        if schemas:
            q = add_filter(q, {'terms': {'$schema': schemas}})
        # TODO: is this correct? should we allow filter by dataset entities?
        q = add_filter(q, {'terms': {'collection_id': authz.collections_read}})
        q = {
            'size': size,
            # Prefer frequently-occurring entities, then relevance.
            'sort': [{'doc_count': 'desc'}, '_score'],
            'query': q,
            '_source': ['name', 'schema', 'fingerprints', 'doc_count']
        }
        ref = ascii_text(prefix)
        result = es.search(index=es_index, doc_type=TYPE_ENTITY, body=q)
        for res in result.get('hits', {}).get('hits', []):
            ent = res.get('_source')
            # 'match' flags suggestions whose normalized fingerprint equals
            # the normalized prefix exactly.
            terms = [ascii_text(t) for t in ent.pop('fingerprints', [])]
            ent['match'] = ref in terms
            ent['score'] = res.get('_score')
            ent['id'] = res.get('_id')
            options.append(ent)
    return {'prefix': prefix, 'results': options}
def suggest_entities(prefix, min_count=0, schemas=None, size=5):
    """Auto-complete API.

    Look up entities matching the given name prefix and return them as
    suggestion candidates, ordered by document count and relevance.
    """
    options = []
    if prefix is not None and len(prefix.strip()):
        query = {'match_phrase_prefix': {'terms': prefix.strip()}}
        if min_count > 0:
            threshold = {'range': {'doc_count': {'gte': min_count}}}
            query = add_filter(query, threshold)
        if schemas is not None and len(schemas):
            query = add_filter(query, {'terms': {'$schema': schemas}})
        body = {
            'size': size,
            'sort': [{'doc_count': 'desc'}, '_score'],
            'query': authz_filter(query),
            '_source': ['name', '$schema', 'terms', 'doc_count']
        }
        normalized = latinize_text(prefix)
        response = get_es().search(index=get_es_index(),
                                   doc_type=TYPE_ENTITY,
                                   body=body)
        for hit in response.get('hits', {}).get('hits', []):
            entity = hit.get('_source')
            # Mark exact hits: the normalized prefix equals one of the
            # entity's normalized terms.
            known = [latinize_text(t) for t in entity.pop('terms', [])]
            entity['match'] = normalized in known
            entity['score'] = hit.get('_score')
            entity['id'] = hit.get('_id')
            options.append(entity)
    return {'prefix': prefix, 'results': options}
def suggest_entities(prefix, min_count=0, schemas=None, size=5):
    """Auto-complete API."""
    # NOTE(review): this appears to duplicate another suggest_entities
    # definition in this file — confirm which one is live and drop the other.
    options = []
    # Blank or None prefixes produce no suggestions.
    if prefix is not None and len(prefix.strip()):
        q = {
            'match_phrase_prefix': {'terms': prefix.strip()}
        }
        # Restrict to entities appearing in at least min_count documents.
        if min_count > 0:
            q = add_filter(q, {'range': {'doc_count': {'gte': min_count}}})
        # Optionally restrict to a set of entity schemata.
        if schemas is not None and len(schemas):
            q = add_filter(q, {'terms': {'$schema': schemas}})
        # Rank frequent entities first, then by relevance score.
        q = {
            'size': size,
            'sort': [{'doc_count': 'desc'}, '_score'],
            'query': authz_filter(q),
            '_source': ['name', '$schema', 'terms', 'doc_count']
        }
        ref = latinize_text(prefix)
        result = get_es().search(index=get_es_index(),
                                 doc_type=TYPE_ENTITY,
                                 body=q)
        for res in result.get('hits', {}).get('hits', []):
            ent = res.get('_source')
            # 'match' marks suggestions whose normalized term equals the
            # normalized prefix exactly.
            terms = [latinize_text(t) for t in ent.pop('terms', [])]
            ent['match'] = ref in terms
            ent['score'] = res.get('_score')
            ent['id'] = res.get('_id')
            options.append(ent)
    return {
        'prefix': prefix,
        'results': options
    }
def links_query(origin, state):
    """Parse a user query string, compose and execute a query."""
    # Full-text clause when the user typed something, otherwise match all.
    if state.has_text:
        query = {
            "query_string": {
                "query": state.text,
                "fields": ['name^5', 'names^2', 'text'],
                "default_operator": "AND",
                "use_dis_max": True
            }
        }
    else:
        query = match_all()

    # Restrict links to the origin entity (or its set of ids).
    origin_ids = origin.get('ids') or [origin.get('id')]
    query = add_filter(query, {'terms': {'origin.id': origin_ids}})
    query = authz_filter(query, state.authz, roles=True)

    aggregations = {'scoped': {'global': {}, 'aggs': {}}}
    aggregations = aggregate(state, query, aggregations, state.facet_names)

    # Sort by relevance only on request; default to newest time span first.
    sort = ['_score'] if state.sort == 'score' else [
        {'properties.start_date': 'desc'},
        {'properties.end_date': 'desc'}
    ]

    body = {
        'sort': sort,
        'query': filter_query(query, state.filters),
        'aggregations': aggregations,
        'size': state.limit,
        'from': state.offset,
        '_source': DEFAULT_FIELDS
    }
    result, hits, output = execute_basic(TYPE_LINK, body)
    output['facets'] = parse_facet_result(state, result)
    for hit in hits.get('hits', []):
        link = hit.get('_source')
        link['id'] = hit.get('_id')
        link['score'] = hit.get('_score')
        output['results'].append(link)
    return output
def entities_query(state, fields=None, facets=True, doc_counts=False):
    """Parse a user query string, compose and execute a query.

    :param state: search state (text, filters, authz, paging, sorting).
    :param fields: optional list of ``_source`` fields to return.
    :param facets: when truthy, compute facet aggregations.
    :param doc_counts: when True, run a follow-up msearch to attach a
        matching-document count to each returned entity.
    :return: result dict with ``results`` and ``facets``.
    """
    if state.has_text:
        q = {
            "query_string": {
                "query": state.text,
                "fields": ['name^5', 'names^2', 'text'],
                "default_operator": "AND",
                "use_dis_max": True
            }
        }
    else:
        q = match_all()
    if state.raw_query:
        q = {"bool": {"must": [q, state.raw_query]}}
    q = authz_filter(q, state.authz, roles=True)
    aggs = {'scoped': {'global': {}, 'aggs': {}}}
    if facets:
        # Use a local copy so the 'facets' parameter is not clobbered.
        facet_names = list(state.facet_names)
        if 'collections' in facet_names:
            aggs = facet_collections(state, q, aggs)
            facet_names.remove('collections')
        aggs = aggregate(state, q, aggs, facet_names)
    if state.sort == 'doc_count':
        sort = [{'doc_count': 'desc'}, '_score']
    elif state.sort == 'score':
        sort = ['_score', {'name_sort': 'asc'}]
    else:
        sort = [{'name_sort': 'asc'}]
    q = {
        'sort': sort,
        'query': filter_query(q, state.filters),
        'aggregations': aggs,
        'size': state.limit,
        'from': state.offset,
        '_source': fields or DEFAULT_FIELDS
    }
    result, hits, output = execute_basic(TYPE_ENTITY, q)
    output['facets'] = parse_facet_result(state, result)
    sub_queries = []
    for doc in hits.get('hits', []):
        entity = doc.get('_source')
        entity['id'] = doc.get('_id')
        entity['score'] = doc.get('_score')
        entity['api_url'] = url_for('entities_api.view', id=doc.get('_id'))
        output['results'].append(entity)
        # Build a count-only (size 0) query for this entity, scoped to
        # readable collections.
        sq = {'term': {'entities.id': entity['id']}}
        sq = add_filter(
            sq, {'terms': {
                'collection_id': state.authz.collections_read
            }})
        sq = {'size': 0, 'query': sq}
        # msearch NDJSON: a (here empty) header line precedes each query.
        sub_queries.append(json.dumps({}))
        sub_queries.append(json.dumps(sq))
    if doc_counts and len(sub_queries):
        # Get the number of matching documents for each entity.
        # The msearch body must be newline-terminated per the ES bulk/NDJSON
        # protocol; a missing final newline is rejected by the server.
        body = '\n'.join(sub_queries) + '\n'
        counts = es.msearch(index=es_index, doc_type=TYPE_DOCUMENT, body=body)
        for entity, resp in zip(output['results'], counts.get('responses')):
            entity['doc_count'] = resp.get('hits', {}).get('total')
    return output