def get_n_latest_records(n_latest, field="last_updated", index=None):
    """
    Return the newest hits of doc type ``CFG_PUB_TYPE`` from the index.

    :param n_latest: [int] maximum number of hits to request
    :param field: [string] field the hits are sorted on (descending order)
    :param index: [string] name of the index, forwarded to the search call
    :return: [list] the raw hit entries (``["hits"]["hits"]``) of the response
    """
    descending_sort = {field: {"order": "desc"}}
    body = {
        "size": n_latest,
        "query": QueryBuilder.generate_query_string(),
        "sort": [descending_sort],
    }
    response = es.search(index=index, doc_type=CFG_PUB_TYPE, body=body)
    hits_section = response["hits"]
    return hits_section["hits"]
def get_n_latest_records(n_latest, field="last_updated", index=None):
    """
    Get the latest N records from the index.

    The search is limited to ``n_latest`` hits of doc type ``CFG_PUB_TYPE``,
    ordered by ``field`` in descending order.

    :param n_latest: [int] number of hits to ask for
    :param field: [string] name of the field used for sorting
    :param index: [string] index name, passed through to the search call
    :return: [list] the list stored under ``["hits"]["hits"]`` in the response
    """
    # NOTE(review): a function with this same name and signature is defined
    # just before this one; if both are at module level, this later
    # definition is the one callers get -- confirm and remove the duplicate.
    search_body = {"size": n_latest}
    search_body["query"] = QueryBuilder.generate_query_string()
    search_body["sort"] = [{field: {"order": "desc"}}]

    return es.search(
        index=index,
        doc_type=CFG_PUB_TYPE,
        body=search_body,
    )["hits"]["hits"]
def search(query, index=None, filters=None, size=10, include="*",
           exclude="authors", offset=0, sort_field=None, sort_order='',
           post_filter=None):
    """
    Perform a search query.

    Two searches are executed: first on documents of type ``CFG_PUB_TYPE``
    (with pagination, sorting, filters, aggregations and source filtering),
    then on documents of type ``CFG_DATA_TYPE`` restricted to the parents
    returned by the first search. Both responses are merged and mapped.

    :param query: [string] query string e.g. 'higgs boson'
    :param index: [string] name of the index. If None a default is used
    :param filters: [list of tuples] list of filters for the query.
                    Currently supported: ('author', author_fullname),
                    ('collaboration', collaboration_name), ('date', date).
                    ``None`` (the default) means no filters.
    :param size: [int] max number of hits that should be returned
    :param include: source-filter include value, forwarded unchanged to
                    ``QueryBuilder.add_source_filter``
    :param exclude: source-filter exclude value, forwarded unchanged to
                    ``QueryBuilder.add_source_filter``
    :param offset: [int] offset for the results (used for pagination)
    :param sort_field: [string] sorting field.
                       Currently supported fields: "title", "collaboration",
                       "date", "relevance"
    :param sort_order: [string] order of the sorting either original
                       (for a particular field) or reversed.
                       Supported: '' or 'rev'
    :param post_filter: forwarded unchanged to
                        ``QueryBuilder.add_post_filter``
    :return: [dict] dictionary with processed results and facets
             (the value returned by ``map_result``)
    """
    # A fresh list per call: a list used as the default value would be one
    # shared object handed to QueryBuilder.add_filters on every call.
    if filters is None:
        filters = []

    # If empty query then sort by date
    if query == '' and not sort_field:
        sort_field = 'date'

    query = HEPDataQueryParser.parse_query(query)

    # Build core query
    data_query = QueryBuilder.generate_query_string(query)
    pub_query = QueryBuilder.generate_query_string(query)
    authors_query = QueryBuilder.generate_nested_query('authors', query)

    query_builder = QueryBuilder()
    query_builder.add_child_parent_relation(
        CFG_DATA_TYPE,
        relation="child",
        related_query=data_query,
        other_queries=[pub_query, authors_query])

    # Add additional options
    query_builder.add_pagination(size=size, offset=offset)
    query_builder.add_sorting(sort_field=sort_field, sort_order=sort_order)
    query_builder.add_filters(filters)
    query_builder.add_post_filter(post_filter)
    query_builder.add_aggregations()
    query_builder.add_source_filter(include, exclude)

    # NOTE(review): `doc_type`, the "_primary" preference and the "filtered"
    # query used below look like legacy Elasticsearch features -- confirm
    # they are supported by the deployed cluster/client version.
    if query:
        # Randomize search among the available shard copies.
        pub_result = es.search(index=index,
                               body=query_builder.query,
                               doc_type=CFG_PUB_TYPE)
    else:
        # Execute search only on the primary shards
        # (to ensure no missing or duplicate results).
        pub_result = es.search(index=index,
                               body=query_builder.query,
                               doc_type=CFG_PUB_TYPE,
                               preference="_primary")

    # Restrict the second search to children of the hits found above.
    parent_filter = {
        "filtered": {
            "filter": {
                "terms": {
                    "_id": [hit["_id"] for hit in pub_result['hits']['hits']]
                }
            }
        }
    }

    query_builder = QueryBuilder()
    query_builder.add_child_parent_relation(CFG_PUB_TYPE,
                                            relation="parent",
                                            related_query=parent_filter,
                                            must=True,
                                            other_queries=[data_query])
    # Ask for up to 50 times the page size of child hits; presumably an
    # upper bound on children per parent -- TODO confirm.
    query_builder.add_pagination(size=size * 50)
    data_result = es.search(index=index,
                            body=query_builder.query,
                            doc_type=CFG_DATA_TYPE)

    merged_results = merge_results(pub_result, data_result)
    return map_result(merged_results)