def search(query,
           index=None,
           filters=None,
           size=10,
           include="*",
           exclude="authors",
           offset=0,
           sort_field=None,
           sort_order='',
           post_filter=None):
    """
    Perform a search query.

    Two Elasticsearch requests are issued. The first searches documents of
    type CFG_PUB_TYPE (with pagination, sorting, filters, post filter,
    aggregations and source filtering applied). The second searches
    documents of type CFG_DATA_TYPE whose parent is one of the hits of the
    first request. Both responses are combined with ``merge_results`` and
    passed through ``map_result``.

    :param query: [string] query string e.g. 'higgs boson'
    :param index: [string] name of the index. If None a default is used
    :param filters: [list of tuples] list of filters for the query.
                    Currently supported: ('author', author_fullname),
                    ('collaboration', collaboration_name), ('date', date).
                    None (the default) is treated as an empty list.
    :param size: [int] max number of hits requested from the first
                 (CFG_PUB_TYPE) search; the follow-up CFG_DATA_TYPE search
                 requests up to ``size * 50`` hits
    :param include: value forwarded as the first argument of
                    ``QueryBuilder.add_source_filter``
                    (presumably the _source fields to include -- confirm)
    :param exclude: value forwarded as the second argument of
                    ``QueryBuilder.add_source_filter``
                    (presumably the _source fields to exclude -- confirm)
    :param offset: [int] offset for the results (used for pagination)
    :param sort_field: [string] sorting field. Currently supported fields:
                       "title", "collaboration", "date", "relevance".
                       Falls back to 'date' when the query is empty and no
                       sort field is given.
    :param sort_order: [string] order of the sorting either original
                       (for a particular field) or reversed.
                       Supported: '' or 'rev'
    :param post_filter: value forwarded unchanged to
                        ``QueryBuilder.add_post_filter``
    :return: [dict] dictionary with processed results and facets
             (whatever ``map_result`` produces for the merged results)
    """
    # A fresh list per call: a list created in the signature would be a
    # single object shared by every invocation of this function.
    if filters is None:
        filters = []

    # If empty query then sort by date
    if query == '' and not sort_field:
        sort_field = 'date'

    query = HEPDataQueryParser.parse_query(query)

    # Build core query
    data_query = get_query_by_type(CFG_DATA_TYPE, query)
    pub_query = get_query_by_type(CFG_PUB_TYPE, query)
    authors_query = get_authors_query(query)

    pub_builder = QueryBuilder()
    pub_builder.add_child_parent_relation(
        CFG_DATA_TYPE,
        relation="child",
        related_query=data_query,
        other_queries=[pub_query, authors_query])

    # Add additional options
    pub_builder.add_pagination(size=size, offset=offset)
    pub_builder.add_sorting(sort_field=sort_field, sort_order=sort_order)
    pub_builder.add_filters(filters)
    pub_builder.add_post_filter(post_filter)
    pub_builder.add_aggregations()
    pub_builder.add_source_filter(include, exclude)

    pub_result = es.search(index=index,
                           body=pub_builder.query,
                           doc_type=CFG_PUB_TYPE)

    # Restrict the second search to children of the publications just found.
    publication_ids = [hit["_id"] for hit in pub_result['hits']['hits']]
    parent_filter = {
        "filtered": {
            "filter": {
                "terms": {
                    "_id": publication_ids
                }
            }
        }
    }

    data_builder = QueryBuilder()
    data_builder.add_child_parent_relation(
        CFG_PUB_TYPE,
        relation="parent",
        related_query=parent_filter,
        must=True,
        other_queries=[data_query])
    # NOTE(review): the factor 50 looks like an assumed upper bound on the
    # number of data records per publication on the page -- confirm.
    data_builder.add_pagination(size=size * 50)

    data_result = es.search(index=index,
                            body=data_builder.query,
                            doc_type=CFG_DATA_TYPE)

    merged_results = merge_results(pub_result, data_result)
    return map_result(merged_results)
def search(
    query,
    index=None,
    filters=None,
    size=10,
    include="*",
    exclude="",
    offset=0,
    sort_field=None,
    sort_order="",
    post_filter=None,
):
    """
    Perform a search query.

    Two Elasticsearch requests are issued. The first searches documents of
    type CFG_PUB_TYPE (with pagination, sorting, filters, post filter,
    aggregations and source filtering applied). The second searches
    documents of type CFG_DATA_TYPE whose parent is one of the hits of the
    first request. Both responses are combined with ``merge_results`` and
    passed through ``map_result``.

    :param query: [string] query string e.g. 'higgs boson'
    :param index: [string] name of the index. If None a default is used
    :param filters: [list of tuples] list of filters for the query.
                    Currently supported: ('author', author_fullname),
                    ('collaboration', collaboration_name), ('date', date).
                    None (the default) is treated as an empty list.
    :param size: [int] max number of hits requested from the first
                 (CFG_PUB_TYPE) search; the follow-up CFG_DATA_TYPE search
                 requests up to ``size * 50`` hits
    :param include: value forwarded as the first argument of
                    ``QueryBuilder.add_source_filter``
                    (presumably the _source fields to include -- confirm)
    :param exclude: value forwarded as the second argument of
                    ``QueryBuilder.add_source_filter``
                    (presumably the _source fields to exclude -- confirm)
    :param offset: [int] offset for the results (used for pagination)
    :param sort_field: [string] sorting field. Currently supported fields:
                       "title", "collaboration", "date", "relevance".
                       Falls back to "date" when the query is empty and no
                       sort field is given.
    :param sort_order: [string] order of the sorting either original
                       (for a particular field) or reversed.
                       Supported: '' or 'rev'
    :param post_filter: value forwarded unchanged to
                        ``QueryBuilder.add_post_filter``
    :return: [dict] dictionary with processed results and facets
             (whatever ``map_result`` produces for the merged results)
    """
    # A fresh list per call: a list created in the signature would be a
    # single object shared by every invocation of this function.
    if filters is None:
        filters = []

    # If empty query then sort by date
    if query == "" and not sort_field:
        sort_field = "date"

    query = HEPDataQueryParser.parse_query(query)

    # Build core query
    data_query = get_query_by_type(CFG_DATA_TYPE, query)
    pub_query = get_query_by_type(CFG_PUB_TYPE, query)
    authors_query = get_authors_query(query)

    pub_builder = QueryBuilder()
    pub_builder.add_child_parent_relation(
        CFG_DATA_TYPE,
        relation="child",
        related_query=data_query,
        other_queries=[pub_query, authors_query],
    )

    # Add additional options
    pub_builder.add_pagination(size=size, offset=offset)
    pub_builder.add_sorting(sort_field=sort_field, sort_order=sort_order)
    pub_builder.add_filters(filters)
    pub_builder.add_post_filter(post_filter)
    pub_builder.add_aggregations()
    pub_builder.add_source_filter(include, exclude)

    pub_result = es.search(
        index=index,
        body=pub_builder.query,
        doc_type=CFG_PUB_TYPE,
    )

    # Restrict the second search to children of the publications just found.
    publication_ids = [hit["_id"] for hit in pub_result["hits"]["hits"]]
    parent_filter = {
        "filtered": {
            "filter": {
                "terms": {"_id": publication_ids},
            },
        },
    }

    data_builder = QueryBuilder()
    data_builder.add_child_parent_relation(
        CFG_PUB_TYPE,
        relation="parent",
        related_query=parent_filter,
        must=True,
        other_queries=[data_query],
    )
    # NOTE(review): the factor 50 looks like an assumed upper bound on the
    # number of data records per publication on the page -- confirm.
    data_builder.add_pagination(size=size * 50)

    data_result = es.search(
        index=index,
        body=data_builder.query,
        doc_type=CFG_DATA_TYPE,
    )

    merged_results = merge_results(pub_result, data_result)
    return map_result(merged_results)