Ejemplo n.º 1
0
def search(query,
           index=None,
           filters=None,
           size=10,
           include="*",
           exclude="authors",
           offset=0,
           sort_field=None,
           sort_order='',
           post_filter=None):
    """ Perform a search query.

    The search runs in two steps: first the matching publications are
    fetched (with pagination, sorting, filters and aggregations applied),
    then the data records whose parent is one of those publications are
    fetched, and both result sets are merged.

    :param query: [string] query string e.g. 'higgs boson'
    :param index: [string] name of the index. If None a default is used
    :param filters: [list of tuples] list of filters for the query.
                    Currently supported: ('author', author_fullname),
                    ('collaboration', collaboration_name), ('date', date).
                    If None, no filters are applied (empty list).
    :param size: [int] max number of hits that should be returned
    :param include: source filter "include" value, passed as-is to
                    QueryBuilder.add_source_filter
    :param exclude: source filter "exclude" value, passed as-is to
                    QueryBuilder.add_source_filter
    :param offset: [int] offset for the results (used for pagination)
    :param sort_field: [string] sorting field. Currently supported fields:
                    "title", "collaboration", "date", "relevance"
    :param sort_order: [string] order of the sorting either original
                    (for a particular field) or reversed. Supported:
                    '' or 'rev'
    :param post_filter: post filter passed as-is to
                    QueryBuilder.add_post_filter

    :return: [dict] dictionary with processed results and facets
    """
    # Build a fresh list on every call: a mutable default value would be
    # created once and shared (and possibly mutated) across calls.
    if filters is None:
        filters = []

    # If empty query then sort by date
    if query == '' and not sort_field:
        sort_field = 'date'

    query = HEPDataQueryParser.parse_query(query)

    # Build core query
    data_query = get_query_by_type(CFG_DATA_TYPE, query)
    pub_query = get_query_by_type(CFG_PUB_TYPE, query)
    authors_query = get_authors_query(query)

    # Step 1: publications, related to their data records via the
    # child relation, or matched by the publication / authors queries.
    query_builder = QueryBuilder()
    query_builder.add_child_parent_relation(
        CFG_DATA_TYPE,
        relation="child",
        related_query=data_query,
        other_queries=[pub_query, authors_query])

    # Add additional options
    query_builder.add_pagination(size=size, offset=offset)
    query_builder.add_sorting(sort_field=sort_field, sort_order=sort_order)
    query_builder.add_filters(filters)
    query_builder.add_post_filter(post_filter)
    query_builder.add_aggregations()
    query_builder.add_source_filter(include, exclude)

    pub_result = es.search(index=index,
                           body=query_builder.query,
                           doc_type=CFG_PUB_TYPE)

    # Step 2: restrict the data search to children of the publications
    # returned by step 1 (matched by their document ids).
    parent_filter = {
        "filtered": {
            "filter": {
                "terms": {
                    "_id": [hit["_id"] for hit in pub_result['hits']['hits']]
                }
            }
        }
    }

    query_builder = QueryBuilder()
    query_builder.add_child_parent_relation(CFG_PUB_TYPE,
                                            relation="parent",
                                            related_query=parent_filter,
                                            must=True,
                                            other_queries=[data_query])
    # NOTE(review): the data page is 50x the publication page size,
    # presumably to leave room for several data records per publication
    # -- confirm the intended bound.
    query_builder.add_pagination(size=size * 50)

    data_result = es.search(index=index,
                            body=query_builder.query,
                            doc_type=CFG_DATA_TYPE)

    merged_results = merge_results(pub_result, data_result)

    return map_result(merged_results)
Ejemplo n.º 2
0
def search(
    query,
    index=None,
    filters=None,
    size=10,
    include="*",
    exclude="",
    offset=0,
    sort_field=None,
    sort_order="",
    post_filter=None,
):
    """ Perform a search query.

    The search runs in two steps: first the matching publications are
    fetched (with pagination, sorting, filters and aggregations applied),
    then the data records whose parent is one of those publications are
    fetched, and both result sets are merged.

    :param query: [string] query string e.g. 'higgs boson'
    :param index: [string] name of the index. If None a default is used
    :param filters: [list of tuples] list of filters for the query.
                    Currently supported: ('author', author_fullname),
                    ('collaboration', collaboration_name), ('date', date).
                    If None, no filters are applied (empty list).
    :param size: [int] max number of hits that should be returned
    :param include: source filter "include" value, passed as-is to
                    QueryBuilder.add_source_filter
    :param exclude: source filter "exclude" value, passed as-is to
                    QueryBuilder.add_source_filter
    :param offset: [int] offset for the results (used for pagination)
    :param sort_field: [string] sorting field. Currently supported fields:
                    "title", "collaboration", "date", "relevance"
    :param sort_order: [string] order of the sorting either original
                    (for a particular field) or reversed. Supported:
                    '' or 'rev'
    :param post_filter: post filter passed as-is to
                    QueryBuilder.add_post_filter

    :return: [dict] dictionary with processed results and facets
    """
    # Build a fresh list on every call: a mutable default value would be
    # created once and shared (and possibly mutated) across calls.
    if filters is None:
        filters = []

    # If empty query then sort by date
    if query == "" and not sort_field:
        sort_field = "date"

    query = HEPDataQueryParser.parse_query(query)

    # Build core query
    data_query = get_query_by_type(CFG_DATA_TYPE, query)
    pub_query = get_query_by_type(CFG_PUB_TYPE, query)
    authors_query = get_authors_query(query)

    # Step 1: publications, related to their data records via the
    # child relation, or matched by the publication / authors queries.
    query_builder = QueryBuilder()
    query_builder.add_child_parent_relation(
        CFG_DATA_TYPE, relation="child", related_query=data_query, other_queries=[pub_query, authors_query]
    )

    # Add additional options
    query_builder.add_pagination(size=size, offset=offset)
    query_builder.add_sorting(sort_field=sort_field, sort_order=sort_order)
    query_builder.add_filters(filters)
    query_builder.add_post_filter(post_filter)
    query_builder.add_aggregations()
    query_builder.add_source_filter(include, exclude)

    pub_result = es.search(index=index, body=query_builder.query, doc_type=CFG_PUB_TYPE)

    # Step 2: restrict the data search to children of the publications
    # returned by step 1 (matched by their document ids).
    publication_ids = [hit["_id"] for hit in pub_result["hits"]["hits"]]
    parent_filter = {"filtered": {"filter": {"terms": {"_id": publication_ids}}}}

    query_builder = QueryBuilder()
    query_builder.add_child_parent_relation(
        CFG_PUB_TYPE, relation="parent", related_query=parent_filter, must=True, other_queries=[data_query]
    )
    # NOTE(review): the data page is 50x the publication page size,
    # presumably to leave room for several data records per publication
    # -- confirm the intended bound.
    query_builder.add_pagination(size=size * 50)

    data_result = es.search(index=index, body=query_builder.query, doc_type=CFG_DATA_TYPE)

    merged_results = merge_results(pub_result, data_result)

    return map_result(merged_results)