예제 #1
0
def get_entry_uuid_by_unique_field(index, dict_unique_field_value):
    """Return the id of the first document matching a unique field value.

    :param index: name of the search index to query.
    :param dict_unique_field_value: mapping expanded into the keyword
        arguments of a ``match`` query (field name -> value).
    :returns: the ``_id`` of the first hit.
    :raises DepositDoesNotExist: when the query yields no hits.
    """
    search = RecordsSearch(index=index)
    match_query = Q('match', **dict_unique_field_value)
    hits = search.query(match_query).execute().hits.hits

    if hits:
        return hits[0]['_id']
    raise DepositDoesNotExist
def get_entry_uuid_by_unique_field(index, dict_unique_field_value):
    """Find the id of the entry identified by a unique field.

    Runs a ``match`` query built from ``dict_unique_field_value`` against
    ``index`` and returns the ``_id`` of the first hit.

    :raises DepositDoesNotExist: if nothing matches.
    """
    records_search = RecordsSearch(index=index)
    unique_field_query = Q('match', **dict_unique_field_value)
    response = records_search.query(unique_field_query).execute()
    raw_hits = response.hits.hits

    # Guard clause: no match means the entry does not exist.
    if not raw_hits:
        raise DepositDoesNotExist

    first_hit = raw_hits[0]
    return first_hit['_id']
예제 #3
0
def pending_in_holding_pen(obj, eng):
    """Tell whether other non-completed workflow objects match this one.

    An exact-match ``query_string`` search is built from the fields listed
    in the ``HOLDING_PEN_MATCH_MAPPING`` config and run against the index
    configured under ``WORKFLOWS_UI_REST_ENDPOINT``; ``obj`` itself is
    excluded from the matches.

    Side effects on ``obj.extra_data``:

    * ``holdingpen_ids`` -- ids of every other matching object;
    * ``pending_holdingpen_ids`` -- the subset whose status is not
      ``COMPLETED`` (only set when that subset is non-empty).

    :param obj: workflow object (its ``data``, ``id``, ``extra_data`` and
        ``log`` attributes are used).
    :param eng: workflow engine; not used here.
    :returns: ``True`` if at least one pending match was found, else
        ``False``.
    """
    from elasticsearch_dsl import Q
    from invenio_db import db
    from invenio_search import RecordsSearch
    from invenio_workflows.models import WorkflowObjectModel, ObjectStatus

    endpoint_cfg = current_app.config['WORKFLOWS_UI_REST_ENDPOINT']
    searcher = RecordsSearch(
        index=endpoint_cfg.get('search_index'),
        doc_type=endpoint_cfg.get('search_type'),
    ).params(version=True)

    match_mapping = current_app.config.get("HOLDING_PEN_MATCH_MAPPING", {})
    identifiers = []
    for field, lookup in six.iteritems(match_mapping):
        # Quote each value so the query string matches it exactly.
        for value in get_value(obj.data, lookup, []):
            identifiers.append('{0}:"{1}"'.format(field, value))

    if not identifiers:
        return False

    # Look for other records in the Holding Pen, leaving out this object.
    lookup_query = Q(
        'query_string',
        query=" OR ".join(identifiers),
        allow_leading_wildcard=False,
    )
    response = searcher.query(lookup_query).execute()
    found_ids = [int(hit.id) for hit in response.hits]
    other_ids = set(found_ids) - {obj.id}
    if not other_ids:
        return False

    obj.extra_data["holdingpen_ids"] = list(other_ids)
    pending_rows = (
        db.session.query(WorkflowObjectModel)
        .with_entities(WorkflowObjectModel.id)
        .filter(
            WorkflowObjectModel.status != ObjectStatus.COMPLETED,
            WorkflowObjectModel.id.in_(other_ids),
        )
        .all()
    )
    if not pending_rows:
        return False

    pending_ids = [row[0] for row in pending_rows]
    obj.extra_data['pending_holdingpen_ids'] = pending_ids
    obj.log.info(
        "Pending records already found in Holding Pen ({0})".format(
            pending_ids
        )
    )
    return True
예제 #4
0
def pending_in_holding_pen(obj, eng):
    """Check the Holding Pen for other pending objects matching ``obj``.

    The fields named in ``HOLDING_PEN_MATCH_MAPPING`` are read from
    ``obj.data`` and combined into a quoted ``query_string`` search on the
    index given by ``WORKFLOWS_UI_REST_ENDPOINT``. Every matching id other
    than ``obj.id`` is recorded in ``obj.extra_data["holdingpen_ids"]``;
    ids of matches whose status differs from ``COMPLETED`` are recorded in
    ``obj.extra_data["pending_holdingpen_ids"]`` and logged.

    :param obj: workflow object being processed.
    :param eng: workflow engine (unused).
    :returns: ``True`` when pending matches exist, ``False`` otherwise.
    """
    from elasticsearch_dsl import Q
    from invenio_db import db
    from invenio_search import RecordsSearch
    from invenio_workflows.models import WorkflowObjectModel, ObjectStatus

    rest_config = current_app.config['WORKFLOWS_UI_REST_ENDPOINT']
    search_index = rest_config.get('search_index')
    search_type = rest_config.get('search_type')
    base_search = RecordsSearch(index=search_index, doc_type=search_type)
    base_search = base_search.params(version=True)

    terms = []
    mapping = current_app.config.get("HOLDING_PEN_MATCH_MAPPING", {})
    for field_name, path in six.iteritems(mapping):
        # Values are wrapped in quotes to make the search exact.
        terms.extend(
            '{0}:"{1}"'.format(field_name, found)
            for found in get_value(obj.data, path, [])
        )

    if not terms:
        return False

    # Search for any existing record in the Holding Pen, excluding self.
    joined_terms = " OR ".join(terms)
    hits = base_search.query(
        Q('query_string', query=joined_terms, allow_leading_wildcard=False)
    ).execute().hits
    matching_ids = set([int(hit.id) for hit in hits])
    matching_ids = matching_ids - {obj.id}
    if not matching_ids:
        return False

    obj.extra_data["holdingpen_ids"] = list(matching_ids)

    not_completed = WorkflowObjectModel.status != ObjectStatus.COMPLETED
    among_matches = WorkflowObjectModel.id.in_(matching_ids)
    id_rows = db.session.query(WorkflowObjectModel).with_entities(
        WorkflowObjectModel.id
    ).filter(not_completed, among_matches).all()

    if id_rows:
        pending_ids = [row[0] for row in id_rows]
        obj.extra_data['pending_holdingpen_ids'] = pending_ids
        message = "Pending records already found in Holding Pen ({0})"
        obj.log.info(message.format(pending_ids))
        return True
    return False
def get_deposit_by_cadi_id(cadi_id):
    """Fetch the deposit whose CADI id equals ``cadi_id``.

    The ``deposits-records`` index is queried with a ``match`` on
    ``basic_info.cadi_id.keyword``; the first hit is loaded through
    ``CAPDeposit.get_record``.

    :param str cadi_id: CADI identifier.
    :returns: the matching deposit.
    :rtype: cap.modules.deposits.api.CAPDeposit
    :raises DepositDoesNotExist: when no document matches.
    """
    search = RecordsSearch(index='deposits-records')
    cadi_query = Q('match', basic_info__cadi_id__keyword=cadi_id)
    hits = search.query(cadi_query).execute().hits.hits

    if not hits:
        raise DepositDoesNotExist

    return CAPDeposit.get_record(hits[0]['_id'])
예제 #6
0
def get_deposit_by_cadi_id(cadi_id):
    """Return the deposit registered under the given CADI identifier.

    :param str cadi_id: CADI identifier to search for in the
        ``deposits-records`` index.
    :returns: deposit record of the first search hit.
    :rtype: cap.modules.deposits.api.CAPDeposit
    :raises DepositDoesNotExist: if the search returns no hits.
    """
    response = RecordsSearch(index='deposits-records').query(
        Q('match', basic_info__cadi_id__keyword=cadi_id)
    ).execute()
    matches = response.hits.hits

    if matches:
        deposit_uuid = matches[0]['_id']
        return CAPDeposit.get_record(deposit_uuid)
    raise DepositDoesNotExist
예제 #7
0
파일: api.py 프로젝트: islahudinees/hepdata
def search(query,
           index=None,
           filters=None,
           size=10,
           include="*",
           exclude="authors",
           offset=0,
           sort_field=None,
           sort_order='',
           post_filter=None):
    """ Perform a search query.

    Two searches are executed: one for publications (filtered, sorted,
    aggregated and paginated) and one for the data tables whose parent is
    one of the returned publications. Both results are merged before being
    mapped to the output format.

    :param query: [string] query string e.g. 'higgs boson'
    :param index: [string] name of the index. If None a default is used
    :param filters: [list of tuples] list of filters for the query.
                    Currently supported: ('author', author_fullname),
                    ('collaboration', collaboration_name), ('date', date).
                    ``None`` (the default) means no filters.
    :param size: [int] max number of hits that should be returned
    :param include: source fields to include in the publication hits
    :param exclude: source fields to exclude from the publication hits
    :param offset: [int] offset for the results (used for pagination)
    :param sort_field: [string] sorting field. Currently supported fields:
                    "title", "collaboration", "date", "relevance"
    :param sort_order: [string] order of the sorting either original
                    (for a particular field) or reversed. Supported:
                    '' or 'rev'
    :param post_filter: optional filter applied to the publication search
                    as a post filter

    :return: [dict] dictionary with processed results and facets
    """
    # Use a fresh list per call: a ``list()`` default is evaluated once at
    # definition time and would be shared (and possibly mutated) across calls.
    if filters is None:
        filters = []

    # If empty query then sort by date
    if query == '' and not sort_field:
        sort_field = 'date'

    query = HEPDataQueryParser.parse_query(query)
    # Create search with preference param to ensure consistency of results across shards
    search = RecordsSearch(using=es, index=index).with_preference_param()

    if query:
        # Match on the publication itself, its nested authors, or any of
        # its child data tables.
        fuzzy_query = QueryString(query=query, fuzziness='AUTO')
        search.query = fuzzy_query | \
                       Q('nested', query=fuzzy_query, path='authors') | \
                       Q('has_child', type="child_datatable", query=fuzzy_query)

    search = search.filter("term", doc_type=CFG_PUB_TYPE)
    search = QueryBuilder.add_filters(search, filters)

    mapped_sort_field = sort_fields_mapping(sort_field)
    search = search.sort({
        mapped_sort_field: {
            "order": calculate_sort_order(sort_order, sort_field)
        }
    })
    search = add_default_aggregations(search, filters)

    if post_filter:
        search = search.post_filter(post_filter)

    search = search.source(includes=include, excludes=exclude)
    search = search[offset:offset + size]
    pub_result = search.execute().to_dict()

    # Restrict the data-table search to children of the publications found.
    parent_filter = {
        "terms": {
            "_id": [hit["_id"] for hit in pub_result['hits']['hits']]
        }
    }

    data_search = RecordsSearch(using=es, index=index)
    data_search = data_search.query('has_parent',
                                    parent_type="parent_publication",
                                    query=parent_filter)
    if query:
        data_search = data_search.query(QueryString(query=query))

    # Fetch up to 50 data tables per requested publication.
    data_search = data_search[0:size * 50]
    data_result = data_search.execute().to_dict()

    merged_results = merge_results(pub_result, data_result)
    return map_result(merged_results, filters)
예제 #8
0
def search(query,
           index=None,
           filters=None,
           size=10,
           include="*",
           exclude="authors",
           offset=0,
           sort_field=None,
           sort_order='',
           post_filter=None):
    """ Perform a search query.

    Two searches are executed: one for publications (filtered, sorted,
    aggregated and paginated) and one for the data tables whose parent is
    one of the returned publications. Both results are merged before being
    mapped to the output format.

    :param query: [string] query string e.g. 'higgs boson'
    :param index: [string] name of the index. If None a default is used
    :param filters: [list of tuples] list of filters for the query.
                    Currently supported: ('author', author_fullname),
                    ('collaboration', collaboration_name), ('date', date).
                    ``None`` (the default) means no filters.
    :param size: [int] max number of hits that should be returned
    :param include: source fields to include in the publication hits
    :param exclude: source fields to exclude from the publication hits
    :param offset: [int] offset for the results (used for pagination)
    :param sort_field: [string] sorting field. Currently supported fields:
                    "title", "collaboration", "date", "relevance"
    :param sort_order: [string] order of the sorting either original
                    (for a particular field) or reversed. Supported:
                    '' or 'rev'
    :param post_filter: optional filter applied to the publication search
                    as a post filter

    :return: [dict] dictionary with processed results and facets, or
             ``{'error': reason}`` if the search backend raised a
             ``TransportError``
    """
    # Use a fresh list per call: a ``list()`` default is evaluated once at
    # definition time and would be shared (and possibly mutated) across calls.
    if filters is None:
        filters = []

    # If empty query then sort by date
    if query == '' and not sort_field:
        sort_field = 'date'

    query = HEPDataQueryParser.parse_query(query)
    # Create search with preference param to ensure consistency of results across shards
    search = RecordsSearch(using=es, index=index).with_preference_param()

    if query:
        # Match on the publication itself, its nested authors, or any of
        # its child data tables.
        fuzzy_query = QueryString(query=query, fuzziness='AUTO')
        search.query = fuzzy_query | \
                       Q('nested', query=fuzzy_query, path='authors') | \
                       Q('has_child', type="child_datatable", query=fuzzy_query)

    search = search.filter("term", doc_type=CFG_PUB_TYPE)
    search = QueryBuilder.add_filters(search, filters)

    mapped_sort_field = sort_fields_mapping(sort_field)
    search = search.sort({
        mapped_sort_field: {
            "order": calculate_sort_order(sort_order, sort_field)
        }
    })
    search = add_default_aggregations(search, filters)

    if post_filter:
        search = search.post_filter(post_filter)

    search = search.source(includes=include, excludes=exclude)
    search = search[offset:offset + size]

    try:
        pub_result = search.execute().to_dict()

        # Restrict the data-table search to children of the publications
        # found.
        parent_filter = {
            "terms": {
                "_id": [hit["_id"] for hit in pub_result['hits']['hits']]
            }
        }

        data_search = RecordsSearch(using=es, index=index)
        data_search = data_search.query('has_parent',
                                        parent_type="parent_publication",
                                        query=parent_filter)
        if query:
            data_search = data_search.query(QueryString(query=query))

        # Scale the data-table window with the requested page size.
        data_search_size = size * ELASTICSEARCH_MAX_RESULT_WINDOW // LIMIT_MAX_RESULTS_PER_PAGE
        data_search = data_search[0:data_search_size]
        data_result = data_search.execute().to_dict()

        merged_results = merge_results(pub_result, data_result)
        return map_result(merged_results, filters)
    except TransportError as e:
        # For search phase execution exceptions we pass the reason as it's
        # likely to be user error (e.g. invalid search query).
        # The lookups are defensive so that an unexpected error payload
        # (non-dict info, missing or empty root_cause) cannot raise from
        # inside this handler; such cases fall through to the generic error.
        reason = None
        info = e.info
        if e.error == 'search_phase_execution_exception' \
                and isinstance(info, dict) \
                and isinstance(info.get('error'), dict):
            root_causes = info['error'].get('root_cause')
            if isinstance(root_causes, list) and root_causes \
                    and isinstance(root_causes[0], dict):
                reason = root_causes[0].get('reason')

        # Otherwise we hide the details from the user
        if reason is None:
            log.error(f'An unexpected error occurred when searching: {e}')
            reason = f'An unexpected error occurred: {e.error}'
        return {'error': reason}