def get_entry_uuid_by_unique_field(index, dict_unique_field_value):
    """Return the UUID of the record matching the given unique field value."""
    rs = RecordsSearch(index=index)
    res = rs.query(Q('match', **dict_unique_field_value)).execute().hits.hits

    if not res:
        raise DepositDoesNotExist
    return res[0]['_id']

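# A minimal usage sketch for get_entry_uuid_by_unique_field. The index name
# and field below are hypothetical examples, not taken from the original
# code; the dict is unpacked into Q('match', ...), so its key must name a
# field in the index mapping (elasticsearch_dsl turns '__' into '.').
try:
    uuid = get_entry_uuid_by_unique_field(
        index='deposits-records',
        dict_unique_field_value={'basic_info__analysis_number': 'ANA-123'}
    )
except DepositDoesNotExist:
    uuid = None  # no record matches the given unique field value
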
def pending_in_holding_pen(obj, eng):
    """Check if a record exists in the Holding Pen by looking in the given KB."""
    from elasticsearch_dsl import Q
    from invenio_db import db
    from invenio_search import RecordsSearch
    from invenio_workflows.models import WorkflowObjectModel, ObjectStatus

    config = current_app.config['WORKFLOWS_UI_REST_ENDPOINT']
    index = config.get('search_index')
    doc_type = config.get('search_type')
    searcher = RecordsSearch(
        index=index, doc_type=doc_type
    ).params(version=True)

    identifiers = []
    for field, lookup in six.iteritems(
            current_app.config.get("HOLDING_PEN_MATCH_MAPPING", {})):
        # Add quotes around each value to make the search exact
        identifiers += ['{0}:"{1}"'.format(field, i)
                        for i in get_value(obj.data, lookup, [])]

    # Search for any existing record in the Holding Pen, excluding self
    if identifiers:
        search = searcher.query(Q(
            'query_string',
            query=" OR ".join(identifiers),
            allow_leading_wildcard=False
        ))
        search_result = search.execute()
        id_list = [int(hit.id) for hit in search_result.hits]
        matches_excluding_self = set(id_list) - set([obj.id])
        if matches_excluding_self:
            obj.extra_data["holdingpen_ids"] = list(matches_excluding_self)
            pending_records = db.session.query(
                WorkflowObjectModel
            ).with_entities(WorkflowObjectModel.id).filter(
                WorkflowObjectModel.status != ObjectStatus.COMPLETED,
                WorkflowObjectModel.id.in_(matches_excluding_self)
            ).all()

            if pending_records:
                pending_ids = [o[0] for o in pending_records]
                obj.extra_data['pending_holdingpen_ids'] = pending_ids
                obj.log.info(
                    "Pending records already found in Holding Pen ({0})".format(
                        pending_ids
                    )
                )
                return True
    return False

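# A hypothetical sketch of how pending_in_holding_pen could be wired into an
# invenio-workflows definition as a branching task; the workflow class and
# the halt_record stand-in below are illustrative assumptions, not part of
# the original code.
from workflow.patterns.controlflow import IF

def halt_record(obj, eng):
    # Illustrative stand-in: pause the workflow for manual curation
    eng.halt(action='review', msg='Pending Holding Pen match found')

class article_ingestion(object):
    workflow = [
        IF(pending_in_holding_pen, [
            halt_record,  # a pending match exists: stop and wait for a curator
        ]),
        # ...otherwise continue with the rest of the ingestion tasks
    ]
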
def get_deposit_by_cadi_id(cadi_id):
    """Return the deposit with the given CADI id.

    :param str cadi_id: CADI identifier
    :rtype: `cap.modules.deposits.api:CAPDeposit`
    """
    rs = RecordsSearch(index='deposits-records')
    res = rs.query(Q('match', basic_info__cadi_id__keyword=cadi_id)) \
        .execute().hits.hits

    if not res:
        raise DepositDoesNotExist

    uuid = res[0]['_id']
    return CAPDeposit.get_record(uuid)

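# A short check of what the query above serializes to: elasticsearch_dsl
# converts double underscores in keyword arguments into dots, so matching
# the ".keyword" sub-field performs an exact (non-analyzed) comparison.
# The CADI id used here is a made-up example.
from elasticsearch_dsl import Q

q = Q('match', basic_info__cadi_id__keyword='EXO-17-023')
assert q.to_dict() == {
    'match': {'basic_info.cadi_id.keyword': 'EXO-17-023'}
}
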
def search(query, index=None, filters=list(), size=10, include="*",
           exclude="authors", offset=0, sort_field=None, sort_order='',
           post_filter=None):
    """Perform a search query.

    :param query: [string] query string, e.g. 'higgs boson'
    :param index: [string] name of the index. If None, a default is used
    :param filters: [list of tuples] list of filters for the query.
        Currently supported: ('author', author_fullname),
        ('collaboration', collaboration_name), ('date', date)
    :param size: [int] max number of hits that should be returned
    :param include: fields to include in the _source of the response
    :param exclude: fields to exclude from the _source of the response
    :param offset: [int] offset for the results (used for pagination)
    :param sort_field: [string] sorting field. Currently supported fields:
        "title", "collaboration", "date", "relevance"
    :param sort_order: [string] order of the sorting, either original
        (for a particular field) or reversed. Supported: '' or 'rev'
    :param post_filter: filter applied after aggregations have been computed
    :return: [dict] dictionary with processed results and facets
    """
    # If the query is empty, sort by date
    if query == '' and not sort_field:
        sort_field = 'date'

    query = HEPDataQueryParser.parse_query(query)

    # Create the search with a preference param to ensure consistency of
    # results across shards
    search = RecordsSearch(using=es, index=index).with_preference_param()

    if query:
        fuzzy_query = QueryString(query=query, fuzziness='AUTO')
        search.query = fuzzy_query | \
            Q('nested', query=fuzzy_query, path='authors') | \
            Q('has_child', type="child_datatable", query=fuzzy_query)

    search = search.filter("term", doc_type=CFG_PUB_TYPE)
    search = QueryBuilder.add_filters(search, filters)

    mapped_sort_field = sort_fields_mapping(sort_field)
    search = search.sort({
        mapped_sort_field: {
            "order": calculate_sort_order(sort_order, sort_field)
        }
    })

    search = add_default_aggregations(search, filters)

    if post_filter:
        search = search.post_filter(post_filter)

    search = search.source(includes=include, excludes=exclude)
    search = search[offset:offset + size]

    try:
        pub_result = search.execute().to_dict()

        parent_filter = {
            "terms": {
                "_id": [hit["_id"] for hit in pub_result['hits']['hits']]
            }
        }

        data_search = RecordsSearch(using=es, index=index)
        data_search = data_search.query('has_parent',
                                        parent_type="parent_publication",
                                        query=parent_filter)
        if query:
            data_search = data_search.query(QueryString(query=query))

        data_search_size = (size * ELASTICSEARCH_MAX_RESULT_WINDOW
                            // LIMIT_MAX_RESULTS_PER_PAGE)
        data_search = data_search[0:data_search_size]
        data_result = data_search.execute().to_dict()

        merged_results = merge_results(pub_result, data_result)
        return map_result(merged_results, filters)
    except TransportError as e:
        # For search phase execution exceptions we pass the reason through,
        # as it is likely to be a user error (e.g. an invalid search query)
        if e.error == 'search_phase_execution_exception' and e.info \
                and "error" in e.info and isinstance(e.info['error'], dict):
            reason = e.info['error']['root_cause'][0]['reason']
        # Otherwise we hide the details from the user
        else:
            log.error(f'An unexpected error occurred when searching: {e}')
            reason = f'An unexpected error occurred: {e.error}'
        return {'error': reason}

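# A hedged usage sketch for search(): the filter tuple follows the formats
# listed in the docstring, and the sort values are examples of the options
# it documents; the collaboration name is a made-up example. This only runs
# inside an application context where `es` and the configuration constants
# used above are available.
results = search(
    'higgs boson',
    filters=[('collaboration', 'ATLAS')],
    size=25,
    offset=0,
    sort_field='date',
    sort_order='',  # '' keeps the field's default order, 'rev' reverses it
)
if 'error' in results:
    print(results['error'])  # e.g. an invalid query reported by Elasticsearch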