def _set_query_aggs(self, query):
    '''Build the aggregation section of *query* and return the filters in use.

    Pops the bool clauses off the query's post_filter, re-registers them via
    set_filters (which also records UI filters on self._result), restricts the
    facet list to fields on the matrix x/y axes, then attaches the facet
    aggregations and the x/y groupings back onto *query* (mutated in place).
    '''
    query_filters = query['post_filter'].pop('bool')
    filter_collector = {'post_filter': {'bool': query_filters}}
    # set_filters mutates filter_collector and self._result as side effects.
    used_filters = set_filters(self._request, filter_collector, self._result, filter_exclusion=self._filter_exclusion)
    filters = filter_collector['post_filter']['bool']['must']
    negative_filters = filter_collector['post_filter']['bool']['must_not']
    # Keep only schema facets that appear on either matrix axis.
    self._facets = [(field, facet) for field, facet in self._schema['facets'].items() if (field in self._matrix['x']['facets'] or field in self._matrix['y']['facets'])]
    # Display all audits if logged in, or all but INTERNAL_ACTION if logged out
    for audit_facet in self._audit_facets:
        # NOTE: `and` binds tighter than `or`, so non-INTERNAL_ACTION audit
        # facets are always appended regardless of permissions.
        if (self._search_audit and 'group.submitter' in self._principals or 'INTERNAL_ACTION' not in audit_facet[0]):
            self._facets.append(audit_facet)
    query['aggs'] = set_facets(self._facets, used_filters, self._principals, self._doc_types)
    self._construct_xygroupings(query, filters, negative_filters)
    return used_filters
def _set_query_aggs(self, query):
    '''Build the aggregation section of *query* and return the filters in use.

    Summary-view variant: collects post_filter bool clauses via set_filters,
    keeps only facets on the summary x/y axes, and wires the facet aggs and
    x/y groupings into *query* (mutated in place).
    '''
    query_filters = query['post_filter'].pop('bool')
    filter_collector = {'post_filter': {'bool': query_filters}}
    # set_filters mutates filter_collector and self._result as side effects.
    used_filters = set_filters(
        self._request,
        filter_collector,
        self._result,
    )
    filters = filter_collector['post_filter']['bool']['must']
    # Keep only schema facets that appear on either summary axis.
    self._facets = [
        (field, facet)
        for field, facet in self._schema['facets'].items()
        if (
            field in self._summary['x']['facets']
            or field in self._summary['y']['facets']
        )
    ]
    query['aggs'] = set_facets(
        self._facets,
        used_filters,
        self._principals,
        self._doc_types
    )
    self._construct_xygroupings(query, filters)
    return used_filters
def test_set_filters_searchTerm():
    """A searchTerm param becomes a removable UI filter but no ES filter."""
    request = FakeRequest((('searchTerm', 'value1'),))
    query = {
        'query': {'query_string': {}},
        'post_filter': {'bool': {'must': [], 'must_not': []}},
    }
    result = {'filters': []}

    used_filters = set_filters(request, query, result)

    # No field filters registered; the query itself is untouched.
    assert used_filters == {}
    assert query == {
        'query': {'query_string': {}},
        'post_filter': {'bool': {'must': [], 'must_not': []}},
    }
    # The term still shows up in the UI filter list with a remove link.
    assert result == {
        'filters': [
            {'field': 'searchTerm', 'term': 'value1', 'remove': '/search/?'},
        ],
    }
def test_set_filters_reserved_params(param):
    """Reserved query params are ignored entirely by set_filters."""
    from snovault.helpers.helper import set_filters
    request = FakeRequest(((param, 'foo'),))
    query = {
        'query': {'query_string': {}},
        'post_filter': {'bool': {'must': [], 'must_not': []}},
    }
    result = {'filters': []}

    used_filters = set_filters(request, query, result)

    # Nothing recorded anywhere: no used filters, no query mutation,
    # and no UI filter entry.
    assert used_filters == {}
    assert query == {
        'query': {'query_string': {}},
        'post_filter': {'bool': {'must': [], 'must_not': []}},
    }
    assert result == {'filters': []}
def test_set_filters_reserved_params(param):
    """Reserved query params are ignored entirely by set_filters."""
    request = FakeRequest(((param, 'foo'),))
    query = {
        'query': {'query_string': {}},
        'post_filter': {'bool': {'must': [], 'must_not': []}},
    }
    result = {'filters': []}

    used_filters = set_filters(request, query, result)

    # Nothing recorded anywhere: no used filters, no query mutation,
    # and no UI filter entry.
    assert used_filters == {}
    assert query == {
        'query': {'query_string': {}},
        'post_filter': {'bool': {'must': [], 'must_not': []}},
    }
    assert result == {'filters': []}
def test_set_filters_exists_missing():
    """A '*' value maps to an exists filter; a '!' suffix negates the field."""
    request = FakeRequest((
        ('field1', '*'),
        ('field2!', '*'),
    ))
    query = {
        'query': {'query_string': {}},
        'post_filter': {'bool': {'must': [], 'must_not': []}},
    }
    result = {'filters': []}

    used_filters = set_filters(request, query, result)

    assert used_filters == {'field1': ['*'], 'field2!': ['*']}
    assert query == {
        'query': {'query_string': {}},
        'post_filter': {
            'bool': {
                # field1=* -> the document must contain the field ...
                'must': [{'exists': {'field': 'embedded.field1'}}],
                # ... while field2!=* -> the document must lack it.
                'must_not': [{'exists': {'field': 'embedded.field2'}}],
            }
        },
    }
    # Each remove link preserves the other filter, url-encoded.
    assert result == {
        'filters': [
            {'field': 'field1', 'term': '*', 'remove': '/search/?field2%21=%2A'},
            {'field': 'field2!', 'term': '*', 'remove': '/search/?field1=%2A'},
        ],
    }
def test_set_filters_multivalued():
    """Repeated params for one field collapse into a single terms filter."""
    from snovault.helpers.helper import set_filters
    request = FakeRequest((
        ('field1', 'value1'),
        ('field1', 'value2'),
    ))
    query = {
        'query': {'query_string': {}},
        'post_filter': {'bool': {'must': [], 'must_not': []}},
    }
    result = {'filters': []}

    used_filters = set_filters(request, query, result)

    assert used_filters == {'field1': ['value1', 'value2']}
    assert query == {
        'query': {'query_string': {}},
        'post_filter': {
            'bool': {
                'must': [{'terms': {'embedded.field1': ['value1', 'value2']}}],
                'must_not': [],
            }
        },
    }
    # Removing either value leaves a link that keeps the other one.
    assert result == {
        'filters': [
            {'field': 'field1', 'term': 'value1', 'remove': '/search/?field1=value2'},
            {'field': 'field1', 'term': 'value2', 'remove': '/search/?field1=value1'},
        ],
    }
def test_set_filters_multivalued():
    """Two values for the same field share one terms clause in the query."""
    from snovault.helpers.helper import set_filters
    values = ['value1', 'value2']
    request = FakeRequest(tuple(('field1', v) for v in values))
    query = {
        'query': {'query_string': {}},
        'post_filter': {'bool': {'must': [], 'must_not': []}},
    }
    result = {'filters': []}

    used_filters = set_filters(request, query, result)

    assert used_filters == {'field1': values}
    assert query == {
        'query': {'query_string': {}},
        'post_filter': {
            'bool': {
                'must': [{'terms': {'embedded.field1': values}}],
                'must_not': [],
            }
        },
    }
    # Each UI filter's remove link retains the sibling value.
    assert result == {
        'filters': [
            {'field': 'field1', 'term': 'value1', 'remove': '/search/?field1=value2'},
            {'field': 'field1', 'term': 'value2', 'remove': '/search/?field1=value1'},
        ],
    }
def _set_query_aggs(self, query):
    '''Build the aggregation section of *query* for the matrix view.

    Pops the bool clauses off the query's post_filter and re-registers them
    via set_filters (which also records UI filters on self._result), limits
    facets to the matrix x/y axes plus permitted audit facets, then attaches
    the facet aggregations and x/y groupings to *query* (mutated in place).

    Returns a ``(audit_field_list, used_filters)`` tuple where
    ``audit_field_list`` holds the field names of the audit facets in use
    and ``used_filters`` maps applied filter fields to their values.
    '''
    # Rather than setting filters at the top level of the query we collect
    # them for use in aggregations later.
    query_filters = query['post_filter'].pop('bool')
    filter_collector = {'post_filter': {'bool': query_filters}}
    used_filters = set_filters(
        self._request,
        filter_collector,
        self._result,
    )
    filters = filter_collector['post_filter']['bool']['must']
    negative_filters = filter_collector['post_filter']['bool']['must_not']
    # Keep only schema facets that appear on either matrix axis.
    self._facets = [
        (field, facet)
        for field, facet in self._schema['facets'].items()
        if (
            field in self._matrix['x']['facets']
            or field in self._matrix['y']['facets']
        )
    ]
    # Display all audits if logged in, or all but INTERNAL_ACTION if logged out.
    for audit_facet in self._audit_facets:
        if (self._search_audit and 'group.submitter' in self._principals
                or 'INTERNAL_ACTION' not in audit_facet[0]):
            self._facets.append(audit_facet)
    # Field names of the audit facets, in facet order.  Replaces the former
    # copy-then-index-replace loop with a single comprehension
    # ('audit.' in field is equivalent to field.rfind('audit.') > -1).
    audit_field_list = [
        field
        for field, _facet in self._facets
        if 'audit.' in field
    ]
    query['aggs'] = set_facets(
        self._facets,
        used_filters,
        self._principals,
        self._doc_types,
    )
    self._construct_xygroupings(
        query,
        filters,
        negative_filters,
        audit_field_list
    )
    return audit_field_list, used_filters
def test_set_filters_audit():
    """audit.* fields filter on the raw audit document, not the embedded one."""
    from snovault.helpers.helper import set_filters
    request = FakeRequest((('audit.foo', 'value1'),))
    query = {
        'query': {'query_string': {}},
        'post_filter': {'bool': {'must': [], 'must_not': []}},
    }
    result = {'filters': []}

    used_filters = set_filters(request, query, result)

    assert used_filters == {'audit.foo': ['value1']}
    assert query == {
        'query': {'query_string': {}},
        'post_filter': {
            'bool': {
                # Note: no 'embedded.' prefix for audit fields.
                'must': [{'terms': {'audit.foo': ['value1']}}],
                'must_not': [],
            }
        },
    }
    assert result == {
        'filters': [
            {'field': 'audit.foo', 'term': 'value1', 'remove': '/search/?'},
        ],
    }
def test_set_filters_audit():
    """An audit.* param is used verbatim as the ES terms-filter field."""
    from snovault.helpers.helper import set_filters
    field, term = 'audit.foo', 'value1'
    request = FakeRequest(((field, term),))
    query = {
        'query': {'query_string': {}},
        'post_filter': {'bool': {'must': [], 'must_not': []}},
    }
    result = {'filters': []}

    used_filters = set_filters(request, query, result)

    assert used_filters == {field: [term]}
    # Audit fields keep their own prefix instead of gaining 'embedded.'.
    assert query == {
        'query': {'query_string': {}},
        'post_filter': {
            'bool': {
                'must': [{'terms': {field: [term]}}],
                'must_not': [],
            }
        },
    }
    assert result == {
        'filters': [
            {'field': field, 'term': term, 'remove': '/search/?'},
        ],
    }
def test_set_filters():
    """A plain field=value param becomes an embedded terms filter."""
    request = FakeRequest((('field1', 'value1'),))
    query = {
        'query': {'query_string': {}},
        'post_filter': {'bool': {'must': [], 'must_not': []}},
    }
    result = {'filters': []}

    used_filters = set_filters(request, query, result)

    assert used_filters == {'field1': ['value1']}
    assert query == {
        'query': {'query_string': {}},
        'post_filter': {
            'bool': {
                # Regular fields are addressed under the 'embedded.' prefix.
                'must': [{'terms': {'embedded.field1': ['value1']}}],
                'must_not': [],
            }
        },
    }
    # With a single filter applied, removing it returns to the bare search.
    assert result == {
        'filters': [
            {'field': 'field1', 'term': 'value1', 'remove': '/search/?'},
        ],
    }
def test_set_filters_negated():
    """A '!' suffix on the field routes the terms clause into must_not."""
    request = FakeRequest((('field1!', 'value1'),))
    query = {
        'query': {'query_string': {}},
        'post_filter': {'bool': {'must': [], 'must_not': []}},
    }
    result = {'filters': []}

    used_filters = set_filters(request, query, result)

    # The negation marker is preserved in the used-filters key.
    assert used_filters == {'field1!': ['value1']}
    assert query == {
        'query': {'query_string': {}},
        'post_filter': {
            'bool': {
                'must': [],
                # The bang is stripped from the ES field itself.
                'must_not': [{'terms': {'embedded.field1': ['value1']}}],
            }
        },
    }
    assert result == {
        'filters': [
            {'field': 'field1!', 'term': 'value1', 'remove': '/search/?'},
        ],
    }
def _set_query_aggs(self, query):
    '''Build the aggregation section of *query* and return the filters in use.

    Summary-view variant: collects post_filter bool clauses via set_filters,
    keeps only facets on the summary x/y axes, and wires the facet aggs and
    x/y groupings into *query* (mutated in place).
    '''
    query_filters = query['post_filter'].pop('bool')
    filter_collector = {'post_filter': {'bool': query_filters}}
    # set_filters mutates filter_collector and self._result as side effects.
    used_filters = set_filters(
        self._request,
        filter_collector,
        self._result,
    )
    filters = filter_collector['post_filter']['bool']['must']
    # Keep only schema facets that appear on either summary axis.
    self._facets = [(field, facet) for field, facet in self._schema['facets'].items() if (field in self._summary['x']['facets'] or field in self._summary['y']['facets'])]
    query['aggs'] = set_facets(self._facets, used_filters, self._principals, self._doc_types)
    self._construct_xygroupings(query, filters)
    return used_filters
def _set_query_aggs(self, query):
    '''Build the aggregation section of *query* for the matrix view.

    Pops the bool clauses off the query's post_filter and re-registers them
    via set_filters (which also records UI filters on self._result), limits
    facets to the matrix x/y axes plus permitted audit facets, then attaches
    the facet aggregations and x/y groupings to *query* (mutated in place).

    Returns a ``(audit_field_list, used_filters)`` tuple where
    ``audit_field_list`` holds the field names of the audit facets in use
    and ``used_filters`` maps applied filter fields to their values.
    '''
    # Rather than setting filters at the top level of the query we collect
    # them for use in aggregations later.
    query_filters = query['post_filter'].pop('bool')
    filter_collector = {'post_filter': {'bool': query_filters}}
    used_filters = set_filters(
        self._request,
        filter_collector,
        self._result,
    )
    filters = filter_collector['post_filter']['bool']['must']
    negative_filters = filter_collector['post_filter']['bool']['must_not']
    # Keep only schema facets that appear on either matrix axis.
    self._facets = [
        (field, facet)
        for field, facet in self._schema['facets'].items()
        if (
            field in self._matrix['x']['facets']
            or field in self._matrix['y']['facets']
        )
    ]
    # Display all audits if logged in, or all but INTERNAL_ACTION if logged out.
    for audit_facet in self._audit_facets:
        if (self._search_audit and 'group.submitter' in self._principals
                or 'INTERNAL_ACTION' not in audit_facet[0]):
            self._facets.append(audit_facet)
    # Field names of the audit facets, in facet order.  Replaces the former
    # copy-then-index-replace loop with a single comprehension
    # ('audit.' in field is equivalent to field.rfind('audit.') > -1).
    audit_field_list = [
        field
        for field, _facet in self._facets
        if 'audit.' in field
    ]
    query['aggs'] = set_facets(
        self._facets,
        used_filters,
        self._principals,
        self._doc_types,
    )
    self._construct_xygroupings(query, filters, negative_filters, audit_field_list)
    return audit_field_list, used_filters
def _set_query_aggs(self, query, result_filters, matrix_x_y):
    '''Build the aggregation section of *query* and return the filters in use.

    Parameterized matrix variant: *result_filters* receives the UI filter
    records and *matrix_x_y* supplies the x/y axis facet lists, so the same
    method can serve multiple matrix layouts.  *query* is mutated in place.
    '''
    query_filters = query['post_filter'].pop('bool')
    filter_collector = {'post_filter': {'bool': query_filters}}
    # set_filters mutates filter_collector and result_filters as side effects.
    used_filters = set_filters(
        self._request,
        filter_collector,
        result_filters,
        filter_exclusion=self._filter_exclusion
    )
    filters = filter_collector['post_filter']['bool']['must']
    negative_filters = filter_collector['post_filter']['bool']['must_not']
    # Keep only schema facets that appear on either requested axis.
    self._facets = [
        (field, facet)
        for field, facet in self._schema['facets'].items()
        if (
            field in matrix_x_y['x']['facets']
            or field in matrix_x_y['y']['facets']
        )
    ]
    # Display all audits if logged in, or all but INTERNAL_ACTION if logged out
    for audit_facet in self._audit_facets:
        # NOTE: `and` binds tighter than `or`, so non-INTERNAL_ACTION audit
        # facets are always appended regardless of permissions.
        if (
                self._search_audit and 'group.submitter' in self._principals
                or 'INTERNAL_ACTION' not in audit_facet[0]
        ):
            self._facets.append(audit_facet)
    query['aggs'] = set_facets(
        self._facets,
        used_filters,
        self._principals,
        self._doc_types
    )
    self._construct_xygroupings(query, filters, negative_filters, matrix_x_y)
    return used_filters
def region_search(context, request):
    """
    Search files by region.

    Resolves a region/annotation query param to chromosome coordinates, finds
    peak documents overlapping those coordinates in the SNP index, then runs
    an Experiment search restricted to the files those peaks came from.
    Returns a JSON-LD result dict; 'notification' signals the outcome.
    """
    types = request.registry[TYPES]
    result = {
        # Drop the '&referrer...' tail from the echoed query string.
        '@id': '/region-search/' + ('?' + request.query_string.split('&referrer')[0] if request.query_string else ''),
        '@type': ['region-search'],
        'title': 'Search by region',
        'facets': [],
        '@graph': [],
        'columns': OrderedDict(),
        'notification': '',
        'filters': []
    }
    principals = effective_principals(request)
    es = request.registry[ELASTIC_SEARCH]
    snp_es = request.registry['snp_search']
    region = request.params.get('region', '*')
    region_inside_peak_status = False
    # handling limit
    size = request.params.get('limit', 25)
    if size in ('all', ''):
        size = 99999
    else:
        try:
            size = int(size)
        except ValueError:
            size = 25
    if region == '':
        region = '*'
    assembly = request.params.get('genome', '*')
    result['assembly'] = _GENOME_TO_ALIAS.get(assembly, 'GRCh38')
    annotation = request.params.get('annotation', '*')
    chromosome, start, end = ('', '', '')
    # An explicit annotation wins over a region string.
    if annotation != '*':
        if annotation.lower().startswith('ens'):
            chromosome, start, end = get_ensemblid_coordinates(
                annotation, assembly)
        else:
            chromosome, start, end = get_annotation_coordinates(
                es, annotation, assembly)
    elif region != '*':
        region = region.lower()
        if region.startswith('rs'):
            # dbSNP rsid: restrict peak matching to peaks containing the SNP.
            sanitized_region = sanitize_rsid(region)
            chromosome, start, end = get_rsid_coordinates(
                sanitized_region, assembly)
            region_inside_peak_status = True
        elif region.startswith('ens'):
            chromosome, start, end = get_ensemblid_coordinates(
                region, assembly)
        elif region.startswith('chr'):
            chromosome, start, end = sanitize_coordinates(region)
        else:
            chromosome, start, end = ('', '', '')
    # Check if there are valid coordinates
    if not chromosome or not start or not end:
        result['notification'] = 'No annotations found'
        return result
    else:
        result['coordinates'] = '{chr}:{start}-{end}'.format(chr=chromosome,
                                                             start=start, end=end)
    # Search for peaks for the coordinates we got
    try:
        # including inner hits is very slow
        # figure out how to distinguish browser requests from
        # .embed method requests
        if 'peak_metadata' in request.query_string:
            peak_query = get_peak_query(start, end, with_inner_hits=True,
                                        within_peaks=region_inside_peak_status)
        else:
            peak_query = get_peak_query(start, end,
                                        within_peaks=region_inside_peak_status)
        # One SNP index per chromosome; doc_type selects the assembly.
        peak_results = snp_es.search(body=peak_query,
                                     index=chromosome.lower(),
                                     doc_type=_GENOME_TO_ALIAS[assembly],
                                     size=99999)
    except Exception:
        # NOTE(review): broad catch — any failure (bad index, connection,
        # query error) surfaces as this generic notification.
        result['notification'] = 'Error during search'
        return result
    file_uuids = []
    for hit in peak_results['hits']['hits']:
        if hit['_id'] not in file_uuids:
            file_uuids.append(hit['_id'])
    file_uuids = list(set(file_uuids))
    result['notification'] = 'No results found'
    # if more than one peak found return the experiments with those peak files
    uuid_count = len(file_uuids)
    # ES rejects terms queries with too many clauses; truncate defensively.
    if uuid_count > MAX_CLAUSES_FOR_ES:
        log.error("REGION_SEARCH WARNING: region with %d file_uuids is being restricted to %d" % \
            (uuid_count, MAX_CLAUSES_FOR_ES))
        file_uuids = file_uuids[:MAX_CLAUSES_FOR_ES]
        uuid_count = len(file_uuids)
    if uuid_count:
        query = get_filtered_query('', [], set(), principals, ['Experiment'])
        del query['query']
        query['post_filter']['bool']['must'].append(
            {'terms': {
                'embedded.files.uuid': file_uuids
            }})
        used_filters = set_filters(request, query, result)
        used_filters['files.uuid'] = file_uuids
        query['aggs'] = set_facets(_FACETS, used_filters, principals,
                                   ['Experiment'])
        schemas = (types[item_type].schema for item_type in ['Experiment'])
        es_results = es.search(body=query, index='experiment',
                               doc_type='experiment', size=size,
                               request_timeout=60)
        result['@graph'] = list(
            format_results(request, es_results['hits']['hits']))
        result['total'] = total = es_results['hits']['total']
        result['facets'] = BaseView._format_facets(es_results, _FACETS,
                                                   used_filters, schemas,
                                                   total, principals)
        result['peaks'] = list(peak_results['hits']['hits'])
        result['download_elements'] = get_peak_metadata_links(request)
        if result['total'] > 0:
            result['notification'] = 'Success'
            position_for_browser = format_position(result['coordinates'], 200)
            result.update(
                search_result_actions(request, ['Experiment'], es_results,
                                      position=position_for_browser))
    return result
def preprocess_view(self):
    '''
    Main function to construct query and build view results json
    * Only publicly accessible function

    News view: runs a fixed released-Page/news search sorted by creation
    date (newest first) and returns the populated self._result dict.
    '''
    self._result['@id'] = '/news/' + self._search_base
    self._result['@type'] = ['News']
    self._result['notification'] = ''
    doc_types = ['Page']
    search_fields, _ = get_search_fields(self._request, doc_types)
    query = get_filtered_query(
        '*',
        search_fields,
        sorted(list_result_fields(self._request, doc_types)),
        self._principals,
        doc_types
    )
    # Match-all search: the query_string clause is unnecessary.
    del query['query']['query_string']
    sort = OrderedDict()
    result_sort = OrderedDict()
    # Populates both dicts with the same descending-date sort spec.
    sort['embedded.date_created'] = result_sort['date_created'] = {
        'order': 'desc',
        'unmapped_type': 'keyword',
    }
    # NOTE(review): `sort` (keyed on embedded.date_created) is built but
    # never used; the ES query sorts on the bare 'date_created' key via
    # result_sort — confirm that is the intended ES field.
    query['sort'] = result_sort
    self._result['sort'] = result_sort
    # Hard-coded filters pin the view to released news pages.
    used_filters = set_filters(
        self._request,
        query,
        self._result,
        [('type', 'Page'), ('news', 'true'), ('status', 'released')]
    )
    facets = []
    if len(doc_types) == 1 and 'facets' in self._types[doc_types[0]].schema:
        facets.extend(self._types[doc_types[0]].schema['facets'].items())
    query['aggs'] = set_facets(facets, used_filters, self._principals, doc_types)
    # First page only: at most 25 news items.
    es_results = self._elastic_search.search(
        body=query, index=self._es_index, doc_type=self._es_index, from_=0, size=25)
    total = es_results['hits']['total']
    if not total:
        self._request.response.status_code = 404
        self._result['notification'] = 'No results found'
        self._result['@graph'] = []
        return self._result
    self._result['notification'] = 'Success'
    self._result['total'] = total
    graph = format_results(
        self._request,
        es_results['hits']['hits'],
        self._result
    )
    self._result['@graph'] = list(graph)
    schemas = [
        self._types[doc_type].schema
        for doc_type in doc_types
    ]
    self._result['facets'] = self._format_facets(
        es_results,
        facets,
        used_filters,
        schemas,
        total,
        self._principals
    )
    return self._result
def region_search(context, request):
    """
    Search files by region.

    Resolves a region/annotation query param to chromosome coordinates, finds
    peak documents overlapping those coordinates in the SNP index, then runs
    an Experiment search restricted to the files those peaks came from.
    Returns a JSON-LD result dict; 'notification' signals the outcome.
    """
    types = request.registry[TYPES]
    result = {
        # Drop the '&referrer...' tail from the echoed query string.
        '@id': '/region-search/' + ('?' + request.query_string.split('&referrer')[0] if request.query_string else ''),
        '@type': ['region-search'],
        'title': 'Search by region',
        'facets': [],
        '@graph': [],
        'columns': OrderedDict(),
        'notification': '',
        'filters': []
    }
    principals = effective_principals(request)
    es = request.registry[ELASTIC_SEARCH]
    snp_es = request.registry['snp_search']
    region = request.params.get('region', '*')
    region_inside_peak_status = False
    # handling limit
    size = request.params.get('limit', 25)
    if size in ('all', ''):
        size = 99999
    else:
        try:
            size = int(size)
        except ValueError:
            size = 25
    if region == '':
        region = '*'
    assembly = request.params.get('genome', '*')
    result['assembly'] = _GENOME_TO_ALIAS.get(assembly, 'GRCh38')
    annotation = request.params.get('annotation', '*')
    chromosome, start, end = ('', '', '')
    # An explicit annotation wins over a region string.
    if annotation != '*':
        if annotation.lower().startswith('ens'):
            chromosome, start, end = get_ensemblid_coordinates(annotation, assembly)
        else:
            chromosome, start, end = get_annotation_coordinates(es, annotation, assembly)
    elif region != '*':
        region = region.lower()
        if region.startswith('rs'):
            # dbSNP rsid: restrict peak matching to peaks containing the SNP.
            sanitized_region = sanitize_rsid(region)
            chromosome, start, end = get_rsid_coordinates(sanitized_region, assembly)
            region_inside_peak_status = True
        elif region.startswith('ens'):
            chromosome, start, end = get_ensemblid_coordinates(region, assembly)
        elif region.startswith('chr'):
            chromosome, start, end = sanitize_coordinates(region)
        else:
            chromosome, start, end = ('', '', '')
    # Check if there are valid coordinates
    if not chromosome or not start or not end:
        result['notification'] = 'No annotations found'
        return result
    else:
        result['coordinates'] = '{chr}:{start}-{end}'.format(
            chr=chromosome, start=start, end=end
        )
    # Search for peaks for the coordinates we got
    try:
        # including inner hits is very slow
        # figure out how to distinguish browser requests from
        # .embed method requests
        if 'peak_metadata' in request.query_string:
            peak_query = get_peak_query(start, end, with_inner_hits=True,
                                        within_peaks=region_inside_peak_status)
        else:
            peak_query = get_peak_query(start, end,
                                        within_peaks=region_inside_peak_status)
        # One SNP index per chromosome; doc_type selects the assembly.
        peak_results = snp_es.search(body=peak_query,
                                     index=chromosome.lower(),
                                     doc_type=_GENOME_TO_ALIAS[assembly],
                                     size=99999)
    except Exception:
        # NOTE(review): broad catch — any failure (bad index, connection,
        # query error) surfaces as this generic notification.
        result['notification'] = 'Error during search'
        return result
    file_uuids = []
    for hit in peak_results['hits']['hits']:
        if hit['_id'] not in file_uuids:
            file_uuids.append(hit['_id'])
    file_uuids = list(set(file_uuids))
    result['notification'] = 'No results found'
    # if more than one peak found return the experiments with those peak files
    uuid_count = len(file_uuids)
    # ES rejects terms queries with too many clauses; truncate defensively.
    if uuid_count > MAX_CLAUSES_FOR_ES:
        log.error("REGION_SEARCH WARNING: region with %d file_uuids is being restricted to %d" % \
            (uuid_count, MAX_CLAUSES_FOR_ES))
        file_uuids = file_uuids[:MAX_CLAUSES_FOR_ES]
        uuid_count = len(file_uuids)
    if uuid_count:
        query = get_filtered_query('', [], set(), principals, ['Experiment'])
        del query['query']
        query['post_filter']['bool']['must'].append({
            'terms': {
                'embedded.files.uuid': file_uuids
            }
        })
        used_filters = set_filters(request, query, result)
        used_filters['files.uuid'] = file_uuids
        query['aggs'] = set_facets(_FACETS, used_filters, principals,
                                   ['Experiment'])
        schemas = (types[item_type].schema for item_type in ['Experiment'])
        es_results = es.search(
            body=query,
            index='experiment',
            doc_type='experiment',
            size=size,
            request_timeout=60
        )
        result['@graph'] = list(format_results(request, es_results['hits']['hits']))
        result['total'] = total = es_results['hits']['total']
        result['facets'] = BaseView._format_facets(es_results, _FACETS,
                                                   used_filters, schemas,
                                                   total, principals)
        result['peaks'] = list(peak_results['hits']['hits'])
        result['download_elements'] = get_peak_metadata_links(request)
        if result['total'] > 0:
            result['notification'] = 'Success'
            position_for_browser = format_position(result['coordinates'], 200)
            result.update(search_result_actions(request, ['Experiment'],
                                                es_results,
                                                position=position_for_browser))
    return result
def preprocess_view(self, views=None, search_result_actions=None):
    # pylint: disable=too-many-statements, too-many-branches, too-many-locals
    '''
    Main function to construct query and build view results json
    * Only publicly accessible function

    Builds the /search/ ES query from request params (types, searchTerm,
    pagination, sort, filters, facets), executes it (paged or scan), and
    returns either a result dict, a generator of hits, or a streaming
    response depending on size and self._return_generator.
    '''
    types = self._types
    search_base = normalize_query(self._request)
    result = {
        '@context': self._request.route_path('jsonld_context'),
        '@id': '/search/' + search_base,
        '@type': ['Search'],
        'title': 'Search',
        'filters': [],
    }
    es_index = RESOURCES_INDEX
    search_audit = self._request.has_permission('search_audit')
    from_, size = get_pagination(self._request)
    search_term = prepare_search_term(self._request)
    # A typed collection context pins the doc type; otherwise honor ?type=.
    if (
            hasattr(self._context, 'type_info') and
            hasattr(self._context.type_info, 'name') and
            self._context.type_info.name
    ):
        doc_types = [self._context.type_info.name]
    else:
        doc_types = self._request.params.getall('type')
        if '*' in doc_types:
            doc_types = ['Item']
    # Normalize to item_type
    try:
        doc_types = sorted({types[name].name for name in doc_types})
    except KeyError:
        # Check for invalid types
        bad_types = [t for t in doc_types if t not in types]
        msg = "Invalid type: {}".format(', '.join(bad_types))
        raise HTTPBadRequest(explanation=msg)
    searchterm_specs = self._request.params.getall('searchTerm')
    searchterm_only = urlencode(
        [
            ("searchTerm", searchterm)
            for searchterm in searchterm_specs
        ]
    )
    # 'clear filters' link keeps either the search terms or the types.
    if searchterm_only:
        clear_qs = searchterm_only
    else:
        clear_qs = urlencode([("type", typ) for typ in doc_types])
    search_route = self._request.route_path('search', slash='/')
    clear_route = '?' + clear_qs if clear_qs else ''
    result['clear_filters'] = search_route + clear_route
    if not doc_types:
        if self._request.params.get('mode') == 'picker':
            doc_types = ['Item']
        else:
            doc_types = self._default_doc_types
    else:
        # One removable UI filter per selected type; its remove link drops
        # that type param while keeping all other params.
        for item_type in doc_types:
            t_thing = types[item_type]
            q_thing = urlencode(
                [
                    (k.encode('utf-8'), v.encode('utf-8'))
                    for k, v in self._request.params.items()
                    if not (k == 'type' and types['Item' if v == '*' else v] is t_thing)
                ]
            )
            result['filters'].append({
                'field': 'type',
                'term': t_thing.name,
                'remove': '{}?{}'.format(self._request.path, q_thing)
            })
        if views:
            result['views'] = views
    search_fields, _ = get_search_fields(self._request, doc_types)
    query = get_filtered_query(
        search_term,
        search_fields,
        sorted(list_result_fields(self._request, doc_types)),
        self._principals,
        doc_types,
    )
    schemas = [types[doc_type].schema for doc_type in doc_types]
    columns = list_visible_columns_for_schemas(self._request, schemas)
    if columns:
        result['columns'] = columns
    if search_term == '*':
        # Match-all search: drop the redundant query_string clause.
        del query['query']['query_string']
    else:
        query['query']['query_string']['fields'].extend(
            ['_all', '*.uuid', '*.md5sum', '*.submitted_file_name']
        )
    set_sort_order(self._request, search_term, types, doc_types, query, result)
    used_filters = set_filters(self._request, query, result)
    facets = [
        ('type', {'title': 'Data Type'}),
    ]
    if len(doc_types) == 1 and 'facets' in types[doc_types[0]].schema:
        facets.extend(types[doc_types[0]].schema['facets'].items())
    # Display all audits if permitted, else only non-INTERNAL_ACTION ones.
    for audit_facet in self._audit_facets:
        if (
                search_audit and 'group.submitter' in self._principals
                or 'INTERNAL_ACTION' not in audit_facet[0]
        ):
            facets.append(audit_facet)
    query['aggs'] = set_facets(facets, used_filters, self._principals, doc_types)
    query = sort_query(query)
    # Large/unbounded requests are served via the scan helper instead of paging.
    do_scan = size is None or size > 1000
    if not self._request.params.get('type') or 'Item' in doc_types:
        es_index = RESOURCES_INDEX
    else:
        es_index = [
            types[type_name].item_type
            for type_name in doc_types
            if hasattr(types[type_name], 'item_type')
        ]
    if do_scan:
        # Aggregation-only pass; the actual hits come from scan() below.
        es_results = self._elastic_search.search(
            body=query,
            index=es_index,
            search_type='query_then_fetch'
        )
    else:
        es_results = self._elastic_search.search(
            body=query,
            index=es_index,
            from_=from_,
            size=size,
            request_cache=True
        )
    total = es_results['hits']['total']
    result['total'] = total
    schemas = (types[item_type].schema for item_type in doc_types)
    result['facets'] = self._format_facets(
        es_results,
        facets,
        used_filters,
        schemas,
        total,
        self._principals
    )
    if search_result_actions:
        result.update(
            search_result_actions(
                self._request,
                doc_types,
                es_results
            )
        )
    # Offer a 'view all' link when the page does not cover every hit.
    if size is not None and size < result['total']:
        params = [(k, v) for k, v in self._request.params.items() if k != 'limit']
        params.append(('limit', 'all'))
        result['all'] = '%s?%s' % (
            self._request.resource_path(self._context),
            urlencode(params)
        )
    if not result['total']:
        self._request.response.status_code = 404
        result['notification'] = 'No results found'
        result['@graph'] = []
        return result if not self._return_generator else []
    result['notification'] = 'Success'
    if not do_scan:
        graph = format_results(
            self._request,
            es_results['hits']['hits'],
            result
        )
        if self._return_generator:
            return graph
        result['@graph'] = list(graph)
        return result
    # Scan path: aggs were already computed above, drop them before scanning.
    del query['aggs']
    if size is None:
        hits = scan(
            self._elastic_search,
            query=query,
            index=es_index,
            preserve_order=False
        )
    else:
        hits = scan(
            self._elastic_search,
            query=query,
            index=es_index,
            from_=from_,
            size=size,
            preserve_order=False
        )
    graph = format_results(self._request, hits, result)
    if self._request.__parent__ is not None or self._return_generator:
        if self._return_generator:
            return graph
        result['@graph'] = list(graph)
        return result
    # Top-level request: stream the @graph as incrementally-encoded JSON.
    app_iter = iter_long_json('@graph', graph, result)
    self._request.response.content_type = 'application/json'
    if str is bytes:  # Python 2 vs 3 wsgi differences
        self._request.response.app_iter = app_iter  # Python 2
    else:
        self._request.response.app_iter = (
            item.encode('utf-8') for item in app_iter
        )
    return self._request.response
def test_set_filters_exists_missing():
    """'*' means "field present"; the '!' suffix inverts it to "field absent"."""
    params = (
        ('field1', '*'),
        ('field2!', '*'),
    )
    request = FakeRequest(params)
    query = {
        'query': {'query_string': {}},
        'post_filter': {'bool': {'must': [], 'must_not': []}},
    }
    result = {'filters': []}

    used_filters = set_filters(request, query, result)

    assert used_filters == {'field1': ['*'], 'field2!': ['*']}
    expected_bool = {
        # Positive wildcard -> exists in must; negated one -> must_not.
        'must': [{'exists': {'field': 'embedded.field1'}}],
        'must_not': [{'exists': {'field': 'embedded.field2'}}],
    }
    assert query == {
        'query': {'query_string': {}},
        'post_filter': {'bool': expected_bool},
    }
    # Remove links keep the sibling filter, url-encoded.
    assert result == {
        'filters': [
            {'field': 'field1', 'term': '*', 'remove': '/search/?field2%21=%2A'},
            {'field': 'field2!', 'term': '*', 'remove': '/search/?field1=%2A'},
        ],
    }
def preprocess_view(self, views=None, search_result_actions=None):
    # pylint: disable=too-many-statements, too-many-branches, too-many-locals
    '''
    Main function to construct query and build view results json
    * Only publicly accessible function

    Builds the /search/ ES query from request params (types, searchTerm,
    pagination, sort, filters, facets), executes it (paged or scan), and
    returns either a result dict, a generator of hits, or a streaming
    response depending on size and self._return_generator.
    '''
    types = self._types
    search_base = normalize_query(self._request)
    result = {
        '@context': self._request.route_path('jsonld_context'),
        '@id': '/search/' + search_base,
        '@type': ['Search'],
        'title': 'Search',
        'filters': [],
    }
    es_index = RESOURCES_INDEX
    search_audit = self._request.has_permission('search_audit')
    from_, size = get_pagination(self._request)
    search_term = prepare_search_term(self._request)
    # A typed collection context pins the doc type; otherwise honor ?type=.
    if (hasattr(self._context, 'type_info') and
            hasattr(self._context.type_info, 'name') and
            self._context.type_info.name):
        doc_types = [self._context.type_info.name]
    else:
        doc_types = self._request.params.getall('type')
        if '*' in doc_types:
            doc_types = ['Item']
    # Normalize to item_type
    try:
        doc_types = sorted({types[name].name for name in doc_types})
    except KeyError:
        # Check for invalid types
        bad_types = [t for t in doc_types if t not in types]
        msg = "Invalid type: {}".format(', '.join(bad_types))
        raise HTTPBadRequest(explanation=msg)
    searchterm_specs = self._request.params.getall('searchTerm')
    searchterm_only = urlencode([("searchTerm", searchterm)
                                 for searchterm in searchterm_specs])
    # 'clear filters' link keeps either the search terms or the types.
    if searchterm_only:
        clear_qs = searchterm_only
    else:
        clear_qs = urlencode([("type", typ) for typ in doc_types])
    search_route = self._request.route_path('search', slash='/')
    clear_route = '?' + clear_qs if clear_qs else ''
    result['clear_filters'] = search_route + clear_route
    if not doc_types:
        if self._request.params.get('mode') == 'picker':
            doc_types = ['Item']
        else:
            doc_types = self._default_doc_types
    else:
        # One removable UI filter per selected type; its remove link drops
        # that type param while keeping all other params.
        for item_type in doc_types:
            t_thing = types[item_type]
            q_thing = urlencode([
                (k.encode('utf-8'), v.encode('utf-8'))
                for k, v in self._request.params.items()
                if not (k == 'type' and types['Item' if v == '*' else v] is t_thing)
            ])
            result['filters'].append({
                'field': 'type',
                'term': t_thing.name,
                'remove': '{}?{}'.format(self._request.path, q_thing)
            })
        if views:
            result['views'] = views
    search_fields, _ = get_search_fields(self._request, doc_types)
    query = get_filtered_query(
        search_term,
        search_fields,
        sorted(list_result_fields(self._request, doc_types)),
        self._principals,
        doc_types,
    )
    schemas = [types[doc_type].schema for doc_type in doc_types]
    columns = list_visible_columns_for_schemas(self._request, schemas)
    if columns:
        result['columns'] = columns
    if search_term == '*':
        # Match-all search: drop the redundant query_string clause.
        del query['query']['query_string']
    else:
        query['query']['query_string']['fields'].extend(
            ['_all', '*.uuid', '*.md5sum', '*.submitted_file_name'])
    set_sort_order(self._request, search_term, types, doc_types, query, result)
    used_filters = set_filters(self._request, query, result)
    facets = [
        ('type', {
            'title': 'Data Type'
        }),
    ]
    if len(doc_types) == 1 and 'facets' in types[doc_types[0]].schema:
        facets.extend(types[doc_types[0]].schema['facets'].items())
    # Display all audits if permitted, else only non-INTERNAL_ACTION ones.
    for audit_facet in self._audit_facets:
        if (search_audit and 'group.submitter' in self._principals
                or 'INTERNAL_ACTION' not in audit_facet[0]):
            facets.append(audit_facet)
    query['aggs'] = set_facets(facets, used_filters, self._principals, doc_types)
    query = sort_query(query)
    # Large/unbounded requests are served via the scan helper instead of paging.
    do_scan = size is None or size > 1000
    if not self._request.params.get('type') or 'Item' in doc_types:
        es_index = RESOURCES_INDEX
    else:
        es_index = [
            types[type_name].item_type
            for type_name in doc_types
            if hasattr(types[type_name], 'item_type')
        ]
    if do_scan:
        # Aggregation-only pass; the actual hits come from scan() below.
        es_results = self._elastic_search.search(
            body=query, index=es_index, search_type='query_then_fetch')
    else:
        es_results = self._elastic_search.search(body=query,
                                                 index=es_index,
                                                 from_=from_,
                                                 size=size,
                                                 request_cache=True)
    total = es_results['hits']['total']
    result['total'] = total
    schemas = (types[item_type].schema for item_type in doc_types)
    result['facets'] = self._format_facets(es_results, facets, used_filters,
                                           schemas, total, self._principals)
    if search_result_actions:
        result.update(
            search_result_actions(self._request, doc_types, es_results))
    # Offer a 'view all' link when the page does not cover every hit.
    if size is not None and size < result['total']:
        params = [(k, v) for k, v in self._request.params.items()
                  if k != 'limit']
        params.append(('limit', 'all'))
        result['all'] = '%s?%s' % (self._request.resource_path(
            self._context), urlencode(params))
    if not result['total']:
        self._request.response.status_code = 404
        result['notification'] = 'No results found'
        result['@graph'] = []
        return result if not self._return_generator else []
    result['notification'] = 'Success'
    if not do_scan:
        graph = format_results(self._request, es_results['hits']['hits'],
                               result)
        if self._return_generator:
            return graph
        result['@graph'] = list(graph)
        return result
    # Scan path: aggs were already computed above, drop them before scanning.
    del query['aggs']
    if size is None:
        hits = scan(self._elastic_search, query=query, index=es_index,
                    preserve_order=False)
    else:
        hits = scan(self._elastic_search, query=query, index=es_index,
                    from_=from_, size=size, preserve_order=False)
    graph = format_results(self._request, hits, result)
    if self._request.__parent__ is not None or self._return_generator:
        if self._return_generator:
            return graph
        result['@graph'] = list(graph)
        return result
    # Top-level request: stream the @graph as incrementally-encoded JSON.
    app_iter = iter_long_json('@graph', graph, result)
    self._request.response.content_type = 'application/json'
    if str is bytes:  # Python 2 vs 3 wsgi differences
        self._request.response.app_iter = app_iter  # Python 2
    else:
        self._request.response.app_iter = (item.encode('utf-8')
                                           for item in app_iter)
    return self._request.response