def test_get_search_fields(registry):
    request = FakeRequest((('key', 'value'),))
    request.registry = registry
    doc_types = [['Experiment']]
    for doc_type in doc_types:
        fields, highlights = get_search_fields(request, doc_type)
        assert isinstance(highlights, dict)
        assert isinstance(fields, list)
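# Hedged sketch of the FakeRequest test double the test above assumes. The real
# fixture lives in the test suite; this illustrative version only exposes the
# two attributes the test touches (.params and .registry). The class body below
# is an assumption, not the project's actual helper.
from webob.multidict import MultiDict


class FakeRequest(object):
    """Minimal stand-in for a Pyramid request used in unit tests (illustrative)."""

    def __init__(self, params):
        # Pyramid's request.params is a multidict of query-string pairs.
        self.params = MultiDict(params)
        # Replaced by the registry fixture in the test before use.
        self.registry = None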
def _construct_query(self):
    '''Helper method for preprocessing view'''
    search_fields, _ = get_search_fields(self._request, self._doc_types)
    query = get_filtered_query(
        self._search_term,
        search_fields,
        [],
        self._principals,
        self._doc_types
    )
    if self._search_term == '*':
        del query['query']['query_string']
    else:
        query['query']['query_string']['fields'].extend(
            ['_all', '*.uuid', '*.md5sum', '*.submitted_file_name']
        )
    used_filters = self._set_query_aggs(query)
    return query, used_filters
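# Hedged usage sketch: how a caller might consume the helper above. The `view`
# instance, its `_elastic_search` client, and the RESOURCES_INDEX wiring are
# assumptions for illustration; only _construct_query's return shape (a query
# dict plus the filters that were applied) comes from the code.
def run_view_query(view):
    query, used_filters = view._construct_query()
    es_results = view._elastic_search.search(body=query, index=RESOURCES_INDEX)
    return es_results, used_filters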
def _construct_query(self, result_filters, matrix_x_y):
    '''Helper method for preprocessing view'''
    search_fields, _ = get_search_fields(self._request, self._doc_types)
    query = get_filtered_query(
        self._search_term,
        search_fields,
        [],
        self._principals,
        self._doc_types
    )
    if self._search_term == '*':
        del query['query']['query_string']
    else:
        query['query']['query_string']['fields'].extend(
            ['_all', '*.uuid', '*.md5sum', '*.submitted_file_name']
        )
    used_filters = self._set_query_aggs(query, result_filters, matrix_x_y)
    return query, used_filters
def preprocess_view(self, views=None, search_result_actions=None):
    # pylint: disable=too-many-statements, too-many-branches, too-many-locals
    '''
    Main function to construct query and build view results json
    * Only publicly accessible function
    '''
    types = self._types
    search_base = normalize_query(self._request)
    result = {
        '@context': self._request.route_path('jsonld_context'),
        '@id': '/search/' + search_base,
        '@type': ['Search'],
        'title': 'Search',
        'filters': [],
    }
    es_index = RESOURCES_INDEX
    search_audit = self._request.has_permission('search_audit')
    from_, size = get_pagination(self._request)
    search_term = prepare_search_term(self._request)
    if (hasattr(self._context, 'type_info') and
            hasattr(self._context.type_info, 'name') and
            self._context.type_info.name):
        doc_types = [self._context.type_info.name]
    else:
        doc_types = self._request.params.getall('type')
        if '*' in doc_types:
            doc_types = ['Item']
    # Normalize to item_type
    try:
        doc_types = sorted({types[name].name for name in doc_types})
    except KeyError:
        # Check for invalid types
        bad_types = [t for t in doc_types if t not in types]
        msg = "Invalid type: {}".format(', '.join(bad_types))
        raise HTTPBadRequest(explanation=msg)
    searchterm_specs = self._request.params.getall('searchTerm')
    searchterm_only = urlencode(
        [("searchTerm", searchterm) for searchterm in searchterm_specs]
    )
    if searchterm_only:
        clear_qs = searchterm_only
    else:
        clear_qs = urlencode([("type", typ) for typ in doc_types])
    search_route = self._request.route_path('search', slash='/')
    clear_route = '?' + clear_qs if clear_qs else ''
    result['clear_filters'] = search_route + clear_route
    if not doc_types:
        if self._request.params.get('mode') == 'picker':
            doc_types = ['Item']
        else:
            doc_types = self._default_doc_types
    else:
        for item_type in doc_types:
            t_thing = types[item_type]
            q_thing = urlencode([
                (k.encode('utf-8'), v.encode('utf-8'))
                for k, v in self._request.params.items()
                if not (k == 'type' and types['Item' if v == '*' else v] is t_thing)
            ])
            result['filters'].append({
                'field': 'type',
                'term': t_thing.name,
                'remove': '{}?{}'.format(self._request.path, q_thing)
            })
        if views:
            result['views'] = views
    search_fields, _ = get_search_fields(self._request, doc_types)
    query = get_filtered_query(
        search_term,
        search_fields,
        sorted(list_result_fields(self._request, doc_types)),
        self._principals,
        doc_types,
    )
    schemas = [types[doc_type].schema for doc_type in doc_types]
    columns = list_visible_columns_for_schemas(self._request, schemas)
    if columns:
        result['columns'] = columns
    if search_term == '*':
        del query['query']['query_string']
    else:
        query['query']['query_string']['fields'].extend(
            ['_all', '*.uuid', '*.md5sum', '*.submitted_file_name']
        )
    set_sort_order(self._request, search_term, types, doc_types, query, result)
    used_filters = set_filters(self._request, query, result)
    facets = [
        ('type', {'title': 'Data Type'}),
    ]
    if len(doc_types) == 1 and 'facets' in types[doc_types[0]].schema:
        facets.extend(types[doc_types[0]].schema['facets'].items())
    for audit_facet in self._audit_facets:
        if (search_audit and 'group.submitter' in self._principals or
                'INTERNAL_ACTION' not in audit_facet[0]):
            facets.append(audit_facet)
    query['aggs'] = set_facets(facets, used_filters, self._principals, doc_types)
    query = sort_query(query)
    do_scan = size is None or size > 1000
    if not self._request.params.get('type') or 'Item' in doc_types:
        es_index = RESOURCES_INDEX
    else:
        es_index = [
            types[type_name].item_type
            for type_name in doc_types
            if hasattr(types[type_name], 'item_type')
        ]
    if do_scan:
        es_results = self._elastic_search.search(
            body=query,
            index=es_index,
            search_type='query_then_fetch'
        )
    else:
        es_results = self._elastic_search.search(
            body=query,
            index=es_index,
            from_=from_,
            size=size,
            request_cache=True
        )
    total = es_results['hits']['total']
    result['total'] = total
    schemas = (types[item_type].schema for item_type in doc_types)
    result['facets'] = self._format_facets(
        es_results,
        facets,
        used_filters,
        schemas,
        total,
        self._principals
    )
    if search_result_actions:
        result.update(
            search_result_actions(self._request, doc_types, es_results)
        )
    if size is not None and size < result['total']:
        params = [
            (k, v)
            for k, v in self._request.params.items()
            if k != 'limit'
        ]
        params.append(('limit', 'all'))
        result['all'] = '%s?%s' % (
            self._request.resource_path(self._context),
            urlencode(params)
        )
    if not result['total']:
        self._request.response.status_code = 404
        result['notification'] = 'No results found'
        result['@graph'] = []
        return result if not self._return_generator else []
    result['notification'] = 'Success'
    if not do_scan:
        graph = format_results(self._request, es_results['hits']['hits'], result)
        if self._return_generator:
            return graph
        result['@graph'] = list(graph)
        return result
    del query['aggs']
    if size is None:
        hits = scan(
            self._elastic_search,
            query=query,
            index=es_index,
            preserve_order=False
        )
    else:
        hits = scan(
            self._elastic_search,
            query=query,
            index=es_index,
            from_=from_,
            size=size,
            preserve_order=False
        )
    graph = format_results(self._request, hits, result)
    if self._request.__parent__ is not None or self._return_generator:
        if self._return_generator:
            return graph
        result['@graph'] = list(graph)
        return result
    app_iter = iter_long_json('@graph', graph, result)
    self._request.response.content_type = 'application/json'
    if str is bytes:  # Python 2 vs 3 wsgi differences
        self._request.response.app_iter = app_iter  # Python 2
    else:
        self._request.response.app_iter = (
            item.encode('utf-8') for item in app_iter
        )
    return self._request.response
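# Hedged sketch: how a Pyramid view callable might hand off to preprocess_view.
# The route name, permission, and SearchView constructor signature are
# assumptions for illustration; only preprocess_view's keyword arguments come
# from the method above.
from pyramid.view import view_config


@view_config(route_name='search', request_method='GET', permission='search')
def search(context, request):
    # Hypothetical constructor: the real class wiring lives elsewhere.
    search_view = SearchView(context, request)
    return search_view.preprocess_view(
        views=None,
        search_result_actions=None,
    )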
def preprocess_view(self):
    '''
    Main function to construct query and build view results json
    * Only publicly accessible function
    '''
    self._result['@id'] = '/news/' + self._search_base
    self._result['@type'] = ['News']
    self._result['notification'] = ''
    doc_types = ['Page']
    search_fields, _ = get_search_fields(self._request, doc_types)
    query = get_filtered_query(
        '*',
        search_fields,
        sorted(list_result_fields(self._request, doc_types)),
        self._principals,
        doc_types
    )
    del query['query']['query_string']
    sort = OrderedDict()
    result_sort = OrderedDict()
    sort['embedded.date_created'] = result_sort['date_created'] = {
        'order': 'desc',
        'unmapped_type': 'keyword',
    }
    # Elasticsearch sorts on the embedded field; the response echoes the
    # simpler field name back to the client.
    query['sort'] = sort
    self._result['sort'] = result_sort
    used_filters = set_filters(
        self._request,
        query,
        self._result,
        [('type', 'Page'), ('news', 'true'), ('status', 'released')]
    )
    facets = []
    if len(doc_types) == 1 and 'facets' in self._types[doc_types[0]].schema:
        facets.extend(self._types[doc_types[0]].schema['facets'].items())
    query['aggs'] = set_facets(facets, used_filters, self._principals, doc_types)
    es_results = self._elastic_search.search(
        body=query,
        index=self._es_index,
        doc_type=self._es_index,
        from_=0,
        size=25
    )
    total = es_results['hits']['total']
    if not total:
        self._request.response.status_code = 404
        self._result['notification'] = 'No results found'
        self._result['@graph'] = []
        return self._result
    self._result['notification'] = 'Success'
    self._result['total'] = total
    graph = format_results(
        self._request,
        es_results['hits']['hits'],
        self._result
    )
    self._result['@graph'] = list(graph)
    schemas = [
        self._types[doc_type].schema
        for doc_type in doc_types
    ]
    self._result['facets'] = self._format_facets(
        es_results,
        facets,
        used_filters,
        schemas,
        total,
        self._principals
    )
    return self._result
def preprocess_view(self, views=None, search_result_actions=None):
    # pylint: disable=too-many-statements, too-many-branches, too-many-locals
    '''
    Main function to construct query and build view results json
    * Only publicly accessible function
    '''
    types = self._types
    search_base = normalize_query(self._request)
    result = {
        '@context': self._request.route_path('jsonld_context'),
        '@id': '/search/' + search_base,
        '@type': ['Search'],
        'title': 'Search',
        'filters': [],
    }
    es_index = RESOURCES_INDEX
    search_audit = self._request.has_permission('search_audit')
    from_, size = get_pagination(self._request)
    search_term = prepare_search_term(self._request)
    if (
            hasattr(self._context, 'type_info') and
            hasattr(self._context.type_info, 'name') and
            self._context.type_info.name
    ):
        doc_types = [self._context.type_info.name]
    else:
        doc_types = self._request.params.getall('type')
        if '*' in doc_types:
            doc_types = ['Item']
    # Normalize to item_type
    try:
        doc_types = sorted({types[name].name for name in doc_types})
    except KeyError:
        # Check for invalid types
        bad_types = [t for t in doc_types if t not in types]
        msg = "Invalid type: {}".format(', '.join(bad_types))
        raise HTTPBadRequest(explanation=msg)
    searchterm_specs = self._request.params.getall('searchTerm')
    searchterm_only = urlencode(
        [
            ("searchTerm", searchterm)
            for searchterm in searchterm_specs
        ]
    )
    if searchterm_only:
        clear_qs = searchterm_only
    else:
        clear_qs = urlencode([("type", typ) for typ in doc_types])
    search_route = self._request.route_path('search', slash='/')
    clear_route = '?' + clear_qs if clear_qs else ''
    result['clear_filters'] = search_route + clear_route
    if not doc_types:
        if self._request.params.get('mode') == 'picker':
            doc_types = ['Item']
        else:
            doc_types = self._default_doc_types
    else:
        for item_type in doc_types:
            t_thing = types[item_type]
            q_thing = urlencode(
                [
                    (k.encode('utf-8'), v.encode('utf-8'))
                    for k, v in self._request.params.items()
                    if not (k == 'type' and types['Item' if v == '*' else v] is t_thing)
                ]
            )
            result['filters'].append({
                'field': 'type',
                'term': t_thing.name,
                'remove': '{}?{}'.format(self._request.path, q_thing)
            })
        if views:
            result['views'] = views
    search_fields, _ = get_search_fields(self._request, doc_types)
    query = get_filtered_query(
        search_term,
        search_fields,
        sorted(list_result_fields(self._request, doc_types)),
        self._principals,
        doc_types,
    )
    schemas = [types[doc_type].schema for doc_type in doc_types]
    columns = list_visible_columns_for_schemas(self._request, schemas)
    if columns:
        result['columns'] = columns
    if search_term == '*':
        del query['query']['query_string']
    else:
        query['query']['query_string']['fields'].extend(
            ['_all', '*.uuid', '*.md5sum', '*.submitted_file_name']
        )
    set_sort_order(self._request, search_term, types, doc_types, query, result)
    used_filters = set_filters(self._request, query, result)
    facets = [
        ('type', {'title': 'Data Type'}),
    ]
    if len(doc_types) == 1 and 'facets' in types[doc_types[0]].schema:
        facets.extend(types[doc_types[0]].schema['facets'].items())
    for audit_facet in self._audit_facets:
        if (
                search_audit and
                'group.submitter' in self._principals or
                'INTERNAL_ACTION' not in audit_facet[0]
        ):
            facets.append(audit_facet)
    query['aggs'] = set_facets(facets, used_filters, self._principals, doc_types)
    query = sort_query(query)
    do_scan = size is None or size > 1000
    if not self._request.params.get('type') or 'Item' in doc_types:
        es_index = RESOURCES_INDEX
    else:
        es_index = [
            types[type_name].item_type
            for type_name in doc_types
            if hasattr(types[type_name], 'item_type')
        ]
    if do_scan:
        es_results = self._elastic_search.search(
            body=query,
            index=es_index,
            search_type='query_then_fetch'
        )
    else:
        es_results = self._elastic_search.search(
            body=query,
            index=es_index,
            from_=from_,
            size=size,
            request_cache=True
        )
    total = es_results['hits']['total']
    result['total'] = total
    schemas = (types[item_type].schema for item_type in doc_types)
    result['facets'] = self._format_facets(
        es_results,
        facets,
        used_filters,
        schemas,
        total,
        self._principals
    )
    if search_result_actions:
        result.update(
            search_result_actions(
                self._request,
                doc_types,
                es_results
            )
        )
    if size is not None and size < result['total']:
        params = [
            (k, v)
            for k, v in self._request.params.items()
            if k != 'limit'
        ]
        params.append(('limit', 'all'))
        result['all'] = '%s?%s' % (
            self._request.resource_path(self._context),
            urlencode(params)
        )
    if not result['total']:
        self._request.response.status_code = 404
        result['notification'] = 'No results found'
        result['@graph'] = []
        return result if not self._return_generator else []
    result['notification'] = 'Success'
    if not do_scan:
        graph = format_results(
            self._request,
            es_results['hits']['hits'],
            result
        )
        if self._return_generator:
            return graph
        result['@graph'] = list(graph)
        return result
    del query['aggs']
    if size is None:
        hits = scan(
            self._elastic_search,
            query=query,
            index=es_index,
            preserve_order=False
        )
    else:
        hits = scan(
            self._elastic_search,
            query=query,
            index=es_index,
            from_=from_,
            size=size,
            preserve_order=False
        )
    graph = format_results(self._request, hits, result)
    if self._request.__parent__ is not None or self._return_generator:
        if self._return_generator:
            return graph
        result['@graph'] = list(graph)
        return result
    app_iter = iter_long_json('@graph', graph, result)
    self._request.response.content_type = 'application/json'
    if str is bytes:  # Python 2 vs 3 wsgi differences
        self._request.response.app_iter = app_iter  # Python 2
    else:
        self._request.response.app_iter = (
            item.encode('utf-8') for item in app_iter
        )
    return self._request.response