def preprocess_view(self):
    '''
    Main function to construct query and build view results json.

    * Only publicly accessible function.

    Builds ``self._result`` in place and returns it: fills in matrix
    metadata, runs the Elasticsearch matrix query, copies the x/y bucket
    aggregations into the result, and formats facets.
    '''
    matrix_route = self._request.route_path('matrix', slash='/')
    self._result['@id'] = matrix_route + self._search_base
    self._result['@type'] = ['Matrix']
    self._result['notification'] = ''
    # TODO: Validate doc types in base class in one location
    # Now we do it here and in _validate_items
    type_info = None
    if len(self._doc_types) == 1:
        if self._doc_types[0] in self._types:
            type_info = self._types[self._doc_types[0]]
            self._schema = type_info.schema
    self._validate_items(type_info)
    self._result['title'] = self._set_result_title(type_info)
    # Deep-copy the matrix definition: the limits and search bases below are
    # written into nested sub-dicts, so a shallow .copy() would mutate the
    # shared type_info.factory.matrix for every subsequent request.
    self._result['matrix'] = copy.deepcopy(type_info.factory.matrix)
    self._matrix = self._result['matrix']
    self._matrix['x']['limit'] = self._request.params.get('x.limit', 20)
    self._matrix['y']['limit'] = self._request.params.get('y.limit', 5)
    search_route = self._request.route_path('search', slash='/')
    self._matrix['search_base'] = search_route + self._search_base
    # Reuse matrix_route computed above instead of resolving the route again.
    self._matrix['clear_matrix'] = matrix_route + '?type=' + self._doc_types[0]
    self._result['views'] = self._construct_result_views(type_info)
    query, used_filters = self._construct_query()
    es_results = self._elastic_search.search(body=query, index=self._es_index)
    aggregations = es_results['aggregations']
    total = aggregations['matrix']['doc_count']
    self._result['matrix']['doc_count'] = total
    self._result['matrix']['max_cell_doc_count'] = 0
    self._result['facets'] = self._format_facets(
        es_results,
        self._facets,
        used_filters,
        (self._schema, ),
        total,
        self._principals)
    x_grouping = self._matrix['x']['group_by']
    y_groupings = self._matrix['y']['group_by']
    self._summarize_buckets(
        aggregations['matrix']['x']['buckets'],
        aggregations['matrix'],
        y_groupings + [x_grouping])
    self._result['matrix']['y'][y_groupings[0]] = aggregations['matrix'][
        y_groupings[0]]
    self._result['matrix']['x'].update(aggregations['matrix']['x'])
    self._result.update(
        search_result_actions(self._request, self._doc_types, es_results))
    self._result['total'] = es_results['hits']['total']
    if self._result['total']:
        self._result['notification'] = 'Success'
    else:
        # No hits: signal 404 so clients can distinguish an empty result set.
        self._request.response.status_code = 404
        self._result['notification'] = 'No results found'
    return self._result
def region_search(context, request):
    """
    Search files by region.

    Resolves the requested region/annotation to genomic coordinates, finds
    peak files overlapping those coordinates in the SNP index, then returns
    the experiments owning those files, with facets and peak details.
    """
    types = request.registry[TYPES]
    result = {
        '@id': '/region-search/' + ('?' + request.query_string.split('&referrer')[0]
                                    if request.query_string else ''),
        '@type': ['region-search'],
        'title': 'Search by region',
        'facets': [],
        '@graph': [],
        'columns': OrderedDict(),
        'notification': '',
        'filters': []
    }
    principals = effective_principals(request)
    es = request.registry[ELASTIC_SEARCH]
    snp_es = request.registry['snp_search']
    region = request.params.get('region', '*')
    region_inside_peak_status = False

    # Handle limit: 'all'/'' means effectively unbounded; non-numeric input
    # falls back to the default page size.
    size = request.params.get('limit', 25)
    if size in ('all', ''):
        size = 99999
    else:
        try:
            size = int(size)
        except ValueError:
            size = 25
    if region == '':
        region = '*'
    assembly = request.params.get('genome', '*')
    result['assembly'] = _GENOME_TO_ALIAS.get(assembly, 'GRCh38')
    annotation = request.params.get('annotation', '*')
    chromosome, start, end = ('', '', '')
    if annotation != '*':
        if annotation.lower().startswith('ens'):
            chromosome, start, end = get_ensemblid_coordinates(
                annotation, assembly)
        else:
            chromosome, start, end = get_annotation_coordinates(
                es, annotation, assembly)
    elif region != '*':
        region = region.lower()
        if region.startswith('rs'):
            sanitized_region = sanitize_rsid(region)
            chromosome, start, end = get_rsid_coordinates(
                sanitized_region, assembly)
            region_inside_peak_status = True
        elif region.startswith('ens'):
            chromosome, start, end = get_ensemblid_coordinates(
                region, assembly)
        elif region.startswith('chr'):
            chromosome, start, end = sanitize_coordinates(region)
        else:
            chromosome, start, end = ('', '', '')
    # Check if there are valid coordinates
    if not chromosome or not start or not end:
        result['notification'] = 'No annotations found'
        return result
    else:
        result['coordinates'] = '{chr}:{start}-{end}'.format(
            chr=chromosome, start=start, end=end)

    # Search for peaks for the coordinates we got
    try:
        # including inner hits is very slow
        # figure out how to distinguish browser requests from
        # .embed method requests
        if 'peak_metadata' in request.query_string:
            peak_query = get_peak_query(start, end, with_inner_hits=True,
                                        within_peaks=region_inside_peak_status)
        else:
            peak_query = get_peak_query(start, end,
                                        within_peaks=region_inside_peak_status)
        # Default the doc_type alias to GRCh38, matching result['assembly']
        # above; direct indexing raised KeyError for requests without a
        # genome param ('*'), which surfaced as 'Error during search'.
        peak_results = snp_es.search(body=peak_query,
                                     index=chromosome.lower(),
                                     doc_type=_GENOME_TO_ALIAS.get(assembly, 'GRCh38'),
                                     size=99999)
    except Exception:
        # Best-effort: report a generic failure to the client, but keep the
        # cause in the log instead of swallowing it silently.
        log.exception('Region search peak query failed')
        result['notification'] = 'Error during search'
        return result
    # Dedupe peak file uuids; order is not significant downstream (the
    # original list(set(...)) already discarded it).
    file_uuids = list({hit['_id'] for hit in peak_results['hits']['hits']})
    result['notification'] = 'No results found'

    # if more than one peak found return the experiments with those peak files
    uuid_count = len(file_uuids)
    if uuid_count > MAX_CLAUSES_FOR_ES:
        # Elasticsearch limits the number of terms per clause; truncate.
        log.error("REGION_SEARCH WARNING: region with %d file_uuids is being restricted to %d" %
                  (uuid_count, MAX_CLAUSES_FOR_ES))
        file_uuids = file_uuids[:MAX_CLAUSES_FOR_ES]
        uuid_count = len(file_uuids)
    if uuid_count:
        query = get_filtered_query('', [], set(), principals, ['Experiment'])
        del query['query']
        query['post_filter']['bool']['must'].append(
            {'terms': {
                'embedded.files.uuid': file_uuids
            }})
        used_filters = set_filters(request, query, result)
        used_filters['files.uuid'] = file_uuids
        query['aggs'] = set_facets(_FACETS, used_filters, principals,
                                   ['Experiment'])
        schemas = (types[item_type].schema for item_type in ['Experiment'])
        es_results = es.search(body=query, index='experiment',
                               doc_type='experiment', size=size,
                               request_timeout=60)
        result['@graph'] = list(
            format_results(request, es_results['hits']['hits']))
        result['total'] = total = es_results['hits']['total']
        result['facets'] = BaseView._format_facets(es_results, _FACETS,
                                                   used_filters, schemas,
                                                   total, principals)
        result['peaks'] = list(peak_results['hits']['hits'])
        result['download_elements'] = get_peak_metadata_links(request)
        if result['total'] > 0:
            result['notification'] = 'Success'
            position_for_browser = format_position(result['coordinates'], 200)
            result.update(
                search_result_actions(request, ['Experiment'], es_results,
                                      position=position_for_browser))
    return result
def preprocess_view(self):
    '''
    Main function to construct query and build view results json.

    * Only publicly accessible function.

    Builds and returns a fresh ``search_result`` dict (this variant does
    not mutate shared view state).
    '''
    matrix_route = self._request.route_path('matrix', slash='/')
    result_filters = {'filters': []}
    # TODO: Validate doc types in base class in one location
    # Now we do it here and in _validate_items
    type_info = None
    if len(self._doc_types) == 1:
        if self._doc_types[0] in self._types:
            type_info = self._types[self._doc_types[0]]
            self._schema = type_info.schema
    self._validate_items(type_info)
    # Deep-copy the matrix definition: the limits and search bases below are
    # written into nested sub-dicts, so a shallow .copy() would leak those
    # per-request edits into the shared type_info.factory.matrix.
    matrix = copy.deepcopy(type_info.factory.matrix)
    matrix['x']['limit'] = self._request.params.get('x.limit', 20)
    matrix['y']['limit'] = self._request.params.get('y.limit', 5)
    search_route = self._request.route_path('search', slash='/')
    matrix['search_base'] = search_route + self._search_base
    matrix['clear_matrix'] = matrix_route + '?type=' + self._doc_types[0]
    matrix_x_y = {
        'x': matrix['x'],
        'y': matrix['y'],
    }
    search_query, used_filters = self._construct_query(
        result_filters,
        matrix_x_y
    )
    es_results = self._elastic_search.search(
        body=search_query,
        index=self._es_index
    )
    aggregations = es_results['aggregations']
    aggregations_total = aggregations['matrix']['doc_count']
    matrix['doc_count'] = aggregations_total
    matrix['max_cell_doc_count'] = 0
    x_grouping = matrix['x']['group_by']
    y_groupings = matrix['y']['group_by']
    matrix['y'][y_groupings[0]] = aggregations['matrix'][y_groupings[0]]
    matrix['x'].update(aggregations['matrix']['x'])
    self._summarize_buckets(
        aggregations['matrix']['x']['buckets'],
        aggregations['matrix'],
        y_groupings + [x_grouping],
        matrix
    )
    facets = self._format_facets(
        es_results,
        self._facets,
        used_filters,
        (self._schema,),
        aggregations_total,
        self._principals
    )
    es_total = es_results['hits']['total']
    notification = 'Success'
    if not es_total:
        # No hits: signal 404 so clients can distinguish an empty result set.
        self._request.response.status_code = 404
        notification = 'No results found'
    search_result = {
        '@context': self._request.route_path('jsonld_context'),
        'filters': result_filters['filters'],
        '@id': matrix_route + self._search_base,
        '@type': ['Matrix'],
        'title': self._set_result_title(type_info),
        'matrix': matrix,
        'views': self._construct_result_views(type_info),
        'facets': facets,
        'total': es_total,
        'notification': notification,
    }
    search_result.update(
        search_result_actions(
            self._request,
            self._doc_types,
            es_results
        )
    )
    return search_result
def preprocess_view(self):
    '''
    Main function to construct query and build view results json.

    * Only publicly accessible function.

    Builds and returns a fresh ``search_result`` dict (this variant does
    not mutate shared view state).
    '''
    matrix_route = self._request.route_path('matrix', slash='/')
    result_filters = {'filters': []}
    # TODO: Validate doc types in base class in one location
    # Now we do it here and in _validate_items
    type_info = None
    if len(self._doc_types) == 1:
        if self._doc_types[0] in self._types:
            type_info = self._types[self._doc_types[0]]
            self._schema = type_info.schema
    self._validate_items(type_info)
    # Deep-copy the matrix definition: the limits and search bases below are
    # written into nested sub-dicts, so a shallow .copy() would leak those
    # per-request edits into the shared type_info.factory.matrix.
    matrix = copy.deepcopy(type_info.factory.matrix)
    matrix['x']['limit'] = self._request.params.get('x.limit', 20)
    matrix['y']['limit'] = self._request.params.get('y.limit', 5)
    search_route = self._request.route_path('search', slash='/')
    matrix['search_base'] = search_route + self._search_base
    matrix['clear_matrix'] = matrix_route + '?type=' + self._doc_types[0]
    matrix_x_y = {
        'x': matrix['x'],
        'y': matrix['y'],
    }
    search_query, used_filters = self._construct_query(
        result_filters, matrix_x_y)
    es_results = self._elastic_search.search(body=search_query,
                                             index=self._es_index)
    aggregations = es_results['aggregations']
    aggregations_total = aggregations['matrix']['doc_count']
    matrix['doc_count'] = aggregations_total
    matrix['max_cell_doc_count'] = 0
    x_grouping = matrix['x']['group_by']
    y_groupings = matrix['y']['group_by']
    matrix['y'][y_groupings[0]] = aggregations['matrix'][y_groupings[0]]
    matrix['x'].update(aggregations['matrix']['x'])
    self._summarize_buckets(aggregations['matrix']['x']['buckets'],
                            aggregations['matrix'],
                            y_groupings + [x_grouping],
                            matrix)
    facets = self._format_facets(es_results,
                                 self._facets,
                                 used_filters,
                                 (self._schema, ),
                                 aggregations_total,
                                 self._principals)
    es_total = es_results['hits']['total']
    notification = 'Success'
    if not es_total:
        # No hits: signal 404 so clients can distinguish an empty result set.
        self._request.response.status_code = 404
        notification = 'No results found'
    search_result = {
        '@context': self._request.route_path('jsonld_context'),
        'filters': result_filters['filters'],
        '@id': matrix_route + self._search_base,
        '@type': ['Matrix'],
        'title': self._set_result_title(type_info),
        'matrix': matrix,
        'views': self._construct_result_views(type_info),
        'facets': facets,
        'total': es_total,
        'notification': notification,
    }
    search_result.update(
        search_result_actions(self._request, self._doc_types, es_results))
    return search_result
def preprocess_view(self):
    '''
    Main function to construct query and build view results json.

    * Only publicly accessible function.

    Audit-matrix variant: x buckets are grouped by audit categories and a
    "no audits" summary is added alongside the regular matrix summary.
    '''
    audit_route = self._request.route_path('audit', slash='/')
    self._result['@id'] = audit_route + self._search_base
    self._result['@type'] = ['AuditMatrix']
    self._result['notification'] = ''
    # TODO: Validate doc types in base class in one location
    # Now we do it here and in _validate_items
    type_info = None
    if len(self._doc_types) == 1:
        if self._doc_types[0] in self._types:
            type_info = self._types[self._doc_types[0]]
            self._schema = type_info.schema
    self._validate_items(type_info)
    self._result['title'] = self._set_result_title(type_info)
    # Because the formatting of the query edits the sub-objects of the matrix, we need to
    # deepcopy the matrix so the original type_info.factory.matrix is not modified, allowing
    # /matrix to get the correct data and to not be able to access the /audit data.
    self._result['matrix'] = copy.deepcopy(type_info.factory.matrix)
    self._matrix = self._result['matrix']
    self._matrix['x']['limit'] = self._request.params.get('x.limit', 20)
    self._matrix['y']['limit'] = self._request.params.get('y.limit', 5)
    search_route = self._request.route_path('search', slash='/')
    self._matrix['search_base'] = search_route + self._search_base
    # Reuse audit_route computed above instead of resolving the route again.
    self._matrix['clear_matrix'] = '{}?type={}'.format(
        audit_route,
        self._doc_types[0],
    )
    self._result['views'] = [
        self._view_item.result_list,
        self._view_item.tabular_report
    ]
    query, audit_field_list, used_filters = self._construct_query()
    es_results = self._elastic_search.search(body=query, index=self._es_index)
    aggregations = es_results['aggregations']
    total = aggregations['matrix']['doc_count']
    self._result['matrix']['doc_count'] = total
    self._result['matrix']['max_cell_doc_count'] = 0
    self._result['facets'] = self._format_facets(
        es_results,
        self._facets,
        used_filters,
        (self._schema,),
        total,
        self._principals
    )
    doc_type = self._doc_types[0].lower()
    # Annotations are bucketed by annotation_type; all other doc types by
    # assay_title.
    bucket_key = (
        'annotation_type' if doc_type == 'annotation' else 'assay_title')
    self._summarize_buckets(
        aggregations['matrix']['x']['buckets'],
        aggregations['matrix'],
        audit_field_list,
        bucket_key)
    self._summarize_no_audits(
        aggregations['matrix']['x']['buckets'],
        aggregations['matrix'],
        self._no_audits_groupings,
        aggregations['matrix'],
        bucket_key)
    self._update_aggregations(aggregations)
    self._result.update(
        search_result_actions(
            self._request,
            self._doc_types,
            es_results
        )
    )
    self._result['total'] = es_results['hits']['total']
    if self._result['total']:
        self._result['notification'] = 'Success'
    else:
        # No hits: signal 404 so clients can distinguish an empty result set.
        self._request.response.status_code = 404
        self._result['notification'] = 'No results found'
    return self._result
def region_search(context, request):
    """
    Search files by region.

    Resolves the requested region/annotation to genomic coordinates, finds
    peak files overlapping those coordinates in the SNP index, then returns
    the experiments owning those files, with facets and peak details.
    """
    types = request.registry[TYPES]
    result = {
        '@id': '/region-search/' + ('?' + request.query_string.split('&referrer')[0]
                                    if request.query_string else ''),
        '@type': ['region-search'],
        'title': 'Search by region',
        'facets': [],
        '@graph': [],
        'columns': OrderedDict(),
        'notification': '',
        'filters': []
    }
    principals = effective_principals(request)
    es = request.registry[ELASTIC_SEARCH]
    snp_es = request.registry['snp_search']
    region = request.params.get('region', '*')
    region_inside_peak_status = False

    # Handle limit: 'all'/'' means effectively unbounded; non-numeric input
    # falls back to the default page size.
    size = request.params.get('limit', 25)
    if size in ('all', ''):
        size = 99999
    else:
        try:
            size = int(size)
        except ValueError:
            size = 25
    if region == '':
        region = '*'
    assembly = request.params.get('genome', '*')
    result['assembly'] = _GENOME_TO_ALIAS.get(assembly, 'GRCh38')
    annotation = request.params.get('annotation', '*')
    chromosome, start, end = ('', '', '')
    if annotation != '*':
        if annotation.lower().startswith('ens'):
            chromosome, start, end = get_ensemblid_coordinates(annotation, assembly)
        else:
            chromosome, start, end = get_annotation_coordinates(es, annotation, assembly)
    elif region != '*':
        region = region.lower()
        if region.startswith('rs'):
            sanitized_region = sanitize_rsid(region)
            chromosome, start, end = get_rsid_coordinates(sanitized_region, assembly)
            region_inside_peak_status = True
        elif region.startswith('ens'):
            chromosome, start, end = get_ensemblid_coordinates(region, assembly)
        elif region.startswith('chr'):
            chromosome, start, end = sanitize_coordinates(region)
        else:
            chromosome, start, end = ('', '', '')
    # Check if there are valid coordinates
    if not chromosome or not start or not end:
        result['notification'] = 'No annotations found'
        return result
    else:
        result['coordinates'] = '{chr}:{start}-{end}'.format(
            chr=chromosome, start=start, end=end
        )

    # Search for peaks for the coordinates we got
    try:
        # including inner hits is very slow
        # figure out how to distinguish browser requests from
        # .embed method requests
        if 'peak_metadata' in request.query_string:
            peak_query = get_peak_query(start, end, with_inner_hits=True,
                                        within_peaks=region_inside_peak_status)
        else:
            peak_query = get_peak_query(start, end,
                                        within_peaks=region_inside_peak_status)
        # Default the doc_type alias to GRCh38, matching result['assembly']
        # above; direct indexing raised KeyError for requests without a
        # genome param ('*'), which surfaced as 'Error during search'.
        peak_results = snp_es.search(body=peak_query,
                                     index=chromosome.lower(),
                                     doc_type=_GENOME_TO_ALIAS.get(assembly, 'GRCh38'),
                                     size=99999)
    except Exception:
        # Best-effort: report a generic failure to the client, but keep the
        # cause in the log instead of swallowing it silently.
        log.exception('Region search peak query failed')
        result['notification'] = 'Error during search'
        return result
    # Dedupe peak file uuids; order is not significant downstream (the
    # original list(set(...)) already discarded it).
    file_uuids = list({hit['_id'] for hit in peak_results['hits']['hits']})
    result['notification'] = 'No results found'

    # if more than one peak found return the experiments with those peak files
    uuid_count = len(file_uuids)
    if uuid_count > MAX_CLAUSES_FOR_ES:
        # Elasticsearch limits the number of terms per clause; truncate.
        log.error("REGION_SEARCH WARNING: region with %d file_uuids is being restricted to %d" %
                  (uuid_count, MAX_CLAUSES_FOR_ES))
        file_uuids = file_uuids[:MAX_CLAUSES_FOR_ES]
        uuid_count = len(file_uuids)
    if uuid_count:
        query = get_filtered_query('', [], set(), principals, ['Experiment'])
        del query['query']
        query['post_filter']['bool']['must'].append({
            'terms': {
                'embedded.files.uuid': file_uuids
            }
        })
        used_filters = set_filters(request, query, result)
        used_filters['files.uuid'] = file_uuids
        query['aggs'] = set_facets(_FACETS, used_filters, principals, ['Experiment'])
        schemas = (types[item_type].schema for item_type in ['Experiment'])
        es_results = es.search(
            body=query, index='experiment', doc_type='experiment',
            size=size, request_timeout=60
        )
        result['@graph'] = list(format_results(request, es_results['hits']['hits']))
        result['total'] = total = es_results['hits']['total']
        result['facets'] = BaseView._format_facets(es_results, _FACETS,
                                                   used_filters, schemas,
                                                   total, principals)
        result['peaks'] = list(peak_results['hits']['hits'])
        result['download_elements'] = get_peak_metadata_links(request)
        if result['total'] > 0:
            result['notification'] = 'Success'
            position_for_browser = format_position(result['coordinates'], 200)
            result.update(
                search_result_actions(request, ['Experiment'], es_results,
                                      position=position_for_browser))
    return result
def preprocess_view(self):
    '''
    Main function to construct query and build view results json
    * Only publicly accessible function

    Audit-matrix variant: fills self._result in place with matrix metadata,
    runs the Elasticsearch query, summarizes audit buckets (including the
    "no audits" grouping), and returns self._result.
    '''
    audit_route = self._request.route_path('audit', slash='/')
    self._result['@id'] = audit_route + self._search_base
    self._result['@type'] = ['AuditMatrix']
    self._result['notification'] = ''
    # TODO: Validate doc types in base class in one location
    # Now we do it here and in _validate_items
    type_info = None
    if len(self._doc_types) == 1:
        if self._doc_types[0] in self._types:
            type_info = self._types[self._doc_types[0]]
            self._schema = type_info.schema
    self._validate_items(type_info)
    self._result['title'] = self._set_result_title(type_info)
    # Because the formatting of the query edits the sub-objects of the matrix, we need to
    # deepcopy the matrix so the original type_info.factory.matrix is not modified, allowing
    # /matrix to get the correct data and to not be able to access the /audit data.
    self._result['matrix'] = copy.deepcopy(type_info.factory.matrix)
    self._matrix = self._result['matrix']
    self._matrix['x']['limit'] = self._request.params.get('x.limit', 20)
    self._matrix['y']['limit'] = self._request.params.get('y.limit', 5)
    search_route = self._request.route_path('search', slash='/')
    self._matrix['search_base'] = search_route + self._search_base
    # NOTE(review): clear_matrix points at the 'matrix' route while @id above
    # uses the 'audit' route — confirm this is intentional (a sibling variant
    # of this method builds clear_matrix from the audit route instead).
    self._matrix['clear_matrix'] = '{}?type={}'.format(
        self._request.route_path('matrix', slash='/'),
        self._doc_types[0],
    )
    self._result['views'] = [
        self._view_item.result_list,
        self._view_item.tabular_report
    ]
    # _construct_query also yields the audit field list used to summarize
    # the x buckets below.
    query, audit_field_list, used_filters = self._construct_query()
    es_results = self._elastic_search.search(body=query, index=self._es_index)
    aggregations = es_results['aggregations']
    total = aggregations['matrix']['doc_count']
    self._result['matrix']['doc_count'] = total
    self._result['matrix']['max_cell_doc_count'] = 0
    self._result['facets'] = self._format_facets(es_results,
                                                 self._facets,
                                                 used_filters,
                                                 (self._schema, ),
                                                 total,
                                                 self._principals)
    self._summarize_buckets(aggregations['matrix']['x']['buckets'],
                            aggregations['matrix'],
                            audit_field_list)
    # NOTE(review): aggregations['matrix'] is passed twice here — presumably
    # once as the summary target and once as the bucket source; verify
    # against _summarize_no_audits's signature.
    self._summarize_no_audits(aggregations['matrix']['x']['buckets'],
                              aggregations['matrix'],
                              self._no_audits_groupings,
                              aggregations['matrix'])
    self._update_aggregations(aggregations)
    self._result.update(
        search_result_actions(self._request, self._doc_types, es_results))
    self._result['total'] = es_results['hits']['total']
    if self._result['total']:
        self._result['notification'] = 'Success'
    else:
        # No hits: signal 404 so clients can distinguish an empty result set.
        self._request.response.status_code = 404
        self._result['notification'] = 'No results found'
    return self._result