Example #1
 def preprocess_view(self):
     '''
     Main function to construct query and build view results json
     * Only publicly accessible function
     '''
     matrix_route = self._request.route_path('matrix', slash='/')
     self._result['@id'] = matrix_route + self._search_base
     self._result['@type'] = ['Matrix']
     self._result['notification'] = ''
     # TODO: Validate doc types in base class in one location
     # Now we do it here and in _validate_items
     type_info = None
     if len(self._doc_types) == 1:
         if self._doc_types[0] in self._types:
             type_info = self._types[self._doc_types[0]]
             self._schema = type_info.schema
     self._validate_items(type_info)
     self._result['title'] = self._set_result_title(type_info)
     self._result['matrix'] = type_info.factory.matrix.copy()
     self._matrix = self._result['matrix']
     self._matrix['x']['limit'] = self._request.params.get('x.limit', 20)
     self._matrix['y']['limit'] = self._request.params.get('y.limit', 5)
     search_route = self._request.route_path('search', slash='/')
     self._matrix['search_base'] = search_route + self._search_base
     matrix_route = self._request.route_path('matrix', slash='/')
     self._matrix[
         'clear_matrix'] = matrix_route + '?type=' + self._doc_types[0]
     self._result['views'] = self._construct_result_views(type_info)
     query, used_filters = self._construct_query()
     es_results = self._elastic_search.search(body=query,
                                              index=self._es_index)
     aggregations = es_results['aggregations']
     total = aggregations['matrix']['doc_count']
     self._result['matrix']['doc_count'] = total
     self._result['matrix']['max_cell_doc_count'] = 0
     self._result['facets'] = self._format_facets(es_results, self._facets,
                                                  used_filters,
                                                  (self._schema, ), total,
                                                  self._principals)
     x_grouping = self._matrix['x']['group_by']
     y_groupings = self._matrix['y']['group_by']
     self._summarize_buckets(aggregations['matrix']['x']['buckets'],
                             aggregations['matrix'],
                             y_groupings + [x_grouping])
     self._result['matrix']['y'][y_groupings[0]] = aggregations['matrix'][
         y_groupings[0]]
     self._result['matrix']['x'].update(aggregations['matrix']['x'])
     self._result.update(
         search_result_actions(self._request, self._doc_types, es_results))
     self._result['total'] = es_results['hits']['total']
     if self._result['total']:
         self._result['notification'] = 'Success'
     else:
         self._request.response.status_code = 404
         self._result['notification'] = 'No results found'
     return self._result
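The method above reads everything it needs straight out of the Elasticsearch aggregation response: the matrix document count, the buckets for the first y grouping, and the flat x buckets. A minimal sketch of the response shape those lookups assume; the grouping names (biosample_term_name, assay_title) are illustrative assumptions, not taken from the example.

# Hypothetical shape of es_results['aggregations'] as consumed by preprocess_view.
# Grouping names ('biosample_term_name', 'assay_title') are assumptions for illustration.
aggregations = {
    'matrix': {
        'doc_count': 1234,                 # -> self._result['matrix']['doc_count']
        'biosample_term_name': {           # -> self._result['matrix']['y'][y_groupings[0]]
            'buckets': [
                {'key': 'K562', 'doc_count': 40,
                 'assay_title': {'buckets': [{'key': 'ChIP-seq', 'doc_count': 25}]}},
            ],
        },
        'x': {                             # -> self._result['matrix']['x'].update(...)
            'buckets': [{'key': 'ChIP-seq', 'doc_count': 25}],
        },
    },
}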
Example #2
def region_search(context, request):
    """
    Search files by region.
    """
    types = request.registry[TYPES]
    result = {
        '@id':
        '/region-search/' + ('?' + request.query_string.split('&referrer')[0]
                             if request.query_string else ''),
        '@type': ['region-search'],
        'title':
        'Search by region',
        'facets': [],
        '@graph': [],
        'columns':
        OrderedDict(),
        'notification':
        '',
        'filters': []
    }
    principals = effective_principals(request)
    es = request.registry[ELASTIC_SEARCH]
    snp_es = request.registry['snp_search']
    region = request.params.get('region', '*')
    region_inside_peak_status = False

    # handling limit
    size = request.params.get('limit', 25)
    if size in ('all', ''):
        size = 99999
    else:
        try:
            size = int(size)
        except ValueError:
            size = 25
    if region == '':
        region = '*'

    assembly = request.params.get('genome', '*')
    result['assembly'] = _GENOME_TO_ALIAS.get(assembly, 'GRCh38')
    annotation = request.params.get('annotation', '*')
    chromosome, start, end = ('', '', '')

    if annotation != '*':
        if annotation.lower().startswith('ens'):
            chromosome, start, end = get_ensemblid_coordinates(
                annotation, assembly)
        else:
            chromosome, start, end = get_annotation_coordinates(
                es, annotation, assembly)
    elif region != '*':
        region = region.lower()
        if region.startswith('rs'):
            sanitized_region = sanitize_rsid(region)
            chromosome, start, end = get_rsid_coordinates(
                sanitized_region, assembly)
            region_inside_peak_status = True
        elif region.startswith('ens'):
            chromosome, start, end = get_ensemblid_coordinates(
                region, assembly)
        elif region.startswith('chr'):
            chromosome, start, end = sanitize_coordinates(region)
    else:
        chromosome, start, end = ('', '', '')
    # Check if there are valid coordinates
    if not chromosome or not start or not end:
        result['notification'] = 'No annotations found'
        return result
    else:
        result['coordinates'] = '{chr}:{start}-{end}'.format(chr=chromosome,
                                                             start=start,
                                                             end=end)

    # Search for peaks for the coordinates we got
    try:
        # including inner hits is very slow
        # figure out how to distinguish browser requests from .embed method requests
        if 'peak_metadata' in request.query_string:
            peak_query = get_peak_query(start,
                                        end,
                                        with_inner_hits=True,
                                        within_peaks=region_inside_peak_status)
        else:
            peak_query = get_peak_query(start,
                                        end,
                                        within_peaks=region_inside_peak_status)
        peak_results = snp_es.search(body=peak_query,
                                     index=chromosome.lower(),
                                     doc_type=_GENOME_TO_ALIAS[assembly],
                                     size=99999)
    except Exception:
        result['notification'] = 'Error during search'
        return result
    file_uuids = []
    for hit in peak_results['hits']['hits']:
        if hit['_id'] not in file_uuids:
            file_uuids.append(hit['_id'])
    file_uuids = list(set(file_uuids))
    result['notification'] = 'No results found'

    # if any peaks were found, return the experiments that contain those peak files
    uuid_count = len(file_uuids)
    if uuid_count > MAX_CLAUSES_FOR_ES:
        log.error("REGION_SEARCH WARNING: region with %d file_uuids is being restricted to %d" % \
                                                            (uuid_count, MAX_CLAUSES_FOR_ES))
        file_uuids = file_uuids[:MAX_CLAUSES_FOR_ES]
        uuid_count = len(file_uuids)

    if uuid_count:
        query = get_filtered_query('', [], set(), principals, ['Experiment'])
        del query['query']
        query['post_filter']['bool']['must'].append(
            {'terms': {
                'embedded.files.uuid': file_uuids
            }})
        used_filters = set_filters(request, query, result)
        used_filters['files.uuid'] = file_uuids
        query['aggs'] = set_facets(_FACETS, used_filters, principals,
                                   ['Experiment'])
        schemas = (types[item_type].schema for item_type in ['Experiment'])
        es_results = es.search(body=query,
                               index='experiment',
                               doc_type='experiment',
                               size=size,
                               request_timeout=60)
        result['@graph'] = list(
            format_results(request, es_results['hits']['hits']))
        result['total'] = total = es_results['hits']['total']
        result['facets'] = BaseView._format_facets(es_results, _FACETS,
                                                   used_filters, schemas,
                                                   total, principals)
        result['peaks'] = list(peak_results['hits']['hits'])
        result['download_elements'] = get_peak_metadata_links(request)
        if result['total'] > 0:
            result['notification'] = 'Success'
            position_for_browser = format_position(result['coordinates'], 200)
            result.update(
                search_result_actions(request, ['Experiment'],
                                      es_results,
                                      position=position_for_browser))

    return result
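The coordinate helpers used above (sanitize_coordinates, get_rsid_coordinates, get_ensemblid_coordinates) are imported elsewhere and not shown. As a rough illustration only, a chr-style parser consistent with how the results are used could look like the sketch below; the real sanitize_coordinates may normalize and validate differently.

def parse_chr_region(region):
    """Split a region string such as 'chr1:1,000-2,000' into (chromosome, start, end).

    Illustrative sketch only, not the real sanitize_coordinates implementation.
    """
    try:
        chromosome, positions = region.split(':')
        start, end = positions.replace(',', '').split('-')
        int(start), int(end)  # both bounds must be numeric
    except ValueError:
        return '', '', ''
    return chromosome, start, end


# parse_chr_region('chr1:1,000-2,000') -> ('chr1', '1000', '2000')
# parse_chr_region('chr1')             -> ('', '', '')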
Example #3
 def preprocess_view(self):
     '''
     Main function to construct query and build view results json
     * Only publicly accessible function
     '''
     matrix_route = self._request.route_path('matrix', slash='/')
     result_filters = {'filters': []}
     # TODO: Validate doc types in base class in one location
     # Now we do it here and in _validate_items
     type_info = None
     if len(self._doc_types) == 1:
         if self._doc_types[0] in self._types:
             type_info = self._types[self._doc_types[0]]
             self._schema = type_info.schema
     self._validate_items(type_info)
     matrix = type_info.factory.matrix.copy()
     matrix['x']['limit'] = self._request.params.get('x.limit', 20)
     matrix['y']['limit'] = self._request.params.get('y.limit', 5)
     search_route = self._request.route_path('search', slash='/')
     matrix['search_base'] = search_route + self._search_base
     matrix['clear_matrix'] = matrix_route + '?type=' + self._doc_types[0]
     matrix_x_y = {
         'x': matrix['x'],
         'y': matrix['y'],
     }
     search_query, used_filters = self._construct_query(
         result_filters,
         matrix_x_y
     )
     es_results = self._elastic_search.search(
         body=search_query,
         index=self._es_index
     )
     aggregations = es_results['aggregations']
     aggregations_total = aggregations['matrix']['doc_count']
     matrix['doc_count'] = aggregations_total
     matrix['max_cell_doc_count'] = 0
     x_grouping = matrix['x']['group_by']
     y_groupings = matrix['y']['group_by']
     matrix['y'][y_groupings[0]] = aggregations['matrix'][y_groupings[0]]
     matrix['x'].update(aggregations['matrix']['x'])
     self._summarize_buckets(
         aggregations['matrix']['x']['buckets'],
         aggregations['matrix'],
         y_groupings + [x_grouping],
         matrix
     )
     facets = self._format_facets(
         es_results,
         self._facets,
         used_filters,
         (self._schema,),
         aggregations_total,
         self._principals
     )
     es_total = es_results['hits']['total']
     notification = 'Success'
     if not es_total:
         self._request.response.status_code = 404
         notification = 'No results found'
     search_result = {
         '@context': self._request.route_path('jsonld_context'),
         'filters': result_filters['filters'],
         '@id': matrix_route + self._search_base,
         '@type': ['Matrix'],
         'title': self._set_result_title(type_info),
         'matrix': matrix,
         'views': self._construct_result_views(type_info),
         'facets': facets,
         'total': es_total,
         'notification': notification,
     }
     search_result.update(
         search_result_actions(
             self._request,
             self._doc_types,
             es_results
         )
     )
     return search_result
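In this variant _construct_query receives the x/y matrix definitions so it can attach the matrix aggregations itself. A rough sketch of the kind of nested terms aggregation such a query body might carry; the aggregation layout and field names below are assumptions for illustration, not the output of the real _construct_query.

# Hypothetical 'aggs' clause for a matrix search: one terms aggregation per
# y grouping with the x grouping nested inside it, plus a flat 'x' aggregation
# for the column totals. Field names are illustrative only.
matrix_aggs = {
    'matrix': {
        'filter': {'terms': {'embedded.@type': ['Experiment']}},
        'aggs': {
            'biosample_term_name': {                       # assumed y grouping
                'terms': {'field': 'embedded.biosample_term_name', 'size': 999999},
                'aggs': {
                    'assay_title': {                       # assumed x grouping, nested per row
                        'terms': {'field': 'embedded.assay_title', 'size': 999999},
                    },
                },
            },
            'x': {                                         # flat x axis for column headers
                'terms': {'field': 'embedded.assay_title', 'size': 999999},
            },
        },
    },
}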
Example #4
 def preprocess_view(self):
     '''
     Main function to construct query and build view results json
     * Only publicly accessible function
     '''
     matrix_route = self._request.route_path('matrix', slash='/')
     result_filters = {'filters': []}
     # TODO: Validate doc types in base class in one location
     # Now we do it here and in _validate_items
     type_info = None
     if len(self._doc_types) == 1:
         if self._doc_types[0] in self._types:
             type_info = self._types[self._doc_types[0]]
             self._schema = type_info.schema
     self._validate_items(type_info)
     matrix = type_info.factory.matrix.copy()
     matrix['x']['limit'] = self._request.params.get('x.limit', 20)
     matrix['y']['limit'] = self._request.params.get('y.limit', 5)
     search_route = self._request.route_path('search', slash='/')
     matrix['search_base'] = search_route + self._search_base
     matrix['clear_matrix'] = matrix_route + '?type=' + self._doc_types[0]
     matrix_x_y = {
         'x': matrix['x'],
         'y': matrix['y'],
     }
     search_query, used_filters = self._construct_query(
         result_filters, matrix_x_y)
     es_results = self._elastic_search.search(body=search_query,
                                              index=self._es_index)
     aggregations = es_results['aggregations']
     aggregations_total = aggregations['matrix']['doc_count']
     matrix['doc_count'] = aggregations_total
     matrix['max_cell_doc_count'] = 0
     x_grouping = matrix['x']['group_by']
     y_groupings = matrix['y']['group_by']
     matrix['y'][y_groupings[0]] = aggregations['matrix'][y_groupings[0]]
     matrix['x'].update(aggregations['matrix']['x'])
     self._summarize_buckets(aggregations['matrix']['x']['buckets'],
                             aggregations['matrix'],
                             y_groupings + [x_grouping], matrix)
     facets = self._format_facets(es_results, self._facets, used_filters,
                                  (self._schema, ), aggregations_total,
                                  self._principals)
     es_total = es_results['hits']['total']
     notification = 'Success'
     if not es_total:
         self._request.response.status_code = 404
         notification = 'No results found'
     search_result = {
         '@context': self._request.route_path('jsonld_context'),
         'filters': result_filters['filters'],
         '@id': matrix_route + self._search_base,
         '@type': ['Matrix'],
         'title': self._set_result_title(type_info),
         'matrix': matrix,
         'views': self._construct_result_views(type_info),
         'facets': facets,
         'total': es_total,
         'notification': notification,
     }
     search_result.update(
         search_result_actions(self._request, self._doc_types, es_results))
     return search_result
Example #5
 def preprocess_view(self):
     '''
     Main function to construct query and build view results json
     * Only publicly accessible function
     '''
     audit_route = self._request.route_path('audit', slash='/')
     self._result['@id'] = audit_route + self._search_base
     self._result['@type'] = ['AuditMatrix']
     self._result['notification'] = ''
     # TODO: Validate doc types in base class in one location
     # Now we do it here and in _validate_items
     type_info = None
     if len(self._doc_types) == 1:
         if self._doc_types[0] in self._types:
             type_info = self._types[self._doc_types[0]]
             self._schema = type_info.schema
     self._validate_items(type_info)
     self._result['title'] = self._set_result_title(type_info)
     # Because formatting the query edits the sub-objects of the matrix, we need to
     # deepcopy the matrix so the original type_info.factory.matrix is not modified;
     # /matrix then still gets the correct data and never sees the /audit-specific changes.
     self._result['matrix'] = copy.deepcopy(type_info.factory.matrix)
     self._matrix = self._result['matrix']
     self._matrix['x']['limit'] = self._request.params.get('x.limit', 20)
     self._matrix['y']['limit'] = self._request.params.get('y.limit', 5)
     search_route = self._request.route_path('search', slash='/')
     self._matrix['search_base'] = search_route + self._search_base
     self._matrix['clear_matrix'] = '{}?type={}'.format(
         self._request.route_path('audit', slash='/'),
         self._doc_types[0],
     )
     self._result['views'] = [
         self._view_item.result_list,
         self._view_item.tabular_report
     ]
     query, audit_field_list, used_filters = self._construct_query()
     es_results = self._elastic_search.search(body=query, index=self._es_index)
     aggregations = es_results['aggregations']
     total = aggregations['matrix']['doc_count']
     self._result['matrix']['doc_count'] = total
     self._result['matrix']['max_cell_doc_count'] = 0
     self._result['facets'] = self._format_facets(
         es_results,
         self._facets,
         used_filters,
         (self._schema,),
         total,
         self._principals
     )
     doc_type = self._doc_types[0].lower()
     bucket_key = ('annotation_type' if doc_type == 'annotation' else 'assay_title')
     self._summarize_buckets(
         aggregations['matrix']['x']['buckets'],
         aggregations['matrix'],
         audit_field_list,
         bucket_key)
     self._summarize_no_audits(
         aggregations['matrix']['x']['buckets'],
         aggregations['matrix'],
         self._no_audits_groupings,
         aggregations['matrix'],
         bucket_key)
     self._update_aggregations(aggregations)
     self._result.update(
         search_result_actions(
             self._request,
             self._doc_types,
             es_results
         )
     )
     self._result['total'] = es_results['hits']['total']
     if self._result['total']:
         self._result['notification'] = 'Success'
     else:
         self._request.response.status_code = 404
         self._result['notification'] = 'No results found'
     return self._result
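The comment above the matrix assignment is the key difference from the plain matrix view: type_info.factory.matrix.copy() is only a shallow copy, so its nested 'x' and 'y' dicts would still be shared with the factory and any audit-specific edits would leak back into /matrix. A minimal standalone demonstration of that distinction (field names are illustrative):

import copy

factory_matrix = {'x': {'group_by': 'assay_title', 'limit': 20}}

shallow = factory_matrix.copy()
shallow['x']['limit'] = 99                  # the nested dict is shared...
assert factory_matrix['x']['limit'] == 99   # ...so the original changes too

factory_matrix['x']['limit'] = 20           # reset before comparing
deep = copy.deepcopy(factory_matrix)
deep['x']['limit'] = 99                     # only the deep copy changes
assert factory_matrix['x']['limit'] == 20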
Example #6
def region_search(context, request):
    """
    Search files by region.
    """
    types = request.registry[TYPES]
    result = {
        '@id': '/region-search/' + ('?' + request.query_string.split('&referrer')[0] if request.query_string else ''),
        '@type': ['region-search'],
        'title': 'Search by region',
        'facets': [],
        '@graph': [],
        'columns': OrderedDict(),
        'notification': '',
        'filters': []
    }
    principals = effective_principals(request)
    es = request.registry[ELASTIC_SEARCH]
    snp_es = request.registry['snp_search']
    region = request.params.get('region', '*')
    region_inside_peak_status = False


    # handling limit
    size = request.params.get('limit', 25)
    if size in ('all', ''):
        size = 99999
    else:
        try:
            size = int(size)
        except ValueError:
            size = 25
    if region == '':
        region = '*'

    assembly = request.params.get('genome', '*')
    result['assembly'] = _GENOME_TO_ALIAS.get(assembly,'GRCh38')
    annotation = request.params.get('annotation', '*')
    chromosome, start, end = ('', '', '')

    if annotation != '*':
        if annotation.lower().startswith('ens'):
            chromosome, start, end = get_ensemblid_coordinates(annotation, assembly)
        else:
            chromosome, start, end = get_annotation_coordinates(es, annotation, assembly)
    elif region != '*':
        region = region.lower()
        if region.startswith('rs'):
            sanitized_region = sanitize_rsid(region)
            chromosome, start, end = get_rsid_coordinates(sanitized_region, assembly)
            region_inside_peak_status = True
        elif region.startswith('ens'):
            chromosome, start, end = get_ensemblid_coordinates(region, assembly)
        elif region.startswith('chr'):
            chromosome, start, end = sanitize_coordinates(region)
    else:
        chromosome, start, end = ('', '', '')
    # Check if there are valid coordinates
    if not chromosome or not start or not end:
        result['notification'] = 'No annotations found'
        return result
    else:
        result['coordinates'] = '{chr}:{start}-{end}'.format(
            chr=chromosome, start=start, end=end
        )

    # Search for peaks for the coordinates we got
    try:
        # including inner hits is very slow
        # figure out how to distinguish browser requests from .embed method requests
        if 'peak_metadata' in request.query_string:
            peak_query = get_peak_query(start, end, with_inner_hits=True, within_peaks=region_inside_peak_status)
        else:
            peak_query = get_peak_query(start, end, within_peaks=region_inside_peak_status)
        peak_results = snp_es.search(body=peak_query,
                                     index=chromosome.lower(),
                                     doc_type=_GENOME_TO_ALIAS[assembly],
                                     size=99999)
    except Exception:
        result['notification'] = 'Error during search'
        return result
    file_uuids = []
    for hit in peak_results['hits']['hits']:
        if hit['_id'] not in file_uuids:
            file_uuids.append(hit['_id'])
    file_uuids = list(set(file_uuids))
    result['notification'] = 'No results found'


    # if any peaks were found, return the experiments that contain those peak files
    uuid_count = len(file_uuids)
    if uuid_count > MAX_CLAUSES_FOR_ES:
        log.error("REGION_SEARCH WARNING: region with %d file_uuids is being restricted to %d" % \
                                                            (uuid_count, MAX_CLAUSES_FOR_ES))
        file_uuids = file_uuids[:MAX_CLAUSES_FOR_ES]
        uuid_count = len(file_uuids)

    if uuid_count:
        query = get_filtered_query('', [], set(), principals, ['Experiment'])
        del query['query']
        query['post_filter']['bool']['must'].append({
            'terms': {
                'embedded.files.uuid': file_uuids
            }
        })
        used_filters = set_filters(request, query, result)
        used_filters['files.uuid'] = file_uuids
        query['aggs'] = set_facets(_FACETS, used_filters, principals, ['Experiment'])
        schemas = (types[item_type].schema for item_type in ['Experiment'])
        es_results = es.search(
            body=query, index='experiment', doc_type='experiment', size=size, request_timeout=60
        )
        result['@graph'] = list(format_results(request, es_results['hits']['hits']))
        result['total'] = total = es_results['hits']['total']
        result['facets'] = BaseView._format_facets(es_results, _FACETS, used_filters, schemas, total, principals)
        result['peaks'] = list(peak_results['hits']['hits'])
        result['download_elements'] = get_peak_metadata_links(request)
        if result['total'] > 0:
            result['notification'] = 'Success'
            position_for_browser = format_position(result['coordinates'], 200)
            result.update(search_result_actions(request, ['Experiment'], es_results, position=position_for_browser))

    return result
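get_peak_query is another imported helper whose body is not shown. A rough sketch of the kind of interval-overlap query the peak search might issue against the per-chromosome index; the field names and exact structure are assumptions, not the real helper's output.

def sketch_peak_query(region_start, region_end):
    """Illustrative overlap query: a stored peak [start, end] overlaps the
    requested region when start <= region_end and end >= region_start.
    Field names ('start', 'end') are assumed, not taken from get_peak_query."""
    return {
        'query': {
            'bool': {
                'filter': [
                    {'range': {'start': {'lte': region_end}}},
                    {'range': {'end': {'gte': region_start}}},
                ],
            },
        },
        '_source': False,   # region_search only needs the hit _id values (file UUIDs)
    }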
Example #7
 def preprocess_view(self):
     '''
     Main function to construct query and build view results json
     * Only publicly accessible function
     '''
     audit_route = self._request.route_path('audit', slash='/')
     self._result['@id'] = audit_route + self._search_base
     self._result['@type'] = ['AuditMatrix']
     self._result['notification'] = ''
     # TODO: Validate doc types in base class in one location
     # Now we do it here and in _validate_items
     type_info = None
     if len(self._doc_types) == 1:
         if self._doc_types[0] in self._types:
             type_info = self._types[self._doc_types[0]]
             self._schema = type_info.schema
     self._validate_items(type_info)
     self._result['title'] = self._set_result_title(type_info)
     # Because formatting the query edits the sub-objects of the matrix, we need to
     # deepcopy the matrix so the original type_info.factory.matrix is not modified;
     # /matrix then still gets the correct data and never sees the /audit-specific changes.
     self._result['matrix'] = copy.deepcopy(type_info.factory.matrix)
     self._matrix = self._result['matrix']
     self._matrix['x']['limit'] = self._request.params.get('x.limit', 20)
     self._matrix['y']['limit'] = self._request.params.get('y.limit', 5)
     search_route = self._request.route_path('search', slash='/')
     self._matrix['search_base'] = search_route + self._search_base
     self._matrix['clear_matrix'] = '{}?type={}'.format(
         self._request.route_path('matrix', slash='/'),
         self._doc_types[0],
     )
     self._result['views'] = [
         self._view_item.result_list, self._view_item.tabular_report
     ]
     query, audit_field_list, used_filters = self._construct_query()
     es_results = self._elastic_search.search(body=query,
                                              index=self._es_index)
     aggregations = es_results['aggregations']
     total = aggregations['matrix']['doc_count']
     self._result['matrix']['doc_count'] = total
     self._result['matrix']['max_cell_doc_count'] = 0
     self._result['facets'] = self._format_facets(es_results, self._facets,
                                                  used_filters,
                                                  (self._schema, ), total,
                                                  self._principals)
     self._summarize_buckets(aggregations['matrix']['x']['buckets'],
                             aggregations['matrix'], audit_field_list)
     self._summarize_no_audits(aggregations['matrix']['x']['buckets'],
                               aggregations['matrix'],
                               self._no_audits_groupings,
                               aggregations['matrix'])
     self._update_aggregations(aggregations)
     self._result.update(
         search_result_actions(self._request, self._doc_types, es_results))
     self._result['total'] = es_results['hits']['total']
     if self._result['total']:
         self._result['notification'] = 'Success'
     else:
         self._request.response.status_code = 404
         self._result['notification'] = 'No results found'
     return self._result
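All of these preprocess_view methods lean on a Pyramid request (route_path, params, response.status_code), so the surrounding view classes are presumably registered as Pyramid views. A minimal sketch of that wiring under assumed names; the class, constructor, and stubbed return value here are illustrative, not taken from the examples.

from pyramid.view import view_config


class AuditMatrixView:
    """Hypothetical stand-in for the view class that owns preprocess_view above."""

    def __init__(self, context, request):
        self._request = request

    def preprocess_view(self):
        # The real method builds the full matrix JSON; this stub only shows the shape.
        return {'@type': ['AuditMatrix'], 'notification': 'Success'}


@view_config(route_name='audit', request_method='GET')
def audit_matrix(context, request):
    # Delegate to the helper class, mirroring the pattern used by the examples.
    return AuditMatrixView(context, request).preprocess_view()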