Ejemplo n.º 1
0
    def _set_query_aggs(self, query):
        '''
        Collect the request filters and attach facet aggregations to query.

        The ``bool`` clause is popped off ``query['post_filter']`` and passed
        to ``set_filters`` inside a separate collector dict, so the collected
        ``must`` / ``must_not`` lists can be handed to
        ``_construct_xygroupings``. Returns whatever ``set_filters`` returned.
        '''
        collector = {'post_filter': {'bool': query['post_filter'].pop('bool')}}
        used_filters = set_filters(
            self._request,
            collector,
            self._result,
            filter_exclusion=self._filter_exclusion,
        )
        filters = collector['post_filter']['bool']['must']
        negative_filters = collector['post_filter']['bool']['must_not']

        # Keep only the schema facets listed for the matrix x or y axis.
        matrix_facets = []
        for field, facet in self._schema['facets'].items():
            on_axis = (field in self._matrix['x']['facets']
                       or field in self._matrix['y']['facets'])
            if on_axis:
                matrix_facets.append((field, facet))
        self._facets = matrix_facets

        # Display all audits if logged in, or all but INTERNAL_ACTION if
        # logged out.
        for audit_facet in self._audit_facets:
            show_all = (self._search_audit
                        and 'group.submitter' in self._principals)
            if show_all or 'INTERNAL_ACTION' not in audit_facet[0]:
                self._facets.append(audit_facet)

        query['aggs'] = set_facets(
            self._facets,
            used_filters,
            self._principals,
            self._doc_types,
        )
        self._construct_xygroupings(query, filters, negative_filters)
        return used_filters
Ejemplo n.º 2
0
 def _set_query_aggs(self, query):
     '''
     Collect the request filters and attach facet aggregations to query.

     The ``bool`` clause is popped off ``query['post_filter']`` and passed to
     ``set_filters`` inside a separate collector dict; the collected ``must``
     list is then handed to ``_construct_xygroupings``. Returns whatever
     ``set_filters`` returned.
     '''
     collector = {'post_filter': {'bool': query['post_filter'].pop('bool')}}
     used_filters = set_filters(self._request, collector, self._result)
     filters = collector['post_filter']['bool']['must']

     # Keep only the schema facets listed for the summary x or y axis.
     summary_facets = []
     for field, facet in self._schema['facets'].items():
         on_axis = (field in self._summary['x']['facets']
                    or field in self._summary['y']['facets'])
         if on_axis:
             summary_facets.append((field, facet))
     self._facets = summary_facets

     query['aggs'] = set_facets(
         self._facets, used_filters, self._principals, self._doc_types)
     self._construct_xygroupings(query, filters)
     return used_filters
Ejemplo n.º 3
0
def test_set_filters_searchTerm():
    """A ``searchTerm`` param is listed in result filters but adds no clause."""
    fake_request = FakeRequest((('searchTerm', 'value1'), ))
    es_query = {
        'query': {'query_string': {}},
        'post_filter': {'bool': {'must': [], 'must_not': []}},
    }
    view_result = {'filters': []}

    applied = set_filters(fake_request, es_query, view_result)

    # The query is expected to come back exactly as it went in.
    untouched_query = {
        'query': {'query_string': {}},
        'post_filter': {'bool': {'must': [], 'must_not': []}},
    }
    expected_result = {
        'filters': [
            {'field': 'searchTerm', 'term': 'value1', 'remove': '/search/?'},
        ],
    }
    assert applied == {}
    assert es_query == untouched_query
    assert view_result == expected_result
Ejemplo n.º 4
0
def test_set_filters_reserved_params(param):
    """A reserved ``param`` must leave the filters, query and result untouched."""
    from snovault.helpers.helper import set_filters

    fake_request = FakeRequest(((param, 'foo'), ))
    es_query = {
        'query': {'query_string': {}},
        'post_filter': {'bool': {'must': [], 'must_not': []}},
    }
    view_result = {'filters': []}

    applied = set_filters(fake_request, es_query, view_result)

    # Nothing is expected to change for a reserved parameter.
    untouched_query = {
        'query': {'query_string': {}},
        'post_filter': {'bool': {'must': [], 'must_not': []}},
    }
    assert applied == {}
    assert es_query == untouched_query
    assert view_result == {'filters': []}
Ejemplo n.º 5
0
def test_set_filters_reserved_params(param):
    """A reserved ``param`` must leave the filters, query and result untouched."""
    pristine_bool = {'must': [], 'must_not': []}
    fake_request = FakeRequest(((param, 'foo'), ))
    es_query = {
        'query': {'query_string': {}},
        'post_filter': {'bool': {'must': [], 'must_not': []}},
    }
    view_result = {'filters': []}

    applied = set_filters(fake_request, es_query, view_result)

    assert applied == {}
    # Compared against a separately built dict so mutation would be noticed.
    assert es_query == {
        'query': {'query_string': {}},
        'post_filter': {'bool': pristine_bool},
    }
    assert view_result == {'filters': []}
Ejemplo n.º 6
0
def test_set_filters_exists_missing():
    """``field=*`` becomes an ``exists`` clause; ``field!=*`` a negated one."""
    fake_request = FakeRequest((
        ('field1', '*'),
        ('field2!', '*'),
    ))
    es_query = {
        'query': {'query_string': {}},
        'post_filter': {'bool': {'must': [], 'must_not': []}},
    }
    view_result = {'filters': []}

    applied = set_filters(fake_request, es_query, view_result)

    expected_applied = {'field1': ['*'], 'field2!': ['*']}
    expected_query = {
        'query': {'query_string': {}},
        'post_filter': {
            'bool': {
                'must': [{'exists': {'field': 'embedded.field1'}}],
                'must_not': [{'exists': {'field': 'embedded.field2'}}],
            },
        },
    }
    # Each "remove" link keeps only the other filter in the query string.
    expected_result = {
        'filters': [
            {
                'field': 'field1',
                'term': '*',
                'remove': '/search/?field2%21=%2A',
            },
            {
                'field': 'field2!',
                'term': '*',
                'remove': '/search/?field1=%2A',
            },
        ],
    }
    assert applied == expected_applied
    assert es_query == expected_query
    assert view_result == expected_result
Ejemplo n.º 7
0
def test_set_filters_multivalued():
    """Two values for one field are merged into a single ``terms`` clause."""
    from snovault.helpers.helper import set_filters

    fake_request = FakeRequest((
        ('field1', 'value1'),
        ('field1', 'value2'),
    ))
    es_query = {
        'query': {'query_string': {}},
        'post_filter': {'bool': {'must': [], 'must_not': []}},
    }
    view_result = {'filters': []}

    applied = set_filters(fake_request, es_query, view_result)

    expected_query = {
        'query': {'query_string': {}},
        'post_filter': {
            'bool': {
                'must': [
                    {'terms': {'embedded.field1': ['value1', 'value2']}},
                ],
                'must_not': [],
            },
        },
    }
    # Each "remove" link keeps only the other value of the field.
    expected_result = {
        'filters': [
            {
                'field': 'field1',
                'term': 'value1',
                'remove': '/search/?field1=value2',
            },
            {
                'field': 'field1',
                'term': 'value2',
                'remove': '/search/?field1=value1',
            },
        ],
    }
    assert applied == {'field1': ['value1', 'value2']}
    assert es_query == expected_query
    assert view_result == expected_result
Ejemplo n.º 8
0
def test_set_filters_multivalued():
    """Two values for one field are merged into a single ``terms`` clause."""
    from snovault.helpers.helper import set_filters

    params = (('field1', 'value1'), ('field1', 'value2'))
    fake_request = FakeRequest(params)
    es_query = {
        'query': {'query_string': {}},
        'post_filter': {'bool': {'must': [], 'must_not': []}},
    }
    view_result = {'filters': []}

    applied = set_filters(fake_request, es_query, view_result)

    merged_terms = {'terms': {'embedded.field1': ['value1', 'value2']}}
    # Each "remove" link keeps only the other value of the field.
    first_filter = {
        'field': 'field1',
        'term': 'value1',
        'remove': '/search/?field1=value2',
    }
    second_filter = {
        'field': 'field1',
        'term': 'value2',
        'remove': '/search/?field1=value1',
    }
    assert applied == {'field1': ['value1', 'value2']}
    assert es_query == {
        'query': {'query_string': {}},
        'post_filter': {'bool': {'must': [merged_terms], 'must_not': []}},
    }
    assert view_result == {'filters': [first_filter, second_filter]}
Ejemplo n.º 9
0
 def _set_query_aggs(self, query):
     '''
     Helper method for preprocessing view.

     Collects the request filters, selects the facets to aggregate on and
     stores the aggregations under ``query['aggs']``.

     Returns an ``(audit_field_list, used_filters)`` tuple:
     ``audit_field_list`` holds the field names of the selected facets that
     contain ``'audit.'`` and ``used_filters`` is the ``set_filters`` result.
     '''
     # Setting filters.
     # Rather than setting them at the top level of the query
     # we collect them for use in aggregations later.
     query_filters = query['post_filter'].pop('bool')
     filter_collector = {'post_filter': {'bool': query_filters}}
     used_filters = set_filters(
         self._request,
         filter_collector,
         self._result,
     )
     filters = filter_collector['post_filter']['bool']['must']
     negative_filters = filter_collector['post_filter']['bool']['must_not']
     # Only schema facets listed for the matrix x or y axis are kept.
     self._facets = [
         (field, facet)
         for field, facet in self._schema['facets'].items()
         if (
             field in self._matrix['x']['facets'] or
             field in self._matrix['y']['facets']
         )
     ]
     # Every audit facet is added when audits are searched and
     # 'group.submitter' is among the principals; otherwise only those
     # whose field does not mention INTERNAL_ACTION.
     for audit_facet in self._audit_facets:
         if ((self._search_audit and 'group.submitter' in self._principals) or
                 'INTERNAL_ACTION' not in audit_facet[0]):
             self._facets.append(audit_facet)
     # Field names (not the (field, facet) pairs) of the audit facets.
     audit_field_list = [
         field for field, _ in self._facets if 'audit.' in field
     ]
     query['aggs'] = set_facets(
         self._facets,
         used_filters,
         self._principals,
         self._doc_types,
     )
     self._construct_xygroupings(
         query,
         filters,
         negative_filters,
         audit_field_list
     )
     return audit_field_list, used_filters
Ejemplo n.º 10
0
def test_set_filters_audit():
    """An ``audit.*`` field is filtered on as-is, without ``embedded.``."""
    from snovault.helpers.helper import set_filters

    fake_request = FakeRequest((('audit.foo', 'value1'), ))
    es_query = {
        'query': {'query_string': {}},
        'post_filter': {'bool': {'must': [], 'must_not': []}},
    }
    view_result = {'filters': []}

    applied = set_filters(fake_request, es_query, view_result)

    expected_query = {
        'query': {'query_string': {}},
        'post_filter': {
            'bool': {
                'must': [{'terms': {'audit.foo': ['value1']}}],
                'must_not': [],
            },
        },
    }
    expected_result = {
        'filters': [
            {'field': 'audit.foo', 'term': 'value1', 'remove': '/search/?'},
        ],
    }
    assert applied == {'audit.foo': ['value1']}
    assert es_query == expected_query
    assert view_result == expected_result
Ejemplo n.º 11
0
def test_set_filters_audit():
    """An ``audit.*`` field is filtered on as-is, without ``embedded.``."""
    from snovault.helpers.helper import set_filters

    field, value = 'audit.foo', 'value1'
    fake_request = FakeRequest(((field, value), ))
    es_query = {
        'query': {'query_string': {}},
        'post_filter': {'bool': {'must': [], 'must_not': []}},
    }
    view_result = {'filters': []}

    applied = set_filters(fake_request, es_query, view_result)

    audit_clause = {'terms': {'audit.foo': ['value1']}}
    audit_filter = {
        'field': 'audit.foo',
        'term': 'value1',
        'remove': '/search/?',
    }
    assert applied == {'audit.foo': ['value1']}
    assert es_query == {
        'query': {'query_string': {}},
        'post_filter': {'bool': {'must': [audit_clause], 'must_not': []}},
    }
    assert view_result == {'filters': [audit_filter]}
Ejemplo n.º 12
0
def test_set_filters():
    """A plain ``field=value`` param becomes an ``embedded.`` terms clause."""
    fake_request = FakeRequest((('field1', 'value1'), ))
    es_query = {
        'query': {'query_string': {}},
        'post_filter': {'bool': {'must': [], 'must_not': []}},
    }
    view_result = {'filters': []}

    applied = set_filters(fake_request, es_query, view_result)

    expected_query = {
        'query': {'query_string': {}},
        'post_filter': {
            'bool': {
                'must': [{'terms': {'embedded.field1': ['value1']}}],
                'must_not': [],
            },
        },
    }
    expected_result = {
        'filters': [
            {'field': 'field1', 'term': 'value1', 'remove': '/search/?'},
        ],
    }
    assert applied == {'field1': ['value1']}
    assert es_query == expected_query
    assert view_result == expected_result
Ejemplo n.º 13
0
def test_set_filters_negated():
    """A ``field!=value`` param lands in ``must_not`` as a terms clause."""
    fake_request = FakeRequest((('field1!', 'value1'), ))
    es_query = {
        'query': {'query_string': {}},
        'post_filter': {'bool': {'must': [], 'must_not': []}},
    }
    view_result = {'filters': []}

    applied = set_filters(fake_request, es_query, view_result)

    expected_query = {
        'query': {'query_string': {}},
        'post_filter': {
            'bool': {
                'must': [],
                'must_not': [{'terms': {'embedded.field1': ['value1']}}],
            },
        },
    }
    # The reported field keeps its trailing "!" marker.
    expected_result = {
        'filters': [
            {'field': 'field1!', 'term': 'value1', 'remove': '/search/?'},
        ],
    }
    assert applied == {'field1!': ['value1']}
    assert es_query == expected_query
    assert view_result == expected_result
Ejemplo n.º 14
0
 def _set_query_aggs(self, query):
     '''
     Collect the request filters and attach facet aggregations to query.

     The ``bool`` clause is popped off ``query['post_filter']`` and given to
     ``set_filters`` through a separate collector dict; its ``must`` list is
     then passed on to ``_construct_xygroupings``. Returns the
     ``set_filters`` result.
     '''
     bool_clause = query['post_filter'].pop('bool')
     collector = {'post_filter': {'bool': bool_clause}}
     used_filters = set_filters(self._request, collector, self._result)
     filters = collector['post_filter']['bool']['must']

     # Keep only the schema facets listed for the summary x or y axis.
     selected = []
     for field, facet in self._schema['facets'].items():
         if field in self._summary['x']['facets']:
             selected.append((field, facet))
         elif field in self._summary['y']['facets']:
             selected.append((field, facet))
     self._facets = selected

     query['aggs'] = set_facets(
         self._facets,
         used_filters,
         self._principals,
         self._doc_types,
     )
     self._construct_xygroupings(query, filters)
     return used_filters
Ejemplo n.º 15
0
 def _set_query_aggs(self, query):
     '''
     Helper method for preprocessing view.

     Collects the request filters, selects the facets to aggregate on and
     stores the aggregations under ``query['aggs']``.

     Returns an ``(audit_field_list, used_filters)`` tuple:
     ``audit_field_list`` holds the field names of the selected facets that
     contain ``'audit.'`` and ``used_filters`` is the ``set_filters`` result.
     '''
     # Setting filters.
     # Rather than setting them at the top level of the query
     # we collect them for use in aggregations later.
     query_filters = query['post_filter'].pop('bool')
     filter_collector = {'post_filter': {'bool': query_filters}}
     used_filters = set_filters(
         self._request,
         filter_collector,
         self._result,
     )
     filters = filter_collector['post_filter']['bool']['must']
     negative_filters = filter_collector['post_filter']['bool']['must_not']
     # Only schema facets listed for the matrix x or y axis are kept.
     self._facets = [(field, facet)
                     for field, facet in self._schema['facets'].items()
                     if (field in self._matrix['x']['facets']
                         or field in self._matrix['y']['facets'])]
     # Every audit facet is added when audits are searched and
     # 'group.submitter' is among the principals; otherwise only those
     # whose field does not mention INTERNAL_ACTION.
     for audit_facet in self._audit_facets:
         if ((self._search_audit and 'group.submitter' in self._principals)
                 or 'INTERNAL_ACTION' not in audit_facet[0]):
             self._facets.append(audit_facet)
     # Field names (not the (field, facet) pairs) of the audit facets.
     audit_field_list = [
         field for field, _ in self._facets if 'audit.' in field
     ]
     query['aggs'] = set_facets(
         self._facets,
         used_filters,
         self._principals,
         self._doc_types,
     )
     self._construct_xygroupings(query, filters, negative_filters,
                                 audit_field_list)
     return audit_field_list, used_filters
Ejemplo n.º 16
0
    def _set_query_aggs(self, query, result_filters, matrix_x_y):
        '''
        Collect the request filters and attach facet aggregations to query.

        ``result_filters`` is passed to ``set_filters`` as its result
        argument and ``matrix_x_y`` supplies the x / y facet lists used to
        pick schema facets. Returns the ``set_filters`` result.
        '''
        collector = {'post_filter': {'bool': query['post_filter'].pop('bool')}}
        used_filters = set_filters(
            self._request,
            collector,
            result_filters,
            filter_exclusion=self._filter_exclusion,
        )
        filters = collector['post_filter']['bool']['must']
        negative_filters = collector['post_filter']['bool']['must_not']

        # Keep only the schema facets listed for the matrix x or y axis.
        axis_facets = []
        for field, facet in self._schema['facets'].items():
            on_axis = (field in matrix_x_y['x']['facets']
                       or field in matrix_x_y['y']['facets'])
            if on_axis:
                axis_facets.append((field, facet))
        self._facets = axis_facets

        # Display all audits if logged in, or all but INTERNAL_ACTION if
        # logged out.
        for audit_facet in self._audit_facets:
            show_all = (self._search_audit
                        and 'group.submitter' in self._principals)
            if show_all or 'INTERNAL_ACTION' not in audit_facet[0]:
                self._facets.append(audit_facet)

        query['aggs'] = set_facets(
            self._facets, used_filters, self._principals, self._doc_types)
        self._construct_xygroupings(
            query, filters, negative_filters, matrix_x_y)
        return used_filters
Ejemplo n.º 17
0
def region_search(context, request):
    """
    Search files by region.

    The ``annotation`` or ``region`` request parameter is resolved to
    chromosome/start/end coordinates, peaks for those coordinates are looked
    up through the ``snp_search`` client, and experiments whose files match
    the peak hits are then searched and faceted.

    Returns the ``result`` dict. ``result['notification']`` reports the
    outcome: 'No annotations found', 'Error during search',
    'No results found' or 'Success'.
    """
    types = request.registry[TYPES]
    result = {
        '@id':
        '/region-search/' + ('?' + request.query_string.split('&referrer')[0]
                             if request.query_string else ''),
        '@type': ['region-search'],
        'title':
        'Search by region',
        'facets': [],
        '@graph': [],
        'columns':
        OrderedDict(),
        'notification':
        '',
        'filters': []
    }
    principals = effective_principals(request)
    es = request.registry[ELASTIC_SEARCH]
    snp_es = request.registry['snp_search']
    region = request.params.get('region', '*')
    region_inside_peak_status = False

    # handling limit
    size = request.params.get('limit', 25)
    if size in ('all', ''):
        size = 99999
    else:
        try:
            size = int(size)
        except ValueError:
            size = 25
    if region == '':
        region = '*'

    assembly = request.params.get('genome', '*')
    result['assembly'] = _GENOME_TO_ALIAS.get(assembly, 'GRCh38')
    annotation = request.params.get('annotation', '*')
    # Stay empty unless one of the lookups below resolves them.
    chromosome, start, end = ('', '', '')

    if annotation != '*':
        if annotation.lower().startswith('ens'):
            chromosome, start, end = get_ensemblid_coordinates(
                annotation, assembly)
        else:
            chromosome, start, end = get_annotation_coordinates(
                es, annotation, assembly)
    elif region != '*':
        region = region.lower()
        if region.startswith('rs'):
            sanitized_region = sanitize_rsid(region)
            chromosome, start, end = get_rsid_coordinates(
                sanitized_region, assembly)
            region_inside_peak_status = True
        elif region.startswith('ens'):
            chromosome, start, end = get_ensemblid_coordinates(
                region, assembly)
        elif region.startswith('chr'):
            chromosome, start, end = sanitize_coordinates(region)

    # Check if there are valid coordinates
    if not chromosome or not start or not end:
        result['notification'] = 'No annotations found'
        return result
    result['coordinates'] = '{chr}:{start}-{end}'.format(chr=chromosome,
                                                         start=start,
                                                         end=end)

    # Search for peaks for the coordinates we got
    try:
        # including inner hits is very slow
        # figure out how to distinguish browser requests from .embed method requests
        if 'peak_metadata' in request.query_string:
            peak_query = get_peak_query(start,
                                        end,
                                        with_inner_hits=True,
                                        within_peaks=region_inside_peak_status)
        else:
            peak_query = get_peak_query(start,
                                        end,
                                        within_peaks=region_inside_peak_status)
        peak_results = snp_es.search(body=peak_query,
                                     index=chromosome.lower(),
                                     doc_type=_GENOME_TO_ALIAS[assembly],
                                     size=99999)
    except Exception:
        # Best effort: the caller still gets a result, but the failure is
        # recorded instead of being silently dropped.
        log.exception('REGION_SEARCH: peak search failed')
        result['notification'] = 'Error during search'
        return result

    # De-duplicate the hit ids in a single pass.
    file_uuids = list({hit['_id'] for hit in peak_results['hits']['hits']})
    result['notification'] = 'No results found'

    # if more than one peak found return the experiments with those peak files
    uuid_count = len(file_uuids)
    if uuid_count > MAX_CLAUSES_FOR_ES:
        log.error(
            "REGION_SEARCH WARNING: region with %d file_uuids is being restricted to %d",
            uuid_count, MAX_CLAUSES_FOR_ES)
        file_uuids = file_uuids[:MAX_CLAUSES_FOR_ES]
        uuid_count = len(file_uuids)

    if uuid_count:
        query = get_filtered_query('', [], set(), principals, ['Experiment'])
        del query['query']
        query['post_filter']['bool']['must'].append(
            {'terms': {
                'embedded.files.uuid': file_uuids
            }})
        used_filters = set_filters(request, query, result)
        used_filters['files.uuid'] = file_uuids
        query['aggs'] = set_facets(_FACETS, used_filters, principals,
                                   ['Experiment'])
        schemas = (types[item_type].schema for item_type in ['Experiment'])
        es_results = es.search(body=query,
                               index='experiment',
                               doc_type='experiment',
                               size=size,
                               request_timeout=60)
        result['@graph'] = list(
            format_results(request, es_results['hits']['hits']))
        result['total'] = total = es_results['hits']['total']
        result['facets'] = BaseView._format_facets(es_results, _FACETS,
                                                   used_filters, schemas,
                                                   total, principals)
        result['peaks'] = list(peak_results['hits']['hits'])
        result['download_elements'] = get_peak_metadata_links(request)
        if result['total'] > 0:
            result['notification'] = 'Success'
            position_for_browser = format_position(result['coordinates'], 200)
            result.update(
                search_result_actions(request, ['Experiment'],
                                      es_results,
                                      position=position_for_browser))

    return result
Ejemplo n.º 18
0
 def preprocess_view(self):
     '''
     Main function to construct query and build view results json
     * Only publicly accessible function

     Builds a filtered query over ``Page`` documents sorted by creation
     date (newest first), runs it, and fills ``self._result`` with the
     formatted hits, total and facets. When nothing matches, the response
     status is set to 404 and the result carries an empty ``@graph``.

     Returns ``self._result``.
     '''
     self._result['@id'] = '/news/' + self._search_base
     self._result['@type'] = ['News']
     self._result['notification'] = ''
     doc_types = ['Page']
     search_fields, _ = get_search_fields(self._request, doc_types)
     query = get_filtered_query(
         '*',
         search_fields,
         sorted(list_result_fields(self._request, doc_types)),
         self._principals,
         doc_types
     )
     del query['query']['query_string']
     # Two views of the same sort spec: the query sorts on the
     # 'embedded.'-prefixed field path, the result reports the bare name.
     sort = OrderedDict()
     result_sort = OrderedDict()
     sort['embedded.date_created'] = result_sort['date_created'] = {
         'order': 'desc',
         'unmapped_type': 'keyword',
     }
     query['sort'] = sort
     self._result['sort'] = result_sort
     used_filters = set_filters(
         self._request,
         query,
         self._result,
         [('type', 'Page'), ('news', 'true'), ('status', 'released')]
     )
     facets = []
     if len(doc_types) == 1 and 'facets' in self._types[doc_types[0]].schema:
         facets.extend(self._types[doc_types[0]].schema['facets'].items())
     query['aggs'] = set_facets(facets, used_filters, self._principals, doc_types)
     es_results = self._elastic_search.search(
         body=query,
         index=self._es_index,
         doc_type=self._es_index,
         from_=0,
         size=25)
     total = es_results['hits']['total']
     if not total:
         self._request.response.status_code = 404
         self._result['notification'] = 'No results found'
         self._result['@graph'] = []
         return self._result
     self._result['notification'] = 'Success'
     self._result['total'] = total
     graph = format_results(
         self._request,
         es_results['hits']['hits'],
         self._result
     )
     self._result['@graph'] = list(graph)
     schemas = [
         self._types[doc_type].schema
         for doc_type in doc_types
     ]
     self._result['facets'] = self._format_facets(
         es_results,
         facets,
         used_filters,
         schemas,
         total,
         self._principals
     )
     return self._result
Ejemplo n.º 19
0
def region_search(context, request):
    """
    Search experiments whose files have peaks overlapping a genomic region.

    Query parameters read from ``request.params``:

    * ``region``     - rsid (``rs...``), Ensembl id (``ens...``) or
      coordinates (``chr...``); ignored when ``annotation`` is given.
    * ``annotation`` - annotation name or Ensembl id, resolved to coordinates.
    * ``genome``     - assembly name, used for coordinate lookup and as a key
      into ``_GENOME_TO_ALIAS``.
    * ``limit``      - number of experiments to return; ``'all'`` or an empty
      value means 99999, an unparsable value falls back to 25.

    If the raw query string contains ``peak_metadata`` the peak query is built
    with ``with_inner_hits=True``.

    Returns the ``result`` dict. ``result['notification']`` is one of
    ``'No annotations found'`` (no usable coordinates),
    ``'Error during search'`` (the peak lookup raised),
    ``'No results found'`` or ``'Success'``.
    """
    types = request.registry[TYPES]
    result = {
        '@id': '/region-search/' + ('?' + request.query_string.split('&referrer')[0] if request.query_string else ''),
        '@type': ['region-search'],
        'title': 'Search by region',
        'facets': [],
        '@graph': [],
        'columns': OrderedDict(),
        'notification': '',
        'filters': []
    }
    principals = effective_principals(request)
    es = request.registry[ELASTIC_SEARCH]
    snp_es = request.registry['snp_search']
    region = request.params.get('region', '*')
    region_inside_peak_status = False

    # handling limit
    size = request.params.get('limit', 25)
    if size in ('all', ''):
        size = 99999
    else:
        try:
            size = int(size)
        except ValueError:
            size = 25
    if region == '':
        region = '*'

    assembly = request.params.get('genome', '*')
    result['assembly'] = _GENOME_TO_ALIAS.get(assembly, 'GRCh38')
    annotation = request.params.get('annotation', '*')

    # Coordinates stay empty unless one of the branches below resolves them.
    chromosome, start, end = ('', '', '')
    if annotation != '*':
        if annotation.lower().startswith('ens'):
            chromosome, start, end = get_ensemblid_coordinates(annotation, assembly)
        else:
            chromosome, start, end = get_annotation_coordinates(es, annotation, assembly)
    elif region != '*':
        region = region.lower()
        if region.startswith('rs'):
            sanitized_region = sanitize_rsid(region)
            chromosome, start, end = get_rsid_coordinates(sanitized_region, assembly)
            region_inside_peak_status = True
        elif region.startswith('ens'):
            chromosome, start, end = get_ensemblid_coordinates(region, assembly)
        elif region.startswith('chr'):
            chromosome, start, end = sanitize_coordinates(region)

    # Check if there are valid coordinates
    if not (chromosome and start and end):
        result['notification'] = 'No annotations found'
        return result
    result['coordinates'] = '{chr}:{start}-{end}'.format(
        chr=chromosome, start=start, end=end
    )

    # Search for peaks for the coordinates we got
    try:
        # including inner hits is very slow
        # figure out how to distinguish browser requests from .embed method requests
        if 'peak_metadata' in request.query_string:
            peak_query = get_peak_query(start, end, with_inner_hits=True, within_peaks=region_inside_peak_status)
        else:
            peak_query = get_peak_query(start, end, within_peaks=region_inside_peak_status)
        # NOTE(review): an assembly missing from _GENOME_TO_ALIAS raises
        # KeyError here and is reported as 'Error during search' - confirm
        # that this is the intended outcome for unknown genomes.
        peak_results = snp_es.search(body=peak_query,
                                     index=chromosome.lower(),
                                     doc_type=_GENOME_TO_ALIAS[assembly],
                                     size=99999)
    except Exception:
        # Stay best-effort for the caller, but leave a trace of what failed
        # instead of swallowing the error silently.
        log.exception("REGION_SEARCH: peak search failed for %s (genome %s)",
                      result['coordinates'], assembly)
        result['notification'] = 'Error during search'
        return result

    # One file can contribute many peaks; keep each file uuid once.
    file_uuids = list({hit['_id'] for hit in peak_results['hits']['hits']})
    result['notification'] = 'No results found'

    # if more than one peak found return the experiments with those peak files
    uuid_count = len(file_uuids)
    if uuid_count > MAX_CLAUSES_FOR_ES:
        log.error("REGION_SEARCH WARNING: region with %d file_uuids is being restricted to %d",
                  uuid_count, MAX_CLAUSES_FOR_ES)
        file_uuids = file_uuids[:MAX_CLAUSES_FOR_ES]
        uuid_count = len(file_uuids)

    if uuid_count:
        query = get_filtered_query('', [], set(), principals, ['Experiment'])
        del query['query']
        query['post_filter']['bool']['must'].append({
            'terms': {
                'embedded.files.uuid': file_uuids
            }
        })
        used_filters = set_filters(request, query, result)
        used_filters['files.uuid'] = file_uuids
        query['aggs'] = set_facets(_FACETS, used_filters, principals, ['Experiment'])
        # NOTE(review): schemas is a one-shot generator; if _format_facets
        # walks it more than once, later passes see nothing - confirm.
        schemas = (types[item_type].schema for item_type in ['Experiment'])
        es_results = es.search(
            body=query, index='experiment', doc_type='experiment', size=size, request_timeout=60
        )
        result['@graph'] = list(format_results(request, es_results['hits']['hits']))
        result['total'] = total = es_results['hits']['total']
        result['facets'] = BaseView._format_facets(es_results, _FACETS, used_filters, schemas, total, principals)
        result['peaks'] = list(peak_results['hits']['hits'])
        result['download_elements'] = get_peak_metadata_links(request)
        if result['total'] > 0:
            result['notification'] = 'Success'
            position_for_browser = format_position(result['coordinates'], 200)
            result.update(search_result_actions(request, ['Experiment'], es_results, position=position_for_browser))

    return result
Ejemplo n.º 20
0
    def preprocess_view(self, views=None, search_result_actions=None):  # pylint: disable=too-many-statements, too-many-branches, too-many-locals
        '''
        Main function to construct query and build view results json
        * Only publicly accessible function

        views: optional value stored under result['views'] when at least
            one doc type was resolved from the context or the request.
        search_result_actions: optional callable, invoked as
            search_result_actions(request, doc_types, es_results); the
            mapping it returns is merged into the result.

        Returns one of:
        * the result dict, with '@graph' populated (or empty plus a 404
          status code on the response when nothing matched);
        * the iterable produced by format_results (or [] when nothing
          matched) if self._return_generator is set;
        * the request's response object, with app_iter streaming the JSON
          body, when the scan path is taken and request.__parent__ is None.

        Raises HTTPBadRequest if a requested type is not in the type registry.
        '''
        types = self._types
        search_base = normalize_query(self._request)
        result = {
            '@context': self._request.route_path('jsonld_context'),
            '@id': '/search/' + search_base,
            '@type': ['Search'],
            'title': 'Search',
            'filters': [],
        }
        es_index = RESOURCES_INDEX
        search_audit = self._request.has_permission('search_audit')
        from_, size = get_pagination(self._request)
        search_term = prepare_search_term(self._request)
        # A context that carries a non-empty type_info.name pins the search
        # to that single type; otherwise the 'type' query params decide.
        if (
                hasattr(self._context, 'type_info') and
                hasattr(self._context.type_info, 'name') and
                self._context.type_info.name
            ):
            doc_types = [self._context.type_info.name]
        else:
            doc_types = self._request.params.getall('type')
            # A '*' anywhere in the requested types collapses the list to 'Item'.
            if '*' in doc_types:
                doc_types = ['Item']

        # Normalize to item_type
        try:
            doc_types = sorted({types[name].name for name in doc_types})
        except KeyError:
            # Check for invalid types
            bad_types = [t for t in doc_types if t not in types]
            msg = "Invalid type: {}".format(', '.join(bad_types))
            raise HTTPBadRequest(explanation=msg)
        # The "clear filters" link keeps only the searchTerm params when any
        # are present, and falls back to the type params otherwise.
        searchterm_specs = self._request.params.getall('searchTerm')
        searchterm_only = urlencode(
            [
                ("searchTerm", searchterm)
                for searchterm in searchterm_specs
            ]
        )
        if searchterm_only:
            clear_qs = searchterm_only
        else:
            clear_qs = urlencode([("type", typ) for typ in doc_types])
        search_route = self._request.route_path('search', slash='/')
        clear_route = '?' + clear_qs if clear_qs else ''
        result['clear_filters'] = search_route + clear_route
        if not doc_types:
            # No explicit type: picker mode searches 'Item', everything else
            # uses the view's default doc types.
            if self._request.params.get('mode') == 'picker':
                doc_types = ['Item']
            else:
                doc_types = self._default_doc_types
        else:
            # One 'type' filter entry per resolved type. Its 'remove' link is
            # the current query string minus the 'type' param(s) that resolve
            # to this same type object.
            for item_type in doc_types:
                t_thing = types[item_type]
                q_thing = urlencode(
                    [
                        (k.encode('utf-8'), v.encode('utf-8'))
                        for k, v in self._request.params.items()
                        if not (k == 'type' and types['Item' if v == '*' else v] is t_thing)
                    ]
                )
                result['filters'].append({
                    'field': 'type',
                    'term': t_thing.name,
                    'remove': '{}?{}'.format(self._request.path, q_thing)
                })
            # views are only attached when at least one type was resolved.
            if views:
                result['views'] = views
        search_fields, _ = get_search_fields(self._request, doc_types)
        query = get_filtered_query(
            search_term,
            search_fields,
            sorted(list_result_fields(self._request, doc_types)),
            self._principals,
            doc_types,
        )
        schemas = [types[doc_type].schema for doc_type in doc_types]
        columns = list_visible_columns_for_schemas(self._request, schemas)
        if columns:
            result['columns'] = columns
        # A bare '*' search term needs no query_string clause; any other term
        # is additionally matched against these extra fields.
        if search_term == '*':
            del query['query']['query_string']
        else:
            query['query']['query_string']['fields'].extend(
                ['_all', '*.uuid', '*.md5sum', '*.submitted_file_name']
            )
        set_sort_order(self._request, search_term, types, doc_types, query, result)
        used_filters = set_filters(self._request, query, result)
        facets = [
            ('type', {'title': 'Data Type'}),
        ]
        # Schema-defined facets are only added for single-type searches.
        if len(doc_types) == 1 and 'facets' in types[doc_types[0]].schema:
            facets.extend(types[doc_types[0]].schema['facets'].items())
        # `and` binds tighter than `or`: an audit facet is kept when the user
        # has search_audit permission AND is in group.submitter, OR when the
        # facet is not an INTERNAL_ACTION one.
        for audit_facet in self._audit_facets:
            if (
                    search_audit and
                    'group.submitter' in self._principals or
                    'INTERNAL_ACTION' not in audit_facet[0]
                ):
                facets.append(audit_facet)
        query['aggs'] = set_facets(facets, used_filters, self._principals, doc_types)
        query = sort_query(query)
        # Unlimited (size is None) or very large requests are fetched with
        # scan() further down instead of a single paged search.
        do_scan = size is None or size > 1000
        if not self._request.params.get('type') or 'Item' in doc_types:
            es_index = RESOURCES_INDEX
        else:
            es_index = [
                types[type_name].item_type
                for type_name in doc_types
                if hasattr(types[type_name], 'item_type')
            ]
        if do_scan:
            # On the scan path this search supplies the total and the
            # aggregations; the hits themselves are re-read with scan() below.
            es_results = self._elastic_search.search(
                body=query,
                index=es_index,
                search_type='query_then_fetch'
            )
        else:
            es_results = self._elastic_search.search(
                body=query,
                index=es_index,
                from_=from_, size=size,
                request_cache=True
            )
        total = es_results['hits']['total']
        result['total'] = total
        # NOTE(review): schemas is rebound to a one-shot generator here; if
        # _format_facets iterates it more than once, later passes see
        # nothing - confirm.
        schemas = (types[item_type].schema for item_type in doc_types)
        result['facets'] = self._format_facets(
            es_results,
            facets,
            used_filters,
            schemas,
            total,
            self._principals
        )
        if search_result_actions:
            result.update(
                search_result_actions(
                    self._request, doc_types, es_results
                )
            )
        # When the page is smaller than the total, offer a link to the same
        # search with limit=all.
        if size is not None and size < result['total']:
            params = [(k, v) for k, v in self._request.params.items() if k != 'limit']
            params.append(('limit', 'all'))
            result['all'] = '%s?%s' % (
                self._request.resource_path(self._context),
                urlencode(params)
            )
        # An empty result set is reported with a 404 status on the response.
        if not result['total']:
            self._request.response.status_code = 404
            result['notification'] = 'No results found'
            result['@graph'] = []
            return result if not self._return_generator else []
        result['notification'] = 'Success'
        if not do_scan:
            graph = format_results(
                self._request,
                es_results['hits']['hits'],
                result
            )
            if self._return_generator:
                return graph
            result['@graph'] = list(graph)
            return result
        # Scan path: aggregations were already collected by the search above,
        # so they are dropped from the query before scanning.
        del query['aggs']
        if size is None:
            hits = scan(
                self._elastic_search,
                query=query,
                index=es_index,
                preserve_order=False
            )
        else:
            hits = scan(
                self._elastic_search,
                query=query,
                index=es_index,
                from_=from_,
                size=size,
                preserve_order=False
            )
        graph = format_results(self._request, hits, result)
        # NOTE(review): a non-None request.__parent__ presumably marks a
        # subrequest, which gets a fully built dict rather than a streamed
        # response - confirm.
        if self._request.__parent__ is not None or self._return_generator:
            if self._return_generator:
                return graph
            result['@graph'] = list(graph)
            return result
        # Otherwise stream the JSON body through the response's app_iter
        # instead of building the whole '@graph' list first.
        app_iter = iter_long_json('@graph', graph, result)
        self._request.response.content_type = 'application/json'
        if str is bytes:  # Python 2 vs 3 wsgi differences
            self._request.response.app_iter = app_iter  # Python 2
        else:
            self._request.response.app_iter = (
                item.encode('utf-8') for item in app_iter
            )
        return self._request.response
Ejemplo n.º 21
0
def test_set_filters_exists_missing():
    """A `*` value yields an `exists` clause: `must` for `field`, `must_not` for `field!`."""
    params = (
        ('field1', '*'),
        ('field2!', '*'),
    )
    fake_request = FakeRequest(params)
    es_query = {
        'query': {'query_string': {}},
        'post_filter': {'bool': {'must': [], 'must_not': []}},
    }
    search_result = {'filters': []}

    applied = set_filters(fake_request, es_query, search_result)

    # Both params are reported back under their original names.
    expected_applied = {'field1': ['*'], 'field2!': ['*']}
    assert applied == expected_applied

    # The query is mutated in place; only the post_filter lists change.
    exists_field1 = {'exists': {'field': 'embedded.field1'}}
    exists_field2 = {'exists': {'field': 'embedded.field2'}}
    expected_query = {
        'query': {'query_string': {}},
        'post_filter': {
            'bool': {
                'must': [exists_field1],
                'must_not': [exists_field2],
            },
        },
    }
    assert es_query == expected_query

    # Each filter's remove link is the query string without that filter.
    expected_filters = [
        {'field': 'field1', 'term': '*', 'remove': '/search/?field2%21=%2A'},
        {'field': 'field2!', 'term': '*', 'remove': '/search/?field1=%2A'},
    ]
    assert search_result == {'filters': expected_filters}
Ejemplo n.º 22
0
    def preprocess_view(self, views=None, search_result_actions=None):  # pylint: disable=too-many-statements, too-many-branches, too-many-locals
        '''
        Main function to construct query and build view results json
        * Only publicly accessible function

        views: optional value stored under result['views'] when at least
            one doc type was resolved from the context or the request.
        search_result_actions: optional callable, invoked as
            search_result_actions(request, doc_types, es_results); the
            mapping it returns is merged into the result.

        Returns one of:
        * the result dict, with '@graph' populated (or empty plus a 404
          status code on the response when nothing matched);
        * the iterable produced by format_results (or [] when nothing
          matched) if self._return_generator is set;
        * the request's response object, with app_iter streaming the JSON
          body, when the scan path is taken and request.__parent__ is None.

        Raises HTTPBadRequest if a requested type is not in the type registry.
        '''
        types = self._types
        search_base = normalize_query(self._request)
        result = {
            '@context': self._request.route_path('jsonld_context'),
            '@id': '/search/' + search_base,
            '@type': ['Search'],
            'title': 'Search',
            'filters': [],
        }
        es_index = RESOURCES_INDEX
        search_audit = self._request.has_permission('search_audit')
        from_, size = get_pagination(self._request)
        search_term = prepare_search_term(self._request)
        # A context that carries a non-empty type_info.name pins the search
        # to that single type; otherwise the 'type' query params decide.
        if (hasattr(self._context, 'type_info')
                and hasattr(self._context.type_info, 'name')
                and self._context.type_info.name):
            doc_types = [self._context.type_info.name]
        else:
            doc_types = self._request.params.getall('type')
            # A '*' anywhere in the requested types collapses the list to 'Item'.
            if '*' in doc_types:
                doc_types = ['Item']

        # Normalize to item_type
        try:
            doc_types = sorted({types[name].name for name in doc_types})
        except KeyError:
            # Check for invalid types
            bad_types = [t for t in doc_types if t not in types]
            msg = "Invalid type: {}".format(', '.join(bad_types))
            raise HTTPBadRequest(explanation=msg)
        # The "clear filters" link keeps only the searchTerm params when any
        # are present, and falls back to the type params otherwise.
        searchterm_specs = self._request.params.getall('searchTerm')
        searchterm_only = urlencode([("searchTerm", searchterm)
                                     for searchterm in searchterm_specs])
        if searchterm_only:
            clear_qs = searchterm_only
        else:
            clear_qs = urlencode([("type", typ) for typ in doc_types])
        search_route = self._request.route_path('search', slash='/')
        clear_route = '?' + clear_qs if clear_qs else ''
        result['clear_filters'] = search_route + clear_route
        if not doc_types:
            # No explicit type: picker mode searches 'Item', everything else
            # uses the view's default doc types.
            if self._request.params.get('mode') == 'picker':
                doc_types = ['Item']
            else:
                doc_types = self._default_doc_types
        else:
            # One 'type' filter entry per resolved type. Its 'remove' link is
            # the current query string minus the 'type' param(s) that resolve
            # to this same type object.
            for item_type in doc_types:
                t_thing = types[item_type]
                q_thing = urlencode([
                    (k.encode('utf-8'), v.encode('utf-8'))
                    for k, v in self._request.params.items()
                    if not (k == 'type'
                            and types['Item' if v == '*' else v] is t_thing)
                ])
                result['filters'].append({
                    'field':
                    'type',
                    'term':
                    t_thing.name,
                    'remove':
                    '{}?{}'.format(self._request.path, q_thing)
                })
            # views are only attached when at least one type was resolved.
            if views:
                result['views'] = views
        search_fields, _ = get_search_fields(self._request, doc_types)
        query = get_filtered_query(
            search_term,
            search_fields,
            sorted(list_result_fields(self._request, doc_types)),
            self._principals,
            doc_types,
        )
        schemas = [types[doc_type].schema for doc_type in doc_types]
        columns = list_visible_columns_for_schemas(self._request, schemas)
        if columns:
            result['columns'] = columns
        # A bare '*' search term needs no query_string clause; any other term
        # is additionally matched against these extra fields.
        if search_term == '*':
            del query['query']['query_string']
        else:
            query['query']['query_string']['fields'].extend(
                ['_all', '*.uuid', '*.md5sum', '*.submitted_file_name'])
        set_sort_order(self._request, search_term, types, doc_types, query,
                       result)
        used_filters = set_filters(self._request, query, result)
        facets = [
            ('type', {
                'title': 'Data Type'
            }),
        ]
        # Schema-defined facets are only added for single-type searches.
        if len(doc_types) == 1 and 'facets' in types[doc_types[0]].schema:
            facets.extend(types[doc_types[0]].schema['facets'].items())
        # `and` binds tighter than `or`: an audit facet is kept when the user
        # has search_audit permission AND is in group.submitter, OR when the
        # facet is not an INTERNAL_ACTION one.
        for audit_facet in self._audit_facets:
            if (search_audit and 'group.submitter' in self._principals
                    or 'INTERNAL_ACTION' not in audit_facet[0]):
                facets.append(audit_facet)
        query['aggs'] = set_facets(facets, used_filters, self._principals,
                                   doc_types)
        query = sort_query(query)
        # Unlimited (size is None) or very large requests are fetched with
        # scan() further down instead of a single paged search.
        do_scan = size is None or size > 1000
        if not self._request.params.get('type') or 'Item' in doc_types:
            es_index = RESOURCES_INDEX
        else:
            es_index = [
                types[type_name].item_type for type_name in doc_types
                if hasattr(types[type_name], 'item_type')
            ]
        if do_scan:
            # On the scan path this search supplies the total and the
            # aggregations; the hits themselves are re-read with scan() below.
            es_results = self._elastic_search.search(
                body=query, index=es_index, search_type='query_then_fetch')
        else:
            es_results = self._elastic_search.search(body=query,
                                                     index=es_index,
                                                     from_=from_,
                                                     size=size,
                                                     request_cache=True)
        total = es_results['hits']['total']
        result['total'] = total
        # NOTE(review): schemas is rebound to a one-shot generator here; if
        # _format_facets iterates it more than once, later passes see
        # nothing - confirm.
        schemas = (types[item_type].schema for item_type in doc_types)
        result['facets'] = self._format_facets(es_results, facets,
                                               used_filters, schemas, total,
                                               self._principals)
        if search_result_actions:
            result.update(
                search_result_actions(self._request, doc_types, es_results))
        # When the page is smaller than the total, offer a link to the same
        # search with limit=all.
        if size is not None and size < result['total']:
            params = [(k, v) for k, v in self._request.params.items()
                      if k != 'limit']
            params.append(('limit', 'all'))
            result['all'] = '%s?%s' % (self._request.resource_path(
                self._context), urlencode(params))
        # An empty result set is reported with a 404 status on the response.
        if not result['total']:
            self._request.response.status_code = 404
            result['notification'] = 'No results found'
            result['@graph'] = []
            return result if not self._return_generator else []
        result['notification'] = 'Success'
        if not do_scan:
            graph = format_results(self._request, es_results['hits']['hits'],
                                   result)
            if self._return_generator:
                return graph
            result['@graph'] = list(graph)
            return result
        # Scan path: aggregations were already collected by the search above,
        # so they are dropped from the query before scanning.
        del query['aggs']
        if size is None:
            hits = scan(self._elastic_search,
                        query=query,
                        index=es_index,
                        preserve_order=False)
        else:
            hits = scan(self._elastic_search,
                        query=query,
                        index=es_index,
                        from_=from_,
                        size=size,
                        preserve_order=False)
        graph = format_results(self._request, hits, result)
        # NOTE(review): a non-None request.__parent__ presumably marks a
        # subrequest, which gets a fully built dict rather than a streamed
        # response - confirm.
        if self._request.__parent__ is not None or self._return_generator:
            if self._return_generator:
                return graph
            result['@graph'] = list(graph)
            return result
        # Otherwise stream the JSON body through the response's app_iter
        # instead of building the whole '@graph' list first.
        app_iter = iter_long_json('@graph', graph, result)
        self._request.response.content_type = 'application/json'
        if str is bytes:  # Python 2 vs 3 wsgi differences
            self._request.response.app_iter = app_iter  # Python 2
        else:
            self._request.response.app_iter = (item.encode('utf-8')
                                               for item in app_iter)
        return self._request.response