def _create_request(self,
                        catalog: CatalogName,
                        filters: FiltersJSON,
                        post_filter: bool = False,
                        source_filter: SourceFilters = None,
                        enable_aggregation: bool = True,
                        entity_type='files') -> Search:
        """
        Create an ElasticSearch request from the given filters and the facet
        configuration of the given catalog.

        :param catalog: The name of the catalog whose service configuration
               and index are used to build the request.
        :param filters: The 'filters' parameter. It is passed through
               `_translate_filters` with the catalog's field mapping before
               the query and the aggregations are built from it.
        :param post_filter: If true, the filter query is attached as a post
               filter, otherwise as the regular query (i.e. faceting or not).
        :param List source_filter: A list of "foo.bar" field paths (see
               https://www.elastic.co/guide/en/elasticsearch/reference/5.5/search-request-source-filtering.html).
               If empty or None, the "bundles" field is excluded from the
               source for every entity type other than "files" and "bundles".
        :param enable_aggregation: If true, one aggregation per configured
               facet is added to the request. If false, no aggregations are
               added and the facet configuration is effectively ignored.
        :param entity_type: the string referring to the entity type used to get
               the ElasticSearch index to search
        :return: Returns the Search object that can be used for executing
                 the request
        """
        service_config = self.service_config(catalog)
        field_mapping = service_config.translation
        # Built eagerly, even when aggregation is disabled, so that a facet
        # without a translation fails the same way regardless of the flag.
        facet_config = {
            key: field_mapping[key]
            for key in service_config.facets
        }
        es_search = Search(using=self.es_client,
                           index=config.es_index_name(catalog=catalog,
                                                      entity_type=entity_type,
                                                      aggregate=True))
        filters = self._translate_filters(catalog, filters, field_mapping)

        es_query = self._create_query(catalog, filters)

        if post_filter:
            es_search = es_search.post_filter(es_query)
        else:
            es_search = es_search.query(es_query)

        if source_filter:
            es_search = es_search.source(includes=source_filter)
        elif entity_type not in ("files", "bundles"):
            es_search = es_search.source(excludes="bundles")

        if enable_aggregation:
            # Only the facet names are needed here; the translated field for
            # each facet is looked up from facet_config by _create_aggregate.
            for agg in facet_config:
                # FIXME: Aggregation filters may be redundant when post_filter is false
                #        https://github.com/DataBiosphere/azul/issues/3435
                es_search.aggs.bucket(
                    agg,
                    self._create_aggregate(catalog, filters, facet_config,
                                           agg))

        return es_search
Example #2
0
def get_basic_search_query(
    entity,
    term,
    permission_filters_by_entity=None,
    offset=0,
    limit=100,
    fields_to_exclude=None,
):
    """
    Build a basic search for the given term across all global search apps.

    The term is matched against the union of the SEARCH_FIELDS of every
    global search app. Hits are restricted to the given entity through a
    post filter, and a `count_by_type` terms aggregation on `_document_type`
    is attached so that the number of results in other entities is returned
    as well.

    :param entity: Object whose `get_app_name()` gives the document type the
                   hits are restricted to.
    :param term: The search term, passed on to `_build_term_query`.
    :param permission_filters_by_entity: List of pairs of entities and corresponding permission
                                         filters. Only entities in this list are included in the
                                         results, and those entities are also filtered using
                                         the corresponding permission filters.
    :param offset: Index of the first hit to return.
    :param limit: Maximum number of hits to return; adjusted by `_clip_limit`
                  before it is used.
    :param fields_to_exclude: Passed as `excludes` to the source filtering of
                              the search.
    :return: The search, sliced to `[offset:offset + limit]`.
    """
    limit = _clip_limit(offset, limit)

    apps = tuple(get_global_search_apps_as_mapping().values())
    read_aliases = [app.es_model.get_read_alias() for app in apps]

    # De-duplicate the search fields of all apps, then sort them so that the
    # same inputs always generate exactly the same query.
    search_fields = sorted({
        field
        for app in apps
        for field in app.es_model.SEARCH_FIELDS
    })

    term_query = _build_term_query(term, fields=search_fields)
    search = Search(index=read_aliases).query(term_query)

    permission_query = _build_global_permission_query(
        permission_filters_by_entity,
    )
    if permission_query:
        search = search.filter(permission_query)

    # Restrict the hits (but not the aggregation below) to the requested
    # entity by applying the document type condition as a post filter.
    entity_filter = Bool(should=Term(_document_type=entity.get_app_name()))
    search = search.post_filter(entity_filter)
    search = search.sort('_score', 'id')
    search = search.source(excludes=fields_to_exclude)

    search.aggs.bucket('count_by_type', 'terms', field='_document_type')

    end = offset + limit
    return search[offset:end]
    def _create_autocomplete_request(self,
                                     catalog: CatalogName,
                                     filters: FiltersJSON,
                                     es_client,
                                     _query,
                                     search_field,
                                     entity_type='files'):
        """
        Build the ElasticSearch request for an autocomplete lookup.

        The translated filters are attached as a post filter, while the main
        query is a prefix query of `_query` against the search field.

        :param catalog: The name of the catalog to create the ES request for.

        :param filters: The 'filters' parameter from '/keywords'.

        :param es_client: The ElasticSearch client object used to configure the
                          Search object

        :param _query: The query (string) to use for querying.

        :param search_field: The field to do the query on. It is replaced by
                             its entry in the autocomplete translation of the
                             entity type if there is one, and used as given
                             otherwise.

        :param entity_type: the string referring to the entity type used to get
                            the ElasticSearch index to search

        :return: Returns the Search object that can be used for executing the
                 request
        """
        translation = self.service_config(catalog).autocomplete_translation[entity_type]
        index_name = config.es_index_name(catalog=catalog,
                                          entity_type=entity_type,
                                          aggregate=True)
        request = Search(using=es_client, index=index_name)
        translated_filters = self._translate_filters(catalog, filters, translation)

        # Fields without a translation are queried under the name given.
        if search_field in translation:
            search_field = translation[search_field]

        filter_query = self._create_query(catalog, translated_filters)
        request = request.post_filter(filter_query)
        prefix_query = Q('prefix', **{str(search_field): _query})
        return request.query(prefix_query)
Example #4
0
    def apply_extras(self, search, options):
        """
        Add score boosting, visibility restrictions and the optional species
        and source filters to a search, then defer to the parent class.

        :param search: The incoming search. Its `query` is wrapped in a
                       `function_score` query.
                       NOTE(review): the wrapping starts from a brand new
                       `Search()`, so only `search.query` is carried over from
                       the incoming object; confirm that this is intended.
        :param options: Request options. The attributes read here are
                        `current_user`, `species`, `species_facet_filter`,
                        `source`, `source_facet_filter` and `aggs`.
        :raises HTTPError: With status 400 if `species` contains a non-string
                           or a non-numeric string, or if `source` contains a
                           non-string, unless 'all' is among the values.
        :return: Whatever the parent class's `apply_extras` returns for the
                 modified search.
        """
        # Score boost per taxid; weights are passed on as strings.
        taxid_weights = (
            (9606, "1.55"),  # human
            (10090, "1.3"),  # mouse
            (10116, "1.1"),  # rat
        )
        boost_functions = [
            {"filter": {"term": {"taxid": taxid}}, "weight": weight}
            for taxid, weight in taxid_weights
        ]
        search = Search().query("function_score",
                                query=search.query,
                                score_mode="first",
                                functions=boost_functions)

        # Filter results according to authenticated user permissions:
        # everybody sees public documents, an authenticated user also sees
        # the documents they authored.
        visibility = Q("match", is_public=True)
        if options.current_user is not None:
            visibility = visibility | Q("match", author=options.current_user)
        search = search.filter(visibility)

        species = options.species
        if species:
            # 'all' disables the species filter, and its validation, entirely.
            if 'all' not in species:
                if any(not isinstance(item, str) for item in species):
                    raise HTTPError(
                        400,
                        reason="species must be strings or integer strings.")
                if any(not item.isnumeric() for item in species):
                    raise HTTPError(
                        400,
                        reason="cannot map some species to taxids.")
                search = search.filter('terms', taxid=species)

            # The facet filter is only considered when species were given.
            if options.aggs and options.species_facet_filter:
                search = search.post_filter(
                    'terms', taxid=options.species_facet_filter)

        sources = options.source
        if sources:
            # 'all' disables the source filter, and its validation, entirely.
            if 'all' not in sources:
                if any(not isinstance(src, str) for src in sources):
                    raise HTTPError(400, reason="source must be strings.")
                search = search.filter('terms', source=sources)

            # The facet filter is only considered when sources were given.
            if options.aggs and options.source_facet_filter:
                search = search.post_filter(
                    'terms', source=options.source_facet_filter)

        return super().apply_extras(search, options)