Example #1
def create_enum_range_min_max_filter(field, query_term):
    """Creates an ElasticSearch combined enum range filter.

    For example the fields `climbing_rating_min` and `climbing_rating_max` are
    combined into a single search field. Searching for `crat=4c,6b` returns
    the waypoints where the min/max climbing ratings match the given range.
    """
    query_terms = query_term.split(',')
    map_enum = partial(map_enum_to_int, field._enum_mapper)
    range_values = list(map(map_enum, query_terms))
    range_values = [t for t in range_values if t is not None]

    n = len(range_values)
    if n != 2:
        return None

    kwargs_start = {field.field_min: {'gt': range_values[1]}}
    kwargs_end = {field.field_max: {'lt': range_values[0]}}
    return Bool(must_not=Bool(should=[
        Range(**kwargs_start),
        Range(**kwargs_end),
        Bool(must=[
            Missing(field=field.field_min),
            Missing(field=field.field_max)
        ])
    ]))
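For reference, a minimal sketch of the query body this helper produces, with hypothetical field names and already-mapped enum values; Exists stands in for Missing, since the missing query was removed in newer Elasticsearch versions:

from elasticsearch_dsl.query import Bool, Exists, Range

# Hypothetical stand-ins for field.field_min / field.field_max and the mapped enum range.
lower, upper = 4, 12
query = Bool(must_not=Bool(should=[
    Range(climbing_rating_min={'gt': upper}),   # min rating above the requested range
    Range(climbing_rating_max={'lt': lower}),   # max rating below the requested range
    Bool(must_not=[Exists(field='climbing_rating_min'),
                   Exists(field='climbing_rating_max')])  # neither rating set
]))
print(query.to_dict())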
Example #2
    def __init__(self, **kwargs):
        """Initialize instance."""
        super(B2ShareRecordsSearch, self).__init__(**kwargs)
        if _in_draft_request():
            if not current_user.is_authenticated:
                raise AnonymousDepositSearch()
            # super user can read all deposits
            if StrictDynamicPermission(superuser_access).can():
                return

            filters = [Q('term', **{'_deposit.owners': current_user.id})]

            readable_communities = list_readable_communities(current_user.id)
            for publication_state in readable_communities.all:
                filters.append(Q('term', publication_state=publication_state))
            for community, publication_states in readable_communities.communities.items():
                for publication_state in publication_states:
                    filters.append(Bool(
                        must=[Q('term', publication_state=publication_state),
                              Q('term', community=str(community))],
                    ))

            # otherwise filter returned deposits
            self.query = Bool(
                must=self.query._proxied,
                should=filters,
                minimum_should_match=1
            )
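The resulting shape, must plus should with minimum_should_match=1, keeps the original query and additionally requires at least one ownership or community filter to match. A minimal sketch with placeholder values:

from elasticsearch_dsl import Q
from elasticsearch_dsl.query import Bool

# Placeholder stand-ins for the proxied base query and the per-user filters.
base_query = Q('match_all')
filters = [
    Q('term', **{'_deposit.owners': 42}),
    Bool(must=[Q('term', publication_state='submitted'),
               Q('term', community='some-community-id')]),
]
combined = Bool(must=base_query, should=filters, minimum_should_match=1)
print(combined.to_dict())  # documents must match the base query and at least one filter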
Example #3
def create_index_specific_version_filter(indexes_and_versions):
    """
    Creates the elasticsearch-dsl Bool object necessary to query the given indexes at the given
    specific versions. If there are multiple indexes that require the same version then a terms
    query will be created covering the group rather than several term queries for each index - this
    is probably no different in terms of performance but it does keep the size of the query down
    when large numbers of indexes are queried. If all indexes require the same version then a single
    term query is returned (using the create_version_query above) which has no index filtering in it
    at all.

    :param indexes_and_versions: a dict of index names -> versions
    :return: an elasticsearch-dsl object
    """
    # flip the dict we've been given to group by the version
    by_version = defaultdict(list)
    for index, version in indexes_and_versions.items():
        by_version[version].append(index)

    if len(by_version) == 1:
        # there's only one version, just use it in a single meta.version check with no indexes
        return create_version_query(next(iter(by_version.keys())))
    else:
        filters = []
        for version, indexes in by_version.items():
            version_filter = create_version_query(version)
            if len(indexes) == 1:
                # there's only one index requiring this version so use a term query
                filters.append(Bool(filter=[Q(u'term', _index=indexes[0]), version_filter]))
            else:
                # there are a few indexes using this version, query them using terms as a group
                filters.append(Bool(filter=[Q(u'terms', _index=indexes), version_filter]))
        return Bool(should=filters, minimum_should_match=1)
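A minimal sketch of the grouping logic with a hypothetical input dict, using a plain term query on meta.version in place of the project-specific create_version_query:

from collections import defaultdict
from elasticsearch_dsl import Q
from elasticsearch_dsl.query import Bool

indexes_and_versions = {'index-a': 3, 'index-b': 3, 'index-c': 7}  # hypothetical input

by_version = defaultdict(list)
for index, version in indexes_and_versions.items():
    by_version[version].append(index)

filters = []
for version, indexes in by_version.items():
    version_filter = Q('term', **{'meta.version': version})  # stand-in for create_version_query
    index_filter = (Q('term', _index=indexes[0]) if len(indexes) == 1
                    else Q('terms', _index=indexes))
    filters.append(Bool(filter=[index_filter, version_filter]))

print(Bool(should=filters, minimum_should_match=1).to_dict())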
Example #4
def create_date_range_filter(field, query_term):
    """Creates an ElasticSearch date-range filter.

    This filter type is currently only used for Outing.date_start/date_end.

    Valid query terms are:
        2016-01-01
        2016-01-01,2016-01-01
        2016-01-01,2016-01-03

    """
    query_terms = query_term.split(',')
    range_values = list(map(parse_date, query_terms))
    range_values = [t for t in range_values if t is not None]

    n = len(range_values)
    if n == 0:
        return None
    elif n == 1 or range_values[0] == range_values[1]:
        # single date
        kwargs_start = {field.field_date_start: {'lte': range_values[0]}}
        kwargs_end = {field.field_date_end: {'gte': range_values[0]}}
        return Bool(must=[Range(**kwargs_start), Range(**kwargs_end)])
    else:
        # date range
        kwargs_start = {field.field_date_start: {'gt': range_values[1]}}
        kwargs_end = {field.field_date_end: {'lt': range_values[0]}}
        return Bool(must_not=Bool(should=[
            Range(**kwargs_start),
            Range(**kwargs_end)
        ]))
Example #5
    def search_loans_by_pid(cls,
                            item_pid=None,
                            document_pid=None,
                            filter_states=None,
                            exclude_states=None):
        """Yield loans matching the given item or document PID and states."""
        search = cls()

        if filter_states:
            search = search.query(
                Bool(filter=[Q('terms', state=filter_states)]))
        elif exclude_states:
            search = search.query(
                Bool(filter=[~Q('terms', state=exclude_states)]))

        if document_pid:
            search = search.filter(
                'term', document_pid=document_pid).source(includes='loan_pid')
        elif item_pid:
            search = search.filter(
                'term', item_pid=item_pid).source(includes='loan_pid')

        for result in search.scan():
            if result.loan_pid:
                yield result
Example #6
 def test_create_filter_date_range(self):
     self.assertEqual(create_filter('date', '', SearchOuting), None)
     self.assertEqual(create_filter('date', 'invalid date', SearchOuting),
                      None)
     self.assertEqual(
         create_filter('date', '2016-01-01', SearchOuting),
         Bool(must=[
             Range(date_start={'lte': '2016-01-01'}),
             Range(date_end={'gte': '2016-01-01'})
         ]))
     self.assertEqual(
         create_filter('date', '2016-01-01,invalid date', SearchOuting),
         Bool(must=[
             Range(date_start={'lte': '2016-01-01'}),
             Range(date_end={'gte': '2016-01-01'})
         ]))
     self.assertEqual(
         create_filter('date', '2016-01-01,2016-01-01', SearchOuting),
         Bool(must=[
             Range(date_start={'lte': '2016-01-01'}),
             Range(date_end={'gte': '2016-01-01'})
         ]))
     self.assertEqual(
         create_filter('date', '2016-01-01,2016-01-03', SearchOuting),
         Bool(must_not=Bool(should=[
             Range(date_start={'gt': '2016-01-03'}),
             Range(date_end={'lt': '2016-01-01'})
         ])))
Example #7
def test_cernopendata_query_parser():
    assert cernopendata_query_parser('/Btau') == Bool(must=[QueryString(query='"/Btau"')], must_not=[Match(distribution__availability__keyword='ondemand')])
    assert cernopendata_query_parser('"/Btau"') == Bool(must=[QueryString(query='"/Btau"')], must_not=[Match(distribution__availability__keyword='ondemand')])
    assert cernopendata_query_parser('/btau AND CMS') == Bool(must=[QueryString(query='"/btau" AND CMS')], must_not=[Match(distribution__availability__keyword='ondemand')])
    assert cernopendata_query_parser('"/btau" AND CMS') == Bool(must=[QueryString(query='"/btau" AND CMS')], must_not=[Match(distribution__availability__keyword='ondemand')])
    assert cernopendata_query_parser('CMS AND /btau') == Bool(must=[QueryString(query='CMS AND "/btau"')], must_not=[Match(distribution__availability__keyword='ondemand')])
    assert cernopendata_query_parser('CMS AND /btau', show_ondemand='true') == QueryString(query='CMS AND "/btau"')
Example #8
def _build_subentity_query(id_field, name_field, value, exact):
    """Crea una condición de búsqueda por propiedades de una subentidad. Esta
    condición se utiliza para filtrar resultados utilizando IDs o nombre de una
    subentidad contenida por otra. Por ejemplo, se pueden buscar departamentos
    filtrando por nombre de provincia, o localidades filtrando por IDS de
    municipios.

    Args:
        id_field (str): Nombre del campo de ID de la subentidad.
        name_field (str): Nombre del campo de nombre de la subentidad.
        value (list, str, tuple): Valor a buscar. En caso de ser una lista,
            representa una lista de IDs. En caso de ser un string, representa
            un nombre. En caso de ser una tupla, representa una lista de IDs y
            un nombre (buscar ambos unidos por OR).
        exact (bool): Activa la búsqueda por nombres exactos (en caso de que
            'value' sea de tipo str).

    Returns:
            Query: Condición para Elasticsearch.

    """
    if isinstance(value, list):
        return Bool(filter=[_build_terms_query(id_field, value)])
    if isinstance(value, tuple):
        ids, name = value
        return (_build_name_query(name_field, name, exact)
                | Bool(filter=[_build_terms_query(id_field, ids)]))

    return _build_name_query(name_field, value, exact)
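The tuple branch relies on the | operator, which combines two elasticsearch-dsl queries into a bool should clause. A short sketch with hypothetical field names:

from elasticsearch_dsl import Q
from elasticsearch_dsl.query import Bool

# Hypothetical name and ID filters for a sub-entity.
name_query = Q('match', subentity_name='Buenos Aires')
ids_query = Bool(filter=[Q('terms', subentity_id=['06', '02'])])

print((name_query | ids_query).to_dict())
# {'bool': {'should': [{'match': ...}, {'bool': {'filter': ...}}]}}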
Example #9
    def query(self, search, query):
        """
        Customize search results to support extra functionality.

        If `self.projects` was given, we use it to filter the documents.
        Only filtering by a list of slugs is supported.

        Also:

        * Adds SimpleQueryString with `self.operators` instead of default query.
        * Adds HTML encoding of results to avoid XSS issues.
        """
        search = search.highlight_options(**self._highlight_options)
        search = search.source(excludes=self.excludes)

        queries = self._get_queries(
            query=query,
            fields=self.fields,
        )

        # Run a bool query with should, so it returns results where either of the queries matches.
        bool_query = Bool(should=queries)

        # Filter by project slugs.
        if self.projects:
            if isinstance(self.projects, list):
                projects_query = Bool(filter=Terms(slug=self.projects))
                bool_query = Bool(must=[bool_query, projects_query])
            else:
                raise ValueError('projects must be a list!')

        search = search.query(bool_query)
        return search
Example #10
 def __init__(self, **kwargs):
     """Initialize instance."""
     super(ExampleSearch, self).__init__(**kwargs)
     if not current_user.is_authenticated:
         if self.query._proxied:
             self.query = self.query._proxied & Q(
                 Bool(filter=[Q('term', public=1)]))
         else:
             self.query = Q(Bool(filter=[Q('term', public=1)]))
Example #11
 def inner(values):
     if 'filling' in values:
         return Bool(should=[
             Q('terms', **{field: values}),
             Bool(must_not=[Q('exists', field='state')])
         ],
                     minimum_should_match=1)
     else:
         return Q('terms', **{field: values})
Example #12
def get_filter_obj(type, filters, filter_fields):
    if len(filter_fields) == 0:
        filter_fields = [None] * len(filters)  # use default filter_field for query type (defined in make_filter())
    unique_fields = set(filter_fields)
    must_bools = []
    for agg_type in unique_fields:
        type_filters = [x for x in zip(filters, filter_fields) if x[1] == agg_type]
        should_bool = Bool(should=[make_filter(type, agg_type, f) for f, t in type_filters])
        must_bools += [should_bool]
    return Bool(must=must_bools)
Example #13
    def test_construction_of_filter_with_two_attributes(self, test_domain):
        q1 = test_domain.get_dao(Person).query.filter(first_name='Jane',
                                                      last_name='Doe')
        q2 = test_domain.get_dao(Person).query.filter(
            first_name='Jane').filter(last_name='Doe')

        filters1 = q1._owner_dao._build_filters(q1._criteria)
        filters2 = q2._owner_dao._build_filters(q2._criteria)

        assert filters1 == Bool(
            must=[Term(first_name='Jane'),
                  Term(last_name='Doe')])
        assert filters2 == Bool(
            must=[Term(first_name='Jane'),
                  Term(last_name='Doe')])
Example #14
    def test_construction_of_filter_with_two_attributes(self, test_domain):
        q1 = test_domain.repository_for(Person)._dao.query.filter(
            first_name="Jane", last_name="Doe")
        q2 = (test_domain.repository_for(Person)._dao.query.filter(
            first_name="Jane").filter(last_name="Doe"))

        filters1 = q1._owner_dao._build_filters(q1._criteria)
        filters2 = q2._owner_dao._build_filters(q2._criteria)

        assert filters1 == Bool(
            must=[Term(first_name="Jane"),
                  Term(last_name="Doe")])
        assert filters2 == Bool(
            must=[Term(first_name="Jane"),
                  Term(last_name="Doe")])
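These assertions work because elasticsearch-dsl query objects compare by their serialized structure, so two independently built filters are equal when they produce the same body:

from elasticsearch_dsl.query import Bool, Term

a = Bool(must=[Term(first_name='Jane'), Term(last_name='Doe')])
b = Bool(must=[Term(first_name='Jane'), Term(last_name='Doe')])
print(a == b)       # True: equality is based on the serialized query
print(a.to_dict())  # {'bool': {'must': [{'term': {'first_name': 'Jane'}}, ...]}}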
Example #15
def _build_must_queries(filters, ranges, composite_field_mapping):
    """Builds a "must" filter query."""
    must_filter = []

    for field, value in filters.items():
        should_filters = None

        # get nested "or" filters
        if composite_field_mapping and field in composite_field_mapping:
            # process composite filters
            composite_fields = composite_field_mapping[field]
            should_filters = _build_field_queries(
                {
                    composite_field: value
                    for composite_field in composite_fields
                }, )
        elif isinstance(value, dict):
            should_filters = _build_nested_queries(field, value)

        if should_filters:
            # builds an "or" query for given list of fields
            must_filter.append(
                Bool(should=should_filters, minimum_should_match=1), )
        else:
            must_filter.append(_build_field_query(field, value), )

    if ranges:
        must_filter.extend(_build_range_queries(ranges))

    return must_filter
Example #16
 def test_construction_with_combined_filter_and_exclude_with_filter_coming_first(
         self, test_domain):
     q1 = (test_domain.repository_for(Person)._dao.query.filter(
         last_name="Doe").exclude(age=3))
     filters1 = q1._owner_dao._build_filters(q1._criteria)
     assert filters1 == Bool(must=[Term(last_name="Doe")],
                             must_not=[Term(age=3)])
Example #17
 def exclude(self, *args, **kwargs):
     """Add method `exclude` to old elastic search versions."""
     if ES_VERSION[0] == 2:
         from elasticsearch_dsl.query import Bool, Q
         return self.query(Bool(filter=[~Q(*args, **kwargs)]))
     else:
         return super(LoansSearch, self).exclude(*args, **kwargs)
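Inverting a query with ~ wraps it in a bool must_not clause, so the ES 2 fallback builds the same structure that exclude() produces on newer versions. A quick sketch:

from elasticsearch_dsl import Q
from elasticsearch_dsl.query import Bool

print((~Q('term', state='CANCELLED')).to_dict())
# {'bool': {'must_not': [{'term': {'state': 'CANCELLED'}}]}}
print(Bool(filter=[~Q('term', state='CANCELLED')]).to_dict())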
Example #18
    def build_multipolygon_query(coordinates):
        '''
        Utility function for building elasticsearch-dsl queries that represent GeoJSON
        MultiPolygons. Given the coordinates, this function creates geo_polygon queries and Bool
        queries to represent the various enclosures and the holes in those enclosures, finding all
        records residing in the MultiPolygon. The coordinates parameter should match the format
        required by GeoJSON and therefore be a series of nested lists; see the GeoJSON docs for
        details.

        :param coordinates: the coordinate list, which is basically a list of Polygons. See the
                            GeoJSON doc for the exact format and meaning
        :return: an elasticsearch-dsl object representing the MultiPolygon
        '''
        queries = []
        # the first list is a list of GeoJSON Polygons
        for polygon in coordinates:
            # then the Polygon is a list containing at least one element. The first element is the
            # outer boundary shape of the polygon and any other elements are holes in this shape
            outer, holes = polygon[0], polygon[1:]
            outer_query = v1_0_0Schema.build_geo_polygon_query(outer)

            if holes:
                holes_queries = [
                    v1_0_0Schema.build_geo_polygon_query(hole)
                    for hole in holes
                ]
                # create a query which filters the outer query but filters out the holes
                queries.append(
                    Bool(filter=[outer_query], must_not=holes_queries))
            else:
                queries.append(outer_query)

        return v1_0_0Schema.build_or(queries)
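A minimal sketch of one polygon with a hole, using geo_polygon queries directly; build_geo_polygon_query and build_or are project-specific, so this assumes a hypothetical geo_point field named location:

from elasticsearch_dsl import Q
from elasticsearch_dsl.query import Bool

outer = Q('geo_polygon', location={'points': [[0, 0], [0, 10], [10, 10], [10, 0]]})
hole = Q('geo_polygon', location={'points': [[4, 4], [4, 6], [6, 6], [6, 4]]})

# Records must fall inside the outer boundary but not inside the hole.
print(Bool(filter=[outer], must_not=[hole]).to_dict())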
Example #19
 def test_construction_with_combined_filter_and_exclude_with_exclude_coming_first(
         self, test_domain):
     q1 = test_domain.get_dao(Person).query.exclude(age=3).filter(
         last_name='Doe')
     filters1 = q1._owner_dao._build_filters(q1._criteria)
     assert filters1 == Bool(must=[Term(last_name='Doe')],
                             must_not=[Term(age=3)])
Example #20
    def _get_nested_query(self, *, query, path, fields):
        """Generate a nested query with passed parameters."""
        queries = self._get_queries(
            query=query,
            fields=fields,
        )

        raw_fields = [
            # Remove boosting from the field
            re.sub(r'\^.*$', '', field)
            for field in fields
        ]

        highlight = dict(
            self._highlight_options,
            fields={
                field: {}
                for field in raw_fields
            },
        )

        return Nested(
            path=path,
            inner_hits={'highlight': highlight},
            query=Bool(should=queries),
        )
Example #21
def etl(index='cf_rfem_hist_price',
        start_date='2018-12-26',
        end_date='2019-03-25',
        symbol='rfem'):
    ESLowLevelClientByConnection.get_instance()
    search = Search(index=index, using='high_level_client')[0:100]
    # Use the function arguments rather than hard-coded dates/symbol.
    search.query = Q(
        Bool(must=[
            Range(date={
                'gte': start_date,
                'lte': end_date
            }),
            Term(symbol=symbol)
        ]))
    aggs = A(
        DateHistogram(field='date',
                      interval='1d',
                      format='yyyy-MM-dd',
                      min_doc_count=1))
    response = search.execute()
    hits = response['hits']
    hits = hits['hits']
    XX = []
    for hit in hits:
        X = []
        X.append(hit['_source']['changeOverTime'])
        X.append(hit['_source']['changePercent'])
        X.append(hit['_source']['volume'])
        XX.append(X)
    return (XX)
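The date_histogram aggregation above is constructed but never attached to the search, so the response only contains hits. If daily buckets are actually wanted, a sketch of one way to wire it in before executing, assuming the same search object:

# Attach a date_histogram aggregation to the search before calling execute().
search.aggs.bucket('by_date', 'date_histogram', field='date',
                   interval='1d', format='yyyy-MM-dd', min_doc_count=1)
response = search.execute()
for bucket in response.aggregations.by_date.buckets:
    print(bucket.key_as_string, bucket.doc_count)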
Example #22
    def test_construction_of_filter_with_two_negated_attributes(
            self, test_domain):
        q1 = test_domain.get_dao(Person).query.exclude(first_name="Jane",
                                                       last_name="Doe")
        q2 = (test_domain.get_dao(Person).query.exclude(
            first_name="Jane").exclude(last_name="Doe"))

        filters1 = q1._owner_dao._build_filters(q1._criteria)
        filters2 = q2._owner_dao._build_filters(q2._criteria)

        assert filters1 == Bool(
            must_not=[Term(first_name="Jane"),
                      Term(last_name="Doe")])
        assert filters2 == Bool(
            must_not=[Term(first_name="Jane"),
                      Term(last_name="Doe")])
Example #23
    def get_uniprot2ensembl(self, uniprot_id):
        assert uniprot_id is not None

        self.cache_u2e.queries += 1
        if uniprot_id in self.cache_u2e:
            self.cache_u2e.hits += 1
            return self.cache_u2e[uniprot_id]

        response = Search().using(self._es).index(
            self._es_index).extra(track_total_hits=True).query(
                Bool(should=[
                    Match(uniprot_id=uniprot_id),
                    Match(uniprot_accessions=uniprot_id)
                ]))[0:1].source(includes=["ensembl_gene_id"]).execute()
        #see https://www.elastic.co/guide/en/elasticsearch/reference/7.x/search-request-track-total-hits.html
        if response.hits.total.value == 0:
            #no hit, return None
            self.cache_u2e[uniprot_id] = None
            return None
        elif response.hits.total.value == 1:
            #exactly one hit, return it
            val = response.hits[0].ensembl_gene_id
            self.cache_u2e[uniprot_id] = val
            return val
        else:
            # more than one hit, raise an error
            raise ValueError("Multiple genes with uniprot %s" % (uniprot_id))
Example #24
def getUSWDSquery(indexbase, query, version, agency, domaintype, sort):
    index = indexbase + '-uswds2'
    try:
        query = int(query)
    except (TypeError, ValueError):
        query = 0

    s = Search(using=es, index=index)
    if sort == 'Score':
        s = s.sort('-data.total_score')
    else:
        s = s.sort('domain')
    s = s.query(Bool(should=[Range(data__total_score={'gte': query})]))
    if version != 'all versions':
        if version == 'detected versions':
            s = s.query("query_string", query='v*', fields=['data.uswdsversion'])
        else:
            versionquery = '"' + version + '"'
            s = s.query("query_string", query=versionquery, fields=['data.uswdsversion'])
    if agency != 'All Agencies':
        agencyquery = '"' + agency + '"'
        s = s.query("query_string", query=agencyquery, fields=['agency'])
    if domaintype != 'All Branches':
        domaintypequery = '"' + domaintype + '"'
        s = s.query("query_string", query=domaintypequery, fields=['domaintype'])

    return s
Example #25
    def _search(self, index, table, fields=None):
        """
        Search private area for matching docs in Elasticsearch.

        Only the _id of each matching document is returned.

        fields = {
            'id': [1, 2],
            'uid': ['a002', 'a009']
        }
        """
        fields = fields or {}
        search = Search(using=self.__es, index=index)
        # explicitly exclude all fields since we only need the doc _id
        search = search.source(excludes=['*'])
        for key, values in fields.items():
            search = search.query(
                Bool(
                    filter=[
                        Q('terms', **{f'{META}.{table}.{key}': values}) |
                        Q('terms', **{f'{META}.{table}.{key}.keyword': values})
                    ]
                )
            )
        for hit in search.scan():
            yield hit.meta.id
Example #26
    def _get_nested_query(self, *, query, path, fields):
        """Generate a nested query with passed parameters."""
        queries = self._get_queries(
            query=query,
            fields=fields,
        )

        raw_fields = [
            # Remove boosting from the field
            re.sub(r'\^.*$', '', field) for field in fields
        ]

        # Highlight from the raw fields too, if it is a single term.
        if self._is_single_term(query):
            raw_fields.extend(
                [re.sub(r'\^.*$', '.raw', field) for field in fields])

        highlight = dict(
            self._highlight_options,
            fields={field: {}
                    for field in raw_fields},
        )

        return Nested(
            path=path,
            inner_hits={'highlight': highlight},
            query=Bool(should=queries),
        )
Example #27
    def query(self, search, query):
        """
        Add query part to ``search`` when needed.

        Also:

        * Adds SimpleQueryString instead of default query.
        * Adds HTML encoding of results to avoid XSS issues.
        """
        search = search.highlight_options(encoder='html',
                                          number_of_fragments=3)
        search = search.source(exclude=['content', 'headers'])

        all_queries = []

        # Search with both the 'and' and 'or' operators; documents satisfying
        # the 'and' query also satisfy the 'or' query, so they score higher.

        for operator in self.operators:
            query_string = SimpleQueryString(query=query,
                                             fields=self.fields,
                                             default_operator=operator)
            all_queries.append(query_string)

        # Run a bool query with should, so it returns results where either of the queries matches.
        bool_query = Bool(should=all_queries)

        search = search.query(bool_query)
        return search
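A minimal sketch of the 'and'/'or' pairing described in the comments, with hypothetical fields, showing the bool body that ends up being sent:

from elasticsearch_dsl.query import Bool, SimpleQueryString

fields = ['title^10', 'content']  # hypothetical boosted fields
all_queries = [
    SimpleQueryString(query='read the docs', fields=fields, default_operator=operator)
    for operator in ('and', 'or')
]
print(Bool(should=all_queries).to_dict())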
Example #28
File: rest.py Project: unsftn/rsj
def _search_korpus(request):
    if not request.data or request.data.get('term') is None:
        return bad_request('no search term')

    term = request.data['term']
    hits = []
    s = Search(index=KORPUS_INDEX)
    s = s.source(includes=['pk', 'osnovniOblik'])
    s.query = Bool(
        must=[Match(oblici=term)]
    )
    try:
        response = s.execute()
        for hit in response.hits.hits:
            hits.append(hit['_source'])

        serializer = KorpusResponseSerializer(hits, many=True)
        data = serializer.data

        return Response(
            data,
            status=HTTP_200_OK,
            content_type=JSON
        )
    except ElasticsearchException as error:
        return server_error(error.args)
Example #29
    def __init__(self, **kwargs):
        """Use Meta to set kwargs defaults."""
        # At object instantiation, kwargs['index'] is not defined.
        # elasticsearch-dsl-py re-instantiates the object at each search
        # by cloning it and passing the list of indices as kwargs:
        # kwargs['index'] = ['index-name1', 'index-name2']
        if not kwargs.get('index') and getattr(self.Meta, 'index', None):
            _index_name = prefix_index(app=current_app,
                                       index=getattr(self.Meta, 'index', None))
            kwargs.setdefault('index', _index_name)

        kwargs.setdefault('doc_type', getattr(self.Meta, 'doc_types', None))
        kwargs.setdefault('using', current_search_client)
        kwargs.setdefault('extra', {})

        min_score = current_app.config.get('SEARCH_RESULTS_MIN_SCORE')
        if min_score:
            kwargs['extra'].update(min_score=min_score)

        super(RecordsSearch, self).__init__(**kwargs)

        default_filter = getattr(self.Meta, 'default_filter', None)
        if default_filter:
            # NOTE: https://github.com/elastic/elasticsearch/issues/21844
            self.query = Bool(minimum_should_match=MinShouldMatch("0<1"),
                              filter=default_filter)
Example #30
 def default_multi_community_filter(community_list):
     return Bool(must=[
         Bool(should=[
             Q(
                 'terms', **{
                     current_oarepo_communities.primary_community_field:
                     community_list
                 }),
             Q(
                 'terms', **{
                     current_oarepo_communities.communities_field:
                     community_list
                 })
         ],
              minimum_should_match=1)
     ])