def simple_search_public_data(query_text):
    """Search experiments, datasets and datafiles, restricted to public data.

    Runs a single MultiSearch over the three indices; every sub-query is
    limited to documents whose experiment grants public_access == 100
    (for datasets the ACL lives on the nested 'experiments' documents).

    Returns a dict with keys 'experiments', 'datasets' and 'datafiles',
    each a list of raw hit dicts.
    """
    indices = ['experiments', 'dataset', 'datafile']
    multi = MultiSearch(index=indices)

    # Experiments: match on title, public only.
    exp_query = Q("match", title=query_text) & Q("term", public_access=100)
    multi = multi.add(
        Search(index='experiments')
        .extra(size=MAX_SEARCH_RESULTS, min_score=MIN_CUTOFF_SCORE)
        .query(exp_query))

    # Datasets: match on description; public-access check is applied via a
    # nested query against the embedded experiments.
    nested_public = Q("term", **{'experiments.public_access': 100})
    multi = multi.add(
        Search(index='dataset')
        .extra(size=MAX_SEARCH_RESULTS, min_score=MIN_CUTOFF_SCORE)
        .query(Q("match", description=query_text))
        .query('nested', path='experiments', query=nested_public))

    # Datafiles: match on filename, restricted to public experiments.
    datafile_query = (Q("match", filename=query_text)
                      & Q("term", experiments__public_access=100))
    multi = multi.add(
        Search(index='datafile')
        .extra(size=MAX_SEARCH_RESULTS, min_score=MIN_CUTOFF_SCORE)
        .query(datafile_query))

    # Bucket the raw hits by the index each one came from.
    buckets = {"experiments": [], "datasets": [], "datafiles": []}
    index_to_bucket = {
        "dataset": "datasets",
        "experiments": "experiments",
        "datafile": "datafiles",
    }
    for response in multi.execute():
        for hit in response.hits.hits:
            bucket = index_to_bucket.get(hit["_index"])
            if bucket is not None:
                buckets[bucket].append(hit.to_dict())
    return buckets
def search_request(request):
    """Render search results for ``?q=`` across all music indices.

    Pairs a heavily boosted exact-phrase multi_match with a fuzzy
    multi_match inside dis_max, then maps each hit to a row dict
    (id, score, url name, model, display text) for the template.

    Implicitly returns None when the method is not GET or no query was
    supplied, preserving the original control flow.
    """
    if request.method == 'GET':
        if q := request.GET.get('q', None):
            object_list = []
            search = Search(
                index=['songs', 'artists', 'albums', 'labels', 'genres'])
            # BUG FIX: Search.from_dict is a classmethod that constructs a
            # brand-new Search object, silently discarding the index list
            # configured above. update_from_dict merges the raw query into
            # this search (and returns it), keeping the indices.
            objects = search.update_from_dict({
                "query": {
                    "dis_max": {
                        "queries": [{
                            "multi_match": {
                                "query": q,
                                "type": "phrase",
                                "fields": [
                                    "title^20", "lyrics^10", "name^100",
                                    "description^50"
                                ]
                            }
                        }, {
                            "multi_match": {
                                "query": q,
                                "fuzziness": "AUTO",
                                "fields": [
                                    "title^2", "lyrics", "name^10",
                                    "description^5"
                                ]
                            }
                        }]
                    }
                }
            })
            # (leftover debug print(obj) removed)
            for obj in objects:
                row = {
                    'id': obj.meta.id,
                    'score': obj.meta.score,
                    # e.g. index 'songs' -> URL name 'main:song-detail'
                    'url': 'main:' + obj.meta.index[:-1] + '-detail',
                    'model': obj.meta.index,
                }
                if obj.meta.index == 'songs':
                    row['text'] = f'Song: {obj.title}'
                elif obj.meta.index == 'artists':
                    row['text'] = f'Artist: {obj.name}'
                elif obj.meta.index == 'labels':
                    row['text'] = f'Label: {obj.name}'
                elif obj.meta.index == 'genres':
                    row['text'] = f'Genre: {obj.name}'
                elif obj.meta.index == 'albums':
                    row['text'] = f'Album: {obj.title}'
                object_list.append(row)
            return render(request=request,
                          template_name="main/search.html",
                          context={'object_list': object_list})
def get_object_list(self, request):
    """Full-text search over experiments/datasets/datafiles for ``?query=``.

    Anonymous users are routed to the public-data-only search. For
    authenticated users every match query is combined with an ACL clause
    accepting documents owned by the user, by any of the user's groups,
    or public ones (public_access == 100).

    Returns a one-element list wrapping the hit buckets in a SearchObject.
    """
    user = request.user
    query_text = request.GET.get('query', None)
    if not user.is_authenticated:
        result_dict = simple_search_public_data(query_text)
        return [SearchObject(id=1, hits=result_dict)]
    groups = user.groups.all()
    index_list = ['experiments', 'dataset', 'datafile']
    ms = MultiSearch(index=index_list)
    # Experiments: title match AND (owned-by-user OR public OR group-owned).
    query_exp = Q("match", title=query_text)
    query_exp_oacl = Q("term", objectacls__entityId=user.id) | \
        Q("term", public_access=100)
    for group in groups:
        query_exp_oacl = query_exp_oacl | \
            Q("term", objectacls__entityId=group.id)
    query_exp = query_exp & query_exp_oacl
    ms = ms.add(
        Search(index='experiments').extra(
            size=MAX_SEARCH_RESULTS,
            min_score=MIN_CUTOFF_SCORE).query(query_exp))
    # Datasets: description match; the ACL clause targets fields of the
    # nested 'experiments' documents, so it is attached via a nested query.
    query_dataset = Q("match", description=query_text)
    query_dataset_oacl = Q("term", **{'experiments.objectacls.entityId': user.id}) | \
        Q("term", **{'experiments.public_access': 100})
    for group in groups:
        query_dataset_oacl = query_dataset_oacl | \
            Q("term", **{'experiments.objectacls.entityId': group.id})
    ms = ms.add(
        Search(index='dataset').extra(
            size=MAX_SEARCH_RESULTS,
            min_score=MIN_CUTOFF_SCORE).query(query_dataset).query(
                'nested', path='experiments', query=query_dataset_oacl))
    # Datafiles: filename match AND-ed directly with the ACL clause.
    query_datafile = Q("match", filename=query_text)
    query_datafile_oacl = Q("term", experiments__objectacls__entityId=user.id) | \
        Q("term", experiments__public_access=100)
    for group in groups:
        query_datafile_oacl = query_datafile_oacl | \
            Q("term", experiments__objectacls__entityId=group.id)
    query_datafile = query_datafile & query_datafile_oacl
    ms = ms.add(
        Search(index='datafile').extra(
            size=MAX_SEARCH_RESULTS,
            min_score=MIN_CUTOFF_SCORE).query(query_datafile))
    results = ms.execute()
    # Bucket the raw hits by the index each one came from.
    result_dict = {k: [] for k in ["experiments", "datasets", "datafiles"]}
    for item in results:
        for hit in item.hits.hits:
            if hit["_index"] == "dataset":
                result_dict["datasets"].append(hit.to_dict())
            elif hit["_index"] == "experiments":
                result_dict["experiments"].append(hit.to_dict())
            elif hit["_index"] == "datafile":
                result_dict["datafiles"].append(hit.to_dict())
    return [SearchObject(id=1, hits=result_dict)]
def filter(self, qs, value):
    """Filter the category queryset via an Elasticsearch relevance search.

    Unpublished categories are excluded, hits are ranked by score then
    view count, and the Django queryset is re-ordered to match the order
    Elasticsearch returned. An empty response yields an empty queryset.
    """
    es = Elasticsearch([settings.ELASTICSEARCH_HOST])
    term = value.lower()
    raw_query = {
        "bool": {
            # Drop anything explicitly flagged is_published=False.
            "must_not": [{"term": {"is_published": False}}],
            "should": [{
                "simple_query_string": {
                    "fields": ["category_name"],
                    "quote_field_suffix": ".exact",
                    "query": term,
                }
            }],
        }
    }
    search = (Search(using=es, index='category')
              .query(raw_query)
              .sort("_score", "-views")
              .extra(size=self.max_result, from_=0))
    response = search.execute()
    if not response:
        # TODO: a queryset-based fallback used to live here (see VCS history).
        return qs.none()[:self.max_result]
    ids = [hit.meta.id for hit in response]
    # Preserve Elasticsearch's relevance ordering in the SQL result.
    ordering = Case(*[When(pk=pk, then=pos) for pos, pk in enumerate(ids)])
    return qs.filter(id__in=ids).order_by(ordering)[:self.max_result]
def _do_check(self):
    """Health check: one simple database select plus one Elasticsearch search.

    Performs a select on the HealthCheck table, then runs a representative
    search against Elasticsearch; any exception fails the check.

    :return: True if both checks complete without raising, False otherwise
        (the exception is reported to Sentry via capture_exception).
    """
    try:
        # Database check: a select on a trivial table.
        HealthCheck.objects.get(health_check_field=True)
        # Elasticsearch check.
        client = Elasticsearch(hosts=[settings.ES_URL])
        # The probe text is a fixed literal containing no comma, so the
        # old conditional operator always evaluated to "and" — simplified.
        query_object = {
            "multi_match": {
                "query": "a_commodity_or_code",
                "type": "most_fields",
                "fields": ["keywords", "description"],
                "operator": "and",
            }
        }
        # BUG FIX: elasticsearch-dsl Search objects are lazy — without
        # .execute() no request was ever sent, so this "check" could
        # never detect an Elasticsearch outage.
        Search().index("indexes").using(client).query(query_object).sort(
            "sort_object").execute()
        # Return success if we have reached this point
        return True
    except Exception as e:
        capture_exception(e)
        return False
def highlight(self, search: Search) -> Search:
    """Enable highlighting on all fields, wrapping matches in <mark> tags."""
    # TODO: Why did we have this?
    # search = search.highlight_options(require_field_match=False)
    return search.highlight(
        "*",
        fragment_size=150,
        pre_tags="<mark>",
        post_tags="</mark>",
    )
def autocomplete_search(q, doc_type=None, fuzzy_mode=False, **kwargs):
    """Run an autocomplete match query across all indices.

    :param q: raw user input, expanded via autocomplete_query.
    :param doc_type: optional document type to narrow the search.
    :param fuzzy_mode: passed through to autocomplete_query.
    :keyword limit: page size, capped at 100 (default 20).
    :keyword offset: pagination offset (default 0).
    :keyword filters: dict mapping field name -> list of values; the first
        value of each entry is applied as a term query.
    :return: the formatted search results.
    """
    query = autocomplete_query(q, fuzzy_mode)
    limit = kwargs.get('limit', 20)
    offset = kwargs.get('offset', 0)
    filters = kwargs.get('filters', {})
    # Hard cap on the page size to protect the cluster.
    if limit and limit > 100:
        limit = 100
    s = Search().index('_all')
    if doc_type:
        s = s.doc_type(doc_type)
    s = s.query('match', autocomplete=query)
    # TODO: implement in a generic way
    # add filters like: `&filter_status=Ready&filter_type=Broadcasts`
    # BUG FIX: dict.iteritems() is Python 2 only and raises AttributeError
    # on Python 3 (this codebase uses f-strings/:=, i.e. Python 3).
    for key, value in filters.items():
        s = s.query('term', **{key: value[0]})
    s = s[offset:limit + offset]
    return format_search_results(s.execute())
def autocomplete(query: str) -> Response:
    """
    https://www.elastic.co/guide/en/elasticsearch/guide/current/_index_time_search_as_you_type.html

    We use the ngram-based autocomplete-analyzer for indexing, but the
    standard analyzer for searching. This way the whole entered word has to
    match (save for some fuzziness) and the algorithm does not fall back to
    matching only the first character in extreme cases — preventing absurd
    suggestions like "Garret Walker" and "Hector Mendoza" for the input
    "Mahatma Ghandi".
    """
    match_params = {
        "query": escape_elasticsearch_query(query),
        "analyzer": "standard",
        "fuzziness": "AUTO",
        "prefix_length": 1,
    }
    # People and organizations are the most useful suggestions, papers next.
    boosts = [
        {DOCUMENT_INDICES["person"]: 4},
        {DOCUMENT_INDICES["organization"]: 4},
        {DOCUMENT_INDICES["paper"]: 2},
    ]
    search = (Search(index=list(DOCUMENT_INDICES.values()))
              .query("match", autocomplete=match_params)
              .extra(min_score=1)
              .update_from_dict({"indices_boost": boosts}))
    return search.execute()
def search_by_code(code):
    """Look up documents by exact (normalised) commodity code.

    The stored hierarchy_context field is a JSON string and is decoded
    in place on each hit; hits missing the field are logged and left
    untouched. Returns the (lazily iterated) search.
    """
    es = Elasticsearch(hosts=[settings.ES_URL])
    term_query = {"term": {"commodity_code": process_commodity_code(code)}}
    hits = Search().index(*alias_names).using(es).query(term_query)
    for hit in hits:
        try:
            hit["hierarchy_context"] = json.loads(hit["hierarchy_context"])
        except KeyError as exception:
            # No hierarchy_context on this document — record and move on.
            logger.info("{0} {1}".format(hit["commodity_code"], exception.args))
    return hits
def _add_date_before(search: Search, params: Dict[str, Any], options, errors) -> Search:
    """Filters by a date given a string, catching parsing errors.

    Keeps documents whose ``start`` or ``legal_date`` is on or before the
    parsed ``params["before"]``. On a parse failure an error message is
    appended to ``errors`` and the search is returned unchanged; on
    success the parsed date is recorded in ``options["before"]``.
    """
    try:
        before = parse(params["before"])
    except (ValueError, OverflowError) as e:
        # BUG FIX: this was an f-string inside gettext(), so the msgid was
        # formatted *before* the catalog lookup and could never match a
        # translation entry. Translate the template, then interpolate.
        errors.append(
            gettext(
                "The value for before is invalid. The correct format is "
                "'YYYY-MM-DD' or 'YYYY-MM-DD HH:MM:SS': %(err)s"
            ) % {"err": e})
        return search
    # Either date field qualifying is enough to keep the document.
    search = search.filter(
        Q("range", start={"lte": before}) | Q("range", legal_date={"lte": before}))
    options["before"] = before
    return search
def _build_search_request(query, sort_key, sort_order, filter_on_leaf=None):
    """Build (but do not execute) the keyword/description search request.

    A comma in the query switches the multi_match operator from "and" to
    "or", so comma-separated input matches any of the listed terms.
    When filter_on_leaf is truthy, results are narrowed with a term
    filter on the ``leaf`` field.
    """
    es_client = Elasticsearch(hosts=[settings.ES_URL])
    operator = "or" if "," in query else "and"
    match_clause = {
        "multi_match": {
            "query": query,
            "type": "most_fields",
            "fields": ["keywords", "description"],
            "operator": operator,
        }
    }
    request = (Search()
               .index(*alias_names)
               .using(es_client)
               .query(match_clause)
               .sort({sort_key: sort_order}))
    if filter_on_leaf:
        request = request.filter("term", leaf=filter_on_leaf)
    return request
def query(self, search: Search, query: str) -> Search:
    """Attach the user's search term to the query; pass through when empty."""
    if not query:
        return search
    self.options["searchterm"] = query
    escaped = escape_elasticsearch_query(query)
    # Per https://stackoverflow.com/a/35375562/3549270: pair an exact
    # multi-match with a fuzzy one so exact matches outscore fuzzy hits.
    # Fuzziness AUTO(=2) would tolerate more errors but is a lot slower
    # and produces many false positives, hence fuzziness "1".
    exact = MultiMatch(query=escaped, operator="and", fields=self.fields)
    fuzzy = MultiMatch(
        query=escaped,
        operator="and",
        fields=self.fields,
        fuzziness="1",
        prefix_length=1,
    )
    return search.query(Bool(should=[exact, fuzzy]))
def filter(self, qs, value):
    """Filter the offer queryset via an Elasticsearch relevance search.

    Combines a simple_query_string over the name/category fields with a
    constant-score transliterated (en->ru) partial match, excludes
    unpublished offers, and re-orders the Django queryset to match the
    order Elasticsearch returned. An empty response yields an empty
    queryset.
    """
    # Initialise the Elasticsearch connection.
    client = Elasticsearch([settings.ELASTICSEARCH_HOST])
    value = value.lower()
    # Build the query.
    search_query = {
        "bool": {
            "must_not": [
                # Excludes is_published=False from the results.
                {
                    "term": {
                        "is_published": False
                    }
                }
            ],
            "should": [
                {
                    "simple_query_string": {
                        # Look for something sensible.
                        "fields": ["fullname", "category_name"],
                        "quote_field_suffix": ".exact",
                        "query": value
                    }
                },
                {
                    # Partial match on transliterated strings (en->ru).
                    # constant_score disables boosting by term frequency.
                    "constant_score": {
                        "filter": {
                            "match": {
                                "fullname_translit": {
                                    "query": value,
                                    "fuzziness": 1,
                                    "operator": "and",
                                }
                            }
                        }
                    }
                },
            ]
        }
    }
    # Initialise the request.
    s = Search(using=client, index='offer') \
        .query(search_query)\
        .sort("_score", "-views")\
        .extra(size=self.max_result, from_=0)
    self.hits_list = []
    items = s.execute()
    if items:
        for item in items:
            self.hits_list.append(item.meta.id)
        # Needed so the selection from Postgres keeps the order that
        # Elasticsearch returned.
        self.hits_order = Case(*[
            When(pk=pk, then=pos) for pos, pk in enumerate(self.hits_list)
        ])
        qs = qs.filter(id__in=self.hits_list).order_by(self.hits_order)
    else:
        qs = qs.none()
    # TODO: old implementation. Maybe keep it as a fallback?
    # else:
    #     qs = qs.annotate(full_name=Concat(
    #         'product__article', Value(' '),
    #         'product__title', Value(' '),
    #         'product__search_title'))
    #     bits = value.split(' ')
    #     if len(bits) is 1 and bits[0].isdecimal():
    #         full_name_clauses = Q(full_name__icontains=bits[0])
    #     else:
    #         full_name_clauses = reduce(
    #             operator.and_,
    #             [Q(full_name__iregex=r'(^|\s)%s' % escape(v))
    #              for v in bits])
    #
    #     unpublished = Category.objects.get_queryset_descendants(
    #         Category.objects.filter(is_published=False),
    #         include_self=True)
    #
    #     qs = (qs.filter(full_name_clauses)
    #           .exclude(product__category__in=unpublished))
    #
    #     if self.uniq_category:
    #         products = (qs.order_by('product__category__title')
    #                     .distinct('product__category__title'))
    #         qs = (qs.filter(id__in=products)
    #               .order_by('-product__category__views'))
    return qs
class DrugsListView(APIView):
    """API view listing (GET, paginated wildcard search) and creating (POST)
    drugs backed by the 'drugs' Elasticsearch index."""

    # Class-level (shared) connection and base search, sorted by the raw
    # trade name; each .query()/slice call below derives a new search.
    client = Elasticsearch(hosts=[{"host": "elasticsearch", "port": 9200}])
    search = Search(index='drugs').using(client).sort('trade_name.raw')

    @swagger_auto_schema(manual_parameters=[PAGE_FIELD, QUERY_FIELD])
    def get(self, request):
        """Return one page of drugs; with ?query=, a boosted wildcard search.

        NOTE(review): the slice offset is page - 1 rather than
        (page - 1) * page_size, so consecutive pages overlap whenever
        page_size > 1 — confirm this is intended.
        """
        page = int(request.GET.get('page', 1))
        page_size = settings.REST_FRAMEWORK.get('PAGE_SIZE')
        query_word = request.GET.get('query', None)
        if not query_word:
            # No query: plain match_all, paginated by slicing the search.
            s = self.search.query("match_all")[page - 1:page - 1 + page_size]
            res = s.execute().to_dict()['hits']['hits']
            return Response(data=res, status=status.HTTP_200_OK)
        # Case-insensitive prefix search: append '*' for the wildcards below.
        query_word = query_word.lower() + "*"
        query = {
            "dis_max": {
                "queries": [
                    {
                        "wildcard": {
                            "trade_name": {
                                "value": query_word,
                                "boost": 3.0
                            }
                        }
                    },
                    {
                        "wildcard": {
                            "international_name.name": {
                                "value": query_word,
                                "boost": 3.0
                            }
                        }
                    },
                    {
                        "wildcard": {
                            "formula": {
                                "value": query_word,
                                "boost": 2.0
                            }
                        }
                    },
                    {
                        # NOTE(review): this field name contains a space
                        # ("registration number") — verify it matches the
                        # index mapping.
                        "wildcard": {
                            "registration number": {
                                "value": query_word,
                                "boost": 1.0
                            }
                        }
                    },
                    {
                        "wildcard": {
                            "INN.name": {
                                "value": query_word,
                                "boost": 0.5
                            }
                        }
                    },
                    {
                        # ATC codes are nested documents.
                        "nested": {
                            "path": "atcs",
                            "query": {
                                "wildcard": {
                                    "atcs.name": {
                                        "value": query_word,
                                        "boost": 0.5
                                    }
                                }
                            }
                        }
                    },
                ],
            }
        }
        s = self.search.query(query)[page - 1:page - 1 + page_size]
        res = s.execute().to_dict()['hits']['hits']
        return Response(data=res, status=status.HTTP_200_OK)

    @swagger_auto_schema(request_body=DrugSerializer)
    def post(self, request):
        """Validate, persist and index a new drug document."""
        serializer = DrugSerializer(data=request.data)
        if serializer.is_valid():
            serializer.create(serializer.validated_data)
            # Mirror the new record into the Elasticsearch index.
            DrugDocument(serializer.validated_data).save(using=self.client)
            return Response(data=serializer.validated_data,
                            status=status.HTTP_201_CREATED)
        return Response(data=serializer.errors,
                        status=status.HTTP_400_BAD_REQUEST)
def obj_create(self, bundle, **kwargs):
    """Run a faceted, ACL-aware search (tastypie POST) over the selected indices.

    Request body keys read from ``bundle.data``:
        text: free-text search term.
        TypeTag: list of 'Experiment'/'Dataset'/'Datafile' choosing indices.
        StartDate/EndDate: UTC timestamps ('%Y-%m-%dT%H:%M:%S.%fZ'); the
            created_time range filter is only applied when StartDate is given.
        InstrumentList: instrument names restricting the dataset query.

    Authenticated users match documents owned by themselves, any of their
    groups, or public ones (public_access == 100); anonymous users match
    public documents only. The bucketed hits are attached to bundle.obj
    as a SearchObject.
    """
    user = bundle.request.user
    groups = user.groups.all()
    # if anonymous user search public data only
    query_text = bundle.data.get("text", None)
    type_tag = bundle.data.get("TypeTag", [])
    index_list = []
    # Map the requested type tags onto index names.
    for type in type_tag:  # NOTE(review): 'type' shadows the builtin
        if type == 'Experiment':
            index_list.append('experiments')
        elif type == 'Dataset':
            index_list.append('dataset')
        elif type == 'Datafile':
            index_list.append('datafile')
    end_date = bundle.data.get("EndDate", None)
    start_date = bundle.data.get("StartDate", None)
    if end_date is not None:
        # Convert the UTC timestamp to a local-timezone date.
        end_date_utc = datetime.datetime.strptime(end_date, "%Y-%m-%dT%H:%M:%S.%fZ") \
            .replace(tzinfo=pytz.timezone('UTC'))
        end_date = end_date_utc.astimezone(LOCAL_TZ).date()
    else:
        # set end date to today's date
        # NOTE(review): this branch leaves end_date a datetime, whereas the
        # branch above produces a date — confirm both work in the range query.
        end_date = datetime.datetime.today().replace(
            tzinfo=pytz.timezone('UTC'))
    if start_date:
        start_date_utc = datetime.datetime.strptime(start_date, "%Y-%m-%dT%H:%M:%S.%fZ") \
            .replace(tzinfo=pytz.timezone('UTC'))
        start_date = start_date_utc.astimezone(LOCAL_TZ).date()
    instrument_list = bundle.data.get("InstrumentList", None)
    instrument_list_id = []
    if instrument_list:
        # Resolve instrument names to ids for the terms filter below.
        for ins in instrument_list:
            instrument_list_id.append(
                Instrument.objects.get(name__exact=ins).id)
    # query for experiment model
    ms = MultiSearch(index=index_list)
    if 'experiments' in index_list:
        query_exp = Q("match", title=query_text)
        if user.is_authenticated:
            # Own, group-shared or public experiments.
            query_exp_oacl = Q("term", objectacls__entityId=user.id) | \
                Q("term", public_access=100)
            for group in groups:
                query_exp_oacl = query_exp_oacl | \
                    Q("term", objectacls__entityId=group.id)
        else:
            query_exp_oacl = Q("term", public_access=100)
        if start_date is not None:
            query_exp = query_exp & Q("range", created_time={
                'gte': start_date,
                'lte': end_date
            })
        query_exp = query_exp & query_exp_oacl
        ms = ms.add(
            Search(index='experiments').extra(
                size=MAX_SEARCH_RESULTS,
                min_score=MIN_CUTOFF_SCORE).query(query_exp))
    if 'dataset' in index_list:
        query_dataset = Q("match", description=query_text)
        if user.is_authenticated:
            # ACL clause targets the nested 'experiments' documents.
            query_dataset_oacl = Q("term", **{'experiments.objectacls.entityId': user.id}) | \
                Q("term", **{'experiments.public_access': 100})
            for group in groups:
                query_dataset_oacl = query_dataset_oacl | \
                    Q("term", **{'experiments.objectacls.entityId': group.id})
        else:
            query_dataset_oacl = Q("term",
                                   **{'experiments.public_access': 100})
        if start_date is not None:
            query_dataset = query_dataset & Q("range", created_time={
                'gte': start_date,
                'lte': end_date
            })
        if instrument_list:
            query_dataset = query_dataset & Q(
                "terms", **{'instrument.id': instrument_list_id})
        # add instrument query
        ms = ms.add(
            Search(index='dataset').extra(
                size=MAX_SEARCH_RESULTS,
                min_score=MIN_CUTOFF_SCORE).query(query_dataset).query(
                    'nested', path='experiments', query=query_dataset_oacl))
    if 'datafile' in index_list:
        query_datafile = Q("match", filename=query_text)
        if user.is_authenticated:
            # ACL clause targets the doubly nested dataset.experiments docs.
            query_datafile_oacl = Q("term", **{'dataset.experiments.objectacls.entityId': user.id}) | \
                Q("term", **{'dataset.experiments.public_access': 100})
            for group in groups:
                query_datafile_oacl = query_datafile_oacl | \
                    Q("term", **{'dataset.experiments.objectacls.entityId': group.id})
        else:
            query_datafile_oacl = Q(
                "term", **{'dataset.experiments.public_access': 100})
        if start_date is not None:
            query_datafile = query_datafile & Q("range", created_time={
                'gte': start_date,
                'lte': end_date
            })
        ms = ms.add(
            Search(index='datafile').extra(
                size=MAX_SEARCH_RESULTS,
                min_score=MIN_CUTOFF_SCORE).query(query_datafile).query(
                    'nested', path='dataset.experiments',
                    query=query_datafile_oacl))
    result = ms.execute()
    # Bucket the raw hits by the index each one came from.
    result_dict = {k: [] for k in ["experiments", "datasets", "datafiles"]}
    for item in result:
        for hit in item.hits.hits:
            if hit["_index"] == "dataset":
                result_dict["datasets"].append(hit)
            elif hit["_index"] == "experiments":
                result_dict["experiments"].append(hit)
            elif hit["_index"] == "datafile":
                result_dict["datafiles"].append(hit)
    if bundle.request.method == 'POST':
        bundle.obj = SearchObject(id=1, hits=result_dict)
    return bundle