def search(self, doc_type, query=""):
    """Execute a search query against Elasticsearch and retrieve results.

    :param doc_type: document type class (metaclass ``DocTypeMeta``)
    :param query: free-text query matched against the ``title`` field
    :return: list of response hits (empty when the arguments are invalid)
    """
    results = []
    # Guard against wrong argument types instead of raising.
    # BUG FIX: the original used Py2-only constructs (`print` statement,
    # `unicode`) which are a syntax error / NameError under Python 3,
    # which the rest of this file (async def, annotations) requires.
    if isinstance(query, str) and isinstance(doc_type, DocTypeMeta):
        q = Q("multi_match", query=query.lower(), fields=["title"])
        s = (
            Search()
            .using(self.client)
            .index(self.index_name)
            .doc_type(doc_type)
            .query(q)
        )
        print("search query: " + str(s.to_dict()))
        response = s.execute()
        results = list(response)
    return results
def search_aggregation(hosts, index, field):
    """Run a terms aggregation on ``field`` and return the matching records.

    NOTE(review): ``hosts`` is accepted but unused -- the module-level ``ES``
    connection is used instead. The aggregation buckets are requested but
    never read; only the plain hits are converted and returned. Confirm this
    is intentional.
    """
    search = Search(using=ES, index=index).doc_type(Record)
    search.aggs.bucket('choices', 'terms', field=field)
    hits = search.execute()
    return [Record.from_hit(hit) for hit in hits]
def search(self, query: str, filters: dict = None, only_this_type: bool = True,
           **kwargs) -> list:
    """Perform a search against Elasticsearch, then pull the corresponding
    rows from the database.

    :param query: query terms to search by (matched against ``_all``)
    :param filters: named (attribute, value) filters to limit the results;
        ``None`` means no filtering
    :param only_this_type: restrict the search to this indexer's doc type
    :param kwargs: accepted for interface compatibility; currently unused
    :return: model instances sorted by descending score; each instance
        carries its Elasticsearch score as ``instance._meta.es_score``
    """
    # Build base search object.
    s = Search(using=self.indexer.es).index(self.indexer.index_name)
    if only_this_type:
        s = s.doc_type(self.indexer.doc_type_name)

    # Full-text query against the catch-all field.
    s = s.query('match', _all=query)

    # Exact-term filters (AND-ed together).
    if filters is not None:
        for attr, value in filters.items():
            s = s.filter(F({'term': {attr: value}}))

    res = s.execute()

    # Map each hit back to its Django model class and primary key,
    # keeping the score: {model_class: {pk: score, ...}, ...}
    results = {}
    for hit in res:
        model = get_model(hit._meta.doc_type)
        pk = getattr(hit, model._meta.pk.name)
        results.setdefault(model, {})[pk] = hit._meta.score

    # Fetch the actual rows, one queryset per model class.
    instances = []
    for model, pk_score in results.items():
        instances.extend(model.objects.filter(pk__in=pk_score.keys()))

    # Attach scores so callers (and the sort below) can use them.
    for instance in instances:
        instance._meta.es_score = results[type(instance)][instance.pk]

    # Highest score first.
    return sorted(instances, key=lambda i: i._meta.es_score, reverse=True)
async def convert(self, ctx, argument):
    """Convert a command argument into an Elasticsearch Gym document.

    Resolution order: guild-local alias (non-numeric argument), direct
    numeric id, then a fuzzy title search restricted to the guild/channel's
    configured region polygon.

    :raises commands.CommandError: no region configured, or no gym matched
    """
    try:
        # 1) Try a guild-local alias first (aliases beat fuzzy search).
        if not argument.isnumeric():
            try:
                alias = ctx.cog.session.query(models.GymAlias).filter_by(
                    guild_id=ctx.message.channel.guild.id,
                    title=argument.lower()).one()
                return get_es_gym_by_id(ctx, alias.gym.id)
            except NoResultFound:
                pass
        # 2) A numeric argument is treated as a direct gym id.
        if argument.isnumeric():
            return get_es_gym_by_id(ctx, argument)
        # 3) Fuzzy title search inside the configured region.
        region = config.get(ctx.cog.session, "region", ctx.message.channel)
        if region is None:
            raise commands.CommandError(
                _("This guild/channel does not have a region set"))
        region = to_shape(region)
        points = [{"lat": point[1], "lon": point[0]}
                  for point in region.exterior.coords]
        s = Search(index="gym").query("match", title={
            'query': argument,
            'fuzziness': 2
        })
        s = s.filter("geo_polygon", location={"points": points})
        # BUG FIX: Search methods return a modified copy; the original
        # discarded the result of doc_type(), so the restriction to the
        # Gym document type was never applied.
        s = s.doc_type(es_models.Gym)
        response = s.execute()
        if response.hits.total == 0:
            raise commands.CommandError(
                _("Gym \"{}\" not found").format(argument))
        return response[0]
    except (commands.CommandError, commands.BadArgument):
        # Intentional user-facing errors: let the framework report them.
        raise
    except Exception:
        logger.exception("Exception in Gym converter")
        await ctx.send(
            _("Error in Gym converter. Check your console or logs for details"
              ))
        raise
def _build_related_lists(self):
    """Build (and cache) the lists of documents related to the current one.

    Runs one match_phrase query per extracted keyphrase and accumulates the
    distinct remote ids and contents of the hits, excluding the document
    being analyzed itself.

    :return: dict with ``ids`` and ``content`` lists
    """
    # Return the cached result when already computed.
    if self.related_docs:
        return self.related_docs
    self.related_docs = {"ids": [], "content": []}
    for phrase in self._get_keyphrases():
        s = Search().using(self.es_client).query('match_phrase',
                                                 content=phrase)
        # BUG FIX: doc_type() returns a modified copy; the original discarded
        # the result, so the BillDocument restriction was never applied.
        s = s.doc_type(BillDocument)
        response = s.execute()
        self.related_docs['ids'] += list(
            set(hit.remote_id for hit in response) -
            set(self.related_docs['ids']))
        self.related_docs['content'] += list(
            set(hit.content for hit in response) -
            set(self.related_docs['content']))
    # Don't include the document itself in the analysis. Guard the removals:
    # the document may not have matched any of its own keyphrases, in which
    # case the original unguarded remove() raised ValueError.
    if self.instance.remote_id in self.related_docs['ids']:
        self.related_docs['ids'].remove(self.instance.remote_id)
    if self.instance.content in self.related_docs['content']:
        self.related_docs['content'].remove(self.instance.content)
    return self.related_docs
def field_values(field, es=None, index=None):
    """Return unique values and their counts for the specified field.

    :param field: name of the field to aggregate on
    :param es: optional Elasticsearch client (falls back to the default)
    :param index: optional index name (used only together with ``es``)
    :return: list of (value, doc_count) tuples
    """
    if es and index:
        search = Search(using=es, index=index)
    else:
        search = Search()
    search = search.doc_type(Record)
    # aggs.bucket mutates the Search in place; top 1000 terms requested.
    search.aggs.bucket('bucket', 'terms', field=field, size=1000)
    buckets = search.execute().aggregations['bucket']['buckets']
    return [(bucket['key'], bucket['doc_count']) for bucket in buckets]
def setUp(self):
    """Reset external state before each test.

    Deletes every BillDocument from the Elasticsearch index (tolerating a
    missing index) and wipes all nodes from the graph database.
    """
    # NOTE: the original docstring described unrelated fixtures (User, MDVR,
    # GPSRecord, ...) -- apparently copy-pasted from another test case.
    self.manager = BillManager
    self.es_client = Elasticsearch(
        hosts=settings.ELASTICSEARCH_DSL['default']['hosts'],
        connection_class=RequestsHttpConnection)
    s = Search(using=self.es_client).doc_type(BillDocument)
    try:
        # Plain loop: the original used a list comprehension purely for
        # its side effects.
        for bill in s.execute():
            bill.delete()
    except NotFoundError:
        # Index does not exist yet -- nothing to clean up.
        pass
    Graph(**settings.GRAPH_DATABASE).data("MATCH (n) DETACH DELETE n")
def get_langs_from_unlabeled_tweets(self, **kwargs):
    """Return the distinct ``lang`` values present in the tweet index.

    :param kwargs: must contain ``host``, ``port`` and ``index``
    :return: list of distinct language codes
    """
    # TODO: we need to execute this in case the user doesn't have
    # fielddata enabled on ``lang``:
    #   PUT /<index>/_mapping/tweet
    #   {"properties": {"lang": {"type": "text", "fielddata": true}}}
    the_host = "http://" + kwargs["host"] + ":" + kwargs["port"]
    # create_connection registers this client as the *default* connection,
    # which Search.from_dict() relies on below.
    connections.create_connection(hosts=[the_host])
    body = {
        "size": 0,
        "aggs": {
            "distinct_lang": {
                "terms": {
                    "field": "lang",
                    "size": 1000
                }
            }
        }
    }
    # Cleanup: the original built an explicit Search(using=client, ...) and
    # then discarded it by rebuilding from the dict, and reassigned ``body``
    # to s.to_dict() without using it. Build the search once instead.
    s = Search.from_dict(body).index(kwargs["index"]).doc_type("tweet")
    t = s.execute()
    # NOTE(review): iterates the aggregation object directly; presumably
    # this yields the buckets -- confirm against
    # ``t.aggregations.distinct_lang.buckets``.
    return [item.key for item in t.aggregations.distinct_lang]
def find_posts(self, query, strategy="fuzzy", size=50):
    """Find all posts whose content matches the query.

    Fuzzy matching is on by default; pass ``strategy="match"`` for exact
    string matching instead.

    :param query (str): what are we searching for
    :param strategy (str): matching strategy
    :param size: maximum number of hits to request
    :return: posts (list): post dicts similar to the query, best score first
    """
    search = Search(using=self.client, index=self.index)
    search.update_from_dict({"size": size})
    hits = search.doc_type(Post.DOC_TYPE).query(
        strategy, content=query).execute()
    posts = [hit.to_dict() for hit in hits]
    posts.sort(key=lambda post: -post["score"])
    return posts
def search(
        hosts,
        index,
        query_type='multi_match',
        query='',
        filters=None,
        sort='m_pseudoid',
        start=0,
        pagesize=10
):
    """Constructs Search object

    Note: allows any combination of filters, even illogical ones

    @param hosts: list settings.DOCSTORE_HOSTS (currently unused; the
        module-level ES connection is used instead)
    @param index: elasticsearch_dsl.Index
    @param query_type: str Name of query type.
    @param query: str Query string.
    @param filters: dict Filters and their arguments (default: none).
    @param sort: str Name of field on which to sort.
    @param start: int Start of result set.
    @param pagesize: int Number of records to return.
    @returns: elasticsearch_dsl.Search
    """
    # BUG FIX: the original used the mutable default argument ``filters={}``;
    # normalize ``None`` to an empty dict instead.
    if filters is None:
        filters = {}
    s = Search(using=ES, index=index)
    s = s.doc_type(Record)
    for field, values in filters.items():
        # multiple terms for a field are OR-ed
        if values:
            s = s.filter('terms', **{field: values})
    if query:
        s = s.query(
            query_type, query=query, fields=definitions.FIELDS_MASTER
        )
    # One terms aggregation per filtered field (for faceting).
    for field in filters.keys():
        s.aggs.bucket(field, 'terms', field=field, size=1000)
    s = s.fields(definitions.FIELDS_MASTER)
    s = s.sort(sort)
    # Paginate.
    s = s[start:start + pagesize]
    return s
def doSearch(self, body):
    """Run the raw query ``body`` against the configured index.

    Highlights a few known fields in the result. Best-effort: any failure
    (connection error, malformed query, ...) yields ``None``.

    :param body: raw Elasticsearch query dict
    :return: elasticsearch_dsl Response, or None on any error
    """
    try:
        # create_connection registers the client as the *default*
        # connection, which Search.from_dict() relies on below.
        connections.create_connection(hosts=[settings.ES_URL])
        # Cleanup: the original built a Search with explicit kwargs and then
        # discarded it via Search.from_dict(), and reassigned ``body`` to
        # s.to_dict() without using it. Build the search once instead.
        s = Search.from_dict(body)
        s = s.index(settings.ES_INDEX_NAME)
        s = s.doc_type(settings.ES_INDEX_TYPE)
        # Highlight the following fields in the search result.
        s = s.highlight('title')
        s = s.highlight('description')
        s = s.highlight('data_time')
        s = s.highlight('source')
        response = s.execute()
    except Exception:
        # Deliberate best-effort: callers treat None as "no results".
        return None
    return response
], 'descritions': ['alpha', '0', 'beta', 'alpha_0', '2nd ed', '1st ed', '3st ed', 'draft'] } for i in range(len(books['titles'])): book = Book(id=1000 + i, title=books['titles'][i], description=books['descritions'][i]) book.save() s1 = Book.search() r1 = s1.execute() s2 = Search() s2.doc_type(Book) r2 = s2.execute() s3 = Search(index='book') r3 = s3.execute() s1 = s1.filter('terms', title=['python', 'c#']) r_1 = s1.execute() s2 = s2.filter('term', title='python') r_2 = s2.execute() s3 = s3.query('match', title='python') r_3 = s3.execute() s4 = Book.search().query('match_phrase_prefix', title='java')