def search(self, doc_type, query=""):
        """
        Execute a search query and retrieve the results.

        The query is lower-cased and matched against the ``title`` field
        with a ``multi_match`` query on ``self.index_name``.

        :param doc_type: Type in ElasticSearch (an instance of ``DocTypeMeta``)
        :param query: search query (``str`` or ``unicode``)
        :return: list with the hits of the response; an empty list when
                 either argument has an unexpected type
        """
        # Guard clause: arguments of the wrong type yield an empty result
        # instead of an error.  isinstance() also accepts subclasses, which
        # the former exact type comparison rejected.
        if not (isinstance(query, (str, unicode))
                and isinstance(doc_type, DocTypeMeta)):
            return []

        q = Q("multi_match",
              query=query.lower(),
              fields=["title"])

        # Every Search method returns a modified copy, hence the re-binding.
        s = Search()
        s = s.using(self.client)
        s = s.index(self.index_name)
        s = s.doc_type(doc_type)
        s = s.query(q)
        # A single parenthesised argument prints the same text as a print
        # statement and is also valid when print is a function.
        print("search query: " + str(s.to_dict()))

        return list(s.execute())
Example #2
0
def search_aggregation(hosts, index, field):
    """Run a search with a ``terms`` aggregation on *field*.

    The search is executed against *index* through the ``ES`` connection
    taken from the enclosing scope and is restricted to ``Record``.

    @param hosts: accepted for interface compatibility; not used here.
    @param index: index to search.
    @param field: str Name of the field to aggregate on.
    @returns: list of ``Record`` objects built from the response hits.
    """
    search = Search(using=ES, index=index).doc_type(Record)
    # The aggregation is registered on the search object in place.
    search.aggs.bucket('choices', 'terms', field=field)

    # NOTE(review): only the hits are returned; the 'choices' aggregation
    # buckets of the response are not part of the result - confirm intent.
    return [Record.from_hit(hit) for hit in search.execute()]
    def search(self, query: str, filters: dict=None, only_this_type: bool=True, **kwargs: dict) -> list:
        """performs a search against elasticsearch and then pulls the corresponding data from the db

        :param query: query terms to search by
        :param filters: named (attribute, value) filters to limit the query results
        :param only_this_type: when true, restrict the search to the indexer's doc type
        :param kwargs: additional search keyword arguments (not used by this method)
        :return: a list of model instances ordered by descending score; every
            instance carries its own score in an ``es_score`` attribute
        """
        # build base search object
        s = Search(using=self.indexer.es).index(self.indexer.index_name)
        if only_this_type:
            s = s.doc_type(self.indexer.doc_type_name)

        # build query
        s = s.query('match', _all=query)

        # add one term filter per (attribute, value) pair
        if filters is not None:
            for attr, value in filters.items():
                s = s.filter(F({'term': {attr: value}}))

        # execute the query and map each model to {pk: score} of its hits
        scores = {}
        for hit in s.execute():
            model = get_model(hit._meta.doc_type)
            pk = getattr(hit, model._meta.pk.name)
            scores.setdefault(model, {})[pk] = hit._meta.score

        # load the instances, one db query per model
        instances = []
        for model, pk_score in scores.items():
            instances.extend(model.objects.filter(pk__in=pk_score.keys()))

        def score_of(instance):
            # score of the hit this instance was loaded for
            return scores[type(instance)][instance.pk]

        # attach scores to instances
        for instance in instances:
            score = score_of(instance)
            instance.es_score = score
            # `_meta` is the model's Options object and is shared by every
            # instance of that model, so this value is overwritten by each
            # instance in turn.  It is still written for callers that read
            # it, but it must not be used for ordering.
            instance._meta.es_score = score

        # order by the per-instance score, best match first
        return sorted(instances, key=score_of, reverse=True)
Example #4
0
    async def convert(self, ctx, argument):
        """Resolve *argument* to a gym.

        Resolution order:

        1. A numeric argument is treated as a gym id.
        2. Otherwise a ``GymAlias`` of the current guild whose title equals
           the lower-cased argument is looked up.
        3. Otherwise a fuzzy title match is run against the ``gym`` index,
           limited to the region configured for the channel.

        :param ctx: invocation context; supplies the cog session and channel
        :param argument: the raw argument string to convert
        :return: the gym returned by ``get_es_gym_by_id`` or the first hit
            of the fuzzy search
        :raises commands.CommandError: when no region is configured or no
            gym matches
        """
        try:
            if argument.isnumeric():
                return get_es_gym_by_id(ctx, argument)

            try:
                alias = ctx.cog.session.query(models.GymAlias).filter_by(
                    guild_id=ctx.message.channel.guild.id,
                    title=argument.lower()).one()
                return get_es_gym_by_id(ctx, alias.gym.id)
            except NoResultFound:
                # no alias with that title: fall back to the fuzzy search
                pass

            region = config.get(ctx.cog.session, "region", ctx.message.channel)
            if region is None:
                raise commands.CommandError(
                    _("This guild/channel does not have a region set"))

            region = to_shape(region)
            # shape coordinates are read as (lon, lat) pairs
            points = [{"lat": point[1], "lon": point[0]}
                      for point in region.exterior.coords]
            s = Search(index="gym").query("match",
                                          title={
                                              'query': argument,
                                              'fuzziness': 2
                                          })
            s = s.filter("geo_polygon", location={"points": points})
            # Search methods return a modified copy; without re-binding the
            # doc type would never be applied.
            s = s.doc_type(es_models.Gym)
            response = s.execute()
            # Check the returned hits rather than `hits.total`, whose format
            # differs between Elasticsearch versions.
            if len(response.hits) == 0:
                raise commands.CommandError(
                    _("Gym \"{}\" not found").format(argument))
            return response[0]
        except (commands.CommandError, commands.BadArgument):
            # user-facing errors propagate unchanged
            raise
        except Exception:
            logger.exception("Exception in Gym converter")
            await ctx.send(
                _("Error in Gym converter. Check your console or logs for details"
                  ))
            raise
Example #5
0
    def _build_related_lists(self):
        """Build and cache the lists of documents related to the current one.

        For every key phrase returned by ``self._get_keyphrases()`` a
        ``match_phrase`` query on ``content`` is run; the ``remote_id`` and
        ``content`` of every hit are collected without duplicates.  The
        document being analysed is excluded from both lists.

        :return: dict with the keys ``"ids"`` and ``"content"``; the same
            dict is stored in ``self.related_docs`` and returned directly
            on later calls
        """
        if self.related_docs:
            return self.related_docs
        self.related_docs = {"ids": [], "content": []}
        for phrase in self._get_keyphrases():
            s = Search().using(self.es_client).query('match_phrase',
                                                     content=phrase)
            # Search methods return a modified copy; without re-binding the
            # doc type would never be applied.
            s = s.doc_type(BillDocument)
            response = s.execute()
            # only add values that have not been collected yet
            self.related_docs['ids'] += list(
                {hit.remote_id for hit in response}
                - set(self.related_docs['ids']))
            self.related_docs['content'] += list(
                {hit.content for hit in response}
                - set(self.related_docs['content']))
        # Don't include the document itself in the analysis.  It may be
        # missing from the hits, in which case there is nothing to remove.
        own_values = (('ids', self.instance.remote_id),
                      ('content', self.instance.content))
        for key, own_value in own_values:
            if own_value in self.related_docs[key]:
                self.related_docs[key].remove(own_value)

        return self.related_docs
Example #6
0
 def field_values(field, es=None, index=None):
     """Return the unique values of a field together with their counts.

     A ``terms`` aggregation with up to 1000 buckets is run on *field*,
     restricted to ``Record``.  The search uses *es* and *index* only when
     both are given; otherwise a default ``Search()`` is used.

     Returns a list of ``(key, doc_count)`` tuples, one per bucket.
     """
     search = Search(using=es, index=index) if (es and index) else Search()
     search = search.doc_type(Record)
     # The aggregation is registered on the search object in place.
     search.aggs.bucket('bucket', 'terms', field=field, size=1000)
     buckets = search.execute().aggregations['bucket']['buckets']
     return [(entry['key'], entry['doc_count']) for entry in buckets]
Example #7
0
 def setUp(self):
     """
     Prepare a clean state before each test.

     Stores ``BillManager`` and an Elasticsearch client on the test case,
     deletes the ``BillDocument`` hits returned by a search and removes
     every node from the graph database.
     :return: None
     """
     self.manager = BillManager
     es_hosts = settings.ELASTICSEARCH_DSL['default']['hosts']
     self.es_client = Elasticsearch(
         hosts=es_hosts, connection_class=RequestsHttpConnection)

     bill_search = Search(using=self.es_client).doc_type(BillDocument)
     try:
         # NOTE(review): execute() returns a single page of hits, so this
         # may not remove every stored document - confirm.
         for bill in bill_search.execute():
             bill.delete()
     except NotFoundError:
         # a NotFoundError here is ignored on purpose
         pass

     graph = Graph(**settings.GRAPH_DATABASE)
     graph.data("MATCH (n) DETACH DELETE n")
Example #8
0
    def get_langs_from_unlabeled_tweets(self, **kwargs):
        """Return the distinct values of the ``lang`` field of the tweets.

        Runs a ``terms`` aggregation (at most 1000 buckets, no hits
        requested) on the ``tweet`` doc type of the given index.

        :param kwargs: must contain ``host``, ``port`` and ``index``
        :return: list with the key of every aggregation bucket
        """
        # TODO: the mapping update below has to be executed in case the user
        # doesn't have it enabled (the original note was left unfinished):
        # PUT / twitterfdl2017 / _mapping / tweet
        # {
        #     "properties": {
        #         "lang": {
        #             "type": "text",
        #             "fielddata": true
        #         }
        #     }
        # }

        # format() also accepts a port given as an int
        the_host = "http://{}:{}".format(kwargs["host"], kwargs["port"])
        client = connections.create_connection(hosts=[the_host])

        body = {
            "size": 0,
            "aggs": {
                "distinct_lang": {
                    "terms": {
                        "field": "lang",
                        "size": 1000
                    }
                }
            }
        }

        # Build the search from the raw body and bind it explicitly to the
        # connection created above.
        s = Search.from_dict(body).using(client)
        s = s.index(kwargs["index"])
        s = s.doc_type("tweet")

        response = s.execute()
        return [bucket.key for bucket in response.aggregations.distinct_lang]
Example #9
0
    def find_posts(self, query, strategy="fuzzy", size=50):
        """
        Find all posts whose content matches the searched query.
        Fuzzy matching is used by default; exact string matching is
        available by changing the strategy arg to "match".

        :param query (str): what are we searching for
        :param strategy (str): matching strategy, used as the query type
        :param size: number of hits requested from the search
        :return: posts (list): the hits as dicts, ordered by their "score"
            key from highest to lowest
        """
        request = Search(using=self.client, index=self.index)
        # update_from_dict modifies the search object in place
        request.update_from_dict({"size": size})
        request = request.doc_type(Post.DOC_TYPE)
        request = request.query(strategy, content=query)

        hits_as_dicts = [hit.to_dict() for hit in request.execute()]
        return sorted(hits_as_dicts, key=lambda post: -post["score"])
Example #10
0
def search(
        hosts, index, query_type='multi_match', query='', filters=None,
        sort='m_pseudoid', start=0, pagesize=10
):
    """Constructs Search object

    Note: allows any combination of filters, even illogical ones

    @param hosts: list settings.DOCSTORE_HOSTS (not used by this function)
    @param index: elasticsearch_dsl.Index
    @param query_type: str Name of query type.
    @param query: str Query string.
    @param filters: dict Filters and their arguments (None means no filters).
    @param sort: str Name of field on which to sort.
    @param start: int Start of result set.
    @param pagesize: int Number of records to return.
    @returns: elasticsearch_dsl.Search
    """
    s = Search(using=ES, index=index)
    s = s.doc_type(Record)
    if filters:
        for field, values in filters.items():
            if values:
                # multiple terms for a field are OR-ed
                s = s.filter('terms', **{field: values})
            # every filter field gets an aggregation, even when it has no
            # values to filter on; aggregations are added in place
            s.aggs.bucket(field, 'terms', field=field, size=1000)
    if query:
        s = s.query(
            query_type, query=query, fields=definitions.FIELDS_MASTER
        )
    s = s.fields(definitions.FIELDS_MASTER)
    s = s.sort(sort)
    # slicing sets the offset and size of the result window
    s = s[start:start+pagesize]
    return s
Example #11
0
    def doSearch(self, body):
        """Run the search described by *body* and return the response.

        The search is executed against ``settings.ES_INDEX_NAME`` /
        ``settings.ES_INDEX_TYPE`` on the server at ``settings.ES_URL`` with
        highlighting requested for the title, description, data_time and
        source fields.

        :param body: dict with the raw search request
        :return: the search response, or None when anything fails
        """
        import logging

        try:
            client = connections.create_connection(hosts=[settings.ES_URL])
            # Build the search from the raw body and bind it explicitly to
            # the connection created above.
            s = Search.from_dict(body).using(client)
            s = s.index(settings.ES_INDEX_NAME)
            s = s.doc_type(settings.ES_INDEX_TYPE)

            # highlight the following fields in the search result
            for field in ('title', 'description', 'data_time', 'source'):
                s = s.highlight(field)

            return s.execute()
        except Exception:
            # Callers rely on None as the failure signal; log the cause so
            # that it is not lost.
            logging.getLogger(__name__).exception("Search request failed")
            return None
Example #12
0
    ],
    'descritions':
    ['alpha', '0', 'beta', 'alpha_0', '2nd ed', '1st ed', '3st ed', 'draft']
}

# Index one Book per title; titles and descriptions are paired by position
# and ids start at 1000.
for i, (title, description) in enumerate(
        zip(books['titles'], books['descritions'])):
    book = Book(id=1000 + i,
                title=title,
                description=description)
    book.save()

# Three ways to build a search: from the document class, from a bare Search
# restricted to the document class, and from a bare Search on the index.
s1 = Book.search()
r1 = s1.execute()

# Search methods return a modified copy, so the result has to be kept for
# the doc type to take effect.
s2 = Search().doc_type(Book)
r2 = s2.execute()

s3 = Search(index='book')
r3 = s3.execute()

# Narrow each search down and run it again.
s1 = s1.filter('terms', title=['python', 'c#'])
r_1 = s1.execute()

s2 = s2.filter('term', title='python')
r_2 = s2.execute()

s3 = s3.query('match', title='python')
r_3 = s3.execute()

s4 = Book.search().query('match_phrase_prefix', title='java')