def search(self, doc_type, query=""):
    """Execute a search query against Elasticsearch and retrieve results.

    :param doc_type: document type class (metaclass ``DocTypeMeta``)
    :param query: free-text query matched against the ``title`` field
    :return: list of response hits (empty when the arguments are invalid)
    """
    results = []
    # Guard against wrong argument types instead of raising.
    # BUG FIX: the original used Py2-only constructs (`print` statement,
    # `unicode`) which are a syntax error / NameError under Python 3,
    # which the rest of this file (async def, annotations) requires.
    if isinstance(query, str) and isinstance(doc_type, DocTypeMeta):
        q = Q("multi_match", query=query.lower(), fields=["title"])
        s = (
            Search()
            .using(self.client)
            .index(self.index_name)
            .doc_type(doc_type)
            .query(q)
        )
        print("search query: " + str(s.to_dict()))
        response = s.execute()
        results = list(response)
    return results
def search_aggregation(hosts, index, field):
    """Run a terms aggregation on ``field`` and return the matching records.

    NOTE(review): ``hosts`` is accepted but unused -- the module-level ``ES``
    connection is used instead. The aggregation buckets are requested but
    never read; only the plain hits are converted and returned. Confirm this
    is intentional.
    """
    search = Search(using=ES, index=index).doc_type(Record)
    search.aggs.bucket('choices', 'terms', field=field)
    hits = search.execute()
    return [Record.from_hit(hit) for hit in hits]
def search(self, query: str, filters: dict = None, only_this_type: bool = True,
           **kwargs) -> list:
    """Perform a search against Elasticsearch, then pull the corresponding
    rows from the database.

    :param query: query terms to search by (matched against ``_all``)
    :param filters: named (attribute, value) filters to limit the results;
        ``None`` means no filtering
    :param only_this_type: restrict the search to this indexer's doc type
    :param kwargs: accepted for interface compatibility; currently unused
    :return: model instances sorted by descending score; each instance
        carries its Elasticsearch score as ``instance._meta.es_score``
    """
    # Build base search object.
    s = Search(using=self.indexer.es).index(self.indexer.index_name)
    if only_this_type:
        s = s.doc_type(self.indexer.doc_type_name)

    # Full-text query against the catch-all field.
    s = s.query('match', _all=query)

    # Exact-term filters (AND-ed together).
    if filters is not None:
        for attr, value in filters.items():
            s = s.filter(F({'term': {attr: value}}))

    res = s.execute()

    # Map each hit back to its Django model class and primary key,
    # keeping the score: {model_class: {pk: score, ...}, ...}
    results = {}
    for hit in res:
        model = get_model(hit._meta.doc_type)
        pk = getattr(hit, model._meta.pk.name)
        results.setdefault(model, {})[pk] = hit._meta.score

    # Fetch the actual rows, one queryset per model class.
    instances = []
    for model, pk_score in results.items():
        instances.extend(model.objects.filter(pk__in=pk_score.keys()))

    # Attach scores so callers (and the sort below) can use them.
    for instance in instances:
        instance._meta.es_score = results[type(instance)][instance.pk]

    # Highest score first.
    return sorted(instances, key=lambda i: i._meta.es_score, reverse=True)
async def convert(self, ctx, argument):
    """Convert a command argument into an Elasticsearch Gym document.

    Resolution order: guild-local alias (non-numeric argument), direct
    numeric id, then a fuzzy title search restricted to the guild/channel's
    configured region polygon.

    :raises commands.CommandError: no region configured, or no gym matched
    """
    try:
        # 1) Try a guild-local alias first (aliases beat fuzzy search).
        if not argument.isnumeric():
            try:
                alias = ctx.cog.session.query(models.GymAlias).filter_by(
                    guild_id=ctx.message.channel.guild.id,
                    title=argument.lower()).one()
                return get_es_gym_by_id(ctx, alias.gym.id)
            except NoResultFound:
                pass
        # 2) A numeric argument is treated as a direct gym id.
        if argument.isnumeric():
            return get_es_gym_by_id(ctx, argument)
        # 3) Fuzzy title search inside the configured region.
        region = config.get(ctx.cog.session, "region", ctx.message.channel)
        if region is None:
            raise commands.CommandError(
                _("This guild/channel does not have a region set"))
        region = to_shape(region)
        points = [{"lat": point[1], "lon": point[0]}
                  for point in region.exterior.coords]
        s = Search(index="gym").query("match", title={
            'query': argument,
            'fuzziness': 2
        })
        s = s.filter("geo_polygon", location={"points": points})
        # BUG FIX: Search methods return a modified copy; the original
        # discarded the result of doc_type(), so the restriction to the
        # Gym document type was never applied.
        s = s.doc_type(es_models.Gym)
        response = s.execute()
        if response.hits.total == 0:
            raise commands.CommandError(
                _("Gym \"{}\" not found").format(argument))
        return response[0]
    except (commands.CommandError, commands.BadArgument):
        # Intentional user-facing errors: let the framework report them.
        raise
    except Exception:
        logger.exception("Exception in Gym converter")
        await ctx.send(
            _("Error in Gym converter. Check your console or logs for details"
              ))
        raise
def _build_related_lists(self):
    """Build (and cache) the lists of documents related to the current one.

    Runs one match_phrase query per extracted keyphrase and accumulates the
    distinct remote ids and contents of the hits, excluding the document
    being analyzed itself.

    :return: dict with ``ids`` and ``content`` lists
    """
    # Return the cached result when already computed.
    if self.related_docs:
        return self.related_docs
    self.related_docs = {"ids": [], "content": []}
    for phrase in self._get_keyphrases():
        s = Search().using(self.es_client).query('match_phrase',
                                                 content=phrase)
        # BUG FIX: doc_type() returns a modified copy; the original discarded
        # the result, so the BillDocument restriction was never applied.
        s = s.doc_type(BillDocument)
        response = s.execute()
        self.related_docs['ids'] += list(
            set(hit.remote_id for hit in response) -
            set(self.related_docs['ids']))
        self.related_docs['content'] += list(
            set(hit.content for hit in response) -
            set(self.related_docs['content']))
    # Don't include the document itself in the analysis. Guard the removals:
    # the document may not have matched any of its own keyphrases, in which
    # case the original unguarded remove() raised ValueError.
    if self.instance.remote_id in self.related_docs['ids']:
        self.related_docs['ids'].remove(self.instance.remote_id)
    if self.instance.content in self.related_docs['content']:
        self.related_docs['content'].remove(self.instance.content)
    return self.related_docs
def field_values(field, es=None, index=None):
    """Return unique values and their counts for the specified field.

    :param field: name of the field to aggregate on
    :param es: optional Elasticsearch client (falls back to the default)
    :param index: optional index name (used only together with ``es``)
    :return: list of (value, doc_count) tuples
    """
    if es and index:
        search = Search(using=es, index=index)
    else:
        search = Search()
    search = search.doc_type(Record)
    # aggs.bucket mutates the Search in place; top 1000 terms requested.
    search.aggs.bucket('bucket', 'terms', field=field, size=1000)
    buckets = search.execute().aggregations['bucket']['buckets']
    return [(bucket['key'], bucket['doc_count']) for bucket in buckets]
def setUp(self):
    """Reset external state before each test.

    Deletes every BillDocument from the Elasticsearch index (tolerating a
    missing index) and wipes all nodes from the graph database.
    """
    # NOTE: the original docstring described unrelated fixtures (User, MDVR,
    # GPSRecord, ...) -- apparently copy-pasted from another test case.
    self.manager = BillManager
    self.es_client = Elasticsearch(
        hosts=settings.ELASTICSEARCH_DSL['default']['hosts'],
        connection_class=RequestsHttpConnection)
    s = Search(using=self.es_client).doc_type(BillDocument)
    try:
        # Plain loop: the original used a list comprehension purely for
        # its side effects.
        for bill in s.execute():
            bill.delete()
    except NotFoundError:
        # Index does not exist yet -- nothing to clean up.
        pass
    Graph(**settings.GRAPH_DATABASE).data("MATCH (n) DETACH DELETE n")
def get_langs_from_unlabeled_tweets(self, **kwargs):
    """Return the distinct ``lang`` values present in the tweet index.

    :param kwargs: must contain ``host``, ``port`` and ``index``
    :return: list of distinct language codes
    """
    # TODO: we need to execute this in case the user doesn't have
    # fielddata enabled on ``lang``:
    #   PUT /<index>/_mapping/tweet
    #   {"properties": {"lang": {"type": "text", "fielddata": true}}}
    the_host = "http://" + kwargs["host"] + ":" + kwargs["port"]
    # create_connection registers this client as the *default* connection,
    # which Search.from_dict() relies on below.
    connections.create_connection(hosts=[the_host])
    body = {
        "size": 0,
        "aggs": {
            "distinct_lang": {
                "terms": {
                    "field": "lang",
                    "size": 1000
                }
            }
        }
    }
    # Cleanup: the original built an explicit Search(using=client, ...) and
    # then discarded it by rebuilding from the dict, and reassigned ``body``
    # to s.to_dict() without using it. Build the search once instead.
    s = Search.from_dict(body).index(kwargs["index"]).doc_type("tweet")
    t = s.execute()
    # NOTE(review): iterates the aggregation object directly; presumably
    # this yields the buckets -- confirm against
    # ``t.aggregations.distinct_lang.buckets``.
    return [item.key for item in t.aggregations.distinct_lang]
def find_posts(self, query, strategy="fuzzy", size=50):
    """Find all posts whose content matches the query.

    Fuzzy matching is on by default; pass ``strategy="match"`` for exact
    string matching instead.

    :param query (str): what are we searching for
    :param strategy (str): matching strategy
    :param size: maximum number of hits to request
    :return: posts (list): post dicts similar to the query, best score first
    """
    search = Search(using=self.client, index=self.index)
    search.update_from_dict({"size": size})
    hits = search.doc_type(Post.DOC_TYPE).query(
        strategy, content=query).execute()
    posts = [hit.to_dict() for hit in hits]
    posts.sort(key=lambda post: -post["score"])
    return posts
def search(
        hosts,
        index,
        query_type='multi_match',
        query='',
        filters=None,
        sort='m_pseudoid',
        start=0,
        pagesize=10
):
    """Constructs Search object

    Note: allows any combination of filters, even illogical ones

    @param hosts: list settings.DOCSTORE_HOSTS (currently unused; the
        module-level ES connection is used instead)
    @param index: elasticsearch_dsl.Index
    @param query_type: str Name of query type.
    @param query: str Query string.
    @param filters: dict Filters and their arguments (default: none).
    @param sort: str Name of field on which to sort.
    @param start: int Start of result set.
    @param pagesize: int Number of records to return.
    @returns: elasticsearch_dsl.Search
    """
    # BUG FIX: the original used the mutable default argument ``filters={}``;
    # normalize ``None`` to an empty dict instead.
    if filters is None:
        filters = {}
    s = Search(using=ES, index=index)
    s = s.doc_type(Record)
    for field, values in filters.items():
        # multiple terms for a field are OR-ed
        if values:
            s = s.filter('terms', **{field: values})
    if query:
        s = s.query(
            query_type, query=query, fields=definitions.FIELDS_MASTER
        )
    # One terms aggregation per filtered field (for faceting).
    for field in filters.keys():
        s.aggs.bucket(field, 'terms', field=field, size=1000)
    s = s.fields(definitions.FIELDS_MASTER)
    s = s.sort(sort)
    # Paginate.
    s = s[start:start + pagesize]
    return s
def doSearch(self, body):
    """Run the raw query ``body`` against the configured index.

    Highlights a few known fields in the result. Best-effort: any failure
    (connection error, malformed query, ...) yields ``None``.

    :param body: raw Elasticsearch query dict
    :return: elasticsearch_dsl Response, or None on any error
    """
    try:
        # create_connection registers the client as the *default*
        # connection, which Search.from_dict() relies on below.
        connections.create_connection(hosts=[settings.ES_URL])
        # Cleanup: the original built a Search with explicit kwargs and then
        # discarded it via Search.from_dict(), and reassigned ``body`` to
        # s.to_dict() without using it. Build the search once instead.
        s = Search.from_dict(body)
        s = s.index(settings.ES_INDEX_NAME)
        s = s.doc_type(settings.ES_INDEX_TYPE)
        # Highlight the following fields in the search result.
        s = s.highlight('title')
        s = s.highlight('description')
        s = s.highlight('data_time')
        s = s.highlight('source')
        response = s.execute()
    except Exception:
        # Deliberate best-effort: callers treat None as "no results".
        return None
    return response
], 'descritions': ['alpha', '0', 'beta', 'alpha_0', '2nd ed', '1st ed', '3st ed', 'draft'] } for i in range(len(books['titles'])): book = Book(id=1000 + i, title=books['titles'][i], description=books['descritions'][i]) book.save() s1 = Book.search() r1 = s1.execute() s2 = Search() s2.doc_type(Book) r2 = s2.execute() s3 = Search(index='book') r3 = s3.execute() s1 = s1.filter('terms', title=['python', 'c#']) r_1 = s1.execute() s2 = s2.filter('term', title='python') r_2 = s2.execute() s3 = s3.query('match', title='python') r_3 = s3.execute() s4 = Book.search().query('match_phrase_prefix', title='java')