def build(self):
    # Build the faceted search: one filtered aggregation per requested
    # facet (each filtered by every *other* facet's active selection) plus
    # a combined post_filter, so hits are narrowed without a facet's own
    # selection skewing its own buckets.
    fs = self._clone()
    for facet in self.facets:
        # Only compute aggregations the caller explicitly asked for via
        # an "include_<name>" arg.
        if "include_%s" % facet.name not in self.args:
            continue
        agg_filter = esd.Q("match_all")
        for inner in self.facets:
            # A facet's own selection must not filter its own buckets.
            if inner.name != facet.name:
                if inner.is_filtered(self.args):
                    agg_filter &= inner.filters(self.args)
        for agg_name, agg in facet.aggregates():
            fs.aggs.bucket("_filter_" + agg_name, "filter",
                           filter=agg_filter).bucket(agg_name, agg)
    # Combine every active facet filter into the post_filter so search
    # hits are restricted while aggregations (above) stay unskewed.
    post_filter = esd.Q('match_all')
    for facet in self.facets:
        if facet.is_filtered(self.args):
            post_filter &= facet.filters(self.args)
    # NOTE(review): reaches into the private ``_proxied`` attribute of the
    # post_filter proxy object -- fragile across elasticsearch-dsl versions.
    fs.post_filter._proxied &= post_filter
    return fs
def count_by_country(vk_elastic_db: es_client.VkDataDatabaseClient, size=10, is_need_active=False, days_delta=20):
    """Count users per country via a terms aggregation.

    Args:
        vk_elastic_db: client wrapper providing the Elasticsearch handle.
        size: maximum number of country buckets to return.
        is_need_active: restrict to users active within ``days_delta`` days.
        days_delta: activity window used when ``is_need_active`` is set.

    Returns:
        List of ``{'country': <name>, 'count': <doc_count>}`` dicts,
        enriched in place by ``add_geoposition``.
    """
    country_aggs_name = "country_count"
    es = get_elastic_object(vk_elastic_db)
    s = elasticsearch_dsl.Search(using=es, index=index)
    if is_need_active:
        s = get_active_users_filter(es, index, s, days_delta=days_delta)
    # Only documents that actually carry a country title.
    s = s.filter(
        "bool",
        must=[elasticsearch_dsl.Q("exists", field="country.title.keyword")])
    # BUG FIX: the field was previously spelled ``country.title.keywordd``
    # (trailing double "d"), so the empty-string exclusion matched a
    # nonexistent field and never filtered anything.
    s = s.filter(
        "bool",
        must_not=[elasticsearch_dsl.Q("match", country__title__keyword="")])
    a = elasticsearch_dsl.A('terms', field="country.title.keyword", size=size)
    s.aggs.bucket(country_aggs_name, a)
    response = s.execute()
    data = []
    for country_hit in response.aggregations[country_aggs_name].buckets:
        country_dict = {
            'country': country_hit.key,
            'count': country_hit.doc_count
        }
        data.append(country_dict)
    add_geoposition(data)
    return data
def filters(self, args):
    """Build this facet's filter: an explicitly-empty selection means
    "field absent" (``missing``), anything else becomes a ``terms`` query
    on the single submitted value."""
    selected = args.get(self.name)
    if self.name in args and args[self.name] == "":
        return esd.Q('missing', field=self.name)
    return esd.Q('terms', **{self.name: [selected]})
def mongodb_condition_to_es(key, value):
    """Translate a (restricted) MongoDB-style condition into an ES range query.

    Supports either a plain value (treated as equality via gte==lte) or a
    single-operator dict using $gte/$lte/$gt/$lt. ObjectIds and datetimes
    are converted to numeric representations; datetimes additionally get
    the ``epoch_millis`` format hint. Anything else raises
    NotImplementedError.
    """
    def _coerce(raw):
        # Returns (converted value, needs-epoch_millis-format flag).
        if isinstance(raw, bson.ObjectId):
            return bson_oid_to_int(raw), False
        if isinstance(raw, datetime.datetime):
            return datetime_to_unix(raw), True
        return raw, False

    if not isinstance(value, dict):
        coerced, is_timestamp = _coerce(value)
        spec = {"gte": coerced, "lte": coerced}
        if is_timestamp:
            spec['format'] = 'epoch_millis'
        return elasticsearch_dsl.Q("range", **{key: spec})

    if len(value) != 1:
        raise NotImplementedError
    op, operand = next(iter(value.items()))
    if op in ('$gte', '$lte', '$gt', '$lt'):
        coerced, is_timestamp = _coerce(operand)
        spec = {op[1:]: coerced}
        if is_timestamp:
            spec['format'] = 'epoch_millis'
        return elasticsearch_dsl.Q("range", **{key: spec})
    raise NotImplementedError
def add_altitude_info(provincia, municipio=None):
    # Enrich plot records for a province (optionally one municipality) with
    # elevation data from the Google Elevation API, bulk-updating
    # Elasticsearch in batches of `chunk_size`.
    # NOTE(review): Python 2 code (print statements, `e.message`).
    chunk_size = conf.config.getint('Google Elevation', 'chunk_size')
    print "Chunk Size = " + str(chunk_size)
    try:
        sigpac_record.init()
        # presumably lets the index settle after init -- TODO confirm
        time.sleep(5)
    except Exception as e:
        conf.error_handler.error(__name__, "build_record", str(e))
        conf.error_handler.flush()
        raise
    # NOTE(review): `filter` shadows the builtin; kept as-is.
    filter = [dsl.Q("term", provincia=provincia)]
    if municipio is not None:
        filter.append(dsl.Q("term", municipio=municipio))
    # query elasticsearch for the necessary registers
    search = dsl.Search(index='plots').query('bool', filter=filter).fields(
        ['bbox_center.lat', 'bbox_center.lon'])
    search.execute()
    records = []
    centers = []
    for r in search.scan():
        record = sigpac_record(meta={'id': r.meta.id})
        records.append(record)
        centers.append((r['bbox_center.lat'][0], r['bbox_center.lon'][0]))
        # Flush a full batch; on connection errors the batch is logged
        # and dropped (best-effort).
        if len(records) >= chunk_size:
            print "Inserting next " + str(chunk_size) + " elevations"
            try:
                records = obtain_elevation_from_google(records, centers)
                print " ... Obtained info from google"
                util.elastic_bulk_update(records)
                print " ...success"
            except ConnectionError as e:
                print " ...error"
                conf.error_handler.error(__name__,
                                         'obtain_elevation_from_google',
                                         e.message)
            records = []
            centers = []
    # Flush the final partial batch, if any.
    if len(records) > 0:
        try:
            records = obtain_elevation_from_google(records, centers)
            util.elastic_bulk_update(records)
        except ConnectionError as e:
            conf.error_handler.error(__name__,
                                     'obtain_elevation_from_google',
                                     e.message)
def filters(self, args):
    """Build a ``prefix`` filter from this facet's submitted value.

    Falls back to the parent implementation when no non-empty value was
    submitted for this facet.
    """
    # BUG FIX: the condition previously tested ``len(self.name) > 0``,
    # which is always true for a named facet, so an empty submitted value
    # still produced a (match-everything) prefix query on "". Test the
    # submitted value instead.
    value = args.get(self.name)
    if self.name in args and value:
        return esd.Q('prefix', **{
            self.name: value,
        })
    return super().filters(args)
def list(self, filter, sort, page, page_size, user):
    """Run a paginated search against the collection's Elasticsearch backend."""
    # Rip the search object out of the elasticsearch backend
    try:
        search = self.parents[-1]._state._backend.raw_backend().search
    except AttributeError:
        print('Tried to search on an unsearchable collection')
        raise
    query_args = self._request.query_arguments
    if query_args.get('q'):
        raw_query = query_args['q'][-1].decode('utf-8')
        search = search.query(
            elasticsearch_dsl.Q('query_string', query=raw_query))
    else:
        # This should technically be elsewhere but the search object
        # does not provide a nice way to figure out if there is a query or not.
        search = search.sort(
            {'ref': {
                'order': 'asc',
                'unmapped_type': 'string'
            }})
    if query_args.get('sort'):
        direction = 'asc' if sort.order == 1 else 'desc'
        search = search.sort({
            sort.key: {
                'order': direction,
                'unmapped_type': 'string'
            }
        })
    offset = page * page_size
    return SearchResultWrapper(search[offset:offset + page_size])
def filters(self):
    """Build a ``range`` query from the (lower, upper) pair in ``self.value``.

    Either bound may be ``None``, in which case that side is left open.
    """
    # Renamed the local from ``range`` to ``bounds`` -- it shadowed the
    # builtin ``range``.
    bounds = {}
    if self.value[0] is not None:
        bounds["from"] = self.value[0]
    if self.value[1] is not None:
        bounds["to"] = self.value[1]
    return esd.Q('range', **{self.name: bounds})
def get_cost_by_product(cls, key, date_from=None, date_to=None, without_discount=False, only_discount=False, size=0x7FFFFFFF):
    """Aggregate cost per product for one linked account over a date range.

    Defaults to the current calendar month (UTC). ``without_discount``
    excludes the APN program-fee discount line items; ``only_discount``
    keeps only them. Returns ``{'products': [{'product', 'cost'}, ...]}``
    ordered by descending cost.
    """
    # Default window: first to last day of the current month (UTC).
    date_from = date_from or datetime.utcnow().replace(
        day=1, hour=0, minute=0, second=0, microsecond=0)
    date_to = date_to or date_from.replace(day=calendar.monthrange(
        date_from.year, date_from.month)[1],
                                           hour=23,
                                           minute=59,
                                           second=59,
                                           microsecond=999999)
    s = cls.search()
    s = s.filter('term', linked_account_id=key)
    s = s.filter('range',
                 usage_start_date={
                     'from': date_from.isoformat(),
                     'to': date_to.isoformat()
                 })
    if without_discount:
        s = s.query(
            'bool',
            filter=[
                ~dsl.Q('term', item_description='PAR_APN_ProgramFee_2500')
            ])
    if only_discount:
        s = s.filter('term', item_description='PAR_APN_ProgramFee_2500')
    # Per-product buckets ordered by their summed cost.
    agg = s.aggs.bucket('products',
                        'terms',
                        field='product_name',
                        order={'cost': 'desc'},
                        size=size)
    agg.bucket('cost', 'sum', field='cost')
    # Zero-cost line items are excluded entirely.
    s = s.query('bool', filter=[~dsl.Q('term', cost=0)])
    # size=0: only aggregations are needed, not hits.
    res = client.search(index='awsdetailedlineitem',
                        body=s.to_dict(),
                        size=0,
                        request_timeout=60)
    products = [{
        'product': SHORT_NAMES.get(product['key'], product['key']),
        'cost': product['cost']['value'],
    } for product in res['aggregations']['products']['buckets']]
    return dict(products=products)
def construct_regexp_query(field_in: str, regex: str):
    """OR together a regexp query on the field and on its ``.raw`` variant."""
    combined = None
    for candidate in extract_field_plus_rawfield(field_in):
        sub_query = es_dsl.Q("regexp", **{candidate: regex})
        combined = sub_query if not combined else combined | sub_query
    return combined
def occam_search(self, search_input, index_name='occam_index'):
    """Full-text search over name/summary/environment in the occam index."""
    # Not finished -- just matches the original test functionality (probably).
    # Previously implemented as a raw body search:
    # res = self.es.search(index=index_name, body={"from":0,"size":10,"query":{"match":{"description":search_input}}})
    searcher = elasticsearch_dsl.Search(using=self.es, index=index_name)
    # print('<for debug/dev> number of objects in index ' + str(searcher.count()))
    match_query = elasticsearch_dsl.Q('multi_match',
                                      query=search_input,
                                      fields=['name', 'summary', 'environment'])
    response = searcher.query(match_query).execute()
    # Repackage the hits into the structure occam requires from search.
    return help_return(response)
def build_es_query(self, ):
    """Assemble the elasticsearch-dsl query from the parsed query dict.

    Combines a lucene query_string, a timestamp range, and an exclusion
    query into one bool query; attaches aggregations when requested.
    Stores the Search object in ``self._esq`` and its dict form in
    ``self._esqd``.
    """
    qd = self._qd
    # Time window on the configured timestamp field.
    rangeq = elasticsearch_dsl.Q(
        'range', **{
            '{}'.format(self._ts_field): {
                'from': self._start_time.format(self._ISO_TS),
                'to': self._end_time.format(self._ISO_TS)
            }
        })
    luceneq = elasticsearch_dsl.Q('query_string',
                                  query=qd['query_opts']['args'])
    if isinstance(self.exclude, list):
        self.exclude = ' OR '.join(self.exclude)
    # NOTE(review): if self.exclude is None this still builds a
    # query_string with query=None -- confirm callers always set it.
    excludeq = elasticsearch_dsl.Q('query_string', query=self.exclude)
    s = elasticsearch_dsl.Search()
    s = s.source(include=qd['fields'])
    q = elasticsearch_dsl.Q('bool', must=[luceneq, rangeq],
                            must_not=excludeq)
    s = s.query(q)
    # aggs: nested aggregation chain; first level built on s.aggs, the
    # rest chained onto the returned agg object.
    if 'aggs' in qd and len(qd.get('aggs')) >= 1:
        aggs = qd['aggs']
        try:
            aggobj = self.build_aggs(s.aggs, aggs[0])
        except Exception as e:
            raise TQLException(
                "Unable to agg base: reason: {}, agg: {}".format(e, aggs))
        if len(aggs) > 1:
            aggobj = self.build_aggs(aggobj, aggs[1:])
        s = s[self.agg_size_from:self.agg_size]
    else:
        s = s[self.hit_size_from:self.hit_size]
    self._esq = s
    self._esqd = self._esq.to_dict()
def search_by_keywords(terms):
    """Search tweets whose keywords match any of the whitespace-separated terms.

    Returns up to 250 hits as plain dicts with the raw payload stripped and
    the ``id`` rewritten into a twitter status URL.
    """
    # create the query: one match clause per term, at least one must hit
    should = [
        elasticsearch_dsl.Q("match", keywords=term) for term in terms.split()
    ]
    q = elasticsearch_dsl.Q("bool", should=should, minimum_should_match=1)
    # perform the query
    s = elasticsearch_dsl.Search(using=es, index="tweet").query(q)
    # return the first 250 hits
    results = s[:250]
    tweets = [dict(hit._d_) for hit in results]
    for tweet in tweets:
        del tweet["raw"]
        # BUG FIX: the "/" separator between the path and the status id was
        # missing, producing URLs like ".../statuses12345".
        tweet["id"] = "https://twitter.com/statuses/" + tweet["id"]
    return tweets
def construct_unary_op(arg: ast.Node, query_creator):
    """Apply ``query_creator`` across ``arg``, honouring and/or/not logic.

    A plain argument is passed straight through; a logical argument node has
    each child converted (recursing one level into nested logicals) and the
    sub-queries combined with the bool clause matching the node type.
    """
    def _combine(logical_node, sub_queries):
        # Map the logical node type onto the matching bool clause.
        if is_a(logical_node, op.ARG_AND):
            return es_dsl.Q("bool", must=sub_queries)
        if is_a(logical_node, op.ARG_OR):
            return es_dsl.Q("bool", should=sub_queries)
        return es_dsl.Q("bool", must_not=sub_queries)

    if not is_a(arg, op.ARG_LOGICAL):
        return query_creator(arg)

    collected = []
    for child in arg.children:
        if is_a(child, op.ARGS):
            collected.append(query_creator(child))
        elif is_a(child, op.ARG_LOGICAL):
            nested = [query_creator(grandchild) for grandchild in child.children]
            collected.append(_combine(child, nested))
    return _combine(arg, collected)
def search_ids(self, args, resource_id: str, entry_ids: str):
    """Fetch the entries with the given ids from one resource index.

    Args:
        args: unused here; kept for interface compatibility.
        resource_id: index name to search.
        entry_ids: comma-separated document ids.

    Returns:
        The formatted search result for the matching entries.
    """
    # Use lazy %-style logging arguments so the message is only formatted
    # when the log level is enabled (was eager str.format()).
    logger.info(
        "Called EsSearch.search_ids(self, args, resource_id, entry_ids) with:"
    )
    logger.info(" resource_id = %s", resource_id)
    logger.info(" entry_ids = %s", entry_ids)
    entries = entry_ids.split(",")
    query = es_dsl.Q("terms", _id=entries)
    logger.debug("query = %s", query)
    s = es_dsl.Search(using=self.es, index=resource_id).query(query)
    logger.debug("s = %s", s.to_dict())
    response = s.execute()
    return self._format_result([resource_id], response)
def aggregate(self, search):
    """
    Add aggregations representing the facets selected, including
    potential filters.
    """
    for f, facet in iteritems(self.facets):
        agg = facet.get_aggregation()
        agg_filter = esd.Q('match_all')
        for field, filter in iteritems(self._filters):
            # Skip the facet's own filter so its buckets stay unskewed;
            # date* facets are treated as one family that never filter
            # each other.
            if f == field or (f.startswith("date")
                              and field.startswith("date")):
                continue
            agg_filter &= filter
        search.aggs.bucket(
            '_filter_' + f,
            'filter',
            filter=agg_filter
        ).bucket(f, agg)
def get_yearly_cost_by_product(cls, keys, date_from=None, date_to=None, size=0x7FFFFFFF):
    """Aggregate cost per product per year for one or more linked accounts.

    Defaults to the current calendar year (UTC). Returns
    ``{'years': [{'year', 'products': [{'product', 'cost'}, ...]}, ...]}``.
    """
    # Default window: the whole current year (UTC).
    date_from = date_from or datetime.utcnow().replace(
        month=1, day=1, hour=0, minute=0, second=0, microsecond=0)
    date_to = date_to or date_from.replace(month=12,
                                           day=31,
                                           hour=23,
                                           minute=59,
                                           second=59,
                                           microsecond=999999)
    s = cls.search()
    # Accept a single key or a list of keys.
    s = s.filter(
        'terms',
        linked_account_id=keys if isinstance(keys, list) else [keys])
    s = s.filter('range',
                 usage_start_date={
                     'from': date_from.isoformat(),
                     'to': date_to.isoformat()
                 })
    # Yearly buckets, each holding per-product cost sums.
    agg = s.aggs.bucket('intervals',
                        'date_histogram',
                        field='usage_start_date',
                        interval='year',
                        min_doc_count=1)
    agg = agg.bucket('products', 'terms', field='product_name', size=size)
    agg.metric('cost', 'sum', field='cost')
    # Zero-cost line items are excluded.
    s = s.query('bool', filter=[~dsl.Q('term', cost=0)])
    # size=0: only aggregations are needed, not hits.
    res = client.search(index='awsdetailedlineitem',
                        body=s.to_dict(),
                        size=0,
                        request_timeout=60)
    res = [{
        'year': interval['key_as_string'][:4],
        'products': [{
            'product': SHORT_NAMES.get(product['key'], product['key']),
            'cost': product['cost']['value'],
        } for product in interval['products']['buckets']]
    } for interval in res['aggregations']['intervals']['buckets']]
    return dict(years=res)
def _do_query(self, value, term):
    """Histogram of the user's messages whose participants match term=value,
    bucketed by date at the configured resolution."""
    participant_query = dsl.Q('term', **{term: value})
    search = dsl.Search(using=self.esclient,
                        index=self.user_id,
                        doc_type='indexed_message')
    # Participants are nested documents, so wrap the term query.
    search = search.query('nested',
                          path='participants',
                          score_mode='avg',
                          query=participant_query)
    search.aggs.bucket('messages_with_value',
                       'date_histogram',
                       field='date',
                       interval=self.resolution)
    response = search.execute()
    buckets = response.aggregations.messages_with_value['buckets']
    return self._format_results(buckets)
def get(self, handler):
    """Run a paginated search for this collection and write a JSON page
    of serialized results (meta/links/data envelope) to the handler."""
    # Rip the search object out of the elasticsearch backend
    sort = handler.sort
    search = self.collection._state._backend.raw_backend().search
    if handler.request.query_arguments.get('q'):
        # Free-text query string from the last 'q' argument.
        search = search.query(
            elasticsearch_dsl.Q('query_string',
                                query=handler.request.query_arguments['q']
                                [-1].decode('utf-8')))
    else:
        # This should technically be elsewhere but the search object
        # does not provide a nice way to figure out if there is a query or not.
        search = search.sort(
            {'ref': {
                'order': 'asc',
                'unmapped_type': 'string'
            }})
    if handler.request.query_arguments.get('sort'):
        search = search.sort({
            sort.key: {
                'order': 'asc' if sort.order == 1 else 'desc',
                'unmapped_type': 'string'
            }
        })
    # Hacking into the serializer
    handler._serializer = self.get_serializer()
    handler._view.parents = handler._view.parents + (self.collection, )
    start = handler.page * handler.page_size
    wrapper = SearchResultWrapper(search[start:start + handler.page_size])
    return handler.write({
        'meta': {
            'total': wrapper.count(),
            'perPage': handler.page_size
        },
        # TODO
        'links': {},
        'data': [handler.serialize(resource) for resource in wrapper]
    })
def _filter(self,
            criteria: Q,
            offset: int = 0,
            limit: int = 10,
            order_by: list = ()) -> ResultSet:
    """Filter objects from the data store and return them as a `ResultSet`."""
    conn = self.provider.get_connection()

    # Translate the criteria tree into an ES query; empty criteria means
    # match everything.
    if criteria.children:
        query = self._build_filters(criteria)
    else:
        query = elasticsearch_dsl.Q()

    search = Search(using=conn, index=self.model_cls._index._name)
    search = search.query(query).params(version=True)
    if order_by:
        search = search.sort(*order_by)
    search = search[offset:offset + limit]

    try:
        response = search.execute()
    except Exception as exc:
        logger.error(f"Error while filtering: {exc}")
        raise

    return ResultSet(
        offset=offset,
        limit=limit,
        total=response.hits.total.value,
        items=response.hits,
    )
def _delete_all(self, criteria: Q = None):
    """Delete all records matching criteria from the Repository"""
    conn = self._get_session()

    # Build the filters from the criteria; empty criteria matches all.
    q = elasticsearch_dsl.Q()
    if criteria and criteria.children:
        q = self._build_filters(criteria)

    s = Search(using=conn, index=self.model_cls._index._name).query(q)

    # Return the results
    try:
        response = s.delete()
        # `Search.delete` does not refresh index, so we have to manually refresh
        # NOTE(review): the refresh targets entity_cls.meta_.schema_name while
        # the delete ran against model_cls._index._name -- confirm these always
        # resolve to the same index.
        index = Index(name=self.entity_cls.meta_.schema_name, using=conn)
        index.refresh()
    except Exception as exc:
        logger.error(f"Error while deleting records: {exc}")
        raise

    return response.deleted
def get(self, request: http.HttpRequest
        ) -> typing.Union[tuple, http.HttpResponse]:
    """Render the paginated post list, optionally narrowed by a full-text
    search (Elasticsearch) over posts and comments plus a published-date
    window taken from the form."""
    # site_models is really slow. so I use settings object instead
    # site = site_models.Site.objects.get_current()
    search = 0  # hit count; also doubles as "did the search match" flag
    p_c = None  # NOTE(review): unused
    is_a_search = False
    form = artisan_forms.PostListSearch(request.GET)
    if form.is_valid(
    ):  ## could make a search object factory class to hide implementation of search,
        is_a_search = True  ## to allow search method (elasticsearch, postgres full text etc) to be changed
        terms = form.cleaned_data['q'].split(' ')
        # Single word -> 'match' on the bare string; several words ->
        # 'terms' on the list.
        if len(terms) > 1:
            t = 'terms'
        else:
            t = 'match'
            terms = terms[0]
        queryset = artisan_documents.Post.search().query(
            elasticsearch_dsl.Q(t, text=terms)
            | elasticsearch_dsl.Q(t, author=terms)
            | elasticsearch_dsl.Q(t, title=terms)
            | elasticsearch_dsl.Q(t, category=terms)
            | elasticsearch_dsl.Q(t, location=terms)).to_queryset()
        queryset_comments = forum_documents.Comment.search().query(
            elasticsearch_dsl.Q(t, text=terms)
            | elasticsearch_dsl.Q(t, author=terms)).to_queryset()
        # Fold posts whose comments matched into the result set.
        for sr in queryset_comments:
            queryset = queryset | artisan_models.Post.objects.filter(
                id=sr.post_fk.id)
        # NOTE(review): evaluates a form attribute named by user-influenced
        # input -- presumably a (upper, lower) datetime pair; confirm the
        # choices are constrained by the form.
        time_range = eval(
            'form.' + form['published'].value())  #### TODO !!! eval is evil.
        search = len(queryset)
        if search and time_range:
            queryset = (
                queryset.filter(
                    created_at__lt=time_range[0],
                    created_at__gt=time_range[1]).order_by('-pinned').
                select_related('author').select_related('author__profile').
                select_related('author__profile__avatar'))
            search = len(queryset)
        # No hits: fall back to the full, unpaginated-by-search listing.
        if not search:
            queryset = (artisan_models.Post.objects.select_related(
                'author').select_related('author__profile').select_related(
                    'author__profile__avatar').order_by('-pinned'))
    else:
        form.errors.clear()
        queryset = (artisan_models.Post.objects.select_related(
            'author').select_related('author__profile').select_related(
                'author__profile__avatar').order_by('-pinned'))
    paginator = pagination.Paginator(queryset, self.paginate_by)
    page_number = request.GET.get('page')
    page_obj = paginator.get_page(page_number)
    context = {
        'form': form,
        'page_obj': page_obj,
        'search': search,
        'is_a_search': is_a_search,
        'site_url': ('https' if self.request.is_secure() else 'http') +
        '://' + conf.settings.SITE_DOMAIN
    }
    return shortcuts.render(request, self.template_name, context)
def construct_exists_query(node: ast.Node):
    """Build an ES ``exists`` query for the field named by *node*."""
    field_name = get_value(node)
    return es_dsl.Q("exists", field=field_name)
def construct_freetext_query(node):
    """Multi-match over all fields; string nodes additionally get fuzziness=1."""
    kwargs = {"query": get_value(node)}
    if is_a(node, op.STRING):
        kwargs["fuzziness"] = 1
    return es_dsl.Q("multi_match", **kwargs)
def construct_range_query(field: str, range_args: Dict):
    """Build an ES ``range`` query on *field* with the given bound args."""
    body = {field: range_args}
    return es_dsl.Q("range", **body)
def create_es_query(node: ast.Node):
    """Recursively translate a parsed query AST into an elasticsearch-dsl Q.

    Dispatches on the node type: LOGICAL (and/or/not over sub-queries),
    UNARY_OPS (freetext, freergxp, exists, missing) and BINARY_OPS
    (equals, regex ops, range ops). Raises UnsupportedQuery for shapes
    it cannot express.
    """
    # NOTE(review): pprint is called before the None check, so a None node
    # raises AttributeError here rather than the TypeError below.
    node.pprint(0)
    if node is None:
        raise TypeError()

    def extract_values(n: ast.Node) -> List[ast.AnyValue]:
        # Collect the plain values of a logical argument node (empty list
        # for a non-logical node).
        values = []
        if is_a(n, op.ARG_LOGICAL):
            for child in n.children:
                values.append(get_value(child))
        return values

    def extract_values_and_logicals(
        n: ast.Node,
    ) -> Tuple[List[ast.AnyValue], List[ast.Node]]:
        # Split a logical argument node's children into plain values and
        # nested logical nodes.
        values = []
        logicals = []
        if is_a(n, op.ARG_LOGICAL):
            for child in n.children:
                if is_a(child, op.ARG_LOGICAL):
                    logicals.append(child)
                else:
                    values.append(get_value(child))
        return values, logicals

    # NOTE(review): dead code -- only referenced from a comment below, and
    # its second half uses names (arg_values, arg2, field_values, ...) only
    # bound later in the enclosing function; looks copy-pasted.
    def construct_binary_op(field: ast.Node, arg: ast.Node, query_creator):
        q = None
        if is_a(field, op.ARG_LOGICAL):
            if is_a(arg, op.ARG_LOGICAL):
                pass
            else:
                queries = []
                for n in field.children:
                    if is_a(n, op.ARGS):
                        queries.append(query_creator(n, arg))
                    elif is_a(n, op.ARG_LOGICAL):
                        l_queries = []
                        for child in n.children:
                            l_queries.append(query_creator(child, arg))
                        if is_a(n, op.ARG_AND):
                            queries.append(es_dsl.Q("bool", must=l_queries))
                        elif is_a(n, op.ARG_OR):
                            queries.append(es_dsl.Q("bool", should=l_queries))
                        else:
                            queries.append(es_dsl.Q("bool", must_not=l_queries))
                if is_a(arg, op.ARG_AND):
                    q = es_dsl.Q("bool", must=queries)
                elif is_a(arg, op.ARG_OR):
                    q = es_dsl.Q("bool", should=queries)
                else:
                    q = es_dsl.Q("bool", must_not=queries)
            if not arg_values:
                def prepare_equals_arg(node, v):
                    return v
                arg = prepare_equals_arg(node, get_value(arg2))
                queries = [
                    construct_equals_query(field, arg) for field in field_values
                ]
                for logical in field_logicals:
                    l_queries = []
                    for field in logical.children:
                        l_queries.append(construct_equals_query(field.value, arg))
                    if is_a(logical, op.ARG_OR):
                        queries.append(es_dsl.Q("bool", should=l_queries))
                    elif is_a(logical, op.ARG_AND):
                        queries.append(es_dsl.Q("bool", must=l_queries))
                    else:
                        queries.append(es_dsl.Q("bool", must_not=l_queries))
                print(
                    "|create_es_query::EQUALS| queries = {queries}".format(
                        queries=queries
                    )
                )
                if is_a(arg1, op.ARG_AND):
                    q = es_dsl.Q("bool", must=queries)
                elif is_a(arg1, op.ARG_OR):
                    q = es_dsl.Q("bool", should=queries)
                else:
                    q = es_dsl.Q(
                        "bool",
                        must_not=queries,
                        must=es_dsl.Q("multi_match", query=get_value(arg2)),
                    )
        else:
            if is_a(arg, op.ARG_LOGICAL):
                queries = []
                for n in arg.children:
                    if is_a(n, op.ARGS):
                        queries.append(query_creator(field, n))
                    elif is_a(n, op.ARG_LOGICAL):
                        l_queries = []
                        for child in n.children:
                            l_queries.append(query_creator(field, child))
                        if is_a(n, op.ARG_AND):
                            queries.append(es_dsl.Q("bool", must=l_queries))
                        elif is_a(n, op.ARG_OR):
                            queries.append(es_dsl.Q("bool", should=l_queries))
                        else:
                            queries.append(es_dsl.Q("bool", must_not=l_queries))
                if is_a(arg, op.ARG_AND):
                    q = es_dsl.Q("bool", must=queries)
                elif is_a(arg, op.ARG_OR):
                    q = es_dsl.Q("bool", should=queries)
                else:
                    q = es_dsl.Q("bool", must_not=queries)
            else:
                q = query_creator(field, arg)
        return q

    def construct_equals_query(field: ast.Node, query: ast.Node):
        # match query with AND operator: every token in the value must hit.
        kwargs = {get_value(field): {"query": get_value(query), "operator": "and"}}
        return es_dsl.Q("match", **kwargs)

    q = None
    if is_a(node, op.LOGICAL):
        # TODO check minimum should match rules in different contexts
        queries = [create_es_query(n) for n in node.children]
        # Special case: two range queries on the same field are merged
        # into one combined range query and returned immediately.
        if len(queries) == 2:
            q1 = queries[0]
            q2 = queries[1]
            print("q1 = {}".format(q1.to_dict()))
            print("q2 = {}".format(repr(q2)))
            q1_dict = q1.to_dict()
            q2_dict = q2.to_dict()
            if "range" in q1_dict and "range" in q2_dict:
                for q1_field, q1_value in q1_dict["range"].items():
                    for q2_field, q2_value in q2_dict["range"].items():
                        if q1_field == q2_field:
                            print("q1_field == q2_field")
                            range_args = q1_value
                            range_args.update(q2_value)
                            print("field = {}".format(q1_field))
                            print("range_args = {}".format(range_args))
                            q = es_dsl.Q("range", **{q1_field: range_args})
                            return q
        if is_a(node, op.AND):
            q = es_dsl.Q("bool", must=queries)
        elif is_a(node, op.OR):
            q = es_dsl.Q("bool", should=queries)
        else:
            q = es_dsl.Q("bool", must_not=queries)
    elif is_a(node, op.UNARY_OPS):
        arg = node.children[0]
        arg_values, arg_logicals = extract_values_and_logicals(arg)
        print("arg_values = {}".format(arg_values))
        print("arg_logicals = {}".format(arg_logicals))

        def construct_unary_op(arg: ast.Node, query_creator):
            # Apply query_creator across the argument, honouring
            # and/or/not argument logic (one level of nesting).
            q = None
            if is_a(arg, op.ARG_LOGICAL):
                queries = []
                for n in arg.children:
                    if is_a(n, op.ARGS):
                        queries.append(query_creator(n))
                    elif is_a(n, op.ARG_LOGICAL):
                        l_queries = []
                        for child in n.children:
                            l_queries.append(query_creator(child))
                        if is_a(n, op.ARG_AND):
                            queries.append(es_dsl.Q("bool", must=l_queries))
                        elif is_a(n, op.ARG_OR):
                            queries.append(es_dsl.Q("bool", should=l_queries))
                        else:
                            queries.append(es_dsl.Q("bool", must_not=l_queries))
                if is_a(arg, op.ARG_AND):
                    q = es_dsl.Q("bool", must=queries)
                elif is_a(arg, op.ARG_OR):
                    q = es_dsl.Q("bool", should=queries)
                else:
                    q = es_dsl.Q("bool", must_not=queries)
            else:
                q = query_creator(arg)
            return q

        def construct_exists_query(node: ast.Node):
            return es_dsl.Q("exists", field=get_value(node))

        if is_a(node, op.FREETEXT):

            def construct_freetext_query(node):
                # Strings get a little fuzziness; other values match exact.
                if is_a(node, op.STRING):
                    return es_dsl.Q("multi_match", query=get_value(node), fuzziness=1)
                else:
                    return es_dsl.Q("multi_match", query=get_value(node))

            if not arg_values:
                q = construct_freetext_query(arg)
            else:
                queries = [construct_freetext_query(n) for n in arg.children]
                if is_a(arg, op.ARG_OR):
                    q = es_dsl.Q("bool", should=queries)
                elif is_a(arg, op.ARG_AND):
                    q = es_dsl.Q("bool", must=queries)
                else:
                    q = es_dsl.Q("bool", must_not=queries)
        elif is_a(node, op.FREERGXP):
            # Regex over all fields via a query_string of /regex/ terms.
            kwargs = {"default_field": "*"}
            if not arg_values:
                kwargs["query"] = "/{}/".format(arg.value)
            else:
                if is_a(arg, op.ARG_OR):
                    operator = " OR "
                else:  # if is_a(arg, op.ARG_AND):
                    operator = " AND "
                kwargs["query"] = operator.join("(/{}/)".format(v) for v in arg_values)
            print("kwargs = {}".format(kwargs))
            q = es_dsl.Q("query_string", **kwargs)
            if is_a(arg, op.ARG_NOT):
                q = es_dsl.Q("bool", must_not=q)
        elif is_a(node, op.EXISTS):
            q = construct_unary_op(node.children[0], construct_exists_query)
        elif is_a(node, op.MISSING):
            # "missing" is expressed as must_not exists.
            if not arg_values:
                q = es_dsl.Q("bool", must_not=es_dsl.Q("exists", field=arg.value))
            else:
                queries = [es_dsl.Q("exists", field=value) for value in arg_values]
                for logical in arg_logicals:
                    l_queries = []
                    for field in logical.children:
                        l_queries.append(construct_exists_query(field))
                    if is_a(logical, op.ARG_AND):
                        queries.append(es_dsl.Q("bool", must=l_queries))
                    elif is_a(logical, op.ARG_OR):
                        queries.append(es_dsl.Q("bool", should=l_queries))
                    else:  # if is_a(logical, op.ARG_NOT):
                        queries.append(es_dsl.Q("bool", must_not=l_queries))
                # De Morgan: missing(a AND b) == NOT(exists a OR ...), etc.
                if is_a(arg, op.ARG_AND):
                    q = es_dsl.Q("bool", must_not=queries)
                elif is_a(arg, op.ARG_OR):
                    q = None
                    for query in queries:
                        q_tmp = es_dsl.Q("bool", must_not=query)
                        if not q:
                            q = q_tmp
                        else:
                            q = q | q_tmp
                else:  # is_a(arg, op.ARG_NOT):
                    q = es_dsl.Q("bool", must=queries)
        else:
            raise UnsupportedQuery("not implemented")
    elif is_a(node, op.BINARY_OPS):
        arg1 = node.children[0]
        arg2 = node.children[1]
        field_values, field_logicals = extract_values_and_logicals(arg1)
        arg_values = extract_values(arg2)
        print("field_values = {field_values}".format(field_values=field_values))
        print("field_logicals = {field_logicals}".format(field_logicals=field_logicals))
        print("arg_values = {}".format(arg_values))
        # TODO this check breaks and and or since they always (?) have ast.ArgNode as parameters
        # if not isinstance(arg1, ast.ArgNode) or not isinstance(arg2, ast.ArgNode):
        # TODO these need to be moved outside of current query, for example:
        # "equals|name||or|Partille|Kumla" could be expressed in two ways
        # es_dsl.Q('terms', name=['Partille', 'Kumla'])
        # or
        # es_dsl.Q('bool', should=[es_dsl.Q('term', name='Partille'), es_dsl.Q('term', name='Kumla')])
        # but "regexp|name||or|Part*|Kum*"
        # can only be expressed as in the longer form above
        # raise UnsupportedQuery()
        if is_a(node, op.EQUALS):
            # q = construct_binary_op(node.children[0], node.children[1], construct_equals_query)
            # if len(field_values) == 1:
            if not field_values and not field_logicals:
                # Simple field: either a single value or a logical list of
                # values on one field.
                if not arg_values:
                    q = construct_equals_query(arg1, arg2)
                else:
                    queries = [
                        construct_equals_query(get_value(arg1), query)
                        for query in arg_values
                    ]
                    if is_a(arg2, op.ARG_AND):
                        q = es_dsl.Q("bool", must=queries)
                    elif is_a(arg2, op.ARG_OR):
                        q = es_dsl.Q("bool", should=queries)
                    else:
                        q = es_dsl.Q(
                            "bool", must_not=queries
                        )  # , must=es_dsl.Q('multi_match', query=get_value(arg1)))
            else:  # if field_values:
                # Logical list of fields with one value.
                # if len(arg_values) == 1:
                if not arg_values:
                    def prepare_equals_arg(node, v):
                        return v
                    arg = prepare_equals_arg(node, get_value(arg2))
                    queries = [
                        construct_equals_query(field, arg) for field in field_values
                    ]
                    for logical in field_logicals:
                        l_queries = []
                        for field in logical.children:
                            l_queries.append(construct_equals_query(field.value, arg))
                        if is_a(logical, op.ARG_OR):
                            queries.append(es_dsl.Q("bool", should=l_queries))
                        elif is_a(logical, op.ARG_AND):
                            queries.append(es_dsl.Q("bool", must=l_queries))
                        else:
                            queries.append(es_dsl.Q("bool", must_not=l_queries))
                    print(
                        "|create_es_query::EQUALS| queries = {queries}".format(
                            queries=queries
                        )
                    )
                    if is_a(arg1, op.ARG_AND):
                        q = es_dsl.Q("bool", must=queries)
                    elif is_a(arg1, op.ARG_OR):
                        q = es_dsl.Q("bool", should=queries)
                    else:
                        q = es_dsl.Q(
                            "bool",
                            must_not=queries,
                            must=es_dsl.Q("multi_match", query=get_value(arg2)),
                        )
                else:  # if arg_values:
                    raise UnsupportedQuery("Don't know how to handle ")
        elif is_a(node, op.REGEX_OPS):

            def extract_field_plus_rawfield(field: str):
                # Yield the field itself and (unless already raw) its
                # ``.raw`` keyword variant.
                if field.endswith(".raw"):
                    yield field
                else:
                    yield field
                    yield field + ".raw"

            def prepare_regex(node, s: str):
                # Turn contains/startswith/endswith into anchored regexes.
                if is_a(node, op.CONTAINS):
                    return ".*" + re.escape(s) + ".*"
                elif is_a(node, op.STARTSWITH):
                    return re.escape(s) + ".*"
                elif is_a(node, op.ENDSWITH):
                    return ".*" + re.escape(s)
                else:
                    return s

            # Construct query
            def construct_regexp_query(field_in: str, regex: str):
                q = None
                for field in extract_field_plus_rawfield(field_in):
                    q_tmp = es_dsl.Q("regexp", **{field: regex})
                    if not q:
                        q = q_tmp
                    else:
                        q = q | q_tmp
                return q

            q = None
            if not field_values and not field_logicals:
                if not arg_values:
                    q = construct_regexp_query(
                        get_value(arg1), prepare_regex(node, get_value(arg2))
                    )
                else:
                    queries = []
                    for regex in arg_values:
                        q_tmp = construct_regexp_query(
                            get_value(arg1), prepare_regex(node, regex)
                        )
                        print("q_tmp = {q_tmp}".format(q_tmp=q_tmp))
                        queries.append(q_tmp)
                    if is_a(arg2, op.ARG_OR):
                        q = es_dsl.Q("bool", should=queries)
                    elif is_a(arg2, op.ARG_AND):
                        q = es_dsl.Q("bool", must=queries)
                    else:
                        q = es_dsl.Q(
                            "bool", must_not=queries
                        )  # , must=es_dsl.Q('query_string', **kwargs))
            else:  # if field_values:
                if not arg_values:
                    regex = prepare_regex(node, get_value(arg2))
                    queries = []
                    for field in field_values:
                        q_tmp = construct_regexp_query(field, regex)
                        print("q_tmp = {q_tmp}".format(q_tmp=q_tmp))
                        queries.append(q_tmp)
                    for logical in field_logicals:
                        l_queries = []
                        for field in logical.children:
                            l_queries.append(construct_regexp_query(field.value, regex))
                        if is_a(logical, op.ARG_OR):
                            queries.append(es_dsl.Q("bool", should=l_queries))
                        elif is_a(logical, op.ARG_AND):
                            queries.append(es_dsl.Q("bool", must=l_queries))
                        else:
                            queries.append(es_dsl.Q("bool", must_not=l_queries))
                    print(
                        "|create_es_query::REGEX| queries = {queries}".format(
                            queries=queries
                        )
                    )
                    if is_a(arg1, op.ARG_OR):
                        q = es_dsl.Q("bool", should=queries)
                    elif is_a(arg1, op.ARG_AND):
                        q = es_dsl.Q("bool", must=queries)
                    else:
                        # NOT over fields: exclude the regex matches but
                        # still require the regex somewhere via query_string.
                        kwargs = {"default_field": "*"}
                        kwargs["query"] = "/{}/".format(regex)
                        print("regex NOT kwargs = {}".format(kwargs))
                        q = es_dsl.Q(
                            "bool",
                            must_not=queries,
                            must=es_dsl.Q("query_string", **kwargs),
                        )
                else:
                    raise UnsupportedQuery("Complex regex not implemented")
        elif is_a(node, op.RANGE_OPS):
            if arg_values:
                raise UnsupportedQuery(
                    "Not allowed to use logical operators in 2nd argument for RANGE operators."
                )

            def prepare_range_args(node, arg22):
                # Map the operator node to the matching range bound key.
                range_args = {}
                if is_a(node, op.LT):
                    range_args["lt"] = arg22
                elif is_a(node, op.LTE):
                    range_args["lte"] = arg22
                elif is_a(node, op.GT):
                    range_args["gt"] = arg22
                elif is_a(node, op.GTE):
                    range_args["gte"] = arg22
                return range_args

            range_args = prepare_range_args(node, get_value(arg2))

            def construct_range_query(field: str, range_args: Dict):
                return es_dsl.Q("range", **{field: range_args})

            q = None
            if not field_values:
                q = construct_range_query(get_value(arg1), range_args)
            else:
                queries = [construct_range_query(f, range_args) for f in field_values]
                for logical in field_logicals:
                    l_queries = []
                    for field in logical.children:
                        l_queries.append(construct_range_query(field.value, range_args))
                    if is_a(logical, op.ARG_OR):
                        queries.append(es_dsl.Q("bool", should=l_queries))
                    elif is_a(logical, op.ARG_AND):
                        queries.append(es_dsl.Q("bool", must=l_queries))
                    else:
                        queries.append(es_dsl.Q("bool", must_not=l_queries))
                print(
                    "|create_es_query::RANGE| queries = {queries}".format(
                        queries=queries
                    )
                )
                if is_a(arg1, op.ARG_AND):
                    q = es_dsl.Q("bool", must=queries)
                elif is_a(arg1, op.ARG_OR):
                    q = es_dsl.Q("bool", should=queries)
                else:
                    q = es_dsl.Q("bool", must_not=queries)
        # elif isinstance(node, ast.TernaryOp):
        #     op = node.value
        #     arg1 = node.children[0]
        #     arg2 = node.children[1]
        #     arg3 = node.children[2]
        #     if op == query_dsl.Operators.RANGE:
        #         raise UnsupportedQuery('don\'t now what to do yet')
        #     else:
        #         raise UnsupportedQuery('what operators?')
        else:
            raise UnsupportedQuery("Unknown query op '{node}'".format(node=node))
    # NOTE(review): node types outside LOGICAL/UNARY_OPS/BINARY_OPS fall
    # through and return None -- confirm that is intentional.
    return q
# Dispatch a binary query op over possibly-logical (AND/OR/NOT) field and
# argument nodes, combining per-child queries from `query_creator` into
# es_dsl bool queries (must / should / must_not).
#
# NOTE(review): the text of this function appears extraction-garbled — the
# middle section (from `if not arg_values:` through the EQUALS print/branches)
# references names (`arg_values`, `arg1`, `arg2`, `field_values`,
# `field_logicals`, `construct_equals_query`) that are neither parameters nor
# locals here; it looks like a chunk of the neighboring query-builder's EQUALS
# branch spliced into this definition. Code left byte-identical pending
# comparison against the original file — TODO confirm against upstream source.
def construct_binary_op(field: ast.Node, arg: ast.Node, query_creator): q = None if is_a(field, op.ARG_LOGICAL): if is_a(arg, op.ARG_LOGICAL): pass else: queries = [] for n in field.children: if is_a(n, op.ARGS): queries.append(query_creator(n, arg)) elif is_a(n, op.ARG_LOGICAL): l_queries = [] for child in n.children: l_queries.append(query_creator(child, arg)) if is_a(n, op.ARG_AND): queries.append(es_dsl.Q("bool", must=l_queries)) elif is_a(n, op.ARG_OR): queries.append(es_dsl.Q("bool", should=l_queries)) else: queries.append(es_dsl.Q("bool", must_not=l_queries)) if is_a(arg, op.ARG_AND): q = es_dsl.Q("bool", must=queries) elif is_a(arg, op.ARG_OR): q = es_dsl.Q("bool", should=queries) else: q = es_dsl.Q("bool", must_not=queries) if not arg_values: def prepare_equals_arg(node, v): return v arg = prepare_equals_arg(node, get_value(arg2)) queries = [ construct_equals_query(field, arg) for field in field_values ] for logical in field_logicals: l_queries = [] for field in logical.children: l_queries.append(construct_equals_query(field.value, arg)) if is_a(logical, op.ARG_OR): queries.append(es_dsl.Q("bool", should=l_queries)) elif is_a(logical, op.ARG_AND): queries.append(es_dsl.Q("bool", must=l_queries)) else: queries.append(es_dsl.Q("bool", must_not=l_queries)) print( "|create_es_query::EQUALS| queries = {queries}".format( queries=queries ) ) if is_a(arg1, op.ARG_AND): q = es_dsl.Q("bool", must=queries) elif is_a(arg1, op.ARG_OR): q = es_dsl.Q("bool", should=queries) else: q = es_dsl.Q( "bool", must_not=queries, must=es_dsl.Q("multi_match", query=get_value(arg2)), ) else: if is_a(arg, op.ARG_LOGICAL): queries = [] for n in arg.children: if is_a(n, op.ARGS): queries.append(query_creator(field, n)) elif is_a(n, op.ARG_LOGICAL): l_queries = [] for child in n.children: l_queries.append(query_creator(field, child)) if is_a(n, op.ARG_AND): queries.append(es_dsl.Q("bool", must=l_queries)) elif is_a(n, op.ARG_OR): queries.append(es_dsl.Q("bool", should=l_queries)) else: 
queries.append(es_dsl.Q("bool", must_not=l_queries)) if is_a(arg, op.ARG_AND): q = es_dsl.Q("bool", must=queries) elif is_a(arg, op.ARG_OR): q = es_dsl.Q("bool", should=queries) else: q = es_dsl.Q("bool", must_not=queries) else: q = query_creator(field, arg) return q
def count_by_city_order_by_country(
        vk_elastic_db: es_client.VkDataDatabaseClient,
        size=10,
        is_need_other=True,
        is_need_print=False,
        is_need_plot=True,
        is_need_active=False,
        days_delta=20):
    """Aggregate user counts per city, grouped by country.

    Runs a two-level terms aggregation (country -> city) over the index and,
    per country, optionally prints the city counts and/or saves a horizontal
    bar chart PNG under ``save_path``.

    Args:
        vk_elastic_db: client wrapper holding the Elasticsearch connection.
        size: max number of country buckets and of city buckets per country.
        is_need_other: append an "other" bar holding the documents that fell
            outside the top-``size`` city buckets (``sum_other_doc_count``).
        is_need_print: print per-country city counts to stdout.
        is_need_plot: save one bar-chart PNG per country.
        is_need_active: restrict to recently-active users.
        days_delta: activity window in days, used when ``is_need_active``.

    Returns:
        None; output goes to stdout and/or PNG files.
    """
    country_aggs_name = "country_count"
    city_aggs_name = "city_count"
    title = "count by city"
    if is_need_active:
        title += " active"
    es = get_elastic_object(vk_elastic_db)
    s = elasticsearch_dsl.Search(using=es, index=index)
    if is_need_active:
        s = get_active_users_filter(es, index, s, days_delta=days_delta)
    # Keep only documents that have both a country and a city title.
    s = s.filter(
        "bool",
        must=[elasticsearch_dsl.Q("exists", field="country.title.keyword")])
    s = s.filter(
        "bool",
        must=[elasticsearch_dsl.Q("exists", field="city.title.keyword")])
    # Exclude empty-string titles.
    # BUG FIX: the country clause previously queried
    # "country.title.keywordd" (typo, double d) — a nonexistent field —
    # so empty country titles were never actually filtered out.
    s = s.filter(
        "bool",
        must_not=[elasticsearch_dsl.Q("match", country__title__keyword="")])
    s = s.filter(
        "bool",
        must_not=[elasticsearch_dsl.Q("match", city__title__keyword="")])
    # breadth_first: prune low-frequency country buckets before collecting
    # their city sub-buckets, keeping the aggregation memory-bounded.
    a = elasticsearch_dsl.A('terms', field="country.title.keyword", size=size,
                            collect_mode="breadth_first")
    a1 = elasticsearch_dsl.A('terms', field="city.title.keyword", size=size)
    s.aggs.bucket(country_aggs_name, a).bucket(city_aggs_name, a1)
    response = s.execute()
    data_dict = {}
    for country_hit in response.aggregations[country_aggs_name].buckets:
        x_axis = [hit.key for hit in country_hit[city_aggs_name].buckets]
        y_axis = [hit.doc_count for hit in country_hit[city_aggs_name].buckets]
        if is_need_other:
            x_axis.append("other")
            y_axis.append(country_hit[city_aggs_name].sum_other_doc_count)
        data_dict[country_hit.key] = {"x_axis": x_axis, "y_axis": y_axis}
    for country, axes in data_dict.items():
        x_axis = axes["x_axis"]
        y_axis = axes["y_axis"]
        cur_title = f"{title}\n{country}"
        figname = f"{title.replace(' ', '_')}_{country}"
        if is_need_print:
            print(cur_title)
            for rank, (city, count) in enumerate(zip(x_axis, y_axis), start=1):
                print(f"{rank}\t{city} {count}")
        if is_need_plot:
            fig, ax = plt.subplots(1, 1)
            ax.set_title(cur_title)
            ax.barh(x_axis, y_axis)
            fig.savefig(f"{save_path}/{figname}.png",
                        dpi=300, format='png', bbox_inches='tight')
            plt.close(fig)
def as_Q(field, query, fuzziness=1):
    """Build a fuzzy ``match`` query clause for *field*.

    Args:
        field: document field name to match against.
        query: text to match.
        fuzziness: Elasticsearch fuzziness setting (default 1 edit).

    Returns:
        An ``elasticsearch_dsl`` ``Q`` object for the match clause.
    """
    match_body = {'query': query, 'fuzziness': fuzziness}
    return dsl.Q('match', **{field: match_body})
def get_monthly_cost_by_product(cls, keys, tagged=False, date_from=None,
                                date_to=None, size=0x7FFFFFFF):
    """Return per-month, per-product costs for the given linked account(s).

    Args:
        keys: a single ``linked_account_id`` or a list of them.
        tagged: when True, break each product's cost down by tag value,
            with an ``'untagged'`` entry for the unaccounted remainder.
        date_from: inclusive range start; defaults to the first instant of
            the current month (UTC).
        date_to: inclusive range end; defaults to the last instant of
            ``date_from``'s month.
        size: maximum number of product buckets per month.

    Returns:
        ``{'months': [{'month': 'YYYY-MM-DD', 'products': [...]}, ...]}``.
    """
    if date_from is None:
        date_from = datetime.utcnow().replace(
            day=1, hour=0, minute=0, second=0, microsecond=0)
    if date_to is None:
        last_day = calendar.monthrange(date_from.year, date_from.month)[1]
        date_to = date_from.replace(day=last_day, hour=23, minute=59,
                                    second=59, microsecond=999999)

    account_ids = keys if isinstance(keys, list) else [keys]
    search = cls.search()
    search = search.filter('terms', linked_account_id=account_ids)
    search = search.filter('range', usage_start_date={
        'from': date_from.isoformat(),
        'to': date_to.isoformat(),
    })

    # Aggregation tree: intervals(month) -> products(terms) -> cost(sum)
    # and, when tagged, products -> tags(terms) -> cost(sum).
    bucket = search.aggs.bucket('intervals', 'date_histogram',
                                field='usage_start_date',
                                interval='month', min_doc_count=1)
    bucket = bucket.bucket('products', 'terms',
                           field='product_name', size=size)
    bucket.bucket('cost', 'sum', field='cost')
    if tagged:
        bucket = bucket.bucket('tags', 'terms', field='tag.value')
        bucket.bucket('cost', 'sum', field='cost')
    # Ignore zero-cost line items entirely.
    search = search.query('bool', filter=[~dsl.Q('term', cost=0)])

    raw = client.search(index='awsdetailedlineitem', body=search.to_dict(),
                        size=0, request_timeout=60)

    def iter_tag_costs(tag_buckets, product_total):
        # Yield (tag_name, cost) pairs; whatever cost is not covered by
        # the tag buckets is reported as 'untagged'.
        accounted = 0.0
        for tag in tag_buckets:
            tag_cost = tag['cost']['value']
            accounted += tag_cost
            yield (tag['key'], tag_cost)
        if product_total != accounted:
            yield ('untagged', product_total - accounted)

    months = []
    for interval in raw['aggregations']['intervals']['buckets']:
        products = []
        for product in interval['products']['buckets']:
            entry = {
                'product': SHORT_NAMES.get(product['key'], product['key']),
                'cost': product['cost']['value'],
            }
            if tagged:
                entry['tags'] = [
                    {'name': name, 'cost': cost}
                    for name, cost in iter_tag_costs(
                        product['tags']['buckets'], entry['cost'])
                ]
            products.append(entry)
        months.append({
            'month': interval['key_as_string'].split('T')[0],
            'products': products,
        })
    return dict(months=months)