def get_es_user_counts_by_doc_type(domain):
    agg = aggregations.TermsAggregation('doc_type', 'doc_type').aggregation(
        aggregations.TermsAggregation('base_doc', 'base_doc'))
    doc_type_buckets = (
        es.UserES()
        .remove_default_filters()
        .filter(es.users.domain(domain))
        .aggregation(agg)
        .size(0)
        .run()
        .aggregations.doc_type.buckets_dict
    )
    counts = Counter()
    for doc_type, bucket in doc_type_buckets.items():
        for base_doc, count in bucket.base_doc.counts_by_bucket().items():
            # Deleted users keep their doc_type but have a base_doc ending in
            # 'deleted'; count them under a suffixed key without mutating
            # doc_type for the remaining base_doc buckets.
            key = doc_type + '-Deleted' if base_doc.endswith('deleted') else doc_type
            counts[key] += count
    return counts
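# Illustrative only (doc types and counts below are made up, not taken from any
# real domain): deleted users land under a '-Deleted' key, so the Counter
# returned above looks roughly like
#     Counter({'CommCareUser': 42, 'WebUser': 3, 'CommCareUser-Deleted': 5})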
def aggregations(self, data_source_config, lang):
    # SQL supports max and min on strings, so hack equivalent behavior into ES:
    # a single-bucket terms aggregation ordered by term yields the lexicographic
    # max (descending) or min (ascending) value of the field.
    if self._use_terms_aggregation_for_max_min(data_source_config):
        aggregation = aggregations.TermsAggregation(self.column_id, self.field, size=1)
        order = "desc" if self.aggregation == 'max' else 'asc'
        aggregation = aggregation.order('_term', order=order)
    else:
        aggregation = ES_AGG_MAP[self.aggregation](self.column_id, self.field)
    return filter(None, [aggregation])
def get_case_and_action_counts_for_domains(domains):
    # Bucket cases by domain; the nested aggregation counts the case actions
    # stored on each case document.
    actions_agg = aggregations.NestedAggregation('actions', 'actions')
    aggregation = aggregations.TermsAggregation(
        'domain', 'domain').aggregation(actions_agg)
    results = (
        CaseES()
        .filter(filters.term('domain', domains))
        .aggregation(aggregation)
        .size(0)
        .run()
    )
    domains_to_cases = results.aggregations.domain.buckets_dict

    def _domain_stats(domain_name):
        cases = domains_to_cases.get(domain_name, None)
        return {
            'cases': cases.doc_count if cases else 0,
            'case_actions': cases.actions.doc_count if cases else 0,
        }

    return {domain: _domain_stats(domain) for domain in domains}
def _get_parent_case_ids_matching_subcase_query(subcase_query, context):
    """Get a list of case IDs for cases that have a subcase with the given
    index identifier and matching the subcase predicate filter.

    Only cases with `[>,=] case_count_gt` subcases will be returned.
    """
    # TODO: validate that the subcase filter doesn't contain any ancestor filtering
    from corehq.apps.case_search.filter_dsl import (
        MAX_RELATED_CASES,
        build_filter_from_ast,
    )

    if subcase_query.subcase_filter:
        subcase_filter = build_filter_from_ast(subcase_query.subcase_filter, context)
    else:
        subcase_filter = filters.match_all()

    index_identifier_filter = filters.term('indices.identifier', subcase_query.index_identifier)
    index_query = queries.nested(
        'indices',
        queries.filtered(
            queries.match_all(),
            filters.AND(
                index_identifier_filter,
                filters.NOT(filters.term('indices.referenced_id', ''))  # exclude deleted indices
            )
        )
    )
    es_query = (
        CaseSearchES().domain(context.domain)
        .filter(index_query)
        .filter(subcase_filter)
        .aggregation(
            aggregations.NestedAggregation(
                'indices', 'indices',
            ).aggregation(
                aggregations.FilterAggregation(
                    'matching_indices', index_identifier_filter
                ).aggregation(
                    aggregations.TermsAggregation(
                        'referenced_id', 'indices.referenced_id'
                    )
                )
            )
        )
    )

    if es_query.count() > MAX_RELATED_CASES:
        from ..exceptions import TooManyRelatedCasesError
        raise TooManyRelatedCasesError(
            _("The related case lookup you are trying to perform would return too many cases"),
            serialize(subcase_query.subcase_filter)
        )

    counts_by_parent_id = (
        es_query.run().aggregations.indices.matching_indices.referenced_id.counts_by_bucket()
    )

    if subcase_query.op == '>' and subcase_query.count <= 0:
        return list(counts_by_parent_id)

    return [
        case_id for case_id, count in counts_by_parent_id.items()
        if subcase_query.filter_count(count)
    ]
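# For orientation, a sketch of the Elasticsearch aggregation body the chained
# helpers above roughly serialize to (exact output depends on the aggregations
# helpers and the case search index mapping; "<identifier>" stands in for
# subcase_query.index_identifier):
#
#   "aggs": {
#       "indices": {
#           "nested": {"path": "indices"},
#           "aggs": {
#               "matching_indices": {
#                   "filter": {"term": {"indices.identifier": "<identifier>"}},
#                   "aggs": {
#                       "referenced_id": {
#                           "terms": {"field": "indices.referenced_id"}
#                       }
#                   }
#               }
#           }
#       }
#   }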
def terms_aggregation(self, term, name, size=None):
    return self.aggregation(aggregations.TermsAggregation(name, term, size=size))
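# A minimal usage sketch, assuming terms_aggregation is defined on the shared ES
# query base class and is therefore available on query classes such as FormES
# (the domain and field names here are illustrative only):
from corehq.apps.es import FormES

def form_counts_by_app(domain):
    # Bucket the domain's forms by app_id and return {app_id: doc_count}.
    query = (
        FormES()
        .domain(domain)
        .terms_aggregation('app_id', 'app_id')
        .size(0)  # we only want the aggregation, not the hits
    )
    return query.run().aggregations.app_id.counts_by_bucket()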