def test_nesting_aggregations(self):
    """Check the raw query built for a terms aggregation with a nested
    filter aggregation, alongside a sibling filters aggregation."""
    users_agg_json = {
        "terms": {"field": "user_id", "size": SIZE_LIMIT},
        "aggs": {"closed": {"filter": {"term": {"closed": True}}}},
    }
    status_agg_json = {
        "filters": {
            "filters": {
                "closed": {"term": {"closed": True}},
                "open": {"term": {"closed": False}},
            }
        }
    }
    expected = {
        "query": {
            "filtered": {
                "filter": {"and": [{"match_all": {}}]},
                "query": {"match_all": {}},
            }
        },
        "aggs": {"users": users_agg_json, "total_by_status": status_agg_json},
        "size": SIZE_LIMIT,
    }

    base_query = HQESQuery('cases')
    closed_by_user = TermsAggregation("users", 'user_id').aggregation(
        FilterAggregation('closed', filters.term('closed', True))
    )
    totals = FiltersAggregation('total_by_status')
    totals = totals.add_filter('closed', filters.term('closed', True))
    totals = totals.add_filter('open', filters.term('closed', False))

    self.checkQuery(base_query.aggregations([closed_by_user, totals]), expected)
def user_query(self, pagination=True):
    """Return the user ES query for the current request.

    The mobile worker / group slugs are read from the request, turned into a
    query by ``ExpandedMobileWorkerFilter.user_es_query`` and given this
    report's sorting block.

    :param pagination: when true, apply ``self.pagination`` count and start.
    :returns: the resulting query object; when an app is selected, nested
        filters on that app id are passed to the query's ``OR``.
    """
    get_values = self.request.GET.getlist
    # Cater for old ReportConfigs
    slugs = set(
        get_values('location_restricted_mobile_worker')
        + get_values(ExpandedMobileWorkerFilter.slug)
    )

    query = ExpandedMobileWorkerFilter.user_es_query(
        self.domain, slugs, self.request.couch_user
    )
    query = query.set_sorting_block(self.get_sorting_block())

    if pagination:
        query = query.size(self.pagination.count)
        query = query.start(self.pagination.start)

    if not self.selected_app_id:
        return query

    # adding nested filter for reporting_metadata.last_submissions.app_id
    # and reporting_metadata.last_syncs.app_id when app is selected
    submitted_with_app = filters.nested(
        'reporting_metadata.last_submissions',
        filters.term('reporting_metadata.last_submissions.app_id', self.selected_app_id),
    )
    synced_with_app = filters.nested(
        'reporting_metadata.last_syncs',
        filters.term("reporting_metadata.last_syncs.app_id", self.selected_app_id),
    )
    return query.OR(submitted_with_app, synced_with_app)
def user_es_query(cls, domain, request):
    """Build the user ES query for the selections made in ``request``.

    When the REGISTERED user type is selected, the query ORs together the
    selected user-type filters plus the mobile-users filter. Otherwise it
    matches the selected user ids / group ids, OR-ed with any selected
    user-type filters.

    :param domain: domain passed to ``UserES().domain``.
    :param request: request the selections are read from.
    """
    user_ids = cls.selected_user_ids(request)
    user_types = cls.selected_user_types(request)
    group_ids = cls.selected_group_ids(request)

    type_filters = []
    if HQUserType.ADMIN in user_types:
        type_filters.append(user_es.admin_users())
    if HQUserType.UNKNOWN in user_types:
        # selecting "unknown" adds both the unknown-users and web-users filters
        type_filters.extend([user_es.unknown_users(), user_es.web_users()])
    if HQUserType.DEMO_USER in user_types:
        type_filters.append(user_es.demo_users())

    base_query = user_es.UserES().domain(domain)

    if HQUserType.REGISTERED in user_types:
        # return all users with selected user_types
        type_filters.append(user_es.mobile_users())
        return base_query.OR(*type_filters)

    # return matching user types and exact matches
    exact_match = filters.OR(
        filters.term("_id", user_ids),
        filters.term("__group_ids", group_ids),
    )
    if not type_filters:
        return base_query.filter(exact_match)
    return base_query.OR(exact_match, filters.OR(*type_filters))
def get_by_case_id_form_es_query(start, size, case_id):
    """Return the base form ES query restricted to forms touching ``case_id``.

    A form matches when, inside the nested ``form.case`` path, either
    ``form.case.@case_id`` or ``form.case.case_id`` equals ``case_id``.

    :param start: passed through to ``get_base_form_es_query``.
    :param size: passed through to ``get_base_form_es_query``.
    """
    query = get_base_form_es_query(start, size)
    either_case_id_field = filters.OR(
        filters.term('form.case.@case_id', case_id),
        filters.term('form.case.case_id', case_id),
    )
    return query.filter(filters.nested('form.case', either_case_id_field))
def user_query(self, pagination=True):
    """Assemble the sorted (and optionally paginated) user ES query.

    :param pagination: apply ``self.pagination.count`` / ``.start`` when true.
    :returns: the query from ``ExpandedMobileWorkerFilter.user_es_query`` with
        the sorting block set and, if ``self.selected_app_id`` is truthy, the
        two nested app-id filters handed to its ``OR`` method.
    """
    request_params = self.request.GET
    # Cater for old ReportConfigs
    legacy_slugs = request_params.getlist('location_restricted_mobile_worker')
    current_slugs = request_params.getlist(ExpandedMobileWorkerFilter.slug)
    mobile_user_and_group_slugs = set(legacy_slugs + current_slugs)

    result = ExpandedMobileWorkerFilter.user_es_query(
        self.domain,
        mobile_user_and_group_slugs,
        self.request.couch_user,
    ).set_sorting_block(self.get_sorting_block())

    if pagination:
        result = result.size(self.pagination.count).start(self.pagination.start)

    if self.selected_app_id:
        # adding nested filter for reporting_metadata.last_submissions.app_id
        # and reporting_metadata.last_syncs.app_id when app is selected
        app_filters = [
            filters.nested(
                'reporting_metadata.last_submissions',
                filters.term('reporting_metadata.last_submissions.app_id',
                             self.selected_app_id),
            ),
            filters.nested(
                'reporting_metadata.last_syncs',
                filters.term("reporting_metadata.last_syncs.app_id",
                             self.selected_app_id),
            ),
        ]
        result = result.OR(*app_filters)

    return result
def case_owners(self):
    """Return the list of owner ids implied by the EMWF selections on the request.

    Combines: users matching the selected user types, explicitly selected
    users, selected sharing groups, members of selected reporting groups,
    case-sharing groups containing any of those users, the fixed
    "commtrack-system" / "demo_user_group_id" ids when the matching user
    type is selected, and the location sharing/reporting owner ids.
    """
    # Users matching the demo_user, admin, Unknown Users, or All Mobile Workers filters
    user_types = EMWF.selected_user_types(self.request)
    type_filters = []
    if HQUserType.ADMIN in user_types:
        type_filters.append(user_es.admin_users())
    if HQUserType.UNKNOWN in user_types:
        type_filters.extend([user_es.unknown_users(), user_es.web_users()])
    if HQUserType.DEMO_USER in user_types:
        type_filters.append(user_es.demo_users())
    if HQUserType.REGISTERED in user_types:
        type_filters.append(user_es.mobile_users())

    special_user_ids = []
    if type_filters:
        special_users = user_es.UserES().domain(self.domain).OR(*type_filters).show_inactive()
        special_user_ids = special_users.run().doc_ids

    # Users that were specifically selected
    selected_user_ids = EMWF.selected_user_ids(self.request)

    # Groups that were specified
    reporting_group_ids = EMWF.selected_reporting_group_ids(self.request)
    sharing_group_ids_selected = EMWF.selected_sharing_group_ids(self.request)

    # Users in the specified reporting groups
    reporting_groups = (
        HQESQuery(index="groups")
        .domain(self.domain)
        .doc_type("Group")
        .filter(filters.term("_id", reporting_group_ids))
        .fields(["users"])
    )
    members_per_group = [hit["users"] for hit in reporting_groups.run().hits]
    reporting_group_users = list(set().union(*members_per_group))

    # Sharing groups containing a reporting-group user, a specifically
    # selected user, or a user matched by type
    candidate_users = reporting_group_users + selected_user_ids + special_user_ids
    sharing_groups = (
        HQESQuery(index="groups")
        .domain(self.domain)
        .doc_type("Group")
        .filter(filters.term("case_sharing", True))
        .filter(filters.term("users", candidate_users))
        .fields([])
    )
    sharing_group_ids = sharing_groups.run().doc_ids

    owner_ids = list(set().union(
        special_user_ids,
        selected_user_ids,
        sharing_group_ids_selected,
        reporting_group_users,
        sharing_group_ids,
    ))
    if HQUserType.COMMTRACK in user_types:
        owner_ids.append("commtrack-system")
    if HQUserType.DEMO_USER in user_types:
        owner_ids.append("demo_user_group_id")
    owner_ids += self.location_sharing_owner_ids()
    owner_ids += self.location_reporting_owner_ids()
    return owner_ids
def login_as_user_query(domain, couch_user, search_string, limit, offset, user_data_fields=None):
    '''
    Build the query that populates the Login As screen.

    :param domain: String domain
    :param couch_user: The CouchUser that is using the Login As feature
    :param search_string: The query that filters the users returned. Matched
        against the built-in search fields and, when given, against the
        custom fields named in `user_data_fields`.
    :param limit: The max amount of users returned.
    :param offset: From where to start the query.
    :param user_data_fields: A list of custom user data fields that should
        also be searched by the `search_string`
    :returns: An EsQuery instance.
    '''
    search_fields = ["base_username", "last_name", "first_name", "phone_numbers"]
    should_clauses = [queries.search_string_query(search_string, search_fields)]

    if user_data_fields:
        custom_field_matches = [
            filters.AND(
                filters.term('user_data_es.key', field),
                filters.term('user_data_es.value', search_string),
            )
            for field in user_data_fields
        ]
        should_clauses.append(
            queries.nested_filter('user_data_es', filters.OR(*custom_field_matches))
        )

    query = UserES().domain(domain).start(offset).size(limit).sort('username.exact')
    # It should either match on the search fields like username or it
    # should match on the custom user data fields. If this were 2, then
    # it would require the search string to match both on the search
    # fields and the custom user data fields.
    should = queries.SHOULD_CLAUSE(should_clauses, minimum_should_match=1)
    query = query.set_query(queries.BOOL_CLAUSE(should))

    if not couch_user.has_permission(domain, 'access_all_locations'):
        accessible = SQLLocation.objects.accessible_to_user(domain, couch_user)
        query = query.location(list(accessible.location_ids()))

    return query.mobile_users()
def login_as_user_query(domain, couch_user, search_string, limit, offset):
    '''
    Build the query that populates the Login As screen.

    :param domain: String domain
    :param couch_user: The CouchUser that is using the Login As feature
    :param search_string: The query that filters the users returned. Matched
        against base_username, last_name, first_name and phone_numbers.
    :param limit: The max amount of users returned.
    :param offset: From where to start the query.
    :returns: An EsQuery instance.
    '''
    search_fields = ["base_username", "last_name", "first_name", "phone_numbers"]

    query = UserES().domain(domain).start(offset).size(limit)
    query = query.sort('username.exact')
    query = query.search_string_query(search_string, search_fields)

    if not couch_user.has_permission(domain, 'access_all_locations'):
        accessible = SQLLocation.objects.accessible_to_user(domain, couch_user)
        query = query.location(list(accessible.location_ids()))

    if _limit_login_as(couch_user, domain):
        def login_as_value(value):
            # matches user data entries of the form login_as_user == value
            return filters.AND(
                filters.term('user_data_es.key', 'login_as_user'),
                filters.term('user_data_es.value', value),
            )

        allowed = [login_as_value(couch_user.username)]
        if couch_user.has_permission(domain, 'access_default_login_as_user'):
            allowed.append(login_as_value('default'))
        query = query.filter(queries.nested('user_data_es', filters.OR(*allowed)))

    return query.mobile_users()
def test_not_filter_edge_case(self):
    """NOT(OR(domain == 'd', app_id == 'a')) should return only 'doc3'."""
    self._setup_data()
    domain_or_app = filters.OR(
        filters.term('domain', 'd'),
        filters.term('app_id', 'a'),
    )
    query = FormES().remove_default_filters().filter(filters.NOT(domain_or_app))
    found_ids = query.run().doc_ids
    self.assertEqual(found_ids, ['doc3'])
def case_owners(self):
    """Return the list of owner ids implied by the EMWF selections on the request.

    Combines users matching the selected user types, explicitly selected
    users, selected sharing groups, members of selected reporting groups and
    case-sharing groups containing any of those users; "commtrack-system" is
    appended when the COMMTRACK user type is selected.
    """
    # Users matching the demo_user, admin, Unknown Users, or All Mobile Workers filters
    user_types = EMWF.selected_user_types(self.request)
    selections = (
        (HQUserType.ADMIN, (user_es.admin_users,)),
        (HQUserType.UNKNOWN, (user_es.unknown_users, user_es.web_users)),
        (HQUserType.DEMO_USER, (user_es.demo_users,)),
        (HQUserType.REGISTERED, (user_es.mobile_users,)),
    )
    user_type_filters = []
    for user_type, make_filters in selections:
        if user_type in user_types:
            for make_filter in make_filters:
                user_type_filters.append(make_filter())

    if user_type_filters:
        special_q = user_es.UserES().domain(self.domain).OR(*user_type_filters)
        special_user_ids = special_q.run().doc_ids
    else:
        special_user_ids = []

    # Users that were specifically selected
    selected_user_ids = EMWF.selected_user_ids(self.request)

    # Groups that were specified
    selected_reporting_group_ids = EMWF.selected_reporting_group_ids(self.request)
    selected_sharing_group_ids = EMWF.selected_sharing_group_ids(self.request)

    # Users in the specified reporting groups
    report_group_q = (
        HQESQuery(index="groups")
        .domain(self.domain)
        .doc_type("Group")
        .filter(filters.term("_id", selected_reporting_group_ids))
        .fields(["users"])
    )
    user_lists = [group["users"] for group in report_group_q.run().hits]
    selected_reporting_group_users = list(set().union(*user_lists))

    # Sharing groups that contain a user from selected_reporting_group_users,
    # a user that was specifically selected, or a user matched by type
    users_to_match = selected_reporting_group_users + selected_user_ids + special_user_ids
    share_group_q = (
        HQESQuery(index="groups")
        .domain(self.domain)
        .doc_type("Group")
        .filter(filters.term("case_sharing", True))
        .filter(filters.term("users", users_to_match))
        .fields([])
    )
    sharing_group_ids = share_group_q.run().doc_ids

    owner_ids = list(set().union(
        special_user_ids,
        selected_user_ids,
        selected_sharing_group_ids,
        selected_reporting_group_users,
        sharing_group_ids,
    ))
    if HQUserType.COMMTRACK in EMWF.selected_user_types(self.request):
        owner_ids.append("commtrack-system")
    return owner_ids
def es_results(self):
    """Run the case ES query for the current request and return its results.

    The base case query (paginated with ``self.pagination``) is restricted to
    ``PACT_CASE_TYPE``, limited to the listed source fields, narrowed by the
    DOT status, HP status and primary HP request parameters, sorted with
    ``self.get_sorting_block()`` and executed through ``self.case_es``.

    :returns: whatever ``self.case_es.run_query`` returns for the raw query.
    """
    fields = [
        "_id",
        "name",
        "pactid.#value",
        "opened_on",
        "modified_on",
        "hp_status.#value",
        "hp.#value",
        "dot_status.#value",
        "closed_on",
        "closed",
    ]
    full_query = (
        get_base_case_es_query(self.pagination.start, self.pagination.count)
        .filter(filters.term('type', PACT_CASE_TYPE))
        .source(fields)
    )

    def status_filtering(query, slug, field, prefix, any_field, default):
        """Return ``query`` narrowed by the status selected under GET param ``slug``.

        The query is passed in and returned explicitly: rebinding the outer
        ``full_query`` from inside this closure would make it a local name
        and raise UnboundLocalError on first use.
        """
        status = self.request.GET.get(slug, None)
        if status is None:
            return query
        if status == "":
            # silly double default checker here - set default or the any
            # depending on preference
            status = default
        if status is None:
            return query

        if status.startswith(prefix):
            status_prefix = status
        elif status == any_field:
            status_prefix = prefix
        else:
            # neither a prefixed value nor the "any" marker: exact match
            status_prefix = None
            query = query.filter(filters.term(field, status.lower()))

        if status_prefix is not None:
            query = query.add_query({"prefix": {field: status_prefix.lower()}})
        return query

    full_query = status_filtering(
        full_query, DOTStatus.slug, "dot_status.#value", "DOT",
        DOTStatus.ANY_DOT, None)
    full_query = status_filtering(
        full_query, HPStatusField.slug, "hp_status.#value", "HP",
        HPStatusField.ANY_HP, HPStatusField.ANY_HP)

    # primary_hp filter from the user filter
    primary_hp_term = self.request.GET.get(PactPrimaryHPField.slug, "")
    if primary_hp_term != "":
        full_query = full_query.filter(filters.term("hp.#value", primary_hp_term))

    # The query is a builder object (.filter/.source/.raw_query), so the
    # sort is applied through its builder method, not item assignment.
    full_query = full_query.set_sorting_block(self.get_sorting_block())
    return self.case_es.run_query(full_query.raw_query)
def user_es_query(cls, domain, mobile_user_and_group_slugs, request_user):
    """Build the user ES query for the given filter slugs.

    :param domain: domain passed to ``UserES().domain``.
    :param mobile_user_and_group_slugs: slugs from which the selected user
        ids, user types, group ids and location ids are derived.
    :param request_user: user whose ``access_all_locations`` permission
        decides whether results are limited to accessible locations.
    """
    # The queryset returned by this method is location-safe
    slugs = mobile_user_and_group_slugs
    user_ids = cls.selected_user_ids(slugs)
    user_types = cls.selected_user_types(slugs)
    group_ids = cls.selected_group_ids(slugs)
    location_ids = cls.selected_location_ids(slugs)

    type_filters = []
    if HQUserType.ADMIN in user_types:
        type_filters.append(user_es.admin_users())
    if HQUserType.UNKNOWN in user_types:
        type_filters.extend([user_es.unknown_users(), user_es.web_users()])
    if HQUserType.DEMO_USER in user_types:
        type_filters.append(user_es.demo_users())

    wants_active = HQUserType.ACTIVE in user_types
    wants_deactivated = HQUserType.DEACTIVATED in user_types

    query = user_es.UserES().domain(domain)
    if wants_active and wants_deactivated:
        query = query.show_inactive()
    elif wants_deactivated:
        query = query.show_only_inactive()

    if not request_user.has_permission(domain, 'access_all_locations'):
        cls._verify_users_are_accessible(domain, request_user, user_ids)
        by_user_id = filters.term("_id", user_ids)
        accessible_location_ids = (
            SQLLocation.active_objects
            .get_locations_and_children(location_ids)
            .accessible_to_user(domain, request_user)
            .location_ids()
        )
        return query.OR(by_user_id, user_es.location(list(accessible_location_ids)))

    if wants_active or wants_deactivated:
        # return all users with selected user_types
        type_filters.append(user_es.mobile_users())
        return query.OR(*type_filters)

    # return matching user types and exact matches
    location_ids = list(
        SQLLocation.active_objects
        .get_locations_and_children(location_ids)
        .location_ids()
    )
    exact_match = filters.OR(
        filters.term("_id", user_ids),
        filters.term("__group_ids", group_ids),
        user_es.location(location_ids),
    )
    if not type_filters:
        return query.filter(exact_match)
    return query.OR(exact_match, filters.OR(*type_filters))
def es_query_from_get_params(search_params, domain, reserved_query_params=None, doc_type='form'):
    """Translate GET-style search parameters into a raw ES query.

    :param search_params: mapping of parameter name to value.
    :param domain: domain the query is restricted to.
    :param reserved_query_params: extra parameter names (on top of
        ``RESERVED_QUERY_PARAMS``) that must not be turned into filters.
    :param doc_type: ``'form'`` or ``'case'``; selects FormES or CaseES.
    :returns: the ``raw_query`` of the assembled query.
    :raises Http400: when a query param consumer raises ``DateTimeError``.
    """
    # doc_type can be form or case
    assert doc_type in ['form', 'case']
    is_form = doc_type == 'form'
    es = FormES() if is_form else CaseES()
    query = es.remove_default_filters().domain(domain)

    if is_form:
        doc_type_filter = filters.term('doc_type', 'xforminstance')
        if 'include_archived' in search_params:
            doc_type_filter = filters.OR(
                doc_type_filter,
                filters.term('doc_type', 'xformarchived'),
            )
        query = query.filter(doc_type_filter)

    if '_search' in search_params:
        # This is undocumented usecase by Data export tool and one custom project
        # Validate that the passed in param is one of these two expected
        raw_search = json.loads(search_params['_search'])
        query = query.filter(_validate_and_get_es_filter(raw_search))

    # filters are actually going to be a more common case
    reserved_query_params = RESERVED_QUERY_PARAMS | set(reserved_query_params or [])
    query_params = {}
    for name, raw_value in search_params.items():
        if name in reserved_query_params or name.endswith('__full'):
            continue
        query_params[name] = raw_value

    for consumer in query_param_consumers:
        try:
            consumed_filter = consumer.consume_params(query_params)
        except DateTimeError as e:
            raise Http400("Bad query parameter: {}".format(str(e)))
        if consumed_filter:
            query = query.filter(consumed_filter)

    # add unconsumed filters
    for name, raw_value in query_params.items():
        # assume these fields are analyzed in ES so convert to lowercase
        # Any fields that are not analyzed in ES should be in the
        # ``query_param_consumers`` above
        query = query.filter(filters.term(name, raw_value.lower()))

    return query.raw_query
def get_case_owner_filters(domains):
    """Return an ``owner_id`` term filter covering users and groups.

    The owner ids are ``get_user_ids(True, domains)`` followed by the ids of
    all groups, restricted to ``domains`` when it is truthy.
    """
    owner_ids = list(get_user_ids(True, domains))
    groups = GroupES()
    if domains:
        groups = groups.domain(domains)
    owner_ids = owner_ids + groups.get_ids()
    return filters.term('owner_id', owner_ids)
def to_es_filter(self):
    """Return the ES filter for this value, or None when showing everything.

    A null selection maps to a ``missing`` filter on the filter's field;
    otherwise a ``term`` filter on the selected choice values is returned.
    """
    if self.show_all:
        return None
    field = self.filter.field
    if self.is_null:
        return filters.missing(field)
    return filters.term(field, [choice.value for choice in self.value])
def _get_case_counts_by_user(domain, datespan, case_types=None, is_opened=True, user_ids=None, export=False):
    """Return case counts bucketed by the user who opened (or closed) them.

    :param domain: domain to query.
    :param datespan: object whose ``startdate`` / ``enddate`` bound the
        opened_on (or closed_on) date, inclusive.
    :param case_types: restrict to these case types; when falsy, cases of
        type 'commcare-user' are excluded instead.
    :param is_opened: count by opened_on/opened_by when true, otherwise by
        closed_on/closed_by.
    :param user_ids: when truthy, restrict the user field to these ids.
    :param export: use ``ES_EXPORT_INSTANCE`` instead of the default instance.
    """
    if is_opened:
        date_field, user_field = 'opened_on', 'opened_by'
    else:
        date_field, user_field = 'closed_on', 'closed_by'
    es_instance = ES_EXPORT_INSTANCE if export else ES_DEFAULT_INSTANCE

    query = CaseES(es_instance_alias=es_instance).domain(domain)
    in_datespan = filters.date_range(
        date_field,
        gte=datespan.startdate.date(),
        lte=datespan.enddate.date(),
    )
    query = query.filter(in_datespan).terms_aggregation(user_field, 'by_user').size(0)

    if case_types:
        query = query.case_type(case_types)
    else:
        query = query.filter(filters.NOT(case_type_filter('commcare-user')))

    if user_ids:
        query = query.filter(filters.term(user_field, user_ids))

    return query.run().aggregations.by_user.counts_by_bucket()
def get_call_center_domains():
    """Return a ``DomainLite`` for every active domain with a usable call center config.

    A config is usable when it has a case type and either a case owner id or
    ``use_user_location_as_owner`` set.
    """
    source_fields = [
        'name',
        'default_timezone',
        'call_center_config.case_type',
        'call_center_config.case_owner_id',
        'call_center_config.use_user_location_as_owner',
        'call_center_config.use_fixtures',
    ]
    enabled = filters.term('call_center_config.enabled', True)
    result = DomainES().is_active().filter(enabled).source(source_fields).run()

    def to_domain_lite(hit):
        """Return a DomainLite for ``hit``, or None if its config is unusable."""
        config = hit.get('call_center_config', {})
        case_type = config.get('case_type', None)
        case_owner_id = config.get('case_owner_id', None)
        use_user_location_as_owner = config.get('use_user_location_as_owner', None)
        # see CallCenterProperties.config_is_valid()
        if not (case_type and (case_owner_id or use_user_location_as_owner)):
            return None
        # NOTE(review): the *_datasource_enabled keys are not among the
        # requested source fields, so these lookups presumably always fall
        # back to True - confirm.
        return DomainLite(
            name=hit['name'],
            default_timezone=hit['default_timezone'],
            cc_case_type=case_type,
            use_fixtures=config.get('use_fixtures', True),
            form_datasource_enabled=config.get('form_datasource_enabled', True),
            case_datasource_enabled=config.get('case_datasource_enabled', True),
            case_actions_datasource_enabled=config.get('case_actions_datasource_enabled', True),
        )

    domains = []
    for hit in result.hits:
        domain_lite = to_domain_lite(hit)
        if domain_lite:
            domains.append(domain_lite)
    return domains
def to_es_filter(self):
    """Return an ES filter OR-ing one term filter per entry of the location hierarchy.

    The location id is taken from the first selected value; each entry
    returned by ``self.get_hierarchy(location_id)`` contributes a term filter
    on its ``column`` / ``filter_value``.
    """
    location_id = self.value[0].value
    hierarchy_terms = [
        filters.term(level.column, level.filter_value)
        for level in self.get_hierarchy(location_id)
    ]
    # OR is called with each sub-filter as its own positional argument;
    # passing the list itself would wrap all terms in a single nested element.
    return filters.OR(*hierarchy_terms)
def get_form_counts_for_domains(domains):
    """Return form counts bucketed by domain for the given ``domains``.

    Runs a FormES query filtered on the ``domain`` term with a domain
    aggregation and ``size(0)``.
    """
    query = FormES().filter(filters.term('domain', domains))
    query = query.domain_aggregation().size(0)
    domain_buckets = query.run().aggregations.domain
    return domain_buckets.counts_by_bucket()
def _get_case_counts_by_user(domain, datespan, case_types=None, is_opened=True, user_ids=None, export=False):
    """Count cases per opening (or closing) user within ``datespan``.

    :param domain: domain to query.
    :param datespan: supplies ``startdate`` and ``enddate`` for the inclusive
        date range on opened_on / closed_on.
    :param case_types: case types to include; if falsy, 'commcare-user'
        cases are excluded.
    :param is_opened: use the opened_* fields when true, closed_* otherwise.
    :param user_ids: optional ids to restrict the user field to.
    :param export: query ``ES_EXPORT_INSTANCE`` rather than the default.
    :returns: ``counts_by_bucket()`` of the ``by_user`` aggregation.
    """
    field_prefix = 'opened' if is_opened else 'closed'
    date_field = field_prefix + '_on'
    user_field = field_prefix + '_by'
    es_instance = ES_EXPORT_INSTANCE if export else ES_DEFAULT_INSTANCE

    case_query = (
        CaseES(es_instance_alias=es_instance)
        .domain(domain)
        .filter(filters.date_range(
            date_field,
            gte=datespan.startdate.date(),
            lte=datespan.enddate.date(),
        ))
        .terms_aggregation(user_field, 'by_user')
        .size(0)
    )

    if not case_types:
        case_query = case_query.filter(filters.NOT(case_type_filter('commcare-user')))
    else:
        case_query = case_query.case_type(case_types)

    if user_ids:
        case_query = case_query.filter(filters.term(user_field, user_ids))

    result = case_query.run()
    return result.aggregations.by_user.counts_by_bucket()
def _get_case_counts_by_user(domain, datespan, case_types=None, is_opened=True, owner_ids=None):
    """Count cases per opening (or closing) user within ``datespan``.

    :param domain: domain to query.
    :param datespan: supplies ``startdate`` and ``enddate`` for the inclusive
        date range on opened_on / closed_on.
    :param case_types: case types to include; if falsy, 'commcare-user'
        cases are excluded.
    :param is_opened: use the opened_* fields when true, closed_* otherwise.
    :param owner_ids: when truthy, the opened_by / closed_by field is
        restricted to these ids.
    :returns: ``counts_by_bucket()`` of the ``by_user`` aggregation.
    """
    if is_opened:
        date_field, user_field = 'opened_on', 'opened_by'
    else:
        date_field, user_field = 'closed_on', 'closed_by'

    query = CaseES().domain(domain)
    query = query.filter(filters.date_range(
        date_field,
        gte=datespan.startdate.date(),
        lte=datespan.enddate.date(),
    ))
    query = query.terms_aggregation(user_field, 'by_user').size(0)

    if case_types:
        query = query.case_type(case_types)
    else:
        query = query.filter(filters.NOT(case_type_filter('commcare-user')))

    if owner_ids:
        query = query.filter(filters.term(user_field, owner_ids))

    return query.run().aggregations.by_user.counts_by_bucket()
def get_call_center_domains():
    """Return a list of ``DomainLite`` for active domains with a usable call center config.

    A config is usable when it has a case type and either a case owner id or
    ``use_user_location_as_owner`` set; other hits are dropped.

    :returns: a list (not a lazy ``filter`` iterator), so callers can take
        its length, index it or iterate it more than once.
    """
    result = (
        DomainES()
        .is_active()
        .filter(filters.term("call_center_config.enabled", True))
        .source(
            [
                "name",
                "default_timezone",
                "call_center_config.case_type",
                "call_center_config.case_owner_id",
                "call_center_config.use_user_location_as_owner",
                "call_center_config.use_fixtures",
            ]
        )
        .run()
    )

    def to_domain_lite(hit):
        """Return a DomainLite for ``hit``, or None if its config is unusable."""
        config = hit.get("call_center_config", {})
        case_type = config.get("case_type", None)
        case_owner_id = config.get("case_owner_id", None)
        use_user_location_as_owner = config.get("use_user_location_as_owner", None)
        if case_type and (case_owner_id or use_user_location_as_owner):
            # see CallCenterProperties.config_is_valid()
            return DomainLite(
                name=hit["name"],
                default_timezone=hit["default_timezone"],
                cc_case_type=case_type,
                use_fixtures=config.get("use_fixtures", True),
            )
        return None

    # On Python 3 the builtin filter() yields a one-shot iterator; build a
    # real list so the result behaves the same on every Python version.
    candidates = (to_domain_lite(hit) for hit in result.hits)
    return [domain_lite for domain_lite in candidates if domain_lite]
def handle(self, *args, **options):
    """Check attachments of SQL-backend forms submitted 2016-10-14 to 2016-10-20.

    For each attachment without a ``blob_bucket``: if its blob exists in the
    bucket computed with ``remove_dashes=False``, that bucket is written back
    to the attachment; otherwise an error is logged when the blob is also
    absent from the ``remove_dashes=True`` bucket.
    """
    query = FormES().submitted(
        gte=datetime.date(2016, 10, 14),
        lt=datetime.date(2016, 10, 20),
    )
    query = query.filter(filters.term('backend_id', 'sql')).source('_id')
    form_ids = [hit['_id'] for hit in query.run().hits]

    blob_db = get_blob_db()
    for form_id in form_ids:
        form = FormAccessorSQL.get_form(form_id)
        for attachment in form.get_attachments():
            if attachment.blob_bucket:
                continue

            dashed_bucket = attachment.blobdb_bucket(remove_dashes=False)
            attach_id = str(attachment.attachment_id)
            if blob_db.exists(attachment.blob_id, dashed_bucket):
                FormAccessorSQL.write_blob_bucket(attachment, dashed_bucket)
                # NOTE(review): this call goes through the ``logging`` module
                # while the error below uses ``logger`` - confirm intended.
                logging.info(attach_id + " overwritten blob_bucket_succesfully")
                continue

            # This is the default and what we want long term
            # verify it exists
            default_bucket = attachment.blobdb_bucket(remove_dashes=True)
            if not blob_db.exists(attachment.blob_id, default_bucket):
                logger.error(attach_id + " does not exist in either expected bucket")
def get_call_center_domains():
    """Return ``DomainLite`` objects for active, call-center-enabled domains.

    Hits whose config lacks a case type, or has neither a case owner id nor
    ``use_user_location_as_owner``, are left out of the returned list.
    """
    query = DomainES().is_active()
    query = query.filter(filters.term('call_center_config.enabled', True))
    query = query.source([
        'name',
        'default_timezone',
        'call_center_config.case_type',
        'call_center_config.case_owner_id',
        'call_center_config.use_user_location_as_owner',
        'call_center_config.use_fixtures',
    ])
    result = query.run()

    def to_domain_lite(hit):
        """Return a DomainLite for ``hit``, or None if its config is unusable."""
        config = hit.get('call_center_config', {})
        case_type = config.get('case_type', None)
        has_owner = (
            config.get('case_owner_id', None)
            or config.get('use_user_location_as_owner', None)
        )
        # see CallCenterProperties.config_is_valid()
        if not case_type or not has_owner:
            return None
        return DomainLite(
            name=hit['name'],
            default_timezone=hit['default_timezone'],
            cc_case_type=case_type,
            use_fixtures=config.get('use_fixtures', True),
            form_datasource_enabled=config.get('form_datasource_enabled', True),
            case_datasource_enabled=config.get('case_datasource_enabled', True),
            case_actions_datasource_enabled=config.get(
                'case_actions_datasource_enabled', True),
        )

    converted = [to_domain_lite(hit) for hit in result.hits]
    return [domain_lite for domain_lite in converted if domain_lite]
def status_filtering(slug, field, prefix, any_field, default):
    """Narrow ``full_query`` by the status chosen under GET parameter ``slug``.

    ``self`` and ``full_query`` are not parameters; they are expected to
    come from an enclosing scope that is not visible here.

    :param slug: name of the GET parameter holding the selected status.
    :param field: ES field the status is matched against.
    :param prefix: prefix that marks a value to be matched as a prefix query.
    :param any_field: value meaning "any status"; matched as ``prefix``.
    :param default: value used when the parameter is present but empty;
        ``None`` means no filtering is applied in that case.

    NOTE(review): ``full_query`` is assigned below without a ``nonlocal`` or
    ``global`` declaration, so Python treats it as local to this function and
    reading it in ``full_query.filter(...)`` / ``full_query.add_query(...)``
    would raise UnboundLocalError - confirm against the enclosing scope.
    """
    if self.request.GET.get(slug, None) is not None:
        field_status_filter_query = self.request.GET[slug]
        if field_status_filter_query == "":
            # silly double default checker here - set default or the any
            # depending on preference
            field_status_filter_query = default
        if field_status_filter_query is None:
            # parameter was empty and there is no default: nothing to filter
            return
        else:
            if field_status_filter_query.startswith(prefix):
                # a prefixed value is matched as a prefix query below
                field_status_prefix = field_status_filter_query
            elif field_status_filter_query == any_field:
                # "any" matches everything starting with the bare prefix
                field_status_prefix = prefix
            else:
                # anything else is matched exactly (lower-cased) as a term
                field_status_prefix = None
                full_query = full_query.filter(
                    filters.term(field, field_status_filter_query.lower()))
            if field_status_prefix is not None:
                field_filter = {
                    "prefix": {
                        field: field_status_prefix.lower()
                    }
                }
                full_query = full_query.add_query(field_filter)
def test_nested_aggregation(self):
    """A terms aggregation with a nested filter aggregation, plus a sibling
    filters aggregation, should serialize to the expected raw query."""
    expected_aggs = {
        "users": {
            "terms": {"field": "user_id", "size": SIZE_LIMIT},
            "aggs": {
                "closed": {"filter": {"term": {"closed": True}}},
            },
        },
        "total_by_status": {
            "filters": {
                "filters": {
                    "closed": {"term": {"closed": True}},
                    "open": {"term": {"closed": False}},
                },
            },
        },
    }
    expected_query = {
        "filtered": {
            "filter": {"and": [{"match_all": {}}]},
            "query": {"match_all": {}},
        },
    }
    json_output = {
        "query": expected_query,
        "aggs": expected_aggs,
        "size": SIZE_LIMIT,
    }

    cases = HQESQuery('cases')
    aggs = [
        TermsAggregation("users", 'user_id').aggregation(
            FilterAggregation('closed', filters.term('closed', True))
        ),
        FiltersAggregation('total_by_status')
        .add_filter('closed', filters.term('closed', True))
        .add_filter('open', filters.term('closed', False)),
    ]
    query = cases.aggregations(aggs)
    self.checkQuery(query, json_output)
def aggregation(self):
    """Return the ES aggregation for this column.

    A ``MissingAggregation`` on the data source field is used when
    ``expand_value`` is None; otherwise a ``FilterAggregation`` matching the
    field against ``expand_value``. Both are named ``self.es_alias``.
    """
    alias = self.es_alias
    field = self.data_source_field
    if self.expand_value is not None:
        return FilterAggregation(alias, filters.term(field, self.expand_value))
    return MissingAggregation(alias, field)
def to_es_filter(self):
    """Return the ES filter for this value, or None when showing everything.

    A null selection ORs a ``missing`` filter for each of the filter's
    ``fields``; otherwise the result is an OR wrapping a single term filter
    on the filter's ``field`` with the selected choice values.
    """
    if self.show_all:
        return None
    if self.is_null:
        missing_filters = [filters.missing(name) for name in self.filter['fields']]
        return filters.OR(*missing_filters)
    selected_values = [choice.value for choice in self.value]
    return filters.OR(filters.term(self.filter['field'], selected_values))
def _get_child_cases(self, household_ids):
    """Run and return the query for open 'person' cases in 'icds-cas' whose
    ``indices.referenced_id`` is one of ``household_ids``.

    Uses the 'export' ES instance and fetches only ``SOURCE_FIELDS``.
    """
    references_household = filters.term("indices.referenced_id", household_ids)
    query = CaseES(es_instance_alias='export').domain('icds-cas')
    query = query.case_type('person').is_closed(False)
    query = query.source(SOURCE_FIELDS).filter(references_household)
    return query.run()
def get_form_counts_for_domains(domains):
    """Return per-domain form counts for ``domains``.

    The FormES query is filtered on the ``domain`` term, aggregated by
    domain and run with ``size(0)``.
    """
    in_domains = filters.term("domain", domains)
    result = FormES().filter(in_domains).domain_aggregation().size(0).run()
    return result.aggregations.domain.counts_by_bucket()
def test_not_or_rewrite(self):
    """NOT(OR(type == 'A', type == 'B')) should serialize as a bool
    ``must_not`` wrapping a bool ``should`` of the two term filters."""
    either_type = {
        "bool": {
            "should": [
                {"term": {"type": "A"}},
                {"term": {"type": "B"}},
            ]
        }
    }
    expected = {
        "query": {
            "bool": {
                "filter": [
                    {"bool": {"must_not": either_type}},
                    {"match_all": {}},
                ],
                "must": {"match_all": {}},
            }
        },
        "size": SIZE_LIMIT,
    }

    cases = HQESQuery('cases')
    type_a_or_b = filters.OR(filters.term('type', 'A'), filters.term('type', 'B'))
    query = cases.filter(filters.NOT(type_a_or_b))
    self.checkQuery(query, expected)
def test_result_parsing_basic(self):
    """Filter aggregation doc counts should be readable from the query set."""
    closed_agg = FilterAggregation('closed', filters.term('closed', True))
    open_agg = FilterAggregation('open', filters.term('closed', False))
    query = HQESQuery('cases').aggregations([closed_agg, open_agg])

    raw_result = {
        "aggregations": {
            "closed": {"doc_count": 1},
            "open": {"doc_count": 2},
        }
    }
    parsed = ESQuerySet(raw_result, deepcopy(query)).aggregations

    self.assertEqual(parsed.closed.doc_count, 1)
    self.assertEqual(parsed.open.doc_count, 2)
def to_es_filter(self):
    """Return the ES filter for this value.

    Dynamic dates become an exclusive ``date_range`` on the filter's field,
    null values a ``missing`` filter, and anything else a term filter on the
    lower-cased selected values.
    """
    # TODO: support the array and null operators defined at top of class
    field = self.filter.field
    if self._is_dyn_date():
        start_date, end_date = get_daterange_start_end_dates(
            self.value['operator'], *self.value['operand']
        )
        return filters.date_range(field, gt=start_date, lt=end_date)
    if self._is_null():
        return filters.missing(field)
    lowered_values = [choice.value.lower() for choice in self.value]
    return filters.term(field, lowered_values)
def es_search_by_params(search_params, domain, reserved_query_params=None):
    """Translate GET-style search parameters into a raw ES payload dict.

    :param search_params: mapping of parameter name to value.
    :param domain: value matched against ``domain.exact``.
    :param reserved_query_params: extra parameter names (on top of
        ``RESERVED_QUERY_PARAMS``) that must not be turned into filters.
    :returns: dict with a ``filter`` key (an ``and`` list) and, when
        ``_search`` or ``q`` supply one, a ``query`` key.
    :raises Http400: when a query param consumer raises ``DateTimeError``.
    """
    and_filters = [{"term": {"domain.exact": domain}}]
    payload = {"filter": {"and": and_filters}}

    # ?_search=<json> for providing raw ES query, which is nonetheless restricted here
    # NOTE: The fields actually analyzed into ES indices differ somewhat from the raw
    # XML / JSON.
    if '_search' in search_params:
        additions = json.loads(search_params['_search'])
        if 'filter' in additions:
            and_filters.append(additions['filter'])
        if 'query' in additions:
            payload['query'] = additions['query']

    # ?q=<lucene>
    if 'q' in search_params:
        query_body = payload.setdefault('query', {})
        query_body['query_string'] = {'query': search_params['q']}  # A bit indirect?

    # filters are actually going to be a more common case
    reserved_query_params = RESERVED_QUERY_PARAMS | set(reserved_query_params or [])
    query_params = {}
    for name, raw_value in search_params.items():
        if name in reserved_query_params or name.endswith('__full'):
            continue
        query_params[name] = raw_value

    for consumer in query_param_consumers:
        try:
            consumed_filter = consumer.consume_params(query_params)
        except DateTimeError as e:
            raise Http400("Bad query parameter: {}".format(six.text_type(e)))
        if consumed_filter:
            and_filters.append(consumed_filter)

    # add unconsumed filters
    for name, raw_value in query_params.items():
        # assume these fields are analyzed in ES so convert to lowercase
        # Any fields that are not analyzed in ES should be in the
        # ``query_param_consumers`` above
        and_filters.append(filters.term(name, raw_value.lower()))

    return payload
def case_filter(self):
    """Return the AND of the base report filters and the PNC-specific filters.

    Adds a term filter on ``pp_case_filter.#value``, a range filter starting
    42 days before now (UTC), and - depending on the ``PNC_status`` request
    parameter - either a 'yes' term filter for each of the seven
    ``case_pp_<n>_done.#value`` fields ('On Time') or an OR of their
    ``not_term`` counterparts (any other non-empty status).
    """
    cutoff = datetime.datetime.utcnow() - timedelta(days=42)
    done_fields = ['case_pp_%s_done.#value' % visit for visit in range(1, 8)]

    _filters = BaseHNBCReport.base_filters(self)
    _filters.append(filters.term('pp_case_filter.#value', '1'))
    # NOTE(review): no field name is passed to filters.range here - confirm
    # against the helper's signature.
    _filters.append(filters.range(gte=json_format_date(cutoff)))

    status = self.request_params.get('PNC_status', '')
    if status == 'On Time':
        _filters.extend([filters.term(name, 'yes') for name in done_fields])
    elif status:
        not_done = [filters.not_term(name, 'yes') for name in done_fields]
        if not_done:
            _filters.append(filters.OR(*not_done))

    return filters.AND(*_filters)
def _get_cursor_query(domain, params, last_date, last_id):
    """Return the query for the page following ``(last_date, last_id)``.

    A document matches when it was indexed on ``last_date`` with an ``_id``
    greater than ``last_id``, or was indexed after ``last_date``.
    """
    same_date_later_id = filters.AND(
        filters.term('@indexed_on', last_date),
        filters.range_filter('_id', gt=last_id),
    )
    later_date = case_search.indexed_on(gt=last_date)
    base_query = _get_query(domain, params)
    return base_query.filter(filters.OR(same_date_later_id, later_date))
def top_five_projects_by_country(request):
    """Return a JSON response mapping the requested country to up to five projects.

    Projects are active, real domains deployed in the ``country`` GET
    parameter, sorted on ``cp_n_active_cc_users``. Without a ``country``
    parameter the response body is an empty dict.
    """
    if 'country' not in request.GET:
        return json_response({})

    country = request.GET.get('country')
    source_fields = [
        'internal.area',
        'internal.sub_area',
        'cp_n_active_cc_users',
        'deployment.countries',
    ]
    query = DomainES().is_active().real_domains()
    query = query.filter(filters.term('deployment.countries', country))
    query = query.sort('cp_n_active_cc_users', True).source(source_fields).size(5)
    return json_response({country: query.run().hits})
def get_case_and_action_counts_for_domains(domains):
    """Return ``{domain: {"cases": n, "case_actions": m}}`` for each of ``domains``.

    Counts come from a domain terms aggregation with a nested ``actions``
    aggregation; domains without a bucket get zero for both counts.
    """
    per_domain = aggregations.TermsAggregation("domain", "domain").aggregation(
        aggregations.NestedAggregation("actions", "actions")
    )
    query = CaseES().filter(filters.term("domain", domains))
    results = query.aggregation(per_domain).size(0).run()
    buckets = results.aggregations.domain.buckets_dict

    stats = {}
    for domain in domains:
        bucket = buckets.get(domain, None)
        if bucket:
            stats[domain] = {
                "cases": bucket.doc_count,
                "case_actions": bucket.actions.doc_count,
            }
        else:
            stats[domain] = {"cases": 0, "case_actions": 0}
    return stats
def to_es_filter(self):
    """Return the ES filter for this value.

    Dynamic dates become an exclusive ``date_range`` on the filter's field,
    null values a ``missing`` filter, list values a term filter on the
    operand's values, and everything else is delegated to
    ``self._scalar_filter.es``.
    """
    # TODO: support the array and null operators defined at top of class
    field = self.filter.field
    if self._is_dyn_date():
        start_date, end_date = get_daterange_start_end_dates(
            self.value['operator'], *self.value['operand']
        )
        return filters.date_range(field, gt=start_date, lt=end_date)
    if self._is_null():
        return filters.missing(field)
    if self._is_list():
        return filters.term(field, [choice.value for choice in self.value['operand']])
    return self._scalar_filter.es(field, self.value['operand'])
def case_owners(self):
    """Return the de-duplicated list of owner ids implied by the EMWF selection.

    The result is the union of:

    * owner ids for the selected special user types (admin, unknown, demo,
      commtrack), via ``self.get_special_owner_ids``;
    * explicitly selected user ids;
    * explicitly selected sharing group ids;
    * the users listed on the selected reporting groups;
    * ids of case-sharing groups containing any of those users;
    * ids from ``get_locations_and_children`` for the selected locations and
      the selected users' locations.
    """
    slugs = self.request.GET.getlist(EMWF.slug)

    # Owner ids for the demo_user / admin / unknown / commtrack selections.
    chosen_types = EMWF.selected_user_types(slugs)
    special_owner_ids = self.get_special_owner_ids(
        admin=HQUserType.ADMIN in chosen_types,
        unknown=HQUserType.UNKNOWN in chosen_types,
        demo=HQUserType.DEMO_USER in chosen_types,
        commtrack=HQUserType.COMMTRACK in chosen_types,
    )

    # Ids that were picked explicitly in the filter widget.
    picked_user_ids = EMWF.selected_user_ids(slugs)
    picked_reporting_group_ids = EMWF.selected_reporting_group_ids(slugs)
    picked_sharing_group_ids = EMWF.selected_sharing_group_ids(slugs)

    # Cases owned by selected locations, selected users' locations, or
    # their children.
    location_ids = set(
        EMWF.selected_location_ids(slugs)
        + get_users_location_ids(self.domain, picked_user_ids)
    )
    location_owner_ids = get_locations_and_children(location_ids).location_ids()

    # Users listed on the selected reporting groups.
    reporting_groups = (
        HQESQuery(index="groups")
        .domain(self.domain)
        .doc_type("Group")
        .filter(filters.term("_id", picked_reporting_group_ids))
        .fields(["users"])
    )
    member_lists = [hit["users"] for hit in reporting_groups.run().hits]
    reporting_group_user_ids = list(set().union(*member_lists))

    # Case-sharing groups that contain either a reporting-group member or an
    # explicitly selected user.
    sharing_groups = (
        HQESQuery(index="groups")
        .domain(self.domain)
        .doc_type("Group")
        .term("case_sharing", True)
        .term("users", (reporting_group_user_ids + picked_user_ids))
        .fields([])
    )
    matched_sharing_group_ids = sharing_groups.run().doc_ids

    return list(set().union(
        special_owner_ids,
        picked_user_ids,
        picked_sharing_group_ids,
        reporting_group_user_ids,
        matched_sharing_group_ids,
        location_owner_ids,
    ))
def couch_sql_migration_stats():
    """Emit datadog gauges for domains not yet using the SQL backend.

    Queries ``DomainES`` (default filters removed) for domains whose
    ``use_sql_backend`` is ``False``, summing ``cp_n_cases`` and
    ``cp_n_forms``, then reports three gauges: the number of matching domains
    and the two sums, each converted with ``int``.
    """
    query = DomainES().filter(filters.term('use_sql_backend', False))
    query = query.remove_default_filters().aggregations([
        aggregations.SumAggregation('cases', 'cp_n_cases'),
        aggregations.SumAggregation('forms', 'cp_n_forms'),
    ])
    result = query.size(0).run()

    def _report(metric, value):
        # Gauges are reported as whole numbers.
        datadog_gauge(metric, int(value))

    _report('commcare.couch_sql_migration.domains_remaining', result.total)
    _report('commcare.couch_sql_migration.forms_remaining', result.aggregations.forms.value)
    _report('commcare.couch_sql_migration.cases_remaining', result.aggregations.cases.value)
def top_five_projects_by_country(request):
    """Return a JSON response with up to five projects for the requested country.

    Superusers additionally receive the ``name``, ``internal.organization_name``
    and ``internal.notes`` source fields. Without a ``country`` GET parameter
    the response body is an empty mapping; otherwise it maps the country to
    the query hits and carries an ``internal`` flag mirroring
    ``request.user.is_superuser``.
    """
    is_internal = request.user.is_superuser

    source_fields = []
    if is_internal:
        source_fields += ['name', 'internal.organization_name', 'internal.notes']
    source_fields += [
        'internal.area',
        'internal.sub_area',
        'cp_n_active_cc_users',
        'deployment.countries',
    ]

    payload = {}
    if 'country' in request.GET:
        country = request.GET.get('country')
        query = DomainES().is_active_project().real_domains()
        query = query.filter(filters.term('deployment.countries', country))
        query = query.sort('cp_n_active_cc_users', True).source(source_fields)
        top_projects = query.size(5).run().hits
        payload = {country: top_projects, 'internal': is_internal}

    return json_response(payload)
def es_search_by_params(search_params, domain, reserved_query_params=None):
    """Build a raw ES payload from request query parameters.

    The payload always contains an ``and`` filter restricting results to
    ``domain``. Further clauses are added from:

    * ``_search`` - a JSON object whose optional ``filter`` is appended to the
      ``and`` list and whose optional ``query`` becomes the payload query;
    * ``q`` - a Lucene query string stored under ``query.query_string``;
    * every entry of ``query_param_consumers``, which may consume parameters
      and return a filter;
    * any remaining, unconsumed parameter, added as a term filter.

    :param search_params: mapping of query parameter names to values.
    :param domain: domain name the search is restricted to.
    :param reserved_query_params: extra parameter names to ignore, in
        addition to ``RESERVED_QUERY_PARAMS``.
    :raises Http400: if ``_search`` is not valid JSON or not a JSON object,
        or if a consumer raises ``DateTimeError``.
    :return: the payload dict.
    """
    payload = {
        "filter": {
            "and": [
                {"term": {"domain.exact": domain}}
            ]
        },
    }
    and_clauses = payload["filter"]["and"]

    # ?_search=<json> for providing raw ES query, which is nonetheless restricted here
    # NOTE: The fields actually analyzed into ES indices differ somewhat from the raw
    # XML / JSON.
    if '_search' in search_params:
        # The value comes straight from the client: malformed JSON is a bad
        # request, not a server error.
        try:
            additions = json.loads(search_params['_search'])
        except ValueError:
            raise Http400("Bad query parameter")
        # Valid JSON that is not an object (list, number, ...) cannot carry
        # 'filter' / 'query' keys and would fail on the lookups below.
        if not isinstance(additions, dict):
            raise Http400("Bad query parameter")

        if 'filter' in additions:
            and_clauses.append(additions['filter'])

        if 'query' in additions:
            payload['query'] = additions['query']

    # ?q=<lucene>
    if 'q' in search_params:
        payload['query'] = payload.get('query', {})
        payload['query']['query_string'] = {'query': search_params['q']}  # A bit indirect?

    # filters are actually going to be a more common case
    reserved_query_params = RESERVED_QUERY_PARAMS | set(reserved_query_params or [])
    query_params = {
        param: value
        for param, value in search_params.items()
        if param not in reserved_query_params and not param.endswith('__full')
    }

    # Each consumer gets the same dict so that parameters one consumer takes
    # are no longer seen by later consumers or by the fallback loop below.
    for consumer in query_param_consumers:
        try:
            payload_filter = consumer.consume_params(query_params)
        except DateTimeError:
            raise Http400("Bad query parameter")

        if payload_filter:
            and_clauses.append(payload_filter)

    # add unconsumed filters
    for param, value in query_params.items():
        and_clauses.append(filters.term(param, value))

    return payload
def get_call_center_domains():
    """Return a ``DomainLite`` for every active, non-snapshot call-center domain.

    Domains are selected from ``DomainES`` where
    ``call_center_config.enabled`` is ``True``. ``cc_case_type`` falls back to
    an empty string when the hit has no ``call_center_config.case_type``.
    """
    query = DomainES().is_active().is_snapshot(False)
    query = query.filter(filters.term('call_center_config.enabled', True))
    query = query.fields(['name', 'default_timezone', 'call_center_config.case_type'])
    hits = query.run().hits

    return [
        DomainLite(
            name=hit['name'],
            default_timezone=hit['default_timezone'],
            cc_case_type=hit.get('call_center_config.case_type', ''),
        )
        for hit in hits
    ]
def get_domains_to_update_es_filter():
    """Return an ES filter selecting the domains whose stats need refreshing.

    A domain matches when any of the following holds:

    * it has no ``cp_last_updated`` value;
    * ``cp_last_updated`` is more than seven days old;
    * ``cp_last_updated`` is within the last seven days and the domain's name
      is among the domains with a form submitted in the last day.
    """
    week_ago = datetime.utcnow() - timedelta(days=7)
    stale = filters.date_range('cp_last_updated', lt=week_ago)
    fresh = filters.date_range('cp_last_updated', gte=week_ago)
    never_updated = filters.missing('cp_last_updated')

    # Domains that received at least one form in the past 24 hours.
    recent_forms = FormES().submitted(gte=datetime.utcnow() - timedelta(days=1))
    recent_forms = recent_forms.terms_aggregation('domain', 'domain').size(0)
    active_domain_names = recent_forms.run().aggregations.domain.keys

    fresh_but_active = filters.AND(fresh, filters.term('name', active_domain_names))
    return filters.OR(never_updated, stale, fresh_but_active)
def get_call_center_domains():
    """Return a ``DomainLite`` for every active, non-snapshot call-center domain.

    Domains are selected from ``DomainES`` where
    ``call_center_config.enabled`` is ``True``. Missing values default to an
    empty string for ``cc_case_type`` and to ``True`` for ``use_fixtures``.
    """
    wanted_fields = [
        "name",
        "default_timezone",
        "call_center_config.case_type",
        "call_center_config.use_fixtures",
    ]
    enabled = filters.term("call_center_config.enabled", True)
    query = DomainES().is_active().is_snapshot(False).filter(enabled)
    hits = query.fields(wanted_fields).run().hits

    return [
        DomainLite(
            name=hit["name"],
            default_timezone=hit["default_timezone"],
            cc_case_type=hit.get("call_center_config.case_type", ""),
            use_fixtures=hit.get("call_center_config.use_fixtures", True),
        )
        for hit in hits
    ]
def get_case_and_action_counts_for_domains(domains):
    """Return case and case-action counts for each of the given domains.

    Runs one ``CaseES`` query (size 0) with a terms aggregation on ``domain``
    that nests an aggregation over ``actions``.

    Returns a dict mapping each entry of ``domains`` to
    ``{'cases': <int>, 'case_actions': <int>}``; domains with no bucket in
    the result get zeros.
    """
    nested_actions = aggregations.NestedAggregation('actions', 'actions')
    by_domain = aggregations.TermsAggregation('domain', 'domain').aggregation(nested_actions)

    query = CaseES().filter(filters.term('domain', domains)).aggregation(by_domain)
    buckets = query.size(0).run().aggregations.domain.buckets_dict

    def _counts(bucket):
        # A missing (or otherwise falsy) bucket means nothing was counted.
        if not bucket:
            return {'cases': 0, 'case_actions': 0}
        return {
            'cases': bucket.doc_count,
            'case_actions': bucket.actions.doc_count,
        }

    return {name: _counts(buckets.get(name, None)) for name in domains}
def get_block_filter(block):
    """Return a term filter matching ``block_name.#value`` to ``block`` lower-cased."""
    normalized_block = block.lower()
    return es_filters.term("block_name.#value", normalized_block)
def make_filter(term):
    """Return a term filter matching ``term`` on the property ``prop``.

    ``prop`` is not a parameter of this function; it is resolved from the
    surrounding scope at call time.
    """
    term_filter = es_filters.term(prop, term)
    return term_filter
def get_gp_filter(gp):
    """Return a term filter on ``owner_id`` for users whose ``gp`` is selected.

    A user qualifies when the ``gp`` entry of their ``user_data`` (an empty
    mapping if the attribute is absent) is contained in ``self.gp``. ``self``
    is resolved from the surrounding scope.
    """
    # NOTE(review): the `gp` argument is never read - membership is checked
    # against self.gp instead. Confirm against callers whether that is intended.
    def _user_gp(user):
        return getattr(user, 'user_data', {}).get('gp')

    matching_owner_ids = [
        user._id for user in self.users if _user_gp(user) in self.gp
    ]
    return es_filters.term("owner_id", matching_owner_ids)
def to_es_filter(self):
    """Return a term filter matching the ``name`` field to ``self.case_name``."""
    name_filter = esfilters.term('name', self.case_name)
    return name_filter
def consume_params(self, raw_params):
    """Remove ``self.param`` from ``raw_params`` and turn it into a term filter.

    The parameter is popped from ``raw_params`` whether or not it yields a
    filter, so the caller's dict is mutated.

    Returns ``filters.term(self.term, value)`` when the popped value is
    truthy, otherwise ``None``.
    """
    value = raw_params.pop(self.param, None)
    if not value:
        return None
    return filters.term(self.term, value)
def test_result_parsing_complex(self):
    """Check that nested terms/filter and range aggregation results are parsed.

    Builds a query with a ``users`` terms aggregation (holding ``closed`` and
    ``open`` filter sub-aggregations) and a ``by_date`` range aggregation,
    wraps a canned raw response in ``ESQuerySet`` and verifies bucket access
    by attribute, by ``buckets_dict`` and through ``counts_by_bucket``.
    """
    query = HQESQuery('cases')
    users_agg = TermsAggregation("users", 'user_id').aggregation(
        FilterAggregation('closed', filters.term('closed', True))
    ).aggregation(
        FilterAggregation('open', filters.term('closed', False))
    )
    query = query.aggregation(users_agg)
    by_date_agg = RangeAggregation('by_date', 'name', [
        AggregationRange(end='c'),
        AggregationRange(start='f'),
        AggregationRange(start='k', end='p'),
    ])
    query = query.aggregation(by_date_agg)

    # Canned ES response: one terms bucket and three keyed range buckets.
    user1_bucket = {
        "closed": {"doc_count": 0},
        "doc_count": 2,
        "key": "user1",
        "open": {"doc_count": 2},
    }
    range_buckets = {
        "*-c": {"to": "c", "doc_count": 3},
        "f-*": {"from": "f", "doc_count": 8},
        "k-p": {"from": "k", "to": "p", "doc_count": 6},
    }
    raw_result = {
        "aggregations": {
            "users": {
                "buckets": [user1_bucket],
                "doc_count_error_upper_bound": 0,
                "sum_other_doc_count": 0,
            },
            "by_date": {
                "buckets": range_buckets,
            },
        },
    }

    queryset = ESQuerySet(raw_result, deepcopy(query))
    users = queryset.aggregations.users
    user1 = users.buckets.user1

    self.assertEqual(user1.key, 'user1')
    self.assertEqual(user1.doc_count, 2)
    self.assertEqual(user1.closed.doc_count, 0)
    self.assertEqual(user1.open.doc_count, 2)
    self.assertEqual(users.buckets_dict['user1'].open.doc_count, 2)
    self.assertEqual(users.counts_by_bucket(), {
        'user1': 2
    })
    self.assertEqual(queryset.aggregations.by_date.counts_by_bucket(), {
        '*-c': 3,
        'f-*': 8,
        'k-p': 6,
    })