def _get_case_counts_by_user(domain, datespan, case_types=None, is_opened=True, owner_ids=None):
    """Count cases opened (or closed) per user within ``datespan``.

    Runs a terms aggregation on the ``opened_by``/``closed_by`` field and
    returns a dict mapping user id -> case count.
    """
    if is_opened:
        date_field, user_field = 'opened_on', 'opened_by'
    else:
        date_field, user_field = 'closed_on', 'closed_by'

    query = CaseES().domain(domain).filter(
        filters.date_range(
            date_field,
            gte=datespan.startdate.date(),
            lte=datespan.enddate.date(),
        )
    ).terms_aggregation(user_field, 'by_user').size(0)

    if case_types:
        query = query.case_type(case_types)
    else:
        # exclude the synthetic user-as-case type
        query = query.filter(filters.NOT(case_type_filter('commcare-user')))

    if owner_ids:
        # NOTE(review): despite the parameter name, these ids are matched
        # against the opened_by/closed_by user field, not owner_id —
        # confirm intent with callers
        query = query.filter(filters.term(user_field, owner_ids))

    return query.run().aggregations.by_user.counts_by_bucket()
def _comparison_raw(case_property_name_raw, op, value_raw, node):
    """Build an ES filter for a comparison node (=, !=, <, <=, >, >=).

    ``context`` is a free variable from the enclosing scope (it carries
    fuzzy-match settings and is passed to ``unwrap_value``).
    Raises CaseFilterError when the LHS is not a property Step or the RHS
    of a range comparison is not a number/date.
    """
    if not isinstance(case_property_name_raw, Step):
        raise CaseFilterError(
            _("We didn't understand what you were trying to do with {}").format(serialize(node)),
            serialize(node),
        )
    case_property_name = serialize(case_property_name_raw)
    value = unwrap_value(value_raw, context)

    if op not in (EQ, NEQ):
        # range comparison (<, <=, >, >=)
        try:
            return case_property_range_query(
                case_property_name, **{RANGE_OP_MAPPING[op]: value})
        except (TypeError, ValueError):
            raise CaseFilterError(
                _("The right hand side of a comparison must be a number or date. "
                  "Dates must be surrounded in quotation marks"),
                serialize(node),
            )

    query = case_property_query(case_property_name, value, fuzzy=context.fuzzy)
    return filters.NOT(query) if op == NEQ else query
def _get_form_counts_by_date(domain, user_ids, datespan, timezone, is_submission_time):
    """Histogram of daily form counts for the given users.

    Buckets by submission time or completion time depending on
    ``is_submission_time``; system forms are excluded.
    Returns a dict mapping ISO date string -> form count.
    """
    form_query = (FormES().domain(domain).user_id(user_ids))
    for xmlns in SYSTEM_FORM_XMLNS_MAP.keys():
        form_query = form_query.filter(filters.NOT(xmlns_filter(xmlns)))
    if is_submission_time:
        form_query = (form_query.submitted(
            gte=datespan.startdate.date(),
            lte=datespan.enddate.date()).submitted_histogram(timezone.zone))
    else:
        form_query = (form_query.completed(
            gte=datespan.startdate.date(),
            lte=datespan.enddate.date()).completed_histogram(timezone.zone))
    form_query = form_query.size(0)
    results = form_query.run().aggregations.date_histogram.buckets_list
    # ES bucket keys are epoch milliseconds, so divide by 1000 for
    # fromtimestamp(). Bug fix: pass the report timezone into
    # fromtimestamp() — previously it was omitted, so the bucket date was
    # computed in the server's local timezone, which can shift counts to
    # the wrong day (the other copy of this helper in this file already
    # does this correctly).
    results = list(
        map(
            lambda result: (datetime.fromtimestamp(result.key // 1000, timezone).date().
                            isoformat(), result.doc_count),
            results,
        ))
    return dict(results)
def _get_case_counts_by_user(domain, datespan, case_types=None, is_opened=True,
                             user_ids=None, export=False):
    """Count cases opened or closed per user within ``datespan``.

    ``export=True`` routes the query to the export ES instance. Returns a
    dict mapping user id -> case count from a terms aggregation on the
    ``opened_by``/``closed_by`` field.
    """
    if is_opened:
        date_field, user_field = 'opened_on', 'opened_by'
    else:
        date_field, user_field = 'closed_on', 'closed_by'
    es_instance = ES_EXPORT_INSTANCE if export else ES_DEFAULT_INSTANCE

    query = CaseES(es_instance_alias=es_instance).domain(domain)
    query = query.filter(filters.date_range(
        date_field,
        gte=datespan.startdate.date(),
        lte=datespan.enddate.date(),
    ))
    query = query.terms_aggregation(user_field, 'by_user').size(0)

    if case_types:
        query = query.case_type(case_types)
    else:
        # exclude the synthetic user-as-case type
        query = query.filter(filters.NOT(case_type_filter('commcare-user')))

    if user_ids:
        query = query.filter(filters.term(user_field, user_ids))

    return query.run().aggregations.by_user.counts_by_bucket()
def _get_case_case_counts_by_owner(domain, datespan, case_types, is_total=False,
                                   owner_ids=None, export=False):
    """Count cases per owner that were open at some point in ``datespan``.

    A case qualifies if it was opened on/before the end date and not
    closed before the start date. When ``is_total`` is False, results are
    further restricted to cases active inside the span. ``export=True``
    routes to the export ES instance. Returns {owner_id: count}.
    """
    es_instance = ES_EXPORT_INSTANCE if export else ES_DEFAULT_INSTANCE
    query = CaseES(es_instance_alias=es_instance).domain(domain)
    query = query.opened_range(lte=datespan.enddate.date())
    query = query.NOT(closed_range_filter(lt=datespan.startdate.date()))
    query = query.terms_aggregation('owner_id', 'owner_id').size(0)

    if case_types:
        query = query.filter({"terms": {"type.exact": case_types}})
    else:
        # exclude the synthetic user-as-case type
        query = query.filter(filters.NOT(case_type_filter('commcare-user')))

    if not is_total:
        query = query.active_in_range(
            gte=datespan.startdate.date(),
            lte=datespan.enddate.date(),
        )

    if owner_ids:
        query = query.owner(owner_ids)

    return query.run().aggregations.owner_id.counts_by_bucket()
def _equality(node):
    """Returns the filter for an equality operation (=, !=)

    ``fuzzy`` is a free variable from the enclosing scope; when truthy,
    equality uses fuzzy text matching instead of an exact match.
    """
    rhs_types = (int, str, float, FunctionCall, UnaryExpression)
    if isinstance(node.left, Step) and isinstance(node.right, rhs_types):
        # This is a leaf node
        case_property_name = serialize(node.left)
        value = _unwrap_function(node.right)
        if value == '':
            query = case_property_missing(case_property_name)
        elif fuzzy:
            query = case_property_text_query(case_property_name, value, fuzziness='AUTO')
        else:
            query = exact_case_property_text_query(case_property_name, value)
        return filters.NOT(query) if node.op == '!=' else query

    if isinstance(node.right, Step):
        _raise_step_RHS(node)
    raise CaseFilterError(
        _("We didn't understand what you were trying to do with {}").format(serialize(node)),
        serialize(node),
    )
def _equality(node):
    """Returns the filter for an equality operation (=, !=)

    Raises CaseFilterError when the node is not a recognized
    property-to-literal comparison.
    """
    # Modernized: six's ``integer_types`` / ``string_types`` are just
    # ``(int,)`` and ``str`` on Python 3, so spell the acceptable RHS
    # types directly (matches the newer copy of this function in this file).
    acceptable_rhs_types = (int, str, float, FunctionCall)
    if isinstance(node.left, Step) and isinstance(node.right, acceptable_rhs_types):
        # This is a leaf node
        case_property_name = serialize(node.left)
        value = _unwrap_function(node.right)
        if value == '':
            # empty RHS means "property is missing/blank"
            q = case_property_missing(case_property_name)
        else:
            q = exact_case_property_text_query(case_property_name, value)
        if node.op == '!=':
            return filters.NOT(q)
        return q
    if isinstance(node.right, Step):
        _raise_step_RHS(node)
    raise CaseFilterError(
        _("We didn't understand what you were trying to do with {}").
        format(serialize(node)), serialize(node))
def _get_form_counts_by_date(domain, user_ids, datespan, timezone, is_submission_time):
    """Histogram of daily form counts for the given users.

    Buckets by submission or completion time depending on
    ``is_submission_time``; system forms are excluded.
    Returns a dict mapping ISO date string -> form count.
    """
    query = FormES().domain(domain).user_id(user_ids)
    # drop system-generated forms
    for system_xmlns in SYSTEM_FORM_XMLNS_MAP.keys():
        query = query.filter(filters.NOT(xmlns_filter(system_xmlns)))

    start = datespan.startdate.date()
    end = datespan.enddate.date()
    if is_submission_time:
        query = query.submitted(gte=start, lte=end).submitted_histogram(timezone.zone)
    else:
        query = query.completed(gte=start, lte=end).completed_histogram(timezone.zone)

    buckets = query.size(0).run().aggregations.date_histogram.buckets_list
    # Bucket keys are epoch milliseconds relative to the requested timezone;
    # divide by 1000 for fromtimestamp() and pass the timezone so the date
    # is computed in that zone rather than treated as UTC/local.
    return {
        datetime.fromtimestamp(bucket.key // 1000, timezone).date().isoformat(): bucket.doc_count
        for bucket in buckets
    }
def consume_params(self, raw_params):
    """Pop this filter's parameter from ``raw_params`` and build its filter.

    Returns None when the parameter is absent/empty. Otherwise returns an
    OR of two cases: documents that have the field (range-filtered on it),
    and documents missing the field (range-filtered on ``received_on``
    as a fallback).
    """
    value = raw_params.pop(self.param, None)
    if not value:
        return None
    field_present = filters.AND(
        filters.NOT(filters.missing(self.param)),
        filters.range_filter(self.param, **value),
    )
    field_missing = filters.AND(
        filters.missing(self.param),
        filters.range_filter("received_on", **value),
    )
    return filters.OR(field_present, field_missing)
def test_not_filter_edge_case(self):
    """NOT(OR(a, b)) should exclude documents matching either term."""
    self._setup_data()
    excluded = filters.OR(
        filters.term('domain', 'd'),
        filters.term('app_id', 'a'),
    )
    query = FormES().remove_default_filters().filter(filters.NOT(excluded))
    self.assertEqual(query.run().doc_ids, ['doc3'])
def all_project_data_filter(domain, mobile_user_and_group_slugs):
    """Filter keeping all project data except cases owned by special
    system user types that were not explicitly selected."""
    selected = EMWF.selected_user_types(mobile_user_and_group_slugs)
    # Exclude each special owner class unless that type was selected
    excluded_owner_ids = _get_special_owner_ids(
        domain=domain,
        admin=HQUserType.ADMIN not in selected,
        unknown=HQUserType.UNKNOWN not in selected,
        web=HQUserType.WEB not in selected,
        demo=HQUserType.DEMO_USER not in selected,
        commtrack=False,
    )
    excluded = case_es.owner(excluded_owner_ids)
    return filters.NOT(excluded)
def subcase(node, context):
    """
    Supports the following syntax:
    - subcase-exists('parent', {subcase filter} )
    - subcase-count('host', {subcase_filter} ) {=, !=, >, <, >=, <=} {integer value}
    """
    parsed_query = _parse_normalize_subcase_query(node)
    parent_ids = _get_parent_case_ids_matching_subcase_query(parsed_query, context)

    if parsed_query.invert:
        if parent_ids:
            return filters.NOT(filters.doc_id(parent_ids))
        return filters.match_all()

    # TODO: once we are on ES > 2.4, return filters.match_none() when
    # parent_ids is empty instead of a doc_id filter on an empty list
    return filters.doc_id(parent_ids)
def _get_form_counts_by_user(domain, datespan, is_submission_time, user_ids=None):
    """Count non-system forms per user within ``datespan``.

    Buckets by submission or completion time depending on
    ``is_submission_time``. Returns {user_id: count}.
    """
    query = FormES().domain(domain).filter(
        filters.NOT(xmlns_filter(SYSTEM_FORM_XMLNS)))
    start = datespan.startdate.date()
    end = datespan.enddate.date()
    if is_submission_time:
        query = query.submitted(gte=start, lte=end)
    else:
        query = query.completed(gte=start, lte=end)
    if user_ids:
        query = query.user_id(user_ids)
    query = query.user_aggregation().size(0)
    return query.run().aggregations.user.counts_by_bucket()
def test_not_or_rewrite(self):
    """NOT(OR(a, b)) should serialize to a bool/must_not wrapping a
    bool/should of the two term filters, alongside the default
    match_all filter and query."""
    # Expected raw ES query body; compared verbatim by checkQuery below.
    json_output = {
        "query": {
            "bool": {
                "filter": [
                    {
                        "bool": {
                            "must_not": {
                                "bool": {
                                    "should": [
                                        {
                                            "term": {
                                                "type": "A"
                                            }
                                        },
                                        {
                                            "term": {
                                                "type": "B"
                                            }
                                        }
                                    ]
                                }
                            }
                        }
                    },
                    {
                        "match_all": {}
                    }
                ],
                "must": {
                    "match_all": {}
                }
            }
        },
        "size": SIZE_LIMIT
    }
    query = HQESQuery('cases').filter(
        filters.NOT(
            filters.OR(filters.term('type', 'A'), filters.term('type', 'B'))
        )
    )
    self.checkQuery(query, json_output)
def _get_users_filter(self, mobile_user_and_group_slugs):
    """Build the form user_id filter for the selected users/groups.

    When every mobile worker is selected (and no admin/demo users are in
    the selection), express the filter as a negation over the non-mobile
    users; otherwise filter directly on the selected users' ids.
    """
    users_data = EMWF.pull_users_and_groups(
        self.domain, mobile_user_and_group_slugs, include_inactive=True)
    selected_user_types = EMWF.selected_user_types(mobile_user_and_group_slugs)
    all_mobile_workers_selected = HQUserType.REGISTERED in selected_user_types

    if all_mobile_workers_selected and not users_data.admin_and_demo_users:
        # cheaper to exclude everyone who is NOT a mobile worker
        non_mobile_users = util.get_all_users_by_domain(
            self.domain,
            user_filter=HQUserType.all_but_users(),
            simplified=True,
        )
        excluded_ids = (u.user_id for u in non_mobile_users if u.user_id)
        return es_filters.NOT(form_es.user_id(excluded_ids))

    included_ids = (u.user_id for u in users_data.combined_users if u.user_id)
    return form_es.user_id(included_ids)
def _get_form_counts_by_user(domain, datespan, is_submission_time, user_ids=None,
                             export=False):
    """Count non-system forms per user within ``datespan``.

    Buckets by submission or completion time. ``export=True`` routes the
    query to the export ES instance. Returns {user_id: count}.
    """
    es_instance = ES_EXPORT_INSTANCE if export else ES_DEFAULT_INSTANCE
    query = FormES(es_instance_alias=es_instance).domain(domain)
    # drop system-generated forms
    for system_xmlns in SYSTEM_FORM_XMLNS_MAP.keys():
        query = query.filter(filters.NOT(xmlns_filter(system_xmlns)))

    start = datespan.startdate.date()
    end = datespan.enddate.date()
    if is_submission_time:
        query = query.submitted(gte=start, lte=end)
    else:
        query = query.completed(gte=start, lte=end)

    if user_ids:
        query = query.user_id(user_ids)

    query = query.user_aggregation().size(0)
    return query.run().aggregations.user.counts_by_bucket()
def test_not_and_rewrite(self):
    """NOT(AND(a, b)) is rewritten via De Morgan into OR(NOT a, NOT b)
    (legacy pre-2.x ES "filtered"/"and"/"or" filter syntax)."""
    # Expected raw ES query body; compared verbatim by checkQuery below.
    json_output = {
        "query": {
            "filtered": {
                "filter": {
                    "and": [{
                        'or': (
                            {
                                "not": {
                                    "term": {
                                        "type": "A"
                                    }
                                }
                            },
                            {
                                "not": {
                                    "term": {
                                        "type": "B"
                                    }
                                }
                            },
                        )
                    }, {
                        "match_all": {}
                    }]
                },
                "query": {
                    "match_all": {}
                }
            }
        },
        "size": SIZE_LIMIT
    }
    query = HQESQuery('cases').filter(
        filters.NOT(
            filters.AND(filters.term('type', 'A'), filters.term('type', 'B'))))
    self.checkQuery(query, json_output)
def get_assigned_patients(self):
    """get list of patients and their submissions on who this chw is
    assigned as primary hp

    Returns the ES case hits (with ``info_url``/``dot_url`` links added)
    sorted by numeric pact id.
    """
    source = [
        "_id", "name", "pactid.#value", "hp_status.#value", "dot_status.#value"
    ]
    case_query = (get_base_case_es_query(0, 100).filter(
        filters.term('type', PACT_CASE_TYPE)).filter(
            filters.term(
                'hp.#value', self.get_user().raw_username)).filter(
                    filters.NOT(
                        filters.term(
                            'hp_status.#value',
                            'discharged'))).source(source).raw_query)
    chw_patients_res = self.case_es.run_query(case_query)
    assigned_patients = chw_patients_res['hits']['hits']
    for x in assigned_patients:
        x['info_url'] = self.pact_case_link(x['_id'])
        # Bug fix: the original condition used ``or``, which is a
        # tautology (None != "" evaluates True), so every patient got a
        # dot_url. Only add the DOT link when dot_status is actually set.
        if x['dot_status.#value'] is not None and x['dot_status.#value'] != "":
            x['dot_url'] = self.pact_dot_link(x['_id'])
    return sorted(assigned_patients, key=lambda x: int(x['pactid.#value']))
def _get_case_case_counts_by_owner(domain, datespan, case_types, is_total=False, owner_ids=None):
    """Count cases per owner that were open at some point in ``datespan``.

    A case qualifies if it was opened on/before the end date and not
    closed before the start date. When ``is_total`` is False, restrict
    further to cases active inside the span. Returns {owner_id: count}.
    """
    query = CaseES().domain(domain)
    query = query.opened_range(lte=datespan.enddate)
    query = query.NOT(closed_range_filter(lt=datespan.startdate))
    query = query.terms_aggregation('owner_id', 'owner_id').size(0)

    if case_types:
        query = query.filter({"terms": {"type.exact": case_types}})
    else:
        # exclude the synthetic user-as-case type
        query = query.filter(filters.NOT(case_type_filter('commcare-user')))

    if not is_total:
        query = query.active_in_range(gte=datespan.startdate, lte=datespan.enddate)

    if owner_ids:
        query = query.owner(owner_ids)

    return query.run().aggregations.owner_id.counts_by_bucket()
def to_es_filter(self):
    """Return the negation of the wrapped operand's ES filter."""
    inner = self.operand_filter.to_es_filter()
    return esfilters.NOT(inner)
def not_(node, context):
    """Negate the filter built from the single argument of a not() node."""
    from corehq.apps.case_search.filter_dsl import build_filter_from_ast
    confirm_args_count(node, 1)
    inner = build_filter_from_ast(node.args[0], context)
    return filters.NOT(inner)
def filter_users_in_test_locations(couch_user, domain, user_query):
    """Exclude users in test locations from ``user_query`` when the
    requesting user holds the CPMU role.

    Robustness fix: ``get_role`` can return None for a user with no role
    in this domain; previously ``.name`` would then raise AttributeError.
    Users without a role are treated as non-CPMU (query unchanged).
    """
    role = couch_user.get_role(domain)
    if role is not None and role.name == CPMU_ROLE_NAME:
        test_location_ids = find_test_location_ids(domain)
        user_query = user_query.filter(
            filters.NOT(location_filter(test_location_ids)))
    return user_query
def _get_parent_case_ids_matching_subcase_query(subcase_query, context): """Get a list of case IDs for cases that have a subcase with the given index identifier and matching the subcase predicate filter. Only cases with `[>,=] case_count_gt` subcases will be returned. """ # TODO: validate that the subcase filter doesn't contain any ancestor filtering from corehq.apps.case_search.filter_dsl import ( MAX_RELATED_CASES, build_filter_from_ast, ) if subcase_query.subcase_filter: subcase_filter = build_filter_from_ast(subcase_query.subcase_filter, context) else: subcase_filter = filters.match_all() index_identifier_filter = filters.term('indices.identifier', subcase_query.index_identifier) index_query = queries.nested( 'indices', queries.filtered( queries.match_all(), filters.AND( index_identifier_filter, filters.NOT(filters.term('indices.referenced_id', '')) # exclude deleted indices ) ) ) es_query = ( CaseSearchES().domain(context.domain) .filter(index_query) .filter(subcase_filter) .aggregation( aggregations.NestedAggregation( 'indices', 'indices', ).aggregation( aggregations.FilterAggregation( 'matching_indices', index_identifier_filter ).aggregation( aggregations.TermsAggregation( 'referenced_id', 'indices.referenced_id' ) ) ) ) ) if es_query.count() > MAX_RELATED_CASES: from ..exceptions import TooManyRelatedCasesError raise TooManyRelatedCasesError( _("The related case lookup you are trying to perform would return too many cases"), serialize(subcase_query.subcase_filter) ) counts_by_parent_id = es_query.run().aggregations.indices.matching_indices.referenced_id.counts_by_bucket() if subcase_query.op == '>' and subcase_query.count <= 0: return list(counts_by_parent_id) return [ case_id for case_id, count in counts_by_parent_id.items() if subcase_query.filter_count(count) ]