def _equality(node):
    """Build the filter for an equality expression (``=`` or ``!=``).

    The expression is only understood when the left side is a ``Step`` and the
    right side is an integer, string, float or ``FunctionCall``. The right side
    is passed through ``_unwrap_function`` before use; an empty-string result
    selects ``case_property_missing`` instead of an exact text match. For
    ``!=`` the resulting filter is wrapped in ``filters.NOT``.

    :param node: parsed expression exposing ``left``, ``right`` and ``op``
    :raises CaseFilterError: when the right side is a ``Step`` or the
        expression has any other unsupported shape
    """
    lhs, rhs = node.left, node.right
    value_types = integer_types + (string_types, float, FunctionCall)

    if not (isinstance(lhs, Step) and isinstance(rhs, value_types)):
        if isinstance(rhs, Step):
            # Always raises; gives a more specific message for this mistake.
            _raise_step_RHS(node)
        raise CaseFilterError(
            _("We didn't understand what you were trying to do with {}").format(serialize(node)),
            serialize(node)
        )

    # Leaf node: ``<case property> <op> <value>``
    property_name = serialize(lhs)
    unwrapped = _unwrap_function(rhs)
    if unwrapped == '':
        match = case_property_missing(property_name)
    else:
        match = exact_case_property_text_query(property_name, unwrapped)
    return filters.NOT(match) if node.op == '!=' else match
def unwrap_value(value, context):
    """Resolve the value side of an expression to a plain Python value.

    * A ``Step`` is rejected, because a case property reference is not a value.
    * A unary minus applied to a number returns the negated number.
    * A ``FunctionCall`` is evaluated via ``XPATH_VALUE_FUNCTIONS``.
    * Any other accepted value (int, str, float, bool) is returned unchanged.

    :param value: the parsed right-hand-side node or literal
    :param context: passed through to the XPath value function being called
    :raises CaseFilterError: for a ``Step``, an unsupported type, a unary minus
        applied to a non-number, an unknown function, or a function that
        raised ``XPathFunctionException``
    """
    if isinstance(value, Step):
        raise CaseFilterError(
            _("You cannot reference a case property on the right side "
              "of an operation. If \"{}\" is meant to be a value, please surround it with "
              "quotation marks").format(serialize(value)),
            ""
        )

    acceptable_types = (int, str, float, bool, FunctionCall, UnaryExpression)
    if not isinstance(value, acceptable_types):
        raise CaseFilterError(_("Unexpected type for value expression"), serialize(value))

    if isinstance(value, UnaryExpression) and value.op == '-':
        operand = value.right
        # Only plain numbers can be negated. ``-1 * "abc"`` silently evaluates
        # to ``""`` and ``-1 * <node>`` raises a bare TypeError, so reject
        # anything else with a proper filter error.
        if not isinstance(operand, (int, float)):
            raise CaseFilterError(_("Unexpected type for value expression"), serialize(value))
        return -1 * operand

    if not isinstance(value, FunctionCall):
        return value

    try:
        return XPATH_VALUE_FUNCTIONS[value.name](value, context)
    except KeyError:
        raise CaseFilterError(
            _("We don't know what to do with the function \"{}\". Accepted functions are: {}").format(
                value.name,
                ", ".join(XPATH_VALUE_FUNCTIONS),
            ),
            serialize(value)
        )
    except XPathFunctionException as e:
        # Keep the original exception as the cause for easier debugging.
        raise CaseFilterError(str(e), serialize(value)) from e
def _comparison_raw(case_property_name_raw, op, value_raw, node):
    """Build the filter for ``<case property> <op> <value>`` from its raw parts.

    ``EQ`` / ``NEQ`` produce a ``case_property_query`` (negated with
    ``filters.NOT`` for ``NEQ``); every other operator is looked up in
    ``RANGE_OP_MAPPING`` and produces a ``case_property_range_query``.
    ``context`` is read from the enclosing scope.

    :param case_property_name_raw: left-hand side; must be a ``Step``
    :param op: the comparison operator
    :param value_raw: right-hand side, resolved with ``unwrap_value``
    :param node: the full expression, used only for error reporting
    :raises CaseFilterError: if the left side is not a ``Step`` or the range
        query rejects the value with ``TypeError`` / ``ValueError``
    """
    if not isinstance(case_property_name_raw, Step):
        raise CaseFilterError(
            _("We didn't understand what you were trying to do with {}").format(serialize(node)),
            serialize(node)
        )

    property_name = serialize(case_property_name_raw)
    resolved = unwrap_value(value_raw, context)

    if op not in [EQ, NEQ]:
        # Range comparison: the mapping supplies the keyword argument name.
        try:
            return case_property_range_query(property_name, **{RANGE_OP_MAPPING[op]: resolved})
        except (TypeError, ValueError):
            raise CaseFilterError(
                _("The right hand side of a comparison must be a number or date. "
                  "Dates must be surrounded in quotation marks"),
                serialize(node),
            )

    es_filter = case_property_query(property_name, resolved, fuzzy=context.fuzzy)
    return filters.NOT(es_filter) if op == NEQ else es_filter
def _equality(node):
    """Return the filter for an equality operation (``=``, ``!=``).

    Handles the leaf case where the left side is a ``Step`` and the right side
    is an integer, string, float or ``FunctionCall``. The right side is first
    resolved with ``_unwrap_function``; an empty string is matched with
    ``case_property_missing``, anything else with
    ``exact_case_property_text_query``. ``!=`` negates the filter.

    :param node: parsed expression exposing ``left``, ``right`` and ``op``
    :raises CaseFilterError: for any expression that is not such a leaf
    """
    rhs_types = integer_types + (string_types, float, FunctionCall)
    is_leaf = isinstance(node.left, Step) and isinstance(node.right, rhs_types)

    if is_leaf:
        name = serialize(node.left)
        rhs_value = _unwrap_function(node.right)
        if rhs_value == '':
            leaf_filter = case_property_missing(name)
        else:
            leaf_filter = exact_case_property_text_query(name, rhs_value)
        if node.op == '!=':
            leaf_filter = filters.NOT(leaf_filter)
        return leaf_filter

    if isinstance(node.right, Step):
        # Raises a dedicated error for a case property on the right side.
        _raise_step_RHS(node)

    serialized = serialize(node)
    raise CaseFilterError(
        _("We didn't understand what you were trying to do with {}").format(serialized),
        serialize(node)
    )
def _raise_step_RHS(node):
    """Raise the error used when the right side of an operation is a case property.

    This function never returns.

    :param node: the offending expression; ``node.right`` is quoted in the
        message and the whole node is attached to the error
    :raises CaseFilterError: always
    """
    message = _(
        "You cannot reference a case property on the right side "
        "of a boolean operation. If \"{}\" is meant to be a value, please surround it with "
        "quotation marks"
    ).format(serialize(node.right))
    raise CaseFilterError(message, serialize(node))
def visit(node):
    """Recursively convert a parsed expression node into a filter.

    Dispatch order:

    1. a standalone ``FunctionCall`` is handled by ``XPATH_QUERY_FUNCTIONS``
    2. ancestor-case lookups go to ``_walk_ancestor_cases``
    3. subcase-count expressions go to the ``'subcase-count'`` query function
    4. comparison operators are leaves handled by ``_comparison``
    5. operators in ``OPERATOR_MAPPING`` combine the filters of both children

    ``context`` is read from the enclosing scope.

    :param node: a node of the parsed expression tree
    :raises XPathFunctionException: for a standalone function that is not a
        known query function
    :raises CaseFilterError: for a node without an operator or with an
        operator that is not supported
    """
    if isinstance(node, FunctionCall):
        if node.name in XPATH_QUERY_FUNCTIONS:
            return XPATH_QUERY_FUNCTIONS[node.name](node, context)
        raise XPathFunctionException(
            _("'{name}' is not a valid standalone function").format(name=node.name),
            serialize(node)
        )

    if not hasattr(node, 'op'):
        raise CaseFilterError(
            _("Your search query is required to have at least one boolean operator ({boolean_ops})").format(
                boolean_ops=", ".join(COMPARISON_OPERATORS),
            ),
            serialize(node)
        )

    if _is_ancestor_case_lookup(node):
        # this node represents a filter on a property for a related case
        return _walk_ancestor_cases(node)

    if _is_subcase_count(node):
        return XPATH_QUERY_FUNCTIONS['subcase-count'](node, context)

    if node.op in COMPARISON_OPERATORS:
        # This node is a leaf
        return _comparison(node)

    if node.op in OPERATOR_MAPPING:
        # This is another branch in the tree
        return OPERATOR_MAPPING[node.op](visit(node.left), visit(node.right))

    # Translate the template first and interpolate afterwards: formatting
    # inside ``_()`` would look up the already-interpolated string, which can
    # never match a translation catalogue entry.
    raise CaseFilterError(
        _("We don't know what to do with operator '{}'. Please try reformatting your query.").format(node.op),
        serialize(node)
    )
def visit(node):
    """Recursively convert a parsed expression node into a filter.

    Related-case lookups are delegated to ``_walk_related_cases``; ``EQ`` /
    ``NEQ`` and the operators in ``COMPARISON_MAPPING`` are leaves handled by
    ``_equality`` and ``_comparison``; operators in ``OPERATOR_MAPPING``
    combine the filters built from both children.

    :param node: a node of the parsed expression tree
    :raises CaseFilterError: for a node without an operator or with an
        operator that is not supported
    """
    if not hasattr(node, 'op'):
        raise CaseFilterError(
            _("Your search query is required to have at least one boolean operator ({boolean_ops})").format(
                boolean_ops=", ".join([*COMPARISON_MAPPING, EQ, NEQ]),
            ),
            serialize(node)
        )

    if _is_related_case_lookup(node):
        # this node represents a filter on a property for a related case
        return _walk_related_cases(node)

    if node.op in [EQ, NEQ]:
        # This node is a leaf
        return _equality(node)

    if node.op in COMPARISON_MAPPING:
        # This node is a leaf
        return _comparison(node)

    if node.op in OPERATOR_MAPPING:
        # This is another branch in the tree
        return OPERATOR_MAPPING[node.op](visit(node.left), visit(node.right))

    # Translate the template first and interpolate afterwards: formatting
    # inside ``_()`` would look up the already-interpolated string, which can
    # never match a translation catalogue entry.
    raise CaseFilterError(
        _("We don't know what to do with operator '{}'. Please try reformatting your query.").format(node.op),
        serialize(node)
    )
def _equality(node):
    """Build the filter for an equality expression (``=`` or ``!=``).

    Supported shape: a ``Step`` on the left and an int, str, float,
    ``FunctionCall`` or ``UnaryExpression`` on the right. After resolving the
    right side with ``_unwrap_function``:

    * an empty string selects ``case_property_missing``
    * otherwise, when ``fuzzy`` (read from the enclosing scope) is truthy, a
      ``case_property_text_query`` with ``fuzziness='AUTO'`` is used
    * otherwise an exact text match is used

    ``!=`` wraps the result in ``filters.NOT``.

    :param node: parsed expression exposing ``left``, ``right`` and ``op``
    :raises CaseFilterError: for any expression that does not have this shape
    """
    acceptable_rhs_types = (int, str, float, FunctionCall, UnaryExpression)
    lhs, rhs = node.left, node.right

    if not (isinstance(lhs, Step) and isinstance(rhs, acceptable_rhs_types)):
        if isinstance(rhs, Step):
            # Always raises with a message specific to this mistake.
            _raise_step_RHS(node)
        raise CaseFilterError(
            _("We didn't understand what you were trying to do with {}").format(serialize(node)),
            serialize(node)
        )

    # Leaf node
    property_name = serialize(lhs)
    target = _unwrap_function(rhs)
    if target == '':
        leaf_filter = case_property_missing(property_name)
    elif fuzzy:
        leaf_filter = case_property_text_query(property_name, target, fuzziness='AUTO')
    else:
        leaf_filter = exact_case_property_text_query(property_name, target)

    return filters.NOT(leaf_filter) if node.op == '!=' else leaf_filter
def visit(node):
    """Walk the parsed expression tree and return the filter for ``node``.

    * related-case lookups -> ``_walk_related_cases``
    * ``EQ`` / ``NEQ`` -> ``_equality`` (leaf)
    * operators in ``COMPARISON_MAPPING`` -> ``_comparison`` (leaf)
    * operators in ``OPERATOR_MAPPING`` -> combine the filters of both
      children, which are visited recursively

    :param node: a node of the parsed expression tree
    :raises CaseFilterError: if the node has no ``op`` attribute or its
        operator is not one of the supported ones
    """
    if not hasattr(node, 'op'):
        supported_ops = [*COMPARISON_MAPPING, EQ, NEQ]
        raise CaseFilterError(
            _("Your search query is required to have at least one boolean operator ({boolean_ops})").format(
                boolean_ops=", ".join(supported_ops),
            ),
            serialize(node)
        )

    if _is_related_case_lookup(node):
        # this node represents a filter on a property for a related case
        return _walk_related_cases(node)

    if node.op in [EQ, NEQ]:
        # This node is a leaf
        return _equality(node)

    if node.op in COMPARISON_MAPPING:
        # This node is a leaf
        return _comparison(node)

    if node.op in OPERATOR_MAPPING:
        # This is another branch in the tree
        return OPERATOR_MAPPING[node.op](visit(node.left), visit(node.right))

    # The template must be translated before interpolation; calling
    # ``.format()`` inside ``_()`` makes the translation lookup use the
    # interpolated string, which no catalogue contains.
    raise CaseFilterError(
        _("We don't know what to do with operator '{}'. Please try reformatting your query.").format(node.op),
        serialize(node)
    )
def _comparison(node):
    """Build the range filter for a comparison operation (``>``, ``<``, ``>=``, ``<=``).

    ``COMPARISON_MAPPING`` supplies the keyword-argument name under which the
    right-hand side is passed to ``case_property_range_query``.

    :param node: parsed expression exposing ``left``, ``right`` and ``op``
    :raises CaseFilterError: if building the query raises ``TypeError``
    """
    try:
        property_name = serialize(node.left)
        bounds = {COMPARISON_MAPPING[node.op]: node.right}
        return case_property_range_query(property_name, **bounds)
    except TypeError:
        raise CaseFilterError(
            _("The right hand side of a comparison must be a number or date"),
            serialize(node),
        )
def _comparison(node):
    """Build the range filter for a comparison operation (``>``, ``<``, ``>=``, ``<=``).

    The right-hand side is resolved with ``_unwrap_function`` and passed to
    ``case_property_range_query`` under the keyword-argument name that
    ``COMPARISON_MAPPING`` gives for the operator.

    :param node: parsed expression exposing ``left``, ``right`` and ``op``
    :raises CaseFilterError: if building the query raises ``TypeError`` or
        ``ValueError``
    """
    try:
        property_name = serialize(node.left)
        bound = _unwrap_function(node.right)
        range_kwargs = {COMPARISON_MAPPING[node.op]: bound}
        return case_property_range_query(property_name, **range_kwargs)
    except (TypeError, ValueError):
        raise CaseFilterError(
            _("The right hand side of a comparison must be a number or date. "
              "Dates must be surrounded in quotation marks"),
            serialize(node),
        )
def _unwrap_function(node):
    """Evaluate ``node`` when it is a function call, otherwise return it as is.

    A ``FunctionCall`` is dispatched by name through ``XPATH_FUNCTIONS``.

    :param node: a ``FunctionCall`` or an already plain value
    :raises CaseFilterError: when a ``KeyError`` is raised during the lookup or
        call (reported as an unknown function), or when the function raises
        ``XPathFunctionException``
    """
    if isinstance(node, FunctionCall):
        try:
            return XPATH_FUNCTIONS[node.name](node)
        except KeyError:
            known_functions = ", ".join(list(XPATH_FUNCTIONS.keys()))
            raise CaseFilterError(
                _("We don't know what to do with the function \"{}\". Accepted functions are: {}").format(
                    node.name,
                    known_functions,
                ),
                serialize(node)
            )
        except XPathFunctionException as err:
            raise CaseFilterError(six.text_type(err), serialize(node))

    return node
def _parent_property_lookup(node):
    """Given a node of the form ``parent/foo = 'thing'``, return all case ids
    where ``foo = thing``.

    The condition is rebuilt as a query string on the property alone and run
    through ``CaseSearchES`` for ``domain`` (read from the enclosing scope).

    :param node: expression whose left side is a path ending in the property
        and whose right side is the value to compare against
    :raises CaseFilterError: if the right side is a ``Step``
    :return: the result of ``scroll_ids()`` on the query
    """
    if isinstance(node.right, Step):
        _raise_step_RHS(node)

    value = "{}".format(node.right)
    # XPath string literals have no escape character, so a value containing a
    # single quote must be delimited with double quotes (previously it always
    # used single quotes and produced an unparseable query). A value that
    # contains both kinds of quote still cannot be expressed as one literal.
    quote = '"' if "'" in value else "'"
    new_query = "{} {} {}{}{}".format(serialize(node.left.right), node.op, quote, value, quote)
    return CaseSearchES().domain(domain).xpath_query(domain, new_query).scroll_ids()
def _unwrap_function(node):
    """Return the value of the node if it is wrapped in a function, otherwise
    just return the node.

    Function calls are dispatched by name through ``XPATH_FUNCTIONS``.

    :param node: a ``FunctionCall`` or an already plain value
    :raises CaseFilterError: when a ``KeyError`` is raised during the lookup or
        call (reported as an unknown function), or when the function raises
        ``XPathFunctionException``
    """
    is_function = isinstance(node, FunctionCall)
    if not is_function:
        return node

    try:
        handler_result = XPATH_FUNCTIONS[node.name](node)
    except KeyError:
        accepted = list(XPATH_FUNCTIONS.keys())
        message = _("We don't know what to do with the function \"{}\". Accepted functions are: {}").format(
            node.name,
            ", ".join(accepted),
        )
        raise CaseFilterError(message, serialize(node))
    except XPathFunctionException as exc:
        raise CaseFilterError(six.text_type(exc), serialize(node))
    return handler_result
def visit(node):
    """Collect the left-hand side of every comparison found under ``node``.

    For a node whose operator is ``EQ``, ``NEQ`` or a key of
    ``COMPARISON_MAPPING`` the serialized left side is added to ``columns``
    (a collection from the enclosing scope). For a node whose operator is a
    key of ``OPERATOR_MAPPING`` both children are visited recursively. Nodes
    without an ``op`` attribute are ignored.

    :param node: a node of the parsed expression tree
    :return: ``None``; results are accumulated in ``columns``
    """
    if not hasattr(node, 'op'):
        return

    operator = node.op
    leaf_operators = [EQ, NEQ] + list(COMPARISON_MAPPING.keys())
    if operator in leaf_operators:
        columns.add(serialize(node.left))

    if operator in OPERATOR_MAPPING:
        for child in (node.left, node.right):
            visit(child)
def date_add(node, context):
    """Evaluate the ``date-add(date, interval, quantity)`` XPath function.

    The three arguments are resolved with ``unwrap_value``:

    1. the date, converted with ``_value_to_date``
    2. the interval type: one of ``days``, ``weeks``, ``months``, ``years``
    3. the quantity: a number, or a string that parses as a float

    :param node: the ``FunctionCall`` node for ``date-add``
    :param context: passed through to ``unwrap_value``
    :raises XPathFunctionException: for an invalid interval type, a
        non-numeric or non-finite quantity, a fractional number of years or
        months, or any error raised while adding the interval
    :return: the resulting date formatted with ``ISO_DATE_FORMAT``
    """
    import math

    from corehq.apps.case_search.dsl_utils import unwrap_value
    assert node.name == 'date-add'

    confirm_args_count(node, 3)

    date_arg = unwrap_value(node.args[0], context)
    date_value = _value_to_date(node, date_arg)

    interval_type = unwrap_value(node.args[1], context)
    interval_types = ("days", "weeks", "months", "years")
    if interval_type not in interval_types:
        raise XPathFunctionException(
            _("The \"date-add\" function expects the 'interval' argument to be one of {types}").format(
                types=interval_types
            ),
            serialize(node)
        )

    quantity = unwrap_value(node.args[2], context)
    if isinstance(quantity, str):
        try:
            quantity = float(quantity)
        except (ValueError, TypeError):
            raise XPathFunctionException(
                _("The \"date-add\" function expects the interval quantity to be a numeric value"),
                serialize(node)
            )

    # ``float("inf")`` / ``float("nan")`` parse successfully but are not usable
    # quantities: ``int()`` on them raises OverflowError / ValueError, which
    # previously escaped from the integer check below. Only floats are tested
    # because ``math.isfinite`` can itself overflow on very large ints.
    is_numeric = isinstance(quantity, (int, float))
    if not is_numeric or (isinstance(quantity, float) and not math.isfinite(quantity)):
        raise XPathFunctionException(
            _("The \"date-add\" function expects the interval quantity to be a numeric value"),
            serialize(node)
        )

    if interval_type in ("years", "months") and int(quantity) != quantity:
        raise XPathFunctionException(
            _("Non-integer years and months are ambiguous and not supported by the \"date-add\" function"),
            serialize(node)
        )

    try:
        result = date_value + relativedelta(**{interval_type: quantity})
    except Exception as e:
        # catchall in case of an unexpected error
        raise XPathFunctionException(str(e), serialize(node)) from e

    return result.strftime(ISO_DATE_FORMAT)
def _value_to_date(node, value):
    """Convert ``value`` to a ``datetime.date``.

    * an ``int`` is treated as a number of days since 1970-01-01
    * a ``str`` is parsed with ``parse_date``
    * a ``datetime.date`` is returned unchanged

    :param node: the expression node, used only for error reporting
    :param value: the value to convert
    :raises XPathFunctionException: if the string is rejected by
        ``parse_date`` with ``ValueError``, if nothing could be parsed, if the
        day count is outside the range ``datetime`` can represent, or if the
        value has an unsupported type
    """
    parsed_date = None
    if isinstance(value, int):
        try:
            parsed_date = datetime.date(1970, 1, 1) + datetime.timedelta(days=value)
        except OverflowError:
            # A day count outside the representable range used to escape as a
            # bare OverflowError; report it as an invalid date instead.
            parsed_date = None
    elif isinstance(value, str):
        try:
            parsed_date = parse_date(value)
        except ValueError:
            raise XPathFunctionException(_("{} is not a valid date").format(value), serialize(node))
    elif isinstance(value, datetime.date):
        parsed_date = value

    if parsed_date is None:
        raise XPathFunctionException(
            _("Invalid date value. Dates must be an integer or a string of the format \"YYYY-mm-dd\""),
            serialize(node)
        )

    return parsed_date
def _parent_property_lookup(node):
    """Given a node of the form ``parent/foo = 'thing'``, return all case ids
    where ``foo = thing``.

    The condition on the property is built with ``_comparison_raw`` and run
    against ``CaseSearchES`` for ``context.domain`` (``context`` comes from the
    enclosing scope).

    :param node: expression whose left side is a path ending in the property
    :raises TooManyRelatedCasesError: if the query matches more than
        ``MAX_RELATED_CASES`` cases
    :return: the result of ``scroll_ids()`` on the query
    """
    property_filter = _comparison_raw(node.left.right, node.op, node.right, node)
    matching_cases = CaseSearchES().domain(context.domain).filter(property_filter)

    if matching_cases.count() <= MAX_RELATED_CASES:
        return matching_cases.scroll_ids()

    # Only used to tell the user which part of the query was too broad.
    new_query = '{} {} "{}"'.format(serialize(node.left.right), node.op, node.right)
    raise TooManyRelatedCasesError(
        _("The related case lookup you are trying to perform would return too many cases"),
        new_query
    )
def _parent_property_lookup(node):
    """Given a node of the form ``parent/foo = 'thing'``, return all case ids
    where ``foo = thing``.

    The condition is rebuilt as a query string on the property alone and run
    through ``CaseSearchES`` for ``domain`` (read from the enclosing scope).

    :param node: expression whose left side is a path ending in the property
        and whose right side is the value to compare against
    :raises CaseFilterError: if the right side is a ``Step``, or if the query
        matches more than ``MAX_RELATED_CASES`` cases
    :return: the result of ``scroll_ids()`` on the query
    """
    if isinstance(node.right, Step):
        _raise_step_RHS(node)

    value = "{}".format(node.right)
    # XPath string literals have no escape character, so a value containing a
    # single quote must be delimited with double quotes (previously it always
    # used single quotes and produced an unparseable query). A value that
    # contains both kinds of quote still cannot be expressed as one literal.
    quote = '"' if "'" in value else "'"
    new_query = "{} {} {}{}{}".format(serialize(node.left.right), node.op, quote, value, quote)

    es_query = CaseSearchES().domain(domain).xpath_query(domain, new_query)
    if es_query.count() > MAX_RELATED_CASES:
        raise CaseFilterError(
            _("The related case lookup you are trying to perform would return too many cases"),
            new_query
        )

    return es_query.scroll_ids()
def _parent_property_lookup(node):
    """Given a node of the form ``parent/foo = 'thing'``, return all case ids
    where ``foo = thing``.

    A query string for the bare property is assembled and executed through
    ``CaseSearchES`` for ``domain`` (taken from the enclosing scope).

    :param node: expression whose left side is a path ending in the property
        and whose right side is the value to compare against
    :raises CaseFilterError: if the right side is a ``Step``, or if the query
        matches more than ``MAX_RELATED_CASES`` cases
    :return: the result of ``scroll_ids()`` on the query
    """
    if isinstance(node.right, Step):
        _raise_step_RHS(node)

    property_name = serialize(node.left.right)
    value = "{}".format(node.right)
    # There is no escape character inside an XPath string literal, so pick the
    # delimiter that does not occur in the value; always using single quotes
    # broke the generated query for values such as O'Brien. Values containing
    # both quote characters remain unsupported.
    quote = '"' if "'" in value else "'"
    new_query = "{} {} {}{}{}".format(property_name, node.op, quote, value, quote)

    es_query = CaseSearchES().domain(domain).xpath_query(domain, new_query)
    if es_query.count() > MAX_RELATED_CASES:
        raise CaseFilterError(
            _("The related case lookup you are trying to perform would return too many cases"),
            new_query
        )

    return es_query.scroll_ids()
def _walk_related_cases(node):
    """Return a query that will fulfill the filter on the related case.

    :param node: a node returned from eulxml.xpath.parse of the form
        `parent/grandparent/property = 'value'`

    Since ES has no way of performing joins, we filter down in stages:
    1. Find the ids of all cases where the condition is met
    2. Walk down the case hierarchy, finding all related cases with the right
       identifier to the ids found in (1).
    3. Return the lowest of these ids as a related case query filter
    """
    # Stage 1: ids of the highest level cases matching `property = 'value'`.
    matching_ids = _parent_property_lookup(node)

    # Stage 2: walk the related case path, e.g. `parent/grandparent/property`.
    # Each pass strips one level off the path and performs one "join",
    # replacing the ids with those of the cases that point at them.
    path = node.left
    while _is_related_case_lookup(path):
        path = path.left
        # identifier at this step, e.g. `grandparent`
        step_identifier = serialize(path.right)
        # this has the potential of being a very large list
        matching_ids = _child_case_lookup(matching_ids, identifier=step_identifier)
        if not matching_ids:
            # nothing left to relate to, so stop walking
            break

    # Stage 3: the final level we are interested in, i.e. `parent`.
    return reverse_index_case_query(matching_ids, serialize(path.left))
def _walk_related_cases(node):
    """Return a query that will fulfill the filter on the related case.

    :param node: a node returned from eulxml.xpath.parse of the form
        `parent/grandparent/property = 'value'`

    ES cannot perform joins, so the filter is resolved in stages:
    1. Find the ids of all cases where the condition is met
    2. Walk down the case hierarchy, finding all related cases with the right
       identifier to the ids found in (1).
    3. Return the lowest of these ids as a related case query filter
    """
    # ids of the cases which have `property = 'value'`
    case_ids = _parent_property_lookup(node)

    # the related case path still to be walked, i.e. `parent/grandparent/property`
    remaining = node.left
    while _is_related_case_lookup(remaining):
        # Drop one level from the path (e.g. down to `parent/grandparent`) and
        # look up the cases referencing the previous level's cases through the
        # identifier at this level (e.g. `grandparent`).
        remaining = remaining.left
        case_ids = _child_case_lookup(case_ids, identifier=serialize(remaining.right))
        if not case_ids:
            break

    # after walking the tree, `remaining.left` is the final level, i.e. `parent`
    final_identifier = serialize(remaining.left)
    return reverse_index_case_query(case_ids, final_identifier)
def as_tuple(self):
    """Return the query's fields as a plain tuple.

    The tuple is ``(index_identifier, subcase_filter, op, count, invert)``,
    where ``subcase_filter`` is serialized when set and ``None`` when falsy.
    """
    serialized_filter = None
    if self.subcase_filter:
        serialized_filter = serialize(self.subcase_filter)

    return (
        self.index_identifier,
        serialized_filter,
        self.op,
        self.count,
        self.invert,
    )
def _extract_subcase_query_parts(node):
    """Split a subcase query expression into its components.

    Two forms are accepted:

    * a ``BinaryExpression`` comparing a ``subcase-count(...)`` call with a
      count, e.g. ``subcase-count('parent', <filter>) > 2``
    * a bare ``subcase-exists(...)`` call, which is treated as ``> 0``

    The function call takes the index identifier (a string) and an optional
    subcase filter.

    :param node: the parsed expression
    :raises XPathFunctionException: for an unsupported operator, a count that
        cannot be converted to an integer or is negated, the wrong function
        name, the wrong number of arguments, or a non-string index identifier
    :return: ``(index_identifier, subcase_filter, count_op, case_count)``
    """
    def count_error():
        # Built lazily so the message is only translated when it is needed.
        return XPathFunctionException(
            _("'subcase-count' must be compared to a positive integer"),
            serialize(node)
        )

    if isinstance(node, BinaryExpression):
        function_node = node.left
        count_op = node.op
        case_count = node.right

        if count_op not in [">", "<", "<=", ">=", "=", "!="]:
            raise XPathFunctionException(
                _("Unsupported operator for use with 'subcase-count': {op}").format(op=count_op),
                serialize(node)
            )

        if isinstance(case_count, UnaryExpression):
            # Only an explicit leading plus sign is tolerated.
            if case_count.op != '+':
                raise count_error()
            case_count = case_count.right

        try:
            case_count = int(case_count)
        except (ValueError, TypeError):
            raise count_error()

        if not (isinstance(function_node, FunctionCall) and str(function_node.name) == "subcase-count"):
            raise XPathFunctionException(
                _("XPath incorrectly formatted. Expected 'subcase-count'"),
                serialize(function_node)
            )
    else:
        function_node = node
        if not (isinstance(node, FunctionCall) and str(node.name) == "subcase-exists"):
            raise XPathFunctionException(
                _("XPath incorrectly formatted. Expected 'subcase-exists'"),
                serialize(node)
            )
        # "exists" means "more than zero matching subcases"
        count_op = ">"
        case_count = 0

    args = function_node.args
    arg_count = len(args)
    if arg_count < 1 or arg_count > 2:
        raise XPathFunctionException(
            _("'{name}' expects one or two arguments").format(name=function_node.name),
            serialize(node)
        )

    index_identifier = args[0]
    subcase_filter = None
    if arg_count == 2:
        subcase_filter = args[1]

    if not isinstance(index_identifier, str):
        raise XPathFunctionException(
            _("'{name}' error. Index identifier must be a string").format(name=function_node.name),
            serialize(node)
        )

    return index_identifier, subcase_filter, count_op, case_count
def _get_parent_case_ids_matching_subcase_query(subcase_query, context):
    """Get a list of case IDs for cases that have a subcase with the given index
    identifier and matching the subcase predicate filter.

    The query counts, per referenced (parent) case id, the subcases whose index
    has the requested identifier. A parent id is returned when
    ``subcase_query.filter_count`` accepts its count; for ``> 0`` (or any
    non-positive threshold with ``>``) every parent id in the aggregation is
    returned without checking the count.

    :param subcase_query: exposes ``subcase_filter``, ``index_identifier``,
        ``op``, ``count`` and ``filter_count``
    :param context: provides ``domain`` and is passed to ``build_filter_from_ast``
    :raises TooManyRelatedCasesError: if more than ``MAX_RELATED_CASES``
        subcases match
    """
    # TODO: validate that the subcase filter doesn't contain any ancestor filtering
    from corehq.apps.case_search.filter_dsl import (
        MAX_RELATED_CASES,
        build_filter_from_ast,
    )

    if subcase_query.subcase_filter:
        subcase_filter = build_filter_from_ast(subcase_query.subcase_filter, context)
    else:
        subcase_filter = filters.match_all()

    identifier_filter = filters.term('indices.identifier', subcase_query.index_identifier)

    # Subcases that have a live index with the requested identifier.
    live_index_filter = filters.AND(
        identifier_filter,
        filters.NOT(filters.term('indices.referenced_id', ''))  # exclude deleted indices
    )
    index_query = queries.nested(
        'indices',
        queries.filtered(queries.match_all(), live_index_filter)
    )

    # Bucket the matching indices by the case id they reference.
    by_referenced_id = aggregations.TermsAggregation('referenced_id', 'indices.referenced_id')
    matching_indices = aggregations.FilterAggregation(
        'matching_indices', identifier_filter
    ).aggregation(by_referenced_id)
    indices_aggregation = aggregations.NestedAggregation(
        'indices', 'indices',
    ).aggregation(matching_indices)

    es_query = (
        CaseSearchES().domain(context.domain)
        .filter(index_query)
        .filter(subcase_filter)
        .aggregation(indices_aggregation)
    )

    if es_query.count() > MAX_RELATED_CASES:
        from ..exceptions import TooManyRelatedCasesError
        raise TooManyRelatedCasesError(
            _("The related case lookup you are trying to perform would return too many cases"),
            serialize(subcase_query.subcase_filter)
        )

    result = es_query.run()
    counts_by_parent_id = result.aggregations.indices.matching_indices.referenced_id.counts_by_bucket()

    if subcase_query.op == '>' and subcase_query.count <= 0:
        # Shortcut: every parent id in the aggregation is returned as is.
        return list(counts_by_parent_id)

    return [
        parent_id
        for parent_id, subcase_count in counts_by_parent_id.items()
        if subcase_query.filter_count(subcase_count)
    ]