def _evaluate_expression(
    expression: BooleanRuleExpression,
    pattern_ids_to_pattern_matches: Dict[PatternId, List[PatternMatch]],
    ranges_left: Set[Range],
    steps_for_debugging: List[Dict[str, Any]],
    flags: Optional[Dict[str, Any]] = None,
) -> Set[Range]:
    """Apply ``expression`` to ``ranges_left`` and return the surviving ranges.

    Operators listed in ``OPERATORS_WITH_CHILDREN`` are evaluated recursively
    over ``expression.children``; every other operator is handed to
    ``_evaluate_single_expression``.

    For operators with children, ``ranges_left`` is narrowed in place (each
    child only ever receives a copy of it) and one step describing the
    outcome is appended to ``steps_for_debugging``.

    Args:
        expression: the (possibly nested) rule expression to evaluate.
        pattern_ids_to_pattern_matches: matches found for each pattern id.
        ranges_left: candidate ranges that have not been filtered out yet.
        steps_for_debugging: list that receives one entry per evaluated step.
        flags: optional flags, forwarded untouched to every nested call.

    Returns:
        The ranges that survive ``expression``.

    Raises:
        SemgrepError: if an operator that takes children has none, or an
            operator that takes no children has some.
        UnknownOperatorError: if an operator with children is neither
            ``AND_EITHER`` nor ``AND_ALL``.
    """
    # Leaf operators carry no children and are evaluated directly.
    if expression.operator not in OPERATORS_WITH_CHILDREN:
        if expression.children is not None:
            raise SemgrepError(
                f"operator '{expression.operator}' must not have child operators"
            )
        return _evaluate_single_expression(
            expression,
            pattern_ids_to_pattern_matches,
            ranges_left,
            steps_for_debugging,
            flags=flags,
        )

    if expression.children is None:
        raise SemgrepError(
            f"operator '{expression.operator}' must have child operators"
        )

    if expression.operator == OPERATORS.AND_EITHER:
        # Every alternative starts from the same candidate ranges; the
        # intersection with the combined results happens only afterwards.
        results_per_child = []
        for child in expression.children:
            results_per_child.append(
                _evaluate_expression(
                    child,
                    pattern_ids_to_pattern_matches,
                    ranges_left.copy(),
                    steps_for_debugging,
                    flags=flags,
                )
            )
        ranges_left.intersection_update(flatten(results_per_child))
    elif expression.operator == OPERATORS.AND_ALL:
        # Intersect eagerly, so each child only sees what earlier children kept.
        for child in expression.children:
            surviving = _evaluate_expression(
                child,
                pattern_ids_to_pattern_matches,
                ranges_left.copy(),
                steps_for_debugging,
                flags=flags,
            )
            ranges_left.intersection_update(surviving)
    else:
        raise UnknownOperatorError(f"unknown operator {expression.operator}")

    logger.debug(f"after filter `{expression.operator}`: {ranges_left}")
    debugging_step = {
        "filter": f"{pattern_name_for_operator(expression.operator)}",
        "pattern_id": None,
        "ranges": list(ranges_left),
    }
    steps_for_debugging.append(debugging_step)
    return ranges_left
def _evaluate_expression(
    expression: BooleanRuleExpression,
    pattern_ids_to_pattern_matches: Dict[PatternId, List[PatternMatch]],
    ranges_left: Set[Range],
    steps_for_debugging: List[DebuggingStep],
    allow_exec: bool,
) -> Set[Range]:
    """Apply ``expression`` to ``ranges_left`` and return the surviving ranges.

    Operators in ``OPERATORS_WITH_CHILDREN`` recurse over
    ``expression.children`` and narrow ``ranges_left`` in place (children
    only ever receive copies); all other operators are delegated to
    ``_evaluate_single_expression``. In both cases ``add_debugging_info`` is
    called once with the resulting ranges before returning.

    Args:
        expression: the (possibly nested) rule expression to evaluate.
        pattern_ids_to_pattern_matches: matches found for each pattern id.
        ranges_left: candidate ranges that have not been filtered out yet.
        steps_for_debugging: debugging steps, passed to ``add_debugging_info``.
        allow_exec: forwarded untouched to every nested evaluation.

    Returns:
        The ranges that survive ``expression``.

    Raises:
        SemgrepError: if an operator that takes children has none, or an
            operator that takes no children has some.
        UnknownOperatorError: if an operator with children is neither
            ``AND_EITHER`` nor ``AND_ALL``.
    """
    if expression.operator not in OPERATORS_WITH_CHILDREN:
        # Leaf operator: must not carry children.
        if expression.children is not None:
            raise SemgrepError(
                f"operator '{expression.operator}' must not have child operators"
            )
        ranges_left = _evaluate_single_expression(
            expression,
            pattern_ids_to_pattern_matches,
            ranges_left,
            allow_exec=allow_exec,
        )
    elif expression.children is None:
        raise SemgrepError(
            f"operator '{expression.operator}' must have child operators"
        )
    elif expression.operator == OPERATORS.AND_EITHER:
        # All alternatives are evaluated against the same starting ranges
        # before anything is intersected.
        alternatives = []
        for child in expression.children:
            alternatives.append(
                _evaluate_expression(
                    child,
                    pattern_ids_to_pattern_matches,
                    ranges_left.copy(),
                    steps_for_debugging,
                    allow_exec=allow_exec,
                )
            )
        ranges_left.intersection_update(flatten(alternatives))
    elif expression.operator == OPERATORS.AND_ALL:
        # Intersect after every child, so later children see fewer ranges.
        for child in expression.children:
            kept_by_child = _evaluate_expression(
                child,
                pattern_ids_to_pattern_matches,
                ranges_left.copy(),
                steps_for_debugging,
                allow_exec=allow_exec,
            )
            ranges_left.intersection_update(kept_by_child)
    else:
        raise UnknownOperatorError(f"unknown operator {expression.operator}")

    add_debugging_info(
        expression,
        ranges_left,
        pattern_ids_to_pattern_matches,
        steps_for_debugging,
    )
    return ranges_left
def _evaluate_single_expression(
    expression: BooleanRuleExpression,
    pattern_ids_to_pattern_matches: Dict[PatternId, List[PatternMatch]],
    ranges_left: Set[Range],
    allow_exec: bool,
    metavariable_propagation: bool,
) -> Set[Range]:
    """Evaluate one child-less expression against ``ranges_left``.

    The ranges matched by ``expression.pattern_id`` are looked up in
    ``pattern_ids_to_pattern_matches`` (a missing id yields no ranges) and
    combined with ``ranges_left`` according to ``expression.operator``.

    Args:
        expression: leaf expression; must have a truthy ``pattern_id``.
        pattern_ids_to_pattern_matches: matches found for each pattern id.
        ranges_left: candidate ranges that have not been filtered out yet.
        allow_exec: must be truthy for ``WHERE_PYTHON`` to be evaluated.
        metavariable_propagation: forwarded to
            ``filter_ranges_with_propagation`` for ``AND`` and ``AND_INSIDE``.

    Returns:
        The ranges that survive the operator.

    Raises:
        SemgrepError: if ``pattern_id`` is missing, if ``WHERE_PYTHON`` is
            used while ``allow_exec`` is falsy, or if the operand does not
            have the type the operator expects.
        UnknownOperatorError: if the operator is not handled here.
    """
    if not expression.pattern_id:
        raise SemgrepError(
            f"expected expression '{expression}' to have pattern_id"
        )

    operator = expression.operator
    matches_for_pattern = pattern_ids_to_pattern_matches.get(
        expression.pattern_id, []
    )
    pattern_ranges = {match.range for match in matches_for_pattern}

    if operator == OPERATORS.AND:
        # Keep ranges that are equal to one of this pattern's ranges.
        return filter_ranges_with_propagation(
            ranges_left,
            pattern_ranges,
            predicate=lambda r1, r2: r1 == r2,
            metavariable_propagation=metavariable_propagation,
        )

    if operator == OPERATORS.AND_NOT:
        # Drop ranges that are equal to one of this pattern's ranges.
        return ranges_left.difference(pattern_ranges)

    if operator == OPERATORS.AND_INSIDE:
        # Keep ranges selected by the enclosing-or-equal predicate.
        return filter_ranges_with_propagation(
            ranges_left,
            pattern_ranges,
            predicate=lambda r1, r2: r1.is_enclosing_or_eq(r2),
            metavariable_propagation=metavariable_propagation,
        )

    if operator == OPERATORS.AND_NOT_INSIDE:
        # Drop every range that some pattern range encloses or equals.
        return {
            candidate
            for candidate in ranges_left
            if all(
                not enclosing.is_enclosing_or_eq(candidate)
                for enclosing in pattern_ranges
            )
        }

    if operator == OPERATORS.REGEX:
        # Keep ranges that are equal to one of this pattern's ranges.
        return ranges_left.intersection(pattern_ranges)

    if operator == OPERATORS.NOT_REGEX:
        # Drop a range when it and a regex range enclose one another in
        # either direction.
        return {
            candidate
            for candidate in ranges_left
            if all(
                not candidate.is_range_enclosing_or_eq(regex_range)
                and not regex_range.is_range_enclosing_or_eq(candidate)
                for regex_range in pattern_ranges
            )
        }

    if operator == OPERATORS.WHERE_PYTHON:
        if not allow_exec:
            raise SemgrepError(
                f"at least one rule needs to execute arbitrary code; this is dangerous! if you want to continue, enable the flag: {RCE_RULE_FLAG}",
                code=NEED_ARBITRARY_CODE_EXEC_EXIT_CODE,
            )
        if not isinstance(expression.operand, str):
            raise SemgrepError(
                f"expected operator '{expression.operator}' to have string value guaranteed by schema"
            )
        every_match = list(flatten(pattern_ids_to_pattern_matches.values()))
        return get_where_python_range_matches(
            expression.operand, ranges_left, every_match
        )

    if operator == OPERATORS.METAVARIABLE_REGEX:
        if not isinstance(expression.operand, YamlMap):
            raise SemgrepError(
                f"expected operator '{expression.operator}' to have mapping value guaranteed by schema"
            )
        return get_re_range_matches(
            expression.operand["metavariable"].value,
            expression.operand["regex"].value,
            ranges_left,
            list(flatten(pattern_ids_to_pattern_matches.values())),
        )

    if operator == OPERATORS.METAVARIABLE_COMPARISON:
        if not isinstance(expression.operand, YamlMap):
            raise SemgrepError(
                f"expected operator '{expression.operator}' to have mapping value guaranteed by schema"
            )
        # "strip" and "base" are optional; pass None when they are absent.
        strip_node = expression.operand.get("strip")
        base_node = expression.operand.get("base")
        return get_comparison_range_matches(
            expression.operand["metavariable"].value,
            expression.operand["comparison"].value,
            None if strip_node is None else strip_node.value,
            None if base_node is None else base_node.value,
            ranges_left,
            list(flatten(pattern_ids_to_pattern_matches.values())),
        )

    raise UnknownOperatorError(f"unknown operator {expression.operator}")
def _evaluate_single_expression(
    expression: BooleanRuleExpression,
    pattern_ids_to_pattern_matches: Dict[PatternId, List[PatternMatch]],
    ranges_left: Set[Range],
    steps_for_debugging: List[Dict[str, Any]],
    flags: Optional[Dict[str, Any]] = None,
) -> Set[Range]:
    """Evaluate one child-less expression against ``ranges_left``.

    The ranges matched by ``expression.pattern_id`` are looked up in
    ``pattern_ids_to_pattern_matches`` (a missing id yields no ranges) and
    combined with ``ranges_left`` according to ``expression.operator``. The
    result is reported through ``add_debugging_info`` before it is returned.

    Args:
        expression: leaf expression; must have a truthy ``pattern_id``.
        pattern_ids_to_pattern_matches: matches found for each pattern id.
        ranges_left: candidate ranges that have not been filtered out yet.
        steps_for_debugging: debugging steps, passed to ``add_debugging_info``.
        flags: optional flags; ``WHERE_PYTHON`` is only evaluated when
            ``flags[RCE_RULE_FLAG]`` is present and truthy.

    Returns:
        The ranges that survive the operator.

    Raises:
        SemgrepError: if ``pattern_id`` is missing, if ``WHERE_PYTHON`` is
            used without the ``RCE_RULE_FLAG`` flag enabled, or if the
            operand does not have the type the operator expects.
        UnknownOperatorError: if the operator is not handled here.
    """
    if not expression.pattern_id:
        raise SemgrepError(
            f"expected expression '{expression}' to have pattern_id"
        )

    ranges_for_pattern = [
        x.range
        for x in pattern_ids_to_pattern_matches.get(expression.pattern_id, [])
    ]

    if expression.operator == OPERATORS.AND:
        # remove all ranges that don't equal the ranges for this pattern
        output_ranges = ranges_left.intersection(ranges_for_pattern)
    elif expression.operator == OPERATORS.AND_NOT:
        # remove all ranges that DO equal the ranges for this pattern
        output_ranges = ranges_left.difference(ranges_for_pattern)
    elif expression.operator == OPERATORS.AND_INSIDE:
        # keep only ranges enclosed by, or equal to, one of the inside ranges
        output_ranges = {
            _range
            for _range in ranges_left
            if any(
                pattern_range.is_enclosing_or_eq(_range)
                for pattern_range in ranges_for_pattern
            )
        }
    elif expression.operator == OPERATORS.AND_NOT_INSIDE:
        # remove all ranges enclosed by, or equal to, one of the pattern ranges
        output_ranges = {
            _range
            for _range in ranges_left
            if not any(
                pattern_range.is_enclosing_or_eq(_range)
                for pattern_range in ranges_for_pattern
            )
        }
    elif expression.operator == OPERATORS.WHERE_PYTHON:
        # Use .get() so that a flags dict lacking the key is reported with the
        # explanatory SemgrepError below rather than a bare KeyError.
        if not flags or not flags.get(RCE_RULE_FLAG):
            raise SemgrepError(
                f"at least one rule needs to execute arbitrary code; this is dangerous! if you want to continue, enable the flag: {RCE_RULE_FLAG}",
                code=NEED_ARBITRARY_CODE_EXEC_EXIT_CODE,
            )
        if not isinstance(expression.operand, str):
            raise SemgrepError(
                f"expected operator '{expression.operator}' to have string value guaranteed by schema"
            )
        # The membership test comes first so the where-python statement is
        # only evaluated for ranges that have not been filtered out already.
        output_ranges = {
            pattern_match.range
            for pattern_match in flatten(pattern_ids_to_pattern_matches.values())
            if pattern_match.range in ranges_left
            and _where_python_statement_matches(
                expression.operand, pattern_match.metavars
            )
        }
    elif expression.operator == OPERATORS.REGEX:
        # remove all ranges that don't equal the ranges for this pattern
        output_ranges = ranges_left.intersection(ranges_for_pattern)
    elif expression.operator == OPERATORS.NOT_REGEX:
        # remove a range when it and a regex range enclose one another in
        # either direction
        output_ranges = {
            _range
            for _range in ranges_left
            if not any(
                _range.is_range_enclosing_or_eq(pattern_range)
                or pattern_range.is_range_enclosing_or_eq(_range)
                for pattern_range in ranges_for_pattern
            )
        }
    elif expression.operator == OPERATORS.METAVARIABLE_REGEX:
        if not isinstance(expression.operand, dict):
            raise SemgrepError(
                f"expected operator '{expression.operator}' to have mapping value guaranteed by schema"
            )
        output_ranges = get_re_range_matches(
            expression.operand["metavariable"],
            expression.operand["regex"],
            ranges_left,
            list(flatten(pattern_ids_to_pattern_matches.values())),
        )
    elif expression.operator == OPERATORS.METAVARIABLE_COMPARISON:
        if not isinstance(expression.operand, dict):
            raise SemgrepError(
                f"expected operator '{expression.operator}' to have mapping value guaranteed by schema"
            )
        # "strip" and "base" are optional keys; .get() passes None if absent.
        output_ranges = get_comparison_range_matches(
            expression.operand["metavariable"],
            expression.operand["comparison"],
            expression.operand.get("strip"),
            expression.operand.get("base"),
            ranges_left,
            list(flatten(pattern_ids_to_pattern_matches.values())),
        )
    else:
        raise UnknownOperatorError(f"unknown operator {expression.operator}")

    metavars_for_patterns = get_metavar_debugging_info(
        expression, pattern_ids_to_pattern_matches
    )
    add_debugging_info(
        expression, output_ranges, metavars_for_patterns, steps_for_debugging
    )
    return output_ranges
def _evaluate_single_expression(
    expression: BooleanRuleExpression,
    pattern_ids_to_pattern_matches: Dict[PatternId, List[PatternMatch]],
    ranges_left: Set[Range],
    steps_for_debugging: List[Dict[str, Any]],
    flags: Optional[Dict[str, Any]] = None,
) -> Set[Range]:
    """Evaluate one child-less expression against ``ranges_left``.

    The ranges matched by ``expression.pattern_id`` are looked up in
    ``pattern_ids_to_pattern_matches`` (a missing id yields no ranges) and
    combined with ``ranges_left`` according to ``expression.operator``. The
    final ranges are reported through ``add_debugging_info`` exactly once,
    after they have been computed, and then returned.

    Args:
        expression: leaf expression; must have a truthy ``pattern_id``.
        pattern_ids_to_pattern_matches: matches found for each pattern id.
        ranges_left: candidate ranges that have not been filtered out yet.
        steps_for_debugging: debugging steps, passed to ``add_debugging_info``.
        flags: optional flags; ``WHERE_PYTHON`` is only evaluated when
            ``flags[RCE_RULE_FLAG]`` is present and truthy.

    Returns:
        The ranges that survive the operator.

    Raises:
        AssertionError: if ``expression.pattern_id`` is falsy.
        SemgrepError: if ``WHERE_PYTHON`` is used without the
            ``RCE_RULE_FLAG`` flag enabled, or its operand is not a string.
        UnknownOperatorError: if the operator is not handled here.
    """
    assert expression.pattern_id, f"<internal error: expected pattern id: {expression}>"

    results_for_pattern = [
        x.range
        for x in pattern_ids_to_pattern_matches.get(expression.pattern_id, [])
    ]
    metavars_for_patterns = get_metavar_debugging_info(
        expression, pattern_ids_to_pattern_matches
    )

    output_ranges: Set[Range]
    if expression.operator == OPERATORS.AND:
        # remove all ranges that don't equal the ranges for this pattern.
        # The result is computed *before* the debugging info is recorded;
        # previously the debug step was written with a still-empty set.
        output_ranges = ranges_left.intersection(results_for_pattern)
    elif expression.operator == OPERATORS.AND_NOT:
        # remove all ranges that DO equal the ranges for this pattern
        output_ranges = ranges_left.difference(results_for_pattern)
    elif expression.operator == OPERATORS.AND_INSIDE:
        # keep only ranges enclosed by, or equal to, one of the inside ranges
        output_ranges = {
            arange
            for arange in ranges_left
            if any(
                keep_inside_this_range.is_enclosing_or_eq(arange)
                for keep_inside_this_range in results_for_pattern
            )
        }
    elif expression.operator == OPERATORS.AND_NOT_INSIDE:
        # remove all ranges enclosed by, or equal to, one of the pattern ranges
        output_ranges = {
            arange
            for arange in ranges_left
            if not any(
                keep_inside_this_range.is_enclosing_or_eq(arange)
                for keep_inside_this_range in results_for_pattern
            )
        }
    elif expression.operator == OPERATORS.WHERE_PYTHON:
        # Use .get() so that a flags dict lacking the key is reported with the
        # explanatory SemgrepError below rather than a bare KeyError.
        if not flags or not flags.get(RCE_RULE_FLAG):
            raise SemgrepError(
                f"at least one rule needs to execute arbitrary code; this is dangerous! if you want to continue, enable the flag: {RCE_RULE_FLAG}",
                code=NEED_ARBITRARY_CODE_EXEC_EXIT_CODE,
            )
        if not isinstance(expression.operand, str):
            raise SemgrepError("pattern-where-python must have a string value")

        output_ranges = set()
        for pattern_match in flatten(pattern_ids_to_pattern_matches.values()):
            # Only check the where-python clause for ranges that have not
            # already been filtered out.
            if pattern_match.range not in ranges_left:
                continue
            logger.debug(
                f"WHERE is {expression.operand}, metavars: {pattern_match.metavars}"
            )
            if _where_python_statement_matches(
                expression.operand, pattern_match.metavars
            ):
                output_ranges.add(pattern_match.range)
    elif expression.operator == OPERATORS.REGEX:
        # remove all ranges that don't equal the ranges for this pattern
        output_ranges = ranges_left.intersection(results_for_pattern)
    elif expression.operator == OPERATORS.METAVARIABLE_REGEX:
        output_ranges = get_re_range_matches(
            expression.operand["metavariable"],
            expression.operand["regex"],
            ranges_left,
            list(flatten(pattern_ids_to_pattern_matches.values())),
        )
    elif expression.operator == OPERATORS.METAVARIABLE_COMPARISON:
        # "strip" and "base" are optional keys; .get() passes None if absent.
        output_ranges = get_comparison_range_matches(
            expression.operand["metavariable"],
            expression.operand["comparison"],
            expression.operand.get("strip"),
            expression.operand.get("base"),
            ranges_left,
            list(flatten(pattern_ids_to_pattern_matches.values())),
        )
    else:
        raise UnknownOperatorError(f"unknown operator {expression.operator}")

    add_debugging_info(
        expression, output_ranges, metavars_for_patterns, steps_for_debugging
    )
    return output_ranges
def _evaluate_single_expression(
    expression: BooleanRuleExpression,
    pattern_ids_to_pattern_matches: Dict[PatternId, List[PatternMatch]],
    ranges_left: Set[Range],
    steps_for_debugging: List[Dict[str, Any]],
    flags: Optional[Dict[str, Any]] = None,
) -> Set[Range]:
    """Evaluate one child-less expression against ``ranges_left``.

    The ranges matched by ``expression.pattern_id`` are looked up in
    ``pattern_ids_to_pattern_matches`` (a missing id yields no ranges) and
    combined with ``ranges_left`` according to ``expression.operator``.

    Every operator, including ``AND``, emits one ``debug_print`` line and
    appends one step (``filter``, ``pattern_id``, ``ranges``) to
    ``steps_for_debugging`` before the result is returned. ``AND`` used to
    return early without recording anything.

    Args:
        expression: leaf expression; must have a truthy ``pattern_id``.
        pattern_ids_to_pattern_matches: matches found for each pattern id.
        ranges_left: candidate ranges that have not been filtered out yet.
        steps_for_debugging: list that receives one entry for this step.
        flags: optional flags; ``WHERE_PYTHON`` is only evaluated when
            ``flags[RCE_RULE_FLAG]`` is present and truthy.

    Returns:
        The ranges that survive the operator.

    Raises:
        AssertionError: if ``expression.pattern_id`` is falsy, or if
            ``WHERE_PYTHON`` is used with a falsy operand.
        SemgrepError: if ``WHERE_PYTHON`` is used without the
            ``RCE_RULE_FLAG`` flag enabled.
        UnknownOperatorError: if the operator is not handled here.
    """
    assert expression.pattern_id, f"<internal error: expected pattern id: {expression}>"

    results_for_pattern = [
        x.range
        for x in pattern_ids_to_pattern_matches.get(expression.pattern_id, [])
    ]

    output_ranges: Set[Range]
    if expression.operator == OPERATORS.AND:
        # remove all ranges that don't equal the ranges for this pattern
        output_ranges = ranges_left.intersection(results_for_pattern)
    elif expression.operator == OPERATORS.AND_NOT:
        # remove all ranges that DO equal the ranges for this pattern;
        # difference() builds a new set and leaves ranges_left untouched
        output_ranges = ranges_left.difference(results_for_pattern)
    elif expression.operator == OPERATORS.AND_INSIDE:
        # keep only ranges enclosed by, or equal to, one of the inside ranges
        output_ranges = {
            arange
            for arange in ranges_left
            if any(
                keep_inside_this_range.is_enclosing_or_eq(arange)
                for keep_inside_this_range in results_for_pattern
            )
        }
    elif expression.operator == OPERATORS.AND_NOT_INSIDE:
        # remove all ranges enclosed by, or equal to, one of the pattern ranges
        output_ranges = {
            arange
            for arange in ranges_left
            if not any(
                keep_inside_this_range.is_enclosing_or_eq(arange)
                for keep_inside_this_range in results_for_pattern
            )
        }
    elif expression.operator == OPERATORS.WHERE_PYTHON:
        # Use .get() so that a flags dict lacking the key is reported with the
        # explanatory SemgrepError below rather than a bare KeyError.
        if not flags or not flags.get(RCE_RULE_FLAG):
            raise SemgrepError(
                f"at least one rule needs to execute arbitrary code; this is dangerous! if you want to continue, enable the flag: {RCE_RULE_FLAG}",
                code=NEED_ARBITRARY_CODE_EXEC_EXIT_CODE,
            )
        assert expression.operand, "must have operand for this operator type"

        output_ranges = set()
        for pattern_match in flatten(pattern_ids_to_pattern_matches.values()):
            # Only check the where-python clause for ranges that have not
            # already been filtered out.
            if pattern_match.range not in ranges_left:
                continue
            debug_print(
                f"WHERE is {expression.operand}, metavars: {pattern_match.metavars}"
            )
            if _where_python_statement_matches(
                expression.operand, pattern_match.metavars
            ):
                output_ranges.add(pattern_match.range)
    elif expression.operator == OPERATORS.REGEX:
        # remove all ranges that don't equal the ranges for this pattern
        output_ranges = ranges_left.intersection(results_for_pattern)
    else:
        raise UnknownOperatorError(f"unknown operator {expression.operator}")

    # Single exit point: every operator records the same debugging step.
    debug_print(f"after filter `{expression.operator}`: {output_ranges}")
    steps_for_debugging.append(
        {
            "filter": pattern_name_for_operator(expression.operator),
            "pattern_id": expression.pattern_id,
            "ranges": list(output_ranges),
        }
    )
    return output_ranges