Exemple #1
0
def _evaluate_expression(
    expression: BooleanRuleExpression,
    pattern_ids_to_pattern_matches: Dict[PatternId, List[PatternMatch]],
    ranges_left: Set[Range],
    steps_for_debugging: List[Dict[str, Any]],
    flags: Optional[Dict[str, Any]] = None,
) -> Set[Range]:
    """Recursively evaluate one node of a boolean rule expression.

    Operators listed in ``OPERATORS_WITH_CHILDREN`` are computed from their
    children; every other operator is delegated to
    ``_evaluate_single_expression``.

    Args:
        expression: the expression node to evaluate.
        pattern_ids_to_pattern_matches: pattern matches keyed by pattern id;
            forwarded unchanged to nested evaluations.
        ranges_left: candidate ranges still under consideration. For
            operators with children this set is narrowed IN PLACE and is
            also the object returned.
        steps_for_debugging: list shared with nested evaluations; one entry
            is appended here for each operator with children, after its
            children have been evaluated.
        flags: optional flags, forwarded unchanged to nested evaluations.

    Returns:
        The ranges remaining after ``expression`` has been applied.

    Raises:
        SemgrepError: if an operator that needs children has none, or an
            operator that must not have children has some.
        UnknownOperatorError: if an operator with children is neither
            ``AND_EITHER`` nor ``AND_ALL``.
    """
    operator = expression.operator

    # Leaf operators: validate the shape, then hand off and return directly.
    if operator not in OPERATORS_WITH_CHILDREN:
        if expression.children is not None:
            raise SemgrepError(
                f"operator '{expression.operator}' must not have child operators"
            )
        return _evaluate_single_expression(
            expression,
            pattern_ids_to_pattern_matches,
            ranges_left,
            steps_for_debugging,
            flags=flags,
        )

    children = expression.children
    if children is None:
        raise SemgrepError(
            f"operator '{expression.operator}' must have child operators")

    if operator == OPERATORS.AND_EITHER:
        # Each alternative is evaluated against its own copy of the same
        # candidates; ranges_left is only narrowed once all of them are done.
        ranges_per_child = []
        for child in children:
            ranges_per_child.append(
                _evaluate_expression(
                    child,
                    pattern_ids_to_pattern_matches,
                    ranges_left.copy(),
                    steps_for_debugging,
                    flags=flags,
                ))
        ranges_left.intersection_update(flatten(ranges_per_child))
    elif operator == OPERATORS.AND_ALL:
        # Narrow after every child, so each later child only sees the ranges
        # that survived the earlier ones.
        for child in children:
            surviving_ranges = _evaluate_expression(
                child,
                pattern_ids_to_pattern_matches,
                ranges_left.copy(),
                steps_for_debugging,
                flags=flags,
            )
            ranges_left.intersection_update(surviving_ranges)
    else:
        raise UnknownOperatorError(
            f"unknown operator {expression.operator}")

    logger.debug(f"after filter `{expression.operator}`: {ranges_left}")
    debugging_step = {
        "filter": f"{pattern_name_for_operator(expression.operator)}",
        "pattern_id": None,
        "ranges": list(ranges_left),
    }
    steps_for_debugging.append(debugging_step)
    return ranges_left
Exemple #2
0
def _evaluate_expression(
    expression: BooleanRuleExpression,
    pattern_ids_to_pattern_matches: Dict[PatternId, List[PatternMatch]],
    ranges_left: Set[Range],
    steps_for_debugging: List[DebuggingStep],
    allow_exec: bool,
) -> Set[Range]:
    """Recursively evaluate one node of a boolean rule expression.

    Operators listed in ``OPERATORS_WITH_CHILDREN`` are computed from their
    children; every other operator is delegated to
    ``_evaluate_single_expression``. In both cases ``add_debugging_info`` is
    called once with the resulting ranges before returning.

    Args:
        expression: the expression node to evaluate.
        pattern_ids_to_pattern_matches: pattern matches keyed by pattern id;
            forwarded unchanged to nested evaluations.
        ranges_left: candidate ranges still under consideration. For
            operators with children this set is narrowed IN PLACE and is
            also the object returned.
        steps_for_debugging: list shared with nested evaluations and passed
            to ``add_debugging_info``.
        allow_exec: forwarded unchanged to nested evaluations.

    Returns:
        The ranges remaining after ``expression`` has been applied.

    Raises:
        SemgrepError: if an operator that needs children has none, or an
            operator that must not have children has some.
        UnknownOperatorError: if an operator with children is neither
            ``AND_EITHER`` nor ``AND_ALL``.
    """
    operator = expression.operator

    if operator not in OPERATORS_WITH_CHILDREN:
        # Leaf operator: it must not carry children.
        if expression.children is not None:
            raise SemgrepError(
                f"operator '{expression.operator}' must not have child operators"
            )
        remaining = _evaluate_single_expression(
            expression,
            pattern_ids_to_pattern_matches,
            ranges_left,
            allow_exec=allow_exec,
        )
    else:
        children = expression.children
        if children is None:
            raise SemgrepError(
                f"operator '{expression.operator}' must have child operators"
            )

        if operator == OPERATORS.AND_EITHER:
            # Each alternative is evaluated against its own copy of the same
            # candidates; ranges_left is only narrowed once all are done.
            ranges_per_child = []
            for child in children:
                ranges_per_child.append(
                    _evaluate_expression(
                        child,
                        pattern_ids_to_pattern_matches,
                        ranges_left.copy(),
                        steps_for_debugging,
                        allow_exec=allow_exec,
                    )
                )
            ranges_left.intersection_update(flatten(ranges_per_child))
        elif operator == OPERATORS.AND_ALL:
            # Narrow after every child, so each later child only sees the
            # ranges that survived the earlier ones.
            for child in children:
                surviving_ranges = _evaluate_expression(
                    child,
                    pattern_ids_to_pattern_matches,
                    ranges_left.copy(),
                    steps_for_debugging,
                    allow_exec=allow_exec,
                )
                ranges_left.intersection_update(surviving_ranges)
        else:
            raise UnknownOperatorError(f"unknown operator {expression.operator}")

        # The composite result is the (mutated) input set itself.
        remaining = ranges_left

    add_debugging_info(
        expression,
        remaining,
        pattern_ids_to_pattern_matches,
        steps_for_debugging,
    )
    return remaining
Exemple #3
0
def _evaluate_single_expression(
    expression: BooleanRuleExpression,
    pattern_ids_to_pattern_matches: Dict[PatternId, List[PatternMatch]],
    ranges_left: Set[Range],
    allow_exec: bool,
    metavariable_propagation: bool,
) -> Set[Range]:
    """Apply a single childless operator to the candidate ranges.

    Args:
        expression: the leaf expression; it must have a truthy ``pattern_id``.
        pattern_ids_to_pattern_matches: pattern matches keyed by pattern id.
        ranges_left: candidate ranges still under consideration; this
            function does not modify the set itself.
        allow_exec: must be truthy for ``WHERE_PYTHON`` to be evaluated.
        metavariable_propagation: forwarded to
            ``filter_ranges_with_propagation`` for ``AND`` and ``AND_INSIDE``.

    Returns:
        The ranges produced by the operator.

    Raises:
        SemgrepError: if ``pattern_id`` is missing, if ``WHERE_PYTHON`` is
            used while ``allow_exec`` is false, or if an operand has an
            unexpected type.
        UnknownOperatorError: if the operator is not handled here.
    """
    if not expression.pattern_id:
        raise SemgrepError(
            f"expected expression '{expression}' to have pattern_id")

    matches_for_pattern = pattern_ids_to_pattern_matches.get(
        expression.pattern_id, [])
    ranges_for_pattern = {match.range for match in matches_for_pattern}
    operator = expression.operator

    if operator == OPERATORS.AND:
        # keep only ranges equal to a range of this pattern
        return filter_ranges_with_propagation(
            ranges_left,
            ranges_for_pattern,
            predicate=lambda r1, r2: r1 == r2,
            metavariable_propagation=metavariable_propagation,
        )

    if operator == OPERATORS.AND_NOT:
        # drop every range that equals a range of this pattern
        return ranges_left.difference(ranges_for_pattern)

    if operator == OPERATORS.AND_INSIDE:
        # keep only ranges enclosed by (or equal to) one of the inside ranges
        return filter_ranges_with_propagation(
            ranges_left,
            ranges_for_pattern,
            predicate=lambda r1, r2: r1.is_enclosing_or_eq(r2),
            metavariable_propagation=metavariable_propagation,
        )

    if operator == OPERATORS.AND_NOT_INSIDE:
        # drop every range enclosed by (or equal to) one of the pattern ranges
        outside = set()
        for candidate in ranges_left:
            if any(
                    pattern_range.is_enclosing_or_eq(candidate)
                    for pattern_range in ranges_for_pattern):
                continue
            outside.add(candidate)
        return outside

    if operator == OPERATORS.REGEX:
        # keep only ranges equal to a range of this pattern
        return ranges_left.intersection(ranges_for_pattern)

    if operator == OPERATORS.NOT_REGEX:
        # drop a range when it encloses, or is enclosed by, a regex range
        non_overlapping = set()
        for candidate in ranges_left:
            if any(
                    candidate.is_range_enclosing_or_eq(pattern_range)
                    or pattern_range.is_range_enclosing_or_eq(candidate)
                    for pattern_range in ranges_for_pattern):
                continue
            non_overlapping.add(candidate)
        return non_overlapping

    if operator == OPERATORS.WHERE_PYTHON:
        if not allow_exec:
            raise SemgrepError(
                f"at least one rule needs to execute arbitrary code; this is dangerous! if you want to continue, enable the flag: {RCE_RULE_FLAG}",
                code=NEED_ARBITRARY_CODE_EXEC_EXIT_CODE,
            )
        python_source = expression.operand
        if not isinstance(python_source, str):
            raise SemgrepError(
                f"expected operator '{expression.operator}' to have string value guaranteed by schema"
            )
        return get_where_python_range_matches(
            python_source,
            ranges_left,
            list(flatten(pattern_ids_to_pattern_matches.values())),
        )

    if operator == OPERATORS.METAVARIABLE_REGEX:
        operand = expression.operand
        if not isinstance(operand, YamlMap):
            raise SemgrepError(
                f"expected operator '{expression.operator}' to have mapping value guaranteed by schema"
            )
        return get_re_range_matches(
            operand["metavariable"].value,
            operand["regex"].value,
            ranges_left,
            list(flatten(pattern_ids_to_pattern_matches.values())),
        )

    if operator == OPERATORS.METAVARIABLE_COMPARISON:
        operand = expression.operand
        if not isinstance(operand, YamlMap):
            raise SemgrepError(
                f"expected operator '{expression.operator}' to have mapping value guaranteed by schema"
            )
        # "strip" and "base" are optional entries; absent ones become None.
        strip = operand.get("strip")
        base = operand.get("base")
        return get_comparison_range_matches(
            operand["metavariable"].value,
            operand["comparison"].value,
            None if strip is None else strip.value,
            None if base is None else base.value,
            ranges_left,
            list(flatten(pattern_ids_to_pattern_matches.values())),
        )

    raise UnknownOperatorError(f"unknown operator {expression.operator}")
Exemple #4
0
def _evaluate_single_expression(
    expression: BooleanRuleExpression,
    pattern_ids_to_pattern_matches: Dict[PatternId, List[PatternMatch]],
    ranges_left: Set[Range],
    steps_for_debugging: List[Dict[str, Any]],
    flags: Optional[Dict[str, Any]] = None,
) -> Set[Range]:
    """Apply a single childless operator to the candidate ranges.

    After the operator has been applied, the result is reported through
    ``add_debugging_info`` together with the output of
    ``get_metavar_debugging_info``.

    Args:
        expression: the leaf expression; it must have a truthy ``pattern_id``.
        pattern_ids_to_pattern_matches: pattern matches keyed by pattern id.
        ranges_left: candidate ranges still under consideration; this
            function does not modify the set itself.
        steps_for_debugging: passed to ``add_debugging_info``.
        flags: optional flags; ``WHERE_PYTHON`` is only evaluated when the
            ``RCE_RULE_FLAG`` entry is present and truthy.

    Returns:
        The ranges produced by the operator.

    Raises:
        SemgrepError: if ``pattern_id`` is missing, if ``WHERE_PYTHON`` is
            used without the ``RCE_RULE_FLAG`` flag enabled, or if an operand
            has an unexpected type.
        UnknownOperatorError: if the operator is not handled here.
    """
    if not expression.pattern_id:
        raise SemgrepError(
            f"expected expression '{expression}' to have pattern_id")

    ranges_for_pattern = [
        x.range
        for x in pattern_ids_to_pattern_matches.get(expression.pattern_id, [])
    ]

    if expression.operator == OPERATORS.AND:
        # remove all ranges that don't equal the ranges for this pattern
        output_ranges = ranges_left.intersection(ranges_for_pattern)
    elif expression.operator == OPERATORS.AND_NOT:
        # remove all ranges that DO equal the ranges for this pattern
        output_ranges = ranges_left.difference(ranges_for_pattern)
    elif expression.operator == OPERATORS.AND_INSIDE:
        # remove all ranges (not enclosed by) or (not equal to) the inside ranges
        output_ranges = {
            _range
            for _range in ranges_left if any(
                pattern_range.is_enclosing_or_eq(_range)
                for pattern_range in ranges_for_pattern)
        }
    elif expression.operator == OPERATORS.AND_NOT_INSIDE:
        # remove all ranges enclosed by or equal to
        output_ranges = {
            _range
            for _range in ranges_left if not any(
                pattern_range.is_enclosing_or_eq(_range)
                for pattern_range in ranges_for_pattern)
        }
    elif expression.operator == OPERATORS.WHERE_PYTHON:
        # Use .get(): a flags dict that simply lacks the key must be treated
        # as "not enabled" and produce the SemgrepError below, not a KeyError.
        if not flags or not flags.get(RCE_RULE_FLAG):
            raise SemgrepError(
                f"at least one rule needs to execute arbitrary code; this is dangerous! if you want to continue, enable the flag: {RCE_RULE_FLAG}",
                code=NEED_ARBITRARY_CODE_EXEC_EXIT_CODE,
            )
        if not isinstance(expression.operand, str):
            raise SemgrepError(
                f"expected operator '{expression.operator}' to have string value guaranteed by schema"
            )
        # Consider matches of every pattern, but only those whose range is
        # still a candidate.
        output_ranges = {
            pattern_match.range
            for pattern_match in list(
                flatten(pattern_ids_to_pattern_matches.values()))
            if pattern_match.range in ranges_left
            and _where_python_statement_matches(expression.operand,
                                                pattern_match.metavars)
        }
    elif expression.operator == OPERATORS.REGEX:
        # remove all ranges that don't equal the ranges for this pattern
        output_ranges = ranges_left.intersection(ranges_for_pattern)
    elif expression.operator == OPERATORS.NOT_REGEX:
        # remove the result if pattern-not-regex is within another pattern
        output_ranges = {
            _range
            for _range in ranges_left if not any(
                _range.is_range_enclosing_or_eq(pattern_range)
                or pattern_range.is_range_enclosing_or_eq(_range)
                for pattern_range in ranges_for_pattern)
        }
    elif expression.operator == OPERATORS.METAVARIABLE_REGEX:
        if not isinstance(expression.operand, dict):
            raise SemgrepError(
                f"expected operator '{expression.operator}' to have mapping value guaranteed by schema"
            )
        output_ranges = get_re_range_matches(
            expression.operand["metavariable"],
            expression.operand["regex"],
            ranges_left,
            list(flatten(pattern_ids_to_pattern_matches.values())),
        )
    elif expression.operator == OPERATORS.METAVARIABLE_COMPARISON:
        if not isinstance(expression.operand, dict):
            raise SemgrepError(
                f"expected operator '{expression.operator}' to have mapping value guaranteed by schema"
            )
        # "strip" and "base" are optional entries; absent ones become None.
        output_ranges = get_comparison_range_matches(
            expression.operand["metavariable"],
            expression.operand["comparison"],
            expression.operand.get("strip"),
            expression.operand.get("base"),
            ranges_left,
            list(flatten(pattern_ids_to_pattern_matches.values())),
        )
    else:
        raise UnknownOperatorError(f"unknown operator {expression.operator}")

    metavars_for_patterns = get_metavar_debugging_info(
        expression, pattern_ids_to_pattern_matches)
    add_debugging_info(expression, output_ranges, metavars_for_patterns,
                       steps_for_debugging)
    return output_ranges
Exemple #5
0
def _evaluate_single_expression(
    expression: BooleanRuleExpression,
    pattern_ids_to_pattern_matches: Dict[PatternId, List[PatternMatch]],
    ranges_left: Set[Range],
    steps_for_debugging: List[Dict[str, Any]],
    flags: Optional[Dict[str, Any]] = None,
) -> Set[Range]:
    """Apply a single childless operator to the candidate ranges.

    Every handled operator reports its result through ``add_debugging_info``
    exactly once, after the result has been computed.

    Args:
        expression: the leaf expression; it must have a truthy ``pattern_id``.
        pattern_ids_to_pattern_matches: pattern matches keyed by pattern id.
        ranges_left: candidate ranges still under consideration; this
            function does not modify the set itself.
        steps_for_debugging: passed to ``add_debugging_info``.
        flags: optional flags; ``WHERE_PYTHON`` is only evaluated when the
            ``RCE_RULE_FLAG`` entry is present and truthy.

    Returns:
        The ranges produced by the operator.

    Raises:
        AssertionError: if ``expression.pattern_id`` is falsy.
        SemgrepError: if ``WHERE_PYTHON`` is used without the
            ``RCE_RULE_FLAG`` flag enabled, or its operand is not a string.
        UnknownOperatorError: if the operator is not handled here.
    """
    assert expression.pattern_id, f"<internal error: expected pattern id: {expression}>"
    results_for_pattern = [
        x.range
        for x in pattern_ids_to_pattern_matches.get(expression.pattern_id, [])
    ]
    metavars_for_patterns = get_metavar_debugging_info(
        expression, pattern_ids_to_pattern_matches)

    output_ranges: Set[Range]
    if expression.operator == OPERATORS.AND:
        # remove all ranges that don't equal the ranges for this pattern
        # (computed BEFORE the debugging step is recorded, so the step shows
        # the real result instead of an empty set)
        output_ranges = ranges_left.intersection(results_for_pattern)
    elif expression.operator == OPERATORS.AND_NOT:
        # remove all ranges that DO equal the ranges for this pattern
        output_ranges = ranges_left.difference(results_for_pattern)
    elif expression.operator == OPERATORS.AND_INSIDE:
        # remove all ranges (not enclosed by) or (not equal to) the inside ranges
        output_ranges = {
            arange
            for arange in ranges_left if any(
                keep_inside_this_range.is_enclosing_or_eq(arange)
                for keep_inside_this_range in results_for_pattern)
        }
    elif expression.operator == OPERATORS.AND_NOT_INSIDE:
        # remove all ranges enclosed by or equal to
        output_ranges = {
            arange
            for arange in ranges_left if not any(
                exclude_inside_this_range.is_enclosing_or_eq(arange)
                for exclude_inside_this_range in results_for_pattern)
        }
    elif expression.operator == OPERATORS.WHERE_PYTHON:
        # Use .get(): a flags dict that simply lacks the key must be treated
        # as "not enabled" and produce the SemgrepError below, not a KeyError.
        if not flags or not flags.get(RCE_RULE_FLAG):
            raise SemgrepError(
                f"at least one rule needs to execute arbitrary code; this is dangerous! if you want to continue, enable the flag: {RCE_RULE_FLAG}",
                code=NEED_ARBITRARY_CODE_EXEC_EXIT_CODE,
            )
        if not isinstance(expression.operand, str):
            raise SemgrepError("pattern-where-python must have a string value")

        output_ranges = set()
        # Look at the matches of every pattern ...
        for pattern_match in list(
                flatten(pattern_ids_to_pattern_matches.values())):
            # ... but only check the where-python clause if the range hasn't
            # already been filtered out
            if pattern_match.range in ranges_left:
                logger.debug(
                    f"WHERE is {expression.operand}, metavars: {pattern_match.metavars}"
                )
                if _where_python_statement_matches(expression.operand,
                                                   pattern_match.metavars):
                    output_ranges.add(pattern_match.range)
    elif expression.operator == OPERATORS.REGEX:
        # remove all ranges that don't equal the ranges for this pattern
        output_ranges = ranges_left.intersection(results_for_pattern)
    elif expression.operator == OPERATORS.METAVARIABLE_REGEX:
        output_ranges = get_re_range_matches(
            expression.operand["metavariable"],
            expression.operand["regex"],
            ranges_left,
            list(flatten(pattern_ids_to_pattern_matches.values())),
        )
    elif expression.operator == OPERATORS.METAVARIABLE_COMPARISON:
        # "strip" and "base" are optional entries; absent ones become None.
        output_ranges = get_comparison_range_matches(
            expression.operand["metavariable"],
            expression.operand["comparison"],
            expression.operand.get("strip"),
            expression.operand.get("base"),
            ranges_left,
            list(flatten(pattern_ids_to_pattern_matches.values())),
        )
    else:
        raise UnknownOperatorError(f"unknown operator {expression.operator}")

    # Single exit: record the debugging step once, with the final result.
    add_debugging_info(expression, output_ranges, metavars_for_patterns,
                       steps_for_debugging)
    return output_ranges
Exemple #6
0
def _evaluate_single_expression(
    expression: BooleanRuleExpression,
    pattern_ids_to_pattern_matches: Dict[PatternId, List[PatternMatch]],
    ranges_left: Set[Range],
    steps_for_debugging: List[Dict[str, Any]],
    flags: Optional[Dict[str, Any]] = None,
) -> Set[Range]:
    """Apply a single childless operator to the candidate ranges.

    Every handled operator (including ``AND``) emits one ``debug_print``
    line and appends one entry to ``steps_for_debugging`` describing the
    filter, its pattern id and the resulting ranges.

    Args:
        expression: the leaf expression; it must have a truthy ``pattern_id``.
        pattern_ids_to_pattern_matches: pattern matches keyed by pattern id.
        ranges_left: candidate ranges still under consideration; this
            function does not modify the set itself.
        steps_for_debugging: list that receives the debugging entry.
        flags: optional flags; ``WHERE_PYTHON`` is only evaluated when the
            ``RCE_RULE_FLAG`` entry is present and truthy.

    Returns:
        The ranges produced by the operator.

    Raises:
        AssertionError: if ``expression.pattern_id`` is falsy, or if
            ``WHERE_PYTHON`` has a falsy operand.
        SemgrepError: if ``WHERE_PYTHON`` is used without the
            ``RCE_RULE_FLAG`` flag enabled.
        UnknownOperatorError: if the operator is not handled here.
    """
    assert expression.pattern_id, f"<internal error: expected pattern id: {expression}>"
    results_for_pattern = [
        x.range
        for x in pattern_ids_to_pattern_matches.get(expression.pattern_id, [])
    ]

    output_ranges: Set[Range]
    if expression.operator == OPERATORS.AND:
        # remove all ranges that don't equal the ranges for this pattern
        output_ranges = ranges_left.intersection(results_for_pattern)
    elif expression.operator == OPERATORS.AND_NOT:
        # remove all ranges that DO equal the ranges for this pattern
        output_ranges = ranges_left.difference(results_for_pattern)
    elif expression.operator == OPERATORS.AND_INSIDE:
        # remove all ranges (not enclosed by) or (not equal to) the inside ranges
        output_ranges = {
            arange
            for arange in ranges_left if any(
                keep_inside_this_range.is_enclosing_or_eq(arange)
                for keep_inside_this_range in results_for_pattern)
        }
    elif expression.operator == OPERATORS.AND_NOT_INSIDE:
        # remove all ranges enclosed by or equal to
        output_ranges = {
            arange
            for arange in ranges_left if not any(
                exclude_inside_this_range.is_enclosing_or_eq(arange)
                for exclude_inside_this_range in results_for_pattern)
        }
    elif expression.operator == OPERATORS.WHERE_PYTHON:
        # Use .get(): a flags dict that simply lacks the key must be treated
        # as "not enabled" and produce the SemgrepError below, not a KeyError.
        if not flags or not flags.get(RCE_RULE_FLAG):
            raise SemgrepError(
                f"at least one rule needs to execute arbitrary code; this is dangerous! if you want to continue, enable the flag: {RCE_RULE_FLAG}",
                code=NEED_ARBITRARY_CODE_EXEC_EXIT_CODE,
            )
        assert expression.operand, "must have operand for this operator type"

        output_ranges = set()
        # Look at the matches of every pattern ...
        for pattern_match in list(
                flatten(pattern_ids_to_pattern_matches.values())):
            # ... but only check the where-python clause if the range hasn't
            # already been filtered out
            if pattern_match.range in ranges_left:
                debug_print(
                    f"WHERE is {expression.operand}, metavars: {pattern_match.metavars}"
                )
                if _where_python_statement_matches(expression.operand,
                                                   pattern_match.metavars):
                    output_ranges.add(pattern_match.range)
    elif expression.operator == OPERATORS.REGEX:
        # remove all ranges that don't equal the ranges for this pattern
        output_ranges = ranges_left.intersection(results_for_pattern)
    else:
        raise UnknownOperatorError(f"unknown operator {expression.operator}")

    # Single exit: record the step once for every operator, so the debugging
    # trace never skips a filter that was applied.
    debug_print(f"after filter `{expression.operator}`: {output_ranges}")
    steps_for_debugging.append({
        "filter": pattern_name_for_operator(expression.operator),
        "pattern_id": expression.pattern_id,
        "ranges": list(output_ranges),
    })
    return output_ranges