def handle_file(filename: str, output: TextOutput):
    """Scan one file for regexes and record the ReDoS findings.

    The file is read as bytes and handed to ``find_regexes``. Patterns
    shorter than five characters, or containing fewer than two ``*`` / ``+``
    / ``,}`` occurrences in total, are skipped. Each remaining pattern is
    parsed with ``SreOpParser``; when that fails the pattern is rewritten
    with ``fix_js_regex``, checked with ``re.compile`` and parsed again.
    Every finding from ``find`` whose ``starriness`` is above 2 is passed to
    ``output.record``.

    Args:
        filename: Path of the file to scan.
        output: Receives ``next()`` once per analysed pattern and
            ``record(...)`` once per reported finding.
    """
    with open(filename, "rb") as f:
        code = f.read()
    # Split once per file rather than once per finding.
    source_lines = code.splitlines()
    for regex in find_regexes(code):
        pattern = regex.pattern
        if len(pattern) < 5:
            continue  # (.+)+
        if pattern.count("*") + pattern.count("+") + pattern.count(",}") < 2:
            continue  # no ReDoS possible
        try:
            logging.debug("%s#%s: %s", filename, regex.lineno, pattern)
            parsed = SreOpParser().parse_sre(pattern, regex.flags)
        except Exception:
            # ``except Exception`` (not a bare ``except``) so that Ctrl-C and
            # SystemExit still stop the scan.
            try:
                fixed = fix_js_regex(pattern)
                re.compile(fixed, regex.flags)
            except Exception:
                if regex.definitely_regex:
                    print(
                        f"Error parsing: {pattern} from {filename} line {regex.lineno}\n"
                    )
                continue
            try:
                parsed = SreOpParser().parse_sre(fixed, regex.flags)
            except Exception:
                print(f"Error in regexploit parsing: {pattern} from {filename}")
                print(traceback.format_exc())
                continue
        try:
            output.next()
            for redos in find(parsed):
                if redos.starriness > 2:
                    context = None
                    try:
                        context = source_lines[regex.lineno - 1].decode().strip()
                    except (IndexError, UnicodeDecodeError):
                        # No usable source line: report without context
                        # instead of losing the finding.
                        pass
                    output.record(
                        redos,
                        pattern,
                        filename=filename,
                        lineno=regex.lineno,
                        context=context,
                    )
        except Exception:
            print(f"Error finding ReDoS: {pattern} from {filename} #{regex.lineno}")
            print(traceback.format_exc())
def javascript(pattern: str, flags: int):
    """Parse ``pattern``, retrying with ``fix_js_regex`` if the first parse fails.

    Args:
        pattern: Regex source to parse.
        flags: Accepted but not used by this function.
            NOTE(review): neither ``parse_sre`` nor ``re.compile`` receives
            ``flags`` here - confirm that ignoring them is intended.

    Returns:
        The result of ``SreOpParser().parse_sre`` for the original pattern
        or, failing that, for the rewritten pattern.

    Raises:
        Exception: Whatever ``fix_js_regex``, ``re.compile`` or the second
            ``parse_sre`` call raises. A failure of the second parse is also
            printed as a traceback before being re-raised.
    """
    try:
        return SreOpParser().parse_sre(pattern)
    except Exception:
        # ``except Exception`` (not a bare ``except``) so that
        # KeyboardInterrupt / SystemExit are not turned into a retry.
        # Errors from the rewrite or from re.compile propagate unchanged.
        fixed = fix_js_regex(pattern)
        re.compile(fixed)
        try:
            return SreOpParser().parse_sre(fixed)
        except Exception:
            print(traceback.format_exc())
            raise
def handle_file(filename: str, output: TextOutput):
    """Scan one Python source file and record the ReDoS findings.

    The file is read as bytes, parsed with ``ast.parse`` and walked with
    ``PythonNodeVisitor``; every entry of the visitor's ``patterns`` is then
    parsed with ``SreOpParser``. Entries that raise ``re.error`` are skipped.
    Every finding from ``find`` whose ``starriness`` is above 2 is passed to
    ``output.record``.

    Args:
        filename: Path of the file to scan.
        output: Receives ``next()`` once per analysed pattern and
            ``record(...)`` once per reported finding.
    """
    with open(filename, "rb") as f:
        code = f.read()
    try:
        code_ast = ast.parse(code)
        pnv = PythonNodeVisitor()
        pnv.visit(code_ast)
    except RecursionError:
        print(f"RecursionError parsing AST for {filename}")
        return
    except (SyntaxError, ValueError) as e:
        # ValueError: source containing null bytes is rejected this way on
        # Python versions before 3.12.
        print(f"Bad Python3 syntax in {filename}: {e}")
        return
    # Split once per file rather than once per finding.
    source_lines = code.splitlines()
    for regex in pnv.patterns:
        try:
            parsed = SreOpParser().parse_sre(regex.pattern, regex.flags)
        except re.error:
            continue  # We will have many strings which aren't actually regexes
        try:
            output.next()
            for redos in find(parsed):
                if redos.starriness > 2:
                    context = None
                    try:
                        context = source_lines[regex.lineno - 1].decode().strip()
                    except (IndexError, UnicodeDecodeError):
                        # No usable source line: report without context
                        # instead of losing the finding.
                        pass
                    output.record(
                        redos,
                        regex.pattern,
                        filename=filename,
                        lineno=regex.lineno,
                        context=context,
                    )
        except Exception:
            print(
                f"Error finding ReDoS: {regex.pattern} from {filename} #{regex.lineno}"
            )
            print(traceback.format_exc())
def handle(self, elem):
    """Check ``elem`` for ReDoS, descending into lists and dict values.

    Strings longer than five characters are parsed with ``SreOpParser``;
    those that raise ``re.error`` are dropped silently. Findings from
    ``find`` with ``starriness`` above 2 are sent to ``self.output.record``.
    Lists and dict values are handled recursively; any other value
    (including shorter strings) is ignored.
    """
    is_candidate = isinstance(elem, str) and len(elem) > 5
    if not is_candidate:
        if isinstance(elem, list):
            children = elem
        elif isinstance(elem, dict):
            children = elem.values()
        else:
            return
        for child in children:
            self.handle(child)
        return

    try:
        parsed = SreOpParser().parse_sre(elem)
    except re.error:
        return  # We will have many strings which aren't actually regexes

    try:
        self.output.next()
        for redos in find(parsed):
            if redos.starriness <= 2:
                continue
            self.output.record(redos, elem, filename=self.filename)
    except Exception:
        print(f"Error finding ReDoS: {elem} from {self.filename}")
        print(traceback.format_exc())
def from_regex(pattern: str) -> Union[Repeat, Character]:
    """Convert a pattern made of a single parsed operation.

    ``sre_parse(pattern)`` must yield exactly one item; the unpacking below
    raises ``ValueError`` otherwise. That item is expanded into the
    arguments of ``SreOpParser().parse_op`` and the result is returned.
    """
    operations = sre_parse(pattern)
    (single_op,) = operations
    return SreOpParser().parse_op(*single_op)
def test_negative_lookahead_finite():
    """``(?!b)[a-d]{1,3}`` must parse equal to ``[acd][a-d]{0,2}``."""
    with_lookahead = SreOpParser().parse_sre(r"(?!b)[a-d]{1,3}")
    expected = SreOpParser().parse_sre(r"[acd][a-d]{0,2}")
    assert with_lookahead == expected
def test_negative_lookahead_infinite():
    """``(?!b)[a-d]+`` must parse equal to ``[acd][a-d]*``."""
    with_lookahead = SreOpParser().parse_sre(r"(?!b)[a-d]+")
    expected = SreOpParser().parse_sre(r"[acd][a-d]*")
    assert with_lookahead == expected
def from_regex(pattern: str):
    """Return ``pattern`` parsed by a fresh ``SreOpParser`` (no flags passed)."""
    parser = SreOpParser()
    return parser.parse_sre(pattern)
def from_regex(pattern: str) -> Character:
    """Convert a single-operation pattern into a ``Character``.

    ``sre_parse(pattern)`` must yield exactly one item (the unpacking raises
    ``ValueError`` otherwise), and ``parse_op`` must return a ``Character``
    for it, which is enforced with an assertion.
    """
    operations = sre_parse(pattern)
    (single_op,) = operations
    parsed = SreOpParser().parse_op(*single_op)
    assert isinstance(parsed, Character)
    return parsed
def test_category_category_covers_none():
    """Parsing the negated class ``[^x0-9\\w\\W]`` must give ``None``."""
    parsed = SreOpParser().parse_sre(r"[^x0-9\w\W]")
    assert parsed is None
def test_negative_lookahead():
    """Several negative lookaheads before ``\\d`` must compare equal to ``[13579]``."""
    stacked = SreOpParser().parse_sre(r"(?![0248])(?!6)(?!a)(?!xyz123)\d")
    expected = from_regex(r"[13579]")
    assert stacked == expected
def from_regex(pattern: str, flags: int = 0) -> Sequence:
    """Return ``pattern`` parsed with ``flags`` by a fresh ``SreOpParser``."""
    parser = SreOpParser()
    return parser.parse_sre(pattern, flags)
def handle_line_from_node(line: str, output: TextOutput): regex = json.loads(line) if pattern := regex.get("pattern"): if (pattern_len := len(pattern)) < 5: return # (.+)+ if pattern_len == 8059 and pattern.startswith("\\u{1F3F4}(?:\\u{E0067"): return # annoying emoji regex if pattern.count("*") + pattern.count("+") + pattern.count(",}") < 2: return # no ReDoS possible filename = regex["filename"] lineno = regex["lineno"] try: logging.debug("%s#%s: %s", filename, lineno, pattern) parsed = SreOpParser().parse_sre(pattern) except: try: fixed = fix_js_regex(pattern) re.compile(fixed) except: print(f"Error parsing: {pattern} from {filename}\n") return try: parsed = SreOpParser().parse_sre(fixed) except: print(f"Error in regexploit parsing: {pattern} from {filename}") print(traceback.format_exc()) return output.next() try:
def python(pattern: str, flags: int):
    """Return ``pattern`` parsed with ``flags`` by a fresh ``SreOpParser``."""
    parser = SreOpParser()
    return parser.parse_sre(pattern, flags)