def find_redos(pattern: str, flags: int, output: TextOutput, parser):
    """Yield the reportable ReDoS findings for a single regex.

    This is a generator, so nothing runs until it is iterated.

    Args:
        pattern: Regex source text.
        flags: Flags forwarded to ``parser`` together with ``pattern``.
        output: Collector; ``next()`` is called once after a successful
            parse and ``record()`` once for every finding that is yielded.
        parser: Callable invoked as ``parser(pattern, flags)``; its result
            is handed to ``find``.

    Yields:
        Each result of ``find`` whose ``starriness`` is greater than 2.
    """
    try:
        tree = parser(pattern, flags)
    except Exception as exc:
        # A pattern that cannot be parsed is reported and yields nothing.
        print(f"Error parsing: {pattern}", exc)
        return

    output.next()
    for candidate in find(tree):
        if candidate.starriness <= 2:
            continue
        output.record(candidate, pattern)
        yield candidate
def handle_file(filename: str, output: TextOutput):
    """Scan every regex extracted from *filename* and record likely ReDoS.

    The file is read as bytes and handed to ``find_regexes``.  Patterns that
    are shorter than 5 characters or contain fewer than two repetition
    operators are skipped.  Each remaining pattern is parsed with
    ``SreOpParser``; if that fails, the pattern is rewritten with
    ``fix_js_regex`` and parsed again.  Every finding whose ``starriness``
    exceeds 2 is passed to ``output.record`` along with the file name, the
    line number and, when it can be retrieved, the stripped source line.

    Failures while parsing or analysing one pattern are printed (or silently
    skipped for candidates that are not definitely regexes) so the remaining
    patterns are still scanned.  ``KeyboardInterrupt`` and ``SystemExit`` are
    deliberately not caught.

    Args:
        filename: Path of the file to scan.
        output: Collector; ``next()`` is called once per analysed pattern and
            ``record()`` once per reported finding.
    """
    with open(filename, "rb") as f:
        code = f.read()
    # Split once per file; the lines are only used to show finding context.
    source_lines = code.splitlines()
    for regex in find_regexes(code):
        pattern = regex.pattern
        if len(pattern) < 5:
            continue  # shorter than e.g. (.+)+
        if pattern.count("*") + pattern.count("+") + pattern.count(",}") < 2:
            continue  # no ReDoS possible
        try:
            logging.debug("%s#%s: %s", filename, regex.lineno, pattern)
            parsed = SreOpParser().parse_sre(pattern, regex.flags)
        except Exception:
            # Second chance: rewrite the pattern with fix_js_regex and make
            # sure the result at least compiles before parsing it again.
            try:
                fixed = fix_js_regex(pattern)
                re.compile(fixed, regex.flags)
            except Exception:
                if regex.definitely_regex:
                    print(
                        f"Error parsing: {pattern} from {filename} line {regex.lineno}\n"
                    )
                continue
            try:
                parsed = SreOpParser().parse_sre(fixed, regex.flags)
            except Exception:
                print(
                    f"Error in regexploit parsing: {pattern} from {filename}")
                print(traceback.format_exc())
                continue
        try:
            output.next()
            for redos in find(parsed):
                if redos.starriness > 2:
                    # Context is best effort: a missing or undecodable line
                    # must not prevent the finding from being recorded.
                    context = None
                    try:
                        context = source_lines[regex.lineno -
                                               1].decode().strip()
                    except (IndexError, UnicodeDecodeError):
                        pass
                    output.record(
                        redos,
                        pattern,
                        filename=filename,
                        lineno=regex.lineno,
                        context=context,
                    )
        except Exception:
            print(
                f"Error finding ReDoS: {pattern} from {filename} #{regex.lineno}"
            )
            print(traceback.format_exc())
Beispiel #3
0
def main():
    """Command-line entry point: scan Python files for ReDoS-prone regexes.

    Parses the command line, optionally enables debug logging, runs
    ``handle_file`` on every path produced by ``file_generator`` and finally
    prints how many regexes were processed.
    """
    with warnings.catch_warnings():
        # Some weird regexes emit warnings
        for noisy_category in (FutureWarning, DeprecationWarning):
            warnings.simplefilter("ignore", category=noisy_category)

        cli = argparse.ArgumentParser(
            description="Parse regexes out of python files and scan them for ReDoS"
        )
        cli.add_argument("files", nargs="+", help="Python files or directories")
        cli.add_argument(
            "--glob", action="store_true", help="Glob the input filenames (**/*)"
        )
        cli.add_argument("--verbose", action="store_true", help="Verbose logging")
        cli.add_argument(
            "--ignore", action="append", help="Paths containing this string are ignored"
        )
        options = cli.parse_args()

        if options.verbose:
            logging.basicConfig(level=logging.DEBUG)

        paths = file_generator(
            options.files, options.glob, ["*.py"], options.ignore
        )
        report = TextOutput()
        for path in paths:
            logging.debug(path)
            handle_file(path, report)
        print(f"Processed {report.regexes} regexes")
Beispiel #4
0
def handle_file(filename: str, output: TextOutput):
    """Scan the string patterns of one Python source file for ReDoS.

    The file is parsed with ``ast`` and walked by ``PythonNodeVisitor``; each
    entry of its ``patterns`` is parsed with ``SreOpParser``.  Entries that
    raise ``re.error`` are skipped silently.  Every finding whose
    ``starriness`` exceeds 2 is passed to ``output.record`` along with the
    file name, the line number and, when it can be retrieved, the stripped
    source line.

    A file that cannot be parsed (``SyntaxError`` or ``RecursionError``) is
    reported on stdout and skipped.  An error while analysing one pattern is
    printed with its traceback and the scan continues with the next pattern.

    Args:
        filename: Path of the Python file to scan.
        output: Collector; ``next()`` is called once per analysed pattern and
            ``record()`` once per reported finding.
    """
    with open(filename, "rb") as f:
        code = f.read()
    try:
        code_ast = ast.parse(code)
        pnv = PythonNodeVisitor()
        pnv.visit(code_ast)
    except RecursionError:
        print(f"RecursionError parsing AST for {filename}")
        return
    except SyntaxError as e:
        print(f"Bad Python3 syntax in {filename}: {e}")
        return
    # Split once per file; the lines are only used to show finding context.
    source_lines = code.splitlines()
    for regex in pnv.patterns:
        try:
            parsed = SreOpParser().parse_sre(regex.pattern, regex.flags)
        except re.error:
            continue  # We will have many strings which aren't actually regexes
        try:
            output.next()
            for redos in find(parsed):
                if redos.starriness > 2:
                    # Context is best effort: a missing or undecodable line
                    # must not prevent the finding from being recorded.
                    context = None
                    try:
                        context = source_lines[regex.lineno - 1].decode().strip()
                    except (IndexError, UnicodeDecodeError):
                        pass
                    output.record(
                        redos,
                        regex.pattern,
                        filename=filename,
                        lineno=regex.lineno,
                        context=context,
                    )
        except Exception:
            print(
                f"Error finding ReDoS: {regex.pattern} from {filename} #{regex.lineno}"
            )
            print(traceback.format_exc())
def main():
    """Command-line entry point: read regexes line by line and scan each one.

    Every input line is treated as one regex and passed to ``find_redos``
    with the parser selected by ``--flavour``.  With ``--unescape`` the line
    is first run through the ``unicode_escape`` codec (e.g. ``\\\\d`` becomes
    ``\\d``).  When stdin is a terminal a greeting is printed and lines
    without findings get a "No ReDoS found." reply.  Ctrl-C ends the loop
    quietly.
    """
    parser = argparse.ArgumentParser(
        description="Parse regexes from stdin and scan them for ReDoS")
    parser.add_argument(
        "-f",
        "--flavour",
        "--flavor",
        choices=["python", "js"],
        default="python",
        help="Regex language",
    )
    parser.add_argument("-v",
                        "--verbose",
                        action="count",
                        default=0,
                        help="Verbose logging")
    parser.add_argument(
        "-u",
        "--unescape",
        action="store_true",
        help=
        "Unescape the regular expressions before parsing them (e.g. double backslashes)",
    )
    args = parser.parse_args()
    # fileinput.input() treats sys.argv[1:] as file names; drop the options
    # that were just parsed so it falls back to reading stdin.
    sys.argv = sys.argv[:1]
    if args.verbose == 1:
        logging.basicConfig(level=logging.INFO)
    elif args.verbose > 1:
        logging.basicConfig(level=logging.DEBUG)

    isatty = sys.stdin.isatty()
    if isatty:
        print("Welcome to Regexploit. Enter your regexes:")
    output = TextOutput()
    try:
        for line in fileinput.input():
            found = False
            line = line.rstrip("\n")
            if args.unescape:
                # \\d -> \d
                # unicode_escape decodes its input as Latin-1, so a UTF-8
                # round trip would turn "é" into "Ã©".  Encoding as Latin-1
                # with backslashreplace keeps non-ASCII characters intact.
                try:
                    line = line.encode(
                        "latin-1", "backslashreplace"
                    ).decode("unicode_escape")
                except UnicodeDecodeError as e:
                    # A malformed escape (e.g. a trailing backslash) should
                    # only skip this line, not end the whole session.
                    print(f"Error unescaping: {line}", e)
                    continue
            for _ in find_redos(
                    line, 0, output,
                    javascript if args.flavour == "js" else python):
                found = True
            if isatty and not found:
                print("No ReDoS found.")
    except KeyboardInterrupt:
        pass
def main():
    """Command-line entry point: scan JavaScript/TypeScript files for ReDoS.

    Exits with status 1 and an install hint when the ``javascript``
    directory next to this module has no ``node_modules``.  Otherwise the
    paths produced by ``file_generator`` are passed to ``process_files`` in
    batches of up to 50, and the number of processed regexes is printed once
    all batches are done.
    """
    js_dir = os.path.join(os.path.split(__file__)[0], "javascript")
    if not os.path.isdir(os.path.join(js_dir, "node_modules")):
        print("The JavaScript & TypeScript parsers require some node modules.\n")
        print(f"Run (cd {js_dir}; npm install)")
        sys.exit(1)
    with warnings.catch_warnings():
        warnings.simplefilter(
            "ignore", category=FutureWarning
        )  # Some js regexes are weird
        parser = argparse.ArgumentParser(
            description="Parse regexes out of javascript files and scan them for ReDoS"
        )
        parser.add_argument("files", nargs="+", help="Javascript or typescript files")
        parser.add_argument(
            "--node",
            help="Location of nodejs executable (rather than using node from PATH)",
        )
        parser.add_argument(
            "--glob", action="store_true", help="Glob the input filenames (**/*)"
        )
        parser.add_argument("--verbose", action="store_true", help="Verbose logging")
        parser.add_argument(
            "--ignore", action="append", help="Paths containing this string are ignored"
        )
        args = parser.parse_args()

        if args.verbose:
            logging.basicConfig(level=logging.DEBUG)

        output = TextOutput(js_flavour=True)
        files = file_generator(args.files, args.glob, ["*.js", "*.ts"], args.ignore)
        batch_size = 50
        batch = []
        for filename in files:
            batch.append(filename)
            if len(batch) == batch_size:
                process_files(batch, args.node, output)
                batch = []
        if batch:
            # Flush the final, partially filled batch.
            process_files(batch, args.node, output)
        # Fall through (rather than return) so the summary is always printed.
        print(f"Processed {output.regexes} regexes")