Code Example #1
import tempfile

import flutes


def test_log() -> None:
    with tempfile.NamedTemporaryFile("w") as f_tmp:
        flutes.set_log_file(f_tmp.name)
        flutes.set_log_file(f_tmp.name)  # intentionally called twice to exercise re-setting the log file
        flutes.set_logging_level("warning")
        flutes.log("info output", "info")  # below "warning", so filtered out
        flutes.log("warning output", "warning")
        flutes.log("error output", "error")
        flutes.log("success output", "success")
Code Example #2
File: run_decompiler.py  Project: xcode2010/ghcc
def main() -> None:
    if args.n_procs == 0:
        # Only do this in the single-threaded case.
        flutes.register_ipython_excepthook()
    flutes.log(f"Running with {args.n_procs} worker processes", "warning")

    # Check for/create output directories
    make_directory(args.output_dir)

    # Use RAM-backed memory for tmp if available
    if os.path.exists('/dev/shm'):
        tempfile.tempdir = '/dev/shm'

    flutes.set_log_file(args.log_file)
    write_pseudo_registry()

    # Obtain a list of all binaries
    binaries = get_binary_mapping(args.binary_mapping_cache_file)

    flutes.log(f"{len(binaries)} binaries to process.")
    file_count = 0
    db = ghcc.BinaryDB()

    with flutes.safe_pool(args.n_procs, closing=[db]) as pool:
        decompile_fn: Callable[[BinaryInfo], DecompilationResult] = functools.partial(
            decompile,
            output_dir=args.output_dir,
            binary_dir=args.binaries_dir,
            timeout=args.timeout)
        for result in pool.imap_unordered(decompile_fn,
                                          iter_binaries(db, binaries)):
            file_count += 1
            if result is not None:
                db.add_binary(result.info["repo_owner"],
                              result.info["repo_name"], result.hash,
                              result.status is DecompilationStatus.Success)
            if file_count % 100 == 0:
                flutes.log(f"Processed {file_count} binaries",
                           force_console=True)
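
Examples #2 through #4 all follow the same skeleton: bind per-run options onto the worker with functools.partial, run it under flutes.safe_pool (which cleans up the objects or callbacks passed via closing= even if the pool dies), and stream results back with imap_unordered. A stripped-down sketch of that skeleton, with a hypothetical work function and inputs:

import functools

import flutes


def work(x: int, scale: int) -> int:
    # Hypothetical worker; the real examples decompile binaries or match functions.
    return x * scale


def run() -> None:
    work_fn = functools.partial(work, scale=2)  # bind fixed options, as above
    # closing=[] mirrors closing=[db] / closing=[db, manager] in the examples:
    # the listed objects (or callbacks) are closed when the pool exits.
    with flutes.safe_pool(4, closing=[]) as pool:
        count = 0
        for result in pool.imap_unordered(work_fn, range(1000)):
            count += 1
            if count % 100 == 0:
                flutes.log(f"Processed {count} items", force_console=True)

Passing 0 as the number of processes appears to run everything in the parent process, which is why these examples register the IPython excepthook only in that case.
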
Code Example #3
File: match_functions.py  Project: xcode2010/ghcc
def main() -> None:
    if not ghcc.utils.verify_docker_image(verbose=True):
        exit(1)

    sys.setrecursionlimit(10000)
    args = Arguments()
    if args.pdb:
        flutes.register_ipython_excepthook()
        if args.n_procs == 0:
            # Swap in the undecorated function so the debugger can step into it
            # (exceptions are otherwise caught by the wrapping decorator).
            globals()['match_functions'] = match_functions.__wrapped__

    if not args.verbose:
        flutes.set_logging_level("quiet", console=True, file=False)
    flutes.set_log_file(args.log_file)
    flutes.log("Running with arguments:\n" + args.to_string(),
               force_console=True)

    if os.path.exists(args.temp_dir):
        flutes.log(
            f"Removing contents of temporary folder '{args.temp_dir}'...",
            "warning",
            force_console=True)
        ghcc.utils.run_docker_command(
            ["rm", "-rf", "/usr/src/*"],
            user=0,
            directory_mapping={args.temp_dir: "/usr/src"})

    db = ghcc.MatchFuncDB()
    output_dir = Path(args.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)
    manager = flutes.ProgressBarManager(
        verbose=args.show_progress,
        bar_format="{l_bar}{bar}| {n_fmt}/{total_fmt} [{elapsed}{postfix}]")
    with flutes.safe_pool(args.n_procs, closing=[db, manager]) as pool:
        iterator, stats = iter_repos(
            db,
            args.max_repos,
            skip_to=args.skip_to,
            cache_path=args.repo_binary_info_cache_path,
            force_reprocess=args.force_reprocess)
        match_fn: Callable[[RepoInfo], Result] = functools.partial(
            match_functions,
            archive_folder=args.archive_dir,
            temp_folder=args.temp_dir,
            decompile_folder=args.decompile_dir,
            use_fake_libc_headers=args.use_fake_libc_headers,
            preprocess_timeout=args.preprocess_timeout,
            progress_bar=manager.proxy)

        repo_count = stats.repo_count
        func_count = stats.func_count
        func_without_ast_count = stats.func_without_ast_count
        for result in pool.imap_unordered(match_fn, iterator):
            if result is None:
                # Exception occurred.
                if args.exit_on_exception:
                    flutes.log(
                        "Exception occurred, exiting because 'exit_on_exception' is True",
                        "warning")
                    break
                continue

            # Write the matched functions to disk.
            result: Result  # type: ignore
            repo_dir = output_dir / result.repo_owner / result.repo_name
            repo_dir.mkdir(parents=True, exist_ok=True)
            with (repo_dir / "matched_funcs.jsonl").open("w") as f:
                for matched_func in result.matched_functions:
                    f.write(
                        json.dumps(matched_func._asdict(),
                                   separators=(',', ':')) + "\n")
            for sha, code in result.preprocessed_original_code.items():
                with (repo_dir / f"{sha}.c").open("w") as f:
                    pos = code.rfind(ghcc.parse.FAKE_LIBC_END_LINE)
                    if pos != -1:
                        code = code[(pos +
                                     len(ghcc.parse.FAKE_LIBC_END_LINE)):]
                    f.write(code)

            if args.write_db:
                db.add_repo(
                    result.repo_owner,
                    result.repo_name,
                    files_found=result.files_found,
                    funcs_found=result.functions_found,
                    funcs_matched=len(result.matched_functions),
                    funcs_matched_without_ast=result.funcs_without_asts)

            repo_count += 1
            func_count += len(result.matched_functions)
            func_without_ast_count += result.funcs_without_asts
            if repo_count % 100 == 0:
                flutes.log(
                    f"Processed {repo_count} repositories, {func_count} functions matched "
                    f"({func_without_ast_count} w/o AST)",
                    force_console=True)
Code Example #4
def main() -> None:
    if not ghcc.utils.verify_docker_image(verbose=True):
        exit(1)

    args = Arguments()
    if args.n_procs == 0:
        # Only do this in the single-threaded case.
        flutes.register_ipython_excepthook()
    flutes.set_log_file(args.log_file)
    flutes.set_logging_level(args.logging_level, console=True, file=False)
    flutes.log("Running with arguments:\n" + args.to_string(),
               force_console=True)

    if os.path.exists(args.clone_folder):
        flutes.log(
            f"Removing contents of clone folder '{args.clone_folder}'...",
            "warning",
            force_console=True)
        ghcc.utils.run_docker_command(
            ["rm", "-rf", "/usr/src/*"],
            user=0,
            directory_mapping={args.clone_folder: "/usr/src"})

    flutes.log("Crawling starts...", "warning", force_console=True)
    db = ghcc.RepoDB()
    libraries: Set[str] = set()
    if args.record_libraries is not None and os.path.exists(
            args.record_libraries):
        with open(args.record_libraries, "r") as f:
            libraries = set(f.read().split())

    def flush_libraries() -> None:
        if args.record_libraries is not None:
            with open(args.record_libraries, "w") as f:
                f.write("\n".join(libraries))

    with flutes.safe_pool(args.n_procs, closing=[db, flush_libraries]) as pool:
        iterator = iter_repos(db, args.repo_list_file, args.max_repos)
        pipeline_fn: Callable[[RepoInfo], Optional[PipelineResult]] = functools.partial(
            clone_and_compile,
            clone_folder=args.clone_folder,
            binary_folder=args.binary_folder,
            archive_folder=args.archive_folder,
            recursive_clone=args.recursive_clone,
            clone_timeout=args.clone_timeout,
            compile_timeout=args.compile_timeout,
            force_reclone=args.force_reclone,
            force_recompile=args.force_recompile,
            docker_batch_compile=args.docker_batch_compile,
            max_archive_size=args.max_archive_size,
            compression_type=args.compression_type,
            record_libraries=(args.record_libraries is not None),
            record_metainfo=args.record_metainfo,
            gcc_override_flags=args.gcc_override_flags)
        repo_count = 0
        meta_info = MetaInfo()
        for result in pool.imap_unordered(pipeline_fn, iterator):
            repo_count += 1
            if repo_count % 100 == 0:
                flutes.log(f"Processed {repo_count} repositories",
                           force_console=True)
            if result is None:
                continue
            repo_owner, repo_name = result.repo_info.repo_owner, result.repo_info.repo_name
            if args.write_db:
                if result.clone_success is not None or result.repo_info.db_result is None:
                    # If we didn't clone but `db_result` is None, the DB is likely
                    # inconsistent; add the repo anyway to prevent further errors.
                    repo_size = result.repo_size or -1  # a size of zero is probably also wrong
                    clone_success = result.clone_success if result.clone_success is not None else True
                    db.add_repo(repo_owner,
                                repo_name,
                                clone_success,
                                repo_size=repo_size)
                    flutes.log(f"Added {repo_owner}/{repo_name} to DB")
                if result.makefiles is not None:
                    update_result = db.update_makefile(
                        repo_owner,
                        repo_name,
                        result.makefiles,
                        ignore_length_mismatch=True)
                    if not update_result:
                        flutes.log(
                            f"Makefiles of {repo_owner}/{repo_name} not saved to DB due to Unicode encoding "
                            f"errors", "error")
            if result.libraries is not None:
                libraries.update(result.libraries)
                if repo_count % 10 == 0:  # flush every 10 repos
                    flush_libraries()

            if args.record_metainfo:
                meta_info.add_repo(result)
                if repo_count % 100 == 0:
                    flutes.log(repr(meta_info), force_console=True)

        flutes.log(repr(meta_info), force_console=True)